From fff5c2a78a75b0b4537a13b68e88a7ee086352e6 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 13 Sep 2024 17:40:20 -0700 Subject: [PATCH] Implement `select`. Add a `select` function, defined only on platforms where it doesn't have an `FD_SETSIZE` limitation. --- src/backend/libc/event/syscalls.rs | 54 +++++++++ src/event/mod.rs | 4 + src/event/poll.rs | 2 +- src/event/select.rs | 57 +++++++++ src/lib.rs | 2 + tests/event/main.rs | 2 + tests/event/select.rs | 180 +++++++++++++++++++++++++++++ 7 files changed, 300 insertions(+), 1 deletion(-) create mode 100644 src/event/select.rs create mode 100644 tests/event/select.rs diff --git a/src/backend/libc/event/syscalls.rs b/src/backend/libc/event/syscalls.rs index dcd0135f0..fdba539a4 100644 --- a/src/backend/libc/event/syscalls.rs +++ b/src/backend/libc/event/syscalls.rs @@ -16,6 +16,8 @@ use crate::event::port::Event; target_os = "espidf" ))] use crate::event::EventfdFlags; +#[cfg(any(apple, freebsdlike, target_os = "netbsd"))] +use crate::event::FdSetElement; use crate::event::PollFd; use crate::io; #[cfg(solarish)] @@ -125,6 +127,58 @@ pub(crate) fn poll(fds: &mut [PollFd<'_>], timeout: c::c_int) -> io::Result, +) -> io::Result { + let timeout_data; + let timeout_ptr = match timeout { + Some(timeout) => { + // Convert from `Timespec` to `c::timeval`. + timeout_data = c::timeval { + tv_sec: timeout.tv_sec, + tv_usec: ((timeout.tv_nsec + 999) / 1000) as _, + }; + &timeout_data + } + None => core::ptr::null(), + }; + + // On Apple platforms, use the specially mangled `select` which doesn't + // have an `FD_SETSIZE` limitation. + #[cfg(apple)] + { + extern "C" { + #[link_name = "select$DARWIN_EXTSN$NOCANCEL"] + fn select( + nfds: c::c_int, + readfds: *mut FdSetElement, + writefds: *mut FdSetElement, + errorfds: *mut FdSetElement, + timeout: *const c::timeval, + ) -> c::c_int; + } + + ret_c_int(select(nfds, readfds, writefds, exceptfds, timeout_ptr)) + } + + // Otherwise just use the normal `select`. 
+ #[cfg(not(apple))] + { + ret_c_int(c::select( + nfds, + readfds.cast(), + writefds.cast(), + exceptfds.cast(), + timeout_ptr as *mut c::timeval, + )) + } +} + #[cfg(solarish)] pub(crate) fn port_create() -> io::Result { unsafe { ret_owned_fd(c::port_create()) } diff --git a/src/event/mod.rs b/src/event/mod.rs index dab9c6932..be0f1bc89 100644 --- a/src/event/mod.rs +++ b/src/event/mod.rs @@ -16,6 +16,8 @@ mod pause; mod poll; #[cfg(solarish)] pub mod port; +#[cfg(any(apple, freebsdlike, target_os = "netbsd"))] +mod select; #[cfg(any( linux_kernel, @@ -27,3 +29,5 @@ pub use eventfd::{eventfd, EventfdFlags}; #[cfg(not(any(windows, target_os = "redox", target_os = "wasi")))] pub use pause::*; pub use poll::{poll, PollFd, PollFlags}; +#[cfg(any(apple, freebsdlike, target_os = "netbsd"))] +pub use select::{select, FdSetElement, Timespec}; diff --git a/src/event/poll.rs b/src/event/poll.rs index 0937dd6fd..2b60a920f 100644 --- a/src/event/poll.rs +++ b/src/event/poll.rs @@ -2,7 +2,7 @@ use crate::{backend, io}; pub use backend::event::poll_fd::{PollFd, PollFlags}; -/// `poll(self.fds, timeout)` +/// `poll(self.fds, timeout)`—Wait for events on lists of file descriptors. /// /// # References /// - [Beej's Guide to Network Programming] diff --git a/src/event/select.rs b/src/event/select.rs new file mode 100644 index 000000000..85a5b6a9b --- /dev/null +++ b/src/event/select.rs @@ -0,0 +1,57 @@ +use crate::{backend, io}; + +pub use crate::timespec::Timespec; + +/// Bitfield array element type for use with [`select`]. +#[cfg(all( + target_pointer_width = "64", + any(target_os = "freebsd", target_os = "dragonfly") +))] +pub type FdSetElement = i64; + +/// Bitfield array element type for use with [`select`]. +#[cfg(not(all( + target_pointer_width = "64", + any(target_os = "freebsd", target_os = "dragonfly") +)))] +pub type FdSetElement = i32; + +/// `select(nfds, readfds, writefds, exceptfds, timeout)`—Wait for events on +/// sets of file descriptors. 
+/// +/// This `select` wrapper differs from POSIX in that `nfds` is not limited to +/// `FD_SETSIZE`. Instead of using the opaque fixed-sized `fd_set` type, this +/// function takes raw pointers to arrays of `nfds.div_ceil(size_of::<FdSetElement>())` +/// elements of type `FdSetElement`. +/// +/// In particular, on Apple platforms, it behaves as if +/// `_DARWIN_UNLIMITED_SELECT` were predefined. And on Linux platforms, it is +/// not defined because Linux's `select` always has an `FD_SETSIZE` limitation. +/// On Linux, it is recommended to use [`poll`] instead. +/// +/// # Safety +/// +/// `readfds`, `writefds`, `exceptfds` must point to arrays of `FdSetElement` +/// containing at least `nfds.div_ceil(size_of::<FdSetElement>())` elements. +/// +/// # References +/// - [POSIX] +/// - [Apple] +/// - [FreeBSD] +/// - [NetBSD] +/// - [DragonFly BSD] +/// +/// [POSIX]: https://pubs.opengroup.org/onlinepubs/9799919799/functions/select.html +/// [Apple]: https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man2/select.2.html +/// [FreeBSD]: https://man.freebsd.org/cgi/man.cgi?query=select&sektion=2 +/// [NetBSD]: https://man.netbsd.org/select.2 +/// [DragonFly BSD]: https://man.dragonflybsd.org/?command=select&section=2 +pub unsafe fn select( + nfds: i32, + readfds: *mut FdSetElement, + writefds: *mut FdSetElement, + exceptfds: *mut FdSetElement, + timeout: Option<&Timespec>, +) -> io::Result { + backend::event::syscalls::select(nfds, readfds, writefds, exceptfds, timeout) +} diff --git a/src/lib.rs b/src/lib.rs index 0fd0dc4f8..ec545d438 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -71,6 +71,7 @@ //! - Provide y2038 compatibility, on platforms which support this. //! - Correct selected platform bugs, such as behavioral differences when //! running under seccomp. +//! - Use `timespec` for timestamps instead of `timeval`. //! //! Things they don't do include: //! 
- Detecting whether functions are supported at runtime, except in specific @@ -362,6 +363,7 @@ mod signal; feature = "runtime", feature = "thread", feature = "time", + all(feature = "event", any(apple, freebsdlike, target_os = "netbsd")), all( linux_raw, not(feature = "use-libc-auxv"), diff --git a/tests/event/main.rs b/tests/event/main.rs index 68f999737..72da82f56 100644 --- a/tests/event/main.rs +++ b/tests/event/main.rs @@ -10,3 +10,5 @@ mod epoll; #[cfg(not(target_os = "wasi"))] mod eventfd; mod poll; +#[cfg(any(apple, freebsdlike, target_os = "netbsd"))] +mod select; diff --git a/tests/event/select.rs b/tests/event/select.rs new file mode 100644 index 000000000..6eeb97b38 --- /dev/null +++ b/tests/event/select.rs @@ -0,0 +1,180 @@ +#[cfg(feature = "pipe")] +use { + rustix::event::{select, FdSetElement}, + rustix::fd::{AsRawFd, FromRawFd, OwnedFd, RawFd}, + rustix::io::retry_on_intr, + std::cmp::max, +}; + +#[cfg(feature = "pipe")] +#[test] +fn test_select() { + use core::mem::size_of; + use core::ptr::null_mut; + use rustix::event::Timespec; + use rustix::io::{read, write}; + use rustix::pipe::pipe; + + // The number of bits in an `fd_set` element. + const BITS: usize = size_of::() * 8; + + // Create a pipe. + let (reader, writer) = pipe().unwrap(); + let nfds = max(reader.as_raw_fd(), writer.as_raw_fd()) + 1; + + // `select` should say there's nothing ready to be read from the pipe. + let mut readfds = vec![0 as FdSetElement; nfds as usize]; + readfds[reader.as_raw_fd() as usize / BITS] |= 1 << (reader.as_raw_fd() as usize % BITS); + let num = retry_on_intr(|| unsafe { + select( + nfds, + readfds.as_mut_ptr(), + null_mut(), + null_mut(), + Some(&Timespec { + tv_sec: 0, + tv_nsec: 0, + }), + ) + }) + .unwrap(); + assert_eq!(num, 0); + assert_eq!(readfds[reader.as_raw_fd() as usize / BITS], 0); + + // Write a byte to the pipe. + assert_eq!(retry_on_intr(|| write(&writer, b"a")).unwrap(), 1); + + // `select` should now say there's data to be read. 
+ let mut readfds = vec![0 as FdSetElement; nfds as usize]; + readfds[reader.as_raw_fd() as usize / BITS] |= 1 << (reader.as_raw_fd() as usize % BITS); + let num = retry_on_intr(|| unsafe { + select(nfds, readfds.as_mut_ptr(), null_mut(), null_mut(), None) + }) + .unwrap(); + assert_eq!(num, 1); + assert_eq!( + readfds[reader.as_raw_fd() as usize / BITS], + 1 << (reader.as_raw_fd() as usize % BITS) + ); + + // Read the byte from the pipe. + let mut buf = [b'\0']; + assert_eq!(retry_on_intr(|| read(&reader, &mut buf)).unwrap(), 1); + assert_eq!(buf[0], b'a'); + + // Select should now say there's no more data to be read. + readfds[reader.as_raw_fd() as usize / BITS] |= 1 << (reader.as_raw_fd() as usize % BITS); + let num = retry_on_intr(|| unsafe { + select( + nfds, + readfds.as_mut_ptr(), + null_mut(), + null_mut(), + Some(&Timespec { + tv_sec: 0, + tv_nsec: 0, + }), + ) + }) + .unwrap(); + assert_eq!(num, 0); + assert_eq!(readfds[reader.as_raw_fd() as usize / BITS], 0); +} + +#[cfg(feature = "pipe")] +#[test] +fn test_select_with_great_fds() { + use core::cmp::max; + use core::mem::size_of; + use core::ptr::null_mut; + use rustix::event::select; + use rustix::event::Timespec; + use rustix::io::{read, write}; + use rustix::pipe::pipe; + use rustix::process::{getrlimit, setrlimit, Resource}; + + // The number of bits in an `fd_set` element. + const BITS: usize = size_of::() * 8; + + // Create a pipe. + let (reader, writer) = pipe().unwrap(); + + // Raise the file descriptor limit so that we can test fds above + // `FD_SETSIZE`. + let orig_rlimit = getrlimit(Resource::Nofile); + let mut rlimit = orig_rlimit; + if let Some(current) = rlimit.current { + rlimit.current = Some(max(current, libc::FD_SETSIZE as u64 + 2)); + } + setrlimit(Resource::Nofile, rlimit).unwrap(); + + // Create a fd at `FD_SETSIZE + 1` out of thin air. 
Use `libc` instead + // of `OwnedFd::from_raw_fd` because grabbing a fd out of thin air + // violates Rust's concept of I/O safety (and wouldn't make sense to do + // in anything other than a test like this). + let great_fd = unsafe { libc::dup2(reader.as_raw_fd(), libc::FD_SETSIZE as RawFd + 1) }; + let reader = unsafe { OwnedFd::from_raw_fd(great_fd) }; + + let nfds = max(reader.as_raw_fd(), writer.as_raw_fd()) + 1; + + // `select` should say there's nothing ready to be read from the pipe. + let mut readfds = vec![0 as FdSetElement; nfds as usize]; + readfds[reader.as_raw_fd() as usize / BITS] |= 1 << (reader.as_raw_fd() as usize % BITS); + let num = retry_on_intr(|| unsafe { + select( + nfds, + readfds.as_mut_ptr(), + null_mut(), + null_mut(), + Some(&Timespec { + tv_sec: 0, + tv_nsec: 0, + }), + ) + }) + .unwrap(); + assert_eq!(num, 0); + assert_eq!(readfds[reader.as_raw_fd() as usize / BITS], 0); + + // Write a byte to the pipe. + assert_eq!(retry_on_intr(|| write(&writer, b"a")).unwrap(), 1); + + // `select` should now say there's data to be read. + let mut readfds = vec![0 as FdSetElement; nfds as usize]; + readfds[reader.as_raw_fd() as usize / BITS] |= 1 << (reader.as_raw_fd() as usize % BITS); + let num = retry_on_intr(|| unsafe { + select(nfds, readfds.as_mut_ptr(), null_mut(), null_mut(), None) + }) + .unwrap(); + assert_eq!(num, 1); + assert_eq!( + readfds[reader.as_raw_fd() as usize / BITS], + 1 << (reader.as_raw_fd() as usize % BITS) + ); + + // Read the byte from the pipe. + let mut buf = [b'\0']; + assert_eq!(retry_on_intr(|| read(&reader, &mut buf)).unwrap(), 1); + assert_eq!(buf[0], b'a'); + + // Select should now say there's no more data to be read. 
+ readfds[reader.as_raw_fd() as usize / BITS] |= 1 << (reader.as_raw_fd() as usize % BITS); + let num = retry_on_intr(|| unsafe { + select( + nfds, + readfds.as_mut_ptr(), + null_mut(), + null_mut(), + Some(&Timespec { + tv_sec: 0, + tv_nsec: 0, + }), + ) + }) + .unwrap(); + assert_eq!(num, 0); + assert_eq!(readfds[reader.as_raw_fd() as usize / BITS], 0); + + // Reset the process limit. + setrlimit(Resource::Nofile, orig_rlimit).unwrap(); +}