signalfd.c revision abf99a006172ea5aab2246bda23f9d6d935bf1ad
/*
* This file and its contents are supplied under the terms of the
* Common Development and Distribution License ("CDDL"), version 1.0.
* You may only use this file in accordance with the terms of version
* 1.0 of the CDDL.
*
* A full copy of the text of the CDDL should have accompanied this
* source. A copy of the CDDL is also available via the Internet at
*/
/*
* Copyright 2016 Joyent, Inc.
*/
/*
* Support for the signalfd facility, a Linux-borne facility for
* file descriptor-based synchronous signal consumption.
*
* As described on the signalfd(3C) man page, the general idea behind these
* file descriptors is that they can be used to synchronously consume signals
* via the read(2) syscall. While that capability already exists with the
* sigwaitinfo(3C) function, signalfd holds an advantage since it is file
* descriptor based: it is able to use the event facilities (poll(2), /dev/poll,
* event ports) to notify interested parties when consumable signals arrive.
*
* The signalfd lifecycle begins when a process opens /dev/signalfd. A minor
* will be allocated for them along with an associated signalfd_state_t struct.
* It is there where the mask of desired signals resides.
*
* Reading from the signalfd is straightforward and mimics the kernel behavior
* for sigtimedwait(). Signals continue to live on either the proc's p_sig, or
* thread's t_sig, member. During a read operation, those which match the mask
* are consumed so they are no longer pending.
*
* The poll side is more complex. Every time a signal is delivered, all of the
* signalfds on the process need to be examined in order to pollwake threads
* waiting for signal arrival.
*
* When a thread polling on a signalfd requires a pollhead, several steps must
* be taken to safely ensure the proper result. A sigfd_proc_state_t is
* created for the calling process if it does not yet exist. It is there where
* a list of sigfd_poll_waiter_t structures reside which associate pollheads to
* signalfd_state_t entries. The sigfd_proc_state_t list is walked to find a
* sigfd_poll_waiter_t matching the signalfd_state_t which corresponds to the
* polled resource. If one is found, it is reused. Otherwise a new one is
* created, incrementing the refcount on the signalfd_state_t, and it is added
* to the sigfd_poll_waiter_t list.
*
* The complications imposed by fork(2) are why the pollhead is stored in the
* associated sigfd_poll_waiter_t instead of directly in the signalfd_state_t.
* More than one process can hold a reference to the signalfd at a time but
* arriving signals should wake only process-local pollers. Additionally,
* signalfd_close is called only when the last referencing fd is closed, hiding
* occurrences of preceding threads which released their references. This
* necessitates reference counting on the signalfd_state_t so it is able to
* persist after close until all poll references have been cleansed. Doing so
* ensures that blocked pollers which hold references to the signalfd_state_t
* will be able to do clean-up after the descriptor itself has been closed.
*
* When a signal arrives in a process polling on signalfd, signalfd_pollwake_cb
* is called via the pointer in sigfd_proc_state_t. It will walk over the
* sigfd_poll_waiter_t entries present in the list, searching for any
* associated with a signalfd_state_t with a matching signal mask. The
* approach of keeping the poller list in p_sigfd was chosen because a process
* is likely to use few signalfds relative to its total file descriptors. It
* reduces the work required for each received signal.
*
* When matching sigfd_poll_waiter_t entries are encountered in the poller list
* during signalfd_pollwake_cb, they are dispatched into signalfd_wakeq to
* perform the pollwake. This is due to a lock ordering conflict between
* signalfd_poll and signalfd_pollwake_cb. The former acquires
* pollcache_t`pc_lock before proc_t`p_lock. The latter (via sigtoproc)
* reverses the order. Deferring the pollwake into a taskq means it can be
* performed without proc_t`p_lock held, avoiding the deadlock.
*
* The sigfd_list is self-cleaning; as signalfd_pollwake_cb is called, the list
* will clear out on its own. Any remaining per-process state which remains
* will be cleaned up by the exit helper (signalfd_exit_helper).
*
* The structures associated with signalfd state are designed to operate
* correctly across fork, but there is one caveat that applies. Using
* fork-shared signalfd descriptors in conjunction with fork-shared caching poll
* descriptors (such as /dev/poll or event ports) will result in missed poll
* wake-ups. This is caused by the pollhead identity of signalfd descriptors
* being dependent on the process they are polled from. Because it has a
* thread-local cache, poll(2) is unaffected by this limitation.
*
* Lock ordering:
*
* 1. signalfd_lock
* 2. signalfd_state_t`sfd_lock
*
* 1. proc_t`p_lock (to walk p_sigfd)
* 2. signalfd_state_t`sfd_lock
* 2a. signalfd_lock (after sfd_lock is dropped, when sfd_count falls to 0)
*/
#include <sys/signalfd.h>
#include <sys/conf.h>
#include <sys/sysmacros.h>
#include <sys/schedctl.h>
#include <sys/id_space.h>
#include <sys/modctl.h>
#include <sys/taskq_impl.h>
typedef struct signalfd_state signalfd_state_t;
struct signalfd_state {
};
typedef struct sigfd_poll_waiter {
short spw_pollev;
/*
* Protects global state in signalfd_devi, signalfd_minor, signalfd_softstate,
* and signalfd_state (including sfd_list field of members)
*/
static kmutex_t signalfd_lock;
static void *signalfd_softstate; /* softstate pointer */
static void
{
}
static void
{
if (force_invalidate) {
}
if (force_invalidate) {
/*
* The invalidation performed in signalfd_close is done
* while signalfd_lock is held.
*/
} else {
}
return;
}
}
static sigfd_poll_waiter_t *
{
break;
}
}
return (pw);
}
static sigfd_poll_waiter_t *
{
break;
}
}
}
return (pw);
}
static void
{
}
}
/*
 * Exit helper for per-process signalfd state. The file header describes
 * this routine as the place where any per-process state left behind by
 * the self-cleaning poller list is finally torn down.
 *
 * NOTE(review): in the code as it stands the critical section under
 * p_lock is empty and `p` has no visible declaration -- the actual
 * clean-up work is not shown here; confirm against the full source.
 */
static void
signalfd_exit_helper(void)
{
/* Per-process signalfd state is walked under proc_t`p_lock. */
mutex_enter(&p->p_lock);
mutex_exit(&p->p_lock);
}
/*
 * Perform pollwake for a sigfd_poll_waiter_t entry.
 * Thanks to the strict and conflicting lock orders required for signalfd_poll
 * (pc_lock before p_lock) and signalfd_pollwake_cb (p_lock before pc_lock),
 * this is relegated to a taskq to avoid deadlock.
 *
 * arg is the opaque taskq argument; presumably it is the sigfd_poll_waiter_t
 * that signalfd_pollwake_cb pulled off the poller list -- TODO confirm.
 */
static void
signalfd_wake_task(void *arg)
{
}
/*
* Called every time a signal is delivered to the process so that we can
* see if any signal stream needs a pollwakeup. We maintain a list of
* signal state elements so that we don't have to look at every file descriptor
* on the process. If necessary, a further optimization would be to maintain a
* signal set mask that is a union of all of the sets in the list so that
* we don't even traverse the list if the signal is not in one of the elements.
* However, since the list is likely to be very short, this is not currently
* being done. A more complex data structure might also be used, but it is
* unclear what that would be since each signal set needs to be checked for a
* match.
*/
static void
{
} else {
continue;
}
/*
* Pull the sigfd_poll_waiter_t out of the list and dispatch it
* to perform a pollwake. This cannot be done synchronously
* since signalfd_poll and signalfd_pollwake_cb have
* conflicting lock orders which can deadlock.
*/
&pw->spw_taskent);
}
}
static int
{
if (minor != SIGNALFDMNRN_SIGNALFD)
return (ENXIO);
return (ENODEV);
}
return (0);
}
/*
* Consume one signal from our set in a manner similar to sigtimedwait().
* The block parameter is used to control whether we wait for a signal or
* return immediately if no signal is pending. We use the thread's t_sigwait
* member in the same way that it is used by sigtimedwait.
*
* Return 0 if we successfully consumed a signal or an errno if not.
*/
static int
{
int timecheck = 0;
int ret = 0;
gethrestime(&now);
}
mutex_enter(&p->p_lock);
/*
* set the thread's signal mask to unmask those signals in the
* specified set.
*/
/*
* Based on rqtp, wait indefinitely until we take a signal in our set
* or return immediately if there are no signals pending from our set.
*/
timecheck)) > 0)
continue;
/* Restore thread's signal mask to its previous value. */
if (ret == -1) {
/* no signals pending */
mutex_exit(&p->p_lock);
sigemptyset(&t->t_sigwait);
return (EAGAIN); /* no signals pending */
}
/* Don't bother with signal if it is not in request set. */
if (lwp->lwp_cursig == 0 ||
mutex_exit(&p->p_lock);
/*
* lwp_cursig is zero if pokelwps() awakened cv_wait_sig().
* This happens if some other thread in this process called
* forkall() or exit().
*/
sigemptyset(&t->t_sigwait);
return (EINTR);
}
if (lwp->lwp_curinfo) {
} else {
}
lwp->lwp_cursig = 0;
lwp->lwp_extsig = 0;
mutex_exit(&p->p_lock);
/* Convert k_siginfo into external, datamodel independent, struct. */
if (lwp->lwp_curinfo) {
}
sigemptyset(&t->t_sigwait);
return (ret);
}
/*
* This is similar to sigtimedwait. Based on the fd mode we may wait until a
* signal within our specified set is posted. We consume as many available
* signals within our set as we can.
*/
static int
{
int res;
return (EINVAL);
if (sigisempty(&set))
do {
if (res == 0) {
/*
* After consuming one signal, do not block while
* trying to consume more.
*/
/*
* Refresh the matching signal set in case it was
* updated during the wait.
*/
if (sigisempty(&set))
break;
}
if (got_one)
res = 0;
return (res);
}
/*
* If ksigset_t's were a single word, we would do:
* return (((p->p_sig | t->t_sig) & set) & fillset);
*/
static int
{
}
static int
{
short revents = 0;
/*
* Enable pollwakeup handling.
*/
mutex_enter(&p->p_lock);
mutex_exit(&p->p_lock);
sizeof (sigfd_poll_waiter_t),
/* Check again, after blocking for the alloc. */
mutex_enter(&p->p_lock);
} else {
/* someone beat us to it */
}
}
mutex_exit(&p->p_lock);
}
return (0);
}
static int
{
switch (cmd) {
case SIGNALFDIOC_MASK:
md) != 0)
return (0);
default:
break;
}
return (ENOTTY);
}
static int
{
/* Make sure state is removed from this proc's pollwake list. */
mutex_enter(&p->p_lock);
}
}
mutex_exit(&p->p_lock);
}
return (0);
}
static int
{
return (DDI_FAILURE);
if (signalfd_minor == NULL) {
return (DDI_FAILURE);
}
sizeof (signalfd_state_t *), 0) != 0) {
return (DDI_FAILURE);
}
return (DDI_FAILURE);
}
0, INT_MAX, TASKQ_PREPOPULATE);
return (DDI_SUCCESS);
}
/*
 * Driver detach handler (presumably signalfd_detach, as named in the
 * dev_ops table -- the name/parameter line is not visible; confirm).
 *
 * Only DDI_DETACH is honoured; every other command fails immediately.
 * The detach is also refused while the signalfd_state list is non-empty.
 *
 * Returns DDI_SUCCESS when the detach may proceed, DDI_FAILURE otherwise.
 */
static int
{
switch (cmd) {
case DDI_DETACH:
break;
default:
/* Anything other than a plain detach is rejected. */
return (DDI_FAILURE);
}
if (!list_is_empty(&signalfd_state)) {
/*
 * There are dangling poll waiters holding signalfd_state_t
 * entries on the global list. Detach is not possible until
 * they purge themselves.
 */
return (DDI_FAILURE);
}
/*
 * With no remaining entries in the signalfd_state list, the wake taskq
 * should be empty with no possibility for new entries.
 */
return (DDI_SUCCESS);
}
/*
 * Driver getinfo handler (presumably signalfd_info, as named in the
 * dev_ops table -- the name/parameter line is not visible; confirm).
 *
 * DDI_INFO_DEVT2DEVINFO stores signalfd_devi in *result.
 * DDI_INFO_DEVT2INSTANCE stores instance 0 in *result.
 * Any other request leaves *result untouched and fails.
 *
 * Returns DDI_SUCCESS for the two supported requests, else DDI_FAILURE.
 */
static int
{
int error;
switch (infocmd) {
case DDI_INFO_DEVT2DEVINFO:
*result = (void *)signalfd_devi;
error = DDI_SUCCESS;
break;
case DDI_INFO_DEVT2INSTANCE:
/* The instance number reported is always 0. */
*result = (void *)0;
error = DDI_SUCCESS;
break;
default:
error = DDI_FAILURE;
}
return (error);
}
static struct cb_ops signalfd_cb_ops = {
signalfd_open, /* open */
signalfd_close, /* close */
nulldev, /* strategy */
nulldev, /* print */
nodev, /* dump */
signalfd_read, /* read */
nodev, /* write */
signalfd_ioctl, /* ioctl */
nodev, /* devmap */
nodev, /* mmap */
nodev, /* segmap */
signalfd_poll, /* poll */
ddi_prop_op, /* cb_prop_op */
0, /* streamtab */
};
/*
 * Device operations vector for the signalfd pseudo driver.
 *
 * getinfo, attach and detach are implemented in this file, and the
 * character entry points come from signalfd_cb_ops. identify and probe
 * are routed to nulldev, reset and power management to nodev. There are
 * no bus operations, and quiesce uses ddi_quiesce_not_needed.
 */
static struct dev_ops signalfd_ops = {
DEVO_REV, /* devo_rev */
0, /* refcnt */
signalfd_info, /* get_dev_info */
nulldev, /* identify */
nulldev, /* probe */
signalfd_attach, /* attach */
signalfd_detach, /* detach */
nodev, /* reset */
&signalfd_cb_ops, /* driver operations */
NULL, /* bus operations */
nodev, /* dev power */
ddi_quiesce_not_needed, /* quiesce */
};
&mod_driverops, /* module type (this is a pseudo driver) */
"signalfd support", /* name of module */
&signalfd_ops, /* driver ops */
};
static struct modlinkage modlinkage = {
(void *)&modldrv,
};
/*
 * Loadable module entry point: install this module's linkage and pass
 * mod_install()'s status straight back to the caller.
 */
int
_init(void)
{
	int status;

	status = mod_install(&modlinkage);
	return (status);
}
int
{
}
/*
 * Loadable module exit point: ask the module framework to remove this
 * module's linkage and pass mod_remove()'s status straight back.
 */
int
_fini(void)
{
	int status;

	status = mod_remove(&modlinkage);
	return (status);
}