poll.c revision 5c7544f740a475b813dfbc871b7c965e54df7989
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
* Portions of this source code were derived from Berkeley 4.3 BSD
* under license from the Regents of the University of California.
*/
#include <sys/isa_defs.h>
#include <sys/sysmacros.h>
#include <sys/poll_impl.h>
#include <sys/port_impl.h>
#include <sys/schedctl.h>
/*
* global counters to collect some stats
*/
static struct {
} pollstats = {
{ "polllistmiss", KSTAT_DATA_UINT64 },
{ "pollcachehit", KSTAT_DATA_UINT64 },
{ "pollcachephit", KSTAT_DATA_UINT64 },
{ "pollcachemiss", KSTAT_DATA_UINT64 }
};
struct pplock {
short pp_flag;
};
#ifdef DEBUG
static int pollcheckxref(pollstate_t *, int);
static void pollcheckphlist(void);
static int pollcheckrevents(pollstate_t *, int, int, int);
static void checkpolldat(pollstate_t *);
#endif /* DEBUG */
int *);
/*
* Data structure overview:
* The per-thread poll state consists of
* one pollstate_t
* one pollcache_t
* one bitmap with one event bit per fd
* a (two-dimensional) hashed array of polldat_t structures - one entry
* per fd
*
* This conglomerate of data structures interact with
* the pollhead which is used by VOP_POLL and pollwakeup
* (protected by the PHLOCK, cached array of plocks), and
* the fpollinfo list hanging off the fi_list which is used to notify
* poll when a cached fd is closed. This is protected by uf_lock.
*
* Invariants:
* pd_php (pollhead pointer) is set iff (if and only if) the polldat
* is on that pollhead. This is modified atomically under pc_lock.
*
* pd_fp (file_t pointer) is set iff the thread is on the fpollinfo
* list for that open file.
* This is modified atomically under pc_lock.
*
* pd_count is the sum (over all values of i) of pd_ref[i].xf_refcnt.
* Iff pd_ref[i].xf_refcnt >= 1 then
* ps_pcacheset[i].pcs_pollfd[pd_ref[i].xf_position].fd == pd_fd
* Iff pd_ref[i].xf_refcnt > 1 then
* In ps_pcacheset[i].pcs_pollfd between index
* pd_ref[i].xf_position] and the end of the list
* there are xf_refcnt entries with .fd == pd_fd
*
* Locking design:
* Whenever possible the design relies on the fact that the poll cache state
* is per thread thus for both poll and exit it is self-synchronizing.
* Thus the key interactions where other threads access the state are:
* pollwakeup (and polltime), and
* close cleaning up the cached references to an open file
*
* The two key locks in poll proper is ps_lock and pc_lock.
*
* The ps_lock is used for synchronization between poll, (lwp_)exit and close
* to ensure that modifications to pollcacheset structure are serialized.
* This lock is held through most of poll() except where poll sleeps
* since there is little need to handle closes concurrently with the execution
* of poll.
* The pc_lock protects most of the fields in pollcache structure and polldat
* structures (which are accessed by poll, pollwakeup, and polltime)
* with the exception of fields that are only modified when only one thread
* can access this per-thread state.
* Those exceptions occur in poll when first allocating the per-thread state,
* when poll grows the number of polldat (never shrinks), and when
* exit/pollcleanup has ensured that there are no references from either
* pollheads or fpollinfo to the threads poll state.
*
* Poll(2) system call is the only path which ps_lock and pc_lock are both
* held, in that order. It needs ps_lock to synchronize with close and
* lwp_exit; and pc_lock with pollwakeup.
*
* The locking interaction between pc_lock and PHLOCK take into account
* that poll acquires these locks in the order of pc_lock and then PHLOCK
* while pollwakeup does it in the reverse order. Thus pollwakeup implements
* deadlock avoidance by dropping the locks and reacquiring them in the
* reverse order. For this to work pollwakeup needs to prevent the thread
* from exiting and freeing all of the poll related state. This is done
* using
* the pc_no_exit lock
* the pc_busy counter
* the pc_busy_cv condition variable
*
* The locking interaction between pc_lock and uf_lock has similar
* which acquire uf_lock. The poll cleanup in close needs to hold uf_lock
* to prevent poll or exit from doing a delfpollinfo after which the thread
* might exit. But the cleanup needs to acquire pc_lock when modifying
* the poll cache state. The solution is to use pc_busy and do the close
* cleanup in two phases:
* First close calls pollblockexit which increments pc_busy.
* This prevents the per-thread poll related state from being freed.
* Then close drops uf_lock and calls pollcacheclean.
* This routine can then acquire pc_lock and remove any references
* to the closing fd (as well as recording that it has been closed
* so that a POLLNVAL can be generated even if the fd is reused before
* poll has been woken up and checked getf() again).
*
* When removing a polled fd from poll cache, the fd is always removed
* from pollhead list first and then from fpollinfo list, i.e.,
* pollhead_delete() is called before delfpollinfo().
*
*
* Locking hierarchy:
* pc_no_exit is a leaf level lock.
* ps_lock is held when acquiring pc_lock (except when pollwakeup
* acquires pc_lock).
* pc_lock might be held when acquiring PHLOCK (pollhead_insert/
* pollhead_delete)
* pc_lock is always held (but this is not required)
* when acquiring PHLOCK (in polladd/pollhead_delete and pollwakeup called
* from pcache_clean_entry).
* pc_lock is held across addfpollinfo/delfpollinfo which acquire
* uf_lock.
* pollwakeup tries to acquire pc_lock while holding PHLOCK
* but drops the locks and reacquire them in reverse order to avoid
* deadlock.
*
* Note also that there is deadlock avoidance support for VOP_POLL routines
* and pollwakeup involving a file system or driver lock.
* See below.
*/
/*
* Deadlock avoidance support for VOP_POLL() routines. This is
* sometimes necessary to prevent deadlock between polling threads
* (which hold poll locks on entry to xx_poll(), then acquire foo)
* and pollwakeup() threads (which hold foo, then acquire poll locks).
*
* pollunlock(void) releases whatever poll locks the current thread holds,
* returning a cookie for use by pollrelock();
*
* pollrelock(cookie) reacquires previously dropped poll locks;
*
* polllock(php, mutex) does the common case: pollunlock(),
* acquire the problematic mutex, pollrelock().
*/
int
pollunlock(void)
{
int lockstate = 0;
/*
* If the pollrelock/pollunlock is called as a result of poll(2),
* the t_pollcache should be NULL.
*/
else
lockstate = 1;
}
return (lockstate);
}
void
pollrelock(int lockstate)
{
/*
* If the pollrelock/pollunlock is called as a result of poll(2),
* the t_pollcache should be NULL.
*/
else
if (lockstate > 0)
}
/* ARGSUSED */
void
{
if (!mutex_tryenter(lp)) {
int lockstate = pollunlock();
}
}
static int
{
int fdcnt = 0;
int rval;
int i;
int timecheck = 0;
int imm_timeout = 0;
int error = 0;
int cacheindex = 0; /* which cache set is used */
/*
* Determine the precise future time of the requested timeout, if any.
*/
imm_timeout = 1;
else {
gethrestime(&now);
}
}
/*
* Reset our signal mask, if requested.
*/
mutex_enter(&p->p_lock);
/*
* Call cv_timedwait_sig() just to check for signals.
* We will return immediately with either 0 or -1.
*/
mutex_exit(&p->p_lock);
goto pollout;
}
mutex_exit(&p->p_lock);
}
/*
* Check to see if this guy just wants to use poll() as a timeout.
* If yes then bypass all the other stuff and make him sleep.
*/
if (nfds == 0) {
/*
* Sleep until we have passed the requested future
* time or until interrupted by a signal.
* Do not check for signals if we have a zero timeout.
*/
if (!imm_timeout) {
mutex_enter(&t->t_delay_lock);
continue;
mutex_exit(&t->t_delay_lock);
if (rval == 0)
}
goto pollout;
}
mutex_enter(&p->p_lock);
mutex_exit(&p->p_lock);
goto pollout;
}
/*
* Need to allocate memory for pollstate before anything because
* the mutex and cv are created in this space
*/
t->t_pollstate = pollstate_create();
ps = t->t_pollstate;
}
/*
* NOTE: for performance, buffers are saved across poll() calls.
* The theory is that if a process polls heavily, it tends to poll
* on the same set of descriptors. Therefore, we only reallocate
* buffers when nfds changes. There is no hysteresis control,
* because there is no data to suggest that this is necessary;
* the penalty of reallocating is not *that* great in any event.
*/
}
goto pollout;
}
/*
* If the process has page 0 mapped, then the copyin() above
* will succeed even if fds is NULL. However, our cached
* poll lists are keyed by the address of the passed-in fds
* structure, and we use the value NULL to indicate an unused
* poll cache list entry. As such, we elect not to support
* NULL as a valid (user) memory address and fail the poll()
* call.
*/
goto pollout;
}
/*
* If this thread polls for the first time, allocate ALL poll
* cache data structures and cache the poll fd list. This
* allocation is delayed till now because lwp's polling 0 fd
* (i.e. using poll as timeout()) don't need this memory.
*/
/*
* poll and cache this poll fd list in ps_pcacheset[0].
*/
goto pollout;
}
} else {
/*
* Not first time polling. Select a cached poll list by
* matching user pollfd list buffer address.
*/
/*
* counter is wrapping around.
*/
}
/*
* examine and resolve possible
* difference of the current poll
* list and previously cached one.
* If there is an error during resolve(),
* the callee will guarantee the consistency
* of cached poll list and cache content.
*/
if (error) {
goto pollout;
}
break;
}
/*
* Note that pcs_usradr field of an used entry won't be
* NULL because it stores the address of passed-in fds,
* and NULL fds will not be cached (Then it is either
* the special timeout case when nfds is 0 or it returns
* failure directly).
*/
/*
* found an unused entry. Use it to cache
* this poll list.
*/
goto pollout;
}
break;
}
}
/*
* We failed to find a matching cached poll fd list.
* replace an old list.
*/
if (error) {
goto pollout;
}
}
}
/*
* Always scan the bitmap with the lock on the pollcache held.
* This is to make sure that a wakeup does not come undetected.
* If the lock is not held, a pollwakeup could have come for an
* fd we already checked but before this thread sleeps, in which
* case the wakeup is missed. Now we hold the pcache lock and
* check the bitmap again. This will prevent wakeup from happening
* while we hold pcache lock since pollwakeup() will also lock
* the pcache before updating poll bitmap.
*/
for (;;) {
break;
}
/*
* If T_POLLWAKE is set, a pollwakeup() was performed on
* one of the file descriptors. This can happen only if
* one of the VOP_POLL() functions dropped pcp->pc_lock.
* The only current cases of this is in procfs (prpoll())
* and STREAMS (strpoll()).
*/
continue;
/*
* If you get here, the poll of fds was unsuccessful.
* Wait until some fd becomes readable, writable, or gets
* an exception, or until a signal or a timeout occurs.
* Do not check for signals if we have a zero timeout.
*/
if (imm_timeout)
rval = -1;
else
/*
* If we have received a signal or timed out
* then break out and return.
*/
if (rval <= 0) {
if (rval == 0)
break;
}
/*
* We have not received a signal or timed out.
* Continue around and poll fds again.
*/
}
/*
* If we changed the signal mask but we received
* no signal then restore the signal mask.
* Otherwise psig() will deal with the signal mask.
*/
mutex_enter(&p->p_lock);
if (lwp->lwp_cursig == 0) {
}
mutex_exit(&p->p_lock);
}
if (error)
/*
* Copy out the events and return the fdcnt to the user.
*/
if (nfds != 0 &&
#ifdef DEBUG
/*
* Another sanity check:
*/
if (fdcnt) {
int reventcnt = 0;
for (i = 0; i < nfds; i++) {
continue;
}
reventcnt++;
}
}
} else {
for (i = 0; i < nfds; i++) {
}
}
#endif /* DEBUG */
return (fdcnt);
}
/*
* This system call trap exists solely for binary compatibility with
* old statically-linked applications. It is not called from libc.
* It should be removed in the next release.
*/
int
{
if (time_out < 0)
else {
}
}
/*
* This is the system call trap that poll(),
* select() and pselect() are built upon.
* It is a private interface between libc and the kernel.
*/
int
{
else {
if (datamodel == DATAMODEL_NATIVE) {
} else {
}
if (itimerspecfix(&ts))
}
else {
}
}
/*
* Clean up any state left around by poll(2). Called when a thread exits.
*/
void
{
return;
/*
* free up all cached poll fds
*/
goto pollcleanout;
}
/*
* a close lwp can race with us when cleaning up a polldat
* entry. We hold the ps_lock when cleaning hash table.
* Since this pollcache is going away anyway, there is no
* need to hold the pc_lock.
*/
#ifdef DEBUG
/*
* At this point, all fds cached by this lwp should be
* cleaned up. There should be no fd in fi_list still
* referencing this thread.
*/
checkfpollinfo(); /* sanity check */
pollcheckphlist(); /* sanity check */
#endif /* DEBUG */
}
/*
* Be sure no one is referencing thread before exiting
*/
}
/*
* pollwakeup() - poke threads waiting in poll() for some event
* on a particular object.
*
* The threads hanging off of the specified pollhead structure are scanned.
* If their event mask matches the specified event(s), then pollnotify() is
* called to poke the thread.
*
* Multiple events may be specified. When POLLHUP or POLLERR are specified,
* all waiting threads are poked.
*
* It is important that pollnotify() not drop the lock protecting the list
* of threads.
*/
void
{
struct plist {
int pevents;
};
/*
* Object (fd) is associated with an event port,
* => send event notification to the port.
*/
int pevents;
POLLERR);
/*
* portkev_lock mutex will be released
* by port_send_event().
*/
/*
* If we have some thread polling the
* port's fd, add it to the list. They
* will be notified later.
* The port_pollwkup() will flag the
* port_t so that it will not disappear
* till port_pollwkdone() is called.
*/
pevents =
if (pevents) {
struct plist *t;
t = kmem_zalloc(
sizeof (struct plist),
KM_SLEEP);
plhead = t;
} else {
}
pltail = t;
}
} else {
}
continue;
}
/*
* Try to grab the lock for this thread. If
* we don't get it then we may deadlock so
* back out and restart all over again. Note
* that the failure rate is very very low.
*/
} else {
/*
* We are here because:
* 1) This thread has been woke up
* and is trying to get out of poll().
* 2) Some other thread is also here
* but with a different pollhead lock.
*
* So, we need to drop the lock on pollhead
* because of (1) but we want to prevent
* that thread from doing lwp_exit() or
* devpoll close. We want to ensure that
* the pollcache pointer is still invalid.
*
* Solution: Grab the pcp->pc_no_exit lock,
* increment the pc_busy counter, drop every
* lock in sight. Get out of the way and wait
* for type (2) threads to finish.
*/
/*
* Wakeup the thread waiting in
* thread_exit().
*/
}
goto retry;
}
}
}
/*
* Event ports - If this php is of the port on the list,
* call port_pollwkdone() to release it. The port_pollwkdone()
* needs to be called before dropping the PH lock so that any new
* thread attempting to poll this port are blocked. There can be
* only one thread here in pollwakeup notifying this port's fd.
*/
struct plist *t;
t = plhead;
}
/*
* Event ports - Notify threads polling the event port's fd.
* This is normally done in port_send_event() where it calls
* pollwakeup() on the port. But, for PORT_SOURCE_FD source alone,
* we do it here in pollwakeup() to avoid a recursive call.
*/
goto retry;
}
}
/*
* This function is called to inform a thread that
* an event being polled for has occurred.
* The pollstate lock on the thread should be held on entry.
*/
void
{
}
/*
* add a polldat entry to pollhead ph_list. The polldat struct is used
* by pollwakeup to wake sleeping pollers when polled events have happened.
*/
void
{
#ifdef DEBUG
{
/*
* the polldat should not be already on the list
*/
}
}
#endif /* DEBUG */
}
/*
* Delete the polldat entry from ph_list.
*/
void
{
break;
}
}
#ifdef DEBUG
/* assert that pdp is no longer in the list */
}
#endif /* DEBUG */
}
/*
* walk through the poll fd lists to see if they are identical. This is an
* expensive operation and should not be done more than once for each poll()
* call.
*
* As an optimization (i.e., not having to go through the lists more than
* once), this routine also clears the revents field of pollfd in 'current'.
* Zeroing out the revents field of each entry in current poll list is
* required by poll man page.
*
* Since the events field of cached list has illegal poll events filtered
* out, the current list applies the same filtering before comparison.
*
* The routine stops when it detects a meaningful difference, or when it
* exhausts the lists.
*/
int
{
int ix;
/* Prefetch 64 bytes worth of 8-byte elements */
if ((ix & 0x7) == 0) {
}
/*
* Filter out invalid poll events while we are in
* inside the loop.
*/
}
continue;
}
}
continue;
}
return (ix);
}
return (ix);
}
/*
* This routine returns a pointer to a cached poll fd entry, or NULL if it
* does not find it in the hash table.
*/
{
int hashindex;
break;
}
return (pdp);
}
pcache_alloc_fd(int nsets)
{
if (nsets > 0) {
}
return (pdp);
}
/*
* This routine inserts a polldat into the pollcache's hash table. It
* may be necessary to grow the size of the hash table.
*/
void
{
int hashindex;
int fd;
}
pcp->pc_fdcount++;
#ifdef DEBUG
{
/*
* same fd should not appear on a hash list twice
*/
}
}
#endif /* DEBUG */
}
/*
* Grow the hash table -- either double the table size or round it to the
* nearest multiples of POLLHASHCHUNKSZ, whichever is bigger. Rehash all the
* elements on the hash table.
*/
void
{
int oldsize;
int i;
#ifdef DEBUG
int count = 0;
#endif
~(POLLHASHCHUNKSZ - 1);
} else {
}
KM_SLEEP);
/*
* rehash existing elements
*/
pcp->pc_fdcount = 0;
for (i = 0; i < oldsize; i++) {
#ifdef DEBUG
count++;
#endif
}
}
}
void
{
int newsize;
/*
* grow to nearest multiple of POLLMAPCHUNK, assuming POLLMAPCHUNK is
* power of 2.
*/
KM_SLEEP);
/*
* don't want pollwakeup to set a bit while growing the bitmap.
*/
}
/*
* remove all the reference from pollhead list and fpollinfo lists.
*/
void
{
int i;
for (i = 0; i < pcp->pc_hashsize; i++) {
}
}
}
}
}
void
{
int i;
/*
* we come here because an earlier close() on this cached poll fd.
*/
int j;
/*
* turn off every appearance in pcs_pollfd list
*/
for (j = refp->xf_position;
}
}
}
}
}
}
/*
* Insert poll fd into the pollcache, and add poll registration.
* This routine is called after getf() and before releasef(). So the vnode
* can not disappear even if we block here.
* If there is an error, the polled fd is not cached.
*/
int
{
int error;
int fd;
int newpollfd = 0;
/*
* The poll caching uses the existing VOP_POLL interface. If there
* is no polled events, we want the polled device to set its "some
* one is sleeping in poll" flag. When the polled events happen
* later, the driver will call pollwakeup(). We achieve this by
* always passing 0 in the third parameter ("anyyet") when calling
* VOP_POLL. This parameter is not looked at by drivers when the
* polled events exist. If a driver chooses to ignore this parameter
* and call pollwakeup whenever the polled events happen, that will
* be OK too.
*/
if (error) {
return (error);
}
(*fdcntp)++;
}
/*
* polling the underlying device succeeded. Now we can cache it.
* A close can't come in here because we have not done a releasef()
* yet.
*/
newpollfd = 1;
}
/*
* If this entry was used to cache a poll fd which was closed, and
* this entry has not been cleaned, do it now.
*/
}
/*
* the entry is never used or cleared by removing a cached
* pollfd (pcache_delete_fd). So all the fields should be clear.
*/
}
/*
* A polled fd is considered cached. So there should be a fpollinfo
* entry on uf_fpollinfo list.
*/
/*
* If there is an inconsistency, we want to know it here.
*/
/*
* XXX pd_events is a union of all polled events on this fd, possibly
* by different threads. Unless this is a new first poll(), pd_events
* never shrinks. If an event is no longer polled by a process, there
* is no way to cancel that event. In that case, poll degrade to its
* old form -- polling on this fd every time poll() is called. The
* assumption is an app always polls the same type of events.
*/
/*
* There is not much special handling for multiple appearances of
* same fd other than xf_position always recording the first
* appearance in poll list. If this is called from pcacheset_cache_list,
* a VOP_POLL is called on every pollfd entry; therefore each
* revents and fdcnt should be set correctly. If this is called from
* pcacheset_resolve, we don't care about fdcnt here. Pollreadmap will
* pick up the right count and handle revents field of each pollfd
* entry.
*/
} else {
/*
* xf_position records the fd's first appearance in poll list
*/
}
}
}
}
if (newpollfd != 0) {
}
if (memphp) {
} else {
/*
* layered devices (e.g. console driver)
* may change the vnode and thus the pollhead
* pointer out from underneath us.
*/
}
}
}
/*
* Since there is a considerable window between VOP_POLL and when
* we actually put the polldat struct on the pollhead list, we could
* miss a pollwakeup. In the case of polling additional events, we
* don't update the events until after VOP_POLL. So we could miss
* pollwakeup there too. So we always set the bit here just to be
* safe. The real performance gain is in subsequent pcache_poll.
*/
return (0);
}
/*
* The entry is not really deleted. The fields are cleared so that the
* entry is no longer useful, but it will remain in the hash table for reuse
* later. It will be freed when the polling lwp exits.
*/
int
{
/*
* It is possible for a wakeup thread to get ahead
* of the following pollhead_delete and set the bit in
* bitmap. It is OK because the bit will be cleared
* here anyway.
*/
}
}
return (0);
}
/*
* fd cached here has been closed. This is the first
* pcache_delete_fd called after the close. Clean up the
* entire entry.
*/
return (0);
}
#ifdef DEBUG
}
#endif /* DEBUG */
} else {
/*
* The xref position is no longer valid.
* Reset it to a special value and let
* caller know it needs to updatexref()
* with a new xf_position value.
*/
return (1);
}
}
return (0);
}
void
{
}
#ifdef DEBUG
/*
* For each polled fd, it's either in the bitmap or cached in
* pcache hash table. If this routine returns 0, something is wrong.
*/
static int
{
int i;
int fd;
for (i = 0; i < nfds; i++) {
if (fd < 0) {
continue;
}
continue;
continue;
continue;
return (0);
}
return (1);
}
#endif /* DEBUG */
/*
* resolve the difference between the current poll list and a cached one.
*/
int
{
int i;
int common;
int count = 0;
int offset;
int remain;
int fd;
int fdcnt = 0;
int cnt = 0;
int error = 0;
int mismatch = 0;
#ifdef DEBUG
#endif
/*
* the length of poll list has changed. allocate a new
* pollfd list.
*/
}
/*
* Compare the overlapping part of the current fd list with the
* cached one. Whenever a difference is found, resolve it.
* The comparison is done on the current poll list and the
* cached list. But we may be setting up the newlist to be the
* cached list for next poll.
*/
int tmpfd;
remain);
/*
* Collect stats. If lists are completed the first time,
* it's a hit. Otherwise, it's a partial hit or miss.
*/
} else {
mismatch++;
}
/*
* Filter out invalid events.
*/
} else {
}
}
/*
* when resolving a difference, we always remove the
* fd from cache before inserting one into cache.
*/
/*
* This should be rare but needed for
* correctness.
*
* The first appearance in cached list
* is being "turned off". The same fd
* appear more than once in the cached
* poll list. Find the next one on the
* list and update the cached
* xf_position field.
*/
which);
break;
}
}
}
/*
* In case a new cache list is allocated,
* need to keep both cache lists in sync
* b/c the new one can be freed if we have
* an error later.
*/
}
}
/*
* add to the cached fd tbl and bitmap.
*/
}
fdcnt++;
} else {
/*
* Here we don't care about the
* fdcnt. We will examine the bitmap
* later and pick up the correct
* fdcnt there. So we never bother
* to check value of 'cnt'.
*/
/*
* if no error, we want to do releasef
* after we updated cache poll list
* entry so that close() won't race
* us.
*/
if (error) {
/*
* If we encountered an error,
* we have invalidated an
* entry in cached poll list
* (in pcache_delete_fd() above)
* but failed to add one here.
* This is OK b/c what's in the
* cached list is consistent
* with content of cache.
* It will not have any ill
* effect on next poll().
*/
nfds *
sizeof (pollfd_t));
}
return (error);
}
/*
* If we have allocated a new(temp)
* cache list, we need to keep both
* in sync b/c the new one can be freed
* if we have an error later.
*/
}
}
} else {
}
count++;
}
}
if (mismatch != 0) {
} else {
}
}
/*
* take care of the non overlapping part of a list
*/
/* filter out invalid events */
}
continue;
}
/*
* add to the cached fd tbl and bitmap.
*/
fdcnt++;
continue;
}
/*
* Here we don't care about the
* fdcnt. We will examine the bitmap
* later and pick up the correct
* fdcnt there. So we never bother to
* check 'cnt'.
*/
if (error) {
/*
* Here we are half way through adding newly
* polled fd. Undo enough to keep the cache
* list consistent with the cache content.
*/
i, which, 0);
return (error);
}
}
}
/*
* remove the fd's which are no longer polled.
*/
which, 1);
}
/*
* set difference resolved. update nfds and cachedlist
* in pollstate struct.
*/
/*
* By now, the pollfd.revents field should
* all be zeroed.
*/
}
/*
* By now for every fd in pollfdp, one of the following should be
* true. Otherwise we will miss a polled event.
*
* 1. the bit corresponding to the fd in bitmap is set. So VOP_POLL
* will be called on this fd in next poll.
* 2. the fd is cached in the pcache (i.e. pd_php is set). So
* pollnotify will happen.
*/
/*
* make sure cross reference between cached poll lists and cached
* poll fds are correct.
*/
/*
* ensure each polldat in pollcache reference a polled fd in
* pollcacheset.
*/
#ifdef DEBUG
#endif
return (0);
}
#ifdef DEBUG
static int
{
int i;
int reventcnt = 0;
for (i = 0; i < nfds; i++) {
continue;
}
reventcnt++;
}
}
}
return (reventcnt);
}
#endif /* DEBUG */
/*
* read the bitmap and poll on fds corresponding to the '1' bits. The ps_lock
* is held upon entry.
*/
int
int which)
{
int i;
int fd;
int fdcnt;
int error = 0;
int entry;
done = 0;
begin = 0;
fdcnt = 0;
/*
* only poll fds which may have events
*/
if (fd >= 0) {
/*
* adjust map pointers for next round
*/
done = 1;
} else {
}
/*
* A bitmap caches poll state information of
* multiple poll lists. Call VOP_POLL only if
* the bit corresponds to an fd in this poll
* list.
*/
continue;
/*
* we are in this routine implies that we have
* successfully polled this fd in the past.
* Check to see this fd is closed while we are
* blocked in poll. This ensures that we don't
* miss a close on the fd in the case this fd is
* reused.
*/
fdcnt++;
/*
* this fd appeared multiple time
* in the poll list. Find all of them.
*/
fdcnt++;
}
}
}
continue;
}
/*
* We can be here polling a device that is being
* closed (i.e. the file pointer is set to NULL,
* but pollcacheclean has not happened yet).
*/
fdcnt++;
/*
* this fd appeared multiple time
* in the poll list. Find all of them.
*/
fdcnt++;
}
}
}
continue;
}
/*
* Since we no longer hold poll head lock across
* VOP_POLL, pollunlock logic can be simplified.
*/
/*
* underlying file systems may set a "pollpending"
* flag when it sees the poll may block. Pollwakeup()
* is called by wakeup thread if pollpending is set.
* Pass a 0 fdcnt so that the underlying file system
* will set the "pollpending" flag set when there is
* no polled events.
*
* Use pollfdp[].events for actual polling because
* the pd_events is union of all cached poll events
* on this fd. The events parameter also affects
* how the polled device sets the "poll pending"
* flag.
*/
/*
* releasef after completely done with this cached
* poll entry. To prevent close() coming in to clear
* this entry.
*/
if (error) {
break;
}
/*
* layered devices (e.g. console driver)
* may change the vnode and thus the pollhead
* pointer out from underneath us.
*/
/*
* We could have missed a wakeup on the new
* target device. Make sure the new target
* gets polled once.
*/
goto retry;
}
fdcnt++;
/*
* this fd appeared multiple time
* in the poll list. This is rare but
* we have to look at all of them for
* correctness.
*/
if (error > 0) {
break;
}
if (error < 0) {
goto retry;
}
}
} else {
/*
* VOP_POLL didn't return any revents. We can
* clear the bit in bitmap only if we have the
* pollhead ptr cached and no other cached
* entry is polling different events on this fd.
* VOP_POLL may have dropped the ps_lock. Make
* sure pollwakeup has not happened before clear
* the bit.
*/
}
/*
* if the fd can be cached now but not before,
* do it now.
*/
/*
* We are inserting a polldat struct for
* the first time. We may have missed a
* wakeup on this device. Re-poll once.
* This should be a rare event.
*/
goto retry;
}
/*
* this fd appeared multiple time
* in the poll list. This is rare but
* we have to look at all of them for
* correctness.
*/
if (error > 0) {
break;
}
if (error < 0) {
goto retry;
}
}
}
} else {
done = 1;
}
}
if (!error) {
}
return (error);
}
/*
* Going through the poll list without much locking. Poll all fds and
* cache all valid fds in the pollcache.
*/
int
{
int i;
int fd;
int error = 0;
/*
* cache the new poll list in pollcachset.
*/
/*
* We have saved a copy of current poll fd list in one pollcacheset.
* The 'revents' field of the new list is not yet set to 0. Loop
* through the new list just to do that is expensive. We do that
* while polling the list.
*/
/*
* We also filter out the illegal poll events in the event
*/
}
if (fd < 0) {
continue;
}
/*
* invalidate this cache entry in the cached poll list
*/
(*fdcntp)++;
continue;
}
/*
* cache this fd.
*/
which);
if (error) {
/*
* Here we are half way through caching a new
* poll list. Undo every thing.
*/
break;
}
}
return (error);
}
/*
* called by pollcacheclean() to set the fp NULL. It also sets polled events
* in pcacheset entries to a special events 'POLLCLOSED'. Do a pollwakeup to
* wake any sleeping poller, then remove the polldat from the driver.
* The routine is called with ps_pcachelock held.
*/
void
{
int i;
/*
* the corresponding fpollinfo in fi_list has been removed by
* a close on this fd. Reset the cached fp ptr here.
*/
/*
* XXX - This routine also touches data in pcacheset struct.
*
* set the event in cached poll lists to POLLCLOSED. This invalidate
* the cached poll fd entry in that poll list, which will force a
* removal of this cached entry in next poll(). The cleanup is done
* at the removal time.
*/
(short)POLLCLOSED;
}
int j;
/*
* mark every matching entry in pcs_pollfd
*/
for (j = refp->xf_position;
(short)POLLCLOSED;
}
}
}
}
}
}
}
/*
* This is the first time this thread has ever polled,
* so we have to create its pollstate structure.
* This will persist for the life of the thread,
* until it calls pollcleanup().
*/
pollstate_create(void)
{
return (ps);
}
void
{
}
}
}
}
/*
* We are holding the appropriate uf_lock entering this routine.
* Bump up the ps_busy count to prevent the thread from exiting.
*/
void
{
}
}
/*
* Complete phase 2 of cached poll fd cleanup. Call pcache_clean_entry to mark
* the pcacheset events field POLLCLOSED to force the next poll() to remove
* this cache entry. We can't clean the polldat entry clean up here because
* lwp block in poll() needs the info to return. Wakeup anyone blocked in
* poll and let exiting lwp go. No lock is held upon entry. So it's OK for
* pcache_clean_entry to call pollwakeup().
*/
void
{
while (fpip) {
/*
* Wakeup the thread waiting in
* thread_exit().
*/
}
}
}
/*
 * One of the cache lines' counters is wrapping around. Reset all cache line
 * counters to zero except one. This is simplistic, but probably works
 * effectively.
 */
/* NOTE(review): the signature line and loop body are elided in this extract. */
void
{
int i;
}
}
}
/*
 * This routine implements the poll cache list replacement policy.
 * It currently chooses the "least used" entry.
 */
/*
 * NOTE(review): the signature line and the least-used comparison inside the
 * loop are elided in this extract; only the index bookkeeping remains.
 */
int
{
int i;
int index = 0;
index = i;
}
}
return (index);
}
/*
 * This routine is called by strclose to remove remaining polldat structs on
 * the pollhead list of the device being closed. There are two reasons why
 * the polldat structures still remain on the pollhead list:
 *
 * (1) The layered device (e.g. the console driver).
 * In this case, the existence of a polldat implies that the thread putting
 * the polldat on this list has not exited yet. Before the thread exits, it
 * will have to hold this pollhead lock to remove the polldat. So holding the
 * pollhead lock here effectively prevents the thread which put the polldat
 * on this list from exiting.
 *
 * (2) NOTE(review): the original header text for case (2) appears elided in
 * this extract; the surviving fragment: ... pollhead list if the process has
 * not done a POLLREMOVE before closing the polled fd. We just unlink it here.
 */
/*
 * NOTE(review): the signature line and most statements are elided in this
 * extract; only the control-flow skeleton remains.
 */
void
{
/*
 * In case (1), while we must prevent the thread in question from
 * exiting, we must also obey the proper locking order, i.e.
 * (ps_lock -> phlock).
 */
/*
 * This is case (2). Since the ph_lock is sufficient
 * (NOTE(review): a comment line appears elided here)
 * lwp, just unlink the polldat.
 */
continue;
}
/*
 * Now get the locks in proper order to avoid deadlock.
 */
/*
 * While we dropped the pollhead lock, the element could
 * already have been taken off the list.
 */
}
/*
 * Wakeup the thread waiting in
 * thread_exit().
 */
}
}
}
/*
 * The remove_list is called to clean up a partially cached 'current' list or
 * to remove a partial list which is no longer cached. The flag value of 1
 * indicates the second case.
 */
/*
 * NOTE(review): the leading signature line(s) and most statements are elided
 * in this extract; only a trailing parameter fragment and the inner-loop
 * skeleton remain.
 */
void
int cacheindex, int flag)
{
int i;
int j;
for (j = i + 1; j < end; j++) {
(ssize_t)j, cacheindex);
break;
}
}
}
}
}
}
#ifdef DEBUG
/*
 * Make sure curthread is not on anyone's pollhead list any more.
 * DEBUG-only consistency check.
 */
/* NOTE(review): the signature line and loop body are elided in this extract. */
static void
{
int i;
while (pdp) {
}
}
}
}
}
/*
 * For a resolved-set poll list, the xref info in the pcache should be
 * consistent with this poll list. DEBUG-only; returns 1 when consistent.
 */
/*
 * NOTE(review): the signature line and the assertion statements are elided
 * in this extract; only the xf_position walk skeleton remains.
 */
static int
{
int i;
continue;
}
if (refp->xf_position >= 0) {
int j;
int count = 0;
for (j = refp->xf_position;
j++) {
count++;
}
}
}
}
}
return (1);
}
/*
 * For every cached pollfd, its polldat struct should be consistent with
 * what is in the pcacheset lists. DEBUG-only consistency check.
 */
/*
 * NOTE(review): the signature line and the per-bucket checks are elided in
 * this extract; only the hash-table walk skeleton remains.
 */
static void
{
int i;
for (i = 0; i < pcp->pc_hashsize; i++) {
int j;
}
}
}
}
}
}
/*
 * Every wfd element on ph_list must have a corresponding fpollinfo on the
 * uf_fpollinfo list. This is a variation of infpollinfo() w/o holding locks.
 */
/*
 * NOTE(review): the signature line and the list-walk statements are elided
 * in this extract; only the control-flow skeleton remains.
 */
void
{
return;
}
break;
}
}
}
}
}
/*
 * For each cached fd whose bit is not set in the bitmap, its revents field in
 * the current poll list should be 0. DEBUG-only; returns 1 when consistent.
 */
/*
 * NOTE(review): the signature line and the assertion statements are elided
 * in this extract; only the skeleton remains.
 */
static int
{
int i;
int entry;
continue;
}
int j;
}
}
}
}
}
return (1);
}
#endif /* DEBUG */
/*
 * NOTE(review): the signature of this function was elided from this extract;
 * only a brace pair remains. Confirm against the full source before editing.
 */
{
}
/*
 * NOTE(review): the function name line and most allocation statements are
 * elided in this extract. The visible fragments allocate the poll bitmap
 * and the polldat hash table (both with KM_SLEEP) for a pollcache.
 */
void
{
/*
 * allocate enough bits for the poll fd list
 */
}
KM_SLEEP);
/*
 * The hash size is at least POLLHASHCHUNKSZ. If the user polls a large
 * number of fds to start with, allocate a bigger hash table (to the
 * nearest multiple of POLLHASHCHUNKSZ) because dynamically growing a
 * hash table is expensive.
 */
if (nfds < POLLHASHCHUNKSZ) {
} else {
~(POLLHASHCHUNKSZ - 1);
}
KM_SLEEP);
}
/*
 * NOTE(review): the signature line and most statements are elided in this
 * extract. The visible fragments walk the pollcache hash table and decrement
 * pc_fdcount — presumably while removing cached fd entries; confirm against
 * the full source.
 */
void
{
int i;
for (i = 0; i < pcp->pc_hashsize; i++) {
}
pcp->pc_fdcount--;
}
}
}
}
/*
 * Presumably allocates an array of nsets pcacheset structures.
 * NOTE(review): the return-type line and the body are elided in this
 * extract — confirm against the full source.
 */
pcacheset_create(int nsets)
{
}
/*
 * NOTE(review): the signature line and the kmem_free calls are elided in
 * this extract. The visible fragments free per-set pollfd_t storage across
 * all nsets cached poll lists.
 */
void
{
int i;
for (i = 0; i < nsets; i++) {
sizeof (pollfd_t));
}
}
}
/*
 * Check each duplicated poll fd in the poll list. It may be necessary to
 * VOP_POLL the same fd again using different poll events. getf() has been
 * done by the caller. This routine returns 0 if it can successfully process
 * the entire poll fd list. It returns -1 if the underlying vnode has changed
 * during a VOP_POLL, in which case the caller has to repoll. It returns a
 * positive value if VOP_POLL failed.
 */
/*
 * NOTE(review): the signature line and many statements (including the
 * VOP_POLL call that sets 'error') are elided in this extract; only the
 * control-flow skeleton remains.
 */
static int
{
int i;
int fd;
(*fdcntp)++;
}
} else {
int error;
/*
 * The events are different. VOP_POLL on this
 * fd so that we don't miss any revents.
 */
if (error) {
return (error);
}
/*
 * Layered devices (e.g. console driver)
 * may change the vnode and thus the pollhead
 * pointer out from underneath us.
 */
/*
 * We could have missed a wakeup on the
 * new target device. Make sure the new
 * target gets polled once.
 */
return (-1);
}
(*fdcntp)++;
}
}
}
}
return (0);
}