port_fd.c revision 7c478bd95313f5f23a4c958a745db2134aa03244
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License, Version 1.0 only
* (the "License"). You may not use this file except in compliance
* with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/stat.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/sysmacros.h>
#include <sys/debug.h>
#include <sys/poll_impl.h>
#include <sys/port_impl.h>
/* Sizing parameters of the per-port PORT_SOURCE_FD cache hash table. */
#define PORTHASH_START 256 /* initial number of hash buckets (pc_hashsize) */
#define PORTHASH_MULT 2 /* growth factor; also scales the grow threshold */
/*
 * local functions
 * Forward declarations of the static helpers that are referenced before
 * their definition in this file.
 */
static int port_fd_callback(void *, int *, pid_t, int, void *);
static int port_bind_pollhead(pollhead_t **, polldat_t *, short *);
static void port_remove_fd_local(portfd_t *, port_fdcache_t *);
static void port_close_sourcefd(void *, int, pid_t, int);
static void port_cache_insert_fd(port_fdcache_t *, polldat_t *);
/*
* port_fd_callback()
* The event port framework uses callback functions to notify associated
* event sources about actions on source specific objects.
* The source itself defines the "arg" required to identify the object with
* events. In the port_fd_callback() case the "arg" is a pointer to portfd_t
* structure. The portfd_t structure is specific for PORT_SOURCE_FD source.
* The port_fd_callback() function is notified in three cases:
* - PORT_CALLBACK_DEFAULT
* The object (fd) will be delivered to the application.
* - PORT_CALLBACK_DISSOCIATE
* The object (fd) will be dissociated from the port.
* - PORT_CALLBACK_CLOSE
* The object (fd) will be dissociated from the port because the port
* is being closed.
* A fd is shareable between processes only when
* - processes have the same fd id and
* - processes have the same fp.
* A fd becomes shareable:
* - on fork() across parent and child process and
* - when I_SENDFD is used to pass file descriptors between parent and child
* immediately after fork() (the sender and receiver must get the same
* file descriptor id).
* If a fd is shared between processes, all involved processes get the
* same rights with regard to re-associating the fd with the port and
* retrieving events from that fd.
* The process which associated the fd with a port for the first time
* also becomes the owner of the association. Only the owner of the
* association is allowed to dissociate the fd from the port.
*/
/* ARGSUSED */
static int
port_fd_callback(void *arg, int *events, pid_t pid, int flag, void *evp)
{
	portfd_t	*portfd = (portfd_t *)arg;
	polldat_t	*pd = PFTOD(portfd);
	port_fdcache_t	*cache;
	file_t		*curfp;

	ASSERT((pd != NULL) && (events != NULL));

	switch (flag) {
	case PORT_CALLBACK_DEFAULT:
		/*
		 * A process other than the one identified by "pid" may only
		 * retrieve the events when its own descriptor with the same
		 * number resolves to the file_t cached in the polldat_t.
		 */
		if (curproc->p_pid != pid) {
			curfp = getf(pd->pd_fd);
			if (curfp == NULL)
				return (EACCES);	/* deny delivery */
			/* only the pointer value is compared from here on */
			releasef(pd->pd_fd);
			if (curfp != pd->pd_fp)
				return (EACCES);	/* deny delivery */
		}
		/* hand the fired events back to the caller */
		*events = pd->pd_portev->portkev_events;
		return (0);

	case PORT_CALLBACK_DISSOCIATE:
		/* nothing to do for this source */
		return (0);

	case PORT_CALLBACK_CLOSE:
		/*
		 * Detach the polldat_t from its event, its file and its
		 * pollhead, then drop the portfd_t from the cache.
		 */
		pd->pd_portev = NULL;
		cache = (port_fdcache_t *)pd->pd_pcache;
		mutex_enter(&cache->pc_lock);
		pd->pd_fp = NULL;
		pd->pd_events = 0;
		if (pd->pd_php != NULL) {
			pollhead_delete(pd->pd_php, pd);
			pd->pd_php = NULL;
		}
		port_pcache_remove_fd(cache, portfd);
		mutex_exit(&cache->pc_lock);
		return (0);

	default:
		break;
	}

	/* unknown callback reason */
	return (EINVAL);
}
/*
* This routine returns a pointer to a cached poll fd entry, or NULL if it
* does not find it in the hash table.
* The fd is used as index.
* The fd and the fp are used to detect a valid entry.
* This function returns a pointer to a valid portfd_t structure only when
* the fd and the fp in the args match the entries in polldat_t.
*/
portfd_t *
port_cache_lookup_fp(port_fdcache_t *pcp, int fd, file_t *fp)
{
	portfd_t	**head;
	polldat_t	*cur;

	ASSERT(MUTEX_HELD(&pcp->pc_lock));

	/* walk the collision chain of the bucket selected by fd */
	head = PORT_FD_BUCKET(pcp, fd);
	for (cur = PFTOD(*head); cur != NULL; cur = cur->pd_hashnext) {
		/* an entry is valid only if both fd and fp match */
		if (cur->pd_fd == fd && cur->pd_fp == fp)
			break;
	}

	/* cur is NULL here when the chain was exhausted without a match */
	return (PDTOF(cur));
}
/*
* port_associate_fd()
* This function associates new file descriptors with a port or
* reactivates already associated file descriptors.
* The reactivation also updates the events types to be checked and the
* attached user pointer.
* Per port a cache is used to store associated file descriptors.
* Internally the VOP_POLL interface is used to poll for existing events.
* The VOP_POLL interface can also deliver a pointer to a pollhead_t structure
* which is used to enqueue polldat_t structures with pending events.
* If VOP_POLL immediately returns valid events (revents) then those events
* will be submitted to the event port with port_send_event().
* Otherwise VOP_POLL does not return events but it delivers a pointer to a
* pollhead_t structure. In such a case the corresponding file system behind
* VOP_POLL will use the pollwakeup() function to notify about existing
* events.
*/
int
port_associate_fd(port_t *pp, int source, uintptr_t object, int events,
void *user)
{
/*
 * Arguments:
 * pp - port the file descriptor is associated with
 * source - event source id handed to port_alloc_event_local()
 * object - the file descriptor; must fit into an int
 * events - poll events of interest, stored in pd_events
 * user - user pointer stored in the port event (portkev_user)
 * Returns 0 on success, EBADFD when the object is not a valid open fd,
 * or the error reported by port_associate_ksource(),
 * port_alloc_event_local(), VOP_POLL() or port_bind_pollhead().
 */
port_fdcache_t *pcp;
int fd;
struct pollhead *php = NULL;
portfd_t *pfd;
polldat_t *pdp;
file_t *fp;
port_kevent_t *pkevp;
short revents;
int error = 0;
pcp = pp->port_queue.portq_pcp;
/* the object must be representable as an int file descriptor */
if (object > (uintptr_t)INT_MAX)
return (EBADFD);
fd = object;
/* hold the file_t; every return path below calls releasef(fd) */
if ((fp = getf(fd)) == NULL)
return (EBADFD);
mutex_enter(&pcp->pc_lock);
if (pcp->pc_hash == NULL) {
/*
* This is the first time that a fd is being associated with
* the current port:
* - create PORT_SOURCE_FD cache
* - associate PORT_SOURCE_FD source with the port
*/
error = port_associate_ksource(pp->port_fd, PORT_SOURCE_FD,
NULL, port_close_sourcefd, pp, NULL);
if (error) {
mutex_exit(&pcp->pc_lock);
releasef(fd);
return (error);
}
/* create polldat cache */
pcp->pc_hashsize = PORTHASH_START;
pcp->pc_hash = kmem_zalloc(pcp->pc_hashsize *
sizeof (portfd_t *), KM_SLEEP);
/* a freshly created cache cannot contain the fd yet */
pfd = NULL;
} else {
/* Check if the fd/fp is already associated with the port */
pfd = port_cache_lookup_fp(pcp, fd, fp);
}
if (pfd == NULL) {
/*
* new entry
* Allocate a polldat_t structure per fd
* The use of the polldat_t structure to cache file descriptors
* is required to be able to share the pollwakeup() function
* with poll(2) and devpoll(7d).
*/
pfd = kmem_zalloc(sizeof (portfd_t), KM_SLEEP);
pdp = PFTOD(pfd);
pdp->pd_fd = fd;
pdp->pd_fp = fp;
pdp->pd_pcache = (void *)pcp;
/* Allocate a port event structure per fd */
error = port_alloc_event_local(pp, source, PORT_ALLOC_CACHED,
&pdp->pd_portev);
if (error) {
/* the entry was not inserted into the cache yet: just free it */
kmem_free(pfd, sizeof (portfd_t));
releasef(fd);
mutex_exit(&pcp->pc_lock);
return (error);
}
pkevp = pdp->pd_portev;
/* route port framework notifications for this event to this source */
pkevp->portkev_callback = port_fd_callback;
pkevp->portkev_arg = pfd;
/* add portfd_t entry to the cache */
port_cache_insert_fd(pcp, pdp);
pkevp->portkev_object = fd;
pkevp->portkev_user = user;
/*
* Add current port to the file descriptor interested list
* The members of the list are notified when the file descriptor
* is closed.
*/
addfd_port(fd, pfd);
} else {
/*
* The file descriptor is already associated with the port
*/
pdp = PFTOD(pfd);
pkevp = pdp->pd_portev;
/*
* Check if the re-association happens before the last
* submitted event of the file descriptor was retrieved.
*/
if (pkevp->portkev_flags & PORT_KEV_DONEQ) {
/*
* Event was fired for this fd and it is still in
* the port queue.
* => remove event from port queue.
*/
port_remove_done_event(pkevp);
}
/* re-association replaces the user pointer */
pkevp->portkev_user = user;
}
pkevp->portkev_events = 0; /* no fired events */
pdp->pd_events = events; /* events associated */
pkevp->portkev_flags |= PORT_KEV_VALID; /* events can be submitted */
/*
* do VOP_POLL and cache this poll fd.
*
* XXX - pollrelock() logic needs to know
* which pollcache lock to grab. It'd be a
* cleaner solution if we could pass pcp as
* an argument in VOP_POLL interface instead
* of implicitly passing it using thread_t
* struct. On the other hand, changing VOP_POLL
* interface will require all driver/file system
* poll routine to change.
*/
curthread->t_pollcache = (pollcache_t *)pcp;
error = VOP_POLL(fp->f_vnode, events, 0, &revents, &php);
curthread->t_pollcache = NULL;
/*
* To keep synchronization between VOP_POLL above and
* pollhead_insert below, it is necessary to
* call VOP_POLL() again (see port_bind_pollhead()).
*/
if (error) {
/* dissociate the fd from the port */
delfd_port(fd, pfd);
port_remove_fd_local(pfd, pcp);
releasef(fd);
mutex_exit(&pcp->pc_lock);
return (error);
}
if (php != NULL) {
/*
* No events delivered yet.
* Bind pollhead pointer with current polldat_t structure.
* Sub-system will call pollwakeup() later with php as
* argument.
*/
error = port_bind_pollhead(&php, pdp, &revents);
if (error) {
/* same teardown as for a failed first VOP_POLL() */
delfd_port(fd, pfd);
port_remove_fd_local(pfd, pcp);
releasef(fd);
mutex_exit(&pcp->pc_lock);
return (error);
}
}
/*
* Check if events detected.
* revents was already set after the VOP_POLL above or
* it was updated in port_bind_pollhead().
*/
if (revents) {
/* report only the requested events plus POLLHUP and POLLERR */
revents = revents & (pdp->pd_events | POLLHUP | POLLERR);
if ((pkevp->portkev_flags & PORT_KEV_DONEQ) == 0) {
/* send events to the event port */
pkevp->portkev_events = revents;
(void) port_send_event(pkevp);
} else {
/* update events */
pkevp->portkev_events |= revents;
}
}
releasef(fd);
mutex_exit(&pcp->pc_lock);
/* error is 0 here: both failure cases returned early above */
return (error);
}
/*
* The port_dissociate_fd() function dissociates the delivered file
* descriptor from the event port and removes already fired events.
* If a fd is shared between processes, all involved processes get the
* same rights with regard to re-associating the fd with the port and
* retrieving events from that fd.
* The process which associated the fd with a port for the first time
* also becomes the owner of the association. Only the owner of the
* association is allowed to dissociate the fd from the port.
*/
int
port_dissociate_fd(port_t *pp, uintptr_t object)
{
	port_fdcache_t	*cache;
	portfd_t	*entry;
	file_t		*filep;
	int		fd;
	int		rv = 0;

	/* the object must be representable as an int file descriptor */
	if (object > (uintptr_t)INT_MAX)
		return (EBADFD);
	fd = object;
	cache = pp->port_queue.portq_pcp;

	mutex_enter(&cache->pc_lock);

	/* no file descriptor cache available -> nothing is associated */
	if (cache->pc_hash == NULL)
		goto unlock;

	if ((filep = getf(fd)) == NULL) {
		rv = EBADFD;
		goto unlock;
	}

	/* an fd/fp pair that is not cached is not treated as an error */
	entry = port_cache_lookup_fp(cache, fd, filep);
	if (entry == NULL) {
		releasef(fd);
		goto unlock;
	}

	/* only association owner is allowed to remove the association */
	if (curproc->p_pid != PFTOD(entry)->pd_portev->portkev_pid) {
		releasef(fd);
		rv = EACCES;
		goto unlock;
	}

	/* remove port from the file descriptor interested list */
	delfd_port(fd, entry);
	releasef(fd);

	/* remove polldat & port event structure */
	port_remove_fd_object(entry, pp, cache);

unlock:
	mutex_exit(&cache->pc_lock);
	return (rv);
}
/*
* Remove the fd from the event port cache.
*/
static void
port_remove_fd_local(portfd_t *pfd, port_fdcache_t *pcp)
{
	polldat_t *pd;

	ASSERT(MUTEX_HELD(&pcp->pc_lock));

	pd = PFTOD(pfd);

	/* the entry no longer refers to a file */
	pd->pd_fp = NULL;

	/* unlink from the pollhead the entry is currently bound to, if any */
	if (pd->pd_php != NULL) {
		pollhead_delete(pd->pd_php, pd);
		pd->pd_php = NULL;
	}

	/* release the port event first, then the polldat struct itself */
	port_free_event_local(pd->pd_portev, 0);
	port_pcache_remove_fd(pcp, pfd);
}
/*
* Associate the event port polldat_t structure with the sub-system pointer
* to a pollhead_t structure.
*/
static int
port_bind_pollhead(pollhead_t **php, polldat_t *pdp, short *revents)
{
	pollhead_t	*bound = pdp->pd_php;
	int		rv;

	/*
	 * On re-association the polldat_t is usually still bound to the
	 * pollhead_t of the first association; nothing to do in that case.
	 */
	if (bound == *php)
		return (0);

	/* bound to a different pollhead_t: unlink from the old one first */
	if (bound != NULL)
		pollhead_delete(bound, pdp);

	/*
	 * pollwakeup() only sees polldat entries which are on the ph_list.
	 * Any notification raised before this insert is therefore lost for
	 * this entry.
	 */
	pollhead_insert(*php, pdp);
	pdp->pd_php = *php;

	/*
	 * Notifications are visible from here on. Poll once more to pick up
	 * the current state of the events.
	 */
	curthread->t_pollcache = (pollcache_t *)pdp->pd_pcache;
	rv = VOP_POLL(pdp->pd_fp->f_vnode, pdp->pd_events, 0, revents, php);
	curthread->t_pollcache = NULL;

	return (rv);
}
/*
* Grow the hash table. Rehash all the elements on the hash table.
*/
static void
port_cache_grow_hashtbl(port_fdcache_t *pcp)
{
	portfd_t	**prev_tbl;
	polldat_t	*cur;
	polldat_t	*following;
	int		prev_size;
	int		slot;

	ASSERT(MUTEX_HELD(&pcp->pc_lock));

	/* remember the old table, then install a larger, empty one */
	prev_tbl = pcp->pc_hash;
	prev_size = pcp->pc_hashsize;
	pcp->pc_hashsize *= PORTHASH_MULT;
	pcp->pc_hash = kmem_zalloc(pcp->pc_hashsize * sizeof (portfd_t *),
	    KM_SLEEP);

	/*
	 * Re-insert every element of the old table. The insert routine
	 * counts each element again, so the counter restarts at zero.
	 */
	pcp->pc_fdcount = 0;
	for (slot = 0; slot < prev_size; slot++) {
		for (cur = PFTOD(prev_tbl[slot]); cur != NULL;
		    cur = following) {
			/* the insert overwrites pd_hashnext: save it first */
			following = cur->pd_hashnext;
			port_cache_insert_fd(pcp, cur);
		}
	}

	kmem_free(prev_tbl, prev_size * sizeof (portfd_t *));
}
/*
* This routine inserts a polldat into the portcache's hash table. It
* may be necessary to grow the size of the hash table.
*/
static void
port_cache_insert_fd(port_fdcache_t *pcp, polldat_t *pdp)
{
	portfd_t **head;

	ASSERT(MUTEX_HELD(&pcp->pc_lock));

	/* grow before the bucket is selected: growing changes pc_hashsize */
	if ((pcp->pc_hashsize * PORTHASH_MULT) < pcp->pc_fdcount)
		port_cache_grow_hashtbl(pcp);

	/* push the entry onto the front of its bucket chain */
	head = PORT_FD_BUCKET(pcp, pdp->pd_fd);
	pdp->pd_hashnext = PFTOD(*head);
	*head = PDTOF(pdp);

	pcp->pc_fdcount += 1;
}
/*
 * The port_remove_portfd() function dissociates the port from the fd
 * and vice versa:
 * - the port is removed from the interested list of the file descriptor
 *   (delfd_port()) and
 * - the polldat/port event structures of the fd are removed from the port
 *   (port_remove_fd_object()).
 * pdp is the cached polldat_t of the fd, pcp the fd cache of the port.
 * The caller must hold pcp->pc_lock.
 * The entry is left untouched when the file descriptor cannot be held
 * with getf().
 */
static void
port_remove_portfd(polldat_t *pdp, port_fdcache_t *pcp)
{
	port_t	*pp;
	int	fd;

	ASSERT(MUTEX_HELD(&pcp->pc_lock));

	pp = pdp->pd_portev->portkev_port;
	fd = pdp->pd_fd;

	/*
	 * The hold taken by getf() must be checked at run time and not only
	 * with ASSERT(): on non-DEBUG kernels a failed getf() would otherwise
	 * be followed by a releasef() without a matching hold.
	 * NOTE(review): a NULL return for an fd which is still cached
	 * presumably means that the fd is being closed by another thread,
	 * whose close notification is expected to tear down this entry -
	 * confirm against the close path of the port framework.
	 */
	if (getf(fd) == NULL)
		return;

	/* remove port from the file descriptor interested list */
	delfd_port(fd, PDTOF(pdp));
	releasef(fd);

	/* remove polldat & port event structure */
	port_remove_fd_object(PDTOF(pdp), pp, pcp);
}
/*
 * This function is used by port_close_sourcefd() to destroy the cache
 * on last close.
 * The cache must be empty (pc_fdcount == 0). The hash table, the cache
 * lock and the port_fdcache_t structure itself are released, so pcp must
 * not be used after this function returns.
 */
static void
port_pcache_destroy(port_fdcache_t *pcp)
{
	ASSERT(pcp->pc_fdcount == 0);

	/*
	 * The table is an array of portfd_t pointers. Compute the size with
	 * the same element type that is used where the table is allocated.
	 */
	kmem_free(pcp->pc_hash, pcp->pc_hashsize * sizeof (portfd_t *));
	mutex_destroy(&pcp->pc_lock);
	kmem_free(pcp, sizeof (port_fdcache_t));
}
/*
* port_close() calls this function to request the PORT_SOURCE_FD source
* to remove/free all resources allocated and associated with the port.
*/
/* ARGSUSED */
static void
port_close_sourcefd(void *arg, int port, pid_t pid, int lastclose)
{
	port_t		*portp = arg;
	port_fdcache_t	*cache = portp->port_queue.portq_pcp;
	portfd_t	**table;
	polldat_t	*cur;
	polldat_t	*following;
	int		slot;

	/* no cache available -> nothing to do */
	if (cache == NULL)
		return;

	mutex_enter(&cache->pc_lock);

	/* event port vnode will be destroyed -> remove everything */
	if (lastclose)
		portp->port_queue.portq_pcp = NULL;

	/*
	 * Walk all buckets and release the portfd_t and port_kevent_t
	 * structures of the associations made by the process "pid".
	 */
	table = cache->pc_hash;
	for (slot = 0; slot < cache->pc_hashsize; slot++) {
		cur = PFTOD(table[slot]);
		while (cur != NULL) {
			/* fetch the successor before the entry is removed */
			following = cur->pd_hashnext;
			if (cur->pd_portev->portkev_pid == pid)
				port_remove_portfd(cur, cache);
			cur = following;
		}
	}

	mutex_exit(&cache->pc_lock);

	/*
	 * last close:
	 * pollwakeup() can not further interact with this cache
	 * (all polldat structs are removed from pollhead entries).
	 */
	if (lastclose)
		port_pcache_destroy(cache);
}