socket.c revision 4095b364155591b10bfe79c77e686031f6976852
/* $Id: socket.c,v 1.9 1998/11/15 11:48:17 explorer Exp $ */
#include "attribute.h"
#include <errno.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <isc/assertions.h>
#include <isc/unexpect.h>
#include <isc/condition.h>
#ifndef _WIN32
#define WINAPI /* we're not windows */
#endif
/*
* We use macros instead of calling the routines directly because
* the capital letters make the locking stand out.
*
* We INSIST that they succeed since there's no way for us to continue
* if they fail.
*/
/*
* Debugging
*/
#if 1
/*
 * Tracing enabled.
 *
 * XTRACE takes a complete, parenthesized printf argument list, e.g.
 * XTRACE(("fd %d\n", fd)), which is why call sites use double
 * parentheses.  XENTER and XEXIT take the name of the function being
 * entered or left.
 */
#define XTRACE(a) printf a
#define XENTER(a) printf("ENTER %s\n", (a))
#define XEXIT(a) printf("EXIT %s\n", (a))
#else
/*
 * Tracing disabled: the macros expand to nothing.
 */
#define XTRACE(a)
#define XENTER(a)
#define XEXIT(a)
#endif
/*
* functions.
*/
typedef struct isc_socket_intev {
typedef struct isc_socket_ncintev {
/*
* True when 't' is a non-NULL pointer whose 'magic' field holds
* SOCKET_MAGIC. Note that 't' is evaluated twice.
*/
#define VALID_SOCKET(t) ((t) != NULL && \
(t)->magic == SOCKET_MAGIC)
/*
* Per-socket state. 'magic' is the field VALID_SOCKET() checks; the
* remaining fields are documented below as protected by the socket lock.
*/
struct isc_socket {
/* Not locked. */
unsigned int magic;
/* Locked by socket lock. */
/*
* Reference count: incremented on attach, decremented on detach; the
* socket is cleaned up when it drops to zero.
*/
unsigned int references;
/* Underlying file descriptor; set to -1 once it has been closed. */
int fd;
/*
* NOTE(review): presumably the address associated with this socket and
* its length; the send path tests addrlength > 0 -- confirm exact meaning.
*/
struct isc_sockaddr address;
unsigned int addrlength;
};
/*
* True when 'm' is a non-NULL pointer whose 'magic' field holds
* SOCKET_MANAGER_MAGIC. Note that 'm' is evaluated twice.
*/
#define VALID_MANAGER(m) ((m) != NULL && \
(m)->magic == SOCKET_MANAGER_MAGIC)
/*
* Socket manager state. 'magic' is the field VALID_MANAGER() checks; the
* remaining fields are documented below as protected by the manager lock.
*/
struct isc_socketmgr {
/* Not locked. */
unsigned int magic;
/* Locked by manager lock. */
/* Initialized to 0 when the manager is created. */
unsigned int nscheduled;
unsigned int nsockets; /* sockets managed */
/*
* NOTE(review): presumably the highest descriptor being watched; the
* socket teardown path carries an XXX about resetting it -- confirm.
*/
int maxfd;
/*
* NOTE(review): presumably the two ends of the pipe used to wake up the
* select loop -- confirm which index is the read end.
*/
int pipe_fds[2];
};
/*
* Special values for messages read by the watcher from its control fd.
* Both are negative: the watcher treats any message >= 0 as a wakeup for
* a socket. SELECT_POKE_NOTHING is what the reader returns when the read
* would block (or fails); SELECT_POKE_SHUTDOWN asks the watcher to stop.
*/
#define SELECT_POKE_SHUTDOWN (-1)
#define SELECT_POKE_NOTHING (-2)
static void rwdone_event_destroy(isc_event_t);
static void free_socket(isc_socket_t *);
isc_socket_t *);
static void destroy(isc_socket_t *);
/*
* poke the select loop when there is something for us to do. Manager must
* be locked.
*/
static void
{
int cc;
if (cc < 0)
"write() failed during watcher poke: %s",
}
/*
* read a message on the internal fd.
*/
static int
{
int msg;
int cc;
if (cc < 0) {
if (errno == EWOULDBLOCK)
return SELECT_POKE_NOTHING;
"read() failed during watcher poke: %s",
return SELECT_POKE_NOTHING; /* XXX */
}
return msg;
}
/*
* Make a fd non-blocking
*/
static isc_result_t
make_nonblock(int fd)
{
int ret;
int flags;
flags |= O_NONBLOCK;
if (ret == -1) {
"fcntl(%d, F_SETFL, %d): %s",
return ISC_R_UNEXPECTED;
}
return ISC_R_SUCCESS;
}
/*
* Handle freeing a done event when needed.
*/
static void
{
/*
* detach from the socket. We would have already detached from the
* task when we actually queue this event up.
*/
sock->references--;
XTRACE(("rwdone_event_destroy: sock %p, ref cnt == %d\n",
if (sock->references == 0)
if (kill_socket)
}
static void
{
/*
* detach from the socket. We would have already detached from the
* task when we actually queue this event up.
*/
sock->references--;
if (sock->references == 0)
if (kill_socket)
}
/*
* Kill.
*
* Caller must ensure locking.
*/
static void
{
/*
* No one has this socket open, so the watcher doesn't have to be
* poked, and the socket doesn't have to be locked.
*/
/*
* XXX should reset manager->maxfd here
*/
}
/*
* Set up a new socket structure: its reader and writer lists start out
* empty and its lock is initialized.
*
* Returns ISC_R_SUCCESS on success, ISC_R_UNEXPECTED if the mutex cannot
* be initialized, and an error result on the early-exit path below.
*/
static isc_result_t
{
/*
* This function returns an isc_result_t, so NULL is not a valid
* result here.
* NOTE(review): ISC_R_NOMEMORY assumes this early exit is the
* allocation-failure path -- confirm against the guarding condition.
*/
return (ISC_R_NOMEMORY);
/*
* set up list of readers and writers to be initially empty
*/
/*
* initialize the lock
*/
"isc_mutex_init() failed");
return (ISC_R_UNEXPECTED);
}
return (ISC_R_SUCCESS);
}
/*
* This event requires that the various lists be empty, that the reference
* count be 1, and that the magic number is valid. The other socket bits,
* like the lock, must be initialized as well. The fd associated must be
* marked as closed, by setting it to -1 on close, or this routine will
* also close the socket.
*/
static void
{
}
}
/*
* Create a new 'type' socket managed by 'manager'. The socket's
* parameters are specified by 'expires' and 'interval'. Events
* will be posted to 'task' and when dispatched 'action' will be
* called with 'arg' as the arg value. The new socket is returned
* in 'socketp'.
*/
{
XENTER("isc_socket_create");
if (ret != ISC_R_SUCCESS)
return (ret);
/*
* Create the associated socket XXX
*/
switch (type) {
case isc_socket_udp:
break;
case isc_socket_tcp:
break;
}
free_socket(&sock);
switch (errno) {
case EMFILE:
case ENFILE:
case ENOBUFS:
return (ISC_R_NORESOURCES);
break;
default:
"socket() failed: %s",
return (ISC_R_UNEXPECTED);
break;
}
}
free_socket(&sock);
return (ISC_R_UNEXPECTED);
}
/*
* Note we don't have to lock the socket like we normally would because
* there are no external references to it yet.
*/
sock->references++;
XEXIT("isc_socket_create");
return (ISC_R_SUCCESS);
}
/*
* Attach to a socket. Caller must explicitly detach when it is done.
*/
void
{
sock->references++;
}
/*
* Dereference a socket. If this is the last reference to it, clean things
* up by destroying the socket.
*/
void
{
XENTER("isc_socket_detach");
sock->references--;
if (sock->references == 0)
if (kill_socket)
XEXIT("isc_socket_detach");
}
/*
* I/O is possible on a given socket. Schedule an event to this task that
* will call an internal function to do the I/O. This will charge the
* task with the I/O operation and let our select loop handler get back
* to doing something real as fast as possible.
*
* The socket and manager must be locked before calling this function.
*/
static void
{
XTRACE(("dispatch_read: posted event %p to task %p\n",
}
static void
{
}
static void
{
}
/*
* Dequeue an item off the given socket's read queue, set the result code
* in the done event to the one provided, and send it to the task it was
* destined for.
*
* Caller must have the socket locked.
*/
static void
{
}
static void
{
}
static void
{
}
/*
* Call accept() on a socket, to get the new file descriptor. The listen
* socket is used as a prototype to create a new isc_socket_t. The new
* socket is referenced twice (once for the task which is receiving this
* message, and once for the message itself) so the task does not need to
* attach to the socket again. The task is not attached at all.
*/
static isc_boolean_t
{
int fd;
/*
* Has this event been canceled?
*/
return (0);
}
/*
* Try to accept the new connection. If the accept fails with
* EWOULDBLOCK, simply poke the watcher to watch this socket
* again.
*/
if (fd < 0) {
if (errno == EWOULDBLOCK) {
XTRACE(("internal_accept: ewouldblock\n"));
return (0);
}
/*
* If some other error, ignore it as well and hope
* for the best, but log it. XXX This will have to be
* changed, thanks to broken OSs trying to overload what
* accept does.
*/
XTRACE(("internal_accept: accept returned %s\n",
return (0);
}
/*
* The accept succeeded. Pull off the done event and set the
* fd and other information in the socket descriptor here. These
* were preallocated for us.
*/
XTRACE(("internal_accept: newsock %p, fd %d\n",
/*
* It's safe to do this, since the done event's free routine will
* detach from the socket, so sock can't disappear out from under
* us.
*/
return (0);
}
/*
* Internal read handler (failures are logged as "internal read").
*
* Works through the socket's queued read requests, doing as much I/O as
* possible. When the read would block, hits EOF, or only partially
* satisfies a request that needs a full read, control jumps to 'poke'.
* Always returns 0.
*/
static isc_boolean_t
{
int cc;
/*
* Find out what socket this is and lock it.
*/
/*
* Pull the first entry off the list, and look at it. If it is
* NULL, or not ours, something bad happened.
*/
/*
* Try to do as much I/O as possible on this socket. There are no
* limits here, currently. If some sort of quantum read count is
* desired before giving up control, make certain to process markers
* regardless of quantum.
*/
do {
/*
* check for canceled I/O
*/
continue;
}
/*
* If this is a marker event, post its completion and
* continue the loop.
*/
continue;
}
/*
* It must be a read request. Try to satisfy it as best
* we can.
*/
read_count, 0);
sock->addrlength);
} else {
read_count, 0,
&addrlen);
}
/*
* check for error or block condition
*/
if (cc < 0) {
/*
* The reason for a failed call is reported in errno. The
* return value is negative on this path, so it can never
* equal EWOULDBLOCK.
*/
if (errno == EWOULDBLOCK)
goto poke;
"internal read: %s",
}
/*
* read of 0 means the remote end was closed. Run through
* the event queue and dispatch all the events with an EOF
* result code. This will set the EOF flag in markers as
* well, but that's really ok.
*/
if (cc == 0) {
do {
goto poke;
}
/*
* if we read less than we expected, update counters,
* poke.
*/
/*
* If partial reads are allowed, we return whatever
* was read with a success result, and continue
* the loop.
*/
continue;
}
/*
* Partials not ok. Exit the loop and notify the
* watcher to wait for more reads
*/
goto poke;
}
/*
* Exactly what we wanted to read. We're done with this
* entry. Post its completion event.
*/
dev->n += read_count;
}
poke:
return (0);
}
/*
* Internal write handler (failures are logged as "internal_write").
*
* Works through the socket's queued write requests, doing as much I/O as
* possible. When the write would block, returns 0 bytes, or only
* partially satisfies a request, control jumps to 'poke'.
* Always returns 0.
*/
static isc_boolean_t
{
int cc;
/*
* Find out what socket this is and lock it.
*/
/*
* Pull the first entry off the list, and look at it. If it is
* NULL, or not ours, something bad happened.
*/
/*
* Try to do as much I/O as possible on this socket. There are no
* limits here, currently. If some sort of quantum write count is
* desired before giving up control, make certain to process markers
* regardless of quantum.
*/
do {
/*
* check for canceled I/O
*/
continue;
}
/*
* If this is a marker event, post its completion and
* continue the loop.
*/
continue;
}
/*
* It must be a write request. Try to satisfy it as best
* we can.
*/
write_count, 0,
dev->addrlength);
else
write_count, 0);
/*
* check for error or block condition
*/
if (cc < 0) {
/*
* The reason for a failed call is reported in errno. The
* return value is negative on this path, so it can never
* equal EWOULDBLOCK.
*/
if (errno == EWOULDBLOCK)
goto poke;
"internal_write: %s",
}
/*
* write of 0 means the remote end was closed. Run through
* the event queue and dispatch all the events with an EOF
* result code. This will set the EOF flag in markers as
* well, but that's really ok.
*/
if (cc == 0) {
do {
goto poke;
}
/*
* if we write less than we expected, update counters,
* poke.
*/
goto poke;
}
/*
* Exactly what we wanted to write. We're done with this
* entry. Post its completion event.
*/
dev->n += write_count;
}
poke:
return (0);
}
/*
* This is the thread that will loop forever, always in a select or poll
* call.
*
* When select returns something to do, track down what thread gets to do
* this I/O and post the event to it.
*/
static isc_threadresult_t
{
int ctlfd;
int cc;
int msg;
int i;
int maxfd;
/*
* Get the control fd here. This will never change.
*/
while (!done) {
do {
NULL);
XTRACE(("select(%d, ...) == %d, errno %d\n",
if (cc < 0) {
"select failed: %s",
}
} while (cc < 0);
XTRACE(("watcher got manager lock\n"));
/*
* Process reads on internal, control fd.
*/
while (1) {
/*
* Nothing to read?
*/
if (msg == SELECT_POKE_NOTHING)
break;
/*
* handle shutdown message. We really should
* jump out of this loop right away, but
* it doesn't matter if we have to do a little
* more work first.
*/
if (msg == SELECT_POKE_SHUTDOWN)
/*
* This is a wakeup on a socket. Look
* at the event queue for both read and write,
* and decide if we need to watch on it now
* or not.
*/
if (msg >= 0) {
XTRACE(("watcher locked socket %p\n",
sock));
/*
* If there are no events, or there
* is an event but we have already
* queued up the internal event on a
* task's queue, clear the bit.
* Otherwise, set it.
*/
|| sock->pending_read) {
XTRACE(("watch cleared r\n"));
} else {
XTRACE(("watch set r\n"));
}
XTRACE(("watch cleared w\n"));
} else {
XTRACE(("watch set w\n"));
}
}
}
}
/*
* Avoid locking and unlocking the socket twice if both reads and
* writes are possible.
*/
for (i = 0 ; i < maxfd ; i++) {
XTRACE(("watcher r on %d, sock %p\n",
else
}
XTRACE(("watcher w on %d, sock %p\n",
if (!unlock_sock) {
}
}
if (unlock_sock)
}
}
}
return ((isc_threadresult_t)0);
}
/*
* Create a new socket manager.
*/
{
XENTER("isc_socketmgr_create");
return (ISC_R_NOMEMORY);
manager->nscheduled = 0;
"isc_mutex_init() failed");
return (ISC_R_UNEXPECTED);
}
/*
* Create the special fds that will be used to wake up the
* select loop.
*/
"pipe() failed: %s",
return (ISC_R_UNEXPECTED);
}
/*
* Set up initial state for the select loop
*/
/*
* Start the watcher thread.
*/
"isc_thread_create() failed");
return (ISC_R_UNEXPECTED);
}
XEXIT("isc_socketmgr_create (normal)");
return (ISC_R_SUCCESS);
}
void
{
/*
* Destroy a socket manager.
*/
/*
* Wake up the watcher thread by closing the write
* half of the pipe, which will send EOF to the read half.
*/
/*
* Wait for thread to exit.
*/
"isc_thread_join() failed");
/*
* Clean up.
*/
}
{
/*
* Receive path (failures are logged as "isc_socket_recv").
*
* Tries the read immediately when the read queue is empty. If the read
* would block or is only partially satisfied, control jumps to 'queue'
* and the request is enqueued for the watcher. Returns ISC_R_NOMEMORY
* when an event cannot be allocated, ISC_R_SUCCESS otherwise.
*/
int cc;
return (ISC_R_NOMEMORY);
sock,
sock,
sizeof(*iev));
/* no special free routine yet */
return (ISC_R_NOMEMORY);
}
}
/*
* UDP sockets are always partial read
*/
/*
* Remember that we need to detach on event free
*/
ev->n = 0;
/*
* If the read queue is empty, try to do the I/O right now.
*/
&ev->addrlength);
} else {
}
if (cc < 0) {
/*
* The reason for a failed call is reported in errno. The
* return value is negative on this path, so it can never
* equal EWOULDBLOCK.
*/
if (errno == EWOULDBLOCK)
goto queue;
"isc_socket_recv: %s",
}
if (cc == 0) {
return (ISC_R_SUCCESS);
}
/*
* Partial reads need to be queued
*/
goto queue;
/*
* full reads are posted, or partials if partials are ok.
*/
return (ISC_R_SUCCESS);
}
/*
* We couldn't read all or part of the request right now, so queue
* it.
*/
/*
* Enqueue the request. If the socket was previously not being
* watched, poke the watcher to start paying attention to it.
*/
} else {
}
XTRACE(("isc_socket_recv: posted ievent %p, dev %p, task %p\n",
return (ISC_R_SUCCESS);
}
{
}
{
/*
* Send path (failures are logged as "isc_socket_send").
*
* Tries the write immediately when the write queue is empty. If the
* write would block or is only partially completed, control jumps to
* 'queue' and the request is enqueued for the watcher. Returns
* ISC_R_NOMEMORY when an event cannot be allocated, ISC_R_SUCCESS
* otherwise.
*/
int cc;
return (ISC_R_NOMEMORY);
sock,
sock,
sizeof(*iev));
/* no special free routine yet */
return (ISC_R_NOMEMORY);
}
}
/*
* Remember that we need to detach on event free
*/
ev->n = 0;
/*
* If the write queue is empty, try to do the I/O right now.
*/
if (addrlength > 0) {
} else if (sock->addrlength > 0) {
}
INSIST(addrlength == 0);
}
ev->addrlength);
else
if (cc < 0) {
/*
* The reason for a failed call is reported in errno. The
* return value is negative on this path, so it can never
* equal EWOULDBLOCK.
*/
if (errno == EWOULDBLOCK)
goto queue;
"isc_socket_send: %s",
}
if (cc == 0) {
return (ISC_R_SUCCESS);
}
/*
* Partial writes need to be queued
*/
goto queue;
/*
* full writes are posted.
*/
return (ISC_R_SUCCESS);
}
/*
* We couldn't send all or part of the request right now, so queue
* it.
*/
/*
* Enqueue the request. If the socket was previously not being
* watched, poke the watcher to start paying attention to it.
*/
} else {
}
XTRACE(("isc_socket_send: posted ievent %p, dev %p, task %p\n",
return (ISC_R_SUCCESS);
}
int addrlen)
{
switch (errno) {
case EACCES:
return (ISC_R_NOPERM);
break;
case EADDRNOTAVAIL:
return (ISC_R_ADDRNOTAVAIL);
break;
case EADDRINUSE:
return (ISC_R_ADDRINUSE);
break;
case EINVAL:
return (ISC_R_BOUND);
break;
default:
return (ISC_R_UNEXPECTED);
break;
}
}
return (ISC_R_SUCCESS);
}
/*
* set up to listen on a given socket. We do this by creating an internal
* event that will be dispatched when the socket has read activity. The
* watcher will send the internal event to the task when there is a new
* connection.
*
* Unlike in read, we don't preallocate a done event here. Every time there
* is a new connection we'll have to allocate a new one anyway, so we might
* as well keep things simple rather than having to track them.
*/
{
"Socket is not isc_socket_tcp");
return (ISC_R_UNEXPECTED);
}
"Socket already listener");
return (ISC_R_UNEXPECTED);
}
if (backlog == 0)
return (ISC_R_UNEXPECTED);
}
return (ISC_R_SUCCESS);
}
{
XENTER("isc_socket_accept");
sock,
sock,
sizeof(*iev));
return (ISC_R_NOMEMORY);
}
sock,
arg,
sizeof (*dev));
return (ISC_R_NOMEMORY);
}
if (ret != ISC_R_SUCCESS) {
return (ret);
}
/*
* Attach to socket and to task
*/
sock->references++;
/*
* poke watcher here. We still have the socket locked, so there
* is no race condition. We will keep the lock for such a short
* bit of time that waking it up now or later won't matter all that much.
*/
return (ISC_R_SUCCESS);
}