/* socket.c revision da4242bf62503672dbabd04e1c84c62680393f25 */
/* $Id: socket.c,v 1.16 1998/12/04 20:00:16 halley Exp $ */
#include "attribute.h"
#include <errno.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <isc/assertions.h>
#include <isc/condition.h>
#ifndef _WIN32
#define WINAPI /* we're not windows */
#endif
/*
* We use macros instead of calling the routines directly because
* the capital letters make the locking stand out.
*
* We INSIST that they succeed since there's no way for us to continue
* if they fail.
*/
/*
* Debugging
*/
/*
 * Trace category flags. Each value is a distinct single bit, so several
 * categories can be OR'ed together into one mask.
 * NOTE(review): what each category covers is inferred from its name only;
 * the trace macros in this file currently expand to nothing.
 */
#define TRACE_WATCHER 0x0001
#define TRACE_LISTEN 0x0002
#define TRACE_CONNECT 0x0004
#define TRACE_RECV 0x0008
#define TRACE_SEND 0x0010
#define TRACE_MANAGER 0x0020
#if 0
/* NOTE(review): the enabled branch is empty here, so it defines nothing. */
#else
/*
 * Tracing disabled: each macro accepts two arguments and expands to
 * nothing, so every trace call site compiles away.
 */
#define XTRACE(l, a)
#define XENTER(l, a)
#define XEXIT(l, a)
#endif
/*
* functions.
*/
/*
 * Internal event types. Each typedef names a POINTER to its struct, so a
 * variable of type rwintev_t / ncintev_t / cnintev_t is already a pointer.
 * NOTE(review): no members are visible for any of the three structs. An
 * empty member list is not valid ISO C (it is accepted only as a compiler
 * extension), so the member lists need to be restored.
 */
typedef struct rwintev {
} *rwintev_t;
typedef struct ncintev {
} *ncintev_t;
typedef struct cnintev {
} *cnintev_t;
/*
 * Per-socket state. The comments inside the struct mark which fields are
 * protected by the socket lock.
 */
struct isc_socket {
/* Not locked. */
/* Identifying tag. NOTE(review): the value it is compared to is not visible here. */
unsigned int magic;
/* Locked by socket lock. */
/* Reference count: incremented on attach, decremented on detach; reaching 0 triggers the kill path. */
unsigned int references;
/* Underlying descriptor; the free-path comment in this file expects -1 once it has been closed. */
int fd;
int addrlength; /* remote addrlen */
};
/*
 * Evaluates to true when 'm' is non-NULL and its magic field equals
 * SOCKET_MANAGER_MAGIC. 'm' is expanded twice, so pass an expression
 * without side effects.
 */
#define VALID_MANAGER(m) ((m) != NULL && \
(m)->magic == SOCKET_MANAGER_MAGIC)
/*
 * Socket manager state. The comments inside the struct mark which fields
 * are protected by the manager lock.
 */
struct isc_socketmgr {
/* Not locked. */
/* Identifying tag; VALID_MANAGER() compares it with SOCKET_MANAGER_MAGIC. */
unsigned int magic;
/* Locked by manager lock. */
unsigned int nsockets; /* sockets managed */
/* NOTE(review): presumably the highest fd being watched -- confirm; an XXX note in this file says it is not reset when a socket is killed. */
int maxfd;
/* Pipe used to send wake-up messages to the watcher; it has a read half and a write half. */
int pipe_fds[2];
};
static void done_event_destroy(isc_event_t);
static void free_socket(isc_socket_t *);
isc_socket_t *);
static void destroy(isc_socket_t *);
/*
 * Control messages read from the watcher's internal fd. Both are negative;
 * the watcher treats any message >= 0 as a wakeup on a socket.
 */
/* Handled by the watcher as its shutdown message. */
#define SELECT_POKE_SHUTDOWN (-1)
/* Returned by the message reader when the read fails; the watcher stops draining on it. */
#define SELECT_POKE_NOTHING (-2)
/*
* Poke the select loop when there is something for us to do.
* We assume that if a write completes here, it will be inserted into the
* queue fully. That is, we will not get partial writes.
*/
static void
{
int cc;
if (cc < 0) /* XXX need to handle EAGAIN, EINTR here */
"write() failed during watcher poke: %s",
}
/*
* read a message on the internal fd.
*/
static int
{
int msg;
int cc;
if (cc < 0) {
if (SOFT_ERROR(errno))
return (SELECT_POKE_NOTHING);
"read() failed during watcher poke: %s",
return (SELECT_POKE_NOTHING);
}
return (msg);
}
/*
* Make a fd non-blocking
*/
static isc_result_t
make_nonblock(int fd)
{
int ret;
int flags;
flags |= O_NONBLOCK;
if (ret == -1) {
"fcntl(%d, F_SETFL, %d): %s",
return (ISC_R_UNEXPECTED);
}
return (ISC_R_SUCCESS);
}
/*
* Handle freeing a done event when needed.
*/
static void
{
/*
* detach from the socket. We would have already detached from the
* task when we actually queue this event up.
*/
sock->references--;
if (sock->references == 0)
if (kill_socket)
}
/*
* Kill.
*
* Caller must ensure locking.
*/
static void
{
/*
* No one has this socket open, so the watcher doesn't have to be
* poked, and the socket doesn't have to be locked.
*/
/*
* XXX should reset manager->maxfd here
*/
}
static isc_result_t
{
return (ISC_R_NOMEMORY);
sock->references = 0;
/*
* set up list of readers and writers to be initially empty
*/
/*
* initialize the lock
*/
"isc_mutex_init() failed");
return (ISC_R_UNEXPECTED);
}
return (ISC_R_SUCCESS);
}
/*
* This event requires that the various lists be empty, that the reference
* count be 1, and that the magic number is valid. The other socket bits,
* like the lock, must be initialized as well. The fd associated must be
* marked as closed, by setting it to -1 on close, or this routine will
* also close the socket.
*/
static void
{
}
}
/*
* Create a new 'type' socket managed by 'manager'. The socket's
* parameters are specified by 'expires' and 'interval'. Events
* will be posted to 'task' and when dispatched 'action' will be
* called with 'arg' as the arg value. The new socket is returned
* in 'socketp'.
*/
{
if (ret != ISC_R_SUCCESS)
return (ret);
switch (type) {
case isc_socket_udp:
break;
case isc_socket_tcp:
break;
}
free_socket(&sock);
switch (errno) {
case EMFILE:
case ENFILE:
case ENOBUFS:
return (ISC_R_NORESOURCES);
break;
default:
"socket() failed: %s",
return (ISC_R_UNEXPECTED);
break;
}
}
free_socket(&sock);
return (ISC_R_UNEXPECTED);
}
/*
* Note we don't have to lock the socket like we normally would because
* there are no external references to it yet.
*/
return (ISC_R_SUCCESS);
}
/*
* Attach to a socket. Caller must explicitly detach when it is done.
*/
void
{
sock->references++;
}
/*
* Dereference a socket. If this is the last reference to it, clean things
* up by destroying the socket.
*/
void
{
sock->references--;
if (sock->references == 0)
if (kill_socket)
}
/*
* I/O is possible on a given socket. Schedule an event to this task that
* will call an internal function to do the I/O. This will charge the
* task with the I/O operation and let our select loop handler get back
* to doing something real as fast as possible.
*
* The socket and manager must be locked before calling this function.
*/
static void
{
}
static void
{
}
static void
{
}
static void
{
}
/*
* Dequeue an item off the given socket's read queue, set the result code
* in the done event to the one provided, and send it to the task it was
* destined for.
*
* Caller must have the socket locked.
*/
static void
{
}
static void
{
}
static void
{
}
/*
* Call accept() on a socket, to get the new file descriptor. The listen
* socket is used as a prototype to create a new isc_socket_t. The new
* socket is referenced twice (once for the task which is receiving this
* message, and once for the message itself) so the task does not need to
* attach to the socket again. The task is not attached at all.
*/
static isc_boolean_t
{
int fd;
("internal_accept called, locked parent sock %p\n", sock));
/*
* Has this event been canceled?
*/
return (0);
}
/*
* Try to accept the new connection. If the accept fails with
* EAGAIN or EINTR, simply poke the watcher to watch this socket
* again.
*/
if (fd < 0) {
if (SOFT_ERROR(errno)) {
return (0);
}
/*
* If some other error, ignore it as well and hope
* for the best, but log it. XXX This will have to be
* changed, thanks to broken OSs trying to overload what
* accept does.
*/
return (0);
}
/*
* The accept succeeded. Pull off the done event and set the
* fd and other information in the socket descriptor here. These
* were preallocated for us.
*/
/*
* Save away the remote address
*/
/*
* It's safe to do this, since the done event's free routine will
* detach from the socket, so sock can't disappear out from under
* us.
*/
return (0);
}
static isc_boolean_t
{
int cc;
/*
* Find out what socket this is and lock it.
*/
/*
* Pull the first entry off the list, and look at it. If it is
* NULL, or not ours, something bad happened.
*/
/*
* Try to do as much I/O as possible on this socket. There are no
* limits here, currently. If some sort of quantum read count is
* desired before giving up control, make certain to process markers
* regardless of quantum.
*/
do {
/*
* check for canceled I/O
*/
continue;
}
/*
* If this is a marker event, post its completion and
* continue the loop.
*/
continue;
}
/*
* It must be a read request. Try to satisfy it as best
* we can.
*/
read_count, 0,
&addrlen);
} else {
read_count, 0);
sock->addrlength);
}
/*
* check for error or block condition
*/
if (cc < 0) {
if (SOFT_ERROR(errno))
goto poke;
"internal read: %s",
}
/*
* read of 0 means the remote end was closed. Run through
* the event queue and dispatch all the events with an EOF
* result code. This will set the EOF flag in markers as
* well, but that's really ok.
*/
if (cc == 0) {
do {
goto poke;
}
/*
* if we read less than we expected, update counters,
* poke.
*/
/*
* If partial reads are allowed, we return whatever
* was read with a success result, and continue
* the loop.
*/
continue;
}
/*
* Partials not ok. Exit the loop and notify the
* watcher to wait for more reads
*/
goto poke;
}
/*
* Exactly what we wanted to read. We're done with this
* entry. Post its completion event.
*/
dev->n += read_count;
}
poke:
return (0);
}
static isc_boolean_t
{
int cc;
/*
* Find out what socket this is and lock it.
*/
/*
* Pull the first entry off the list, and look at it. If it is
* NULL, or not ours, something bad happened.
*/
/*
* Try to do as much I/O as possible on this socket. There are no
* limits here, currently. If some sort of quantum write count is
* desired before giving up control, make certain to process markers
* regardless of quantum.
*/
do {
/*
* check for canceled I/O
*/
continue;
}
/*
* If this is a marker event, post its completion and
* continue the loop.
*/
continue;
}
/*
* It must be a write request. Try to satisfy it as best
* we can.
*/
write_count, 0,
dev->addrlength);
else
write_count, 0);
/*
* check for error or block condition
*/
if (cc < 0) {
if (SOFT_ERROR(errno))
goto poke;
"internal_send: %s",
}
/*
* write of 0 means the remote end was closed. Run through
* the event queue and dispatch all the events with an EOF
* result code. This will set the EOF flag in markers as
* well, but that's really ok.
*/
if (cc == 0) {
do {
goto poke;
}
/*
* if we write less than we expected, update counters,
* poke.
*/
goto poke;
}
/*
* Exactly what we wanted to write. We're done with this
* entry. Post its completion event.
*/
dev->n += write_count;
}
poke:
return (0);
}
/*
* This is the thread that will loop forever, always in a select or poll
* call.
*
* When select returns something to do, track down what thread gets to do
* this I/O and post the event to it.
*/
static isc_threadresult_t
{
int ctlfd;
int cc;
int msg;
int i;
int maxfd;
/*
* Get the control fd here. This will never change.
*/
while (!done) {
do {
NULL);
("select(%d, ...) == %d, errno %d\n",
if (cc < 0) {
if (!SOFT_ERROR(errno))
"select failed: %s",
}
} while (cc < 0);
/*
* Process reads on internal, control fd.
*/
while (1) {
("watcher got message %d\n", msg));
/*
* Nothing to read?
*/
if (msg == SELECT_POKE_NOTHING)
break;
/*
* handle shutdown message. We really should
* jump out of this loop right away, but
* it doesn't matter if we have to do a little
* more work first.
*/
if (msg == SELECT_POKE_SHUTDOWN)
/*
* This is a wakeup on a socket. Look
* at the event queue for both read and write,
* and decide if we need to watch on it now
* or not.
*/
if (msg >= 0) {
("watcher locked socket %p\n",
sock));
/*
* If there are no events, or there
* is an event but we have already
* queued up the internal event on a
* task's queue, clear the bit.
* Otherwise, set it.
*/
|| sock->pending_recv) {
("watch cleared r\n"));
} else {
("watch set r\n"));
}
|| sock->pending_send)
&& !sock->connecting) {
("watch cleared w\n"));
} else {
("watch set w\n"));
}
}
}
}
/*
* Process reads and writes on the other fds here. Avoid locking
* and unlocking twice if both reads and writes are possible.
*/
for (i = 0 ; i < maxfd ; i++) {
("watcher r on %d, sock %p\n",
else
}
("watcher w on %d, sock %p\n",
if (!unlock_sock) {
}
if (sock->connecting)
else
}
if (unlock_sock)
}
}
}
return ((isc_threadresult_t)0);
}
/*
* Create a new socket manager.
*/
{
return (ISC_R_NOMEMORY);
"isc_mutex_init() failed");
return (ISC_R_UNEXPECTED);
}
/*
* Create the special fds that will be used to wake up the
* select loop when there is something for it to do.
*/
"pipe() failed: %s",
return (ISC_R_UNEXPECTED);
}
/*
* Set up initial state for the select loop
*/
/*
* Start the watcher thread.
*/
"isc_thread_create() failed");
return (ISC_R_UNEXPECTED);
}
return (ISC_R_SUCCESS);
}
void
{
/*
* Destroy a socket manager.
*/
/*
* Wake the watcher thread by closing the write
* half of the pipe, which will send EOF to the read half.
*/
/*
* Wait for thread to exit.
*/
"isc_thread_join() failed");
/*
* Clean up.
*/
}
{
int cc;
return (ISC_R_NOMEMORY);
sock,
sock,
sizeof(*iev));
/* no special free routine yet */
return (ISC_R_NOMEMORY);
}
}
/*
* UDP sockets are always partial read
*/
/*
* Remember that we need to detach on event free
*/
ev->n = 0;
/*
* If the read queue is empty, try to do the I/O right now.
*/
&ev->addrlength);
} else {
}
if (cc < 0) {
if (SOFT_ERROR(errno))
goto queue;
"isc_socket_recv: %s",
}
if (cc == 0) {
return (ISC_R_SUCCESS);
}
/*
* Partial reads need to be queued
*/
goto queue;
/*
* full reads are posted, or partials if partials are ok.
*/
return (ISC_R_SUCCESS);
}
/*
* We couldn't read all or part of the request right now, so queue
* it.
*/
/*
* Enqueue the request. If the socket was previously not being
* watched, poke the watcher to start paying attention to it.
*/
} else {
}
("isc_socket_recv: posted ievent %p, dev %p, task %p\n",
return (ISC_R_SUCCESS);
}
{
}
{
int cc;
return (ISC_R_NOMEMORY);
sock,
sock,
sizeof(*iev));
/* no special free routine yet */
return (ISC_R_NOMEMORY);
}
}
/*
* Remember that we need to detach on event free
*/
ev->n = 0;
/*
* If the write queue is empty, try to do the I/O right now.
*/
if (addrlength > 0) {
} else if (sock->addrlength > 0) {
}
INSIST(addrlength == 0);
}
ev->addrlength);
else {
"isc_socket_send: unknown socket type");
return (ISC_R_UNEXPECTED);
}
if (cc < 0) {
if (SOFT_ERROR(errno))
goto queue;
"isc_socket_send: %s",
return (ISC_R_UNEXPECTED);
}
if (cc == 0) {
return (ISC_R_SUCCESS);
}
/*
* Partial writes need to be queued
*/
goto queue;
/*
* full writes are posted.
*/
return (ISC_R_SUCCESS);
}
/*
* We couldn't send all or part of the request right now, so queue
* it.
*/
/*
* Enqueue the request. If the socket was previously not being
* watched, poke the watcher to start paying attention to it.
*/
} else {
}
("isc_socket_send: posted ievent %p, dev %p, task %p\n",
return (ISC_R_SUCCESS);
}
int addrlen)
{
switch (errno) {
case EACCES:
return (ISC_R_NOPERM);
break;
case EADDRNOTAVAIL:
return (ISC_R_ADDRNOTAVAIL);
break;
case EADDRINUSE:
return (ISC_R_ADDRINUSE);
break;
case EINVAL:
return (ISC_R_BOUND);
break;
default:
return (ISC_R_UNEXPECTED);
break;
}
}
return (ISC_R_SUCCESS);
}
/*
* set up to listen on a given socket. We do this by creating an internal
* event that will be dispatched when the socket has read activity. The
* watcher will send the internal event to the task when there is a new
* connection.
*
* Unlike in read, we don't preallocate a done event here. Every time there
* is a new connection we'll have to allocate a new one anyway, so we might
* as well keep things simple rather than having to track them.
*/
{
"Socket is not isc_socket_tcp");
return (ISC_R_UNEXPECTED);
}
"Socket already listener");
return (ISC_R_UNEXPECTED);
}
if (backlog == 0)
return (ISC_R_UNEXPECTED);
}
return (ISC_R_SUCCESS);
}
/*
* This should try to do aggressive accept()
*/
{
sizeof(*iev));
return (ISC_R_NOMEMORY);
}
sock,
arg,
sizeof (*dev));
return (ISC_R_NOMEMORY);
}
if (ret != ISC_R_SUCCESS) {
return (ret);
}
/*
* Attach to socket and to task
*/
sock->references++;
nsock->references++;
/*
* poke watcher here. We still have the socket locked, so there
* is no race condition. We will keep the lock for such a short
* bit of time waking it up now or later won't matter all that much.
*/
return (ISC_R_SUCCESS);
}
{
int cc;
sock,
sock,
return (ISC_R_NOMEMORY);
}
}
sock,
arg,
sizeof (*dev));
return (ISC_R_NOMEMORY);
}
/*
* attach to socket
*/
sock->references++;
/*
* Try to do the connect right away, as there can be only one
* outstanding, and it might happen to complete.
*/
if (cc < 0) {
goto queue;
/* XXX check for normal errors here */
return (ISC_R_UNEXPECTED);
}
/*
* If connect completed, fire off the done event
*/
if (cc == 0) {
return (ISC_R_SUCCESS);
}
/*
* Attach to task
*/
/*
* poke watcher here. We still have the socket locked, so there
* is no race condition. We will keep the lock for such a short
* bit of time waking it up now or later won't matter all that much.
*/
return (ISC_R_SUCCESS);
}
/*
* Called when a socket with a pending connect() finishes.
*/
static isc_boolean_t
{
int cc;
int optlen;
("internal_connect called, locked parent sock %p\n", sock));
/*
* Has this event been canceled?
*/
return (0);
}
/*
* Get any possible error status here.
*/
else
if (errno != 0) {
/*
* If the error is EAGAIN, just re-select on this
* fd and pretend nothing strange happened.
*/
return (0);
}
/*
* Translate other errors into ISC_R_* flavors.
*/
switch (errno) {
case ETIMEDOUT:
break;
case ECONNREFUSED:
break;
case ENETUNREACH:
break;
default:
"internal_connect: connect() %s",
break;
}
}
/*
* It's safe to do this, since the done event's free routine will
* detach from the socket, so sock can't disappear out from under
* us.
*/
return (0);
}
/*
* Locking should not be necessary
*/
int *lengthp)
{
return (ISC_R_TOOSMALL);
return (ISC_R_SUCCESS);
}
/*
* Locking should not be necessary
*/
int *lengthp)
{
struct isc_sockaddr addr;
int len;
return (ISC_R_UNEXPECTED);
}
return (ISC_R_TOOSMALL);
return (ISC_R_SUCCESS);
}