socket.c revision 499b34cea04a46823d003d4c0520c8b03e8513cb
a134177ed9f82189504191d90f3ed9e97c2b47cbTinderbox User * Copyright (C) 1998-2001 Internet Software Consortium.
0c27b3fe77ac1d5094ba3521e8142d9e7973133fMark Andrews * Permission to use, copy, modify, and distribute this software for any
0c27b3fe77ac1d5094ba3521e8142d9e7973133fMark Andrews * purpose with or without fee is hereby granted, provided that the above
0c27b3fe77ac1d5094ba3521e8142d9e7973133fMark Andrews * copyright notice and this permission notice appear in all copies.
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM
28a8f5b0de57d269cf2845c69cb6abe18cbd3b3aMark Andrews * DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
058e44186b74531402c1f99088eb9dbe4926f8daMark Andrews * INTERNET SOFTWARE CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT,
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews/* $Id: socket.c,v 1.179 2001/01/09 21:58:28 bwelling Exp $ */
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews#endif /* ISC_PLATFORM_USETHREADS */
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews * Some systems define the socket length argument as an int, some as size_t,
058e44186b74531402c1f99088eb9dbe4926f8daMark Andrews * some as socklen_t. This is here so it can be easily changed if needed.
058e44186b74531402c1f99088eb9dbe4926f8daMark Andrews#define ISC_SOCKADDR_LEN_T unsigned int
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews * Define what the possible "soft" errors can be. These are non-fatal returns
37dee1ff94960a61243f611c0f87f8c316815c53Mark Andrews * of various network related functions, like recv() and so on.
394f4aec2189750d7f861d00f97fe28ffcd9f659Mark Andrews * For some reason, BSDI (and perhaps others) will sometimes return <0
37dee1ff94960a61243f611c0f87f8c316815c53Mark Andrews * from recv() but will have errno==0. This is broken, but we have to
37dee1ff94960a61243f611c0f87f8c316815c53Mark Andrews * work around it here.
ba751492fcc4f161a18b983d4f018a1a52938cb9Evan Hunt#define DLVL(x) ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_DEBUG(x)
aaaf8d4f4873d21e55c3ffb4f656203d08339865Mark Andrews * DLVL(90) -- Function entry/exit and other tracing.
37dee1ff94960a61243f611c0f87f8c316815c53Mark Andrews * DLVL(70) -- Socket "correctness" -- including returning of events, etc.
394f4aec2189750d7f861d00f97fe28ffcd9f659Mark Andrews * DLVL(60) -- Socket data send/receive
37dee1ff94960a61243f611c0f87f8c316815c53Mark Andrews * DLVL(50) -- Event tracing, including receiving/sending completion events.
37dee1ff94960a61243f611c0f87f8c316815c53Mark Andrews * DLVL(20) -- Socket creation/destruction.
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews#define VALID_SOCKET(t) ((t) != NULL && (t)->magic == SOCKET_MAGIC)
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews * IPv6 control information. If the socket is an IPv6 socket we want
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews * to collect the destination address and interface so the client can
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews * set them on outgoing packets.
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews * NetBSD and FreeBSD can timestamp packets. XXXMLG Should we have
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews * a setsockopt() like interface to request timestamps, and if the OS
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews * doesn't do it for us, call gettimeofday() on every UDP receive?
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews * Check to see if we have even basic support for cracking messages from
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews * the control data returned from/sent via recvmsg()/sendmsg().
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews#if defined(USE_CMSG) && (!defined(CMSG_LEN) || !defined(CMSG_SPACE))
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews /* Not locked. */
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews unsigned int magic;
75ae74f8fd0847817bce2db7c868b179db67f019Mark Andrews /* Locked by socket lock. */
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews * Internal events. Posted when a descriptor is readable or
3dd63ba00f91f4d54ba45ba1831375950758c0a7Mark Andrews * writable. These are statically allocated and never freed.
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews * They will be set to non-purgable before use.
394f4aec2189750d7f861d00f97fe28ffcd9f659Mark Andrews unsigned char overflow; /* used for MSG_TRUNC fake */
394f4aec2189750d7f861d00f97fe28ffcd9f659Mark Andrews unsigned char *cmsg;
394f4aec2189750d7f861d00f97fe28ffcd9f659Mark Andrews unsigned int cmsglen;
37dee1ff94960a61243f611c0f87f8c316815c53Mark Andrews#define SOCKET_MANAGER_MAGIC 0x494f6d67U /* IOmg */
ba751492fcc4f161a18b983d4f018a1a52938cb9Evan Hunt /* Not locked. */
aaaf8d4f4873d21e55c3ffb4f656203d08339865Mark Andrews unsigned int magic;
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews /* Locked by manager lock. */
37dee1ff94960a61243f611c0f87f8c316815c53Mark Andrews#else /* ISC_PLATFORM_USETHREADS */
37dee1ff94960a61243f611c0f87f8c316815c53Mark Andrews unsigned int refs;
ba751492fcc4f161a18b983d4f018a1a52938cb9Evan Hunt#endif /* ISC_PLATFORM_USETHREADS */
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews#endif /* ISC_PLATFORM_USETHREADS */
3dd63ba00f91f4d54ba45ba1831375950758c0a7Mark Andrews * send() and recv() iovec counts
3dd63ba00f91f4d54ba45ba1831375950758c0a7Mark Andrews#define MAXSCATTERGATHER_SEND (ISC_SOCKET_MAXSCATTERGATHER)
3dd63ba00f91f4d54ba45ba1831375950758c0a7Mark Andrews# define MAXSCATTERGATHER_RECV (ISC_SOCKET_MAXSCATTERGATHER + 1)
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews# define MAXSCATTERGATHER_RECV (ISC_SOCKET_MAXSCATTERGATHER)
b0ba1a6059b6d6c4b3aa77d8bc84cc443b981e01Mukund Sivaramanstatic void send_recvdone_event(isc_socket_t *, isc_socketevent_t **,
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrewsstatic void send_senddone_event(isc_socket_t *, isc_socketevent_t **,
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrewsstatic isc_result_t allocate_socket(isc_socketmgr_t *, isc_sockettype_t,
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrewsstatic void internal_accept(isc_task_t *, isc_event_t *);
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrewsstatic void internal_connect(isc_task_t *, isc_event_t *);
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrewsstatic void internal_recv(isc_task_t *, isc_event_t *);
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrewsstatic void internal_send(isc_task_t *, isc_event_t *);
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrewsstatic void process_cmsg(isc_socket_t *, struct msghdr *, isc_socketevent_t *);
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrewsstatic void build_msghdr_send(isc_socket_t *, isc_socketevent_t *,
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrewsstatic void build_msghdr_recv(isc_socket_t *, isc_socketevent_t *,
b0ba1a6059b6d6c4b3aa77d8bc84cc443b981e01Mukund Sivaraman struct msghdr *, struct iovec *, size_t *);
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews isc_logcategory_t *category, isc_logmodule_t *module, int level,
3dd63ba00f91f4d54ba45ba1831375950758c0a7Mark Andrews const char *fmt, ...)
394f4aec2189750d7f861d00f97fe28ffcd9f659Mark Andrews isc_log_write(isc_lctx, category, module, level,
ba751492fcc4f161a18b983d4f018a1a52938cb9Evan Huntsocket_log(isc_socket_t *sock, isc_sockaddr_t *address,
37dee1ff94960a61243f611c0f87f8c316815c53Mark Andrews isc_logcategory_t *category, isc_logmodule_t *module, int level,
37dee1ff94960a61243f611c0f87f8c316815c53Mark Andrews const char *fmt, ...)
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews isc_log_iwrite(isc_lctx, category, module, level,
3dd63ba00f91f4d54ba45ba1831375950758c0a7Mark Andrews isc_sockaddr_format(address, peerbuf, sizeof peerbuf);
3dd63ba00f91f4d54ba45ba1831375950758c0a7Mark Andrews isc_log_iwrite(isc_lctx, category, module, level,
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrewswakeup_socket(isc_socketmgr_t *manager, int fd) {
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews * This is a wakeup on a socket. Look at the event queue for both
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews * read and write, and decide if we need to watch on it now or not.
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews * If there are no events, or there is an event but we
3dd63ba00f91f4d54ba45ba1831375950758c0a7Mark Andrews * have already queued up the internal event on a task's
3dd63ba00f91f4d54ba45ba1831375950758c0a7Mark Andrews * queue, clear the bit. Otherwise, set it.
3dd63ba00f91f4d54ba45ba1831375950758c0a7Mark Andrews ev2 = (isc_event_t *) ISC_LIST_HEAD(sock->accept_list);
3dd63ba00f91f4d54ba45ba1831375950758c0a7Mark Andrews if ((rev == NULL || sock->pending_send) && !sock->connecting)
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews * Poke the select loop when there is something for us to do.
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews * We assume that if a write completes here, it will be inserted into the
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews * queue fully. That is, we will not get partial writes.
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews cc = write(mgr->pipe_fds[1], &msg, sizeof(int));
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews "write() failed "
0b09763c354ec91fb352b6b4cea383bd0195b2d8Mark Andrews "during watcher poke: %s"),
b0ba1a6059b6d6c4b3aa77d8bc84cc443b981e01Mukund Sivaraman * Read a message on the internal fd.
2047977ce2dfcfe3a0fa2d638c3242841310fad3Mark Andrews cc = read(mgr->pipe_fds[0], &msg, sizeof(int));
2047977ce2dfcfe3a0fa2d638c3242841310fad3Mark Andrews "read() failed "
2047977ce2dfcfe3a0fa2d638c3242841310fad3Mark Andrews "during watcher poke: %s"),
2047977ce2dfcfe3a0fa2d638c3242841310fad3Mark Andrews#else /* ISC_PLATFORM_USETHREADS */
2047977ce2dfcfe3a0fa2d638c3242841310fad3Mark Andrews * Update the state of the socketmgr when something changes.
2047977ce2dfcfe3a0fa2d638c3242841310fad3Mark Andrewsselect_poke(isc_socketmgr_t *manager, int msg) {
3d17a3ba61a303d5c4d9867068d0fbe9f24d2988Mark Andrews else if (msg >= 0)
3d17a3ba61a303d5c4d9867068d0fbe9f24d2988Mark Andrews#endif /* ISC_PLATFORM_USETHREADS */
static isc_result_t
int ret;
int flags;
return (ISC_R_UNEXPECTED);
return (ISC_R_SUCCESS);
#ifdef USE_CMSG
#ifdef ISC_PLATFORM_HAVEIPV6
#ifdef SO_TIMESTAMP
#ifndef ISC_NET_BSD44MSGHDR
#ifdef MSG_TRUNC
#ifdef MSG_CTRUNC
#ifndef USE_CMSG
#ifdef SO_TIMESTAMP
#ifdef ISC_PLATFORM_HAVEIPV6
#ifdef ISC_PLATFORM_HAVEIPV6
sizeof(struct in6_pktinfo));
goto next;
#ifdef SO_TIMESTAMP
goto next;
next:
unsigned int iovcount;
write_count = 0;
iovcount = 0;
goto config;
+ skip_count);
skip_count = 0;
iovcount++;
#ifdef ISC_NET_BSD44MSGHDR
#if defined(USE_CMSG)
unsigned int iovcount;
#ifdef ISC_NET_RECVOVERFLOW
maxiov--;
read_count = 0;
goto config;
iovcount = 0;
iovcount++;
#ifdef ISC_NET_RECVOVERFLOW
iovcount++;
#ifdef ISC_NET_BSD44MSGHDR
#if defined(USE_CMSG)
static isc_socketevent_t *
sizeof (*ev));
return (NULL);
ev->n = 0;
return (ev);
#if defined(ISC_SOCKET_DEBUG)
#ifdef ISC_NET_BSD44MSGHDR
int cc;
#if defined(ISC_SOCKET_DEBUG)
if (cc < 0) {
return (DOIO_SOFT);
return (DOIO_HARD); \
return (DOIO_SOFT); \
return (DOIO_HARD); \
return (DOIO_SUCCESS);
return (DOIO_EOF);
#ifdef ISC_NET_RECVOVERFLOW
cc--;
actual_count = 0;
return (DOIO_SOFT);
return (DOIO_SUCCESS);
int cc;
if (cc < 0) {
return (DOIO_SOFT);
return (DOIO_HARD); \
return (DOIO_SOFT); \
return (DOIO_HARD); \
#ifdef EHOSTDOWN
return (DOIO_HARD);
if (cc == 0)
return (DOIO_SOFT);
return (DOIO_SUCCESS);
#ifdef ISC_PLATFORM_USETHREADS
static isc_result_t
return (ISC_R_NOMEMORY);
#ifdef ISC_PLATFORM_HAVEIPV6
#ifdef SO_TIMESTAMP
goto err1;
goto err2;
return (ISC_R_SUCCESS);
#ifdef USE_CMSG
return (ret);
#ifdef USE_CMSG
return (ret);
switch (type) {
case isc_sockettype_udp:
case isc_sockettype_tcp:
switch (errno) {
case EMFILE:
case ENFILE:
case ENOBUFS:
return (ISC_R_NORESOURCES);
case EPROTONOSUPPORT:
case EPFNOSUPPORT:
case EAFNOSUPPORT:
#ifdef LINUX
case EINVAL:
return (ISC_R_FAMILYNOSUPPORT);
return (ISC_R_UNEXPECTED);
return (ISC_R_UNEXPECTED);
#ifdef SO_BSDCOMPAT
#if defined(USE_CMSG)
#if defined(SO_TIMESTAMP)
#if defined(ISC_PLATFORM_HAVEIPV6)
#ifdef IPV6_RECVPKTINFO
return (ISC_R_SUCCESS);
if (kill_socket)
int fd;
(void *)&addrlen);
if (fd < 0) {
goto next;
case DOIO_SOFT:
goto poke;
case DOIO_EOF:
goto poke;
case DOIO_SUCCESS:
case DOIO_HARD:
next:
poke:
goto next;
case DOIO_SOFT:
goto poke;
case DOIO_HARD:
case DOIO_SUCCESS:
next:
poke:
for (i = 0 ; i < maxfd ; i++) {
#ifdef ISC_PLATFORM_USETHREADS
close(i);
goto check_write;
if (!unlock_sock) {
if (unlock_sock)
#ifdef ISC_PLATFORM_USETHREADS
static isc_threadresult_t
int ctlfd;
int cc;
int msg;
int maxfd;
while (!done) {
if (cc < 0) {
} while (cc < 0);
msg);
if (msg >= 0)
return ((isc_threadresult_t)0);
#ifndef ISC_PLATFORM_USETHREADS
return (ISC_R_SUCCESS);
return (ISC_R_NOMEMORY);
return (ISC_R_UNEXPECTED);
#ifdef ISC_PLATFORM_USETHREADS
return (ISC_R_UNEXPECTED);
return (ISC_R_UNEXPECTED);
#ifdef ISC_PLATFORM_USETHREADS
#ifdef ISC_PLATFORM_USETHREADS
return (ISC_R_UNEXPECTED);
#ifndef ISC_PLATFORM_USETHREADS
return (ISC_R_SUCCESS);
#ifndef ISC_PLATFORM_USETHREADS
#ifdef ISC_PLATFORM_USETHREADS
INSIST(0);
#ifdef ISC_PLATFORM_USETHREADS
#ifdef ISC_PLATFORM_USETHREADS
for (i = 0 ; i < FD_SETSIZE ; i++)
close(i);
unsigned int iocount;
return (ISC_R_NOMEMORY);
if (minimum == 0)
if (!was_empty)
goto queue;
case DOIO_SOFT:
goto queue;
case DOIO_EOF:
return (ISC_R_SUCCESS);
case DOIO_HARD:
case DOIO_SUCCESS:
return (ISC_R_SUCCESS);
if (was_empty)
return (ISC_R_SUCCESS);
return (ISC_R_NOMEMORY);
if (minimum == 0)
dev->n = 0;
if (!was_empty)
goto queue;
case DOIO_SOFT:
goto queue;
case DOIO_EOF:
return (ISC_R_SUCCESS);
case DOIO_HARD:
case DOIO_SUCCESS:
return (ISC_R_SUCCESS);
if (was_empty)
return (ISC_R_SUCCESS);
NULL));
return (ISC_R_NOMEMORY);
if (!was_empty)
goto queue;
case DOIO_SOFT:
goto queue;
case DOIO_HARD:
case DOIO_SUCCESS:
return (ISC_R_SUCCESS);
if (was_empty)
return (ISC_R_SUCCESS);
NULL));
unsigned int iocount;
return (ISC_R_NOMEMORY);
if (!was_empty)
goto queue;
case DOIO_SOFT:
goto queue;
case DOIO_HARD:
case DOIO_SUCCESS:
return (ISC_R_SUCCESS);
if (was_empty)
return (ISC_R_SUCCESS);
return (ISC_R_FAMILYMISMATCH);
sizeof on) < 0) {
switch (errno) {
case EACCES:
return (ISC_R_NOPERM);
case EADDRNOTAVAIL:
return (ISC_R_ADDRNOTAVAIL);
case EADDRINUSE:
return (ISC_R_ADDRINUSE);
case EINVAL:
return (ISC_R_BOUND);
return (ISC_R_UNEXPECTED);
return (ISC_R_SUCCESS);
if (backlog == 0)
return (ISC_R_UNEXPECTED);
return (ISC_R_SUCCESS);
return (ISC_R_NOMEMORY);
return (ret);
if (do_poke)
return (ISC_R_SUCCESS);
int cc;
return (ISC_R_MULTICAST);
sizeof (*dev));
return (ISC_R_NOMEMORY);
if (cc < 0) {
goto queue;
switch (errno) {
#ifdef EHOSTDOWN
return (ISC_R_UNEXPECTED);
return (ISC_R_SUCCESS);
if (cc == 0) {
return (ISC_R_SUCCESS);
return (ISC_R_SUCCESS);
int cc;
if (errno != 0) {
switch (errno) {
#ifdef EHOSTDOWN
return (ret);
goto out;
goto out;
out:
return (ret);
if (how == 0)
ev_link);
return (ISC_R_NOMEMORY);
return (ISC_R_SUCCESS);
return (ISC_R_NOMEMORY);
return (ISC_R_SUCCESS);
return (val);
#ifndef ISC_PLATFORM_USETHREADS
*maxfd = 0;
return (ISC_R_NOTFOUND);
return (ISC_R_SUCCESS);