socket.c revision 02d01c31ba7ee6d64f721314f74aeb25937f95bc
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <libintl.h>
#include <strings.h>
#include <alloca.h>
#include <ucred.h>
#include <sys/socketvar.h>
#include <sys/lx_debug.h>
#include <sys/lx_syscall.h>
#include <sys/lx_socket.h>
#include <sys/lx_brand.h>
/*
* Forward declarations of socket subcommand handlers that are referenced
* by the sockfns[] table below. Each handler takes a single ulong_t
* pointer argument and returns an int.
*/
static int lx_connect(ulong_t *);
static int lx_getsockname(ulong_t *);
static int lx_getpeername(ulong_t *);
static int lx_socketpair(ulong_t *);
static int lx_recvfrom(ulong_t *);
static int lx_shutdown(ulong_t *);
static int lx_setsockopt(ulong_t *);
static int lx_getsockopt(ulong_t *);
static int lx_sendmsg(ulong_t *);
static int lx_recvmsg(ulong_t *);
/*
* Table of socket subcommand handlers. Each initializer pair below names
* a handler function followed by the number of arguments it takes.
*
* NOTE(review): only the s_nargs member is visible in the struct
* declaration here, although every initializer also supplies a handler;
* confirm the handler member against the full file.
* NOTE(review): presumably indexed by the Linux socketcall subcommand
* number -- verify against the code that consumes this table.
*/
static struct {
int s_nargs; /* Number of arguments the function takes */
} sockfns[] = {
lx_socket, 3,
lx_bind, 3,
lx_connect, 3,
lx_listen, 2,
lx_accept, 3,
lx_getsockname, 3,
lx_getpeername, 3,
lx_socketpair, 4,
lx_send, 4,
lx_recv, 4,
lx_sendto, 6,
lx_recvfrom, 6,
lx_shutdown, 2,
lx_setsockopt, 5,
lx_getsockopt, 5,
lx_sendmsg, 3,
lx_recvmsg, 3
};
/*
* What follows are a series of tables we use to translate Linux constants
* into equivalent Solaris constants and back again. I wish this were
* cleaner, more programmatic, and generally nicer. Sadly, life is messy,
* and Unix networking even more so.
*/
};
};
#define LTOS_SOCKTYPE(t) \
/*
* Linux socket option type definitions
*
* The protocol `levels` are well defined (see in.h). The option values are
* not so well defined. Linux often uses different values from Solaris
* although they mean the same thing. For example, IP_TOS in Linux is
* defined as value 1 but in Solaris it is defined as value 3. This table
* maps each protocol level to its options and translates them between
* Linux and Solaris and vice versa, hence the complexity.
*/
typedef struct lx_proto_opts {
const int *proto; /* Linux to Solaris mapping table */
int maxentries; /* max entries in this table */
};
};
};
};
#define PROTO_SOCKOPTS(opts) \
/*
* The main Linux to Solaris protocol to options mapping table.
* IPPROTO_TAB_SIZE can be set up to IPPROTO_MAX. All entries above
* IPPROTO_TAB_SIZE are in effect not implemented.
*/
#define IPPROTO_TAB_SIZE 8
/* IPPROTO_IP 0 */
/* SOL_SOCKET 1 */
/* IPPROTO_IGMP 2 */
/* NOT IMPLEMENTED 3 */
{ NULL, 0 },
/* NOT IMPLEMENTED 4 */
{ NULL, 0 },
/* NOT IMPLEMENTED 5 */
{ NULL, 0 },
/* IPPROTO_TCP 6 */
/* NOT IMPLEMENTED 7 */
{ NULL, 0 }
};
/*
* Lifted from socket.h, since these definitions are contained within
* _KERNEL guards.
*/
#define _CMSG_HDR_ALIGNMENT 4
~(_CMSG_HDR_ALIGNMENT - 1))
#define CMSG_FIRSTHDR(m) \
(((m)->msg_controllen < sizeof (struct cmsghdr)) ? \
#define CMSG_NXTHDR(m, c) \
(((c) == 0) ? CMSG_FIRSTHDR(m) : \
((((uintptr_t)_CMSG_HDR_ALIGN((char *)(c) + \
((struct cmsghdr *)0) : \
((struct cmsghdr *)_CMSG_HDR_ALIGN((char *)(c) + \
#define LX_TO_SOL 1
#define SOL_TO_LX 2
/*
* NOTE(review): the signature and most of the body of this routine are
* not visible in this view. What is visible: an error code is kept in
* err (initially 0) and returned to the caller; when err is non-zero the
* condition is first reported through lx_unsupported(), naming the caller.
*/
static int
{
int err = 0;
else
} else {
}
} else {
else
} else {
}
}
}
/*
* NOTE(review): the format string below ends in "\n." (newline, then
* period), while the other lx_unsupported() messages in this file carry
* no trailing newline. Confirm which form was intended.
*/
if (err)
lx_unsupported("Unsupported socket control message in %s\n.",
caller);
return (err);
}
static int
{
int lx_in6_len;
int size;
/*
* Note that if the buffer at inaddr is ever smaller than inlen bytes,
* we may erroneously return EFAULT rather than a possible EINVAL
* as the copy comes before the various checks as to whether inlen
* is of the proper length for the socket type.
*
* This isn't an issue at present because all callers to this routine
* do meet that constraint.
*/
return (-EINVAL);
return (-errno);
switch (family) {
case (sa_family_t)AF_NOTSUPPORTED:
return (-EPROTONOSUPPORT);
case (sa_family_t)AF_INVAL:
return (-EAFNOSUPPORT);
case AF_INET:
return (-EINVAL);
break;
case AF_INET6:
/*
* The Solaris sockaddr_in6 has one more 32-bit
* field than the Linux version.
*/
size = sizeof (struct sockaddr_in6);
if (inlen != lx_in6_len)
return (-EINVAL);
*len = (sizeof (struct sockaddr_in6));
break;
case AF_UNIX:
if (inlen > sizeof (struct sockaddr_un))
return (-EINVAL);
break;
default:
}
return (0);
}
static int
int *out_type)
{
return (-EINVAL);
return (-EAFNOSUPPORT);
return (-EINVAL);
if (type == SOCK_NOTSUPPORTED)
return (-ESOCKTNOSUPPORT);
if (type == SOCK_INVAL)
return (-EINVAL);
/*
* Linux does not allow the app to specify IP Protocol for raw
* sockets. Solaris does, so bail out here.
*/
return (-ESOCKTNOSUPPORT);
return (0);
}
/*
* Translate a set of Linux MSG_* flag bits (lx_flags) into the matching
* Solaris flag bits, which are accumulated in solaris_flags and returned.
* The Linux flags tested at the end (MSG_PROXY, MSG_FIN, MSG_SYN,
* MSG_CONFIRM, MSG_RST, MSG_MORE) are not translated; each one is
* reported through lx_unsupported() instead.
*
* NOTE(review): the function name and parameter list are not visible in
* this view.
*/
static int
{
int solaris_flags = 0;
if (lx_flags & LX_MSG_OOB)
solaris_flags |= MSG_OOB;
/*
* NOTE(review): the next six tests have no statement of their own in
* this view, so as written they nest and guard only the MSG_EOR
* assignment. Verify that each one sets its own Solaris flag in the
* full file.
*/
if (lx_flags & LX_MSG_PEEK)
if (lx_flags & LX_MSG_DONTROUTE)
if (lx_flags & LX_MSG_CTRUNC)
if (lx_flags & LX_MSG_TRUNC)
if (lx_flags & LX_MSG_WAITALL)
if (lx_flags & LX_MSG_DONTWAIT)
if (lx_flags & LX_MSG_EOR)
solaris_flags |= MSG_EOR;
/* Linux-only flags: report them rather than silently dropping them. */
if (lx_flags & LX_MSG_PROXY)
lx_unsupported("socket operation with MSG_PROXY flag set");
if (lx_flags & LX_MSG_FIN)
lx_unsupported("socket operation with MSG_FIN flag set");
if (lx_flags & LX_MSG_SYN)
lx_unsupported("socket operation with MSG_SYN flag set");
if (lx_flags & LX_MSG_CONFIRM)
lx_unsupported("socket operation with MSG_CONFIRM set");
if (lx_flags & LX_MSG_RST)
lx_unsupported("socket operation with MSG_RST flag set");
if (lx_flags & LX_MSG_MORE)
lx_unsupported("socket operation with MSG_MORE flag set");
return (solaris_flags);
}
static int
{
int domain;
int type;
int fd;
int err;
if (err != 0)
return (err);
/* Right now IPv6 sockets don't work */
return (-EAFNOSUPPORT);
if (fd >= 0)
return (fd);
if (errno == EPROTONOSUPPORT)
return (-ESOCKTNOSUPPORT);
return (-errno);
}
static int
{
int r;
return (-EINVAL);
return (r);
/*
* Linux returns EADDRINUSE for attempts to bind to UNIX domain
* sockets that aren't sockets.
*/
return (-EADDRINUSE);
return ((r < 0) ? -errno : r);
}
static int
{
int r;
return (-EINVAL);
return (r);
return ((r < 0) ? -errno : r);
}
static int
{
int r;
return ((r < 0) ? -errno : r);
}
static int
{
int r;
/*
* The Linux man page says that -1 is returned and errno is set to
* EFAULT if the "name" address is bad, but it is silent on what to
* set errno to if the "namelen" address is bad. Experimentation
* shows that Linux (at least the 2.4.21 kernel in CentOS) actually
* sets errno to EINVAL in both cases.
*
* Note that we must first check the name pointer, as the Linux
* docs state nothing is copied out if the "name" pointer is NULL.
* If it is NULL, we don't care about the namelen pointer's value
* or about dereferencing it.
*
* Happily, Solaris' accept(3SOCKET) treats NULL name pointers and
* zero namelens the same way.
*/
/*
* In Linux, accept()ed sockets do not inherit anything set by
* fcntl(), so filter those out.
*/
return (-errno);
/*
* Once again, a bad "namelen" address sets errno to EINVAL, not
* EFAULT. If namelen was zero, there's no need to copy a zero back
* out.
*
* Logic might dictate that we should check if we can write to
* the namelen pointer earlier so we don't accept a pending connection
* only to fail the call because we can't write the namelen value back
* out. However, testing shows Linux does indeed fail the call after
* accepting the connection so we must behave in a compatible manner.
*/
return (r);
}
static int
{
return (-errno);
lx_debug("\tgetsockname(%d, 0x%p, 0x%p (=%d))",
if (namelen > 0) {
return (-EINVAL);
}
return (-errno);
/*
* If the name that getsockname() wants to return is larger
* than namelen, getsockname() will copy out the maximum amount
* of data possible and then update namelen to indicate the
* actual size of all the data that it wanted to copy out.
*/
return (-errno);
return (-errno);
return (0);
}
static int
{
return (-errno);
lx_debug("\tgetpeername(%d, 0x%p, 0x%p (=%d))",
/*
* Linux returns EFAULT in this case, even if the namelen parameter
* is 0. This check will not catch other illegal addresses, but
* the benefit of catching a non-null illegal address here is not
* worth the cost of another system call.
*/
return (-EFAULT);
return (-EINVAL);
return (-errno);
return (-errno);
return (-errno);
return (0);
}
static int
{
int domain;
int type;
int fds[2];
int r;
if (r != 0)
return (r);
if (r == 0) {
r = errno;
return (-r);
}
return (0);
}
if (errno == EPROTONOSUPPORT)
return (-ESOCKTNOSUPPORT);
return (-errno);
}
static ssize_t
{
ssize_t r;
/*
* If nosigpipe is set, we want to emulate the Linux action of
* not sending a SIGPIPE to the caller if the remote socket has
* already been closed.
*
* As SIGPIPE is a directed signal sent only to the thread that
* performed the action, we can emulate this behavior by momentarily
* resetting the action for SIGPIPE to SIG_IGN, performing the socket
* call, and resetting the action back to its previous value.
*/
if (nosigpipe) {
"%s: could not ignore SIGPIPE to emulate "
"LX_MSG_NOSIGNAL"), "send()");
}
gettext("%s: could not reset SIGPIPE handler to "
"emulate LX_MSG_NOSIGNAL"), "send()");
return ((r < 0) ? -errno : r);
}
static ssize_t
{
ssize_t r;
/*
* If nosigpipe is set, we want to emulate the Linux action of
* not sending a SIGPIPE to the caller if the remote socket has
* already been closed.
*
* As SIGPIPE is a directed signal sent only to the thread that
* performed the action, we can emulate this behavior by momentarily
* resetting the action for SIGPIPE to SIG_IGN, performing the socket
* call, and resetting the action back to its previous value.
*/
if (nosigpipe) {
"%s: could not ignore SIGPIPE to emulate "
"LX_MSG_NOSIGNAL"), "recv()");
}
gettext("%s: could not reset SIGPIPE handler to "
"emulate LX_MSG_NOSIGNAL"), "recv()");
return ((r < 0) ? -errno : r);
}
static ssize_t
{
ssize_t r;
return (-EINVAL);
return (r);
/*
* If nosigpipe is set, we want to emulate the Linux action of
* not sending a SIGPIPE to the caller if the remote socket has
* already been closed.
*
* As SIGPIPE is a directed signal sent only to the thread that
* performed the action, we can emulate this behavior by momentarily
* resetting the action for SIGPIPE to SIG_IGN, performing the socket
* call, and resetting the action back to its previous value.
*/
if (nosigpipe) {
"%s: could not ignore SIGPIPE to emulate "
"LX_MSG_NOSIGNAL"), "sendto()");
}
gettext("%s: could not reset SIGPIPE handler to "
"emulate LX_MSG_NOSIGNAL"), "sendto()");
if (r < 0) {
/*
* according to the man page and LTP, the expected error in
* this case is EPIPE.
*/
return (-EPIPE);
else
return (-errno);
}
return (r);
}
static ssize_t
{
ssize_t r;
/*
* If nosigpipe is set, we want to emulate the Linux action of
* not sending a SIGPIPE to the caller if the remote socket has
* already been closed.
*
* As SIGPIPE is a directed signal sent only to the thread that
* performed the action, we can emulate this behavior by momentarily
* resetting the action for SIGPIPE to SIG_IGN, performing the socket
* call, and resetting the action back to its previous value.
*/
if (nosigpipe) {
"%s: could not ignore SIGPIPE to emulate "
"LX_MSG_NOSIGNAL"), "recvfrom()");
}
gettext("%s: could not reset SIGPIPE handler to "
"emulate LX_MSG_NOSIGNAL"), "recvfrom()");
return ((r < 0) ? -errno : r);
}
static int
{
int r;
return ((r < 0) ? -errno : r);
}
static int
{
int internal_opt;
int r;
/*
* The kernel returns EFAULT for all invalid addresses except NULL,
* for which it returns EINVAL. Linux wants EFAULT for NULL too.
*/
return (-EFAULT);
/*
* Do a table lookup of the Solaris equivalent of the given option
*/
return (-ENOPROTOOPT);
return (-ENOPROTOOPT);
/*
* TCP_CORK is a Linux-only option that instructs the TCP
* stack not to send out partial frames. Solaris doesn't
* include this option but some apps require it. So, we do
* our best to emulate the option by disabling TCP_NODELAY.
* If the app requests that we disable TCP_CORK, we just
* ignore it since enabling TCP_NODELAY may be
* overcompensating.
*/
if (optlen != sizeof (int))
return (-EINVAL);
return (-errno);
if (internal_opt == 0)
return (0);
internal_opt = 1;
optval = &internal_opt;
} else {
return (-ENOPROTOOPT);
}
if (level == LX_SOL_SOCKET)
level = SOL_SOCKET;
return ((r < 0) ? -errno : r);
}
static int
{
int r;
/*
* According to the Linux man page, a NULL optval should indicate
* (as in Solaris) that no return value is expected. Instead, it
* actually triggers an EFAULT error.
*/
return (-EFAULT);
/*
* Do a table lookup of the Solaris equivalent of the given option
*/
return (-EOPNOTSUPP);
return (-ENOPROTOOPT);
/*
* We don't support TCP_CORK but some apps rely on it. So,
* rather than return an error we just return 0. This
* isn't exactly a lie, since this option really isn't set,
* but it's not the whole truth either. Fortunately, we
* aren't under oath.
*/
r = 0;
return (-errno);
r = sizeof (int);
return (-errno);
return (0);
}
/*
* We don't support SO_PEERCRED, but we do have equivalent
* functionality in getpeerucred() so invoke that here.
*/
/* Verify there's going to be enough room for the results. */
return (-errno);
if (r < sizeof (struct lx_ucred))
return (-EOVERFLOW);
/*
* We allocate a ucred_t ourselves rather than allow
* getpeerucred() to do it for us because getpeerucred()
* uses malloc(3C) and we'd rather use SAFE_ALLOCA().
*/
return (-ENOMEM);
/* Get the credential for the remote end of this socket. */
return (-errno);
return (-errno);
}
/* Copy out the results. */
return (-errno);
r = sizeof (lx_ucred);
return (-errno);
return (0);
}
return (-ENOPROTOOPT);
if (level == LX_SOL_SOCKET)
level = SOL_SOCKET;
return ((r < 0) ? -errno : r);
}
/*
* These are the libc routines that issue the sendmsg and recvmsg system
* calls. We bypass the libsocket wrappers since they explicitly turn off
* the MSG_XPG_2 flag we need for Linux compatibility.
*/
extern int _so_sendmsg();
extern int _so_recvmsg();
static int
{
int r;
return (-errno);
/*
* If there are control messages bundled in this message, we need
* to convert them from Linux to Solaris.
*/
if (msg.msg_controllen == 0) {
} else {
return (-EINVAL);
}
return (-errno);
return (-r);
}
/*
* If nosigpipe is set, we want to emulate the Linux action of
* not sending a SIGPIPE to the caller if the remote socket has
* already been closed.
*
* As SIGPIPE is a directed signal sent only to the thread that
* performed the action, we can emulate this behavior by momentarily
* resetting the action for SIGPIPE to SIG_IGN, performing the socket
* call, and resetting the action back to its previous value.
*/
if (nosigpipe) {
"%s: could not ignore SIGPIPE to emulate "
"LX_MSG_NOSIGNAL"), "sendmsg()");
}
gettext("%s: could not reset SIGPIPE handler to "
"emulate LX_MSG_NOSIGNAL"), "sendmsg()");
if (r < 0) {
/*
* according to the man page and LTP, the expected error in
* this case is EPIPE.
*/
return (-EPIPE);
else
return (-errno);
}
return (r);
}
static int
{
int r, err;
return (-errno);
/*
* If we are expecting to have to convert any control messages,
* then we should receive them into our address space instead of
* the app's.
*/
if (msg.msg_controllen == 0) {
} else {
return (-EINVAL);
}
}
/*
* If nosigpipe is set, we want to emulate the Linux action of
* not sending a SIGPIPE to the caller if the remote socket has
* already been closed.
*
* As SIGPIPE is a directed signal sent only to the thread that
* performed the action, we can emulate this behavior by momentarily
* resetting the action for SIGPIPE to SIG_IGN, performing the socket
* call, and resetting the action back to its previous value.
*/
if (nosigpipe) {
"%s: could not ignore SIGPIPE to emulate "
"LX_MSG_NOSIGNAL"), "recvmsg()");
}
gettext("%s: could not reset SIGPIPE handler to "
"emulate LX_MSG_NOSIGNAL"), "recvmsg()");
/*
* If there are control messages bundled in this message,
* we need to convert them from Solaris to Linux.
*/
return (-err);
return (-errno);
}
/*
* A handful of the values in the msghdr are set by the recvmsg()
* call, so copy their values back to the caller. Rather than iterate,
* just copy the whole structure back.
*/
return (-errno);
return ((r < 0) ? -errno : r);
}
int
{
int r;
return (-EINVAL);
/*
* Copy the arguments to the subcommand in from the app's address
* space, returning EFAULT if we get a bogus pointer.
*/
return (-errno);
return (r);
}