socketvar.h revision 745b26904e92607793a42c0c924dbfb8f221a1ee
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
* University Copyright- Copyright (c) 1982, 1986, 1988
* The Regents of the University of California
* All Rights Reserved
*
* University Acknowledgment- Portions of this document are derived from
* software developed by the University of California, Berkeley, and its
* contributors.
*/
#ifndef _SYS_SOCKETVAR_H
#define _SYS_SOCKETVAR_H
#pragma ident "%Z%%M% %I% %E% SMI"
#ifdef __cplusplus
extern "C" {
#endif
/*
* Internal representation used for addresses.
*/
struct soaddr {
	/*
	 * NOTE(review): no members are visible in this copy. An empty member
	 * list is not valid ISO C (GCC accepts it only as an extension);
	 * confirm whether the field declarations were lost.
	 */
};
/* Maximum size address for transports that have ADDR_size == 1 */
#define SOA_DEFSIZE 128
/*
* Internal representation of the address used to represent addresses
* in the loopback transport for AF_UNIX. While the sockaddr_un is used
* as the sockfs layer address for AF_UNIX, the pathnames contained in
* these addresses are not unique (due to relative pathnames) and thus
* cannot be used in the transport.
*
* The transport level address consists of a magic number (used to separate the
* name space for specific and implicit binds). For a specific bind
* this is followed by a "vnode *" which ensures that all specific binds
* have a unique transport level address. For implicit binds the latter
* part of the address is a byte string (of the same length as a pointer)
* that is assigned by the loopback transport.
*
* The uniqueness assumes that the loopback transport has a separate namespace
* for sockets in order to avoid name conflicts with e.g. TLI use of the
* same transport.
*/
struct so_ux_addr {
	/*
	 * Either a vnode pointer or a value assigned by the loopback
	 * transport (tl).
	 * NOTE(review): the comment preceding this struct describes a leading
	 * magic number, but no such member is visible here; confirm.
	 */
	void	*soua_vp;
};
struct sockaddr_ux {
	/* Embedded transport-level address (see struct so_ux_addr). */
	struct so_ux_addr	sou_addr;
};
typedef struct sonodeops sonodeops_t;
/*
* The sonode represents a socket. A sonode never exists in the file system
* name space and can not be opened using open() - only the socket, socketpair
* and accept calls create sonodes.
*
* When an AF_UNIX socket is bound to a pathname the sockfs
* creates a VSOCK vnode in the underlying file system. However, the vnodeops
* etc in this VNODE remain those of the underlying file system.
* Sockfs uses the v_stream pointer in the underlying file system VSOCK node
* to find the sonode bound to the pathname. The bound pathname vnode
* is accessed through so_ux_vp.
*
* A socket always corresponds to a VCHR stream representing the transport
* provider. This information is retrieved from the kernel
* socket configuration table and entered into so_accessvp. sockfs uses
* this to perform VOP_ACCESS checks before allowing an open of the transport
* provider.
*
* The locking of sockfs uses the so_lock mutex plus the SOLOCKED
* and SOREADLOCKED flags in so_flag. The mutex protects all the state
* in the sonode. The SOLOCKED flag is used to single-thread operations from
* sockfs users to prevent e.g. multiple bind() calls from operating on the
* same sonode concurrently. The SOREADLOCKED flag is used to ensure that
* only one thread sleeps in kstrgetmsg for a given sonode. This is needed
* to ensure atomic operation for things like MSG_WAITALL.
*
* Note that so_lock is sometimes held across calls that might go to sleep
* (kmem_alloc and soallocproto*). This implies that no other lock in
* the system should be held when calling into sockfs; from the system call
* side or from strrput. If locks are held while calling into sockfs
* the system might hang when running low on memory.
*/
struct sonode {
	/*
	 * NOTE(review): several section comments below (the locks, the
	 * T_info_ack/T_capability_ack data, the address bookkeeping, the
	 * NL7C state and the SSL proxy state) are not followed by any member
	 * declarations in this copy, although they name members such as
	 * so_lock, so_oobcnt and so_ux_vp. Confirm the full member list
	 * against the authoritative header.
	 */

	/*
	 * These fields are initialized once.
	 */

	/* The locks themselves */

	/* fields so_version and so_pushcnt */

	/* These fields are protected by so_lock */
	int	so_count;	/* count of opened references */

	/* Needed to recreate the same socket for accept */
	short	so_family;
	short	so_type;
	short	so_protocol;
	short	so_version;	/* From so_socket call */
	short	so_pushcnt;	/* Number of modules above "sockmod" */

	/* Options */
	short	so_options;	/* From socket call, see socket.h */
	int	so_sndbuf;	/* SO_SNDBUF value */
	int	so_rcvbuf;	/* SO_RCVBUF value */
	int	so_sndlowat;	/* send low water mark */
	int	so_rcvlowat;	/* receive low water mark */
#ifdef notyet
	int	so_sndtimeo;	/* Not yet implemented */
	int	so_rcvtimeo;	/* Not yet implemented */
#endif /* notyet */
	int	so_backlog;	/* Listen backlog */

	/*
	 * The counts (so_oobcnt and so_oobsigcnt) track the number of
	 * urgent indications that are (logically) queued on the stream head
	 * read queue. The urgent data is queued on the stream head
	 * as follows.
	 *
	 * In the normal case the SIGURG is not generated until
	 * the T_EXDATA_IND arrives at the stream head. However, transports
	 * that have an early indication that urgent data is pending
	 * (e.g. TCP receiving a "new" urgent pointer value) can send up
	 * an M_PCSIG carrying SIGURG to generate the signal early.
	 *
	 * The mark is indicated by either:
	 *  - a T_EXDATA_IND (with no M_DATA b_cont) with MSGMARK set.
	 *    When this message is consumed by sorecvmsg the socket layer
	 *    sets SS_RCVATMARK until data has been consumed past the mark.
	 *  - a message with MSGMARKNEXT set (indicating that the
	 *    first byte of the next message constitutes the mark). When
	 *    the last byte of the MSGMARKNEXT message is consumed in
	 *    the stream head the stream head sets STRATMARK. This flag
	 *    is cleared when at least one byte is read. (Note that
	 *    the MSGMARKNEXT messages can be of zero length when there
	 *    is no previous data to which the marknext can be attached.)
	 *
	 * While the T_EXDATA_IND method is the common case which is used
	 * with all TPI transports, the MSGMARKNEXT method is needed to
	 * indicate the mark when e.g. the TCP urgent byte has not been
	 * received yet but the TCP urgent pointer has made TCP generate
	 * the M_PCSIG/SIGURG.
	 *
	 * The signal (the M_PCSIG carrying the SIGURG) and the mark
	 * indication can not be delivered as a single message, since
	 * the signal should be delivered as high priority and any mark
	 * indication must flow with the data. This implies that immediately
	 * when the SIGURG has been delivered if the stream head queue is
	 * empty it is impossible to determine if this will be the position
	 * of the mark. This race condition is resolved by using MSGNOTMARKNEXT
	 * messages and the STRNOTATMARK flag in the stream head. The
	 * SIOCATMARK code calls the stream head to wait for either a
	 * non-empty queue or one of the STR*ATMARK flags being set.
	 * This implies that any transport that is sending M_PCSIG(SIGURG)
	 * should send the appropriate MSGNOTMARKNEXT message (which can be
	 * zero length) after sending an M_PCSIG to prevent SIOCATMARK
	 * from sleeping unnecessarily.
	 */

	/* From T_info_ack */

	/* From T_capability_ack */

	/* Internal provider information */
	struct tpi_provinfo	*so_provinfo;

	/*
	 * The local and remote addresses have multiple purposes
	 * but one of the key reasons for their existence and careful
	 * tracking in sockfs is to support getsockname and getpeername
	 * when the transport does not handle the TI_GET*NAME ioctls
	 * and caching when it does (signalled by valid bits in so_state).
	 * When all transports support the new TPI (with T_ADDR_REQ)
	 * we can revisit this code.
	 * The other usage of so_faddr is to keep the "connected to"
	 * address for datagram sockets.
	 * Finally, for AF_UNIX both local and remote addresses are used
	 * to record the sockaddr_un since we use a separate namespace
	 * in the loopback transport.
	 */

	/*
	 * For AF_UNIX sockets:
	 * so_ux_laddr/faddr records the internal addresses used with the
	 * transport.
	 * so_ux_vp and v_stream->sd_vnode form the cross-
	 * linkage between the underlying fs vnode corresponding to
	 * the bound sockaddr_un and the socket node.
	 */

	/* put here for delayed processing */

	void	*so_priv;	/* sonode private data */
	void	*so_obj;	/* object to free */

	/*
	 * For NL7C sockets:
	 *
	 * so_nl7c_flags	the NL7C state of URL processing.
	 *
	 * so_nl7c_rcv_mp	mblk_t chain of already received data to be
	 *			passed up to the app after NL7C gives up on
	 *			a socket.
	 *
	 * so_nl7c_rcv_rval	returned rval for last mblk_t from above.
	 *
	 * so_nl7c_uri		the URI currently being processed.
	 *
	 * so_nl7c_rtime	URI request gethrestime_sec().
	 *
	 * so_nl7c_addr		pointer returned by nl7c_addr_lookup().
	 */
	void	*so_nl7c_uri;
	void	*so_nl7c_addr;

	/* For sockets acting as an in-kernel SSL proxy */
};
/* flags */
/*
* Socket state bits.
*/
/* Set of states when the socket can't be rebound */
/*
* Characteristics of sockets. Not changed after the socket is created.
*/
/*
* Socket versions. Used by the socket library when calling _so_socket().
*/
#define SOV_STREAM 0 /* Not a socket - just a stream */
/*
* Defined here so that crash can use it.
*/
struct sockparams {
	int			sp_domain;
	int			sp_type;
	int			sp_protocol;
	char			*sp_devpath;
	/* Is 0 if sp_devpath is a static string */
	int			sp_devpathlen;
	/* NOTE(review): presumably chains the entries reachable from sphead; confirm */
	struct sockparams	*sp_next;
};
extern struct sockparams *sphead;
/*
* Used to traverse the list of AF_UNIX sockets to construct the kstat
* for netstat(1m).
*/
struct socklist {
	/*
	 * NOTE(review): no members are visible in this copy. An empty member
	 * list is not valid ISO C (GCC accepts it only as an extension);
	 * confirm whether the field declarations were lost.
	 */
};
/*
* ss_full_waits is the number of times the reader thread
* waits when the queue is full and ss_empty_waits is the number
* of times the consumer thread waits when the queue is empty.
* No locks for these as they are just indicators of whether
* disk or network or both is slow or fast.
*/
struct sendfile_stats {
	/*
	 * NOTE(review): the preceding comment names ss_full_waits and
	 * ss_empty_waits, but no members are visible in this copy. An empty
	 * member list is not valid ISO C (GCC accepts it only as an
	 * extension); confirm whether the field declarations were lost.
	 */
};
/*
* A single sendfile request is represented by snf_req.
*/
typedef struct snf_req {
	int	sr_hiwat;
	int	sr_lowat;
	int	sr_operation;
/* NOTE(review): defined beside sr_operation; presumably a flag bit for it - confirm */
#define	SR_READ_DONE	0x80000000
	int	sr_read_error;
	int	sr_write_error;
} snf_req_t;
/* A queue of sendfile requests */
struct sendfile_queue {
	/* # of service threads */
	int	snfq_svc_threads;
	/* # of idling threads */
	int	snfq_idle_cnt;
	/* NOTE(review): presumably the cap on service threads; confirm */
	int	snfq_max_threads;
	/* Number of requests */
	int	snfq_req_cnt;
};
#define READ_OP 1
/* Socket network operations switch */
struct sonodeops {
/*
 * NOTE(review): several member declarations below begin in the middle of a
 * parameter list (e.g. a bare "int);") and the final sop_setsockopt entry is
 * never closed, so this copy of the operations switch appears truncated.
 * Confirm against the authoritative header before use.
 */
int);
int (*sop_listen)(struct sonode *, int);
socklen_t, int, int);
struct uio *);
struct uio *);
int (*sop_getpeername)(struct sonode *);
int (*sop_getsockname)(struct sonode *);
int (*sop_shutdown)(struct sonode *, int);
int (*sop_getsockopt)(struct sonode *, int, int, void *,
socklen_t *, int);
int (*sop_setsockopt)(struct sonode *, int, int, const void *,
};
#define SOP_GETPEERNAME(so) \
#define SOP_GETSOCKNAME(so) \
#endif /* defined(_KERNEL) || defined(_KMEMUSER) */
#ifdef _KERNEL
#define ISALIGNED_cmsghdr(addr) \
#define ROUNDUP_cmsglen(len) \
/*
* Macros that operate on struct cmsghdr.
* Used in parsing msg_control.
* The CMSG_VALID macro does not assume that the last option buffer is padded.
*/
(ISALIGNED_cmsghdr(cmsg) && \
/*
* Maximum size of any argument that is copied in (addresses, options,
* access rights). MUST be at least MAXPATHLEN + 3.
* BSD and SunOS 4.X limited this to MLEN or MCLBYTES.
*/
#define SO_MAXARGSIZE 8192
/*
* Convert between vnode and sonode
*/
/*
* Internal flags for sobind()
*/
/* to enable listen with backlog = 1 */
/*
* Internal flags for sounbind()
*/
/*
* Internal flags for soconnect()
*/
/*
* Internal flags for sodisconnect()
*/
/*
* Internal flags for sotpi_getsockopt().
*/
/*
* Internal flags for soallocproto*()
*/
#define _ALLOC_NOSLEEP 0 /* Don't sleep for memory */
/*
* Internal structure for handling AF_UNIX file descriptor passing
*/
struct fdbuf {
	/* In bytes, for kmem_free */
	int	fd_size;
	/*
	 * Number of elements below.
	 * NOTE(review): no array member is visible in this copy; confirm
	 * which member this count refers to.
	 */
	int	fd_numfd;
	/* Extra buffer to free */
	char	*fd_ebuf;
	/* NOTE(review): presumably the length of fd_ebuf; confirm */
	int	fd_ebuflen;
};
/*
* Variable that can be patched to set what version of socket socket()
* will create.
*/
extern int so_default_version;
#ifdef DEBUG
/* Turn on extra testing capabilities */
#define SOCK_TEST
#endif /* DEBUG */
#ifdef DEBUG
int so_verify_oobstate(struct sonode *);
#endif /* DEBUG */
/*
* DEBUG macros
*/
#define SOCK_DEBUG
extern int sockdebug;
extern int sockprinterr;
#define eprintline(error) \
{ \
printf("socket error %d: line %d file %s\n", \
}
printf("socket(%p) error %d: line %d file %s\n", \
}
#else /* define(DEBUG) && !defined(__lint) */
#define eprintline(error) {}
#ifdef DEBUG
#endif
#endif /* defined(DEBUG) && !defined(__lint) */
extern struct vfsops sock_vfsops;
extern struct vnodeops *socktpi_vnodeops;
extern const struct fs_operation_def socktpi_vnodeops_template[];
extern sonodeops_t sotpi_sonodeops;
/*
* sockfs functions
*/
uchar_t, int, int);
int *);
extern int so_sock2stream(struct sonode *);
extern void so_stream2sock(struct sonode *);
extern int sockinit(int, char *);
extern struct vnode
*makesockvp(struct vnode *, int, int, int);
extern void so_update_attrs(struct sonode *, int);
extern int soconfig(int, int, int, char *, int);
extern struct vnode
*solookup(int, int, int, char *, int *);
extern void so_lock_single(struct sonode *);
extern void so_unlock_single(struct sonode *, int);
extern int so_lock_read(struct sonode *, int);
extern int so_lock_read_intr(struct sonode *, int);
extern void so_unlock_read(struct sonode *);
extern void so_getopt_srcaddr(void *, t_uscalar_t,
void **, t_uscalar_t *);
extern int so_getopt_unix_close(void *, t_uscalar_t);
extern void fdbuf_free(struct fdbuf *);
extern int fdbuf_create(void *, int, struct fdbuf **);
extern void so_closefds(void *, t_uscalar_t, int, int);
extern int so_getfdopt(void *, t_uscalar_t, int, void **, int *);
extern t_uscalar_t
void *, t_uscalar_t);
extern void soisconnecting(struct sonode *);
extern void soisconnected(struct sonode *);
extern void soisdisconnected(struct sonode *, int);
extern void socantsendmore(struct sonode *);
extern void socantrcvmore(struct sonode *);
extern void soseterror(struct sonode *, int);
extern int sogetrderr(vnode_t *, int, int *);
extern int sogetwrerr(vnode_t *, int, int *);
extern void so_unix_close(struct sonode *);
ssize_t, int);
extern void so_drain_discon_ind(struct sonode *);
extern void so_flush_discon_ind(struct sonode *);
extern int sowaitconnected(struct sonode *, int, int);
extern void so_installhooks(struct sonode *);
struct uio *);
extern int sotpi_getpeername(struct sonode *);
extern int sotpi_getsockopt(struct sonode *, int, int, void *,
socklen_t *, int);
extern int sotpi_setsockopt(struct sonode *, int, int, const void *,
struct cred *, int *);
extern void *sock_kstat_init(zoneid_t);
extern void sock_kstat_fini(zoneid_t, void *);
/*
* Function wrappers (mostly around the sonode switch) for
* backward compatibility.
*/
int, int);
int, int);
extern int sogetpeername(struct sonode *);
extern int sogetsockname(struct sonode *);
extern int soshutdown(struct sonode *, int);
int);
extern int sosetsockopt(struct sonode *, int, int, const void *,
struct sonode *, int *);
extern int so_copyout(const void *, void *, size_t, int);
struct cred *);
struct cred *, caller_context_t *);
/* SCTP sockfs */
struct sonode *, int *);
extern int sosctp_init(void);
/* SDP sockfs */
struct sonode *, int *);
extern int sosdp_init(void);
#endif
/*
* Internal structure for obtaining sonode information from the socklist.
* These types match those corresponding in the sonode structure.
* This is not a published interface, and may change at any time.
*/
struct sockinfo {
	/* Same type as so_family in struct sonode */
	short	si_family;
	/* Same type as so_type in struct sonode */
	short	si_type;
};
#ifdef __cplusplus
}
#endif
#endif /* _SYS_SOCKETVAR_H */