socksyscalls.c revision 7f9e9054de030261b1a0e55ae05c5d8682590697
fa9e4066f08beec538e775443c5be79dd423fcabahrens * CDDL HEADER START
fa9e4066f08beec538e775443c5be79dd423fcabahrens * The contents of this file are subject to the terms of the
ea8dc4b6d2251b437950c0056bc626b311c73c27eschrock * Common Development and Distribution License (the "License").
ea8dc4b6d2251b437950c0056bc626b311c73c27eschrock * You may not use this file except in compliance with the License.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
fa9e4066f08beec538e775443c5be79dd423fcabahrens * See the License for the specific language governing permissions
fa9e4066f08beec538e775443c5be79dd423fcabahrens * and limitations under the License.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * When distributing Covered Code, include this CDDL HEADER in each
fa9e4066f08beec538e775443c5be79dd423fcabahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * If applicable, add the following below this CDDL HEADER, with the
fa9e4066f08beec538e775443c5be79dd423fcabahrens * fields enclosed by brackets "[]" replaced with your own identifying
fa9e4066f08beec538e775443c5be79dd423fcabahrens * information: Portions Copyright [yyyy] [name of copyright owner]
fa9e4066f08beec538e775443c5be79dd423fcabahrens * CDDL HEADER END
daec38ecb4fb5e73e4ca9e99be84f6b8c50c02faJoe Stein * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
9dc3941c735ef88de46e850f745aa556d3a071a5Sašo Kiselkov * Use is subject to license terms.
22e30981d82a0b6dc89253596ededafae8655e00George Wilsonint do_useracc = 1; /* Controlled by setting SO_DEBUG to 4 */
22e30981d82a0b6dc89253596ededafae8655e00George Wilson#endif /* SOCK_TEST */
09c9d376e8ccb8fbba74f33cc268964464092b62George Wilson * Note: DEF_IOV_MAX is defined and used here just as it is in "fs/vncalls.c",
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * because there does not appear to be a formal definition of IOV_MAX.
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * Kernel component of socket creation.
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * The socket library determines which version number to use.
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * First the library calls this with a NULL devpath. If this fails
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * to find a transport (using solookup) the library will look in /etc/netconfig
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * for the appropriate transport. If one is found it will pass in the
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * devpath for the kernel to use.
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilsonso_socket(int family, int type, int protocol, char *devpath, int version)
d6e555bdd793b8bc8fe57d5f12c3d69c813d0661George Wilson so = socket_create(family, type, protocol, buf, NULL,
d6e555bdd793b8bc8fe57d5f12c3d69c813d0661George Wilson so = socket_create(family, type, protocol, NULL, NULL,
80eb36f241abf8c076119fb4c49a55fd61ebc710George Wilson /* Allocate a file descriptor for the socket */
80eb36f241abf8c076119fb4c49a55fd61ebc710George Wilson if (error = falloc(vp, FWRITE|FREAD, &fp, &fd)) {
80eb36f241abf8c076119fb4c49a55fd61ebc710George Wilson * Now fill in the entries that falloc reserved
d6e555bdd793b8bc8fe57d5f12c3d69c813d0661George Wilson * Map from a file descriptor to a socket node.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * Returns with the file descriptor held i.e. the caller has to
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * use releasef when done with the file descriptor.
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson /* Check if it is a socket */
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * Use the stream head to find the real socket vnode.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * This is needed when namefs sits above sockfs.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * Allocate and copyin a sockaddr.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * Ensures NUL termination for AF_UNIX addresses by extending them
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * with one NUL byte if need be. Verifies that the length is not
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * excessive to prevent an application from consuming all of kernel
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * memory. Returns NULL when an error occurred.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilsonstatic struct sockaddr *
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilsoncopyin_name(struct sonode *so, struct sockaddr *name, socklen_t *namelenp,
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Add space for NULL termination if needed.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Do a quick check if the last byte is NUL.
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (so->so_family == AF_UNIX && faddr[namelen - 1] != '\0') {
fa9e4066f08beec538e775443c5be79dd423fcabahrens /* Check if there is any NULL termination */
a15215608b8bd90f714f6db21ee623b584607cb6Jeff Bonwick for (i = sizeof (name->sa_family); i < namelen; i++) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens /* Add extra byte for NUL padding */
88ecc943b4eb72f7c4fbbd8435997b85ef171fc3George Wilson nfaddr = (char *)kmem_alloc(namelen + 1, KM_SLEEP);
88ecc943b4eb72f7c4fbbd8435997b85ef171fc3George Wilson /* NUL terminate */
88ecc943b4eb72f7c4fbbd8435997b85ef171fc3George Wilson * Copy from kaddr/klen to uaddr/ulen. Updates ulenp if non-NULL.
88ecc943b4eb72f7c4fbbd8435997b85ef171fc3George Wilsoncopyout_arg(void *uaddr, socklen_t ulen, void *ulenp,
b24ab6762772a3f6a89393947930c7fa61306783Jeff Bonwick * Copy from kaddr/klen to uaddr/ulen. Updates ulenp if non-NULL.
b24ab6762772a3f6a89393947930c7fa61306783Jeff Bonwick * If klen is greater than ulen it still uses the non-truncated
b24ab6762772a3f6a89393947930c7fa61306783Jeff Bonwick * klen to update ulenp.
b24ab6762772a3f6a89393947930c7fa61306783Jeff Bonwickcopyout_name(void *uaddr, socklen_t ulen, void *ulenp,
b24ab6762772a3f6a89393947930c7fa61306783Jeff Bonwick printf("sockfs: truncating copyout of address using "
b24ab6762772a3f6a89393947930c7fa61306783Jeff Bonwick "XNET semantics for pid = %d. Lengths %d, %d\n",
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * The socketpair() code in libsocket creates two sockets (using
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * the /etc/netconfig fallback if needed) before calling this routine
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * to connect the two sockets together.
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * For a SOCK_STREAM socketpair a listener is needed - in that case this
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * routine will create a new file descriptor as part of accepting the
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * connection. The library socketpair() will check if svs[2] has changed
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * in which case it will close the changed fd.
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * Note that this code could use the TPI feature of accepting the connection
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * on the listening endpoint. However, that would require significant changes
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * to soaccept.
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson dprint(1, ("so_socketpair(%p)\n", (void *)sv));
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson if ((so1 = getsonode(svs[0], &error, NULL)) == NULL)
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson if ((so2 = getsonode(svs[1], &error, NULL)) == NULL) {
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson if (so1->so_family != AF_UNIX || so2->so_family != AF_UNIX) {
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * The code below makes assumptions about the "sockfs" implementation.
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * So make sure that the correct implementation is really used.
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * Bind both sockets and connect them with each other.
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * Need to allocate name/namelen for soconnect.
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson error = socket_bind(so1, NULL, 0, _SOBIND_UNSPEC, CRED());
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson error = socket_bind(so2, NULL, 0, _SOBIND_UNSPEC, CRED());
c39a2aae1e2c439d156021edfc20910dad7f9891George Wilson * Bind both sockets, with so1 being a listener.
c39a2aae1e2c439d156021edfc20910dad7f9891George Wilson * Connect so2 to so1 - nonblocking to avoid waiting for
c39a2aae1e2c439d156021edfc20910dad7f9891George Wilson * soaccept to complete.
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * Accept a connection on so1. Pass out the new fd as sv[0].
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * The library will detect the changed fd and close
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * the original one.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * We could simply call socket_listen() here (which would do the
fa9e4066f08beec538e775443c5be79dd423fcabahrens * binding automatically) if the code didn't rely on passing
fa9e4066f08beec538e775443c5be79dd423fcabahrens * _SOBIND_NOXLATE to the TPI implementation of socket_bind().
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson error = socket_bind(so2, NULL, 0, _SOBIND_UNSPEC, CRED());
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson /* wait for so2 being SS_CONNECTED ignoring signals */
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson if (error = falloc(nvp, FWRITE|FREAD, &nfp, &nfd)) {
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * fill in the entries that falloc reserved
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * The socketpair library routine will close the original
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * svs[0] when this code passes out a different file
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * descriptor.
22e30981d82a0b6dc89253596ededafae8655e00George Wilsonbind(int sock, struct sockaddr *name, socklen_t namelen, int version)
22e30981d82a0b6dc89253596ededafae8655e00George Wilson if ((so = getsonode(sock, &error, NULL)) == NULL)
0f7643c7376dd69a08acbfc9d1d7d548b10c846aGeorge Wilson /* Allocate and copyin name */
0f7643c7376dd69a08acbfc9d1d7d548b10c846aGeorge Wilson * X/Open test does not expect EFAULT with NULL name and non-zero
0f7643c7376dd69a08acbfc9d1d7d548b10c846aGeorge Wilson name = copyin_name(so, name, &namelen, &error);
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson error = socket_bind(so, name, namelen, 0, CRED());
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson error = socket_bind(so, name, namelen, _SOBIND_XPG4_2, CRED());
0f7643c7376dd69a08acbfc9d1d7d548b10c846aGeorge Wilson error = socket_bind(so, name, namelen, _SOBIND_SOCKBSD, CRED());
22e30981d82a0b6dc89253596ededafae8655e00George Wilson/* ARGSUSED2 */
22e30981d82a0b6dc89253596ededafae8655e00George Wilson if ((so = getsonode(sock, &error, NULL)) == NULL)
fa9e4066f08beec538e775443c5be79dd423fcabahrens return (0);
fa9e4066f08beec538e775443c5be79dd423fcabahrens/*ARGSUSED3*/
fa9e4066f08beec538e775443c5be79dd423fcabahrensaccept(int sock, struct sockaddr *name, socklen_t *namelenp, int version)
a33cae9802e94744efee12a7a77c89360645eae8Tim Haley if (copyin(namelenp, &namelen, sizeof (namelen))) {
a15215608b8bd90f714f6db21ee623b584607cb6Jeff Bonwick error = useracc(name, (size_t)namelen, B_WRITE);
a15215608b8bd90f714f6db21ee623b584607cb6Jeff Bonwick * Allocate the user fd before socket_accept() in order to
a15215608b8bd90f714f6db21ee623b584607cb6Jeff Bonwick * catch EMFILE errors before calling socket_accept().
a15215608b8bd90f714f6db21ee623b584607cb6Jeff Bonwick error = socket_accept(so, fp->f_flag, CRED(), &nso);
a15215608b8bd90f714f6db21ee623b584607cb6Jeff Bonwick addrp = (struct sockaddr *)kmem_alloc(addrlen, KM_SLEEP);
a15215608b8bd90f714f6db21ee623b584607cb6Jeff Bonwick if ((error = socket_getpeername(nso, (struct sockaddr *)addrp,
a15215608b8bd90f714f6db21ee623b584607cb6Jeff Bonwick if (error = falloc(NULL, FWRITE|FREAD, &nfp, NULL)) {
a15215608b8bd90f714f6db21ee623b584607cb6Jeff Bonwick * fill in the entries that falloc reserved
a15215608b8bd90f714f6db21ee623b584607cb6Jeff Bonwick * Copy FNDELAY and FNONBLOCK from listener to acceptor
0f7643c7376dd69a08acbfc9d1d7d548b10c846aGeorge Wilson * This code is a simplification of the F_SETFL code in fcntl()
0f7643c7376dd69a08acbfc9d1d7d548b10c846aGeorge Wilson * Ignore any errors from VOP_SETFL.
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson if ((error = VOP_SETFL(nvp, oflag, arg, nfp->f_cred, NULL))
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilsonconnect(int sock, struct sockaddr *name, socklen_t namelen, int version)
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson if ((so = getsonode(sock, &error, &fp)) == NULL)
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson /* Allocate and copyin name */
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson name = copyin_name(so, name, &namelen, &error);
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson error = socket_connect(so, name, namelen, fp->f_flag,
ecc2d604e885a75cc75e647b5641af99d5a6f4a6bonwick (version != SOV_XPG4_2) ? 0 : _SOCONNECT_XPG4_2, CRED());
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson if ((so = getsonode(sock, &error, NULL)) == NULL)
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * Common receive routine.
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson msg->msg_flags = flags & (MSG_OOB | MSG_PEEK | MSG_WAITALL |
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson error = socket_recvmsg(so, msg, uiop, CRED());
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * Clear internal flag.
fa9e4066f08beec538e775443c5be79dd423fcabahrens * Determine MSG_CTRUNC. sorecvmsg sets MSG_CTRUNC only
fa9e4066f08beec538e775443c5be79dd423fcabahrens * when controllen is zero and there is control data to
fa9e4066f08beec538e775443c5be79dd423fcabahrens * copy out.
fa9e4066f08beec538e775443c5be79dd423fcabahrens (msg->msg_controllen > controllen || control == NULL)) {
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * Note: This MUST be done last. There can be no "goto err" after this
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * point since it could make so_closefds run twice on some part
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * of the file descriptor array.
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * Good old msg_accrights can only return a multiple
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * of 4 bytes.
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson error = copyout_arg(control, controllen, controllenp,
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson if (msg->msg_controllen > controllen || control == NULL) {
22e30981d82a0b6dc89253596ededafae8655e00George Wilson so_closefds(msg->msg_control, msg->msg_controllen,
0f7643c7376dd69a08acbfc9d1d7d548b10c846aGeorge Wilson kmem_free(msg->msg_name, (size_t)msg->msg_namelen);
0f7643c7376dd69a08acbfc9d1d7d548b10c846aGeorge Wilson kmem_free(msg->msg_control, (size_t)msg->msg_controllen);
0f7643c7376dd69a08acbfc9d1d7d548b10c846aGeorge Wilson * If we fail and the control part contains file descriptors
22e30981d82a0b6dc89253596ededafae8655e00George Wilson * we have to close the fd's.
22e30981d82a0b6dc89253596ededafae8655e00George Wilson so_closefds(msg->msg_control, msg->msg_controllen,
0f7643c7376dd69a08acbfc9d1d7d548b10c846aGeorge Wilson kmem_free(msg->msg_name, (size_t)msg->msg_namelen);
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson kmem_free(msg->msg_control, (size_t)msg->msg_controllen);
0f7643c7376dd69a08acbfc9d1d7d548b10c846aGeorge Wilson * Native system call
0f7643c7376dd69a08acbfc9d1d7d548b10c846aGeorge Wilsonrecv(int sock, void *buffer, size_t len, int flags)
0f7643c7376dd69a08acbfc9d1d7d548b10c846aGeorge Wilson return (recvit(sock, &lmsg, &auio, flags, NULL, NULL, NULL));
0f7643c7376dd69a08acbfc9d1d7d548b10c846aGeorge Wilsonrecvfrom(int sock, void *buffer, size_t len, int flags,
0f7643c7376dd69a08acbfc9d1d7d548b10c846aGeorge Wilson dprint(1, ("recvfrom(%d, %p, %ld, %d, %p, %p)\n",
0f7643c7376dd69a08acbfc9d1d7d548b10c846aGeorge Wilson sock, buffer, len, flags, (void *)name, (void *)namelenp));
0f7643c7376dd69a08acbfc9d1d7d548b10c846aGeorge Wilson return (recvit(sock, &lmsg, &auio, flags, namelenp, NULL, NULL));
22e30981d82a0b6dc89253596ededafae8655e00George Wilson * Uses the MSG_XPG4_2 flag to determine if the caller is using
22e30981d82a0b6dc89253596ededafae8655e00George Wilson * struct omsghdr or struct nmsghdr.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilsonrecvmsg(int sock, struct nmsghdr *msg, int flags)
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson if (copyin(msg, STRUCT_BUF(u_lmsg), STRUCT_SIZE(u_lmsg)))
80eb36f241abf8c076119fb4c49a55fd61ebc710George Wilson * Assumes that nmsghdr and omsghdr are identically shaped
80eb36f241abf8c076119fb4c49a55fd61ebc710George Wilson * except for the added msg_flags field.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * Code below us will kmem_alloc memory and hang it
ecc2d604e885a75cc75e647b5641af99d5a6f4a6bonwick * off msg_control and msg_name fields. This forces
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * us to copy the structure to its native form.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson lmsg.msg_namelen = STRUCT_FGET(u_lmsg, msg_namelen);
d6e555bdd793b8bc8fe57d5f12c3d69c813d0661George Wilson lmsg.msg_iovlen = STRUCT_FGET(u_lmsg, msg_iovlen);
d6e555bdd793b8bc8fe57d5f12c3d69c813d0661George Wilson lmsg.msg_control = STRUCT_FGETP(u_lmsg, msg_control);
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson lmsg.msg_controllen = STRUCT_FGET(u_lmsg, msg_controllen);
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson lmsg.msg_flags = STRUCT_FGET(u_lmsg, msg_flags);
80eb36f241abf8c076119fb4c49a55fd61ebc710George Wilson * 32-bit callers need to have their iovec expanded, while ensuring
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * that they can't move more than 2Gbytes of data in a single call.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson if (copyin((struct iovec32 *)lmsg.msg_iov, aiov32,
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson for (i = 0; i < iovcnt; i++) {
ecc2d604e885a75cc75e647b5641af99d5a6f4a6bonwick#endif /* _SYSCALL32_IMPL */
ecc2d604e885a75cc75e647b5641af99d5a6f4a6bonwick if (copyin(lmsg.msg_iov, aiov, iovcnt * sizeof (struct iovec))) {
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson for (i = 0; i < iovcnt; i++) {
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson useracc(lmsg.msg_control, lmsg.msg_controllen,
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson STRUCT_FADDR(umsgptr, msg_controllen), flagsp));
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * Common send function.
80eb36f241abf8c076119fb4c49a55fd61ebc710George Wilsonsendit(int sock, struct nmsghdr *msg, struct uio *uiop, int flags)
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson if ((so = getsonode(sock, &error, &fp)) == NULL)
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson /* Allocate and copyin name and control */
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson /* copyin_name null terminates addresses for AF_UNIX */
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * Verify that the length is not excessive to prevent
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * an application from consuming all of kernel memory.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson if (copyin(msg->msg_control, control, controllen)) {
80eb36f241abf8c076119fb4c49a55fd61ebc710George Wilson error = socket_sendmsg(so, msg, uiop, CRED());
d6e555bdd793b8bc8fe57d5f12c3d69c813d0661George Wilson * Native system call
80eb36f241abf8c076119fb4c49a55fd61ebc710George Wilsonsend(int sock, void *buffer, size_t len, int flags)
d6e555bdd793b8bc8fe57d5f12c3d69c813d0661George Wilson * In order to be compatible with the libsocket/sockmod
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * implementation we set EOR for all send* calls.
d6e555bdd793b8bc8fe57d5f12c3d69c813d0661George Wilson * Uses the MSG_XPG4_2 flag to determine if the caller is using
d6e555bdd793b8bc8fe57d5f12c3d69c813d0661George Wilson * struct omsghdr or struct nmsghdr.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilsonsendmsg(int sock, struct nmsghdr *msg, int flags)
80eb36f241abf8c076119fb4c49a55fd61ebc710George Wilson dprint(1, ("sendmsg(%d, %p, %d)\n", sock, (void *)msg, flags));
80eb36f241abf8c076119fb4c49a55fd61ebc710George Wilson * Assumes that nmsghdr and omsghdr are identically shaped
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * except for the added msg_flags field.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * In order to be compatible with the libsocket/sockmod
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * implementation we set EOR for all send* calls.
80eb36f241abf8c076119fb4c49a55fd61ebc710George Wilson * Code below us will kmem_alloc memory and hang it
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * off msg_control and msg_name fields. This forces
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * us to copy the structure to its native form.
80eb36f241abf8c076119fb4c49a55fd61ebc710George Wilson lmsg.msg_name = STRUCT_FGETP(u_lmsg, msg_name);
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson lmsg.msg_namelen = STRUCT_FGET(u_lmsg, msg_namelen);
80eb36f241abf8c076119fb4c49a55fd61ebc710George Wilson lmsg.msg_iovlen = STRUCT_FGET(u_lmsg, msg_iovlen);
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson lmsg.msg_control = STRUCT_FGETP(u_lmsg, msg_control);
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson lmsg.msg_controllen = STRUCT_FGET(u_lmsg, msg_controllen);
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson lmsg.msg_flags = STRUCT_FGET(u_lmsg, msg_flags);
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * Unless this is XPG 4.2 we allow iovcnt == 0 to
80eb36f241abf8c076119fb4c49a55fd61ebc710George Wilson * be compatible with SunOS 4.X and 4.4BSD.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * 32-bit callers need to have their iovec expanded, while ensuring
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * that they can't move more than 2Gbytes of data in a single call.
8d18220deb04ec7b12410cd90deb4d45e66d49bfMark J Musante copyin((struct iovec32 *)lmsg.msg_iov, aiov32,
80eb36f241abf8c076119fb4c49a55fd61ebc710George Wilson for (i = 0; i < iovcnt; i++) {
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson#endif /* _SYSCALL32_IMPL */
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson for (i = 0; i < iovcnt; i++) {
ecc2d604e885a75cc75e647b5641af99d5a6f4a6bonwicksendto(int sock, void *buffer, size_t len, int flags,
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * In order to be compatible with the libsocket/sockmod
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * implementation we set EOR for all send* calls.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilsongetpeername(int sock, struct sockaddr *name, socklen_t *namelenp, int version)
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson if ((so = getsonode(sock, &error, NULL)) == NULL)
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson if (copyin(namelenp, &namelen, sizeof (namelen)) ||
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson sock_addrp = (struct sockaddr *)kmem_alloc(sock_addrlen, KM_SLEEP);
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson if ((error = socket_getpeername(so, sock_addrp, &sock_addrlen,
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilsonbad: return (error != 0 ? set_errno(error) : 0);
1e9bd7ec42f2d3bf854c2da35310901194833267Prakash Surya if ((so = getsonode(sock, &error, NULL)) == NULL)
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson if (copyin(namelenp, &namelen, sizeof (namelen)) ||
1e9bd7ec42f2d3bf854c2da35310901194833267Prakash Surya sock_addrp = (struct sockaddr *)kmem_alloc(sock_addrlen, KM_SLEEP);
1e9bd7ec42f2d3bf854c2da35310901194833267Prakash Surya if ((error = socket_getsockname(so, sock_addrp, &sock_addrlen,
ecc2d604e885a75cc75e647b5641af99d5a6f4a6bonwick/*ARGSUSED5*/
ecc2d604e885a75cc75e647b5641af99d5a6f4a6bonwick sock, level, option_name, option_value, (void *)option_lenp));
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson if (copyin(option_lenp, &optlen, sizeof (optlen))) {
1e9bd7ec42f2d3bf854c2da35310901194833267Prakash Surya * Verify that the length is not excessive to prevent
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * an application from consuming all of kernel memory.
1e9bd7ec42f2d3bf854c2da35310901194833267Prakash Surya error = socket_getsockopt(so, level, option_name, optval,
fa9e4066f08beec538e775443c5be79dd423fcabahrens &optlen_res, (version != SOV_XPG4_2) ? 0 : _SOGETSOCKOPT_XPG4_2,
fa9e4066f08beec538e775443c5be79dd423fcabahrens error = copyout_arg(option_value, optlen, option_lenp,
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson sock, level, option_name, option_value, option_len));
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * Verify that the length is not excessive to prevent
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * an application from consuming all of kernel memory.
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson &buffer : kmem_alloc((size_t)option_len, KM_SLEEP);
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson if (copyin(option_value, optval, (size_t)option_len)) {
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson error = socket_setsockopt(so, level, option_name, optval,
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * Add config info when name is non-NULL; delete info when name is NULL.
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * name may be a device name or a module name; it is a user address.
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilsonsockconfig(int family, int type, int protocol, char *name)
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson char *kdevpath = NULL; /* Copied in devpath string */
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson if (secpolicy_net_config(CRED(), B_FALSE) != 0)
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * By default set the kdevpath and kmodule to NULL to delete an entry.
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * Otherwise when name is not NULL, set the kdevpath or kmodule
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * value to add an entry.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * Adding an entry.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * Copyin the name.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * This also makes it possible to check for too long pathnames.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * Compress the space needed for the name before passing it
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * to soconfig - soconfig will store the string until
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * the configuration is removed.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson if ((error = copyinstr(name, buf, MAXPATHLEN, &pathlen)) != 0) {
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson if (strncmp(buf, "/dev", strlen("/dev")) == 0) {
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson /* For device */
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * Special handling for NCA:
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * DEV_NCA is never opened even if an application
b1be2892dd07cf9a97d47ad06334cdc879196aafMatthew Ahrens * requests AF_NCA. The device opened is instead a
b1be2892dd07cf9a97d47ad06334cdc879196aafMatthew Ahrens * predefined AF_INET transport (NCA_INET_DEV).
b1be2892dd07cf9a97d47ad06334cdc879196aafMatthew Ahrens * Prior to Volo (PSARC/2007/587) NCA would determine
b1be2892dd07cf9a97d47ad06334cdc879196aafMatthew Ahrens * the device using a lookup, which worked then because
b1be2892dd07cf9a97d47ad06334cdc879196aafMatthew Ahrens * all protocols were based on TPI. Since TPI is no
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * longer the default, we have to explicitly state
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * which device to use.
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson /* only support entry <28, 2, 0> */
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson if (family != AF_NCA || type != SOCK_STREAM ||
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson /* For socket module */
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson /* Get the tcp device name for fallback */
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson /* Get the udp device name for fallback */
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson /* Get the icmp device name for fallback */
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson error = soconfig(family, type, protocol, kdevpath, (int)pathlen,
80eb36f241abf8c076119fb4c49a55fd61ebc710George Wilson * Sendfile is implemented through two schemes, direct I/O or by
80eb36f241abf8c076119fb4c49a55fd61ebc710George Wilson * caching in the filesystem page cache. We cache the input file by
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * default and use direct I/O only if sendfile_max_size is set
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * appropriately as explained below. Note that this logic is consistent
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * with other filesystems where caching is turned on by default
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * unless explicitly turned off by using the DIRECTIO ioctl.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * We choose a slightly different scheme here. One can turn off
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * caching by setting sendfile_max_size to 0. One can also enable
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * caching of files <= sendfile_max_size by setting sendfile_max_size
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * to an appropriate value. By default sendfile_max_size is set to the
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * maximum value so that all files are cached. In future, we may provide
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * better interfaces for caching the file.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * Sendfile through Direct I/O (Zero copy)
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * --------------------------------------
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * As disks are normally slower than the network, we can't have a
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * single thread that reads the disk and writes to the network. We
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * need to have parallelism. This is done by having the sendfile
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * thread create another thread that reads from the filesystem
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * and queues it for network processing. In this scheme, the data
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * is never copied anywhere, i.e. it is zero copy, unlike the other
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * We have a sendfile queue (snfq) where each sendfile
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * request (snf_req_t) is queued for processing by a thread. Threads
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * are allocated dynamically, and they exit if they have been idle
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * beyond a specified amount of time. When each request (snf_req_t) is
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * processed by a thread, it produces a number of mblk_t structures to
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * be consumed by the sendfile thread. snf_deque and snf_enque are
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * used for consuming and producing mblks. Size of the filesystem
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * read is determined by the tunable (sendfile_read_size). A single
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * mblk holds sendfile_read_size worth of data (except the last
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * read of the file) which is sent down as a whole to the network.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * sendfile_read_size is set to 1 MB as this seems to be the optimal
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * value for the UFS filesystem backed by a striped storage array.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * Synchronisation between read (producer) and write (consumer) threads.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * --------------------------------------------------------------------
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * sr_lock protects sr_ib_head and sr_ib_tail. The lock is held while
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * adding and deleting items in this list. Error can happen anytime
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * during read or write. There could be unprocessed mblks in the
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * sr_ib_XXX list when a read or write error occurs. Whenever error
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * is encountered, we need two things to happen :
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * a) One of the threads needs to clean up the mblks.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * b) When one thread encounters an error, the other should stop.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * For (a), we don't want to penalize the reader thread as it could do
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * some useful work processing other requests. For (b), the error can
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * be detected by examining sr_read_error or sr_write_error.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * sr_lock protects sr_read_error and sr_write_error. If both the reader and
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * the writer encounter an error, we need to report the write error back to
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * the application as that's what would have happened if the operations
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * were done sequentially. With this in mind, following should work :
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * - Check for errors before read or write.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * - If the reader encounters error, set the error in sr_read_error.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * Check sr_write_error, if it is set, send cv_signal as it is
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * waiting for reader to complete. If it is not set, the writer
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * is either running sinking data to the network or blocked
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * because of flow control. For handling the latter case, we
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * always send a signal. In any case, it will examine sr_read_error
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * and return. sr_read_error is marked with SR_READ_DONE to tell
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * the writer that the reader is done in all the cases.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * - If the writer encounters error, set the error in sr_write_error.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * The reader thread is either blocked because of flow control or
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * running reading data from the disk. For the former, we need to
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * wakeup the thread. Again to keep it simple, we always wake up
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * the reader thread. Then, wait for the read thread to complete
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * if it is not done yet. Cleanup and return.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * High and low water marks for the read thread.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * --------------------------------------------
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * If sendfile() is used to send data over a slow network, we need to
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * make sure that the read thread does not produce data at a faster
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * rate than the network. This can happen if the disk is faster than
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * the network. In such a case, we don't want to build a very large queue.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * But we would still like to get all of the network throughput possible.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * This implies that network should never block waiting for data.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * As there are a lot of disk throughput/network throughput combinations
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * possible, it is difficult to come up with an accurate number.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * A typical 10K RPM disk has a max seek latency of 17ms and rotational
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * latency of 3ms for reading a disk block. Thus, the total latency to
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * initiate a new read, transfer data from the disk and queue for
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * transmission would take about a max of 25ms. Today's max transfer rate
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * for network is 100MB/sec. If the thread is blocked because of flow
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * control, it would take 25ms to get new data ready for transmission.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * We have to make sure that network is not idling, while we are initiating
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * new transfers. So, at 100MB/sec, to keep network busy we would need
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * 2.5MB of data. Rounding off, we keep the low water mark to be 3MB of data.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * We need to pick a high water mark so that the woken up thread would
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * do considerable work before blocking again to prevent thrashing. Currently,
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * we pick this to be 10 times that of the low water mark.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * Sendfile with segmap caching (One copy from page cache to mblks).
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * ----------------------------------------------------------------
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * We use the segmap cache for caching the file, if the size of file
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * is <= sendfile_max_size. In this case we don't use threads as VM
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * is reasonably fast enough to keep up with the network. If the underlying
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * transport allows, we call segmap_getmapflt() to map MAXBSIZE (8K) worth
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * of data into segmap space, and use the virtual address from segmap
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * directly through desballoc() to avoid copy. Once the transport is done
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * with the data, the mapping will be released through segmap_release()
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * called by the call-back routine.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * If zero-copy is not allowed by the transport, we simply call VOP_READ()
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * to copy the data from the filesystem into our temporary network buffer.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * To disable caching, set sendfile_max_size to 0.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilsonuint_t sendfile_req_lowat = SENDFILE_REQ_LOWAT;
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilsonuint_t sendfile_req_hiwat = 10 * SENDFILE_REQ_LOWAT;
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson snfq = kmem_zalloc(sizeof (struct sendfile_queue), KM_SLEEP);
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson mutex_init(&snfq->snfq_lock, NULL, MUTEX_DEFAULT, NULL);
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson cv_init(&snfq->snfq_cv, NULL, CV_DEFAULT, NULL);
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson /* Cache all files by default. */
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * Queues a mblk_t for network processing.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * De-queues a mblk_t for network processing.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * If we have encountered an error on read or read is
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * completed and no more mblks, return NULL.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * We need to check for NULL sr_mp_head also as
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * the reads could have completed and there is
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * nothing more to come.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson if (((sr->sr_read_error & ~SR_READ_DONE) != 0) ||
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * To start with neither SR_READ_DONE is marked nor
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * the error is set. When we wake up from cv_wait,
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * following are the possibilities :
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * a) sr_read_error is zero and mblks are queued.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * b) sr_read_error is set to SR_READ_DONE
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * and mblks are queued.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * c) sr_read_error is set to SR_READ_DONE
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * and no mblks.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * d) sr_read_error is set to some error other
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * than SR_READ_DONE.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson while ((sr->sr_read_error == 0) && (sr->sr_mp_head == NULL)) {
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson /* Handle (a) and (b) first - the normal case. */
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson if (((sr->sr_read_error & ~SR_READ_DONE) == 0) &&
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson /* Handle (c) and (d). */
d6e555bdd793b8bc8fe57d5f12c3d69c813d0661George Wilson * Reads data from the filesystem and queues it for network processing.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * Ignore the error for filesystems that don't support DIRECTIO.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson (void) VOP_IOCTL(fp->f_vnode, _FIODIRECTIO, DIRECTIO_ON, 0,
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * Get the extra space to insert a header and a trailer.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson while ((size != 0) && (sr->sr_write_error == 0)) {
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * For sockets acting as an SSL proxy, we
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * need to adjust the size to the maximum
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * SSL record size set in the stream head.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson if (vp->v_type == VSOCK && !SOCK_IS_NONSTR(so) &&
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson ret_size = soreadfile(fp, mp->b_rptr, fileoff, &error, iosize);
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson /* Error or Reached EOF ? */
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson (void) VOP_IOCTL(fp->f_vnode, _FIODIRECTIO, DIRECTIO_OFF, 0,
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson CALLB_CPR_INIT(&cprinfo, &snfq->snfq_lock, callb_generic_cpr, "snfq");
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * If we didn't find an entry, then block until woken up
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * again and then look through the queues again.
16a4a8074274d2d7cc408589cf6359f4a378c861George Wilson /* NOTREACHED */
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson &snfq->snfq_lock, snfq_timeout, TR_CLOCK_TICK);
2a104a5236475eb73aa41eaaf3ed9f3ccbe0ca55Alex Reececreate_thread(int operation, struct vnode *vp, file_t *fp,
16a4a8074274d2d7cc408589cf6359f4a378c861George Wilson sr = (snf_req_t *)kmem_zalloc(sizeof (snf_req_t), KM_SLEEP);
2a104a5236475eb73aa41eaaf3ed9f3ccbe0ca55Alex Reece * store sd_qn_maxpsz into sr_maxpsz while we have stream head.
16a4a8074274d2d7cc408589cf6359f4a378c861George Wilson * stream might be closed before thread returns from snf_async_read.
16a4a8074274d2d7cc408589cf6359f4a378c861George Wilson sr->sr_maxpsz = MIN(MAXBSIZE, stp->sd_qn_maxpsz);
16a4a8074274d2d7cc408589cf6359f4a378c861George Wilson mutex_init(&sr->sr_lock, NULL, MUTEX_DEFAULT, NULL);
16a4a8074274d2d7cc408589cf6359f4a378c861George Wilson * See whether we need another thread for servicing this
16a4a8074274d2d7cc408589cf6359f4a378c861George Wilson * request. If there are already enough requests queued
16a4a8074274d2d7cc408589cf6359f4a378c861George Wilson * for the threads, create one if not exceeding
16a4a8074274d2d7cc408589cf6359f4a378c861George Wilson * snfq_max_threads.
16a4a8074274d2d7cc408589cf6359f4a378c861George Wilson if (snfq->snfq_req_cnt >= snfq->snfq_idle_cnt &&
16a4a8074274d2d7cc408589cf6359f4a378c861George Wilson snfq->snfq_svc_threads < snfq->snfq_max_threads) {
2a104a5236475eb73aa41eaaf3ed9f3ccbe0ca55Alex Reece (void) thread_create(NULL, 0, &snf_async_thread, 0, 0, &p0,
16a4a8074274d2d7cc408589cf6359f4a378c861George Wilsonsnf_direct_io(file_t *fp, file_t *rfp, u_offset_t fileoff, u_offset_t size,
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson if ((sr = create_thread(READ_OP, vp, rfp, fileoff, size)) == NULL)
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * We check for read error in snf_deque. It has to check
16a4a8074274d2d7cc408589cf6359f4a378c861George Wilson * for successful READ_DONE and return NULL, and we might
16a4a8074274d2d7cc408589cf6359f4a378c861George Wilson * as well make an additional check there.
16a4a8074274d2d7cc408589cf6359f4a378c861George Wilson error = socket_sendmblk(VTOSO(vp), &msg, fflag, CRED(), &mp);
16a4a8074274d2d7cc408589cf6359f4a378c861George Wilson /* Look at the big comments on why we cv_signal here. */
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson /* Wait for the reader to complete always. */
16a4a8074274d2d7cc408589cf6359f4a378c861George Wilson /* If there is no write error, check for read error. */
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson/* Maximum no.of pages allocated by vpm for sendfile at a time */
16a4a8074274d2d7cc408589cf6359f4a378c861George Wilson * Maximum no.of elements in the list returned by vpm, including
16a4a8074274d2d7cc408589cf6359f4a378c861George Wilson * NULL for the last entry
fa9e4066f08beec538e775443c5be79dd423fcabahrenstypedef struct {
fa9e4066f08beec538e775443c5be79dd423fcabahrens unsigned int snfv_ref;
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilsontypedef struct {
16a4a8074274d2d7cc408589cf6359f4a378c861George Wilson * The callback function used for vpm mapped mblks called when the last ref of
16a4a8074274d2d7cc408589cf6359f4a378c861George Wilson * the mblk is dropped which normally occurs when TCP receives the ack. But it
16a4a8074274d2d7cc408589cf6359f4a378c861George Wilson * can be the driver too due to lazy reclaim.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson if (atomic_add_32_nv(&snfv->snfv_ref, -1) == 0) {
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * The callback function used for segmap'ped mblks called when the last ref of
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * the mblk is dropped which normally occurs when TCP receives the ack. But it
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson * can be the driver too due to lazy reclaim.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * We don't need to call segmap_fault(F_SOFTUNLOCK) for
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * segmap_kpm as long as the latter never falls back to
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * "use_segmap_range". (See segmap_getmapflt().)
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * Using S_OTHER saves a redundant hat_setref() in
ecc2d604e885a75cc75e647b5641af99d5a6f4a6bonwick * segmap_unlock()
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson snfi->snfi_mapoff) & PAGEMASK), snfi->snfi_len,
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson (void) segmap_release(segkmap, snfi->snfi_base, SM_DONTNEED);
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * Use segmap or vpm instead of bcopy to send down a desballoca'ed mblk.
ecc2d604e885a75cc75e647b5641af99d5a6f4a6bonwick * When segmap is used, the mblk contains a segmap slot of no more
fa9e4066f08beec538e775443c5be79dd423fcabahrens * than MAXBSIZE.
468c413a79615e77179e8d98f22a7e513a8135bdTim Haley * With vpm, a maximum of SNF_MAXVMAPS page-sized mappings can be obtained
b1be2892dd07cf9a97d47ad06334cdc879196aafMatthew Ahrens * in each iteration and sent by socket_sendmblk until an error occurs or
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * the requested size has been transferred. An mblk is esballoca'ed from
b1be2892dd07cf9a97d47ad06334cdc879196aafMatthew Ahrens * each mapped page and a chain of these mblk is sent to the transport layer.
b1be2892dd07cf9a97d47ad06334cdc879196aafMatthew Ahrens * vpm will be called to unmap the pages when all mblks have been freed by
b1be2892dd07cf9a97d47ad06334cdc879196aafMatthew Ahrens * At the end of the whole sendfile() operation, we wait till the data from
b1be2892dd07cf9a97d47ad06334cdc879196aafMatthew Ahrens * the last mblk is ack'ed by the transport before returning so that the
b1be2892dd07cf9a97d47ad06334cdc879196aafMatthew Ahrens * caller of sendfile() can safely modify the file content.
16a4a8074274d2d7cc408589cf6359f4a378c861George Wilsonsnf_segmap(file_t *fp, vnode_t *fvp, u_offset_t fileoff, u_offset_t total_size,
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson maxsize = MIN((SNF_VPMMAXPGS * PAGESIZE), total_size);
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson snfv = kmem_zalloc(sizeof (snf_vmap_desbinfo),
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson /* Get vpm mappings for maxsize with read access */
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson if (vpm_map_pages(fvp, fileoff, (size_t)maxsize,
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson (VPM_FETCHPAGE), snfv->snfv_vml, SNF_MAXVMAPS,
2e4c998613148111f2fc5371085331ffb39122ffGeorge Wilson snfv->snfv_frtn.free_func = snf_vmap_desbfree;
fa9e4066f08beec538e775443c5be79dd423fcabahrens /* Construct the mblk chain from the page mappings */
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson for (i = 0; (snfv->snfv_vml[i].vs_addr != NULL) &&
fa9e4066f08beec538e775443c5be79dd423fcabahrens * We return EAGAIN after unmapping the pages
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * if we cannot allocate the head of the
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * chain. Otherwise, we continue sending the
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * mblks constructed so far.
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (i == 0) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens /* Mark this dblk with the zero-copy flag */
fa9e4066f08beec538e775443c5be79dd423fcabahrens /* vpm not supported. fallback to segmap */
468c413a79615e77179e8d98f22a7e513a8135bdTim Haley * we don't forcefault because we'll call
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * segmap_fault(F_SOFTLOCK) next.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * S_READ will get the ref bit set (by either
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * segmap_getmapflt() or segmap_fault()) and page
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * shared locked.
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson base = segmap_getmapflt(segkmap, fvp, fileoff,
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson snfi->snfi_len = (size_t)roundup(mapoff+chain_size,
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * We must call segmap_fault() even for segmap_kpm
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson * because that's how error gets returned.
468c413a79615e77179e8d98f22a7e513a8135bdTim Haley * (segmap_getmapflt() never fails but segmap_fault()
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson (caddr_t)(uintptr_t)(((uintptr_t)base + mapoff) &
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson snfi->snfi_frtn.free_func = snf_smap_desbfree;
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson mp = esballoca((uchar_t *)base + mapoff, chain_size,
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilson /* Mark this dblk with the zero-copy flag */
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson mp->b_datap->db_struioflag |= STRUIO_ZCNOTIFY;
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson error = socket_sendmblk(VTOSO(vp), &msg, fflag, CRED(), &mp);
fa9e4066f08beec538e775443c5be79dd423fcabahrens if (error != 0) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens * mp contains the mblks that were not sent by
468c413a79615e77179e8d98f22a7e513a8135bdTim Haley * socket_sendmblk. Use its size to update *count
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson (void) VOP_RWLOCK(fvp, V_WRITELOCK_FALSE, NULL);
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson error = VOP_GETATTR(fvp, &va, 0, kcred, NULL);
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson /* Read as much as possible. */
44cd46cadd9aab751dae6a4023c1cb5bf316d274billmsnf_cache(file_t *fp, vnode_t *fvp, u_offset_t fileoff, u_offset_t size,
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * Get the extra space to insert a header and a trailer.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson /* If read sync is not asked for, filter sync flags */
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * For sockets acting as an SSL proxy, we
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * need to adjust the size to the maximum
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * SSL record size set in the stream head.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson if (vp->v_type == VSOCK && !SOCK_IS_NONSTR(so) &&
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson error = VOP_READ(fvp, &auio, ioflag, fp->f_cred, NULL);
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson error = socket_sendmblk(VTOSO(vp), &msg, fflag, CRED(), &mp);
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson (void) VOP_RWLOCK(fvp, V_WRITELOCK_FALSE, NULL);
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson error = VOP_GETATTR(fvp, &va, 0, kcred, NULL);
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson /* Read as much as possible. */
0f7643c7376dd69a08acbfc9d1d7d548b10c846aGeorge Wilson#if defined(_SYSCALL32_IMPL) || defined(_ILP32)
0f7643c7376dd69a08acbfc9d1d7d548b10c846aGeorge Wilson * Largefile support for 32 bit applications only.
0f7643c7376dd69a08acbfc9d1d7d548b10c846aGeorge Wilsonsosendfile64(file_t *fp, file_t *rfp, const struct ksendfilevec64 *sfv,
0f7643c7376dd69a08acbfc9d1d7d548b10c846aGeorge Wilson /* Same checks as in pread */
0f7643c7376dd69a08acbfc9d1d7d548b10c846aGeorge Wilson * There are no more checks on sfv_len. So, we cast it to
0f7643c7376dd69a08acbfc9d1d7d548b10c846aGeorge Wilson * u_offset_t and share the snf_direct_io/snf_cache code between
44cd46cadd9aab751dae6a4023c1cb5bf316d274billm * 32 bit and 64 bit.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * TODO: should do nbl_need_check() like read()?
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson error = snf_direct_io(fp, rfp, sfv_off, (u_offset_t)sfv_len,
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * Grab the lock as a reader to prevent the file size
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * from changing underneath.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson (void) VOP_RWLOCK(fvp, V_WRITELOCK_FALSE, NULL);
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson error = VOP_GETATTR(fvp, &va, 0, kcred, NULL);
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson if ((error != 0) || (va_size == 0) || (sfv_off >= va_size)) {
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson /* Read as much as possible. */
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * When the NOWAIT flag is not set, we enable zero-copy only if the
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * transfer size is large enough. This prevents performance loss
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * when the caller sends the file piece by piece.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson if (sfv_len >= MAXBSIZE && (sfv_len >= (va_size >> 1) ||
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson (sfv->sfv_flag & SFV_NOWAIT) || sfv_len >= 0x1000000) &&
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson !vn_has_flocks(fvp) && !(fvp->v_flag & VNOMAP)) {
fa9e4066f08beec538e775443c5be79dd423fcabahrens error = snf_segmap(fp, fvp, sfv_off, (u_offset_t)sfv_len,
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson error = snf_cache(fp, fvp, sfv_off, (u_offset_t)sfv_len,
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * recv32(), recvfrom32(), send32(), sendto32(): intentionally return a
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * ssize_t rather than ssize32_t; see the comments above read32 for details.
44cd46cadd9aab751dae6a4023c1cb5bf316d274billmrecv32(int32_t sock, caddr32_t buffer, size32_t len, int32_t flags)
03f8c366886542ed249a15d755ae78ea4e775d9dGeorge Wilson return (recv(sock, (void *)(uintptr_t)buffer, (ssize32_t)len, flags));
03f8c366886542ed249a15d755ae78ea4e775d9dGeorge Wilsonrecvfrom32(int32_t sock, caddr32_t buffer, size32_t len, int32_t flags,
44cd46cadd9aab751dae6a4023c1cb5bf316d274billm return (recvfrom(sock, (void *)(uintptr_t)buffer, (ssize32_t)len, flags,
44cd46cadd9aab751dae6a4023c1cb5bf316d274billm (void *)(uintptr_t)name, (void *)(uintptr_t)namelenp));
0713e232b7712cd27d99e1e935ebb8d5de61c57dGeorge Wilsonsend32(int32_t sock, caddr32_t buffer, size32_t len, int32_t flags)
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson return (send(sock, (void *)(uintptr_t)buffer, (ssize32_t)len, flags));
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilsonsendto32(int32_t sock, caddr32_t buffer, size32_t len, int32_t flags,
ecc2d604e885a75cc75e647b5641af99d5a6f4a6bonwick return (sendto(sock, (void *)(uintptr_t)buffer, (ssize32_t)len, flags,
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson#endif /* _SYSCALL32_IMPL */
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * Function wrappers (mostly around the sonode switch) for
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * backward compatibility.
aeb1c1b609b02f03e8e7448beb88384ebc713525gwsoaccept(struct sonode *so, int fflag, struct sonode **nsop)
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilsonsobind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilsonsoconnect(struct sonode *so, const struct sockaddr *name, socklen_t namelen,
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson return (socket_connect(so, name, namelen, fflag, flags, CRED()));
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilsonsorecvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson return (socket_recvmsg(so, msg, uiop, CRED()));
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilsonsosendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop)
03f8c366886542ed249a15d755ae78ea4e775d9dGeorge Wilsonsogetsockopt(struct sonode *so, int level, int option_name, void *optval,
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson return (socket_getsockopt(so, level, option_name, optval, optlenp,
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilsonsosetsockopt(struct sonode *so, int level, int option_name, const void *optval,
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson return (socket_setsockopt(so, level, option_name, optval, optlen,
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * Because this is backward compatibility interface it only needs to be
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson * able to handle the creation of TPI sockfs sockets.
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilsonsocreate(struct sockparams *sp, int family, int type, int protocol, int version,
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson so = sp->sp_smod_info->smod_sock_create_func(sp, family, type, protocol,
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson if ((*errorp = SOP_INIT(so, NULL, CRED(), SOCKET_SLEEP)) == 0) {
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson /* Cannot fail, only bumps so_count */
8363e80ae72609660f6090766ca8c2c18aa53f0cGeorge Wilson (void) VOP_OPEN(&SOTOV(so), FREAD|FWRITE, CRED(), NULL);