sendfile.c revision 740243730195c25d65f2a1987de1b96cc6783fde
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/sysmacros.h>
#include <sys/socketvar.h>
/* swilly code in sys/socketvar.h turns off DEBUG */
#ifdef __lint
#define DEBUG
#endif
#include <sys/sendfile.h>
ssize32_t *);
int, ssize_t *);
/* Shorthand for V_WRITELOCK_FALSE. */
#define readflg (V_WRITELOCK_FALSE)
/* Shorthand for V_WRITELOCK_TRUE. */
#define rwflag (V_WRITELOCK_TRUE)
/*
* kstrwritemp() has semantics very similar to those of strwrite().
* The main difference is that it obtains mblks from the caller and
* does not copy from user buffers to kernel buffers as strwrite()
* does.
*
* Currently, this routine is used by sendfile to send data allocated
* within the kernel without any copying. This interface does not use the
* synchronous stream interface, because the synchronous stream interface
* implies copying.
*/
/*
* NOTE(review): the declarator line (function name and parameter list) is
* missing from this view, as are a number of statements in the body. The
* block comment that precedes this definition describes kstrwritemp(), so
* this is presumably that routine -- confirm against the complete file.
*
* Returns an int status. The return statements still visible yield 0,
* ECOMM, or the local `error` (initialized to 0).
*/
int
{
char waitflag;
int tempmode;
int error = 0;
int done = 0;
/*
* This is the sockfs direct fast path. canputnext() need
* not be accurate so we don't grab the sd_lock here. If
* we get flow-controlled, we grab sd_lock just before the
* do..while loop below to emulate what strwrite() does.
*/
/* Fast check of flags before acquiring the lock */
if (error != 0) {
}
return (error);
}
}
else
do {
/* Flow-control probe on wqp; see the fast-path comment above. */
if (canputnext(wqp)) {
/* The caller will free mp */
return (ECOMM);
}
}
return (0);
}
&done);
/*
* EAGAIN tells the application to try again. ENOMEM
* is returned only if the memory allocation size
* exceeds the physical limits of the system. ENOMEM
* can't be true here.
*/
return (error);
}
/*
* NOTE(review): no use of SEND_MAX_CHUNK is visible in this view;
* presumably it caps how many vector entries are handled per pass --
* confirm against the complete file.
*/
#define SEND_MAX_CHUNK 16
#if defined(_SYSCALL32_IMPL) || defined(_ILP32)
/*
* 64-bit offsets for 32-bit applications only, running on either a
* 64-bit kernel or a 32-bit kernel. For 32-bit apps, we can't transfer
* more than 2GB of data.
*/
/*
* NOTE(review): the declarator line (name and parameters) is missing from
* this view, as are many statements in the body. Given the enclosing
* _SYSCALL32_IMPL/_ILP32 section and the pread64 reference below, this is
* presumably the 64-bit-offset chunk writer -- confirm against the
* complete file.
*
* Visible structure: one pass per entry for copy_cnt entries, with sfv
* advanced at the end of each pass. A negative sfv_len is rejected with
* EINVAL. Visible returns are EINTR, EINVAL, EFBIG, EBADF, a propagated
* `error`, and 0 once the loop completes.
*/
int
{
int ioflag;
int i, error;
for (i = 0; i < copy_cnt; i++) {
return (EINTR);
/*
* Do similar checks as "write" as we are writing
* sfv_len bytes into "vp".
*/
/*
* NOTE(review): this `continue` does not advance sfv, whereas the
* end of the loop body does (sfv++), and the other zero-length
* checks in this file advance sfv before continuing. If sfv_len is
* loaded from *sfv (the load is not visible here), a zero-length
* entry would be examined again on the next pass -- confirm.
*/
if (sfv_len == 0)
continue;
if (sfv_len < 0)
return (EINVAL);
(void) rctl_action(
return (EFBIG);
}
return (EFBIG);
return (EINVAL);
}
if (tmpcount < 0)
return (EINVAL);
while (sfv_len > 0) {
if (error != 0)
return (error);
}
} else {
return (EBADF);
return (EBADF);
}
return (EINVAL);
}
/*
* No point reading and writing to same vp,
* as long as both are regular files. readvp is not
* locked; but since we got it from an open file the
* contents will be valid during the time of access.
*/
return (EINVAL);
}
/*
* Note: we assume readvp != vp. "vp" is already
* locked, and "readvp" must not be.
*/
/*
* Same checks as in pread64.
*/
if (sfv_off > MAXOFFSET_T) {
return (EINVAL);
}
/* Find the native blocksize to transfer data */
while (sfv_len > 0) {
/*
* If read sync is not asked for,
* filter sync flags
*/
if (error) {
return (error);
}
/*
* Check how much data was really read.
* Decrement the 'len' and increment the
* 'off' appropriately.
*/
if (cnt == 0) {
/*
* If we were reading a pipe (currently
* not implemented), we may now lose
* data.
*/
return (EINVAL);
}
/*
* Check how much data was written. Increment
* the 'len' and decrement the 'off' if all
* the data was not written.
*/
if (error != 0) {
return (error);
}
}
}
sfv++;
}
return (0);
}
/*
* NOTE(review): the return type and declarator lines are missing from this
* view, along with many statements in the body. Because it sits inside the
* _SYSCALL32_IMPL/_ILP32 section and walks ksendfilevec64 entries, this is
* presumably the 64-bit-offset vector driver -- confirm against the
* complete file.
*
* Visible structure: a do/while loop that repeats while sfvcnt > 0 and is
* left early whenever `error` is nonzero; on one path copy_vec is advanced
* and sfvcnt decremented before continuing. The last visible statement
* returns `count`.
*/
{
int copy_cnt;
const struct ksendfilevec64 *copy_vec;
int error;
do {
sizeof (struct ksendfilevec64))) {
break;
}
/*
* Optimize the regular file over
* the socket case.
*/
break;
}
break;
}
if (error)
break;
/* This entry is finished: step to the next one and count it off. */
copy_vec++;
sfvcnt--;
continue;
}
}
if (error != 0)
break;
} while (sfvcnt > 0);
if (error != 0)
return (count);
}
#endif
/*
* NOTE(review): the declarator line is missing from this view, as are many
* statements in the body. The `wroff` local matches the description of
* sendvec_small_chunk given later in this file (space left for headers in
* each mblk), so this is presumably that routine -- confirm against the
* complete file.
*
* Visible structure: one pass per entry for copy_cnt entries, advancing
* sfv each time. Zero-length entries are skipped and negative lengths are
* rejected with EINVAL. total_size is reduced by iov_len or cnt as data is
* consumed and is asserted to be 0 once the loop finishes. Visible returns
* are ENOMEM, EINTR, EINVAL, EBADF, EACCES, a propagated `error`, and 0.
*/
int
{
int ioflag;
int i, error;
#ifdef _SYSCALL32_IMPL
#else
#endif
int wroff;
int buf_left = 0;
int tail_len;
return (ENOMEM);
for (i = 0; i < copy_cnt; i++) {
return (EINTR);
/*
* Do similar checks as "write" as we are writing
* sfv_len bytes into "vp".
*/
/* Nothing to transfer for this entry; move on to the next one. */
if (sfv_len == 0) {
sfv++;
continue;
}
/* Make sure sfv_len is not negative */
#ifdef _SYSCALL32_IMPL
if (model == DATAMODEL_ILP32) {
return (EINVAL);
} else
#endif
if (sfv_len < 0)
return (EINVAL);
/* Check for overflow */
#ifdef _SYSCALL32_IMPL
if (model == DATAMODEL_ILP32) {
return (EINVAL);
} else
#endif
return (EINVAL);
while (sfv_len > 0) {
if (buf_left == 0) {
return (ENOMEM);
}
} else {
}
total_size -= iov_len;
if (error != 0) {
return (error);
}
}
} else {
return (EBADF);
}
return (EACCES);
}
return (EINVAL);
}
/*
* No point reading and writing to same vp,
* as long as both are regular files. readvp is not
* locked; but since we got it from an open file the
* contents will be valid during the time of access.
*/
return (EINVAL);
}
/*
* Note: we assume readvp != vp. "vp" is already
* locked, and "readvp" must not be.
*/
/* Same checks as in pread */
return (EINVAL);
}
sfv_off);
}
while (sfv_len > 0) {
if (buf_left == 0) {
NULL);
return (ENOMEM);
}
} else {
}
/*
* If read sync is not asked for,
* filter sync flags
*/
if (error != 0) {
/*
* If we were reading a pipe (currently
* not implemented), we may now lose
* data.
*/
return (error);
}
/*
* Check how much data was really read.
* Decrement the 'len' and increment the
* 'off' appropriately.
*/
if (cnt == 0) {
return (EINVAL);
}
total_size -= cnt;
}
}
sfv++;
}
/* Every byte accounted for in total_size should have been consumed. */
ASSERT(total_size == 0);
if (error != 0) {
return (error);
}
return (0);
}
/*
* NOTE(review): the declarator line is missing from this view, as are many
* statements in the body. The native-blocksize comment below matches the
* description of sendvec_chunk given later in this file, so this is
* presumably that routine -- confirm against the complete file.
*
* Visible structure: one pass per entry for copy_cnt entries, advancing
* sfv each time. Zero-length entries are skipped and negative lengths are
* rejected with EINVAL. For entries that reach the segmapit decision, a
* nonzero segmapit finishes the entry early; otherwise a loop runs while
* sfv_len > 0. Visible returns are EINTR, EINVAL, EFBIG, ENOMEM, EBADF, a
* propagated `error`, and 0 once the loop completes.
*/
int
{
int ioflag;
int i, error;
#ifdef _SYSCALL32_IMPL
#else
#endif
}
for (i = 0; i < copy_cnt; i++) {
return (EINTR);
/*
* Do similar checks as "write" as we are writing
* sfv_len bytes into "vp".
*/
/* Nothing to transfer for this entry; move on to the next one. */
if (sfv_len == 0) {
sfv++;
continue;
}
/* Make sure sfv_len is not negative */
#ifdef _SYSCALL32_IMPL
if (model == DATAMODEL_ILP32) {
return (EINVAL);
} else
#endif
if (sfv_len < 0)
return (EINVAL);
(void) rctl_action(
return (EFBIG);
}
return (EFBIG);
return (EINVAL);
}
/* Check for overflow */
#ifdef _SYSCALL32_IMPL
if (model == DATAMODEL_ILP32) {
return (EINVAL);
} else
#endif
return (EINVAL);
/*
* Optimize for the socket case
*/
return (ENOMEM);
if (error != 0) {
return (error);
}
if (error != 0) {
return (error);
}
} else {
while (sfv_len > 0) {
if (error != 0)
return (error);
}
}
} else {
int segmapit;
return (EBADF);
return (EBADF);
}
return (EINVAL);
}
/*
* No point reading and writing to same vp,
* as long as both are regular files. readvp is not
* locked; but since we got it from an open file the
* contents will be valid during the time of access.
*/
return (EINVAL);
}
/*
* Note: we assume readvp != vp. "vp" is already
* locked, and "readvp" must not be.
*/
/* Same checks as in pread */
return (EINVAL);
}
sfv_off);
}
/* Find the native blocksize to transfer data */
segmapit = 0;
return (ENOMEM);
}
} else {
/*
* For sockets acting as an SSL proxy, we
* need to adjust the size to the maximum
* SSL record size set in the stream head.
*/
if (vn_has_flocks(readvp) ||
segmapit = 0;
segmapit = 1;
} else {
int on = 1;
segmapit = 1;
}
}
/*
* When segmapit is set, this entry is completed here and the
* read/write loop below is skipped for it.
*/
if (segmapit) {
if (error)
return (error);
sfv++;
continue;
}
while (sfv_len > 0) {
NULL);
return (ENOMEM);
}
} else {
}
/*
* If read sync is not asked for,
* filter sync flags
*/
if (error != 0) {
/*
* If we were reading a pipe (currently
* not implemented), we may now lose
* data.
*/
else
return (error);
}
/*
* Check how much data was really read.
* Decrement the 'len' and increment the
* 'off' appropriately.
*/
if (cnt == 0) {
else
return (EINVAL);
}
if (error != 0) {
NULL);
return (error);
}
} else {
/*
* Check how much data was written.
* Increment the 'len' and decrement the
* 'off' if all the data was not
* written.
*/
if (error != 0) {
NULL);
return (error);
}
}
}
if (buf) {
}
}
sfv++;
}
return (0);
}
/*
* NOTE(review): the return type and declarator lines are missing from this
* view, along with many statements in the body. The SENDFILEV and
* SENDFILEV64 opcode cases suggest this is the sendfilev entry point --
* confirm against the complete file.
*
* Visible structure: a non-positive sfvcnt jumps straight to the err
* label. A switch with VSOCK, VREG and default arms precedes a switch on
* opcode. The vector is then handled in a do/while loop that repeats while
* sfvcnt > 0, with total_size reset to 0 at the start of each pass. When
* _SYSCALL32_IMPL is defined and the caller's data model is ILP32, the
* final visible return is count32; otherwise it is count.
*/
{
int error;
int copy_cnt;
const struct sendfilevec *copy_vec;
#ifdef _SYSCALL32_IMPL
#endif
int i;
int maxblk = 0;
if (sfvcnt <= 0)
goto err;
}
case VSOCK:
/* sendfile not supported for SCTP */
goto err;
}
case AF_INET:
case AF_INET6:
/*
* Make similar checks done in SOP_WRITE().
*/
goto err;
}
error = EOPNOTSUPP;
goto err;
}
(SS_ISCONNECTED|SS_ISBOUND)) {
goto err;
}
} else {
}
break;
default:
goto err;
}
break;
case VREG:
break;
default:
goto err;
}
switch (opcode) {
case SENDFILEV :
break;
#if defined(_SYSCALL32_IMPL) || defined(_ILP32)
case SENDFILEV64 :
#endif
default :
break;
}
do {
total_size = 0;
#ifdef _SYSCALL32_IMPL
/* 32-bit callers need to have their iovec expanded. */
if (get_udatamodel() == DATAMODEL_ILP32) {
copy_cnt * sizeof (ksendfilevec32_t))) {
break;
}
for (i = 0; i < copy_cnt; i++) {
}
} else {
#endif
copy_cnt * sizeof (sendfilevec_t))) {
break;
}
for (i = 0; i < copy_cnt; i++) {
}
#ifdef _SYSCALL32_IMPL
}
#endif
/*
* The choice between sendvec_small_chunk and
* sendvec_chunk is dependent on multiple things:
*
* i) latency is important for smaller files. So if the
* data is smaller than 'tcp_slow_start_initial' times
* maxblk, then use sendvec_small_chunk which creates
* maxblk size mblks and chains them together and sends
* them to TCP in one shot. It also leaves 'wroff' size
* space for the headers in each mblk.
*
* ii) for total size bigger than 'tcp_slow_start_initial'
* times maxblk, it's probably real file data which is
* dominating. So it's better to use sendvec_chunk because
* performance degrades badly if we don't do pagesize reads.
* sendvec_chunk will do pagesize reads and write them
* in pagesize mblks to TCP.
*
* Side Notes: A write to file has not been optimized.
* Future zero copy code will plug into sendvec_chunk
* only because doing zero copy for files smaller than
* pagesize is useless.
*
* Note, if socket has NL7C enabled then call NL7C's
* sendfilev() function to consume the sfv[].
*/
if (is_sock) {
case AF_INET:
case AF_INET6:
if (so->so_nl7c_flags != 0)
else
break;
}
} else {
&count);
}
#ifdef _SYSCALL32_IMPL
if (get_udatamodel() == DATAMODEL_ILP32)
(copy_cnt * sizeof (ksendfilevec32_t)));
else
#endif
} while (sfvcnt > 0);
#ifdef _SYSCALL32_IMPL
/* ILP32 callers get the 32-bit byte count back. */
if (get_udatamodel() == DATAMODEL_ILP32) {
if (error != 0)
return (count32);
}
#endif
if (error != 0)
return (count);
/* Common exit for the validation failures above. */
err:
}