socket.c revision 828c993fb931176485d9d951827a2770b94ca377
0N/A * NAT - socket handling. 2362N/A * Copyright (C) 2006-2010 Oracle Corporation 2362N/A * This file is part of VirtualBox Open Source Edition (OSE), as 0N/A * you can redistribute it and/or modify it under the terms of the GNU 0N/A * General Public License (GPL) as published by the Free Software 0N/A * Foundation, in version 2 as it comes in the "COPYING" file of the 0N/A * VirtualBox OSE distribution. VirtualBox OSE is distributed in the 0N/A * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 0N/A * This code is based on: 2362N/A * Copyright (c) 1995 Danny Gasparovski. 2362N/A * Please read the file COPYRIGHT for the 0N/A * terms and conditions of the copyright. 0N/A#
else /* RT_OS_WINDOWS */ 0N/A#
endif /* !RT_OS_WINDOWS */ 0N/A * Create a new socket, initialise the fields 0N/A * It is the responsibility of the caller to 0N/A * insque() it into the correct linked-list * remque and free a socket, clobber cache * VBOX_WITH_SLIRP_MT: before sofree queue should be locked, because * in sofree we don't know from which queue item beeing removed. /* check if mbuf haven't been already freed */ * Read from so's socket into sb_snd, updating all relevant sbuf fields * NOTE: This will only be called if it is select()ed for reading, so * a read() of 0 (or less) means it's disconnected * No need to check if there's enough room to read. * soread wouldn't have been called if there weren't /* Should never succeed, but... */ /* Should never succeed, but... */ Log2((
" ... read nn = %d bytes\n",
nn));
* Special case for WSAEnumNetworkEvents: If we receive 0 bytes that * _could_ mean that the connection is closed. But we will receive an * FD_CLOSE event later if the connection was _really_ closed. With * www.youtube.com I see this very often. Closing the socket too early Log((
"NAT:error in WSAIoctl: %d\n",
errno));
/* nn == 0 means peer has performed an orderly shutdown */ Log2((
" --- soread() disconnected, nn = %d, errno = %d (%s)\n",
* If there was no error, try and read the second time round * We read again if n = 2 (ie, there's another part of the buffer) * and we read as much as we could in the first read * We don't test for <= 0 this time, because there legitimately * might not be any more data (since the socket is non-blocking), * a close will be detected on next iteration. * A return of -1 wont (shouldn't) happen, since it didn't happen above Log2((
" ... read nn = %d bytes\n",
nn));
#
else /* VBOX_WITH_SLIRP_BSD_SBUF */ Log((
"NAT: can't alloc enough memory\n"));
* Special case for WSAEnumNetworkEvents: If we receive 0 bytes that * _could_ mean that the connection is closed. But we will receive an * FD_CLOSE event later if the connection was _really_ closed. With * www.youtube.com I see this very often. Closing the socket too early Log((
"NAT:error in WSAIoctl: %d\n",
errno));
Log2((
" --- soread() disconnected, n = %d, errno = %d (%s)\n",
* When the socket is created, we set it SO_OOBINLINE, * so when OOB data arrives, we soread() it and everything * in the send buffer is sent as urgent data LogFlow((
"sorecvoob: so = %lx\n", (
long)
so));
* We take a guess at how much urgent data has arrived. * In most situations, when urgent data arrives, the next * read() should get all the urgent data. This guess will * be wrong however if more data arrives just after the * urgent data, or the read() doesn't return all the * There's a lot duplicated code here, but... char buff[
2048];
/* XXX Shouldn't be sending more oob data than this */ /* We can send it directly */ Log2((
" --- sent %d bytes urgent data, %d urgent bytes left\n",
* Since there's no sendv or sendtov like writev, * we must copy all data to a linear buffer then Log((
"Didn't send all data urgently XXXXX\n"));
Log2((
" ---2 sent %d bytes urgent data, %d urgent bytes left\n",
* Write data from so_rcv to so's socket, * updating all sbuf field as necessary * No need to check if there's something to write, * sowrite wouldn't have been called otherwise /* Should never succeed, but... */ /* Check if there's urgent data to send, and if so, send it */ Log2((
" ... wrote nn = %d bytes\n",
nn));
/* This should never happen, but people tell me it does *shrug* */ Log2((
" --- sowrite disconnected, so->so_state = %x, errno = %d\n",
Log2((
" ... wrote nn = %d bytes\n",
nn));
* If in DRAIN mode, and there's no more data, set #
else /* VBOX_WITH_SLIRP_BSD_SBUF */ LogFlow((
"sosendoob: so = %lx\n", (
long)
so));
Log((
"NAT: Can't sent sbuf via socket.\n"));
Log((
"NAT: No space to allocate temporal buffer\n"));
* Write data from so_rcv to so's socket, * updating all sbuf field as necessary * recvfrom() a UDP socket LogFlow((
"sorecvfrom: so = %lx\n", (
long)
so));
/* This is a "ping" reply */ #
else /* RT_OS_WINDOWS */#
endif /* !RT_OS_WINDOWS */ /* A "normal" UDP packet */ /*How many data has been received ?*/ * 1. calculate how much we can read * 2. read as much as possible * 3. attach buffer to allocated header mbuf LogRel((
"NAT: can't fetch amount of bytes on socket %R[natsock], so message will be truncated.\n",
so));
* Even if the amount of bytes on the socket is greater than the MTU value, * Slirp will be able to fragment it, but we won't create a temporary location. * If we hit the comparison below, our size prediction has failed; * it's not fatal, we've just allocated for nothing. (@todo add a counter here * to calculate how rarely we get here) Log((
"NAT:udp: Expected size(%d) lesser than real(%d) and less minimal mbuf size(%d)\n",
/* we're freeing buffer anyway */ Log2((
" rx error, tx icmp ICMP_UNREACH:%i\n",
code));
* Hack: domain name lookup will be used the most for UDP, * and since they'll only be used once there's no need * for the 4 minute (or whatever) timeout... So we time them * out much quicker (10 seconds for now...) * last argument should be changed if Slirp will inject IP attributes * Note: Here we can't check if dnsproxy's sent initial request * If this packet was destined for CTL_ADDR, * make it look like that's where it came from, done by udp_output LogFlow((
"sosendto: so = %lx, m = %lx\n", (
long)
so, (
long)m));
/* handle this case at 'default:' */ /* Send the packet to host to fully emulate broadcast */ /** @todo r=klaus: on Linux host this causes the host to receive * the packet twice for some reason. And I cannot find any place * in the man pages which states that sending a broadcast does not * reach the host itself. */ /* Don't care what port we get */ * Kill the socket if there's no reply in 4 minutes, * but only if it's an expirable socket * XXX This should really be tcp_listen /* RTMemFree(so); Not sofree() ??? free(NULL) == NOP */ /* Don't tcp_attach... we don't need so_snd nor so_rcv */ * SS_FACCEPTONCE sockets must time out. /* Restore the real errno */ int tmperrno =
errno;
/* Don't clobber the real reason we failed */ /* Restore the real errno */ LogRel((
"NAT: Error(%d) while setting RCV capacity to (%d)\n",
errno,
opt));
LogRel((
"NAT: Error(%d) while setting SND capacity to (%d)\n",
errno,
opt));
* Data is available in so_rcv * Just write() the data to the socket * Data has been freed in so_snd * We have room for a read() if we want to * For now, don't read, it'll be done in the main loop * Various session state calls * XXX Should be #define's * The socket state stuff needs work, these often get call 2 or 3 * times each when only 1 was needed /* XXX close() here as well? */ * Set CANTSENDMORE once all data has been write()n /* Fix ip->ip_len to contain the total packet length including the header * in _host_ byte order for all OSes. On Darwin, that value already is in * host byte order. Solaris and Darwin report only the payload. */ Log((
"send_icmp_to_guest: ICMP header is too small to understand which type/subtype of the datagram\n"));
* ICMP_ECHOREPLY, ICMP_TIMXCEED, ICMP_UNREACH minimal header size is * ICMP_ECHOREPLY assuming data 0 * icmp_{type(8), code(8), cksum(16),identifier(16),seqnum(16)} Log((
"send_icmp_to_guest: NAT accept ICMP_{ECHOREPLY, TIMXCEED, UNREACH} the minimum size is 64 (see rfc792)\n"));
* ICMP_TIMXCEED, ICMP_UNREACH minimal header size is * icmp_{type(8), code(8), cksum(16),unused(32)} + IP header + 64 bit of original datagram Log((
"send_icmp_to_guest: NAT accept ICMP_{TIMXCEED, UNREACH} the minimum size of ipheader + 64 bit of data (see rfc792)\n"));
Log((
"NAT: Can't find the corresponding packet for the received ICMP\n"));
Log((
"NAT: we haven't found echo for this reply\n"));
* while combining the buffer to send (see ip_icmp.c) we control the ICMP header only, * the IP header is combined by the OS network stack; our local copy of the IP header contains values * in host byte order, so no byte order conversion is required. IP header fields are converted * in the ip_output0 routine only. Log((
"NAT: ECHO(%d) lenght doesn't match ECHOREPLY(%d)\n",
/* ip points to the original ip header */ /* Now ip is pointing to the header we've sent from the guest */ /* source address from the original IP packet */ /* override the tail of the old packet */ ip =
mtod(m,
struct ip *);
/* ip is from the mbuf we've overridden */ /* saves original ip header and options */ ip->
ip_p =
IPPROTO_ICMP;
/* the original packet could be whatever, but we respond via ICMP */ /* according to RFC 792, error messages require a copy of the initial IP header + 64 bits */ /* Don't call m_free here*/ /*XXX: so->so_m already freed so we shouldn't call sofree */ /*close tcp should be here */ struct ip *
ip_broken;
/* ICMP returns header + 64 bit of packet */ for (i = 0; i <
len; ++i)
/* UNREACH error inject here */ icp = (
struct icmp *)&
ip[
1];
/* no options */ #
else /* !RT_OS_WINDOWS */ /* 1- step: read the ip header */ Log((
"sorecvfrom_icmp_unix: 1 - step can't read IP datagramm (would block)\n"));
if (
len <
sizeof(
struct ip)
Log((
"sorecvfrom_icmp_unix: 1 - step can't read IP datagramm\n"));
/* basic check of IP header */ Log((
"sorecvfrom_icmp_unix: 1 - step IP isn't IPv4\n"));
/* Darwin reports the IP length already in host byte order. */ /* Solaris and Darwin report the payload only */ /* Note: ip->ip_len in host byte order (all OS) */ Log((
"sorecvfrom_icmp_unix: 1 - step can't allocate enought room for datagram\n"));
/* 2 - step: we're reading rest of the datagramm to the buffer */ Log((
"sorecvfrom_icmp_unix: 2 - step can't read IP body (would block expected:%d)\n",
Log((
"sorecvfrom_icmp_unix: 2 - step read of the rest of datagramm is fallen (errno:%d, len:%d expected: %d)\n",
/* len is modified in 2nd read, when the rest of the datagramm was read */ #
endif /* !RT_OS_WINDOWS */