VBoxNetFlt-linux.c revision 986e4033c297420e418108aebd77907c4dae8596
48N/A * VBoxNetFlt - Network Filter Driver (Host), Linux Specific Code. 48N/A * Copyright (C) 2006-2008 Oracle Corporation 48N/A * This file is part of VirtualBox Open Source Edition (OSE), as 48N/A * you can redistribute it and/or modify it under the terms of the GNU 48N/A * General Public License (GPL) as published by the Free Software 48N/A * Foundation, in version 2 as it comes in the "COPYING" file of the 48N/A * VirtualBox OSE distribution. VirtualBox OSE is distributed in the 48N/A * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. 48N/A/******************************************************************************* 48N/A*******************************************************************************/ 70N/A/******************************************************************************* 48N/A* Defined Constants And Macros * 70N/A*******************************************************************************/ /* Versions prior 2.6.10 use stats for both bstats and qstats */ /** Create scatter / gather segments for fragments. When not used, we will * linearize the socket buffer before creating the internal networking SG. */ /** Indicates that the linux kernel may send us GSO frames. */ /** This enables or disables the transmitting of GSO frame from the internal * network and to the host. */ #
if 0
/** @todo This is currently disabled because it causes performance loss of 5-10%. *//** This enables or disables the transmitting of GSO frame from the internal * network and to the wire. */ * to the internal network. */ /** This enables or disables handling of GSO frames coming from the wire (GRO). */ * GRO support was backported to RHEL 5.4 /******************************************************************************* *******************************************************************************/ /******************************************************************************* *******************************************************************************/ * The (common) global data. #
endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 12) */ * @returns appropriate status code. Log((
"VBoxNetFltLinuxInit\n"));
* Initialize the globals and connect to the support driver. * This will call back vboxNetFltOsOpenSupDrv (and maybe vboxNetFltOsCloseSupDrv) * for establishing the connect to the support driver. LogRel((
"VBoxNetFlt: Successfully started.\n"));
LogRel((
"VBoxNetFlt: failed to initialize device extension (rc=%d)\n",
rc));
LogRel((
"VBoxNetFlt: failed to initialize IPRT (rc=%d)\n",
rc));
* @todo We have to prevent this if we're busy! Log((
"VBoxNetFltLinuxUnload\n"));
* Undo the work done during start (in reverse order). Log((
"VBoxNetFltLinuxUnload - done\n"));
* We filter traffic from the host to the internal network * before it reaches the NIC driver. * The current code uses a very ugly hack overriding hard_start_xmit * callback in the device structure, but it has been shown to give us a * performance boost of 60-100% though. Eventually we have to find some * less hacky way of getting this job done. #
else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */#
endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */ * The overridden net_device_ops of the device we're attached to. * As there is no net_device_ops structure in pre-2.6.29 kernels we override * ethtool_ops instead along with hard_start_xmit callback in net_device * This is a very dirty hack that was created to explore how much we can improve * the host to guest transfers by not CC'ing the NIC. It turns out to be * the only way to filter outgoing packets for devices without TX queue. /** Our overridden ops. */ /** Pointer to the original ops. */ /** Pointer to the original hard_start_xmit function. */ #
endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29) */ /** Pointer to the net filter instance. */ /** The number of filtered packages. */ /** The total number of packets */ /** VBOXNETDEVICEOPSOVERRIDE::u32Magic value. */ * ndo_start_xmit wrapper that drops packets that shouldn't go to the wire * because they belong on the internal network. * @returns NETDEV_TX_XXX. * @param pSkb The socket buffer to transmit. * @param pDev The net device. * Validate the override structure. * Note! We're racing vboxNetFltLinuxUnhookDev here. If this was supposed * to be production quality code, we would have to be much more * careful here and avoid the race. #
endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */ * Do the filtering based on the default OUI of our virtual NICs * Note! In a real solution, we would ask the switch whether the * destination MAC is 100% to be on the internal network and then /** @todo consider reference counting, etc. */ * Hooks the device ndo_start_xmit operation of the device. * @param pThis The net filter instance. * @param pDev The net device. /* Cancel override if ethtool_ops is missing (host-only case, @bugref{5712}) */ #
else /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */#
endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) */#
endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29) */ * Undoes what vboxNetFltLinuxHookDev did. * @param pThis The net filter instance. * @param pDev The net device. Can be NULL, in which case * we'll try to retrieve it from @a pThis. #
endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29) */#
endif /* VBOXNETFLT_WITH_HOST2WIRE_FILTER */ * Reads and retains the host interface handle. * @returns The handle, NULL if detached. Log((
"vboxNetFltLinuxRetainNetDev\n"));
* Be careful here to avoid problems racing the detached callback. Log((
"vboxNetFltLinuxRetainNetDev: Device %p(%s) retained. ref=%d\n",
Log((
"vboxNetFltLinuxRetainNetDev - done\n"));
* Release the host interface handle previously retained * by vboxNetFltLinuxRetainNetDev. * @param pThis The instance. * @param pDev The vboxNetFltLinuxRetainNetDev * return value, NULL is fine. Log((
"vboxNetFltLinuxReleaseNetDev\n"));
Log((
"vboxNetFltLinuxReleaseNetDev: Device %p(%s) released. ref=%d\n",
Log((
"vboxNetFltLinuxReleaseNetDev - done\n"));
* Checks whether this is an mbuf created by vboxNetFltLinuxMBufFromSG, * i.e. a buffer which we're pushing and should be ignored by the filter callbacks. * @returns true / false accordingly. * @param pBuf The sk_buff. * Internal worker that create a linux sk_buff for a * @returns Pointer to the sk_buff. * @param pThis The instance. * @param pSG The (scatter/)gather list. * @param fDstWire Set if the destination is the wire. LogRel((
"VBoxNetFlt: Dropped empty packet coming from internal network.\n"));
/** @todo We should use fragments mapping the SG buffers with large packets. * 256 bytes seems to be a threshold used a lot for this. It * requires some nasty work on the intnet side though... */ * Allocate a packet and copy over the data. Log((
"vboxNetFltLinuxSkBufFromSG: Failed to allocate sk_buff(%u).\n",
pSG->
cbTotal));
/* Align IP header on 16-byte boundary: 2 + 14 (ethernet hdr size). */ * Setup GSO if used by this packet. * We need to set checksum fields even if the packet goes to the host * directly as it may be immediately forwarded by IP layer @bugref{5020}. #
endif /* VBOXNETFLT_WITH_GSO_XMIT_WIRE || VBOXNETFLT_WITH_GSO_XMIT_HOST */ * Finish up the socket buffer. /* Restore ethernet header back. */ * Initializes a SG list from an sk_buff. * @returns Number of segments. * @param pThis The instance. * @param pBuf The sk_buff. * @param pvFrame The frame pointer, optional. * @param cSegs The number of segments allocated for the SG. * This should match the number in the mbuf exactly! * @param fSrc The source of the frame. * @param pGso Pointer to the GSO context if it's a GSO * internal network frame. NULL if regular frame. * Add a trailer if the frame is too small. * Since we're getting to the packet before it is framed, it has not * yet been padded. The current solution is to add a segment pointing * to a buffer containing all zeros and pray that works for all frames... Log4((
"vboxNetFltLinuxSkBufToSG: allocated=%d, segments=%d frags=%d next=%p frag_list=%p pkt_type=%x fSrc=%x\n",
Log4((
"vboxNetFltLinuxSkBufToSG: #%d: cb=%d pv=%p\n",
* @returns 0 or EJUSTRETURN. * @param pThis The instance. * @param pvFrame The start of the frame, optional. * @param fSrc Where the packet (allegedly) comes from, one INTNETTRUNKDIR_* value. * @param eProtocol The protocol. LogFlow((
"vboxNetFltLinuxPacketHandler: pBuf=%p pSkbDev=%p pPacketType=%p\n",
Log3((
"vboxNetFltLinuxPacketHandler: skb len=%u data_len=%u truesize=%u next=%p nr_frags=%u gso_size=%u gso_seqs=%u gso_type=%x frag_list=%p pkt_type=%x\n",
Log3((
"vboxNetFltLinuxPacketHandler: skb len=%u data_len=%u truesize=%u next=%p nr_frags=%u tso_size=%u tso_seqs=%u frag_list=%p pkt_type=%x\n",
* We are not interested in loopbacked packets as they will always have * another copy going to the wire. Log2((
"vboxNetFltLinuxPacketHandler: dropped loopback packet (cb=%u)\n",
pBuf->
len));
Log((
"vboxNetFltLinuxPacketHandler: Devices do not match, pThis may be wrong! pThis=%p\n",
pThis));
Log4((
"vboxNetFltLinuxPacketHandler: pBuf->cb dump:\n%.*Rhxd\n",
sizeof(
pBuf->
cb),
pBuf->
cb));
Log2((
"vboxNetFltLinuxPacketHandler: got our own sk_buff, drop it.\n"));
* Get rid of fragmented packets, they cause too much trouble. LogRel((
"VBoxNetFlt: Failed to allocate packet buffer, dropping the packet.\n"));
/* Somehow skb_copy ignores mac_len */ /* Restore VLAN tag stripped by host hardware */ #
endif /* LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27) */ Log3((
"vboxNetFltLinuxPacketHandler: skb copy len=%u data_len=%u truesize=%u next=%p nr_frags=%u gso_size=%u gso_seqs=%u gso_type=%x frag_list=%p pkt_type=%x\n",
Log3((
"vboxNetFltLinuxPacketHandler: skb copy len=%u data_len=%u truesize=%u next=%p nr_frags=%u tso_size=%u tso_seqs=%u frag_list=%p pkt_type=%x\n",
/* Forward it to the internal network. */ /* Add the packet to transmit queue and schedule the bottom half. */ Log4((
"vboxNetFltLinuxPacketHandler: scheduled work %p for sk_buff %p\n",
/* It does not really matter what we return, it is ignored by the kernel. */ * Calculate the number of INTNETSEG segments the socket buffer will need. * @returns Segment count. * @param pBuf The socket buffer. /* vboxNetFltLinuxSkBufToSG adds a padding segment if it's a runt. */ * Destroy the intnet scatter / gather buffer created by * vboxNetFltLinuxSkBufToSG. Log((
"VBoxNetFlt: (int)%02x:%02x:%02x:%02x:%02x:%02x" " %s (%s)%02x:%02x:%02x:%02x:%02x:%02x (%u bytes) packet #%u\n",
Log3((
"-- segment %d at 0x%x (%d bytes) --\n%.*Rhxd\n",
* Worker for vboxNetFltLinuxForwardToIntNet that checks if we can forward a * GSO socket buffer without having to segment it. * @returns true on success, false if needs segmenting. * @param pThis The net filter instance. * @param pSkb The GSO socket buffer. * @param fSrc The source. * @param pGsoCtx Where to return the GSO context on success. * Check the GSO properties of the socket buffer and make sure it fits. /** @todo Figure out how to handle SKB_GSO_TCP_ECN! */ * It is possible to receive GSO packets from wire if GRO is enabled. Log5((
"vboxNetFltLinuxCanForwardAsGso: fSrc=wire\n"));
* The packet came from the wire and the driver has already consumed * mac header. We need to restore it back. Log5((
"vboxNetFltLinuxCanForwardAsGso: mac_len=%d data=%p mac_header=%p network_header=%p\n",
#
else /* !VBOXNETFLT_WITH_GRO */ /* Older kernels didn't have GRO. */ #
endif /* !VBOXNETFLT_WITH_GRO */ * skb_gso_segment does the following. Do we need to do it as well? * Switch on the ethertype. Log5((
"vboxNetFltLinuxCanForwardAsGso: failed to access IPv4 hdr\n"));
else /** @todo IPv6: 4to6 tunneling */ Log5((
"vboxNetFltLinuxCanForwardAsGso: failed to access IPv6 hdr\n"));
/** @todo IPv6: Dig our way out of the other headers. */ Log5((
"vboxNetFltLinuxCanForwardAsGso: Unsupported protocol %d\n",
uProtocol));
Log5((
"vboxNetFltLinuxCanForwardAsGso: Bad transport length; off=%#x + cb=%#x => %#x; skb_len=%#x (%s)\n",
Log5((
"vboxNetFltLinuxCanForwardAsGso: failed to access TCP hdr\n"));
* We're good, init the GSO context. * Forward the socket buffer as a GSO internal network frame. * @returns IPRT status code. * @param pThis The net filter instance. * @param pSkb The GSO socket buffer. * @param fSrc The source. * @param pGsoCtx Where to return the GSO context on success. Log((
"VBoxNetFlt: Dropping the sk_buff (failure case).\n"));
Log((
"VBoxNetFlt: Bad sk_buff? cSegs=%#x.\n",
cSegs));
Log4((
"VBoxNetFlt: Dropping the sk_buff.\n"));
#
endif /* VBOXNETFLT_WITH_GSO_RECV */ * Worker for vboxNetFltLinuxForwardToIntNet. * @returns VINF_SUCCESS or VERR_NO_MEMORY. * @param pThis The net filter instance. * @param pBuf The socket buffer. * @param fSrc The source. * The packet came from wire, ethernet header was removed by device driver. * Restore it using mac_len field. This takes into account VLAN headers too. Log((
"VBoxNetFlt: Failed to allocate SG buffer.\n"));
Log((
"VBoxNetFlt: Bad sk_buff? cSegs=%#x.\n",
cSegs));
Log4((
"VBoxNetFlt: Dropping the sk_buff.\n"));
* @param pBuf The socket buffer. This is consumed by this function. Log3((
"vboxNetFltLinuxForwardToIntNet: skb len=%u data_len=%u truesize=%u next=%p nr_frags=%u gso_size=%u gso_seqs=%u gso_type=%x frag_list=%p pkt_type=%x ip_summed=%d\n",
pBuf->
len,
pBuf->
data_len,
pBuf->
truesize,
pBuf->
next,
skb_shinfo(
pBuf)->
nr_frags,
skb_shinfo(
pBuf)->
gso_size,
skb_shinfo(
pBuf)->
gso_segs,
skb_shinfo(
pBuf)->
gso_type,
skb_shinfo(
pBuf)->
frag_list,
pBuf->
pkt_type,
pBuf->
ip_summed));
/* Need to segment the packet */ Log3((
"vboxNetFltLinuxForwardToIntNet: segment len=%u data_len=%u truesize=%u next=%p nr_frags=%u gso_size=%u gso_seqs=%u gso_type=%x frag_list=%p pkt_type=%x\n",
#
endif /* VBOXNETFLT_WITH_GSO */ * Try to work around the problem with CentOS 4.7 and 5.2 (2.6.9 * and 2.6.18 kernels), they pass wrong 'h' pointer down. We take IP * header length from the header itself and reconstruct 'h' pointer * to TCP (or whatever) header. #
endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18) */ LogRel((
"VBoxNetFlt: Failed to compute checksum, dropping the packet.\n"));
/* Restore the original (wrong) pointer. */ #
endif /* LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18) */ * Work queue handler that forwards the socket buffers queued by * vboxNetFltLinuxPacketHandler to the internal network. * @param pWork The work queue. Log4((
"vboxNetFltLinuxXmitTask: Got work %p.\n",
pWork));
* Active? Retain the instance and increment the busy counter. /** @todo Shouldn't we just drop the packets here? There is little point in * making them accumulate when the VM is paused and it'll only waste * kernel memory anyway... Hmm. maybe wait a short while (2-5 secs) * before start draining the packets (goes for the intnet ring buf #
endif /* !VBOXNETFLT_LINUX_NO_XMIT_QUEUE */ * Reports the GSO capabilities of the hardware NIC. * @param pThis The net filter instance. The caller hold a /* Set/update the GSO capabilities of the NIC. */ #
if 0
/** @todo GSO: Test UDP offloading (UFO) on linux. */ Log3((
"vboxNetFltLinuxReportNicGsoCapabilities: reporting wire %s%s%s%s\n",
#
endif /* VBOXNETFLT_WITH_GSO_XMIT_WIRE */ * Helper that determines whether the host (ignoring us) is operating the * interface in promiscuous mode or not. LogFlow((
"vboxNetFltPortOsIsPromiscuous: returns %d, pDev->promiscuity=%d, fPromiscuousSet=%d\n",
* Helper for detecting TAP devices. Log3((
"vboxNetFltIsTapDevice: driver=%s version=%s bus_info=%s\n",
* Helper for updating the link state of TAP devices. * Only TAP devices are affected. Log3((
"vboxNetFltSetTapLinkState: bringing %s tap device link state\n",
#
else /* LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36) */ /* Nothing to do for pre-2.6.36 kernels. */ #
endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 36) */ * Internal worker for vboxNetFltLinuxNotifierCallback. * @returns VBox status code. * @param pThis The instance. * @param fRediscovery If set we're doing a rediscovery attempt, so, don't * Retain and store the device. Log((
"vboxNetFltLinuxAttachToInterface: Device %p(%s) retained. ref=%d\n",
Log((
"vboxNetFltLinuxAttachToInterface: Got pDev=%p pThis=%p pThis->u.s.pDev=%p\n",
/* Get the mac address while we still have a valid net_device reference. */ * Install a packet filter for this device with a protocol wildcard (ETH_P_ALL). Log((
"vboxNetFltLinuxAttachToInterface: this=%p: Packet handler installed.\n",
pThis));
* If attaching to TAP interface we need to bring the link state up * starting from 2.6.36 kernel. * Set indicators that require the spinlock. Be a bit paranoid about racing * the device notification handle. * If the above succeeded report GSO capabilities, if not undo and Log((
"vboxNetFltLinuxAttachToInterface: Device %p(%s) released. ref=%d\n",
Log((
"vboxNetFltLinuxUnregisterDevice: this=%p: packet handler removed.\n",
pThis));
Log((
"vboxNetFltLinuxUnregisterDevice: this=%p: xmit queue purged.\n",
pThis));
Log((
"vboxNetFltLinuxUnregisterDevice: Device %p(%s) released. ref=%d\n",
/* Check if we are not suspended and promiscuous mode has not been set. */ /* Note that there is no need for locking as the kernel got hold of the lock already. */ /* Undo promiscuous mode if we have set it. */ /* Note that there is no need for locking as the kernel got hold of the lock already. */ /** Stringify the NETDEV_XXX constants. */ const char *
pszEvent =
"NETDRV_<unknown>";
* Callback for listening to netdevice events. * This works the rediscovery, clean up on unregistration, promiscuity on * up/down, and GSO feature changes from ethtool. * @param self Pointer to our notifier registration block. * @param ulEventType The event. * @param ptr Event specific, but it is usually the device it Log((
"VBoxNetFlt: got event %s(0x%lx) on %s, pDev=%p pThis=%p pThis->u.s.pDev=%p\n",
* Create a sk_buff for the gather list and push it onto the wire. Log4((
"vboxNetFltPortOsXmit: pBuf->cb dump:\n%.*Rhxd\n",
sizeof(
pBuf->
cb),
pBuf->
cb));
Log4((
"vboxNetFltPortOsXmit: dev_queue_xmit(%p)\n",
pBuf));
* Create a sk_buff for the gather list and push it onto the host stack. Log4((
"vboxNetFltPortOsXmit: pBuf->cb dump:\n%.*Rhxd\n",
sizeof(
pBuf->
cb),
pBuf->
cb));
Log4((
"vboxNetFltPortOsXmit: netif_rx_ni(%p)\n",
pBuf));
LogFlow((
"vboxNetFltPortOsSetActive: pThis=%p (%s), fActive=%s, fDisablePromiscuous=%s\n",
* This api is a bit weird, the best reference is the code. * Also, we have a bit of race conditions wrt the maintenance of * the host interface promiscuity for vboxNetFltPortOsIsPromiscuous. * Remove packet handler when we get disconnected from internal switch as * we don't want the handler to forward packets to disconnected switch. Log((
"vboxNetFltOsDisconnectIt: this=%p: Packet handler removed.\n",
pThis));
* Report the GSO capabilities of the host and device (if connected). * Note! No need to mark ourselves busy here. /** @todo duplicate work here now? Attach */ Log3((
"vboxNetFltOsConnectIt: reporting host tso tso6 ufo\n"));
#
if 0
/** @todo GSO: Test UDP offloading (UFO) on linux. */ /** @todo This code may race vboxNetFltLinuxUnregisterDevice (very very * unlikely, but none the less). Since it doesn't actually update the * state (just reads it), it is likely to panic in some interesting Log((
"vboxNetFltOsDeleteInstance: this=%p: xmit queue purged.\n",
pThis));
Log((
"vboxNetFltOsDeleteInstance: Device %p(%s) released. ref=%d\n",
Log((
"vboxNetFltOsDeleteInstance: this=%p: Notifier removed.\n",
pThis));
Log((
"vboxNetFltOsInitInstance: this=%p: Notifier installed.\n",
pThis));
* Init the linux specific members.