bpf.c revision 0a0e9771ca0211c15f3ac4466b661c145feeb9e4
/* $NetBSD: bpf.c,v 1.143 2009/03/11 05:55:22 mrg Exp $ */
/*
* Copyright (c) 1990, 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
* Berkeley Laboratory.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)bpf.c 8.4 (Berkeley) 1/9/95
* static char rcsid[] =
* "Header: bpf.c,v 1.67 96/09/26 22:00:52 leres Exp ";
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* The BPF implements the following access controls for zones attempting
* to read and write data. Writing of data requires that the net_rawaccess
* privilege is held whilst reading data requires either net_rawaccess or
* net_observability.
*
* | Shared | Exclusive | Global
* -----------------------------+--------+------------+------------+
* DLT_IPNET in local zone | Read | Read | Read |
* -----------------------------+--------+------------+------------+
* -----------------------------+--------+------------+------------+
* -----------------------------+--------+------------+------------+
*
* The BPF driver is written as a cloning driver: each call to bpfopen()
* allocates a new minor number. This provides BPF with a 1:1 relationship
* between opens and closes. There is some amount of "descriptor state"
* that is kept per open. Pointers to this data are stored in a hash table
* (bpf_hash) that is indexed by the minor device number for each open file.
*/
#include <sys/sysmacros.h>
#include <sys/mac_client.h>
#include <sys/mac_impl.h>
#include <sys/time_std_impl.h>
#include <sys/hook_event.h>
/*
* 4096 is too small for FDDI frames. 8192 is too small for gigabit Ethernet
* jumbos (circa 9k), ATM, or Intel gig/10gig ethernet jumbos (16k).
*/
/*
* bpf_bufsize is the default read buffer size: every newly opened
* descriptor starts with bd_bufsize set to this value. (The limit applied
* to BIOCSBLEN requests is bpf_maxbufsize.)
*/
int bpf_bufsize = BPF_BUFSIZE;
/*
* bpf_debug, when nonzero, enables the diagnostic messages that are
* guarded by "if (bpf_debug)" elsewhere in this file. Off by default.
*/
int bpf_debug = 0;
/*
*/
static kcondvar_t bpf_dlt_waiter;
static bpf_kstats_t ks_stats;
/*
* Names of the driver's statistics counters. Every entry is declared as a
* 64-bit unsigned kstat value (KSTAT_DATA_UINT64).
*/
static bpf_kstats_t bpf_kstats = {
{ "readWait", KSTAT_DATA_UINT64 },
{ "writeOk", KSTAT_DATA_UINT64 },
{ "writeError", KSTAT_DATA_UINT64 },
{ "receive", KSTAT_DATA_UINT64 },
{ "captured", KSTAT_DATA_UINT64 },
{ "dropped", KSTAT_DATA_UINT64 },
};
/*
* bpf_iflist is the list of interfaces; each corresponds to an ifnet
* bpf_dtab holds the descriptors, indexed by minor device #
*/
static int bpf_allocbufs(struct bpf_d *);
static void bpf_clear_timeout(struct bpf_d *);
static void bpf_debug_nic_action(char *, struct bpf_if *);
static struct bpf_if *
bpf_findif(struct bpf_d *, char *, int);
static int bpf_ifname(struct bpf_d *d, char *, int);
static void bpf_detachd(struct bpf_d *);
static void bpf_timed_out(void *);
static inline void
bpf_wakeup(struct bpf_d *);
static int bpf_setdlt(struct bpf_d *, void *);
static void bpf_dev_add(struct bpf_d *);
static void bpf_dev_remove(struct bpf_d *);
static int
{
mblk_t *m;
int error;
int len;
int hlen;
int align;
/*
* Build a sockaddr based on the data link layer type.
* We do this at this level because the ethernet header
* is copied directly into the data field of the sockaddr.
* In the case of SLIP, there is no header and the packet
* is forwarded as is.
* Also, we are careful to leave room at the front of the mbuf
* for the link level header.
*/
switch (linktype) {
case DLT_EN10MB:
hlen = sizeof (struct ether_header);
break;
case DLT_FDDI:
hlen = 16;
break;
case DLT_NULL:
hlen = 0;
break;
case DLT_IPOIB:
hlen = 44;
break;
default:
return (EIO);
}
/*
* If there aren't enough bytes for a link level header or the
* packet length exceeds the interface mtu, return an error.
*/
return (EMSGSIZE);
if (m == NULL) {
goto bad;
}
/* Ensure the data is properly aligned */
if (align > 0)
if (error)
goto bad;
*mp = m;
return (0);
bad:
if (m != NULL)
freemsg(m);
return (error);
}
/*
* Attach file to the bpf interface, i.e. make d listen on bp.
*/
static void
{
/*
* Point d at bp, and add d to the interface's list of listeners.
* Finally, point the driver's bpf cookie at the interface so
* it will divert packets to bpf.
*
* Note: Although this results in what looks like a lock order
* reversal (bd_lock is held), the deadlock threat is not present
* because the descriptor is not attached to any interface and
* therefore there cannot be a packet waiting on bd_lock in
* catchpacket.
*/
&d->bd_promisc_handle, d->bd_promisc_flags);
}
/*
* Detach a file from its interface.
*
* Called with d->bd_lock held and returns with it held. The lock is
* dropped part-way through (see the comments below), so callers must not
* assume that state protected by bd_lock is unchanged across the call.
*
* By the time this returns, bd_mcip, bd_promisc, bd_promisc_handle and
* bd_bif have all been cleared.
*/
static void
bpf_detachd(struct bpf_d *d)
{
d->bd_mcip = 0;
/*
* Check if this descriptor had requested promiscuous mode.
* If so, turn it off. There's no need to take any action
* here, that is done when MBPF_PROMISC_REMOVE is used;
* bd_promisc is just a local flag to stop promiscuous mode
* from being set more than once.
*/
if (d->bd_promisc)
d->bd_promisc = 0;
/*
* Take device out of "promiscuous" mode. Since we were able to
* enter "promiscuous" mode, we should be able to turn it off.
* Note, this field stores a pointer used to support both
* promiscuous and non-promiscuous callbacks for packets.
*
* The handle is copied to a local and the field is cleared while
* bd_lock is still held, so the removal below works on the local copy.
*/
mph = d->bd_promisc_handle;
d->bd_promisc_handle = 0;
/*
* The lock has to be dropped here because mac_promisc_remove may
* need to wait for mac_promisc_dispatch, which has called into
* bpf and catchpacket is waiting for bd_lock...
* i.e. mac_promisc_remove() needs to be called with none of the
* locks held that are part of the bpf_mtap() call path.
*/
mutex_exit(&d->bd_lock);
if (mph != 0)
if (mch != 0)
/*
* bd_lock needs to stay not held by this function until after
* it has finished with bif_lock, otherwise there's a lock order
* reversal with bpf_deliver and the system can deadlock.
*
* Remove d from the interface's descriptor list.
*/
LIST_REMOVE(d, bd_next);
/*
* This function is called with bd_lock held, so it must also
* exit with it held.
*/
mutex_enter(&d->bd_lock);
/*
* bd_bif cannot be cleared until after the promisc callback has been
* removed.
*/
d->bd_bif = 0;
}
/*
* bpfilterattach() is called at load time.
*/
int
bpfilterattach(void)
{
return (ENOMEM);
} else {
return (EEXIST);
}
return (0);
}
/*
* bpfilterdetach() is called at unload time.
*/
int
bpfilterdetach(void)
{
}
/*
* this is now safe without a lock.
*/
if (!LIST_EMPTY(&bpf_list)) {
return (EBUSY);
}
return (0);
}
/*
* Open ethernet device. Clones.
*/
/* ARGSUSED */
int
{
struct bpf_d *d;
/*
* The security policy described at the top of this file is
* enforced here.
*/
if (secpolicy_net_rawaccess(cred) != 0)
return (EACCES);
}
if ((secpolicy_net_observability(cred) != 0) &&
(secpolicy_net_rawaccess(cred) != 0))
return (EACCES);
}
return (ENXIO);
/*
* If BPF is being opened from a non-global zone, trigger a call
* back into the driver to see if it needs to initialise local
* state in a zone.
*/
/*
* A structure is allocated per open file in BPF to store settings
* such as buffer capture size, provide private buffers, etc.
*/
d->bd_bufsize = bpf_bufsize;
d->bd_seesent = 1;
/*
* Find an unused minor number. Obviously this is an O(n) algorithm
* and doesn't scale particularly well, so if there are large numbers
* of open file descriptors happening in real use, this design may
* need to be revisited.
*/
break;
kmem_free(d, sizeof (*d));
return (ENXIO);
}
bpf_dev_add(d);
return (0);
}
/*
* Close the descriptor by detaching it from its interface,
* deallocating its buffers, and marking it free.
*
* Because we only allow a device to be opened once, there is always a
* 1 to 1 relationship between opens and closes supporting this function.
*/
/* ARGSUSED */
int
{
mutex_enter(&d->bd_lock);
if (d->bd_state == BPF_WAITING)
if (d->bd_bif)
bpf_detachd(d);
mutex_exit(&d->bd_lock);
LIST_REMOVE(d, bd_list);
bpf_dev_remove(d);
mutex_enter(&d->bd_lock);
mutex_destroy(&d->bd_lock);
cv_destroy(&d->bd_wait);
bpf_freed(d);
kmem_free(d, sizeof (*d));
return (0);
}
/*
 * Rotate the packet buffers in descriptor d. Move the store buffer (and
 * its length) into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer and leave the free slot empty.
 *
 * Readers rely on this leaving a buffer in the hold slot: after rotating
 * they proceed on the basis that bd_hbuf is set.
 *
 * The body is wrapped in do { } while (0) so that "ROTATE_BUFFERS(d);"
 * behaves as a single statement, including under an unbraced if/else.
 * The argument is evaluated more than once, so it must be free of side
 * effects.
 */
#define	ROTATE_BUFFERS(d) \
	do { \
		(d)->bd_hbuf = (d)->bd_sbuf; \
		(d)->bd_hlen = (d)->bd_slen; \
		(d)->bd_sbuf = (d)->bd_fbuf; \
		(d)->bd_slen = 0; \
		(d)->bd_fbuf = 0; \
	} while (0)
/*
* bpfread - read next chunk of packets from buffers
*/
/* ARGSUSED */
int
{
int timed_out;
int error;
return (EBADF);
/*
* Restrict application to use a buffer the same size as
* the kernel buffers.
*/
return (EINVAL);
mutex_enter(&d->bd_lock);
if (d->bd_state == BPF_WAITING)
/*
* If the hold buffer is empty, then do a timed sleep, which
* ends when the timeout expires or when enough packets
* have arrived to fill the store buffer.
*/
while (d->bd_hbuf == 0) {
if (d->bd_nonblock) {
if (d->bd_slen == 0) {
mutex_exit(&d->bd_lock);
return (EWOULDBLOCK);
}
ROTATE_BUFFERS(d);
break;
}
/*
* A packet(s) either arrived since the previous
* read or arrived while we were asleep.
* Rotate the buffers and return what's here.
*/
ROTATE_BUFFERS(d);
break;
}
if (error == 0) {
mutex_exit(&d->bd_lock);
return (EINTR);
}
if (error == -1) {
/*
* On a timeout, return what's in the buffer,
* which may be nothing. If there is something
* in the store buffer, we can rotate the buffers.
*/
if (d->bd_hbuf)
/*
* We filled up the buffer in between
* getting the timeout and arriving
* here, so we don't need to rotate.
*/
break;
if (d->bd_slen == 0) {
mutex_exit(&d->bd_lock);
return (0);
}
ROTATE_BUFFERS(d);
}
}
/*
* At this point, we know we have something in the hold slot.
*/
mutex_exit(&d->bd_lock);
/*
* Move data from hold buffer into user space.
* We know the entire buffer is transferred since
* we checked above that the read buffer is bpf_bufsize bytes.
*/
mutex_enter(&d->bd_lock);
d->bd_hbuf = 0;
d->bd_hlen = 0;
done:
mutex_exit(&d->bd_lock);
return (error);
}
/*
* If there are processes sleeping on this descriptor, wake them up.
* NOTE: the lock for bd_wait is bd_lock and is held by bpf_deliver,
* so there is no code here grabbing it.
*/
static inline void
bpf_wakeup(struct bpf_d *d)
{
}
/*
* Read-timeout handler for a descriptor.
*
* Under bd_lock, a descriptor that is still in the BPF_WAITING state is
* moved to BPF_TIMED_OUT. A descriptor in any other state is left alone.
*/
static void
bpf_timed_out(void *arg)
{
mutex_enter(&d->bd_lock);
if (d->bd_state == BPF_WAITING) {
d->bd_state = BPF_TIMED_OUT;
/*
* NOTE(review): the store buffer is non-empty here, so presumably
* any waiting reader is woken at this point -- confirm against the
* full source.
*/
if (d->bd_slen != 0)
}
mutex_exit(&d->bd_lock);
}
/* ARGSUSED */
int
{
mblk_t *m;
int error;
int dlt;
return (EBADF);
mutex_enter(&d->bd_lock);
mutex_exit(&d->bd_lock);
return (EINTR);
}
mutex_exit(&d->bd_lock);
return (0);
}
while (d->bd_inuse < 0) {
d->bd_waiting++;
d->bd_waiting--;
mutex_exit(&d->bd_lock);
return (EINTR);
}
d->bd_waiting--;
}
mutex_exit(&d->bd_lock);
d->bd_inuse++;
m = NULL;
goto done;
}
if (error)
goto done;
goto done;
}
/*
* The "tx" action here is required to consume the mblk_t.
*/
m = NULL;
done:
if (error == 0)
else
if (m != NULL)
freemsg(m);
mutex_enter(&d->bd_lock);
d->bd_inuse--;
if ((d->bd_inuse == 0) && (d->bd_waiting != 0))
mutex_exit(&d->bd_lock);
/*
* The driver frees the mbuf.
*/
return (error);
}
/*
* Reset a descriptor by flushing its packet buffer and clearing the
* receive and drop counts. Should be called at splnet (historical BSD
* wording; the callers visible in this file hold bd_lock instead).
*/
static void
{
if (d->bd_hbuf) {
/* Free the hold buffer. */
d->bd_hbuf = 0;
}
d->bd_slen = 0;
d->bd_hlen = 0;
d->bd_rcount = 0;
d->bd_dcount = 0;
d->bd_ccount = 0;
}
/*
* FIONREAD Check for read packet available.
* BIOCGBLEN Get buffer len [for read()].
* BIOCSETF Set ethernet read filter.
* BIOCFLUSH Flush read packet buffer.
* BIOCPROMISC Put interface into promiscuous mode.
* BIOCGDLT Get link layer type.
* BIOCGETIF Get interface name.
* BIOCSETIF Set interface.
* BIOCSRTIMEOUT Set read timeout.
* BIOCGRTIMEOUT Get read timeout.
* BIOCGSTATS Get packet stats.
* BIOCIMMEDIATE Set immediate mode.
* BIOCVERSION Get filter language version.
* BIOCGHDRCMPLT Get "header already complete" flag.
* BIOCSHDRCMPLT Set "header already complete" flag.
*/
/* ARGSUSED */
int
{
struct bpf_program prog;
int error = 0;
/*
* Refresh the PID associated with this bpf file.
*/
mutex_enter(&d->bd_lock);
if (d->bd_state == BPF_WAITING)
mutex_exit(&d->bd_lock);
switch (cmd) {
default:
break;
/*
* Check for read packet available.
*/
case FIONREAD:
{
int n;
mutex_enter(&d->bd_lock);
n = d->bd_slen;
if (d->bd_hbuf)
n += d->bd_hlen;
mutex_exit(&d->bd_lock);
*(int *)addr = n;
break;
}
/*
* Get buffer len [for read()].
*/
case BIOCGBLEN:
sizeof (d->bd_bufsize));
break;
/*
* Set buffer length.
*/
case BIOCSBLEN:
break;
}
mutex_enter(&d->bd_lock);
if (d->bd_bif != 0) {
} else {
if (size > bpf_maxbufsize)
else if (size < BPF_MINBUFSIZE)
d->bd_bufsize = size;
}
mutex_exit(&d->bd_lock);
if (error == 0)
break;
/*
* Set link layer read filter.
*/
case BIOCSETF:
break;
}
break;
/*
* Flush read packet buffer.
*/
case BIOCFLUSH:
mutex_enter(&d->bd_lock);
reset_d(d);
mutex_exit(&d->bd_lock);
break;
/*
* Put interface into promiscuous mode.
* This is a one-way ioctl, it is not used to turn promiscuous
* mode off.
*/
case BIOCPROMISC:
if (d->bd_bif == 0) {
/*
* No interface attached yet.
*/
break;
}
mutex_enter(&d->bd_lock);
if (d->bd_promisc == 0) {
if (d->bd_promisc_handle) {
mph = d->bd_promisc_handle;
d->bd_promisc_handle = 0;
mutex_exit(&d->bd_lock);
mutex_enter(&d->bd_lock);
}
d->bd_mcip, MAC_CLIENT_PROMISC_ALL, d,
&d->bd_promisc_handle, d->bd_promisc_flags);
if (error == 0)
d->bd_promisc = 1;
}
mutex_exit(&d->bd_lock);
break;
/*
* Get device parameters.
*/
case BIOCGDLT:
if (d->bd_bif == 0)
else
break;
/*
* Get a list of supported device parameters.
*/
case BIOCGDLTLIST:
if (d->bd_bif == 0) {
} else {
struct bpf_dltlist list;
break;
}
if ((error == 0) &&
}
break;
/*
* Set device parameters.
*/
case BIOCSDLT:
break;
/*
* Get interface name.
*/
case BIOCGETIF:
break;
}
if ((error == 0) &&
break;
}
break;
/*
* Set interface.
*/
case BIOCSETIF:
break;
}
break;
/*
* Get interface name.
*/
case BIOCGETLIF:
break;
}
if ((error == 0) &&
break;
}
break;
/*
* Set interface.
*/
case BIOCSETLIF:
break;
}
break;
#ifdef _SYSCALL32_IMPL
/*
* Set read timeout.
*/
case BIOCSRTIMEOUT32:
{
break;
}
/* Convert the timeout in microseconds to ticks */
d->bd_rtout = 1;
break;
}
/*
* Get read timeout.
*/
case BIOCGRTIMEOUT32:
{
break;
}
/*
* Get a list of supported device parameters.
*/
case BIOCGDLTLIST32:
if (d->bd_bif == 0) {
} else {
struct bpf_dltlist32 lst32;
struct bpf_dltlist list;
break;
}
if (error == 0) {
sizeof (lst32)) != 0)
}
}
break;
/*
* Set link layer read filter.
*/
case BIOCSETF32: {
struct bpf_program32 prog32;
break;
}
break;
}
#endif
/*
* Set read timeout.
*/
case BIOCSRTIMEOUT:
{
break;
}
/* Convert the timeout in microseconds to ticks */
d->bd_rtout = 1;
break;
}
/*
* Get read timeout.
*/
case BIOCGRTIMEOUT:
{
break;
}
/*
* Get packet stats.
*/
case BIOCGSTATS:
{
break;
}
/*
* Set immediate mode.
*/
case BIOCIMMEDIATE:
sizeof (d->bd_immediate)) != 0)
break;
case BIOCVERSION:
{
struct bpf_version bv;
break;
}
case BIOCGHDRCMPLT: /* get "header already complete" flag */
sizeof (d->bd_hdrcmplt)) != 0)
break;
case BIOCSHDRCMPLT: /* set "header already complete" flag */
sizeof (d->bd_hdrcmplt)) != 0)
break;
/*
* Get "see sent packets" flag
*/
case BIOCGSEESENT:
sizeof (d->bd_seesent)) != 0)
break;
/*
* Set "see sent" packets flag
*/
case BIOCSSEESENT:
sizeof (d->bd_seesent)) != 0)
break;
case FIONBIO: /* Non-blocking I/O */
sizeof (d->bd_nonblock)) != 0)
break;
}
return (error);
}
/*
* Set d's packet filter program to fp. If this file already has a filter,
* free it and replace it. If the new filter is "empty" (has a 0 size), then
* the result is to just remove and free the existing filter.
* Returns EINVAL for bogus requests.
*/
int
{
return (EINVAL);
mutex_enter(&d->bd_lock);
oldsize = d->bd_filter_size;
d->bd_filter = 0;
d->bd_filter_size = 0;
reset_d(d);
mutex_exit(&d->bd_lock);
if (old != 0)
return (0);
}
if (flen > BPF_MAXINSNS)
return (EINVAL);
return (EFAULT);
mutex_enter(&d->bd_lock);
oldsize = d->bd_filter_size;
d->bd_filter_size = size;
reset_d(d);
mutex_exit(&d->bd_lock);
if (old != 0)
return (0);
}
return (EINVAL);
}
/*
* Detach a file from its current interface (if attached at all) and attach
* to the interface indicated by the name stored in ifr.
* Return an errno or 0.
*/
static int
{
int unit_seen;
char *cp;
int i;
/*
* Make sure the provided name has a unit number, and default
* it to '0' if not specified.
* XXX This is ugly ... do this differently?
*/
unit_seen = 0;
while (*cp++)
unit_seen = 1;
if (!unit_seen) {
/* Make sure to leave room for the '\0'. */
for (i = 0; i < (namesize - 1); ++i) {
continue;
ifname[i] = '0';
}
}
/*
* Make sure that only one call to this function happens at a time
*/
mutex_enter(&d->bd_lock);
while (d->bd_inuse != 0) {
d->bd_waiting++;
d->bd_waiting--;
mutex_exit(&d->bd_lock);
return (EINTR);
}
d->bd_waiting--;
}
d->bd_inuse = -1;
mutex_exit(&d->bd_lock);
/*
* Look through attached interfaces for the named one.
*
* The search is done twice - once
*/
int error = 0;
if (d->bd_sbuf == 0)
error = bpf_allocbufs(d);
/*
* We found the requested interface.
* If we're already attached to requested interface,
* just flush the buffer.
*/
mutex_enter(&d->bd_lock);
if (d->bd_bif)
/*
* Detach if attached to something else.
*/
bpf_detachd(d);
bpf_attachd(d, bp);
}
reset_d(d);
d->bd_inuse = 0;
if (d->bd_waiting != 0)
mutex_exit(&d->bd_lock);
return (error);
}
mutex_enter(&d->bd_lock);
d->bd_inuse = 0;
if (d->bd_waiting != 0)
mutex_exit(&d->bd_lock);
/*
* Try to tickle the mac layer into attaching the device...
*/
}
/*
* Copy the interface name to the ifreq.
*/
static int
{
mutex_enter(&d->bd_lock);
mutex_exit(&d->bd_lock);
return (EINVAL);
}
mutex_exit(&d->bd_lock);
return (0);
}
/*
* Support for poll() system call
*
* Return true iff the specific operation will not block indefinitely - with
* the assumption that it is safe to positively acknowledge a request for the
* ability to write to the BPF device.
* Otherwise, return false but make a note that a selnotify() must be done.
*/
int
{
/*
* An imitation of the FIONREAD ioctl code.
*/
mutex_enter(&d->bd_lock);
if (d->bd_hlen != 0 ||
d->bd_slen != 0)) {
} else {
*reventsp = 0;
if (!anyyet)
/* Start the read timeout if necessary */
/*
* Only allow the timeout to be set once.
*/
if (d->bd_callout == 0)
d, d->bd_rtout);
d->bd_state = BPF_WAITING;
}
}
mutex_exit(&d->bd_lock);
}
return (0);
}
/*
* Copy data from an mblk_t chain into a buffer. This works for ipnet
* because the dl_ipnetinfo_t is placed in an mblk_t that leads the
* packet itself.
*/
static void *
{
const mblk_t *m;
m = src_arg;
while (len > 0) {
if (m == NULL)
panic("bpf_mcpy");
m = m->b_cont;
}
return (dst_arg);
}
/*
* Dispatch a packet to all the listeners on interface bp.
*
* marg pointer to the packet, either a data buffer or an mbuf chain
* buflen buffer length, if marg is a data buffer
* cpfn a function that can copy marg into the listener's buffer
* pktlen length of the packet
* issent boolean indicating whether the packet was sent or received
*/
static inline void
{
if (!d->bd_seesent && issent)
return;
/*
* Accuracy of the packet counters in BPF is vital so it
* is important to protect even the outer ones.
*/
mutex_enter(&d->bd_lock);
d->bd_rcount++;
if (slen != 0) {
}
mutex_exit(&d->bd_lock);
}
/*
* Incoming linkage from device drivers.
*/
/* ARGSUSED */
void
{
void *marg;
} else {
marg = m;
buflen = 0;
}
}
/*
* Incoming linkage from ipnet.
* In ipnet, there is only one event, NH_OBSERVE, that delivers packets
* from all network interfaces. Thus the tap function needs to apply a
* specified interface.
*/
/* ARGSUSED */
void
{
return;
}
/*
* Move the packet data from interface memory (pkt) into the
* store buffer. Return 1 if it's time to wakeup a listener (buffer full),
* otherwise 0. "copy" is the routine called to do the actual data
* transfer. memcpy is passed in to copy contiguous chunks, while
* bpf_mcpy is passed in to copy mbuf chains. In the latter case,
* pkt is really an mbuf.
*/
static void
{
int do_wakeup = 0;
++d->bd_ccount;
/*
* Figure out how many bytes to move. If the packet is
* greater or equal to the snapshot length, transfer that
* much. Otherwise, transfer the whole packet (unless
* we hit the buffer size limit).
*/
if (totlen > d->bd_bufsize)
totlen = d->bd_bufsize;
/*
* Round up the end of the previous packet to the next longword.
*/
/*
* This packet will overflow the storage buffer.
* Rotate the buffers if we can, then wakeup any
* pending reads.
*/
if (d->bd_fbuf == 0) {
/*
* We haven't completed the previous read yet,
* so drop the packet.
*/
++d->bd_dcount;
return;
}
ROTATE_BUFFERS(d);
do_wakeup = 1;
curlen = 0;
/*
* Immediate mode is set, or the read timeout has
* already expired during a select call. A packet
* arrived, so the reader should be woken up.
*/
do_wakeup = 1;
}
/*
* Append the bpf header to the existing buffer before we add
* on the actual packet data.
*/
/*
* Copy the packet data into the store buffer and update its length.
*/
/*
* Call bpf_wakeup after bd_slen has been updated.
*/
if (do_wakeup)
bpf_wakeup(d);
}
/*
* Initialize all nonzero fields of a descriptor.
*
* Returns ENOBUFS if either the free buffer (bd_fbuf) or the store buffer
* (bd_sbuf) is not set; otherwise zeroes the store and hold lengths
* (bd_slen, bd_hlen) and returns 0.
*
* NOTE(review): presumably each check directly follows the allocation of
* the buffer it tests -- confirm against the full source.
*/
static int
bpf_allocbufs(struct bpf_d *d)
{
if (!d->bd_fbuf)
return (ENOBUFS);
if (!d->bd_sbuf) {
return (ENOBUFS);
}
/* Both buffers are present: start with empty store and hold buffers. */
d->bd_slen = 0;
d->bd_hlen = 0;
return (0);
}
/*
* Free buffers currently in use by a descriptor.
* Called on close.
*/
static void
{
/*
* At this point the descriptor has been detached from its
* interface and it hasn't yet been marked free.
*/
if (d->bd_sbuf != 0) {
if (d->bd_hbuf != 0)
if (d->bd_fbuf != 0)
}
if (d->bd_filter)
}
/*
* Attach an additional dlt for an interface to bpf.
* dlt is the link layer type.
*
* The zoneid is passed in explicitly to prevent the need to
* do a lookup in dls using the linkid. Such a lookup would need
* to use the same hash table that gets used for walking when
* dls_set_bpfattach() is called.
*/
void
{
int hdrlen;
if (bpf_debug)
provider);
return;
}
if (bpf_debug)
return;
}
/*
* To get the user-visible name, it is necessary to get the mac
* client name of an interface and for this, we need to do the
* mac_client_open. Leaving it open is undesirable because it
* creates an open reference that is hard to see from outside
* of bpf, potentially leading to data structures not being
* cleaned up when they should.
*/
if (bpf_debug)
"bpfattach: mac_client_open fail for %s",
return;
}
sizeof (bp->bif_ifname));
/*
* Compute the length of the bpf header. This is not necessarily
* equal to SIZEOF_BPF_HDR because we want to insert spacing such
* that the network layer header begins on a longword boundary (for
* performance reasons and to alleviate alignment restrictions).
*/
if (bpf_debug) {
"bpfattach: linkid resolution fail for %s/%s",
}
return;
}
}
/*
* Remove an interface from bpf.
*/
void
{
struct bpf_d *d;
int removed = 0;
/*
* Loop through all of the known descriptors to find any that are
* using the interface that wants to be detached.
*/
mutex_enter(&d->bd_lock);
/*
* Detach the descriptor from an interface now.
* It will be freed later by the close routine.
*/
bpf_detachd(d);
}
mutex_exit(&d->bd_lock);
}
removed++;
goto again;
}
}
}
/*
* Get a list of the available data link types of the interface.
*/
static int
{
int n, error;
sizeof (ifname));
n = 0;
error = 0;
continue;
if (d->bd_zone != GLOBAL_ZONEID &&
continue;
return (ENOMEM);
/*
* Bumping of bif_inuse ensures the structure does not
* disappear while the copyout runs and allows the for
* loop to be continued.
*/
}
n++;
}
return (error);
}
/*
* Set the data link type of a BPF instance.
*/
static int
{
int error;
int dlt;
return (EFAULT);
/*
* The established order is get bpf_mtx before bd_lock, even
* though bpf_mtx is not needed until the loop...
*/
mutex_enter(&d->bd_lock);
if (d->bd_bif == 0) { /* Interface not set */
mutex_exit(&d->bd_lock);
return (EINVAL);
}
mutex_exit(&d->bd_lock);
return (0);
}
/*
* See the matrix at the top of the file for the permissions table
* enforced by this driver.
*/
mutex_exit(&d->bd_lock);
return (EINVAL);
}
sizeof (ifname));
/*
* Now only bd_lock is held.
*
* If there was no matching interface that supports the requested
* DLT, return an error and leave the current binding alone.
*/
mutex_exit(&d->bd_lock);
return (EINVAL);
}
error = 0;
bpf_detachd(d);
bpf_attachd(d, bp);
reset_d(d);
mutex_exit(&d->bd_lock);
return (error);
}
/*
* bpf_clear_timeout is called with the bd_lock mutex held, providing it
* with the necessary protection to retrieve and modify bd_callout but it
* does not hold the lock for its entire duration... see below...
*
* It returns with bd_lock held again. Because the lock may be released in
* between, callers must not assume that state protected by bd_lock is
* unchanged across the call.
*/
static void
bpf_clear_timeout(struct bpf_d *d)
{
d->bd_callout = 0;
/*
* bd_inuse is raised for the period in which bd_lock may be dropped.
* NOTE(review): presumably this holds off code that waits for bd_inuse
* to reach 0 before re-binding the descriptor -- confirm.
*/
d->bd_inuse++;
/*
* If the timeout has fired and bpf_timed_out is waiting on bd_lock,
* calling untimeout with bd_lock held could deadlock: untimeout would
* wait for bpf_timed_out to finish, and it never would. So bd_lock is
* released around this step and re-acquired afterwards.
*/
if (tid != 0) {
mutex_exit(&d->bd_lock);
mutex_enter(&d->bd_lock);
}
d->bd_inuse--;
}
/*
* As a cloning device driver, BPF needs to keep track of which device
* numbers are in use and which ones are not. A hash table, indexed by
* the minor device number, is used to store the pointers to the
* individual descriptors that are allocated in bpfopen().
* The functions below present the interface for that hash table to
* the rest of the driver.
*/
static struct bpf_d *
{
(mod_hash_val_t *)&d);
return (d);
}
static void
bpf_dev_add(struct bpf_d *d)
{
(mod_hash_val_t)d);
}
static void
bpf_dev_remove(struct bpf_d *d)
{
(mod_hash_val_t *)&stor);
}
/*
* bpf_dev_get should only ever be called for a minor number that exists,
* thus there should always be a pointer in the hash table that corresponds
* to it.
*/
static struct bpf_d *
{
(mod_hash_val_t *)&d);
return (d);
}
static void
{
if (bpf_debug) {
}
}
/*
* Finding a BPF network interface is a two pass job.
* In the first pass, the best possible match is made on zone, DLT and
* interface name.
* In the second pass, we allow global zone snoopers to attach to interfaces
* that are reserved for other zones.
* This ensures that the global zone will always see its own interfaces first
* before attaching to those that belong to a shared IP instance zone.
*/
static struct bpf_if *
{
continue;
continue;
continue;
return (bp);
}
if (d->bd_zone == GLOBAL_ZONEID) {
continue;
continue;
return (bp);
}
}
return (NULL);
}