ipnet.c revision 391710f23076bd83c9cf29287fda8576773aec1e
29949e866e40b95795203f3ee46f44a197c946e4stevel * CDDL HEADER START
29949e866e40b95795203f3ee46f44a197c946e4stevel * The contents of this file are subject to the terms of the
29949e866e40b95795203f3ee46f44a197c946e4stevel * Common Development and Distribution License (the "License").
29949e866e40b95795203f3ee46f44a197c946e4stevel * You may not use this file except in compliance with the License.
29949e866e40b95795203f3ee46f44a197c946e4stevel * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
29949e866e40b95795203f3ee46f44a197c946e4stevel * See the License for the specific language governing permissions
29949e866e40b95795203f3ee46f44a197c946e4stevel * and limitations under the License.
29949e866e40b95795203f3ee46f44a197c946e4stevel * When distributing Covered Code, include this CDDL HEADER in each
29949e866e40b95795203f3ee46f44a197c946e4stevel * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
29949e866e40b95795203f3ee46f44a197c946e4stevel * If applicable, add the following below this CDDL HEADER, with the
29949e866e40b95795203f3ee46f44a197c946e4stevel * fields enclosed by brackets "[]" replaced with your own identifying
29949e866e40b95795203f3ee46f44a197c946e4stevel * information: Portions Copyright [yyyy] [name of copyright owner]
29949e866e40b95795203f3ee46f44a197c946e4stevel * CDDL HEADER END
29949e866e40b95795203f3ee46f44a197c946e4stevel * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
29949e866e40b95795203f3ee46f44a197c946e4stevel * Use is subject to license terms.
29949e866e40b95795203f3ee46f44a197c946e4stevel * The ipnet device defined here provides access to packets at the IP layer. To
29949e866e40b95795203f3ee46f44a197c946e4stevel * provide access to packets at this layer it registers a callback function in
29949e866e40b95795203f3ee46f44a197c946e4stevel * the ip module and when there are open instances of the device ip will pass
29949e866e40b95795203f3ee46f44a197c946e4stevel * packets into the device. Packets from ip are passed on the input, output and
29949e866e40b95795203f3ee46f44a197c946e4stevel * loopback paths. Internally the module returns to ip as soon as possible by
29949e866e40b95795203f3ee46f44a197c946e4stevel * deferring processing using a taskq.
29949e866e40b95795203f3ee46f44a197c946e4stevel * Management of the devices in /dev/ipnet/ is handled by the devname
29949e866e40b95795203f3ee46f44a197c946e4stevel * filesystem and use of the neti interfaces. This module registers for NIC
29949e866e40b95795203f3ee46f44a197c946e4stevel * events using the neti framework so that when IP interfaces are brought up,
29949e866e40b95795203f3ee46f44a197c946e4stevel * taken down etc. the ipnet module is notified and its view of the interfaces
29949e866e40b95795203f3ee46f44a197c946e4stevel * configured on the system adjusted. On attach, the module gets an initial
29949e866e40b95795203f3ee46f44a197c946e4stevel * view of the system, again using the neti framework, but as it has already
29949e866e40b95795203f3ee46f44a197c946e4stevel * registered for IP interface events, it is still up-to-date with any changes.
29949e866e40b95795203f3ee46f44a197c946e4stevel 0, /* mi_minpsz */
29949e866e40b95795203f3ee46f44a197c946e4stevel 0 /* mi_lowat */
29949e866e40b95795203f3ee46f44a197c946e4stevel * List to hold static view of ipnetif_t's on the system. This is needed to
29949e866e40b95795203f3ee46f44a197c946e4stevel * avoid holding the lock protecting the avl tree of ipnetif's over the
29949e866e40b95795203f3ee46f44a197c946e4stevel * callback into the dev filesystem.
29949e866e40b95795203f3ee46f44a197c946e4steveltypedef struct ipnetif_cbdata {
29949e866e40b95795203f3ee46f44a197c946e4stevel * Convenience enumerated type for ipnet_accept(). It describes the
29949e866e40b95795203f3ee46f44a197c946e4stevel * properties of a given ipnet_addrp_t relative to a single ipnet_t
29949e866e40b95795203f3ee46f44a197c946e4stevel * client stream. The values represent whether the address is ...
29949e866e40b95795203f3ee46f44a197c946e4steveltypedef enum {
29949e866e40b95795203f3ee46f44a197c946e4stevel IPNETADDR_MBCAST, /* a multicast or broadcast address. */
29949e866e40b95795203f3ee46f44a197c946e4stevel/* Argument used for the ipnet_nicevent_taskq callback. */
29949e866e40b95795203f3ee46f44a197c946e4steveltypedef struct ipnet_nicevent_s {
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic ddi_taskq_t *ipnet_taskq; /* taskq for packets */
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic ddi_taskq_t *ipnet_nicevent_taskq; /* taskq for NIC events */
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic const int IPNET_MINOR_LO = 1; /* minor number for /dev/lo0 */
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic const int IPNET_MINOR_MIN = 2; /* start of dynamic minors */
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic dl_info_ack_t ipnet_infoack = IPNET_INFO_ACK_INIT;
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic int ipnet_open(queue_t *, dev_t *, int, int, cred_t *);
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic int ipnet_attach(dev_info_t *, ddi_attach_cmd_t);
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic int ipnet_detach(dev_info_t *, ddi_detach_cmd_t);
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic int ipnet_devinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic void ipnet_dlpromisconreq(queue_t *q, mblk_t *mp);
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic void ipnet_dlpromiscoffreq(queue_t *q, mblk_t *mp);
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic int ipnet_join_allmulti(ipnetif_t *, ipnet_stack_t *);
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic void ipnet_leave_allmulti(ipnetif_t *, ipnet_stack_t *);
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic int ipnet_nicevent_cb(hook_event_token_t, hook_data_t, void *);
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic void ipnet_nicevent_task(void *);
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic ipnetif_t *ipnetif_create(const char *, uint64_t, ipnet_stack_t *,
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic void ipnetif_remove(ipnetif_t *, ipnet_stack_t *);
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic ipnetif_addr_t *ipnet_match_lif(ipnetif_t *, lif_if_t, boolean_t);
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic ipnetif_t *ipnetif_getby_index(uint64_t, ipnet_stack_t *);
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic ipnetif_t *ipnetif_getby_dev(dev_t, ipnet_stack_t *);
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic boolean_t ipnetif_in_zone(ipnetif_t *, zoneid_t, ipnet_stack_t *);
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic void ipnetif_zonecheck(ipnetif_t *, ipnet_stack_t *);
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic int ipnet_populate_if(net_handle_t, ipnet_stack_t *, boolean_t);
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic int ipnetif_compare_name(const void *, const void *);
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic int ipnetif_compare_name_zone(const void *, const void *);
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic int ipnetif_compare_index(const void *, const void *);
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic void ipnet_add_ifaddr(uint64_t, ipnetif_t *, net_handle_t);
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic void ipnet_delete_ifaddr(ipnetif_addr_t *, ipnetif_t *, boolean_t);
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic void *ipnet_stack_init(netstackid_t, netstack_t *);
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic void ipnet_dispatch(void *);
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic int ipobs_bounce_func(hook_event_token_t, hook_data_t, void *);
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic int ipnet_bpf_bounce(hook_event_token_t, hook_data_t, void *);
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic void ipnet_bpf_release_shared(ipnet_stack_t *);
29949e866e40b95795203f3ee46f44a197c946e4stevelstatic ipnetif_t *ipnetif_clone_create(ipnetif_t *, zoneid_t);
29949e866e40b95795203f3ee46f44a197c946e4stevelDDI_DEFINE_STREAM_OPS(ipnet_ops, nulldev, nulldev, ipnet_attach,
29949e866e40b95795203f3ee46f44a197c946e4stevel ipnet_detach, nodev, ipnet_devinfo, D_MP | D_MTPERMOD, &ipnet_info,
29949e866e40b95795203f3ee46f44a197c946e4stevel "STREAMS ipnet driver",
29949e866e40b95795203f3ee46f44a197c946e4stevel * This structure contains the template data (names and type) that is
29949e866e40b95795203f3ee46f44a197c946e4stevel * copied, in bulk, into the new kstats structure created by net_kstat_create.
29949e866e40b95795203f3ee46f44a197c946e4stevel * No actual statistical information is stored in this instance of the
29949e866e40b95795203f3ee46f44a197c946e4stevel * ipnet_kstats_t structure.
29949e866e40b95795203f3ee46f44a197c946e4stevel * Walk the list of physical interfaces on the machine, for each
29949e866e40b95795203f3ee46f44a197c946e4stevel * interface create a new ipnetif_t and add any addresses to it. We
29949e866e40b95795203f3ee46f44a197c946e4stevel * need to do the walk twice, once for IPv4 and once for IPv6.
29949e866e40b95795203f3ee46f44a197c946e4stevel * The interfaces are destroyed as part of ipnet_stack_fini() for each
29949e866e40b95795203f3ee46f44a197c946e4stevel * stack. Note that we cannot do this initialization in
29949e866e40b95795203f3ee46f44a197c946e4stevel * ipnet_stack_init(), since ipnet_stack_init() cannot fail.
29949e866e40b95795203f3ee46f44a197c946e4stevel if ((ret = ipnet_populate_if(ips->ips_ndv4, ips, B_FALSE)) == 0)
29949e866e40b95795203f3ee46f44a197c946e4stevel * Standard module entry points.
29949e866e40b95795203f3ee46f44a197c946e4stevel if ((ipnet_major = ddi_name_to_major("ipnet")) == (major_t)-1)
29949e866e40b95795203f3ee46f44a197c946e4stevel ipnet_minor_space = id_space_create("ipnet_minor_space",
29949e866e40b95795203f3ee46f44a197c946e4stevel * We call ddi_taskq_create() with nthread == 1 to ensure in-order
29949e866e40b95795203f3ee46f44a197c946e4stevel * delivery of packets to clients. Note that we need to create the
29949e866e40b95795203f3ee46f44a197c946e4stevel * taskqs before calling netstack_register() since ipnet_stack_init()
29949e866e40b95795203f3ee46f44a197c946e4stevel * registers callbacks that use them.
29949e866e40b95795203f3ee46f44a197c946e4stevel ipnet_taskq = ddi_taskq_create(NULL, "ipnet", 1, TASKQ_DEFAULTPRI, 0);
29949e866e40b95795203f3ee46f44a197c946e4stevel ipnet_nicevent_taskq = ddi_taskq_create(NULL, "ipnet_nic_event_queue",
29949e866e40b95795203f3ee46f44a197c946e4stevel if (ipnet_taskq == NULL || ipnet_nicevent_taskq == NULL) {
29949e866e40b95795203f3ee46f44a197c946e4stevel netstack_register(NS_IPNET, ipnet_stack_init, NULL, ipnet_stack_fini);
29949e866e40b95795203f3ee46f44a197c946e4stevel if (ret != 0) {
29949e866e40b95795203f3ee46f44a197c946e4stevel return (0);
29949e866e40b95795203f3ee46f44a197c946e4stevel HOOK_INIT(ips->ips_nicevents, ipnet_nicevent_cb, "ipnet_nicevents",
29949e866e40b95795203f3ee46f44a197c946e4stevel * It is possible for an exclusive stack to be in the process of
29949e866e40b95795203f3ee46f44a197c946e4stevel * shutting down here, and the netid and protocol lookups could fail
29949e866e40b95795203f3ee46f44a197c946e4stevel * in that case.
29949e866e40b95795203f3ee46f44a197c946e4stevel zoneid = netstackid_to_zoneid(ips->ips_netstack->netstack_stackid);
29949e866e40b95795203f3ee46f44a197c946e4stevel if ((ips->ips_ndv4 = net_protocol_lookup(netid, NHF_INET)) != NULL) {
29949e866e40b95795203f3ee46f44a197c946e4stevel if ((ret = net_hook_register(ips->ips_ndv4, NH_NIC_EVENTS,
29949e866e40b95795203f3ee46f44a197c946e4stevel cmn_err(CE_WARN, "unable to register IPv4 netinfo hooks"
29949e866e40b95795203f3ee46f44a197c946e4stevel if ((ips->ips_ndv6 = net_protocol_lookup(netid, NHF_INET6)) != NULL) {
29949e866e40b95795203f3ee46f44a197c946e4stevel if ((ret = net_hook_register(ips->ips_ndv6, NH_NIC_EVENTS,
29949e866e40b95795203f3ee46f44a197c946e4stevel cmn_err(CE_WARN, "unable to register IPv6 netinfo hooks"
29949e866e40b95795203f3ee46f44a197c946e4stevel * Create a local set of kstats for each zone.
29949e866e40b95795203f3ee46f44a197c946e4stevel ips->ips_kstatp = net_kstat_create(netid, "ipnet", 0, "ipnet_stats",
29949e866e40b95795203f3ee46f44a197c946e4stevel sizeof (ipnet_kstats_t) / sizeof (kstat_named_t), 0);
29949e866e40b95795203f3ee46f44a197c946e4stevel (void *)(uintptr_t)ips->ips_netstack->netstack_stackid;
29949e866e40b95795203f3ee46f44a197c946e4stevel cmn_err(CE_WARN, "net_kstat_create(%s,%s,%s) failed",
29949e866e40b95795203f3ee46f44a197c946e4stevel * This function is called on attach to build an initial view of the
29949e866e40b95795203f3ee46f44a197c946e4stevel * interfaces on the system. It will be called once for IPv4 and once
29949e866e40b95795203f3ee46f44a197c946e4stevel * for IPv6. Although there is only one ipnet interface for both IPv4
29949e866e40b95795203f3ee46f44a197c946e4stevel * and IPv6, there are separate address lists.
29949e866e40b95795203f3ee46f44a197c946e4stevelipnet_populate_if(net_handle_t nd, ipnet_stack_t *ips, boolean_t isv6)
29949e866e40b95795203f3ee46f44a197c946e4stevel * If ipnet_register_netihook() was unable to initialize this
29949e866e40b95795203f3ee46f44a197c946e4stevel * stack's net_handle_t, then we cannot populate any interface
29949e866e40b95795203f3ee46f44a197c946e4stevel * information. This usually happens when we attempted to
29949e866e40b95795203f3ee46f44a197c946e4stevel * grab a net_handle_t as a stack was shutting down. We don't
29949e866e40b95795203f3ee46f44a197c946e4stevel * want to fail the entire _init() operation because of a
29949e866e40b95795203f3ee46f44a197c946e4stevel * stack shutdown (other stacks will continue to work just
29949e866e40b95795203f3ee46f44a197c946e4stevel * fine), so we silently return success here.
29949e866e40b95795203f3ee46f44a197c946e4stevel return (0);
29949e866e40b95795203f3ee46f44a197c946e4stevel * Make sure we're not processing NIC events during the
29949e866e40b95795203f3ee46f44a197c946e4stevel * population of our interfaces and address lists.
29949e866e40b95795203f3ee46f44a197c946e4stevel if ((ipnetif = ipnetif_getby_index(phyif, ips)) == NULL) {
29949e866e40b95795203f3ee46f44a197c946e4stevel * Skip addresses that aren't up. We'll add
29949e866e40b95795203f3ee46f44a197c946e4stevel * them when we receive an NE_LIF_UP event.
29949e866e40b95795203f3ee46f44a197c946e4stevel if (net_getlifflags(nd, phyif, lif, &ifflags) != 0 ||
29949e866e40b95795203f3ee46f44a197c946e4stevel /* Don't add it if we already have it. */
29949e866e40b95795203f3ee46f44a197c946e4stevel if (ddi_create_minor_node(dip, "lo0", S_IFCHR, IPNET_MINOR_LO,
29949e866e40b95795203f3ee46f44a197c946e4stevel/* ARGSUSED */
29949e866e40b95795203f3ee46f44a197c946e4stevelipnet_devinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
29949e866e40b95795203f3ee46f44a197c946e4stevel *result = (void *)0;
29949e866e40b95795203f3ee46f44a197c946e4stevel/* ARGSUSED */
29949e866e40b95795203f3ee46f44a197c946e4stevelipnet_open(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp)
29949e866e40b95795203f3ee46f44a197c946e4stevel * If the system is labeled, only the global zone is allowed to open
29949e866e40b95795203f3ee46f44a197c946e4stevel * IP observability nodes.
29949e866e40b95795203f3ee46f44a197c946e4stevel /* We don't support open as a module */
29949e866e40b95795203f3ee46f44a197c946e4stevel /* This driver is self-cloning, we don't support re-open. */
29949e866e40b95795203f3ee46f44a197c946e4stevel if ((ipnet = kmem_zalloc(sizeof (*ipnet), KM_NOSLEEP)) == NULL)
29949e866e40b95795203f3ee46f44a197c946e4stevel ipnet->ipnet_minor = (minor_t)id_alloc(ipnet_minor_space);
29949e866e40b95795203f3ee46f44a197c946e4stevel * We need to hold ips_event_lock here as any NE_LIF_DOWN events need
29949e866e40b95795203f3ee46f44a197c946e4stevel * to be processed after ipnet_if is set and the ipnet_t has been
29949e866e40b95795203f3ee46f44a197c946e4stevel * inserted in the ips_str_list.
29949e866e40b95795203f3ee46f44a197c946e4stevel cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock);
29949e866e40b95795203f3ee46f44a197c946e4stevel *dev = makedevice(getmajor(*dev), ipnet->ipnet_minor);
29949e866e40b95795203f3ee46f44a197c946e4stevel * Only register our callback if we're the first open client; we call
29949e866e40b95795203f3ee46f44a197c946e4stevel * unregister in close() for the last open client.
29949e866e40b95795203f3ee46f44a197c946e4stevel if (list_head(&ips->ips_str_list) == list_tail(&ips->ips_str_list))
29949e866e40b95795203f3ee46f44a197c946e4stevel ips->ips_hook = ipobs_register_hook(ns, ipnet_input);
29949e866e40b95795203f3ee46f44a197c946e4stevel if (err != 0) {
29949e866e40b95795203f3ee46f44a197c946e4stevel ipnet_stack_t *ips = ipnet->ipnet_ns->netstack_ipnet;
29949e866e40b95795203f3ee46f44a197c946e4stevel cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock);
29949e866e40b95795203f3ee46f44a197c946e4stevel ipobs_unregister_hook(ips->ips_netstack, ips->ips_hook);
29949e866e40b95795203f3ee46f44a197c946e4stevel return (0);
29949e866e40b95795203f3ee46f44a197c946e4stevel return (0);
29949e866e40b95795203f3ee46f44a197c946e4stevel return (0);
29949e866e40b95795203f3ee46f44a197c946e4stevel /* Fallthrough, we don't support I_STR with DLIOCIPNETINFO. */
29949e866e40b95795203f3ee46f44a197c946e4stevel union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr;
29949e866e40b95795203f3ee46f44a197c946e4stevel size_t size = sizeof (dl_info_ack_t) + sizeof (ushort_t);
29949e866e40b95795203f3ee46f44a197c946e4stevel if ((mp = mexchange(q, mp, size, M_PCPROTO, DL_INFO_ACK)) == NULL)
case IPV4_VERSION :
case IPV6_VERSION :
int err;
switch (level) {
case DL_PROMISC_PHYS:
case DL_PROMISC_SAP:
case DL_PROMISC_MULTI:
switch (level) {
case DL_PROMISC_PHYS:
case DL_PROMISC_SAP:
case DL_PROMISC_MULTI:
int err = 0;
if (err != 0)
goto done;
if (err != 0 &&
goto done;
done:
return (err);
int err;
static mblk_t *
return (NULL);
return (dlhdr);
static ipnet_addrtype_t
case AF_INET:
return (IPNETADDR_MBCAST);
case AF_INET6:
return (IPNETADDR_MBCAST);
case AF_INET:
case AF_INET6:
return (addrtype);
static boolean_t
return (B_FALSE);
return (B_FALSE);
return (B_TRUE);
return (B_TRUE);
return (B_TRUE);
return (B_FALSE);
static boolean_t
return (B_FALSE);
return (B_FALSE);
DDI_SUCCESS) {
static ipnetif_t *
return (NULL);
return (ipnetif);
static ipnetif_t *
return (NULL);
return (NULL);
return (ipnetif);
case AF_INET:
case AF_INET6:
char *ifname;
ifflags = 0;
if (refrele_needed)
goto done;
case NE_PLUMB:
case NE_UNPLUMB:
case NE_LIF_UP:
case NE_LIF_DOWN:
isv6);
done:
return (dev);
return (dev);
return (dev);
static ipnetif_t *
return (ipnetif);
static ipnetif_t *
return (ipnetif);
static ipnetif_addr_t *
return (ifaddr);
return (ips);
static boolean_t
return (B_TRUE);
return (B_FALSE);
static boolean_t
int ret;
return (B_TRUE);
return (ret);
int res;
int res;
if (res != 0)
hook_t *
return (hook);
static ipnet_stack_t *
return (ESRCH);
return (ESRCH);
int flags)
int error;
if (error != 0)
return (error);
return (EINVAL);
if (error != 0)
goto regfail;
if (error != 0) {
goto regfail;
return (error);
hook) == 0);
hook) == 0);
static ipnetif_t *
return (newif);
return (NULL);
return (newif);
if (doremove) {
if (dofree) {