icmp.c revision 299625c6492013aa7bd163862f0d181854f69b3c
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2013 by Delphix. All rights reserved.
*/
/* Copyright (c) 1990 Mentat Inc. */
#define _SUN_TPI_VERSION 2
#include <sys/xti_inet.h>
#include <sys/socketvar.h>
#include <sys/isa_defs.h>
#include <inet/ipsec_impl.h>
#include <inet/ip_multi.h>
#include <inet/proto_set.h>
#include <inet/kstatcom.h>
#include <inet/ipclassifier.h>
#include <inet/rawip_impl.h>
/*
* Synchronization notes:
*
* RAWIP is MT and uses the usual kernel synchronization primitives. We use
* conn_lock to protect the icmp_t.
*
* Plumbing notes:
* ICMP is always a device driver. For compatibility with mibopen() code
* it is possible to I_PUSH "icmp", but that results in pushing a passthrough
* dummy module.
*/
const in6_addr_t *, uint32_t);
static void icmp_close_free(conn_t *);
int sys_error);
ip_recv_attr_t *);
ip_recv_attr_t *);
/* Common routines for TPI and socket module */
static void rawip_do_close(conn_t *);
static int rawip_do_unbind(conn_t *);
static struct module_info icmp_mod_info = {
};
/*
* Entry points for ICMP as a device.
*/
static struct qinit icmprinitv4 = {
};
static struct qinit icmprinitv6 = {
};
};
/* ICMP entry point during fallback */
static struct qinit icmp_fallback_sock_winit = {
};
struct streamtab icmpinfov4 = {
};
struct streamtab icmpinfov6 = {
};
/* Default structure copied into T_INFO_ACK messages */
static struct T_info_ack icmp_g_t_info_ack = {
IP_MAXPACKET, /* TSDU_size. icmp allows maximum size messages. */
T_INVALID, /* ETSDU_size. icmp does not support expedited data. */
T_INVALID, /* CDATA_size. icmp does not support connect data. */
T_INVALID, /* DDATA_size. icmp does not support disconnect data. */
0, /* ADDR_size - filled in later. */
0, /* OPT_size - not initialized here */
IP_MAXPACKET, /* TIDU_size. icmp allows maximum size messages. */
T_CLTS, /* SERV_type. icmp supports connection-less. */
TS_UNBND, /* CURRENT_state. This is set from icmp_state. */
};
static int
{
}
static int
{
}
/*
*
* Note: All those tunables which do not start with "_" are Committed and
* therefore are public. See PSARC 2010/080.
*/
static mod_prop_info_t icmp_propinfo_tbl[] = {
/* tunable - 0 */
{ "_wroff_extra", MOD_PROTO_RAWIP,
{0, 128, 32}, {32} },
{ "_ipv4_ttl", MOD_PROTO_RAWIP,
{1, 255, 255}, {255} },
{ "_ipv6_hoplimit", MOD_PROTO_RAWIP,
{0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS},
{IPV6_DEFAULT_HOPS} },
{ "_bsd_compat", MOD_PROTO_RAWIP,
{ "send_buf", MOD_PROTO_RAWIP,
{4096, 65536, 8192}, {8192} },
{ "_xmit_lowat", MOD_PROTO_RAWIP,
{0, 65536, 1024}, {1024} },
{ "recv_buf", MOD_PROTO_RAWIP,
{4096, 65536, 8192}, {8192} },
{ "max_buf", MOD_PROTO_RAWIP,
{ "_pmtu_discovery", MOD_PROTO_RAWIP,
{ "_sendto_ignerr", MOD_PROTO_RAWIP,
};
typedef union T_primitives *t_primp_t;
/*
* This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
* passed to icmp_wput.
* It calls IP to verify the local IP address, and calls IP to insert
* the conn_t in the fanout table.
* If everything is ok it then sends the T_BIND_ACK back up.
*/
static void
{
int error;
struct T_bind_req *tbr;
/*
* All Solaris components should pass a db_credp
* for this TPI message, hence we ASSERT.
* But in case there is some other M_PROTO that looks
* like a TPI message sent by some other kernel
* component, we check and return an error.
*/
return;
}
"icmp_bind: bad req, len %u",
return;
}
return;
}
/*
* Reallocate the message to make sure we have enough room for an
* address.
*/
return;
}
/* Reset the message type in preparation for shipping it back. */
switch (len) {
case 0: /* request for a generic port */
} else {
}
break;
case sizeof (sin_t): /* Complete IPv4 address */
sizeof (sin_t));
break;
case sizeof (sin6_t): /* Complete IPv6 address */
break;
default:
return;
}
if (error != 0) {
if (error > 0) {
} else {
}
} else {
}
}
static int
{
int error = 0;
return (EINVAL);
}
switch (len) {
case sizeof (sin_t): /* Complete IPv4 address */
/* TSYSERR, EAFNOSUPPORT */
return (EAFNOSUPPORT);
}
if (v4src != INADDR_ANY) {
B_TRUE);
}
break;
case sizeof (sin6_t): /* Complete IPv6 address */
/* TSYSERR, EAFNOSUPPORT */
return (EAFNOSUPPORT);
}
/* No support for mapped addresses on raw sockets */
/* TSYSERR, EADDRNOTAVAIL */
return (EADDRNOTAVAIL);
}
if (!IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
if (IN6_IS_ADDR_LINKSCOPE(&v6src))
}
break;
default:
/* TBADADDR */
return (EADDRNOTAVAIL);
}
/* Is the local address a valid unicast, multicast, or broadcast? */
if (laddr_type == IPVL_BAD)
return (EADDRNOTAVAIL);
/*
* The state must be TS_UNBND.
*/
return (-TOUTSTATE);
}
/*
* Copy the source address into our icmp structure. This address
* may still be zero; if so, ip will fill in the correct address
* each time an outbound packet is passed to it.
* If we are binding to a broadcast or multicast address then
* we just set the conn_bound_addr since we don't want to use
* that as the source address when sending.
*/
if (scopeid != 0) {
} else {
}
switch (laddr_type) {
case IPVL_UNICAST_UP:
case IPVL_UNICAST_DOWN:
break;
case IPVL_MCAST:
case IPVL_BCAST:
/* ip_set_destination will pick a source address later */
break;
}
/* Any errors after this point should use late_error */
/*
* with IPPROTO_TCP.
*/
connp->conn_fport = 0;
} else {
}
/*
* We create an initial header template here to make a subsequent
* sendto have a starting point. Since conn_last_dst is zero the
* first sendto will always follow the 'dst changed' code path.
* Note that we defer massaging options and the related checksum
* adjustment until we have a destination address.
*/
if (error != 0) {
goto late_error;
}
/* Just in case */
if (error != 0)
goto late_error;
/* Bind succeeded */
return (0);
if (scopeid != 0) {
}
connp->conn_lport = 0;
/* Restore the header that was built above - different source address */
return (error);
}
/*
* Tell IP to just bind to the protocol.
*
* The only step visible here is a call to ip_laddr_fanout_insert() on
* connp. Its return value is explicitly discarded with a (void) cast and
* this routine returns void, so a failure of that call is not reported
* to the caller.
*/
static void
{
/* Result intentionally ignored; nothing is propagated from here. */
(void) ip_laddr_fanout_insert(connp);
}
/*
* This routine handles each T_CONN_REQ message passed to icmp. It
* associates a default destination address with the stream.
*
* After various error checks are completed, icmp_connect() lays
* the target address and port into the composite header template.
* Then we ask IP for information, including a source address if we didn't
* already have one. Finally we send up the T_OK_ACK reply message.
*/
static void
{
struct T_conn_req *tcr;
int error;
/*
* All Solaris components should pass a db_credp
* for this TPI message, hence we ASSERT.
* But in case there is some other M_PROTO that looks
* like a TPI message sent by some other kernel
* component, we check and return an error.
*/
return;
}
/* Sanity checks */
return;
}
if (tcr->OPT_length != 0) {
return;
}
switch (len) {
default:
return;
case sizeof (sin_t):
sizeof (sin_t));
break;
case sizeof (sin6_t):
break;
}
if (error != 0) {
return;
}
if (error != 0) {
if (error < 0) {
} else {
}
} else {
/*
* We have to send a connection confirmation to
* keep TLI happy.
*/
} else {
}
return;
}
/*
* Send ok_ack for T_CONN_REQ
*/
/* Unable to reuse the T_CONN_REQ for the ack. */
return;
}
}
}
static int
{
int error;
return (EINVAL);
}
/*
* Determine packet type based on type of address passed in
* the request should contain an IPv4 or IPv6 address.
* Make sure that address family matches the type of
* family of the address passed down.
*/
switch (len) {
case sizeof (sin_t):
break;
case sizeof (sin6_t):
/* No support for mapped addresses on raw sockets */
return (EADDRNOTAVAIL);
}
}
break;
}
/*
* If there is a different thread using conn_ixa then we get a new
* copy and cut the old one loose from conn_ixa. Otherwise we use
* conn_ixa itself and keep other threads from using/changing it.
* Once connect() is done other threads can use conn_ixa since the
* refcnt will be back at one.
* We defer updating conn_ixa until later to handle any concurrent
* conn_ixa_cleanup thread.
*/
return (ENOMEM);
/*
* This icmp_t must have bound already before doing a connect.
* Reject if a connect is in progress (we drop conn_lock during
* rawip_do_connect).
*/
return (-TOUTSTATE);
}
/* Already connected - clear out state */
if (connp->conn_mcbc_bind)
else
}
/*
* with IPPROTO_TCP.
*/
/*
* Interpret a zero destination to mean loopback.
* generate the T_CONN_CON.
*/
if (v4dst == INADDR_ANY) {
}
connp->conn_flowinfo = 0;
} else {
/*
* Interpret a zero destination to mean loopback.
* generate the T_CONN_CON.
*/
if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
}
}
/*
*/
}
if (is_system_labeled()) {
/* We need to restart with a label based on the cred */
}
if (scopeid != 0) {
} else {
}
/*
* conn_connect will drop conn_lock and reacquire it.
* To prevent a send* from messing with this icmp_t while the lock
* is dropped we set icmp_state and clear conn_v6lastdst.
* That will make all send* fail with EISCONN.
*/
if (error != 0)
goto connect_failed;
/*
* The addresses have been verified. Time to insert in
* the correct fanout list.
*/
if (error != 0)
goto connect_failed;
if (error != 0) {
goto connect_failed;
}
/* Record this as the "last" send even though we haven't sent any */
/* Also remember a source to use together with lastdst */
return (0);
/* In case the source address was set above */
if (connp->conn_mcbc_bind)
else
connp->conn_flowinfo = 0;
return (error);
}
static void
{
if (!IPCL_IS_NONSTR(connp)) {
}
/*
* Now we are truly single threaded on this stream, and can
* delete the things hanging off the connp, and finally the connp.
* We removed this connp from the fanout list, it cannot be
* accessed thru the fanouts, and we already waited for the
* conn_ref to drop to 0. We are already in close, so
* there cannot be any other thread from the top. qprocsoff
* has completed, and service has completed or won't run in
* future.
*/
if (!IPCL_IS_NONSTR(connp)) {
} else {
}
}
/*
* Close handling for the stream. Two paths are visible:
*
* - SO_FALLBACK set in flags: put procedures are turned off with
*   qprocsoff(q) and control jumps straight to "done", so
*   rawip_do_close() is NOT called on this path.
* - otherwise: the common close routine rawip_do_close(connp) is called.
*
* Both paths return 0; no error is reported from here.
*/
static int
{
if (flags & SO_FALLBACK) {
/*
* The stream is being closed while in fallback;
* simply free the resources that were allocated.
*/
qprocsoff(q);
goto done;
}
/* rawip_do_close() is declared void; the cast is purely cosmetic. */
(void) rawip_do_close(connp);
done:
/* Shared exit for the fallback and the regular close path. */
return (0);
}
static void
{
}
/*
* Clear any fields which the kmem_cache constructor clears.
* Only icmp_connp needs to be preserved.
* TBD: We should make this more efficient to avoid clearing
* everything.
*/
}
/*
* This routine handles each T_DISCON_REQ message passed to icmp
* as an indication that ICMP is no longer connected. This results
* in telling IP to restore the binding to just the local address.
*/
static int
{
int error;
return (-TOUTSTATE);
}
if (connp->conn_mcbc_bind)
else
if (error != 0)
return (error);
/*
* Tell IP to remove the full binding and revert
* to the local address binding.
*/
return (ip_laddr_fanout_insert(connp));
}
static void
{
int error;
/*
* Allocate the largest primitive we need to send back
* (T_error_ack is larger than T_ok_ack).
*/
/* Unable to reuse the T_DISCON_REQ for the ack. */
return;
}
if (error != 0) {
if (error > 0) {
} else {
}
} else {
}
}
static int
{
int error;
if (error < 0)
return (error);
}
/* This routine creates a T_ERROR_ACK message and passes it upstream. */
static void
{
}
/* Shorthand to generate and send TPI error acks to our client */
static void
{
struct T_error_ack *teackp;
}
}
/*
* icmp_icmp_input is called as conn_recvicmp to process ICMP messages.
* Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
* Assumes that IP has pulled up everything up to and including the ICMP header.
*/
/* ARGSUSED2 */
static void
{
int iph_hdr_length;
int error = 0;
return;
}
/* Skip past the outer IP and ICMP headers */
switch (icmph->icmph_type) {
case ICMP_DEST_UNREACHABLE:
switch (icmph->icmph_code) {
case ICMP_FRAGMENTATION_NEEDED: {
/*
* IP has already adjusted the path MTU.
* But we need to adjust DF for IPv4.
*/
break;
/*
* Some other thread holds conn_ixa. We will
* redo this on the next ICMP too big.
*/
break;
}
(void) ip_get_pmtu(ixa);
} else {
}
break;
}
case ICMP_PORT_UNREACHABLE:
break;
default:
/* Transient errors */
break;
}
break;
default:
/* Transient errors */
break;
}
if (error == 0) {
return;
}
/*
* Deliver T_UDERROR_IND when the application has asked for it.
* The socket layer enables this automatically when connected.
*/
if (!connp->conn_dgram_errind) {
return;
}
if (IPCL_IS_NONSTR(connp)) {
goto done;
}
} else {
}
} else {
error);
}
done:
}
/*
* icmp_icmp_error_ipv6 is called by icmp_icmp_error to process ICMP for IPv6.
* Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
* Assumes that IP has pulled up all the extension headers as well as the
* ICMPv6 header.
*/
static void
{
int error = 0;
#ifdef DEBUG
else
#endif
/* Skip past the outer IP and ICMP headers */
return;
}
switch (icmp6->icmp6_type) {
case ICMP6_DST_UNREACH:
switch (icmp6->icmp6_code) {
case ICMP6_DST_UNREACH_NOPORT:
break;
case ICMP6_DST_UNREACH_ADMIN:
case ICMP6_DST_UNREACH_ADDR:
/* Transient errors */
break;
default:
break;
}
break;
case ICMP6_PACKET_TOO_BIG: {
struct T_unitdata_ind *tudi;
sizeof (struct ip6_mtuinfo);
struct ip6_mtuinfo *mtuinfo;
/*
* If the application has requested to receive path mtu
* information, send up an empty message containing an
* IPV6_PATHMTU ancillary data item.
*/
if (!connp->conn_ipv6_recvpathmtu)
break;
break;
}
/*
* newmp->b_cont is left to NULL on purpose. This is an
* empty message containing only ancillary data.
*/
/*
* We've consumed everything we need from the original
* message. Free it, then send our empty message.
*/
return;
}
case ICMP6_TIME_EXCEEDED:
/* Transient errors */
break;
case ICMP6_PARAM_PROB:
/* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
break;
}
break;
}
if (error == 0) {
return;
}
/*
* Deliver T_UDERROR_IND when the application has asked for it.
* The socket layer enables this automatically when connected.
*/
if (!connp->conn_dgram_errind) {
return;
}
if (IPCL_IS_NONSTR(connp)) {
&connp->conn_faddr_v6)) {
goto done;
}
} else {
}
} else {
}
done:
}
/*
* This routine responds to T_ADDR_REQ messages. It is called by icmp_wput.
* The local address is filled in if endpoint is bound. The remote address
* is filled in if remote address has been specified ("connected endpoint")
* (The concept of connected CLTS sockets is alien to published TPI
* but we support it anyway).
*/
static void
{
struct T_addr_ack *taa;
/* Make it large enough for worst case */
return;
}
else
/*
* Note: Following code assumes 32 bit alignment of basic
* data structures like sin_t and struct T_addr_ack.
*/
/*
* Fill in local address first
*/
}
/*
* connected, fill remote address too
*/
/* assumed 32-bit alignment */
}
}
static void
{
*tap = icmp_g_t_info_ack;
else
}
static void
{
}
}
/*
* This routine responds to T_CAPABILITY_REQ messages. It is called by
* icmp_wput. Much of the T_CAPABILITY_ACK information is copied from
* icmp_g_t_info_ack. The current state of the stream is copied from
* icmp_state.
*/
static void
{
struct T_capability_ack *tcap;
if (!mp)
return;
}
/*
* This routine responds to T_INFO_REQ messages. It is called by icmp_wput.
* Most of the T_INFO_ACK information is copied from icmp_g_t_info_ack.
* The current state of the stream is copied from icmp_state.
*/
static void
{
/* Create a T_INFO_ACK message. */
if (!mp)
return;
}
static int
int family)
{
int error;
/* If the stream is already open, return immediately. */
return (0);
return (EINVAL);
/*
* Since ICMP is not used so heavily, allocating from the small
* arena should be sufficient.
*/
return (EBUSY);
}
if (flag & SO_FALLBACK) {
/*
* Non streams socket needs a stream to fallback to
*/
qprocson(q);
return (0);
}
return (error);
}
/*
* Initialize the icmp_t structure for this stream.
*/
qprocson(q);
/* Set the Stream head write offset. */
return (0);
}
static int
{
}
static int
{
}
/*
* This is the open routine for icmp. It allocates an icmp_t structure for
* the stream and, on the first open of the module, creates an ND table.
*/
static conn_t *
{
netstack_t *ns;
int len;
if (*err != 0)
return (NULL);
/*
* For exclusive stacks we set the zoneid to zero
* to make ICMP operate as if in the global zone.
*/
else
/*
* ipcl_conn_create did a netstack_hold. Undo the hold that was
* done by netstack_find_by_cred()
*/
/*
* need to lock anything.
*/
/* Set the initial state of the stream and the privilege status. */
if (isv6) {
/* May be changed by a SO_PROTOTYPE socket option. */
} else {
/* May be changed by a SO_PROTOTYPE socket option. */
}
/*
* For the socket of protocol IPPROTO_RAW or when IP_HDRINCL is set,
* the checksum is provided in the pre-built packet. We clear
* IXAF_SET_ULP_CKSUM to tell IP that the application has sent a
* complete IP header and not to compute the transport checksum.
*/
/* conn_allzones can not be set this early, hence no IPCL_ZONEID */
/*
* If the caller has the process-wide flag set, then default to MAC
* exempt mode. This allows read-down to unlabeled hosts.
*/
/* Cache things in ixa without an extra refhold */
if (is_system_labeled())
if (is->is_pmtu_discovery)
return (connp);
}
/*
* Reports which ICMP options are OK to set through T_UNITDATA_REQ.
*
* Returns B_TRUE unconditionally, so no option is rejected by this
* check.
*/
/* ARGSUSED */
static boolean_t
{
/* Every option is allowed; there is no per-option decision here. */
return (B_TRUE);
}
/*
* This routine gets default values of certain options whose default
* values are maintained by protocol specific code.
*
* It dispatches on level and then on name. For each recognized pair the
* return value is the size in bytes of that option's default value:
*
*	IPPROTO_IP	IP_MULTICAST_TTL, IP_MULTICAST_LOOP
*			-> sizeof (uchar_t)
*	IPPROTO_IPV6	IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP,
*			IPV6_UNICAST_HOPS
*			-> sizeof (int)
*	IPPROTO_ICMPV6	ICMP6_FILTER
*			-> sizeof (icmp6_filter_t)
*
* Any other level, or an unrecognized name within a known level, falls
* out of the switch and returns -1.
*/
int
{
switch (level) {
case IPPROTO_IP:
/* IPv4 multicast defaults are single-byte values. */
switch (name) {
case IP_MULTICAST_TTL:
return (sizeof (uchar_t));
case IP_MULTICAST_LOOP:
return (sizeof (uchar_t));
}
break;
case IPPROTO_IPV6:
/* IPv6 hop-limit and loopback defaults are int-sized. */
switch (name) {
case IPV6_MULTICAST_HOPS:
return (sizeof (int));
case IPV6_MULTICAST_LOOP:
return (sizeof (int));
case IPV6_UNICAST_HOPS:
return (sizeof (int));
}
break;
case IPPROTO_ICMPV6:
switch (name) {
case ICMP6_FILTER:
/* Make it look like "pass all" */
return (sizeof (icmp6_filter_t));
}
break;
}
/* No protocol-maintained default for this level/name pair. */
return (-1);
}
/*
* This routine retrieves the current status of socket options.
* It returns the size of the option retrieved, or -1.
*/
int
{
int retval;
coas.coa_changed = 0;
/*
* We assume that the optcom framework has checked for the set
* of levels and names that are supported, hence we don't worry
* about rejecting based on that.
* First check for ICMP specific handling, then pass to common routine.
*/
switch (level) {
case IPPROTO_IP:
/*
* Only allow IPv4 option processing on IPv4 sockets.
*/
return (-1);
switch (name) {
case IP_OPTIONS:
case T_IP_OPTIONS:
/* Options are passed up with each packet */
return (0);
case IP_HDRINCL:
return (sizeof (int));
}
break;
case IPPROTO_IPV6:
/*
* Only allow IPv6 option processing on native IPv6 sockets.
*/
return (-1);
switch (name) {
case IPV6_CHECKSUM:
/*
* Return offset or -1 if no checksum offset.
* Does not apply to IPPROTO_ICMPV6
*/
return (-1);
else
*i1 = -1;
return (sizeof (int));
}
break;
case IPPROTO_ICMPV6:
/*
* Only allow IPv6 option processing on native IPv6 sockets.
*/
return (-1);
return (-1);
switch (name) {
case ICMP6_FILTER:
/* Make it look like "pass all" */
} else {
sizeof (icmp6_filter_t));
}
return (sizeof (icmp6_filter_t));
}
}
return (retval);
}
/*
* This routine retrieves the current status of socket options.
* It returns the size of the option retrieved, or -1.
*/
int
{
int err;
return (err);
}
/*
* This routine sets socket options.
*/
int
{
int error;
/*
* For fixed length options, no sanity check
* of passed in length is done. It is assumed *_optcom_req()
* routines do the right thing.
*/
switch (level) {
case SOL_SOCKET:
switch (name) {
case SO_PROTOTYPE:
secpolicy_net_rawaccess(cr) != 0) {
return (EACCES);
}
if (checkonly)
break;
/* Used by test applications like psh */
icmp->icmp_hdrincl = 0;
} else {
icmp->icmp_hdrincl = 0;
}
/* Set offset for icmp6_cksum */
}
sizeof (icmp6_filter_t));
}
/*
* For SCTP, we don't use icmp_bind_proto() for
* raw socket binding.
*/
return (0);
return (0);
case SO_SNDBUF:
return (ENOBUFS);
}
break;
case SO_RCVBUF:
return (ENOBUFS);
}
break;
}
break;
case IPPROTO_IP:
/*
* Only allow IPv4 option processing on IPv4 sockets.
*/
return (EINVAL);
switch (name) {
case IP_HDRINCL:
if (!checkonly) {
if (onoff)
else
}
break;
}
break;
case IPPROTO_IPV6:
return (EINVAL);
switch (name) {
case IPV6_CHECKSUM:
/*
* Integer offset into the user data of where the
* checksum is located.
* Offset of -1 disables option.
* Does not apply to IPPROTO_ICMPV6.
*/
coa->coa_ancillary) {
return (EINVAL);
}
/* Negative or not 16 bit aligned offset */
return (EINVAL);
}
if (checkonly)
break;
if (*i1 == -1) {
ixa->ixa_raw_cksum_offset = 0;
} else {
}
break;
}
break;
case IPPROTO_ICMPV6:
/*
* Only allow IPv6 option processing on IPv6 sockets.
*/
return (EINVAL);
return (EINVAL);
switch (name) {
case ICMP6_FILTER:
if (checkonly)
break;
if ((inlen != 0) &&
(inlen != sizeof (icmp6_filter_t)))
return (EINVAL);
if (inlen == 0) {
sizeof (icmp6_filter_t));
}
} else {
sizeof (icmp6_filter_t),
return (ENOBUFS);
}
}
}
break;
}
break;
}
return (error);
}
/*
* This routine sets socket options.
*/
int
{
int err;
switch (optset_context) {
case SETFN_OPTCOM_CHECKONLY:
/*
* Note: Implies T_CHECK semantics for T_OPTCOM_REQ
* inlen != 0 implies value supplied and
* we have to "pretend" to set it.
* inlen == 0 implies that there is no
* value part in T_CHECK request and just validation
* done elsewhere should be enough, we just return here.
*/
if (inlen == 0) {
*outlenp = 0;
return (0);
}
break;
case SETFN_OPTCOM_NEGOTIATE:
break;
case SETFN_UD_NEGOTIATE:
case SETFN_CONN_NEGOTIATE:
/*
* Negotiating local and "association-related" options
* through T_UNITDATA_REQ.
*
* Following routine can filter out ones we do not
* want to be "set" this way.
*/
*outlenp = 0;
return (EINVAL);
}
break;
default:
/*
* We should never get here
*/
*outlenp = 0;
return (EINVAL);
}
if (thisdg_attrs != NULL) {
/* Options from T_UNITDATA_REQ */
} else {
/* Get a reference on conn_ixa to prevent concurrent mods */
*outlenp = 0;
return (ENOMEM);
}
coas.coa_changed = 0;
}
if (err != 0) {
if (!coa->coa_ancillary)
*outlenp = 0;
return (err);
}
/*
* Common case of OK return with outval same as inval.
*/
}
/*
* If this was not ancillary data, then we rebuild the headers,
* Since the label depends on the destination we go through
* ip_set_destination first.
*/
if (coa->coa_ancillary) {
return (0);
}
/*
* We clear lastdst to make sure we pick up the change
* next time sending.
* If we are connected we re-cache the information.
* We ignore errors to preserve BSD behavior.
* Note that we don't redo IPsec policy lookup here
* since the final destination (or source) didn't change.
*/
if (!IN6_IS_ADDR_UNSPECIFIED(&faddr) &&
}
}
/*
* Rebuild the header template if we are connected.
* Otherwise clear conn_v6lastdst so we rebuild the header
* in the data path.
*/
if (err != 0) {
return (err);
}
} else {
}
}
connp->conn_rcvbuf);
}
}
/* Increase wroff if needed */
} else {
}
}
}
return (err);
}
/* This routine sets socket options. */
int
{
int error;
return (error);
}
/*
* Setup IP headers.
*
* Note that IP_HDRINCL has ipha_protocol that is different than conn_proto,
* but icmp_output_hdrincl restores ipha_protocol once we return.
*/
mblk_t *
{
return (NULL);
}
/*
* has massaged it and placed the pseudo-header checksum difference
* in the cksum argument.
*
* Prepare for ICMPv6 checksum done in IP.
*
* We make it easy for IP to include our pseudo header
* by putting our length (and any routing header adjustment)
* in the ICMPv6 checksum field.
* The IP source, destination, and length have already been set by
* conn_prepend_hdr.
*/
} else {
uint_t cksum_offset = 0;
}
}
if (cksum_offset != 0) {
/* Make sure the checksum fits in the first mblk */
cksum_offset + sizeof (short));
return (NULL);
}
}
}
}
/* Note that we don't try to update wroff due to ancillary data */
return (mp);
}
static int
{
int error;
/*
* We clear lastdst to make sure we don't use the lastdst path
* next time sending since we might not have set v6dst yet.
*/
if (error != 0)
return (error);
/*
* is stored in conn_sum.
*/
return (0);
}
static mblk_t *
{
/*
* fallback has started but messages have not been moved yet
*/
} else {
}
return (NULL);
} else {
/*
* Fallback completed, let the caller putnext() the mblk.
*/
return (mp);
}
}
/*
* Deliver data to ULP. In case we have a socket, and it's falling back to
* TPI, then we'll queue the mp for later processing.
*/
static void
{
if (IPCL_IS_NONSTR(connp)) {
int error;
/*
* let's confirm while holding the lock
*/
}
}
} else {
}
}
} else {
}
}
/*
* This is the inbound data path.
* IP has already pulled up the IP headers and verified alignment
* etc.
*/
/* ARGSUSED2 */
static void
{
struct T_unitdata_ind *tudi;
int ip_hdr_length;
int udi_size; /* Size of T_unitdata_ind */
int pkt_len;
/*
* Get a snapshot of these and allow other threads to change
* them after that. We need the same recv_ancillary when determining
* the size as when adding the ancillary data items.
*/
/* Initialize regardless of IP version */
ipps.ipp_fields = 0;
if (recv_ancillary.crb_all != 0)
/*
* BSD for some reason adjusts ipha_length to exclude the
* IP header length. We do the same.
*/
if (is->is_bsd_compat) {
/*
* Allocate a new IP header so that we can
* modify ipha_length.
*/
return;
}
}
len -= ip_hdr_length;
}
/*
* sockets. This is ensured by icmp_bind and the IP fanout code.
*/
/*
* This is the inbound data path. Packets are passed upstream
* as T_UNITDATA_IND messages with full IPv4 headers still
* attached.
*/
/*
* Normally only send up the source address.
* If any ancillary data items are wanted we add those.
*/
if (recv_ancillary.crb_all != 0) {
}
/* Allocate a message block for the T_UNITDATA_IND structure. */
return;
}
sizeof (sin_t);
/*
* Add options if IP_RECVIF etc is set
*/
if (udi_size != 0) {
}
goto deliver;
}
/*
* IPv6 packets can only be received by applications
* that are prepared to receive IPv6 addresses.
* The IP fanout must ensure this.
*/
/*
* Handle IPv6 packets. We don't pass up the IP headers with the
* payload for IPv6.
*/
if (recv_ancillary.crb_all != 0) {
/*
* Call on ip_find_hdr_v6 which gets individual lengths of
* extension headers (and pointers to them).
*/
/* We don't care about the length or nextheader. */
/*
* We do not pass up hop-by-hop options or any other
* extension header as part of the packet. Applications
* that want to see them have to specify IPV6_RECV* socket
* options. And conn_recvancillary_size/add explicitly
* drops the TX option from IPV6_HOPOPTS as it does for UDP.
*
* If we had multilevel ICMP sockets, then we'd want to
* modify conn_recvancillary_size/add to
* allow the user to see the label.
*/
}
/*
* Check a filter for ICMPv6 types if needed.
* Verify raw checksums if needed.
*/
int type;
/* Assumes that IP has done the pullupmsg */
return;
}
}
/* Checksum */
int remlen;
if (sum != 0) {
/* IPv6 RAW checksum failed */
return;
}
}
if (recv_ancillary.crb_all != 0) {
}
return;
}
/* No sin6_flowinfo per API */
sin6->sin6_flowinfo = 0;
/* For link-scope pass up scope id */
else
sin6->sin6_scope_id = 0;
if (udi_size != 0) {
}
/* Skip all the IPv6 headers per API */
pkt_len -= ip_hdr_length;
}
/*
* return SNMP stuff in buffer in mpdata. We don't hold any lock and report
* information that can be changing beneath us.
*/
mblk_t *
{
/*
* make a copy of the original message
*/
return (0);
}
/* fixed length structure for IPv4 and IPv6 counters */
sizeof (is->is_rawip_mib));
return (mp2ctl);
}
/*
* Return 0 if invalid set request, 1 otherwise, including non-rawip requests.
*
* As written, every request at level EXPER_RAWIP is treated as invalid
* (returns 0), i.e. nothing at that level can be set; requests for any
* other level return 1.
*
* TODO: If this ever actually tries to set anything, it needs to
* do the appropriate locking.
*/
/* ARGSUSED */
int
{
switch (level) {
case EXPER_RAWIP:
/* No settable items at this level: always reject. */
return (0);
default:
/* Not a rawip request; report it as acceptable. */
return (1);
}
}
/*
* This routine creates a T_UDERROR_IND message and passes it upstream.
* The address and options are copied from the T_UNITDATA_REQ message
* passed in mp. This message is freed.
*/
static void
{
struct T_unitdata_req *tudr;
goto done;
}
goto done;
}
goto done;
}
done:
}
static int
{
/* If a bind has not been done, we can't unbind. */
return (-TOUTSTATE);
}
connp->conn_lport = 0;
connp->conn_fport = 0;
/* In case we were also connected */
return (0);
}
/*
* This routine is called by icmp_wput to handle T_UNBIND_REQ messages.
* After some error checking, the message is passed downstream to ip.
*/
static void
{
int error;
if (error) {
if (error < 0) {
} else {
}
return;
}
/*
* Convert mp into a T_OK_ACK
*/
/*
* should not happen in practice... T_OK_ACK is smaller than the
* original message.
*/
}
/*
* Process IPv4 packets that already include an IP header.
* Used when IP_HDRINCL has been set (implicit for IPPROTO_RAW and
* IPPROTO_IGMP).
* In this case we ignore the address and any options in the T_UNITDATA_REQ.
*
* The packet is assumed to have a base (20 byte) IP header followed
* by the upper-layer protocol. We include any IP_OPTIONS including a
* CIPSO label but otherwise preserve the base IP header.
*/
static int
{
int ip_hdr_length;
int tp_hdr_len;
int error;
/*
* We need an exclusive copy of conn_ixa since the included IP
* header could have any destination.
* That copy has no pointers hence we
* need to set them up once we've parsed the ancillary data.
*/
return (ENOMEM);
}
/*
* Caller has a reference on cr; from db_credp or because we
* are running in process context.
*/
if (is_system_labeled()) {
/* We need to restart with a label based on the cred */
}
/* In case previous destination was multicast or multirt */
/* Get a copy of conn_xmit_ipp since the TX label might change it */
return (ENOMEM);
}
if (error != 0) {
goto done;
}
/* Sanity check length of packet */
goto done;
}
}
/*
* We set IXAF_DONTFRAG if the application set DF which makes
* IP not fragment.
*/
else
/* Even for multicast and broadcast we honor the apps ttl */
/*
* No source verification for non-local addresses
*/
!= IPVL_UNICAST_UP) {
}
(do_ipsec ? IPDF_IPSEC : 0));
switch (error) {
case 0:
break;
case EADDRNOTAVAIL:
/*
* IXAF_VERIFY_SOURCE tells us to pick a better source.
* Don't have the application see that errno
*/
error = ENETUNREACH;
goto failed;
case ENETDOWN:
/*
* Have !ipif_addr_ready address; drop packet silently
* until we can get applications to not send until we
* are ready.
*/
error = 0;
goto failed;
case EHOSTUNREACH:
case ENETUNREACH:
/*
* Let conn_ip_output/ire_send_noroute return
* the error and send any local ICMP error.
*/
error = 0;
break;
}
/* FALLTHRU */
default:
goto done;
}
/*
* We might be going to a different destination than last time,
* thus check that TX allows the communication and compute any
* needed label.
*
* TSOL Note: We have an exclusive ipp and ixa for this thread so we
* don't have to worry about concurrent threads.
*/
if (is_system_labeled()) {
/*
* Check whether Trusted Solaris policy allows communication
* with this host, and pretend that the destination is
* unreachable if not.
* Compute any needed label and place it in ipp_label_v4/v6.
*
* Later conn_build_hdr_template/conn_prepend_hdr takes
* ipp_label_v4/v6 to form the packet.
*
* Tsol note: We have ipp structure local to this thread so
* no locking is needed.
*/
if (error != 0) {
goto done;
}
}
/*
* Save away a copy of the IPv4 header the application passed down
* and then prepend an IPv4 header complete with any IP options
* including label.
* We need a struct copy since icmp_prepend_hdr will reuse the available
* space in the mblk.
*/
goto done;
}
goto done;
}
/* Restore key parts of the header that the application passed down */
/*
* Make sure that the IP header plus any transport header that is
* checksummed by ip_output is in the first mblk. (ip_output assumes
* that at least the checksum field is in the first mblk.)
*/
switch (ipha->ipha_protocol) {
case IPPROTO_UDP:
tp_hdr_len = 8;
break;
case IPPROTO_TCP:
tp_hdr_len = 20;
break;
default:
tp_hdr_len = 0;
break;
}
else
goto done;
}
}
if (!do_ipsec) {
}
goto done;
}
}
/* We're done. Pass the packet to ip. */
/* No rawipOutErrors if an error since IP increases its error counter */
switch (error) {
case 0:
break;
case EWOULDBLOCK:
error = 0;
break;
case EADDRNOTAVAIL:
/*
* IXAF_VERIFY_SOURCE tells us to pick a better source.
* Don't have the application see that errno
*/
error = ENETUNREACH;
break;
}
done:
return (error);
}
static mblk_t *
{
else
}
}
/*
* Handle T_UNITDATA_REQ with options. Handles both IPv4 and IPv6.
* Either tudr_mp or msg is set. If tudr_mp we take ancillary data from
* the TPI options, otherwise we take them from msg_control.
* If both sin and sin6 is set it is a connected socket and we use conn_faddr.
* Always consumes mp; never consumes tudr_mp.
*/
static int
{
int error;
int is_absreq_failure = 0;
/*
* Get ixa before checking state to handle a disconnect race.
*
* We need an exclusive copy of conn_ixa since the ancillary data
* options might modify it. That copy has no pointers hence we
* need to set them up once we've parsed the ancillary data.
*/
return (ENOMEM);
}
if (is_system_labeled()) {
/* We need to restart with a label based on the cred */
}
/* In case previous destination was multicast or multirt */
/* Get a copy of conn_xmit_ipp since the options might change it */
return (ENOMEM);
}
if (error != 0) {
goto done;
}
/*
* Parse the options and update ixa and ipp as a result.
*/
coa->coa_changed = 0;
} else {
struct T_unitdata_req *tudr;
coa, &is_absreq_failure);
}
if (error != 0) {
/*
* Note: No special action needed in this
* module for "is_absreq_failure"
*/
goto done;
}
ASSERT(is_absreq_failure == 0);
/*
* If laddr is unspecified then we look at sin6_src_id.
* We will give precedence to a source address set with IPV6_PKTINFO
* (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
* want ip_attr_connect to select a source (since it can fail) when
* IPV6_PKTINFO is specified.
* If this doesn't result in a source address then we get a source
* from ip_attr_connect() below.
*/
flowinfo = 0;
} else {
}
}
if (IN6_IS_ADDR_V4MAPPED(&v6dst))
else
} else {
/* Connected case */
}
/* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */
} else {
}
}
/*
* Allow source not assigned to the system
* only if it is not a local address
*/
if (!V6_OR_V4_INADDR_ANY(v6src)) {
} else {
}
if (laddr_type != IPVL_UNICAST_UP)
}
switch (error) {
case 0:
break;
case EADDRNOTAVAIL:
/*
* IXAF_VERIFY_SOURCE tells us to pick a better source.
* Don't have the application see that errno
*/
error = ENETUNREACH;
goto failed;
case ENETDOWN:
/*
* Have !ipif_addr_ready address; drop packet silently
* until we can get applications to not send until we
* are ready.
*/
error = 0;
goto failed;
case EHOSTUNREACH:
case ENETUNREACH:
/*
* Let conn_ip_output/ire_send_noroute return
* the error and send any local ICMP error.
*/
error = 0;
break;
}
/* FALLTHRU */
default:
goto done;
}
/*
* We might be going to a different destination than last time,
* thus check that TX allows the communication and compute any
* needed label.
*
* TSOL Note: We have an exclusive ipp and ixa for this thread so we
* don't have to worry about concurrent threads.
*/
if (is_system_labeled()) {
/*
* Check whether Trusted Solaris policy allows communication
* with this host, and pretend that the destination is
* unreachable if not.
* Compute any needed label and place it in ipp_label_v4/v6.
*
* Later conn_build_hdr_template/conn_prepend_hdr takes
* ipp_label_v4/v6 to form the packet.
*
* Tsol note: We have ipp structure local to this thread so
* no locking is needed.
*/
if (error != 0) {
goto done;
}
}
&error);
goto done;
}
goto done;
}
goto done;
}
/* We're done. Pass the packet to ip. */
if (!connp->conn_unspec_src)
/* No rawipOutErrors if an error since IP increases its error counter */
switch (error) {
case 0:
break;
case EWOULDBLOCK:
error = 0;
break;
case EADDRNOTAVAIL:
/*
* IXAF_VERIFY_SOURCE tells us to pick a better source.
* Don't have the application see that errno
*/
error = ENETUNREACH;
/* FALLTHRU */
default:
/*
* Clear the source and v6lastdst so we call ip_attr_connect
* for the next packet and try to pick a better source.
*/
if (connp->conn_mcbc_bind)
else
break;
}
done:
return (error);
}
/*
* Handle sending an M_DATA for a connected socket.
* Handles both IPv4 and IPv6.
*/
int
{
int error;
/*
* If no other thread is using conn_ixa this just gets a reference to
* conn_ixa. Otherwise we get a safe copy of conn_ixa.
*/
return (ENOMEM);
}
switch (ixa->ixa_protocol) {
case IPPROTO_ICMP:
case IPPROTO_ICMPV6:
break;
default:
}
return (error);
}
if (!do_ipsec) {
return (EHOSTUNREACH); /* IPsec policy failure */
}
}
/*
* In case we got a safe copy of conn_ixa, or if opt_set made us a new
* safe copy, then we need to fill in any pointers in it.
*/
(do_ipsec ? IPDF_IPSEC : 0));
switch (error) {
case 0:
break;
case EADDRNOTAVAIL:
/*
* IXAF_VERIFY_SOURCE tells us to pick a better source.
* Don't have the application see that errno
*/
error = ENETUNREACH;
goto failed;
case ENETDOWN:
/*
* Have !ipif_addr_ready address; drop packet silently
* until we can get applications to not send until we
* are ready.
*/
error = 0;
goto failed;
case EHOSTUNREACH:
case ENETUNREACH:
/*
* Let conn_ip_output/ire_send_noroute return
* the error and send any local ICMP error.
*/
error = 0;
break;
}
/* FALLTHRU */
default:
return (error);
}
} else {
/* Done with conn_t */
}
/* We're done. Pass the packet to ip. */
/* No rawipOutErrors if an error since IP increases its error counter */
switch (error) {
case 0:
break;
case EWOULDBLOCK:
error = 0;
break;
case EADDRNOTAVAIL:
/*
* IXAF_VERIFY_SOURCE tells us to pick a better source.
* Don't have the application see that errno
*/
error = ENETUNREACH;
break;
}
return (error);
}
/*
* Handle sending an M_DATA to the last destination.
* Handles both IPv4 and IPv6.
*
* NOTE: The caller must hold conn_lock and we drop it here.
*/
int
{
int error;
switch (ixa->ixa_protocol) {
case IPPROTO_ICMP:
case IPPROTO_ICMPV6:
break;
default:
}
return (error);
}
if (!do_ipsec) {
return (EHOSTUNREACH); /* IPsec policy failure */
}
}
/*
* In case we got a safe copy of conn_ixa, or if opt_set made us a new
* safe copy, then we need to fill in any pointers in it.
*/
switch (error) {
case 0:
break;
case EADDRNOTAVAIL:
/*
* IXAF_VERIFY_SOURCE tells us to pick a better source.
* Don't have the application see that errno
*/
error = ENETUNREACH;
goto failed;
case ENETDOWN:
/*
* Have !ipif_addr_ready address; drop packet silently
* until we can get applications to not send until we
* are ready.
*/
error = 0;
goto failed;
case EHOSTUNREACH:
case ENETUNREACH:
/*
* Let conn_ip_output/ire_send_noroute return
* the error and send any local ICMP error.
*/
error = 0;
break;
}
/* FALLTHRU */
default:
return (error);
}
} else {
/* Done with conn_t */
}
/* We're done. Pass the packet to ip. */
/* No rawipOutErrors if an error since IP increases its error counter */
switch (error) {
case 0:
break;
case EWOULDBLOCK:
error = 0;
break;
case EADDRNOTAVAIL:
/*
* IXAF_VERIFY_SOURCE tells us to pick a better source.
* Don't have the application see that errno
*/
error = ENETUNREACH;
/* FALLTHRU */
default:
/*
* Clear the source and v6lastdst so we call ip_attr_connect
* for the next packet and try to pick a better source.
*/
if (connp->conn_mcbc_bind)
else
break;
}
return (error);
}
/*
* Prepend the header template and then fill in the source and
* flowinfo. The caller needs to handle the destination address since
* its setting is different when there is a routing header or source route.
*
* Returns NULL if allocation failed or if the packet would exceed IP_MAXPACKET.
* When it returns NULL it sets errorp.
*/
static mblk_t *
{
/*
* Copy the header template.
*/
if (pktlen > IP_MAXPACKET) {
return (NULL);
}
return (NULL);
}
}
/*
* Prepare for ICMPv6 checksum done in IP.
*
* icmp_build_hdr_template has already massaged any routing header
* and placed the result in conn_sum.
*
* We make it easy for IP to include our pseudo header
* by putting our length (and any routing header adjustment)
* in the ICMPv6 checksum field.
*/
/* if IP_PKTINFO specified an address it wins over bind() */
} else {
}
} else {
uint_t cksum_offset = 0;
/* if IP_PKTINFO specified an address it wins over bind() */
} else {
}
/* Overrides the class part of flowinfo */
ipp->ipp_tclass);
}
}
}
if (cksum_offset != 0) {
/* Make sure the checksum fits in the first mblk */
cksum_offset + sizeof (short));
return (NULL);
}
}
}
}
return (mp);
}
/*
* This routine handles all messages passed downstream. It either
* consumes the message or passes it downstream; it never queues a
* message.
*/
void
{
int error = 0;
struct T_unitdata_req *tudr;
/*
* We directly handle several cases here: T_UNITDATA_REQ message
* socket.
*/
case M_DATA:
/* sockfs never sends down M_DATA */
return;
case M_PROTO:
case M_PCPROTO:
icmp_wput_other(q, mp);
return;
}
break;
default:
icmp_wput_other(q, mp);
return;
}
/* Handle valid T_UNITDATA_REQ here */
goto ud_error2;
}
goto ud_error2;
}
/*
* All Solaris components should pass a db_credp
* for this message, hence we ASSERT.
* On production kernels we return an error to be robust against
* random streams modules sitting on top of us.
*/
goto ud_error2;
}
/*
* If a port has not been bound to the stream, fail.
* This is not a problem when sockfs is directly
* above us, because it will ensure that the socket
* is first bound before allowing data to be sent.
*/
goto ud_error2;
}
switch (connp->conn_family) {
case AF_INET6:
goto ud_error2;
}
/* No support for mapped addresses on raw sockets */
goto ud_error2;
}
/*
* If the local address is a mapped address return
* an error.
* It would be possible to send an IPv6 packet but the
* response would never make it back to the application
* since it is bound to a mapped address.
*/
goto ud_error2;
}
if (tudr->OPT_length != 0) {
/*
* If we are connected then the destination needs to be
* the same as the connected one.
*/
goto ud_error2;
}
} else {
/*
* We have to allocate an ip_xmit_attr_t before we grab
* conn_lock and we need to hold conn_lock once we've
* checked conn_same_as_last_v6 to handle concurrent
* send* calls on a socket.
*/
goto ud_error2;
}
/* icmp_output_lastdst drops conn_lock */
} else {
/* icmp_output_newdst drops conn_lock */
}
}
if (error == 0) {
return;
}
break;
case AF_INET:
goto ud_error2;
}
/* Protocol 255 contains full IP headers */
/* Read without holding lock */
if (icmp->icmp_hdrincl) {
goto ud_error2;
}
}
if (error == 0) {
return;
}
/* data_mp consumed above */
goto ud_error2;
}
if (tudr->OPT_length != 0) {
/*
* If we are connected then the destination needs to be
* the same as the connected one.
*/
goto ud_error2;
}
} else {
/*
* We have to allocate an ip_xmit_attr_t before we grab
* conn_lock and we need to hold conn_lock once we've
* checked conn_same_as_last_v4 to handle concurrent
* send* calls on a socket.
*/
goto ud_error2;
}
/* icmp_output_lastdst drops conn_lock */
} else {
/* icmp_output_newdst drops conn_lock */
}
}
if (error == 0) {
return;
}
break;
}
/* mp is freed by the following routine */
return;
/* mp is freed by the following routine */
}
/*
* Handle the case of the IP address or flow label being different
* for both IPv4 and IPv6.
*
* NOTE: The caller must hold conn_lock and we drop it here.
*/
static int
{
int error;
/*
* We hold conn_lock across all the use and modifications of
* the conn_lastdst, conn_ixa, and conn_xmit_ipp to ensure that they
* stay consistent.
*/
if (is_system_labeled()) {
/* We need to restart with a label based on the cred */
}
/*
* If we are connected then the destination needs to be the
* same as the connected one, which is not the case here since we
* checked for that above.
*/
goto ud_error;
}
/* In case previous destination was multicast or multirt */
/*
* If laddr is unspecified then we look at sin6_src_id.
* We will give precedence to a source address set with IPV6_PKTINFO
* (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
* want ip_attr_connect to select a source (since it can fail) when
* IPV6_PKTINFO is specified.
* If this doesn't result in a source address then we get a source
* from ip_attr_connect() below.
*/
flowinfo = 0;
srcid = 0;
}
} else {
} else {
}
}
if (IN6_IS_ADDR_V4MAPPED(&v6dst))
else
}
/* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */
} else {
}
}
switch (ixa->ixa_protocol) {
case IPPROTO_ICMP:
case IPPROTO_ICMPV6:
break;
default:
}
(do_ipsec ? IPDF_IPSEC : 0));
switch (error) {
case 0:
break;
case EADDRNOTAVAIL:
/*
* IXAF_VERIFY_SOURCE tells us to pick a better source.
* Don't have the application see that errno
*/
error = ENETUNREACH;
goto failed;
case ENETDOWN:
/*
* Have !ipif_addr_ready address; drop packet silently
* until we can get applications to not send until we
* are ready.
*/
error = 0;
goto failed;
case EHOSTUNREACH:
case ENETUNREACH:
/*
* Let conn_ip_output/ire_send_noroute return
* the error and send any local ICMP error.
*/
error = 0;
break;
}
/* FALLTHRU */
default:
goto ud_error;
}
/*
* While we dropped the lock some other thread might have connected
* this socket. If so we bail out with EISCONN to ensure that the
* connecting thread is the one that updates conn_ixa, conn_ht_*
* and conn_*last*.
*/
goto ud_error;
}
/*
* We need to rebuild the headers if
* - we are labeling packets (could be different for different
* destinations)
* - we have a source route (or routing header) since we need to
* massage that to get the pseudo-header checksum
* - a socket option with COA_HEADER_CHANGED has been set which
* set conn_v6lastdst to zero.
*
* Otherwise the prepend function will just update the src, dst,
* and flow label.
*/
if (is_system_labeled()) {
/* TX MLP requires SCM_UCRED and we don't have that here */
goto ud_error;
}
/*
* Check whether Trusted Solaris policy allows communication
* with this host, and pretend that the destination is
* unreachable if not.
* Compute any needed label and place it in ipp_label_v4/v6.
*
* Later conn_build_hdr_template/conn_prepend_hdr takes
* ipp_label_v4/v6 to form the packet.
*
* Tsol note: Since we hold conn_lock we know no other
* thread manipulates conn_xmit_ipp.
*/
&connp->conn_xmit_ipp);
if (error != 0) {
goto ud_error;
}
/* Rebuild the header template */
flowinfo);
if (error != 0) {
goto ud_error;
}
/* Rebuild the header template */
flowinfo);
if (error != 0) {
goto ud_error;
}
} else {
/* Simply update the destination address if no source route */
} else {
}
} else {
}
}
/*
* Remember the dst etc which corresponds to the built header
* template and conn_ixa.
*/
/* Also remember a source to use together with lastdst */
/* Done with conn_t */
goto ud_error;
}
if (!do_ipsec) {
goto done;
}
}
/* We're done. Pass the packet to ip. */
/* No rawipOutErrors if an error since IP increases its error counter */
switch (error) {
case 0:
break;
case EWOULDBLOCK:
error = 0;
break;
case EADDRNOTAVAIL:
/*
* IXAF_VERIFY_SOURCE tells us to pick a better source.
* Don't have the application see that errno
*/
error = ENETUNREACH;
/* FALLTHRU */
default:
/*
* Clear the source and v6lastdst so we call ip_attr_connect
* for the next packet and try to pick a better source.
*/
if (connp->conn_mcbc_bind)
else
break;
}
done:
return (error);
return (error);
}
/* ARGSUSED */
static void
{
#ifdef DEBUG
#endif
}
static void
{
case M_PROTO:
case M_PCPROTO:
/*
* If the message does not contain a PRIM_type,
* throw it away.
*/
return;
}
case T_ADDR_REQ:
icmp_addr_req(q, mp);
return;
case O_T_BIND_REQ:
case T_BIND_REQ:
icmp_tpi_bind(q, mp);
return;
case T_CONN_REQ:
icmp_tpi_connect(q, mp);
return;
case T_CAPABILITY_REQ:
icmp_capability_req(q, mp);
return;
case T_INFO_REQ:
icmp_info_req(q, mp);
return;
case T_UNITDATA_REQ:
/*
* If a T_UNITDATA_REQ gets here, the address must
* be bad. Valid T_UNITDATA_REQs are handled
* in icmp_wput.
*/
return;
case T_UNBIND_REQ:
icmp_tpi_unbind(q, mp);
return;
case T_SVR4_OPTMGMT_REQ:
/*
* All Solaris components should pass a db_credp
* for this TPI message, hence we ASSERT.
* But in case there is some other M_PROTO that looks
* like a TPI message sent by some other kernel
* component, we check and return an error.
*/
return;
}
cr)) {
}
return;
case T_OPTMGMT_REQ:
/*
* All Solaris components should pass a db_credp
* for this TPI message, hence we ASSERT.
* But in case there is some other M_PROTO that looks
* like a TPI message sent by some other kernel
* component, we check and return an error.
*/
return;
}
return;
case T_DISCON_REQ:
icmp_tpi_disconnect(q, mp);
return;
/* The following TPI message is not supported by icmp. */
case O_T_CONN_RES:
case T_CONN_RES:
return;
/* The following 3 TPI requests are illegal for icmp. */
case T_DATA_REQ:
case T_EXDATA_REQ:
case T_ORDREL_REQ:
return;
default:
break;
}
break;
case M_FLUSH:
break;
case M_IOCTL:
case TI_GETPEERNAME:
/*
* If a default destination address has not
* been associated with the stream, then we
* don't know the peer's name.
*/
return;
}
/* FALLTHRU */
case TI_GETMYNAME:
/*
* For TI_GETPEERNAME and TI_GETMYNAME, we first
* need to copyin the user's strbuf structure.
* Processing will continue in the M_IOCDATA case
* below.
*/
return;
default:
break;
}
break;
case M_IOCDATA:
icmp_wput_iocdata(q, mp);
return;
default:
/* Unrecognized messages are passed through without change. */
break;
}
ip_wput_nondata(q, mp);
}
/*
* icmp_wput_iocdata is called by icmp_wput_other to handle all M_IOCDATA
* messages.
*/
static void
{
/* Make sure it is one of ours. */
case TI_GETMYNAME:
case TI_GETPEERNAME:
break;
default:
ip_wput_nondata(q, mp);
return;
}
case -1:
return;
break;
/*
* The address has been copied out, so now
* copyout the strbuf.
*/
mi_copyout(q, mp);
return;
/*
* The address and strbuf have been copied out.
* We're done, so just acknowledge the original
* M_IOCTL.
*/
mi_copy_done(q, mp, 0);
return;
default:
/*
* Something strange has happened, so acknowledge
* the original M_IOCTL with an EPROTO error.
*/
return;
}
/*
* Now we have the strbuf structure for TI_GETMYNAME
* and TI_GETPEERNAME. Next we copyout the requested
* address and then we'll copyout the strbuf.
*/
else
return;
}
case TI_GETMYNAME:
break;
case TI_GETPEERNAME:
return;
}
break;
default:
return;
}
if (!mp1)
return;
case TI_GETMYNAME:
&addrlen);
break;
case TI_GETPEERNAME:
&addrlen);
break;
}
/* Copy out the address */
mi_copyout(q, mp);
}
/*
* Global initialization entry point for the ICMP (raw IP) module.
* Takes no arguments and returns nothing.
*
* NOTE(review): the call that performs the registration described in the
* comment below is not visible in this excerpt - confirm against the
* complete file.
*/
void
icmp_ddi_g_init(void)
{
/*
* We want to be informed each time a stack is created or
* destroyed in the kernel, so we can maintain the
* set of icmp_stack_t's.
*/
}
/*
* Global teardown entry point for the ICMP (raw IP) module.
* Takes no arguments and returns nothing.
*
* NOTE(review): the body appears empty in this excerpt; presumably it
* undoes whatever icmp_ddi_g_init() set up - confirm against the
* complete file.
*/
void
icmp_ddi_g_destroy(void)
{
}
#define INET_NAME "ip"
/*
* Initialize the ICMP stack instance.
*/
static void *
{
int error = 0;
arrsz = sizeof (icmp_propinfo_tbl);
return (is);
}
/*
* Free the ICMP stack instance.
*/
static void
{
}
static void *
{ "inDatagrams", KSTAT_DATA_UINT32, 0 },
{ "inCksumErrs", KSTAT_DATA_UINT32, 0 },
{ "inErrors", KSTAT_DATA_UINT32, 0 },
{ "outDatagrams", KSTAT_DATA_UINT32, 0 },
{ "outErrors", KSTAT_DATA_UINT32, 0 },
};
0, stackid);
return (NULL);
return (ksp);
}
static void
{
}
}
static int
{
netstack_t *ns;
return (EIO);
if (rw == KSTAT_WRITE)
return (EACCES);
return (-1);
return (-1);
}
return (0);
}
/* ARGSUSED */
int
{
return (EOPNOTSUPP);
}
/* ARGSUSED */
int
{
int error;
/* All Solaris components should pass a cred for this operation. */
/* Binding to a NULL address really means unbind */
else
if (error < 0) {
else
}
return (error);
}
static int
{
int error;
len = sizeof (struct sockaddr_in);
} else {
}
}
static int
{
int error;
if (error < 0) {
}
return (error);
}
/* ARGSUSED */
int
{
return (EOPNOTSUPP);
}
int
{
int error;
/* All Solaris components should pass a cred for this operation. */
/*
* Disconnect
* Make sure we are connected
*/
return (EINVAL);
return (error);
}
if (error != 0)
return (error);
/* do an implicit bind if necessary */
/*
* We could be racing with an actual bind, in which case
* we would see EPROTO. We cross our fingers and try
* to connect.
*/
return (error);
}
/*
* set SO_DGRAM_ERRIND
*/
int unbind_err;
ASSERT(unbind_err == 0);
}
if (error == 0) {
*id = 0;
0, NULL, -1);
} else if (error < 0) {
}
return (error);
}
/* ARGSUSED2 */
int
{
struct T_capability_ack tca;
short opts;
struct stroptions *stropt;
int error;
/*
* setup the fallback stream that was allocated
*/
/* Notify stream head about options before sending up data */
/*
* free helper stream
*/
/*
* Collect the information needed to sync with the sonode
*/
if (error != 0)
faddrlen = 0;
opts = 0;
if (connp->conn_dgram_errind)
opts |= SO_DGRAM_ERRIND;
opts |= SO_DONTROUTE;
/*
* Attempts to send data up during fallback will result in it being
* queued in icmp_t. Now we push up any queued packets.
*/
}
}
/*
* No longer a STREAMS-less socket
*/
return (0);
}
/* ARGSUSED2 */
{
return (NULL);
}
} else {
}
return ((sock_lower_handle_t)connp);
}
/* ARGSUSED3 */
void
{
struct sock_proto_props sopp;
/* All Solaris components should pass a cred for this operation. */
}
/* ARGSUSED3 */
int
{
int error;
/* All Solaris components should pass a cred for this operation. */
else
return (error);
}
/* ARGSUSED3 */
int
{
int error;
/* All Solaris components should pass a cred for this operation. */
return (error);
}
int
{
int error;
/* All Solaris components should pass a cred for this operation. */
if (error != 0) {
/*
* option not recognized
*/
if (error < 0) {
}
return (error);
}
return (error);
}
int
{
int error;
void *optvalp_buf;
int len;
/* All Solaris components should pass a cred for this operation. */
if (error != 0) {
if (error < 0) {
}
return (error);
}
if (len == -1) {
return (EINVAL);
}
/*
* update optlen and copy option value
*/
return (0);
}
/* ARGSUSED1 */
int
{
/* All Solaris components should pass a cred for this operation. */
(void) rawip_do_close(connp);
return (0);
}
/* ARGSUSED2 */
int
{
/* All Solaris components should pass a cred for this operation. */
/* shut down the send side */
SOCK_OPCTL_SHUT_SEND, 0);
/* shut down the recv side */
SOCK_OPCTL_SHUT_RECV, 0);
return (0);
}
void
{
}
int
{
int error;
/* All Solaris components should pass a cred for this operation. */
/*
* If we don't have a helper stream then create one.
* ip_create_helper_stream takes care of locking the conn_t,
* so this check for NULL is just a performance optimization.
*/
/*
* Create a helper stream for non-STREAMS socket.
*/
if (error != 0) {
ip0dbg(("rawip_ioctl: create of IP helper stream "
"failed %d\n", error));
return (error);
}
}
switch (cmd) {
case _SIOCSOCKFALLBACK:
case TI_GETPEERNAME:
case TI_GETMYNAME:
#ifdef DEBUG
" socket", cmd);
#endif
break;
default:
/*
* Pass on to IP using helper stream
*/
break;
}
return (error);
}
int
{
int error = 0;
/* All Solaris components should pass a cred for this operation. */
/* do an implicit bind if necessary */
/*
* We could be racing with an actual bind, in which case
* we would see EPROTO. We cross our fingers and try
* to connect.
*/
return (error);
}
}
/* Protocol 255 contains full IP headers */
/* Read without holding lock */
if (icmp->icmp_hdrincl) {
return (EINVAL);
}
}
if (is->is_sendto_ignerr)
return (0);
else
return (error);
}
/* Connected? */
return (EDESTADDRREQ);
}
if (msg->msg_controllen != 0) {
} else {
}
if (is->is_sendto_ignerr)
return (0);
else
return (error);
}
return (EISCONN);
}
if (error != 0) {
return (error);
}
switch (connp->conn_family) {
case AF_INET6:
/* No support for mapped addresses on raw sockets */
return (EADDRNOTAVAIL);
}
/*
* If the local address is a mapped address return
* an error.
* It would be possible to send an IPv6 packet but the
* response would never make it back to the application
* since it is bound to a mapped address.
*/
return (EADDRNOTAVAIL);
}
/*
* We have to allocate an ip_xmit_attr_t before we grab
* conn_lock and we need to hold conn_lock once we've checked
* conn_same_as_last_v6 to handle concurrent send* calls on a
* socket.
*/
if (msg->msg_controllen == 0) {
return (ENOMEM);
}
} else {
}
if (icmp->icmp_delayed_error != 0) {
icmp->icmp_delayed_error = 0;
/* Compare IP address and family */
return (error);
}
}
if (msg->msg_controllen != 0) {
/* icmp_output_lastdst drops conn_lock */
} else {
/* icmp_output_newdst drops conn_lock */
}
if (is->is_sendto_ignerr)
return (0);
else
return (error);
case AF_INET:
/*
* We have to allocate an ip_xmit_attr_t before we grab
* conn_lock and we need to hold conn_lock once we've checked
* conn_same_as_last_v4 to handle concurrent send* calls on a socket.
*/
if (msg->msg_controllen == 0) {
return (ENOMEM);
}
} else {
}
if (icmp->icmp_delayed_error != 0) {
icmp->icmp_delayed_error = 0;
/* Compare IP address */
return (error);
}
}
if (msg->msg_controllen != 0) {
/* icmp_output_lastdst drops conn_lock */
} else {
/* icmp_output_newdst drops conn_lock */
}
if (is->is_sendto_ignerr)
return (0);
else
return (error);
default:
return (EINVAL);
}
}
NULL,
NULL,
NULL,
};