ipsecesp.c revision 44b099c4d944a196d124a02c7403ad891223139e
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/sysmacros.h>
#include <net/pfpolicy.h>
#include <inet/ipsec_info.h>
#include <inet/ipsec_impl.h>
#include <inet/ipsecesp.h>
#include <inet/udp_impl.h>
/*
* Table of ND variables supported by ipsecesp. These are loaded into
* ipsecesp_g_nd in ipsecesp_init_nd.
*/
/*
 * Each row is { min, max, value, name }.  "value" is the initial setting;
 * per the NDD set routine's comment later in this file, a new value that
 * does not lie within the bounds is rejected with EINVAL.
 *
 * NOTE(review): the ipsecesp_* accessor macros that follow this table
 * presumably index it by position, so rows should not be reordered without
 * checking them -- confirm.
 * NOTE(review): units are not stated here; the timeout/addtime/usetime/
 * interval entries look like seconds -- confirm.
 */
static ipsecespparam_t lcl_param_arr[] = {
/* min max value name */
{ 0, 3, 0, "ipsecesp_debug"},
{ 1, 10, 1, "ipsecesp_reap_delay"},
{ 1, 300, 15, "ipsecesp_acquire_timeout"},
{ 1, 1800, 90, "ipsecesp_larval_timeout"},
/* Default lifetime values for ACQUIRE messages. */
/* A byte or usetime default of 0 presumably means "no limit" -- confirm. */
{ 0, 0xffffffffU, 0, "ipsecesp_default_soft_bytes"},
{ 0, 0xffffffffU, 0, "ipsecesp_default_hard_bytes"},
{ 0, 0xffffffffU, 24000, "ipsecesp_default_soft_addtime"},
{ 0, 0xffffffffU, 28800, "ipsecesp_default_hard_addtime"},
{ 0, 0xffffffffU, 0, "ipsecesp_default_soft_usetime"},
{ 0, 0xffffffffU, 0, "ipsecesp_default_hard_usetime"},
/* Behavioural knobs: logging, padding strictness, NAT-T keepalives. */
{ 0, 1, 0, "ipsecesp_log_unknown_spi"},
{ 0, 2, 1, "ipsecesp_padding_check"},
{ 0, 600, 20, "ipsecesp_nat_keepalive_interval"},
};
#define ipsecesp_acquire_timeout \
#define ipsecesp_larval_timeout \
#define ipsecesp_default_soft_bytes \
#define ipsecesp_default_hard_bytes \
#define ipsecesp_default_soft_addtime \
#define ipsecesp_default_hard_addtime \
#define ipsecesp_default_soft_usetime \
#define ipsecesp_default_hard_usetime \
#define ipsecesp_log_unknown_spi \
#define ipsecesp_padding_check \
/* For ipsecesp_nat_keepalive_interval, see ipsecesp.h. */
/* NOTE: != 0 instead of > 0 so lint doesn't complain. */
static int ipsecesp_close(queue_t *);
ipsecesp_stack_t *, cred_t *);
kstat_named_t **, ipsecesp_stack_t *);
static struct module_info info = {
};
};
};
struct streamtab ipsecespinfo = {
};
/*
*
* Question: Do I need this, given that all instance's esps->esps_wq point
* to IP?
*
* Answer: Yes, because I need to know which queue is BOUND to
* IPPROTO_ESP
*/
/*
* Stats. This may eventually become a full-blown SNMP MIB once that spec
* stabilizes.
*/
typedef struct esp_kstats_s {
} esp_kstats_t;
/*
* espstack->esp_kstats is equal to espstack->esp_ksp->ks_data if
* kstat_create_netstack for espstack->esp_ksp succeeds, but when it
* fails, it will be NULL. Note this is done for all stack instances,
* so it *could* fail.  Hence a non-NULL check is done in
* ESP_BUMP_STAT and ESP_DEBUMP_STAT.
*/
#define ESP_BUMP_STAT(espstack, x) \
do { \
} while (0)
#define ESP_DEBUMP_STAT(espstack, x) \
do { \
} while (0)
static int esp_kstat_update(kstat_t *, int);
static boolean_t
{
"net", KSTAT_TYPE_NAMED,
sizeof (esp_kstats_t) / sizeof (kstat_named_t),
return (B_FALSE);
#define K64 KSTAT_DATA_UINT64
KI(keysock_in);
return (B_TRUE);
}
static int
{
netstack_t *ns;
return (EIO);
if (rw == KSTAT_WRITE)
return (EACCES);
return (-1);
return (-1);
}
return (0);
}
#ifdef DEBUG
/*
* Debug routine, useful to see pre-encryption data.
*/
static char *
{
unsigned char *ptr;
printf("mblk address 0x%p, length %ld, db_ref %d "
"type %d, base 0x%p, lim 0x%p\n",
tmp_line[0] = '\0';
if (!(diff & 0x1f)) {
tmp_line[0] = '\0';
}
}
if (!(diff & 0x3))
ptr++;
}
}
return ("\n");
}
#else /* DEBUG */
static char *
{
return ("\n");
}
#endif /* DEBUG */
/*
* Don't have to lock age_interval, as only one thread will access it at
* a time, because I control the one function that does with timeout().
*/
static void
{
}
/*
* Get an ESP NDD parameter.
*/
/* ARGSUSED */
static int
queue_t *q;
{
return (0);
}
/*
* This routine sets an NDD variable in a ipsecespparam_t structure.
*/
/* ARGSUSED */
static int
queue_t *q;
char *value;
{
/*
* Fail the request if the new value does not lie within the
* required bounds.
*/
return (EINVAL);
}
/* Set the new value */
return (0);
}
/*
* Using lifetime NDD variables, fill in an extended combination's
* lifetime information.
*/
void
{
}
/*
* Initialize things for ESP at module load time.
*/
ipsecesp_ddi_init(void)
{
/*
* We want to be informed each time a stack is created or
* destroyed in the kernel, so we can maintain the
* set of ipsecesp_stack_t's.
*/
/*
* NOTE(review): in this view no registration call follows the comment
* above, and no return type precedes the function name.  As written the
* function unconditionally reports success (B_TRUE) -- confirm the
* declared return type and the missing registration against the full
* file.
*/
return (B_TRUE);
}
/*
* Walk through the param array specified registering each element with the
* named dispatch handler.
*/
static boolean_t
{
espp->ipsecesp_param_name[0]) {
return (B_FALSE);
}
}
}
return (B_TRUE);
}
/*
* Initialize things for ESP for each stack instance
*/
static void *
{
KM_SLEEP);
return (espstack);
}
/*
* Destroy things for ESP at module unload time.
*/
void
ipsecesp_ddi_destroy(void)
{
/*
* NOTE(review): the body is empty in this view, so nothing is torn down
* here; whatever ipsecesp_ddi_init() sets up presumably has to be undone
* in this function -- confirm against the full file.
*/
}
/*
* Destroy things for ESP for one stack instance
*/
static void
{
}
}
/*
* ESP module open routine, which is here for keysock plumbing.
* Keysock is pushed over {AH,ESP} which is an artifact from the Bad Old
* Days of export control, and fears that ESP would not be allowed
* to be shipped at all by default. Eventually, keysock should
* either access AH and ESP via modstubs or krtld dependencies, or
*/
/* ARGSUSED */
static int
{
netstack_t *ns;
return (EPERM);
return (0); /* Re-open of an already open instance. */
return (EINVAL);
qprocson(q);
return (0);
}
/*
* ESP module close routine.
*/
static int
{
/*
* Clean up q_ptr, if needed.
*/
qprocsoff(q);
/* Keysock queue check is safe, because of OCEXCL perimeter. */
if (q == espstack->esp_pfkey_q) {
("ipsecesp_close: Ummm... keysock is closing ESP.\n"));
/* Detach qtimeouts. */
}
return (0);
}
/*
* Add a number of bytes to what the SA has protected so far. Return
* B_TRUE if the SA can still protect that many bytes.
*
* Caller must REFRELE the passed-in assoc. This function must REFRELE
* any obtained peer SA.
*/
static boolean_t
{
int outhash;
/* No peer? No problem! */
if (!assoc->ipsa_haspeer) {
B_TRUE));
}
/*
* Otherwise, we want to grab both the original assoc and its peer.
* There might be a race for this, but if it's a real race, two
* expire messages may occur. We limit this by only sending the
* expire message on one of the peers, we'll pick the inbound
* arbitrarily.
*
* If we need tight synchronization on the peer SA, then we need to
* reconsider.
*/
if (inbound) {
if (isv6) {
&inassoc->ipsa_dstaddr));
} else {
&inassoc->ipsa_dstaddr));
}
/* Q: Do we wish to set haspeer == B_FALSE? */
esp0dbg(("esp_age_bytes: "
"can't find peer for inbound.\n"));
}
} else {
/* Q: Do we wish to set haspeer == B_FALSE? */
esp0dbg(("esp_age_bytes: "
"can't find peer for outbound.\n"));
}
}
/*
* REFRELE any peer SA.
*
* Because of the multi-line macro nature of IPSA_REFRELE, keep
* them in { }.
*/
if (inbound) {
} else {
}
}
/*
* Do incoming NAT-T manipulations for packet.
* Returns NULL if the mblk chain is consumed.
*/
static mblk_t *
{
/* Initialize to our inbound cksum adjustment... */
switch (ipha->ipha_protocol) {
case IPPROTO_TCP:
break;
case IPPROTO_UDP:
if (udpha->uha_checksum != 0) {
/* Adjust if the inbound one was not zero. */
if (udpha->uha_checksum == 0)
}
break;
case IPPROTO_IP:
/*
* This case is only an issue for self-encapsulated
* packets. So for now, fall through.
*/
break;
}
return (data_mp);
}
/*
* Strip ESP header, check padding, and fix IP header.
* Returns B_TRUE on success, B_FALSE if an error occurred.
*/
static boolean_t
{
/*
* Strip ESP data and fix IP header.
*
* XXX In case the beginning of esp_inbound() changes to not do a
* pullup, this part of the code can remain unchanged.
*/
if (isv4) {
} else {
}
/*
* "Next header" and padding length are the last two bytes in the
* ESP-protected datagram, thus the explicit - 1 and - 2.
* lastpad is the last byte of the padding, which can be used for
* a quick check to see if the padding is correct.
*/
if (isv4) {
/* Fix part of the IP header. */
/*
* Reality check the padlen. The explicit - 2 is for the
* padding length and the next-header bytes.
*/
"Corrupt ESP packet (padlen too big).\n");
padlen));
"hdr - ivlen(%d) = %d.\n",
return (B_FALSE);
}
/*
* Fix the rest of the header. The explicit - 2 is for the
* padding length and the next-header bytes.
*/
ipha->ipha_hdr_checksum = 0;
} else {
} else {
NULL);
} else {
/* Panic a DEBUG kernel. */
/* Otherwise, pretend it's IP + ESP. */
}
}
ivlen) {
"Corrupt ESP packet (v6 padlen too big).\n");
padlen));
("pkt len(%u) - ip hdr - esp hdr - ivlen(%d) = "
return (B_FALSE);
}
/*
* Fix the rest of the header. The explicit - 2 is for the
* padding length and the next-header bytes. IPv6 is nice,
* because there's no hdr checksum!
*/
}
/*
* Weak padding check: compare last-byte to length, they
* should be equal.
*/
"Corrupt ESP packet (lastpad != padlen).\n");
("lastpad (%d) not equal to padlen (%d):\n",
return (B_FALSE);
}
/*
* Strong padding check: Check all pad bytes to see that
* they're ascending. Go backwards using a descending counter
* to verify. padlen == 1 is checked by previous block, so
* only bother if we've more than 1 byte of padding.
* Consequently, start the check one byte before the location
* of "lastpad".
*/
/*
* This assert may have to become an if and a pullup
* if we start accepting multi-dblk mblks. For now,
* though, any packet here will have been pulled up in
* esp_inbound.
*/
/*
* Use "--lastpad" because we already checked the very
* last pad byte previously.
*/
while (--lastpad != 0) {
"packet (bad padding).\n");
("padding not in correct"
" format:\n"));
return (B_FALSE);
}
lastbyte--;
}
}
}
/* Trim off the padding. */
/*
* Remove the ESP header.
*
* The above assertions about data_mp's size will make this work.
*
* XXX Question: If I send up and get back a contiguous mblk,
* would it be quicker to bcopy over, or keep doing the dupb stuff?
* I go with copying for now.
*/
do {
src--;
dst--;
} else {
do {
src--;
dst--;
}
return (B_TRUE);
}
/*
* Updating use times can be tricky business if the ipsa_haspeer flag is
* set. This function is called once in an SA's lifetime.
*
* Caller has to REFRELE "assoc" which is passed in. This function has
* to REFRELE any peer SA that is obtained.
*/
static void
{
int outhash;
/* No peer? No problem! */
if (!assoc->ipsa_haspeer) {
return;
}
/*
* Otherwise, we want to grab both the original assoc and its peer.
* There might be a race for this, but if it's a real race, the times
* will be out-of-synch by at most a second, and since our time
* granularity is a second, this won't be a problem.
*
* If we need tight synchronization on the peer SA, then we need to
* reconsider.
*/
if (inbound) {
if (isv6) {
&inassoc->ipsa_dstaddr));
} else {
&inassoc->ipsa_dstaddr));
}
/* Q: Do we wish to set haspeer == B_FALSE? */
esp0dbg(("esp_set_usetime: "
"can't find peer for inbound.\n"));
return;
}
} else {
/* Q: Do we wish to set haspeer == B_FALSE? */
esp0dbg(("esp_set_usetime: "
"can't find peer for outbound.\n"));
return;
}
}
/* Update usetime on both. */
/*
* REFRELE any peer SA.
*
* Because of the multi-line macro nature of IPSA_REFRELE, keep
* them in { }.
*/
if (inbound) {
} else {
}
}
/*
* Handle ESP inbound data for IPv4 and IPv6.
* Returns NULL on failure, after freeing the mblk chain data_mp; otherwise
* the request is submitted to the crypto framework.
*/
mblk_t *
{
/*
* We may wish to check replay in-range-only here as an optimization.
* Include the reality check of ipsa->ipsa_replay >
* ipsa->ipsa_replay_wsize for times when it's the first N packets,
* where N == ipsa->ipsa_replay_wsize.
*
* Another check that may come here later is the "collision" check.
* If legitimate packets flow quickly enough, this won't be a problem,
* but collisions may cause authentication algorithm crunching to
* take place when it doesn't need to.
*/
&espstack->esp_dropper);
return (NULL);
}
/*
* Adjust the IP header's payload length to reflect the removal
* of the ICV.
*/
ipsa->ipsa_mac_len);
} else {
ipsa->ipsa_mac_len);
}
/* submit the request to the crypto framework */
}
/*
* Perform the really difficult work of inserting the proposed situation.
* Called while holding the algorithm lock.
*/
static void
netstack_t *ns)
{
/*
* Based upon algorithm properties, and what-not, prioritize a
* proposal, based on the ordering of the ESP algorithms in the
* alternatives in the policy rule or socket that was placed
* in the acquire record.
*
* For each action in policy list
* Add combination. If I've hit limit, return.
*/
continue;
if (!(prot->ipp_use_esp))
continue;
if (prot->ipp_esp_auth_alg != 0) {
[prot->ipp_esp_auth_alg];
continue;
}
[prot->ipp_encr_alg];
continue;
comb->sadb_comb_flags = 0;
comb->sadb_comb_reserved = 0;
comb->sadb_comb_auth = 0;
comb->sadb_comb_auth_minbits = 0;
comb->sadb_comb_auth_maxbits = 0;
} else {
}
/*
* The following may be based on algorithm
* properties, but in the meantime, we just pick
* some good, sensible numbers. Key mgmt. can
* (and perhaps should) be the place to finalize
* such decisions.
*/
/*
* No limits on allocations, since we really don't
* support that concept currently.
*/
/*
* These may want to come from policy rule..
*/
if (--combs == 0)
break; /* out of space.. */
comb++;
}
}
/*
* Prepare and actually send the SADB_ACQUIRE message to PF_KEY.
*/
static void
{
return;
}
/* Set up ACQUIRE. */
ns->netstack_ipsec);
esp0dbg(("sadb_setup_acquire failed.\n"));
return;
}
/* Insert proposal here. */
/*
* Must mutex_exit() before sending PF_KEY message up, in
* order to avoid recursive mutex_enter() if there are no registered
* listeners.
*
* Once I've sent the message, I'm cool anyway.
*/
}
}
/* XXX refactor me */
/*
* Handle the SADB_GETSPI message. Create a larval SA.
*/
static void
{
int rc, diagnostic;
/*
* Randomly generate a proposed SPI value
*/
if (cl_inet_getspi != NULL) {
} else {
sizeof (uint32_t));
}
ksi->ks_in_serial);
return;
ksi->ks_in_serial);
return;
}
/*
* XXX - We may randomly collide. We really should recover from this.
* Unfortunately, that could require spending way-too-much-time
* in here. For now, let the user retry.
*/
} else {
}
/*
* Check for collisions (i.e. did sadb_getspi() return with something
* that already exists?).
*
* Try outbound first. Even though SADB_GETSPI is traditionally
* for inbound SAs, you never know what a user might do.
*/
}
/*
* I don't have collisions elsewhere!
*/
} else {
/*
* sadb_insertassoc() also checks for collisions, so
* if there's a colliding entry, rc will be set
* to EEXIST.
*/
}
/*
* Can exit outbound mutex. Hold inbound until we're done
* with newbie.
*/
if (rc != 0) {
return;
}
/* Can write here because I'm still holding the bucket lock. */
/*
* Construct successful return message. We have one thing going
* for us in PF_KEY v2. That's the fact that
* sizeof (sadb_spirange_t) == sizeof (sadb_sa_t)
*/
/* Convert KEYSOCK_IN to KEYSOCK_OUT. */
/*
* Can safely putnext() to esp_pfkey_q, because this is a turnaround
* from the esp_pfkey_q.
*/
}
/*
* Insert the ESP header into a packet. Duplicate an mblk, and insert a newly
* allocated mblk with the ESP header in between the two.
*/
static boolean_t
{
}
/* "scratch" is the 2nd half, split_mp is the first. */
("esp_insert_esp: can't allocate scratch.\n"));
return (B_FALSE);
}
/* NOTE: dupb() doesn't set b_cont appropriately. */
}
/*
* At this point, split_mp is exactly "wheretodiv" bytes long, and
* holds the end of the pre-ESP part of the datagram.
*/
return (B_TRUE);
}
/*
* Section 7 of RFC 3947 says:
*
* 7. Recovering from the Expiring NAT Mappings
*
* There are cases where NAT box decides to remove mappings that are still
* alive (for example, when the keepalive interval is too long, or when the
* NAT box is rebooted). To recover from this, ends that are NOT behind
* NAT SHOULD use the last valid UDP encapsulated IKE or IPsec packet from
* the other end to determine which IP and port addresses should be used.
* The host behind dynamic NAT MUST NOT do this, as otherwise it opens a
* DoS attack possibility because the IP address or port of the other host
* will not change (it is not behind NAT).
*
* Keepalives cannot be used for these purposes, as they are not
* authenticated, but any IKE authenticated IKE packet or ESP packet can be
* used to detect whether the IP address or the port has changed.
*
* The following function will check an SA and its explicitly-set pair to see
* if the NAT-T remote port matches the received packet (which must have
* passed ESP authentication, see esp_in_done() for the caller context). If
* there is a mismatch, the SAs are updated. It is not important if we race
* with a transmitting thread, as if there is a transmitting thread, it will
* merely emit a packet that will most-likely be dropped.
*
* "ports" are ordered src,dst, and assoc is an inbound SA, where src should
* match ipsa_remote_nat_port and dst should match ipsa_local_nat_port.
*/
/*
 * Split a 32-bit "ports" value into its two 16-bit halves in memory order:
 * FIRST_16() yields the 16 bits stored first (src) and NEXT_16() the 16 bits
 * that follow (dst).  Which half of the integer that is depends on the
 * host byte order, so both macros are defined for both byte orders.
 */
#ifdef _LITTLE_ENDIAN
#define FIRST_16(x) ((x) & 0xFFFF)
#define NEXT_16(x) (((x) >> 16) & 0xFFFF)
#else
#define FIRST_16(x) (((x) >> 16) & 0xFFFF)
#define NEXT_16(x) ((x) & 0xFFFF)
#endif
static void
{
/* We found a conn_t, therefore local != 0. */
/* Assume an IPv4 SA. */
/*
* On-the-wire rport == 0 means something's very wrong.
* An unpaired SA is also useless to us.
* If we are behind the NAT, don't bother.
* A zero local NAT port defaults to 4500, so check that too.
* And, of course, if the ports already match, we don't need to
* bother.
*/
(assoc->ipsa_remote_nat_port == 0 &&
return;
/* Try and snag the peer. NOTE: Assume IPv4 for now. */
assoc->ipsa_srcaddr[0]);
/* We probably lost a race to a deleting or expiring thread. */
if (outbound_peer == NULL)
return;
/*
* Hold the mutexes for both SAs so we don't race another inbound
* thread. A lock-entry order shouldn't matter, since all other
* per-ipsa locks are individually held-then-released.
*
* Luckily, this has nothing to do with the remote-NAT address,
* so we don't have to re-scribble the cached-checksum differential.
*/
}
/*
* Finish processing of an inbound ESP packet after processing by the
* crypto framework.
* - Remove the ESP header.
* - Send packet back to IP.
* If authentication was performed on the packet, this function is called
* only if the authentication succeeded.
* On success returns data_mp, on failure returns NULL and frees the
* mblk chain data_mp.  NULL is also returned when the packet is taken
* over for cluster buffering.
*/
static mblk_t *
{
/* get the pointer to the ESP header */
/* authentication-only ESP */
} else {
/* encryption present */
/* encryption-only ESP */
} else {
/* encryption with authentication */
}
}
/*
* Authentication passed if we reach this point.
* Packets with authentication will have the ICV
* after the crypto data. Adjust b_wptr before
* making padlen checks.
*/
/*
* Check replay window here!
* For right now, assume keysock will set the replay window
* size to zero for SAs that have an unspecified sender.
* This may change...
*/
/*
* Log the event. As of now we print out an event.
* Do not print the replay failure number, or else
* syslog cannot collate the error messages. Printing
* the replay number that failed opens a denial-of-
* service attack.
*/
"Replay failed for ESP spi 0x%x, dst %s.\n",
goto drop_and_bail;
}
if (is_natt) {
}
}
/* The ipsa has hit hard expiration, LOG and AUDIT. */
"ESP association 0x%x, dst %s had bytes expire.\n",
goto drop_and_bail;
}
/*
* Remove ESP header and padding from packet. I hope the compiler
* spews "branch, predict taken" code for this.
*/
&espstack->esp_dropper);
return (NULL);
}
}
if (is_natt)
/*
* Cluster buffering case. Tell caller that we're
* handling the packet.
*/
return (NULL);
}
return (data_mp);
}
&espstack->esp_dropper);
return (NULL);
}
/*
* Called upon failing the inbound ICV check. The message passed as
* argument is freed.
*/
static void
{
/*
* Log the event. Don't print to the console, block
* potential denial-of-service attack.
*/
"ESP Authentication failed for spi 0x%x, dst %s.\n",
&espstack->esp_dropper);
}
/*
* Invoked for outbound packets after ESP processing. If the packet
* also requires AH, performs the AH SA selection and AH processing.
* Returns data_mp if the AH processing was not needed or if it was
* performed successfully. Returns NULL and consumes the passed mblk
* if AH processing was required but could not be performed.
*
*/
static mblk_t *
{
}
if (!ap->ipa_want_ah)
return (data_mp);
/*
* Normally the AH SA would have already been put in place
* but it could have been flushed so we need to look for it.
*/
return (NULL);
}
}
return (data_mp);
}
/*
* Kernel crypto framework callback invoked after completion of async
* crypto requests for outbound packets.
*/
static void
{
netstack_t *ns;
/*
* First remove the ipsec_crypto_t mblk
* Note that we need to ipsec_free_crypto_data(mp) once done with ic.
*/
/*
* Extract the ip_xmit_attr_t from the first mblk.
* Verifies that the netstack and ill is still around; could
* have vanished while kEf was doing its work.
* On successful return we have a nce_t and the ill/ipst can't
* disappear until we do the nce_refrele in ixa_cleanup.
*/
/* We have nowhere to do stats since ixa_ipst could be NULL */
}
goto done;
}
if (status == CRYPTO_SUCCESS) {
/*
* If a ICV was computed, it was stored by the
* crypto framework at the end of the packet.
*/
/* NAT-T packet. */
/* do AH processing if needed */
goto done;
} else {
/* Outbound shouldn't see invalid MAC */
("esp_kcf_callback_outbound: crypto failed with 0x%x\n",
status));
&espstack->esp_dropper);
}
done:
ixa_cleanup(&ixas);
(void) ipsec_free_crypto_data(mp);
}
/*
* Kernel crypto framework callback invoked after completion of async
* crypto requests for inbound packets.
*/
static void
{
netstack_t *ns;
/*
* First remove the ipsec_crypto_t mblk
* Note that we need to ipsec_free_crypto_data(mp) once done with ic.
*/
/*
* Extract the ip_recv_attr_t from the first mblk.
* Verifies that the netstack and ill is still around; could
* have vanished while kEf was doing its work.
*/
/* The ill or ip_stack_t disappeared on us */
goto done;
}
if (status == CRYPTO_SUCCESS) {
goto done;
/* finish IPsec processing */
} else if (status == CRYPTO_INVALID_MAC) {
} else {
("esp_kcf_callback: crypto failed with 0x%x\n",
status));
&espstack->esp_dropper);
}
done:
(void) ipsec_free_crypto_data(mp);
}
/*
* Invoked on crypto framework failure during inbound and outbound processing.
*/
static void
{
&espstack->esp_dropper);
if (is_inbound)
else
}
/*
* A statement-equivalent macro, _cr MUST point to a modifiable
* crypto_call_req_t.
*/
}
} else { \
} \
}
}
/*
* Returns data_mp if successfully completed the request. Returns
* NULL if it failed (and increments InDiscards) or if it is pending.
*/
static mblk_t *
{
int kef_rc;
#ifdef IPSEC_LATENCY_TEST
#else
#endif
/*
* An inbound packet is of the form:
* [IP,options,ESP,IV,data,ICV,pad]
*/
/* Packet length starting at IP header ending after ESP ICV. */
/*
* Counter mode algs need a nonce. This is setup in sadb_common_add().
* If for some reason we are using a SA which does not have a nonce
* then we must fail here.
*/
return (NULL);
}
if (force) {
/* We are doing asynch; allocate mblks to hold state */
return (NULL);
}
} else {
/*
* If we know we are going to do sync then ipsec_crypto_t
* should be on the stack.
*/
}
if (do_auth) {
/* authentication context template */
/* ICV to be verified */
/* authentication starts at the ESP header */
if (!do_encr) {
/* authentication only */
/* initialize input data argument */
/* call the crypto framework */
&ic->ic_crypto_data,
}
}
if (do_encr) {
/* encryption template */
/* Call the nonce update function. Also passes in IV */
if (!do_auth) {
/* decryption only */
/* initialize input data argument */
/* call the crypto framework */
}
}
/* dual operation */
/* initialize input data argument */
/* specify IV */
/* call the framework */
}
switch (kef_rc) {
case CRYPTO_SUCCESS:
if (force) {
/* Free mp after we are done with ic */
(void) ip_recv_attr_free_mblk(mp);
}
return (esp_mp);
case CRYPTO_QUEUED:
/* esp_kcf_callback_inbound() will be invoked on completion */
return (NULL);
case CRYPTO_INVALID_MAC:
if (force) {
}
/* esp_mp was passed to ip_drop_packet */
return (NULL);
}
if (force) {
}
/* esp_mp was passed to ip_drop_packet */
return (NULL);
}
/*
* Compute the IP and UDP checksums -- common code for both keepalives and
* actual ESP-in-UDP packets. Be flexible with multiple mblks because ESP
* uses mblk-insertion to insert the UDP header.
* TODO - If there is an easy way to prep a packet for HW checksums, make
* it happen here.
* Note that this is used both before calling ip_output_simple and
* in the esp datapath. The former could use IXAF_SET_ULP_CKSUM but not the
* latter.
*/
static void
{
int offset;
ipha->ipha_hdr_checksum = 0;
/* arr points to the IP header. */
/* arr[6-9] are the IP addresses. */
}
/* arr points to the UDP header's checksum field. */
}
}
/*
* taskq handler so we can send the NAT-T keepalive on a separate thread.
*/
static void
actually_send_keepalive(void *arg)
{
netstack_t *ns;
/* Disappeared */
return;
}
/* No ULP checksum; done by esp_prepare_udp */
ixa_cleanup(&ixas);
}
/*
* Send a one-byte UDP NAT-T keepalive.
*/
void
{
return;
ipha->ipha_type_of_service = 0;
/* Use the low-16 of the SPI so we have some clue where it came from. */
ipha->ipha_hdr_checksum = 0;
udpha->uha_checksum = 0;
/*
* We're holding an isaf_t bucket lock, so pawn off the actual
* packet transmission to another thread. Just in case syncq
* processing causes a same-bucket packet to be processed.
*/
TQ_NOSLEEP) == 0) {
/* Assume no memory if taskq_dispatch() fails. */
}
}
/*
* Returns data_mp if successfully completed the request. Returns
* NULL if it failed (and increments InDiscards) or if it is pending.
*/
static mblk_t *
{
int kef_rc = CRYPTO_FAILED;
#ifdef IPSEC_LATENCY_TEST
#else
#endif
/*
* Outbound IPsec packets are of the form:
* [IP,options] -> [ESP,IV] -> [data] -> [pad,ICV]
* unless it's NATT, then it's
* [IP,options] -> [udp][ESP,IV] -> [data] -> [pad,ICV]
* Get a pointer to the mblk containing the ESP header.
*/
/*
* Combined mode algs need a nonce. This is setup in sadb_common_add().
* If for some reason we are using a SA which does not have a nonce
* then we must fail here.
*/
return (NULL);
}
if (force) {
/* We are doing asynch; allocate mblks to hold state */
return (NULL);
}
} else {
/*
* If we know we are going to do sync then ipsec_crypto_t
* should be on the stack.
*/
}
if (do_auth) {
/* authentication context template */
/* where to store the computed mac */
/* authentication starts at the ESP header */
if (!do_encr) {
/* authentication only */
/* initialize input data argument */
/* call the crypto framework */
&ic->ic_crypto_data,
}
}
if (do_encr) {
/* encryption context template */
/* Call the nonce update function. */
if (!do_auth) {
/* encryption only, skip mblk that contains ESP hdr */
/* initialize input data argument */
/*
* For combined mode ciphers, the ciphertext is the same
* size as the clear text, the ICV should follow the
* ciphertext. To convince the kcf to allow in-line
* encryption, with an ICV, use ipsec_out_crypto_mac
* to point to the same buffer as the data. The calling
* function needs to ensure the buffer is large enough to
* include the ICV.
*
* The IV is already written to the packet buffer, the
* nonce setup function copied it to the params struct
* for the cipher to use.
*/
&ic->ic_crypto_mac,
sizeof (crypto_data_t));
}
/* call the crypto framework */
}
}
/*
* Encryption and authentication:
* Pass the pointer to the mblk chain starting at the ESP
* header to the framework. Skip the ESP header mblk
* for encryption, which is reflected by an encryption
* offset equal to the length of that mblk. Start
* the authentication at the ESP header, i.e. use an
* authentication offset of zero.
*/
/* specify IV */
/* call the framework */
}
switch (kef_rc) {
case CRYPTO_SUCCESS:
if (force) {
}
if (is_natt)
return (data_mp);
case CRYPTO_QUEUED:
/* esp_kcf_callback_outbound() will be invoked on completion */
return (NULL);
}
if (force) {
}
/* data_mp was passed to ip_drop_packet */
return (NULL);
}
/*
* Handle outbound IPsec processing for IPv4 and IPv6
*
* Returns data_mp if successfully completed the request. Returns
* NULL if it failed (and increments InDiscards) or if it is pending.
*/
static mblk_t *
{
/*
* <sigh> We have to copy the message here, because TCP (for example)
* keeps a dupb() of the message lying around for retransmission.
* Since ESP changes the whole of the datagram, we have to create our
* own copy lest we clobber TCP's data. Since we have to copy anyway,
* we might as well make use of msgpullup() and get the mblk into one
* contiguous piece!
*/
esp0dbg(("esp_outbound: msgpullup() failed, "
"dropping packet.\n"));
&espstack->esp_dropper);
return (NULL);
}
/*
* Get the outer IP header in shape to escape this system..
*/
/*
* Need to update packet with any CIPSO option and update
* ixa_tsl to capture the new label.
* We allocate a separate ixa for that purpose.
*/
&espstack->esp_dropper);
return (NULL);
}
/* Packet dropped by sadb_whack_label */
return (NULL);
}
}
/*
* Reality check....
*/
} else {
/*
* Destination options are tricky. If we get in here,
* then we have a terminal header following the
* destination options. We need to adjust backwards
* so we insert ESP BEFORE the destination options
* bag. (So that the dstopts get encrypted!)
*
* Since this is for outbound packets only, we know
* that non-terminal destination options only precede
* routing headers.
*/
}
} else {
/* It's probably IP + ESP. */
}
}
/* wedge in UDP header */
}
/*
* Set up ESP header and encryption padding for ENCR PI request.
*/
/* Determine the padding length. Pad to 4-bytes for no-encryption. */
/*
* Pad the data to the length of the cipher block size.
* Include the two additional bytes (hence the - 2) for the
* padding length and the next header. Take this into account
* when calculating the actual length of the padding.
*/
(block_size - 1);
} else {
iv_len = 0;
(sizeof (uint32_t) - 1);
}
/* Allocate ESP header and IV. */
/*
* Update association byte-count lifetimes. Don't forget to take
* into account the padding length and next-header (hence the + 2).
*
* Use the amount of data fed into the "encryption algorithm". This
* is the IV, the data length, the padding length, and the final two
* bytes (padlen, and next-header).
*
*/
&espstack->esp_dropper);
if (need_refrele)
return (NULL);
}
&espstack->esp_dropper);
if (need_refrele)
return (NULL);
}
if (is_natt) {
/*
* Set the checksum to 0, so that the esp_prepare_udp() call
* can do the right thing.
*/
udpha->uha_checksum = 0;
}
/*
* XXX We have replay counter wrapping.
* We probably want to nuke this SA (and its peer).
*/
"Outbound ESP SA (0x%x, %s) has wrapped sequence.\n",
&espstack->esp_dropper);
if (need_refrele)
return (NULL);
}
/*
* iv_ptr points to the mblk which will contain the IV once we have
* written it there. This mblk will be part of a mblk chain that
* will make up the packet.
*
* For counter mode algorithms, the IV is a 64 bit quantity, it
* must NEVER repeat in the lifetime of the SA, otherwise an
* attacker who had recorded enough packets might be able to
* determine some clear text.
*
* To ensure this does not happen, the IV is stored in the SA and
* incremented for each packet, the IV is then copied into the
* "packet" for transmission to the receiving system. The IV will
* also be copied into the nonce, when the packet is encrypted.
*
* CBC mode algorithms use a random IV for each packet. We do not
* require the highest quality random bits, but for best security
* with CBC mode ciphers, the value must be unlikely to repeat and
* must not be known in advance to an adversary capable of influencing
* the clear text.
*/
espstack)) {
if (need_refrele)
return (NULL);
}
/* Fix the IP header. */
if (is_natt) {
*nhp = IPPROTO_UDP;
} else {
*nhp = IPPROTO_ESP;
}
ipha->ipha_hdr_checksum = 0;
} else {
*nhp = IPPROTO_ESP;
}
/* I've got the two ESP mblks, now insert them. */
/* NOTE: esp_insert_esp() only fails if there's no memory. */
&espstack->esp_dropper);
if (need_refrele)
return (NULL);
}
/* Append padding (and leave room for ICV). */
;
esp0dbg(("esp_outbound: Can't allocate tailmp.\n"));
&espstack->esp_dropper);
if (need_refrele)
return (NULL);
}
}
/*
* If there's padding, N bytes of padding must be of the form 0x1,
* 0x2, 0x3... 0xN.
*/
for (i = 0; i < padlen; ) {
i++;
}
/*
* Okay. I've set up the pre-encryption ESP. Let's do it!
*/
if (mac_len > 0) {
} else {
}
if (need_refrele)
return (data_mp);
}
/*
* IP calls this to validate the ICMP errors that
* we got from the network.
*/
mblk_t *
{
/*
* Unless we get an entire packet back, this function is useless.
* Why?
*
* 1.) Partial packets are useless, because the "next header"
* is at the end of the decrypted ESP packet. Without the
* whole packet, this is useless.
*
* 2.) If we ever use a stateful cipher, such as a stream or a
* one-time pad, we can't do anything.
*
* Since the chances of us getting an entire packet back are very
* very small, we discard here.
*/
&espstack->esp_dropper);
return (NULL);
}
/*
* Construct an SADB_REGISTER message with the current algorithms.
* This function gets called when 'ipsecalgs -s' is run or when
* in.iked (or other KMD) starts.
*
* Returns B_TRUE on success and B_FALSE on failure (for example, when
* the KEYSOCK_OUT mblk cannot be allocated).
*/
static boolean_t
{
uint_t i, numalgs_snap;
int current_aalgs;
int current_ealgs;
/* Allocate the KEYSOCK_OUT. */
if (keysock_out_mp == NULL) {
esp0dbg(("esp_register_out: couldn't allocate mblk.\n"));
return (B_FALSE);
}
}
}
/*
* Allocate the PF_KEY message that follows KEYSOCK_OUT.
*/
/*
* Fill SADB_REGISTER message's algorithm descriptors. Hold
* down the lock while filling it.
*
* Return only valid algorithms, so the number of algorithms
* to send up may be less than the number of algorithm entries
* in the table.
*/
/* Count authentication algorithms over the IPSEC_MAX_ALGS slots. */
for (num_aalgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++)
num_aalgs++;
/* Account for the sasupp_auth header only if something was counted. */
if (num_aalgs != 0) {
allocsize += sizeof (*sasupp_auth);
}
/* Same counting pass for the encryption algorithms. */
for (num_ealgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++)
num_ealgs++;
/* Account for the sasupp_encr header only if something was counted. */
if (num_ealgs != 0) {
allocsize += sizeof (*sasupp_encr);
}
return (B_FALSE);
}
if (num_aalgs != 0) {
/* numalgs_snap tracks how many descriptors have been written. */
numalgs_snap = 0;
for (i = 0;
i++) {
continue;
/* The IV length is reported as 0 for authentication algorithms. */
saalg->sadb_alg_ivlen = 0;
authalgs[i]->alg_increment;
authalgs[i]->alg_saltlen);
/* One more descriptor filled in; advance to the next slot. */
numalgs_snap++;
saalg++;
}
#ifdef DEBUG
/*
* Reality check to make sure I snagged all of the
* algorithms.
*/
for (; i < IPSEC_MAX_ALGS; i++) {
"Missed aalg #%d.\n", i);
}
}
#endif /* DEBUG */
}
if (num_ealgs != 0) {
/* Restart the written-descriptor count for the encryption pass. */
numalgs_snap = 0;
for (i = 0;
continue;
/*
* We could advertise the ICV length, except there
* is not a value in sadb_x_algb to do this.
* saalg->sadb_alg_maclen = encralgs[i]->alg_maclen;
*/
encralgs[i]->alg_increment;
/* One more descriptor filled in; advance to the next slot. */
numalgs_snap++;
saalg++;
}
#ifdef DEBUG
/*
* Reality check to make sure I snagged all of the
* algorithms.
*/
for (; i < IPSEC_MAX_ALGS; i++) {
"Missed ealg #%d.\n", i);
}
}
#endif /* DEBUG */
}
}
/* Now fill the rest of the SADB_REGISTER message. */
samsg->sadb_msg_errno = 0;
samsg->sadb_msg_reserved = 0;
/*
* from me over a new alg., I could give two hoots about sequence.
*/
if (sasupp_auth != NULL) {
}
if (sasupp_encr != NULL) {
}
else {
/* Failure path: report B_FALSE to the caller. */
return (B_FALSE);
}
return (B_TRUE);
}
/*
* Invoked when the algorithm table changes. Causes SADB_REGISTER
* messages containing the current list of algorithms to be
* sent up to the ESP listeners.
*/
void
{
/*
* Time to send a PF_KEY SADB_REGISTER message to ESP listeners
* everywhere. (The function itself checks for NULL esp_pfkey_q.)
*/
}
/*
* Stub function that taskq_dispatch() invokes to take the mblk (in arg)
* and send() it into ESP and IP again.
*
* arg is the opaque taskq argument carrying that mblk.
*/
static void
inbound_task(void *arg)
{
/* The ill or ip_stack_t disappeared on us */
goto done;
}
/* Single exit point, also reached by the bail-out above. */
done:
}
/*
* Restart ESP after the SA has been added.
*
* Nothing is returned to the caller on any path.
*/
static void
{
return;
/*
* Either it failed or is pending. In the former case
* ipIfStatsInDiscards was increased.
*/
return;
}
}
/*
* Now that weak-key passed, actually ADD the security association, and
* send back a reply ADD message.
*
* Returns an errno-style value: the error from locating the table(s),
* EINVAL for an unrecognized destination address type, ESRCH when the
* larval SA being updated has disappeared, and otherwise rc.
*/
static int
{
int rc;
int error;
/*
* Locate the appropriate table(s).
*/
&sq, diagnostic);
if (error)
return (error);
/*
* Use the direction flags provided by the KMD to determine
* if the inbound or outbound table should be the primary
* for this SA. If these flags were absent then make this
* decision based on the addresses.
*/
is_inbound = B_TRUE;
}
/*
* The KMD did not set a direction flag, determine which
* table to insert the SA into based on addresses.
*/
switch (ksi->ks_in_dsttype) {
case KS_IN_ADDR_MBCAST:
/* FALLTHRU */
/*
* If the source address is either one of mine, or unspecified
* (which is best summed up by saying "not 'not mine'"),
* then the association is potentially bi-directional,
* in that it can be used for inbound traffic and outbound
* traffic. The best example of such an SA is a multicast
* SA (which allows me to receive the outbound traffic).
*/
case KS_IN_ADDR_ME:
is_inbound = B_TRUE;
break;
/*
* If the source address is literally not mine (either
* unspecified or not mine), then this SA may have an
* address that WILL be mine after some configuration.
* We pay the price for this by making it a bi-directional
* SA.
*/
case KS_IN_ADDR_NOTME:
}
break;
default:
/* Any other destination address type is rejected. */
return (EINVAL);
}
}
/*
* Find an ACQUIRE list entry if possible. If we've added an SA that
* suits the needs of an ACQUIRE list entry, we can eliminate the
* ACQUIRE list entry and transmit the enqueued packets. Use the
* high-bit of the sequence number to queue it. Key off destination
* addr, and change acqrec's state.
*/
/*
* Q: I only check sequence. Should I check dst?
* A: Yes, check dest because those are the packets
* that are queued up.
*/
break;
}
/*
* AHA! I found an ACQUIRE record for this SA.
* Grab the msg list, and free the acquire record.
* I already am holding the lock for this record,
* so all I have to do is free it.
*/
}
}
/*
* Find PF_KEY message, and see if I'm an update. If so, find entry
* in larval list (if there).
*/
}
esp0dbg(("Larval update, but larval disappeared.\n"));
return (ESRCH);
} /* Else sadb_common_add unlinks it for me! */
}
/*
* Hold again, because sadb_common_add() consumes a reference,
* and we don't want to clear_lpkt() without a reference.
*/
}
if (rc == 0) {
lpkt, TQ_NOSLEEP);
}
}
}
/*
* How much more stack will I create with all of these
* esp_outbound() calls?
*/
/* Handle the packets queued waiting for the SA */
/*
* Extract the ip_xmit_attr_t from the first mblk.
* Verifies that the netstack and ill is still around; could
* have vanished while iked was doing its work.
* disappear until we do the nce_refrele in ixa_cleanup.
*/
&espstack->esp_dropper);
} else if (rc != 0) {
&espstack->esp_dropper);
} else {
}
/* Runs for every queued packet, whichever branch was taken above. */
ixa_cleanup(&ixas);
}
return (rc);
}
/*
* Process one of the queued messages (from ipsacq_mp) once the SA
* has been added.
*
* Every exit is a plain return; no status is reported to the caller.
*/
static void
{
&espstack->esp_dropper);
return;
}
return;
/* Do AH processing if needed. */
return;
}
/*
* Validate an ADD request before the security association is added:
* required extensions, ADD-specific reality checks, and the
* authentication and encryption algorithm and key checks. Returns
* EINVAL when a check fails and EOPNOTSUPP for unsupported requests.
* This may become a common AH/ESP
* routine eventually.
*/
static int
{
/* I need certain extensions present for an ADD message. */
return (EINVAL);
}
return (EINVAL);
}
return (EINVAL);
}
return (EINVAL);
}
return (EINVAL);
}
return (EINVAL);
}
/* Sundry ADD-specific reality checks. */
return (EINVAL);
}
return (EINVAL);
}
/* This check is compiled out when IPSEC_LATENCY_TEST is defined. */
#ifndef IPSEC_LATENCY_TEST
return (EINVAL);
}
#endif
return (EINVAL);
}
return (EINVAL);
}
if (nttext_loc == NULL) {
return (EINVAL);
}
return (EINVAL);
}
}
if (nttext_rem == NULL) {
return (EINVAL);
}
return (EINVAL);
}
}
/* Stuff I don't support, for now. XXX Diagnostic? */
return (EOPNOTSUPP);
return (EINVAL);
/*
* XXX Policy : I'm not checking identities at this time,
* but if I did, I'd do them here, before I sent
* the weak key check up to the algorithm.
*/
/*
* First locate the authentication algorithm.
*/
#ifdef IPSEC_LATENCY_TEST
#else
#endif
[assoc->sadb_sa_auth];
assoc->sadb_sa_auth));
return (EINVAL);
}
/*
* Sanity check key sizes.
* Note: It's not possible to use SADB_AALG_NONE because
* this auth_alg is not defined with ALG_FLAG_VALID. If this
* ever changes, the same check for SADB_AALG_NONE and
* an auth_key != NULL should be made here (see below).
*/
return (EINVAL);
}
/* Check key and fix parity if needed. */
diagnostic) != 0) {
return (EINVAL);
}
}
/*
* Then locate the encryption algorithm.
*/
[assoc->sadb_sa_encrypt];
assoc->sadb_sa_encrypt));
return (EINVAL);
}
/*
* Sanity check key sizes. If the encryption algorithm is
* SADB_EALG_NULL but the encryption key is NOT
* NULL then complain.
*
* The keying material includes salt bits if required by
* algorithm and optionally the Initial IV, check the
* length of what's left.
*/
return (EINVAL);
}
/* Check key. */
diagnostic) != 0) {
return (EINVAL);
}
}
diagnostic, espstack));
}
/*
* Update a security association. Updates come in two varieties. The first
* is an update of lifetimes on a non-larval SA. The second is an update of
* a larval SA, which ends up looking a lot more like an add.
*
* Returns EINVAL for a rejected request, otherwise rcode.
*/
static int
{
int rcode;
return (EINVAL);
}
/* A non-zero rcode is handed straight back to the caller. */
(rcode != 0)) {
return (rcode);
}
return (rcode);
}
/* XXX refactor me */
/*
* Delete a security association. This is REALLY likely to be code common to
* both AH and ESP. Find the association, then unlink it.
*
* Returns EINVAL when the request is rejected in the else branch below.
*/
static int
{
struct sockaddr_in *sin;
} else {
return (EINVAL);
}
espstack->esp_pfkey_q));
}
}
/* XXX refactor me */
/*
* Convert the entire contents of all of ESP's SA tables into PF_KEY SADB_DUMP
* messages.
*/
static void
{
int error;
/*
* Dump each fanout, bailing if error is non-zero.
*/
if (error != 0)
goto bail;
/* Reached both on error and after a complete dump. */
bail:
}
/*
* First-cut reality check for an inbound PF_KEY message.
*
* Returns B_TRUE when a failure was found and B_FALSE when the message
* passed (note the inverted sense: true means "failed").
*/
static boolean_t
{
int diagnostic;
goto badmsg;
}
goto badmsg;
}
return (B_FALSE); /* False ==> no failures */
ksi->ks_in_serial);
return (B_TRUE); /* True ==> failures */
}
/*
* ESP parsing of PF_KEY messages. Keysock did most of the really silly
* error cases. What I receive is a fully-formed, syntactically legal
* PF_KEY message. I then need to check semantics...
*
* This code may become common to AH and ESP. Stay tuned.
*
* I also make the assumption that db_ref's are cool. If this assumption
* is wrong, this means that someone other than keysock or me has been
* mucking with PF_KEY messages.
*
* Work is dispatched on samsg->sadb_msg_type; nothing is returned.
*/
static void
{
int error;
int diagnostic = SADB_X_DIAGNOSTIC_NONE;
/*
* If applicable, convert unspecified AF_INET6 to unspecified
* AF_INET. And do other address reality checks.
*/
return;
}
switch (samsg->sadb_msg_type) {
case SADB_ADD:
if (error != 0) {
}
/* else esp_add_sa() took care of things. */
break;
/* The three delete variants share one handler. */
case SADB_DELETE:
case SADB_X_DELPAIR:
case SADB_X_DELPAIR_STATE:
if (error != 0) {
}
/* Else esp_del_sa() took care of things. */
break;
case SADB_GET:
if (error != 0) {
}
/* Else sadb_get_sa() took care of things. */
break;
case SADB_FLUSH:
break;
case SADB_REGISTER:
/*
* Hmmm, let's do it! Check for extensions (there should
* be none), extract the fields, call esp_register_out(),
* then either free or report an error.
*
* Keysock takes care of the PF_KEY bookkeeping for this.
*/
} else {
/*
* Only way this path hits is if there is a memory
* failure. It will not return B_FALSE because of
* lack of esp_pfkey_q if I am in wput().
*/
}
break;
/* Both update variants share one handler. */
case SADB_UPDATE:
case SADB_X_UPDATEPAIR:
/*
* Find a larval, if not there, find a full one and get
* strict.
*/
if (error != 0) {
}
/* else esp_update_sa() took care of things. */
break;
case SADB_GETSPI:
/*
* Reserve a new larval entry.
*/
break;
case SADB_ACQUIRE:
/*
* most likely an error. Inbound ACQUIRE messages should only
* have the base header.
*/
break;
case SADB_DUMP:
/*
* Dump all entries.
*/
/* esp_dump will take care of the return message, etc. */
break;
case SADB_EXPIRE:
/* Should never reach me. */
break;
default:
break;
}
}
/*
* Handle case where PF_KEY says it can't find a keysock for one of my
* ACQUIRE messages.
*
* Nothing is returned to the caller.
*/
static void
{
return;
}
/*
* If keysock can't find any registered, delete the acquire record
* immediately, and handle errors.
*/
/*
* Use the write-side of the esp_pfkey_q
*/
}
}
/*
* ESP module write put routine.
*
* Handles M_CTL messages (dispatched further on ii->ipsec_info_type) and
* M_IOCTL messages (ND_SET/ND_GET); the default case covers every other
* message type.
*/
static void
{
/* NOTE: Each case must take care of freeing or passing mp. */
case M_CTL:
/* Not big enough message. */
break;
}
switch (ii->ipsec_info_type) {
case KEYSOCK_OUT_ERR:
break;
case KEYSOCK_IN:
/* Parse the message. */
break;
case KEYSOCK_HELLO:
break;
default:
ii->ipsec_info_type));
break;
}
break;
case M_IOCTL:
/* ND_SET and ND_GET share the same handling. */
case ND_SET:
case ND_GET:
return;
} else {
}
/* FALLTHRU */
default:
/* We really don't support any other ioctls, do we? */
/* Return EINVAL */
return;
}
default:
("Got default message, type %d, passing to IP.\n",
}
}
/*
* Wrapper to allow IP to trigger an ESP association failure message
* during inbound SA selection.
*/
void
{
/*
* Gated by the ipsecesp_log_unknown_spi tunable (range 0-1, default 0
* in lcl_param_arr); presumably this guards the logging step.
*/
if (espstack->ipsecesp_log_unknown_spi) {
}
&espstack->esp_dropper);
}
/*
* Initialize the ESP input and output processing functions.
*
* Returns nothing.
*/
void
{
}