snoop_pf.c revision 041bde0a02e9359336a030297bb507ce6bda43f1
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <stdio.h>
#include <stddef.h>
#include <ctype.h>
#include <string.h>
#include <fcntl.h>
#include <string.h>
#include <sys/isa_defs.h>
#include <netinet/in_systm.h>
#include <netinet/if_ether.h>
#include <netdb.h>
#include <setjmp.h>
#include "snoop.h"
#include "snoop_vlan.h"
/*
* This module generates code for the kernel packet filter.
* The kernel packet filter is more efficient since it
* operates without context switching or moving data into
* the capture buffer. On the other hand, it is limited
* in its filtering ability i.e. can't cope with variable
* length headers, can't compare the packet size, 1 and 4 octet
* comparisons are awkward, code space is limited to ENMAXFILTERS
* halfwords, etc.
* The parser is the same for the user-level packet filter though
* more limited in the variety of expressions it can generate
* code for. If the pf compiler finds an expression it can't
* handle, it tries to set up a split filter in the kernel and do the
* remaining filtering in userland. If that also fails, it falls back
* to the userland filter alone. (See additional comment in pf_compile)
*/
/*
 * Packet filter structure defined in another file.
 * NOTE(review): presumably the filter program this module fills in -
 * confirm against the definition.
 */
extern struct Pf_ext_packetfilt pf;
int eaddr; /* need ethernet addr */
int opstack; /* operand stack depth */
/*
 * Address-family selectors; the IP address matching code in this file
 * switches on these values through its inet_type argument.
 */
#define IPV4_ONLY 0
#define IPV6_ONLY 1
#define IPV4_AND_IPV6 2
/*
 * Record types for the lookup tables defined below.
 *
 * NOTE(review): neither typedef has a visible closing "} name;" line in
 * this view.  The tables that follow use the type names
 * transport_table_t and network_table_t - confirm the complete
 * declarations before relying on the field list shown here.
 */
typedef struct {
int transport_protocol;
int network_protocol;
/*
 * offset is the offset in bytes from the beginning
 * of the network protocol header to where the transport
 * protocol type is.
 */
int offset;
typedef struct network_table {
char *nmt_name; /* keyword string, see the mapping tables below */
int nmt_val; /* ETHERTYPE_* value; -1 in the terminating entry */
/*
 * Keyword-to-ethertype tables.  Judging by their names there is one
 * per link type (ethernet, IB, ipnet).  Every table ends with a
 * { "NULL", -1 } entry; the ethernet and IB tables have identical
 * contents and the ipnet table holds only the terminator.
 *
 * NOTE(review): the PPPoE entries are spelled "ppoed" and "ppoes", and
 * the reverse-ARP entry is "revarp", while the primary parser in this
 * file calls pf_matchfn() with "pppoed", "pppoes" and "rarp".  If
 * pf_matchfn() looks names up in these tables, those keywords would
 * never match - confirm against the lookup code.
 */
static network_table_t ether_network_mapping_table[] = {
{ "pup", ETHERTYPE_PUP },
{ "ip", ETHERTYPE_IP },
{ "arp", ETHERTYPE_ARP },
{ "revarp", ETHERTYPE_REVARP },
{ "at", ETHERTYPE_AT },
{ "aarp", ETHERTYPE_AARP },
{ "vlan", ETHERTYPE_VLAN },
{ "ip6", ETHERTYPE_IPV6 },
{ "slow", ETHERTYPE_SLOW },
{ "ppoed", ETHERTYPE_PPPOED },
{ "ppoes", ETHERTYPE_PPPOES },
{ "NULL", -1 }
};
static network_table_t ib_network_mapping_table[] = {
{ "pup", ETHERTYPE_PUP },
{ "ip", ETHERTYPE_IP },
{ "arp", ETHERTYPE_ARP },
{ "revarp", ETHERTYPE_REVARP },
{ "at", ETHERTYPE_AT },
{ "aarp", ETHERTYPE_AARP },
{ "vlan", ETHERTYPE_VLAN },
{ "ip6", ETHERTYPE_IPV6 },
{ "slow", ETHERTYPE_SLOW },
{ "ppoed", ETHERTYPE_PPPOED },
{ "ppoes", ETHERTYPE_PPPOES },
{ "NULL", -1 }
};
static network_table_t ipnet_network_mapping_table[] = {
{ "NULL", -1 }
};
/*
 * Transport-protocol lookup tables.  Only the terminating {-1, 0, 0}
 * entry of each table is visible in this view.
 */
static transport_table_t ether_transport_mapping_table[] = {
{-1, 0, 0} /* must be the final entry */
};
static transport_table_t ipnet_transport_mapping_table[] = {
{-1, 0, 0} /* must be the final entry */
};
static transport_table_t ib_transport_mapping_table[] = {
{-1, 0, 0} /* must be the final entry */
};
/*
 * Media specific packet offsets used by the code generator.
 *
 * NOTE(review): the field meanings below are read off the field names;
 * confirm the units and the use of -1 as "not present on this medium"
 * against the code that fills this structure in.
 */
struct datalink {
	int	dl_link_header_len;	/* presumably link header length */
	int	dl_link_type_offset;	/* presumably offset of the type field */
	int	dl_link_dest_offset;	/* presumably offset of the dest address */
	int	dl_link_src_offset;	/* presumably offset of the source address */
	int	dl_link_addr_len;	/* presumably link address length */
};
typedef struct datalink datalink_t;
/*
 * NOTE(review): presumably the byte offsets of the source and
 * destination zone ids within the ipnet header - confirm.
 */
#define IPNET_SRCZONE_OFFSET 8
#define IPNET_DSTZONE_OFFSET 16
/*
 * Incremented by the expression parser when it handles an OR while not
 * inside parentheses; consulted when deciding whether filtering can be
 * split between kernel and userland (see the comment in pf_compile).
 */
static int foundOR = 0;
char *token; /* NOTE(review): presumably the current lexer token - confirm */
extern void next();
static void pf_expression();
static void pf_clear_offset_register();
static void pf_matchfn(const char *name);
/*
 * This pointer points to the function that last generated
 * instructions to change the offset register. It's used
 * for comparisons to see if we need to issue more instructions
 * to change the register.
 *
 * It's initialized to pf_clear_offset_register because the offset
 * register in pfmod is initialized to zero, similar to the state
 * it would be in after executing the instructions issued by
 * pf_clear_offset_register.
 */
static void *last_offset_operation = (void*)pf_clear_offset_register;
/*
 * Store one filter word at the location pfp points to and advance pfp
 * by one element.  pfp is declared elsewhere in this file.
 *
 * NOTE(review): no bounds check on pfp is visible in this definition;
 * confirm that overrunning the filter storage is prevented elsewhere.
 */
static void
pf_emit(x)
	ushort_t x;
{
	*pfp = x;
	pfp++;
}
/*
 * Debug printer for the generated kernel filter: prints a banner and a
 * mnemonic for each instruction's action and operator.
 *
 * NOTE(review): this definition is incomplete in this view.  The
 * function name, the remaining parameters, the declarations of action,
 * op and pc, and the loop that walks the filter words are not visible,
 * and the conditional near the end is cut off mid-expression.
 */
static void
int len;
{
if (len > 0) {
printf("Kernel Filter:\n");
}
switch (action) {
case ENF_PUSHLIT:
printf("PUSHLIT ");
break;
case ENF_PUSHZERO:
printf("PUSHZERO ");
break;
#ifdef ENF_PUSHONE
case ENF_PUSHONE:
printf("PUSHONE ");
break;
#endif
#ifdef ENF_PUSHFFFF
case ENF_PUSHFFFF:
printf("PUSHFFFF ");
break;
#endif
#ifdef ENF_PUSHFF00
case ENF_PUSHFF00:
printf("PUSHFF00 ");
break;
#endif
#ifdef ENF_PUSH00FF
case ENF_PUSH00FF:
printf("PUSH00FF ");
break;
#endif
case ENF_LOAD_OFFSET:
printf("LOAD_OFFSET ");
break;
case ENF_BRTR:
printf("BRTR ");
break;
case ENF_BRFL:
printf("BRFL ");
break;
case ENF_POP:
printf("POP ");
break;
}
if (action >= ENF_PUSHWORD)
switch (op) {
case ENF_EQ:
printf("EQ ");
break;
case ENF_LT:
printf("LT ");
break;
case ENF_LE:
printf("LE ");
break;
case ENF_GT:
printf("GT ");
break;
case ENF_GE:
printf("GE ");
break;
case ENF_AND:
printf("AND ");
break;
case ENF_OR:
printf("OR ");
break;
case ENF_XOR:
printf("XOR ");
break;
case ENF_COR:
printf("COR ");
break;
case ENF_CAND:
printf("CAND ");
break;
case ENF_CNOR:
printf("CNOR ");
break;
case ENF_CNAND:
printf("CNAND ");
break;
case ENF_NEQ:
printf("NEQ ");
break;
}
if (action == ENF_PUSHLIT ||
action == ENF_LOAD_OFFSET ||
pc++;
}
printf("\n");
}
}
/*
 * Emit packet filter code to check a
 * field in the packet for a particular value.
 * Need different code for each field size.
 * Since the pf can only compare 16 bit quantities
 * we have to use masking to compare byte values.
 * Long word (32 bit) quantities have to be done
 * as two 16 bit comparisons.
 *
 * An offset of -1 is rejected through pr_err().
 *
 * NOTE(review): the function name, parameter list and most of the
 * emitted instructions are not visible in this view; only the control
 * skeleton and the literal byte masks remain.
 */
static void
{
/*
 * If the property being filtered on is absent in the media
 * packet, error out.
 */
if (offset == -1)
pr_err("filter option unsupported on media");
switch (len) {
case 1:
/*
 * The branch taken for a one-byte field depends on host byte order
 * and on whether offset is odd or even; the two branches emit the
 * masks 0x00FF and 0xFF00 respectively when the dedicated push
 * opcodes are not defined.
 */
#if defined(_BIG_ENDIAN)
if (offset % 2)
#else
if (!(offset % 2))
#endif
{
#ifdef ENF_PUSH00FF
#else
pf_emit(0x00FF);
#endif
} else {
#ifdef ENF_PUSHFF00
#else
pf_emit(0xFF00);
#endif
}
break;
case 2:
break;
case 4:
#if defined(_BIG_ENDIAN)
#elif defined(_LITTLE_ENDIAN)
#else
#endif
#if defined(_BIG_ENDIAN)
#else
#endif
break;
}
}
/*
 * Same as pf_compare_value, but only for emitting code to
 * compare ipv6 addresses.
 *
 * The loop steps through the address two bytes at a time.
 *
 * NOTE(review): the function name, parameter list and the statements
 * inside the loop are not visible in this view.
 */
static void
{
int i;
for (i = 0; i < len; i += 2) {
if (i != 0)
}
}
/*
 * Same as above except mask the field value
 * before doing the comparison. The comparison checks
 * to make sure the values are equal.
 *
 * NOTE(review): the function name, parameter list and body are not
 * visible in this view.
 */
static void
{
}
/*
 * Same as above except the values are compared to see if they are not
 * equal.
 *
 * NOTE(review): the function name, parameter list and body are not
 * visible in this view.
 */
static void
{
}
/*
 * Similar to pf_compare_value.
 *
 * This is the utility function that does the actual work to compare
 * two values using a mask. The comparison operation is passed into
 * the function.
 *
 * An offset of -1 is rejected through pr_err().  For a one-byte field
 * the branch is chosen from the host byte order and the parity of
 * offset, in the same way as the one-byte case of the unmasked
 * comparison earlier in this file.
 *
 * NOTE(review): the function name, parameter list and the emitted
 * instructions are not visible in this view; only the control
 * skeleton is shown.
 */
static void
{
/*
 * If the property being filtered on is absent in the media
 * packet, error out.
 */
if (offset == -1)
pr_err("filter option unsupported on media");
switch (len) {
case 1:
#if defined(_BIG_ENDIAN)
if (offset % 2)
#else
/*
 * Parenthesised on purpose: unary ! binds tighter than %, so
 * "!offset % 2" means "(!offset) % 2" and is true only for
 * offset 0 instead of for every even offset.
 */
if (!(offset % 2))
#endif
{
} else {
}
break;
case 2:
break;
case 4:
break;
}
}
/*
 * Like pf_compare_value() but compare on a 64-bit zoneid value.
 * The argument val passed in is in network byte order.
 *
 * The loop runs once per 16-bit piece of the 64-bit value (four
 * iterations).
 *
 * NOTE(review): the function name, parameter list and the statements
 * inside the loop are not visible in this view.
 */
static void
{
int i;
for (i = 0; i < sizeof (uint64_t) / 2; i ++) {
if (i != 0)
}
}
/*
 * Generate pf code to match an IPv4 or IPv6 address.
 *
 * hostname is the name used in the "could not resolve" error messages;
 * inet_type is one of IPV4_ONLY, IPV6_ONLY or IPV4_AND_IPV6 and selects
 * which address families are needed.  For a match in either direction
 * (ANY) both address offsets are set to -1.
 *
 * NOTE(review): this definition is heavily truncated in this view.
 * The function name, the first parameter(s), several local
 * declarations (found_host, which, first, addr4ptr, addr6ptr, addr4),
 * the resolver calls and the loops over the resolved addresses are not
 * visible, and many calls are cut off mid-argument-list.
 */
static void
char *hostname;
int inet_type;
{
int h_addr_index;
int error_num = 0;
int pass = 0;
int i;
/*
 * The addr4offset and addr6offset variables simplify the code which
 * generates the address comparison filter. With these two variables,
 * duplicate code need not exist for the TO and FROM case.
 * A value of -1 describes the ANY case (TO and FROM).
 */
int addr4offset;
int addr6offset;
found_host = 0;
pr_err("could not resolve %s (try again later)",
hostname);
} else {
}
}
pr_err("could not resolve %s (try again later)",
hostname);
} else {
}
}
/* Some hostname i.e. tokentype is ALPHA */
switch (inet_type) {
case IPV4_ONLY:
/* Only IPv4 address is needed */
found_host = 1;
}
break;
case IPV6_ONLY:
/* Only IPv6 address is needed */
found_host = 1;
}
break;
case IPV4_AND_IPV6:
/* Both IPv4 and IPv6 are needed */
found_host = 1;
}
break;
default:
found_host = 0;
}
if (!found_host) {
pr_err("could not resolve %s (try again later)",
hostname);
} else {
}
}
} else {
}
switch (which) {
case TO:
break;
case FROM:
break;
case ANY:
addr4offset = -1;
addr6offset = -1;
break;
}
pf_matchfn("ip");
h_addr_index = 0;
if (addr4offset == -1) {
*addr4ptr);
if (h_addr_index != 0)
*addr4ptr);
} else {
*addr4ptr);
if (h_addr_index != 0)
}
}
} else {
/* first pass: IPv4 addresses */
h_addr_index = 0;
if (IN6_IS_ADDR_V4MAPPED(addr6ptr)) {
if (first) {
pf_matchfn("ip");
}
pass++;
}
if (addr4offset == -1) {
addr4);
if (!first)
addr4);
} else {
addr4);
if (!first)
}
if (first)
}
}
if (!first) {
}
/* second pass: IPv6 addresses */
h_addr_index = 0;
if (!IN6_IS_ADDR_V4MAPPED(addr6ptr)) {
if (first) {
pf_matchfn("ip6");
}
pass++;
}
if (addr6offset == -1) {
16, *addr6ptr);
if (!first)
16, *addr6ptr);
} else {
*addr6ptr);
if (!first)
}
if (first)
}
}
if (!first) {
}
if (pass == 2) {
}
}
}
}
/*
 * Walk an address of len bytes starting at offset, consuming it in
 * chunks of four bytes, then two bytes, then single bytes.  An offset
 * of -1 is rejected through pr_err().
 *
 * NOTE(review): the function name, parameter list, the declaration of
 * didone and the comparison calls made for each chunk are not visible
 * in this view.
 */
static void
{
/*
 * If the property being filtered on is absent in the media
 * packet, error out.
 */
if (offset == -1)
pr_err("filter option unsupported on media");
while (len > 0) {
if (len >= 4) {
addr += 4;
offset += 4;
len -= 4;
} else if (len >= 2) {
addr += 2;
offset += 2;
len -= 2;
} else {
len--;
}
if (didone)
}
}
/*
 * Compare ethernet addresses.
 *
 * hostname is looked up with ether_hostton() and, if that returns
 * non-zero, with arp_for_ether(); if arp_for_ether() returns zero as
 * well, pr_err() is called.
 *
 * NOTE(review): the function name, the first parameter(s), the
 * declarations of e, ep and which, and the code emitted for the
 * TO/FROM/ANY cases are not visible in this view.
 */
static void
char *hostname;
{
if (ether_hostton(hostname, &e))
if (!arp_for_ether(hostname, &e))
pr_err("cannot obtain ether addr for %s",
hostname);
ep = &e;
}
switch (which) {
case TO:
break;
case FROM:
break;
case ANY:
break;
}
}
/*
 * Emit code to compare the network part of
 * an IP address.
 *
 * NOTE(review): the function name, the first parameter(s), the
 * declarations of addr and which, the address lookup and mask
 * computation, and the code emitted for the TO/FROM/ANY cases are not
 * visible in this view.
 */
static void
char *netname;
{
} else {
}
/*
 * Left justify the address and figure
 * out a mask based on the supplied address.
 * Set the mask according to the number of zero
 * low-order bytes.
 * Note: this works only for whole octet masks.
 */
if (addr) {
}
}
switch (which) {
case TO:
break;
case FROM:
break;
case ANY:
break;
}
}
/*
 * Emit code to match on src or destination zoneid.
 * The zoneid passed in is in network byte order.
 *
 * NOTE(review): the function name, parameter list, the condition
 * guarding the pr_err() call and the code emitted for the TO/FROM/ANY
 * cases are not visible in this view.
 */
static void
{
pr_err("zone filter option unsupported on media");
switch (which) {
case TO:
break;
case FROM:
break;
case ANY:
break;
}
}
/*
 * A helper function to keep the code to emit instructions
 * to change the offset register in one place.
 *
 * INPUTS: offset - A value representing an offset in 16-bit
 * words.
 * OUTPUTS: If there is enough room in the storage for the
 * packet filtering program, instructions to load
 * a constant to the offset register. Otherwise,
 * nothing.
 *
 * NOTE(review): the function name, parameter list and body are not
 * visible in this view.
 */
static void
{
}
/*
 * Clear pfmod's offset register.
 *
 * INPUTS: none
 * OUTPUTS: Instructions to clear the offset register if
 * there is enough space remaining in the packet
 * filtering program structure's storage, and
 * the last thing done to the offset register was
 * not clearing the offset register. Otherwise,
 * nothing.
 *
 * NOTE(review): the function name line and the statements inside the
 * conditional are not visible in this view.
 */
static void
{
/* Skip the work when this function was the last to touch the register. */
if (last_offset_operation != (void*)pf_clear_offset_register) {
}
}
/*
 * This function will issue opcodes to check if a packet
 * is VLAN tagged, and if so, update the offset register
 * with the appropriate offset.
 *
 * Note that if the packet is not VLAN tagged, then the offset
 * register will be cleared.
 *
 * If the interface type is not an ethernet type, then this
 * function returns without doing anything.
 *
 * If the last attempt to change the offset register occurred because
 * of a call to this function that was called with the same offset,
 * then we don't issue packet filtering instructions.
 *
 * INPUTS: offset - an offset in 16 bit words. The function
 * will set the offset register to this
 * value if the packet is VLAN tagged.
 * OUTPUTS: If the conditions are met, packet filtering instructions.
 *
 * NOTE(review): the function name line, the start of the guarding
 * condition and most of the emitted instructions are not visible in
 * this view; only the literal operand 3 and the bookkeeping of
 * last_offset_operation remain.
 */
static void
{
static uint_t last_offset = 0;
(last_offset_operation != (void*)pf_check_vlan_tag ||
last_offset != offset)) {
/*
 * First thing is to clear the offset register.
 * We don't know what state it is in, and if it
 * is not zero, then we have no idea what we load
 * when we execute ENF_PUSHWORD.
 */
/*
 * Check the ethertype.
 */
/*
 * And if it's not VLAN, don't load offset to the offset
 * register.
 */
pf_emit(3);
/*
 * Otherwise, load offset to the offset register.
 */
/*
 * Now get rid of the results of the comparison,
 * we don't want the results of the comparison to affect
 * other logic in the packet filtering program.
 */
/*
 * Set the last operation at the end, or any time
 * after the call to pf_clear_offset because
 * pf_clear_offset uses it.
 */
last_offset_operation = (void*)pf_check_vlan_tag;
}
}
/*
 * Utility function used to emit packet filtering code
 * to match an ethertype.
 *
 * INPUTS: ethertype - The ethertype we want to check for.
 * Don't call htons on the ethertype before
 * calling this function.
 * OUTPUTS: If there is sufficient storage available, packet
 * filtering code to check an ethertype. Otherwise,
 * nothing.
 *
 * NOTE(review): the function name, parameter list and the statements
 * in both branches of the conditional are not visible in this view.
 */
static void
{
/*
 * If the user wants to filter on ethertype VLAN,
 * then clear the offset register so that the offset
 * for ENF_PUSHWORD points to the right place in the
 * packet.
 *
 * Otherwise, call pf_check_vlan_tag to set the offset
 * register such that the contents of the offset register
 * plus the argument for ENF_PUSHWORD point to the right
 * part of the packet, whether or not the packet is VLAN
 * tagged. We call pf_check_vlan_tag with an offset of
 * two words because if the packet is VLAN tagged, we have
 * to move past the ethertype in the ethernet header, and
 * past the lower two octets of the VLAN header to get to
 * the ethertype in the VLAN header.
 */
if (ethertype == ETHERTYPE_VLAN)
else
}
/*
 * NOTE(review): the function name, parameter list and body of this
 * definition are not visible in this view.
 */
static void
{
}
/*
 * NOTE(review): the function name, parameter list and body of this
 * definition are not visible in this view.
 */
static void
{
}
/*
 * This function uses the table above to generate a
 * piece of a packet filtering program to check a transport
 * protocol type.
 *
 * INPUTS: transport_protocol - the transport protocol we're
 * interested in.
 * OUTPUTS: If there is sufficient storage, then packet filtering
 * code to check a transport protocol type. Otherwise,
 * nothing.
 *
 * NOTE(review): the function name, parameter list, the loop over the
 * table and the emitted instructions are not visible in this view; the
 * comparison on transport_protocol is cut off mid-expression.
 */
static void
{
int i;
uint_t number_of_matches = 0;
if (transport_protocol ==
if (number_of_matches > 1) {
/*
 * Since we have two or more matches, in
 * order to have a correct and complete
 * program we need to OR the result of
 * each block of comparisons together.
 */
}
}
}
}
/*
 * Called throughout this file with a protocol keyword such as "ip",
 * "ip6", "arp" or "vlan".
 *
 * NOTE(review): the loop header and the lookup/dispatch statements are
 * not visible in this view, so how proto is resolved cannot be
 * confirmed from here.
 */
static void
pf_matchfn(const char *proto)
{
int i;
break;
}
}
}
/*
 * Keyword dispatcher of the filter parser (presumably pf_primary(),
 * which the term parser in this file calls).  Each keyword that is
 * handled increments opstack, advances with next() and leaves the
 * loop; a few modifiers ("ether" is the one visible here) advance and
 * continue with the following token.  Unknown tokens end the loop
 * without touching opstack.
 *
 * NOTE(review): this definition is heavily truncated in this view.
 * The function name, local declarations and most of the code-emitting
 * calls are missing, and several conditionals are cut off, so the
 * brace structure below does not balance as shown.
 */
static void
{
for (;;) {
break;
if (EQ("ip")) {
pf_matchfn("ip");
opstack++;
next();
break;
}
if (EQ("ip6")) {
pf_matchfn("ip6");
opstack++;
next();
break;
}
if (EQ("pppoe")) {
pf_matchfn("pppoe");
opstack++;
next();
break;
}
if (EQ("pppoed")) {
pf_matchfn("pppoed");
opstack++;
next();
break;
}
if (EQ("pppoes")) {
pf_matchfn("pppoes");
opstack++;
next();
break;
}
if (EQ("arp")) {
pf_matchfn("arp");
opstack++;
next();
break;
}
if (EQ("vlan")) {
pf_matchfn("vlan");
0, VLAN_ID_MASK);
opstack++;
next();
break;
}
if (EQ("vlan-id")) {
next();
pr_err("VLAN ID expected");
pf_matchfn("vlan-id");
opstack++;
next();
break;
}
if (EQ("rarp")) {
pf_matchfn("rarp");
opstack++;
next();
break;
}
if (EQ("tcp")) {
opstack++;
next();
break;
}
if (EQ("udp")) {
opstack++;
next();
break;
}
if (EQ("ospf")) {
opstack++;
next();
break;
}
if (EQ("sctp")) {
opstack++;
next();
break;
}
if (EQ("icmp")) {
opstack++;
next();
break;
}
if (EQ("icmp6")) {
opstack++;
next();
break;
}
if (EQ("ip-in-ip")) {
opstack++;
next();
break;
}
if (EQ("esp")) {
opstack++;
next();
break;
}
if (EQ("ah")) {
opstack++;
next();
break;
}
if (EQ("(")) {
inBrace++;
next();
if (EQ(")")) {
if (inBrace)
inBraceOR--;
inBrace--;
next();
}
break;
}
next();
continue;
}
next();
continue;
}
if (EQ("ether")) {
eaddr = 1;
next();
continue;
}
if (EQ("inet")) {
next();
if (EQ("host"))
next();
opstack++;
next();
break;
}
if (EQ("inet6")) {
next();
if (EQ("host"))
next();
opstack++;
next();
break;
}
if (EQ("proto")) {
next();
pr_err("IP proto type expected");
tokenval);
opstack++;
next();
break;
}
if (EQ("broadcast")) {
opstack++;
next();
break;
}
if (EQ("multicast")) {
opstack++;
next();
break;
}
if (EQ("ethertype")) {
next();
pr_err("ether type expected");
opstack++;
next();
break;
}
if (EQ("dstnet"))
else if (EQ("srcnet"))
next();
opstack++;
next();
break;
}
if (EQ("zone")) {
next();
pr_err("zoneid expected after inet");
opstack++;
next();
break;
}
/*
 * Give up on anything that's obviously
 * not a primary.
 */
break;
}
tokentype == ADDR_ETHER) {
next();
} else {
}
eaddr = 0;
opstack++;
next();
break;
}
break; /* unknown token */
}
}
/*
 * Parse a run of primaries, optionally separated by "and".  The entry
 * value of opstack is remembered in s; the loop ends as soon as a
 * further pf_primary() call does not leave opstack at s + 2, and
 * otherwise drops opstack by one before trying the next primary.
 *
 * NOTE(review): the function name line and any instruction emitted to
 * combine the two operands are not visible in this view.
 */
static void
{
int s = opstack;
pf_primary();
for (;;) {
if (EQ("and"))
next();
pf_primary();
if (opstack != s + 2)
break;
opstack--;
}
}
/*
 * Fragment of the expression parser (presumably pf_expression(), which
 * is forward-declared near the top of this file).  What is visible
 * records an OR in inBraceOR when inside parentheses and in foundOR
 * otherwise, advances to the next token and decrements opstack.
 *
 * NOTE(review): the function name line, the enclosing loop and its
 * condition, and the calls that parse the operands are not visible in
 * this view.
 */
static void
{
if (inBrace)
inBraceOR++;
else
foundOR++;
next();
opstack--;
}
}
/*
 * Attempt to compile the expression
 * in the string "e". If we can generate
 * pf code for it then return 1 - otherwise
 * return 0 and leave it up to the user-level
 * filter.
 *
 * A third result, 2, is returned when no OR lies ahead and the
 * filtering is split between the kernel and userland.
 *
 * NOTE(review): this definition is heavily truncated in this view.
 * Several local declarations (sav_str, ptr, inBr, aheadOR), the
 * conditions guarding most branches, the media-specific setup and the
 * printing calls are not visible.
 */
int
pf_compile(e, print)
char *e;
int print;
{
char *argstr;
sav_str = e;
return (0);
}
/*
 * Set media specific packet offsets that this code uses.
 */
dl.dl_link_dest_offset = 0;
}
dl.dl_link_type_offset = 0;
}
dl.dl_link_type_offset = 0;
}
next();
/*
 * The idea here is to do as much filtering as possible in
 * the kernel. So even if we find a token we don't understand,
 * we try to see if we can still set up a portion of the filter
 * in the kernel and use the userland filter to filter the
 * remaining stuff. Obviously, if our filter expression is of
 * type A AND B, we can filter A in kernel and then apply B
 * to the packets that got through. The same is not true for
 * a filter of type A OR B. We can't apply A first and then B
 * on the packets filtered through A.
 *
 * (We need to keep track of the fact when we find an OR,
 * and the fact that we are inside brackets when we find OR.
 * The variable 'foundOR' tells us if there was an OR behind,
 * 'inBraceOR' tells us if we found an OR before we could find
 * the end brace i.e. ')', and variable 'aheadOR' checks if
 * there is an OR in the expression ahead. if either of these
 * cases become true, we can't split the filtering)
 */
/* FORGET IN KERNEL FILTERING */
return (0);
} else {
/* CHECK IF NO OR AHEAD */
while (*ptr != '\0') {
switch (*ptr) {
case '(':
inBr++;
break;
case ')':
inBr--;
break;
/*
 * NOTE(review): as shown here, any 'o' or 'O' character in the
 * remaining text sets aheadOR; confirm whether a check of the
 * following character (and of inBr) is intended.
 */
case 'o':
case 'O':
aheadOR = 1;
break;
case ',':
if (!inBr)
aheadOR = 1;
break;
}
ptr++;
}
if (!aheadOR) {
/* NO OR AHEAD, SPLIT UP THE FILTERING */
if (print) {
}
return (2);
} else
return (0);
}
}
if (print) {
}
return (1);
}