/* mac_flow_impl.h revision 2b24ab6b3865caeede9eeb9db6b83e1d89dcd1ea */
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _MAC_FLOW_IMPL_H
#define _MAC_FLOW_IMPL_H
#ifdef __cplusplus
extern "C" {
#endif
#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/ksynch.h>
#include <sys/mac_flow.h>
#include <sys/stream.h>
#include <sys/sdt.h>
#include <net/if.h>
/*
* Macros to increment/decrement the reference count on a flow_entry_t.
*/
/*
 * FLOW_REFHOLD(flent): unconditionally add one reference to the flow entry.
 * Fires the flow_refhold DTrace probe, then bumps fe_refcnt with fe_lock
 * held.  fe_flags is not consulted; FLOW_TRY_REFHOLD is the variant that
 * refuses the hold for flows in a blocking state.
 * flent is evaluated several times and must be free of side effects.
 */
#define FLOW_REFHOLD(flent) { \
	DTRACE_PROBE1(flow_refhold, flow_entry_t *, (flent)); \
	mutex_enter(&(flent)->fe_lock); \
	(flent)->fe_refcnt += 1; \
	mutex_exit(&(flent)->fe_lock); \
}
/*
 * FLOW_TRY_REFHOLD(flent, err): conditional variant of FLOW_REFHOLD for
 * data paths.  (err) ends up 0 and fe_refcnt is bumped only when none of
 * the blocking flags is set on the flow entry; otherwise (err) ends up -1
 * and the reference count is left alone.
 *
 * Data paths must not attempt to use a flow entry if it is marked INCIPIENT
 * or QUIESCE.  In the former case the set up is not yet complete and the
 * data path could stumble on inconsistent data structures.  In the latter
 * case a control operation is waiting for quiescence so that it can
 * change callbacks or other structures without the use of locks.  The hold
 * is likewise refused for FE_CONDEMNED (being deleted) and for
 * FE_UF_NO_DATAPATH / FE_MC_NO_DATAPATH (no datapath set up).
 *
 * The flow_refhold probe fires before the flags are examined, i.e. also
 * for attempts that end up being refused.
 */
#define FLOW_TRY_REFHOLD(flent, err) { \
	DTRACE_PROBE1(flow_refhold, flow_entry_t *, (flent)); \
	(err) = 0; \
	mutex_enter(&(flent)->fe_lock); \
	if (((flent)->fe_flags & (FE_INCIPIENT | FE_QUIESCE | \
	    FE_CONDEMNED | FE_UF_NO_DATAPATH | FE_MC_NO_DATAPATH)) == 0) { \
		(flent)->fe_refcnt++; \
	} else { \
		(err) = -1; \
	} \
	mutex_exit(&(flent)->fe_lock); \
}
/*
* FLOW_REFRELE(flent): drop one fe_refcnt reference.
* After firing the flow_refrele probe and taking fe_lock, fe_refcnt is
* asserted to be non-zero and decremented; then one of three things happens:
*  - FE_WAITER is set: the count is asserted to still be non-zero, fe_cv
*    is signalled and fe_lock is dropped.
*  - the count reached zero: mac_flow_destroy() is called with fe_lock
*    still held; the macro itself does not drop the lock on this path
*    (presumably mac_flow_destroy() disposes of it -- confirm in the
*    implementation).
*  - otherwise: fe_lock is simply dropped.
* NOTE(review): flent is evaluated several times, so the argument must be
* free of side effects.
*/
#define FLOW_REFRELE(flent) { \
DTRACE_PROBE1(flow_refrele, flow_entry_t *, (flent)); \
mutex_enter(&(flent)->fe_lock); \
ASSERT((flent)->fe_refcnt != 0); \
(flent)->fe_refcnt--; \
if ((flent)->fe_flags & FE_WAITER) { \
ASSERT((flent)->fe_refcnt != 0); \
cv_signal(&(flent)->fe_cv); \
mutex_exit(&(flent)->fe_lock); \
} else if ((flent)->fe_refcnt == 0) { \
mac_flow_destroy(flent); \
} else { \
mutex_exit(&(flent)->fe_lock); \
} \
}
/*
 * FLOW_USER_REFHOLD(flent): add one user reference (fe_user_refcnt) to the
 * flow entry, with fe_lock held for the update.  No flags are checked and
 * no DTrace probe is fired.
 */
#define FLOW_USER_REFHOLD(flent) { \
	mutex_enter(&(flent)->fe_lock); \
	(flent)->fe_user_refcnt += 1; \
	mutex_exit(&(flent)->fe_lock); \
}
/*
 * FLOW_USER_REFRELE(flent): drop one user reference.  With fe_lock held,
 * fe_user_refcnt is asserted to be non-zero and decremented; if that was
 * the last user reference and FE_WAITER is set, fe_cv is signalled.
 * Unlike FLOW_REFRELE, this macro never destroys the flow entry.
 */
#define FLOW_USER_REFRELE(flent) { \
	mutex_enter(&(flent)->fe_lock); \
	ASSERT((flent)->fe_user_refcnt != 0); \
	(flent)->fe_user_refcnt -= 1; \
	if ((flent)->fe_user_refcnt == 0 && \
	    ((flent)->fe_flags & FE_WAITER) != 0) { \
		cv_signal(&(flent)->fe_cv); \
	} \
	mutex_exit(&(flent)->fe_lock); \
}
/*
 * FLOW_FINAL_REFRELE(flent): drop what must be the last reference.
 * Asserts that exactly one fe_refcnt reference remains and that no
 * fe_user_refcnt references are outstanding, then hands over to
 * FLOW_REFRELE.  The asserted fields are read without fe_lock held.
 *
 * flent is parenthesized at every use so that any pointer expression
 * (e.g. "tab + i") can be passed safely.
 */
#define FLOW_FINAL_REFRELE(flent) { \
	ASSERT((flent)->fe_refcnt == 1 && (flent)->fe_user_refcnt == 0); \
	FLOW_REFRELE(flent); \
}
/*
 * Mark or unmark the flent with a bit flag.
 *
 * FLOW_MARK(flent, flag) ORs flag into fe_flags while holding fe_lock.
 */
#define FLOW_MARK(flent, flag) { \
	mutex_enter(&(flent)->fe_lock); \
	(flent)->fe_flags = (flent)->fe_flags | (flag); \
	mutex_exit(&(flent)->fe_lock); \
}
/*
 * FLOW_UNMARK(flent, flag): clear the given bit(s) in fe_flags while
 * holding fe_lock.  flag is parenthesized before being complemented so
 * that a compound mask such as (A | B) clears both bits instead of
 * expanding to (~A | B), which would clear only A.
 */
#define FLOW_UNMARK(flent, flag) { \
	mutex_enter(&(flent)->fe_lock); \
	(flent)->fe_flags &= ~(flag); \
	mutex_exit(&(flent)->fe_lock); \
}
/*
 * FLENT_TO_MIP(flent): obtain the "mip" a flow entry belongs to
 * (presumably the struct mac_impl_s -- confirm).  When fe_mbg is set the
 * result comes from mac_bcast_grp_mip(fe_mbg); otherwise fe_mcip is
 * treated as a mac_client_impl_t and its mci_mip member is used.
 *
 * flent is parenthesized at every use so that any pointer expression can
 * be passed; it is evaluated more than once, so it must be free of side
 * effects.
 */
#define FLENT_TO_MIP(flent) \
	((flent)->fe_mbg != NULL ? mac_bcast_grp_mip((flent)->fe_mbg) : \
	    ((mac_client_impl_t *)(flent)->fe_mcip)->mci_mip)
/*
 * Convert a bandwidth expressed in bits per second into a number of bytes
 * per tick: the shift by 3 turns bits into bytes, the division by hz
 * turns "per second" into "per tick".
 */
#define FLOW_BYTES_PER_TICK(bps) \
	(((bps) >> 3) / hz)
/*
 * FLOW_MIN_PRIORITY(min, max, pri): lower bound of the sub-range for
 * priority level pri.  The underlying range [min, max] is cut into
 * MRP_PRIORITY_LEVELS equal steps (integer division) and the result is
 * min plus pri such steps.
 */
#define FLOW_MIN_PRIORITY(min, max, pri) \
	(((pri) * (((max) - (min)) / MRP_PRIORITY_LEVELS)) + (min))
/*
 * FLOW_MAX_PRIORITY(min, max, base): upper bound of the sub-range that
 * starts at base.  It is base plus one step, where a step is the
 * underlying range [min, max] divided (integer division) by
 * MRP_PRIORITY_LEVELS.
 */
#define FLOW_MAX_PRIORITY(min, max, base) \
	((((max) - (min)) / MRP_PRIORITY_LEVELS) + (base))
/*
 * Given an underlying range and a priority level, get the absolute
 * priority value. For now there are just 3 values, high, low and
 * medium so we can just return max, min or min + (max - min) / 2.
 * If there are more than three we need to change this computation.
 *
 * Any pri other than MPL_HIGH or MPL_LOW yields the midpoint.  The whole
 * expansion is parenthesized so that the macro can be used as an operand
 * of a larger expression (e.g. "1 + FLOW_PRIORITY(...)") without the
 * surrounding operators being absorbed into the conditional.
 */
#define FLOW_PRIORITY(min, max, pri) \
	((pri) == MPL_HIGH ? (max) : \
	    (pri) == MPL_LOW ? (min) : \
	    ((min) + (((max) - (min)) / 2)))
/*
* NOTE(review): MAC_FLOW_TAB_SIZE is not referenced anywhere in this
* header; presumably it is a default number of hash chains (ft_size) for
* flow tables -- confirm against the implementation.
*/
#define MAC_FLOW_TAB_SIZE 500
/* Forward typedefs; the structures are defined further down in this header. */
typedef struct flow_entry_s flow_entry_t;
typedef struct flow_tab_s flow_tab_t;
typedef struct flow_state_s flow_state_t;
/* Incomplete types: this header only ever uses pointers to them. */
struct mac_impl_s;
struct mac_client_impl_s;
/*
* Classification flags used to lookup the flow.
* NOTE(review): presumably these are the values carried in the uint_t
* argument of mac_flow_lookup() and in flow_state_s.fs_flags (described
* there as holding the flow direction) -- confirm.
*/
#define FLOW_INBOUND 0x01
#define FLOW_OUTBOUND 0x02
/* Don't compare VID when classifying the packets, see mac_rx_classify() */
#define FLOW_IGNORE_VLAN 0x04
/*
* Generic flow client function signature.
* This is the type of flow_entry_s.fe_cb_fn, the function invoked for
* delivering matching packets.  NOTE(review): the two void * parameters
* presumably receive fe_cb_arg1 and fe_cb_arg2; the meaning of the
* trailing boolean_t is not visible in this header -- confirm with the
* callers.
*/
typedef void (*flow_fn_t)(void *, void *, mblk_t *, boolean_t);
/*
* Flow state.
* Passed as the second argument of mac_flow_wait().  NOTE(review):
* presumably selects what is waited for -- outstanding driver upcalls
* (fe_refcnt) or user references (fe_user_refcnt) -- confirm in the
* implementation.
*/
typedef enum {
FLOW_DRIVER_UPCALL,
FLOW_USER_REF
} mac_flow_state_t;
/*
* Matches a flow_entry_t using the extracted flow_state_t info.
* Stored per flow entry in fe_match; the flow_ops_s description names
* fo_accept_fe() as the function that sets it.
*/
typedef boolean_t (*flow_match_fn_t)(flow_tab_t *, flow_entry_t *,
flow_state_t *);
/*
* fe_flags
* Bits for flow_entry_s.fe_flags, which is protected by fe_lock.
* FLOW_TRY_REFHOLD refuses a reference when any of FE_QUIESCE,
* FE_INCIPIENT, FE_CONDEMNED, FE_UF_NO_DATAPATH or FE_MC_NO_DATAPATH is
* set.  FLOW_REFRELE and FLOW_USER_REFRELE signal fe_cv when FE_WAITER is
* set.
*/
#define FE_QUIESCE 0x01 /* Quiesce the flow */
#define FE_WAITER 0x02 /* Flow has a waiter */
#define FE_FLOW_TAB 0x04 /* Flow is in the flow tab list */
#define FE_G_FLOW_HASH 0x08 /* Flow is in the global flow hash */
#define FE_INCIPIENT 0x10 /* Being setup */
#define FE_CONDEMNED 0x20 /* Being deleted */
#define FE_UF_NO_DATAPATH 0x40 /* No datapath setup for User flow */
#define FE_MC_NO_DATAPATH 0x80 /* No datapath setup for mac client */
/*
* fe_type
* Bits for flow_entry_s.fe_type.  FLOW_VNIC is an alias for
* FLOW_VNIC_MAC, not a separate bit.
*/
#define FLOW_PRIMARY_MAC 0x01 /* NIC primary MAC address */
#define FLOW_VNIC_MAC 0x02 /* VNIC flow */
#define FLOW_MCAST 0x04 /* Multicast (and broadcast) */
#define FLOW_OTHER 0x08 /* Other flows configured */
#define FLOW_USER 0x10 /* User defined flow */
#define FLOW_VNIC FLOW_VNIC_MAC
#define FLOW_NO_STATS 0x20 /* Don't create stats for the flow */
/*
* Bandwidth control counters shared between a soft ring set (SRS) and its
* associated soft rings. When the flow associated with a NIC/VNIC has a
* group of Rx rings assigned to it, there are as many soft ring sets as
* there are Rx rings in the group, and each individual SRS (and its soft
* rings) decides independently when to poll its Rx ring. But if there is
* a B/W limit associated with the NIC/VNIC, then the B/W control counter
* is shared across all the SRSes in the group and their associated soft
* rings.
*
* In other words, there is a many-to-1 mapping between the SRSes and the
* mac_bw_ctl when the flow has a group of Rx rings associated with it.
*
* A flow_entry_s embeds two of these: fe_tx_bw and fe_rx_bw.
*/
typedef struct mac_bw_ctl_s {
/* NOTE(review): presumably guards the members below -- confirm. */
kmutex_t mac_bw_lock;
uint32_t mac_bw_state;
size_t mac_bw_sz; /* ?? Is it needed */
size_t mac_bw_limit; /* Max bytes to process per tick */
size_t mac_bw_used; /* Bytes processed in current tick */
size_t mac_bw_drop_threshold; /* Max queue length */
size_t mac_bw_drop_bytes;
size_t mac_bw_polled;
size_t mac_bw_intr;
clock_t mac_bw_curr_time;
} mac_bw_ctl_t;
/*
* Per-flow state.
*
* The trailing comment on a member names what protects it: ft_lock is the
* lock of a flow_tab_s and fe_lock is the mutex embedded below.
* NOTE(review): the "WO" and "SL" tags are not defined in this header;
* presumably WO means write-once (set at creation) and SL means
* serialized by a lock/perimeter owned by the MAC layer -- confirm.
*/
struct flow_entry_s { /* Protected by */
struct flow_entry_s *fe_next; /* ft_lock */
datalink_id_t fe_link_id; /* WO */
/* Properties as specified for this flow */
mac_resource_props_t fe_resource_props; /* SL */
/* Properties actually effective at run time for this flow */
mac_resource_props_t fe_effective_props; /* SL */
kmutex_t fe_lock;
char fe_flow_name[MAXFLOWNAMELEN]; /* fe_lock */
flow_desc_t fe_flow_desc; /* fe_lock */
kcondvar_t fe_cv; /* fe_lock */
/*
* Initial flow ref is 1 on creation. A thread that looks up the
* flent, typically via mac_flow_lookup(), dynamically holds a ref.
* If the ref is 1, it means there aren't any upcalls from the driver
* or downcalls from the stack using this flent. Structures pointing
* to the flent or flent inserted in lists don't count towards this
* refcnt. Instead they are tracked using fe_flags. Only a control
* thread doing a teardown operation deletes the flent, after waiting
* for upcalls to finish synchronously. The fe_refcnt tracks
* the number of upcall refs.
*/
uint32_t fe_refcnt; /* fe_lock */
/*
* This tracks lookups done using the global hash list for user
* generated flows. This refcnt only protects the flent itself
* from disappearing and helps walkers to read the flent info such
* as flow spec. However the flent may be quiesced and the SRS could
* be deleted. The fe_user_refcnt tracks the number of global flow
* hash refs.
*/
uint32_t fe_user_refcnt; /* fe_lock */
/* Bit mask of the FE_* flags. */
uint_t fe_flags; /* fe_lock */
/*
* Function/args to invoke for delivering matching packets.
* Only the function fe_cb_fn may be changed dynamically and
* atomically. fe_cb_arg1 and fe_cb_arg2 are set at creation time
* and may not be changed.
*/
flow_fn_t fe_cb_fn; /* fe_lock */
void *fe_cb_arg1; /* fe_lock */
void *fe_cb_arg2; /* fe_lock */
void *fe_client_cookie; /* WO */
void *fe_rx_ring_group; /* SL */
void *fe_rx_srs[MAX_RINGS_PER_GROUP]; /* fe_lock */
int fe_rx_srs_cnt; /* fe_lock */
void *fe_tx_srs; /* WO */
/*
* Set when this is a unicast flow; points to a mac_client_impl_t
* (FLENT_TO_MIP casts it accordingly).
*/
void *fe_mcip; /* WO */
/*
* Used by mci_flent_list of mac_client_impl_t to track flows sharing
* the same mac_client_impl_t.
*/
struct flow_entry_s *fe_client_next;
/*
* Set when this is a broadcast or multicast flow; points to a
* mac_bcast_grp_t.
*/
void *fe_mbg; /* WO */
/* Bit mask of the FLOW_* type flags (FLOW_PRIMARY_MAC etc.). */
uint_t fe_type; /* WO */
/*
* BW control info.
*/
mac_bw_ctl_t fe_tx_bw;
mac_bw_ctl_t fe_rx_bw;
/*
* Used by flow table lookup code
*/
flow_match_fn_t fe_match;
/*
* Used by mac_flow_remove().
*/
int fe_index;
flow_tab_t *fe_flow_tab;
kstat_t *fe_ksp;
/* Counters updated through FLOW_STAT_UPDATE. */
flow_stats_t fe_flowstats;
boolean_t fe_desc_logged;
uint64_t fe_nic_speed;
};
/*
* Various structures used by the flows framework for keeping track
* of packet state information.
*/
/*
* Layer 2
* Embedded in flow_state_s as fs_l2info.  NOTE(review): the per-field
* notes below are inferred from the member names; the code that fills
* them in is outside this header -- confirm.
*/
typedef struct flow_l2info_s {
uchar_t *l2_start; /* presumably start of the L2 header */
uint8_t *l2_daddr; /* presumably destination address */
uint16_t l2_vid; /* presumably VLAN id (cf. FLOW_IGNORE_VLAN) */
uint32_t l2_sap;
uint_t l2_hdrsize; /* presumably L2 header length */
} flow_l2info_t;
/*
* Layer 3
* Embedded in flow_state_s as fs_l3info.  NOTE(review): the per-field
* notes below are inferred from the member names -- confirm against the
* code that fills them in.
*/
typedef struct flow_l3info_s {
uchar_t *l3_start; /* presumably start of the L3 header */
uint8_t l3_protocol;
uint8_t l3_version;
boolean_t l3_dst_or_src; /* meaning not visible in this header */
uint_t l3_hdrsize; /* presumably L3 header length */
boolean_t l3_fragmented;
} flow_l3info_t;
/*
* Layer 4
* Embedded in flow_state_s as fs_l4info.  NOTE(review): how l4_hash_port
* is chosen relative to the source and destination ports is not visible
* in this header -- confirm against the code that fills it in.
*/
typedef struct flow_l4info_s {
uchar_t *l4_start; /* presumably start of the L4 header */
uint16_t l4_src_port;
uint16_t l4_dst_port;
uint16_t l4_hash_port;
} flow_l4info_t;
/*
* Combined state structure.
* Holds flow direction and an mblk_t pointer, plus one info structure
* per protocol layer.  This is the state handed to the lookup-time
* functions of flow_ops_s (fo_hash, fo_accept[]) and to flow_match_fn_t.
*/
struct flow_state_s {
uint_t fs_flags;
mblk_t *fs_mp;
flow_l2info_t fs_l2info;
flow_l3info_t fs_l3info;
flow_l4info_t fs_l4info;
};
/*
* Flow ops vector.
* There are two groups of functions. The ones ending with _fe are
* called when a flow is being added. The others (hash, accept) are
* called at flow lookup time.
*/
/* Number of slots in the fo_accept[] array of flow_ops_s. */
#define FLOW_MAX_ACCEPT 16
typedef struct flow_ops_s {
/*
* fo_accept_fe():
* Validates the contents of the flow and checks whether
* it's compatible with the flow table. Sets the fe_match
* function of the flow.
*/
int (*fo_accept_fe)(flow_tab_t *, flow_entry_t *);
/*
* fo_hash_fe():
* Generates a hash index to the flow table. This function
* must use the same algorithm as fo_hash(), which is used
* by the flow lookup code path.
*/
uint32_t (*fo_hash_fe)(flow_tab_t *, flow_entry_t *);
/*
* fo_match_fe():
* This is used for finding identical flows.
*/
boolean_t (*fo_match_fe)(flow_tab_t *, flow_entry_t *,
flow_entry_t *);
/*
* fo_insert_fe():
* Used for inserting a flow to a flow chain.
* Protocols that have special ordering requirements would
* need to implement this. For those that don't,
* flow_generic_insert_fe() may be used.
*/
int (*fo_insert_fe)(flow_tab_t *, flow_entry_t **,
flow_entry_t *);
/*
* fo_hash():
* Calculates the flow hash index based on the accumulated
* state in flow_state_t. Must use the same algorithm as
* fo_hash_fe().
*/
uint32_t (*fo_hash)(flow_tab_t *, flow_state_t *);
/*
* fo_accept[]:
* Array of accept functions.
* Each function in the array will accumulate enough state
* (header length, protocol) to allow the next function to
* proceed. We support up to FLOW_MAX_ACCEPT functions which
* should be sufficient for all practical purposes.
*/
int (*fo_accept[FLOW_MAX_ACCEPT])(flow_tab_t *,
flow_state_t *);
} flow_ops_t;
/*
* Generic flow table.
*/
struct flow_tab_s {
/*
* Reader/writer lock for the table. The fe_next chain links of the
* flow entries are annotated as protected by ft_lock.
*/
krwlock_t ft_lock;
/*
* Contains a list of functions (described above)
* specific to this table type.
*/
flow_ops_t ft_ops;
/*
* Indicates what types of flows are supported.
*/
flow_mask_t ft_mask;
/*
* An array of flow_entry_t * of size ft_size.
* Each element is the beginning of a hash chain.
*/
flow_entry_t **ft_table;
uint_t ft_size;
/*
* The number of flows inserted into ft_table.
* FLOW_TAB_EMPTY tests this for zero.
*/
uint_t ft_flow_count;
/*
* NOTE(review): presumably the MAC instance and MAC client the table
* is attached to -- confirm.
*/
struct mac_impl_s *ft_mip;
struct mac_client_impl_s *ft_mcip;
};
/*
* This is used for describing what type of flow table can be created.
* mac_flow.c contains a list of these structures.
* The three members have the same types, in the same order, as the first
* three parameters of mac_flow_tab_create().
*/
typedef struct flow_tab_info_s {
flow_ops_t *fti_ops;
flow_mask_t fti_mask;
uint_t fti_size;
} flow_tab_info_t;
/*
 * FLOW_TAB_EMPTY(ft): non-zero when there is no flow table at all (NULL)
 * or the table holds no flows.  ft is evaluated twice, and ft_flow_count
 * is read without the macro taking ft_lock.
 */
#define FLOW_TAB_EMPTY(ft) \
	(!((ft) != NULL && (ft)->ft_flow_count != 0))
/*
* This is used by mac_tx_send.
* FLOW_TX_STATS_UPDATE adds the three counters to the opackets, obytes
* and oerrors statistics of a flow entry. The counters here are uint_t;
* FLOW_STAT_UPDATE widens each to uint64_t before adding.
*/
typedef struct mac_tx_stats_s {
uint_t ts_opackets;
uint_t ts_obytes;
uint_t ts_oerrors;
} mac_tx_stats_t;
/*
 * FLOW_STAT_UPDATE(flent, stat, delta): add delta, converted to uint64_t,
 * to the fs_<stat> member of the flow entry's fe_flowstats.  The first
 * argument is cast to flow_entry_t *, so a void * may be passed.  The
 * macro takes no lock itself.
 */
#define FLOW_STAT_UPDATE(flent, stat, delta) { \
	((flow_entry_t *)(flent))->fe_flowstats.fs_##stat += \
	    ((uint64_t)(delta)); \
}
/*
 * FLOW_TX_STATS_UPDATE(flent, txs): fold a mac_tx_stats_t (txs, a pointer)
 * into the flow entry's statistics by applying FLOW_STAT_UPDATE to the
 * opackets, obytes and oerrors counters in turn.
 */
#define FLOW_TX_STATS_UPDATE(flent, txs) { \
	FLOW_STAT_UPDATE((flent), opackets, (txs)->ts_opackets); \
	FLOW_STAT_UPDATE((flent), obytes, (txs)->ts_obytes); \
	FLOW_STAT_UPDATE((flent), oerrors, (txs)->ts_oerrors); \
}
/*
 * Entry points taking no arguments and returning nothing; presumably the
 * framework-wide set-up and tear-down hooks -- confirm in the
 * implementation.  Declared with (void) so that these are real
 * prototypes: an empty "()" leaves the parameters unspecified in C and
 * lets calls with stray arguments compile unchecked.
 */
extern void mac_flow_init(void);
extern void mac_flow_fini(void);
/*
* Flow entry creation, insertion and lookup.
* NOTE(review): the int results are presumably 0 on success and an errno
* value otherwise -- confirm in the implementation.
*/
extern int mac_flow_create(flow_desc_t *, mac_resource_props_t *,
char *, void *, uint_t, flow_entry_t **);
extern int mac_flow_add(flow_tab_t *, flow_entry_t *);
extern int mac_flow_add_subflow(mac_client_handle_t, flow_entry_t *,
boolean_t);
extern int mac_flow_hash_add(flow_entry_t *);
extern int mac_flow_lookup_byname(char *, flow_entry_t **);
extern int mac_flow_lookup(flow_tab_t *, mblk_t *, uint_t,
flow_entry_t **);
/* Table walkers; the callback receives each flow entry plus the void * argument. */
extern int mac_flow_walk(flow_tab_t *, int (*)(flow_entry_t *, void *),
void *);
extern int mac_flow_walk_nolock(flow_tab_t *,
int (*)(flow_entry_t *, void *), void *);
/* Accessors and modifiers for an existing flow entry. */
extern void mac_flow_modify(flow_tab_t *, flow_entry_t *,
mac_resource_props_t *);
extern void *mac_flow_get_client_cookie(flow_entry_t *);
extern uint32_t mac_flow_modify_props(flow_entry_t *, mac_resource_props_t *);
extern int mac_flow_update(flow_tab_t *, flow_entry_t *, flow_desc_t *);
extern void mac_flow_get_desc(flow_entry_t *, flow_desc_t *);
extern void mac_flow_set_desc(flow_entry_t *, flow_desc_t *);
/* Removal, quiescing and teardown. */
extern void mac_flow_remove(flow_tab_t *, flow_entry_t *, boolean_t);
extern void mac_flow_hash_remove(flow_entry_t *);
extern void mac_flow_wait(flow_entry_t *, mac_flow_state_t);
extern void mac_flow_quiesce(flow_entry_t *);
extern void mac_flow_restart(flow_entry_t *);
extern void mac_flow_cleanup(flow_entry_t *);
/* Called by FLOW_REFRELE, with fe_lock held, when fe_refcnt drops to zero. */
extern void mac_flow_destroy(flow_entry_t *);
/* Flow table construction and destruction. */
extern void mac_flow_tab_create(flow_ops_t *, flow_mask_t, uint_t,
struct mac_impl_s *, flow_tab_t **);
extern void mac_flow_l2tab_create(struct mac_impl_s *, flow_tab_t **);
extern void mac_flow_tab_destroy(flow_tab_t *);
/*
* NOTE(review): mac_flow_drop takes (void *, void *, mblk_t *) whereas
* flow_fn_t additionally takes a trailing boolean_t, so it cannot be
* stored in fe_cb_fn without a cast -- confirm this is intended.
*/
extern void mac_flow_drop(void *, void *, mblk_t *);
extern void flow_stat_destroy(flow_entry_t *);
#ifdef __cplusplus
}
#endif
#endif /* _MAC_FLOW_IMPL_H */