bge_chip2.c revision 67f02347ff26da3909c580115d4ba535ab72078c
1N/A * The contents of this file are subject to the terms of the 1N/A * Common Development and Distribution License (the "License"). 1N/A * You may not use this file except in compliance with the License. 1N/A * See the License for the specific language governing permissions 1N/A * and limitations under the License. 1N/A * When distributing Covered Code, include this CDDL HEADER in each 1N/A * If applicable, add the following below this CDDL HEADER, with the 1N/A * fields enclosed by brackets "[]" replaced with your own identifying 1N/A * information: Portions Copyright [yyyy] [name of copyright owner] 1N/A * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 1N/A * Use is subject to license terms. 1N/A#
pragma ident "%Z%%M% %I% %E% SMI" 1N/A * Future features ... ? 1N/A * By default MSI is enabled on all supported platforms but it is disabled 1N/A * for some Broadcom chips due to known MSI hardware issues. Currently MSI 1N/A * is enabled only for 5714C A2 and 5715C A2 broadcom chips. 1N/A * Patchable globals: 1N/A * Value to program into the MLCR; controls the chip's GPIO pins 1N/A * bge_dma_{rd,wr}prio 1N/A * Relative priorities of DMA reads & DMA writes respectively. 1N/A * These may each be patched to any value 0-3. Equal values 1N/A * will give "fair" (round-robin) arbitration for PCI access. 1N/A * Unequal values will give one or the other function priority. 1N/A * the Broadcom PRM for things you can fiddle with in this 1N/A * bge_{tx,rx}_{count,ticks}_{norm,intr} 1N/A * #s of descriptors, ticks are in microseconds. *norm* values 1N/A * refer to the 'during-interrupt' versions - see the PRM. 1N/A * NOTE: these values have been determined by measurement. They 1N/A * differ significantly from the values recommended in the PRM. * Memory pool configuration parameters. * These are generally specific to each member of the chip family, since * Setting the mbuf pool length for a specific type of chip to 0 inhibits * the driver from programming the various registers; instead they are left * at their hardware defaults. This is the preferred option for later chips * (5705+), whereas the older chips *required* these registers to be set, * since the h/w default was 0 ;-( * Various high and low water marks, thresholds, etc ... * Note: these are taken from revision 7 of the PRM, and some are different * from both the values in earlier PRMs *and* those determined experimentally * and used in earlier versions of this driver ... * ========== Low-level chip & ring buffer manipulation ========== * Config space read-modify-write routines * 8- and 16-bit set/clr operations are not used; all the config registers * that we need to do bit-twiddling on are 32 bits wide. 
I'll leave the * code here, though, in case we ever find that we do want it after all ... BGE_DEBUG((
"bge_cfg_set8($%p, 0x%lx, 0x%x): 0x%x => 0x%x",
BGE_DEBUG((
"bge_cfg_clr8($%p, 0x%lx, 0x%x): 0x%x => 0x%x",
BGE_TRACE((
"bge_cfg_set16($%p, 0x%lx, 0x%x)",
BGE_DEBUG((
"bge_cfg_set16($%p, 0x%lx, 0x%x): 0x%x => 0x%x",
BGE_TRACE((
"bge_cfg_clr16($%p, 0x%lx, 0x%x)",
BGE_DEBUG((
"bge_cfg_clr16($%p, 0x%lx, 0x%x): 0x%x => 0x%x",
BGE_TRACE((
"bge_cfg_set32($%p, 0x%lx, 0x%x)",
BGE_DEBUG((
"bge_cfg_set32($%p, 0x%lx, 0x%x): 0x%x => 0x%x",
BGE_TRACE((
"bge_cfg_clr32($%p, 0x%lx, 0x%x)",
BGE_DEBUG((
"bge_cfg_clr32($%p, 0x%lx, 0x%x): 0x%x => 0x%x",
* Indirect access to registers & RISC scratchpads, using config space * This isn't currently used, but someday we might want to use it for * restoring the Subsystem Device/Vendor registers (which aren't directly * writable in Config Space), or for downloading firmware into the RISCs * In any case there are endian issues to be resolved before this code is * enabled; the bizarre way that bytes get twisted by this chip AND by * the PCI bridge in SPARC systems mean that we shouldn't enable it until * it's been thoroughly tested for all access sizes on all supported * architectures (SPARC *and* x86!). BGE_DEBUG((
"bge_ind_get32($%p, 0x%lx) => 0x%x",
BGE_TRACE((
"bge_ind_put32($%p, 0x%lx, 0x%x)",
#
endif /* BGE_IND_IO32 */ BGE_DEBUG((
"bge_pci_check($%p): PCI status 0x%x",
#
endif /* BGE_DEBUGGING */ * Perform first-stage chip (re-)initialisation, using only config-space * returning the data in the structure pointed to by <idp>. * + Configure the target-mode endianness (swap) options. * + Disable interrupts and enable Memory Space accesses. * + Enable or disable Bus Mastering according to the <enable_dma> flag. * This sequence is adapted from Broadcom document 570X-PG102-R, * page 102, steps 1-3, 6-8 and 11-13. The omitted parts of the sequence * are 4 and 5 (Reset Core and wait) which are handled elsewhere. * This function MUST be called before any non-config-space accesses * are made; on this first call <enable_dma> is B_FALSE, and it * effectively performs steps 3-1(!) of the initialisation sequence * (the rest are not required but should be harmless). * It MUST also be called also after a chip reset, as this disables * Memory Space cycles! In this case, <enable_dma> is B_TRUE, and * it is effectively performing steps 6-8. BGE_TRACE((
"bge_chip_cfg_init($%p, $%p, %d)",
* Step 3: save PCI cache line size and subsystem vendor ID * Read all the config-space registers that characterise the * and subsystem device id. We expect (but don't check) that * (vendor == VENDOR_ID_BROADCOM) && (device == DEVICE_ID_5704) * Also save all bus-transaction related registers (cache-line * cleared by reset, so we'll have to restore them later. This * comes from the Broadcom document 570X-PG102-R ... * Note: Broadcom document 570X-PG102-R seems to be in error * here w.r.t. the offsets of the Subsystem Vendor ID and * Subsystem (Device) ID registers, which are the opposite way * round according to the PCI standard. For good measure, we BGE_DEBUG((
"bge_chip_cfg_init: %s bus is %s and %s; #INTA is %s",
BGE_DEBUG((
"bge_chip_cfg_init: vendor 0x%x device 0x%x revision 0x%x",
BGE_DEBUG((
"bge_chip_cfg_init: subven 0x%x subdev 0x%x asic_rev 0x%x",
BGE_DEBUG((
"bge_chip_cfg_init: clsize %d latency %d command 0x%x",
* Step 2 (also step 6): disable and clear interrupts. * Steps 11-13: configure PIO endianness options, and enable * indirect register access. We'll also select any other * options controlled by the MHCR (eg tagged status, mask * interrupt mode) at this stage ... * Note: internally, the chip is 64-bit and BIG-endian, but * since it talks to the host over a (LITTLE-endian) PCI bus, * it normally swaps bytes around at the PCI interface. * However, the PCI host bridge on SPARC systems normally * swaps the byte lanes around too, since SPARCs are also * BIG-endian. So it turns out that on SPARC, the right * option is to tell the chip to swap (and the host bridge * will swap back again), whereas on x86 we ask the chip * NOT to swap, so the natural little-endianness of the * PCI bus is assumed. Then the only thing that doesn't * automatically work right is access to an 8-byte register * by a little-endian host; but we don't want to set the * MHCR_ENABLE_REGISTER_WORD_SWAP bit because then 4-byte * accesses don't go where expected ;-( So we live with * that, and perform word-swaps in software in the few cases * where a chip register is defined as an 8-byte value -- * see the code below for details ... * Note: the meaning of the 'MASK_INTERRUPT_MODE' bit isn't * very clear in the register description in the PRM, but * Broadcom document 570X-PG104-R page 248 explains a little * more (under "Broadcom Mask Mode"). The bit changes the way * the MASK_PCI_INT_OUTPUT bit works: with MASK_INTERRUPT_MODE * clear, the chip interprets MASK_PCI_INT_OUTPUT in the same * way as the 5700 did, which isn't very convenient. Setting * the MASK_INTERRUPT_MODE bit makes the MASK_PCI_INT_OUTPUT * bit do just what its name says -- MASK the PCI #INTA output * (i.e. deassert the signal at the pin) leaving all internal * state unchanged. This is much more convenient for our * interrupt handler, so we set MASK_INTERRUPT_MODE here. 
* Note: the inconvenient semantics of the interrupt mailbox * zero enables AND CLEARS it) would make race conditions * likely in the interrupt handler: * (1) acknowledge & disable interrupts * (3) enable interrupts -- also clears pending * If the chip received more packets and internally generated * an interrupt between the check at (2) and the mbox write * at (3), this interrupt would be lost :-( * The best way to avoid this is to use TAGGED STATUS mode, * where the chip includes a unique tag in each status block * update, and the host, when re-enabling interrupts, passes * the last tag it saw back to the chip; then the chip can * see whether the host is truly up to date, and regenerate * Step 1 (also step 7): Enable PCI Memory Space accesses * Enable or disable Bus Mastering * Note that all other bits are taken from the original value saved * the first time through here, rather than from the current register * value, 'cos that will have been cleared by a soft RESET since. * of the parity-error and system-error enable bits across multiple * Step 8: Disable PCI-X Relaxed Ordering -- doesn't apply * on BCM5714 revision A0, false parity error gets generated * due to a logic bug. Provide a workaround by disabling parity * On some PCI-E devices, there were instances when * the device was still link training. * Clear any remaining error status bits * Make sure these indirect-access registers are sane * rather than random after power-up or reset * This is used to distinguish whether the CPU is running in AMD64 or Intel EM64T mode. * If the CPU runs in Intel EM64T mode, 64-bit operations do not work correctly * for PCI-Express based network interface cards. This is the work-around * Operating register get/set access routines BGE_TRACE((
"bge_reg_put32($%p, 0x%lx, 0x%x)",
BGE_TRACE((
"bge_reg_set32($%p, 0x%lx, 0x%x)",
BGE_TRACE((
"bge_reg_clr32($%p, 0x%lx, 0x%x)",
#
endif /* _LITTLE_ENDIAN */ BGE_TRACE((
"bge_reg_get64($%p, 0x%lx) = 0x%016llx",
BGE_TRACE((
"bge_reg_put64($%p, 0x%lx, 0x%016llx)",
#
endif /* _LITTLE_ENDIAN */ * The DDI doesn't provide get/put functions for 128 bit data * so we put RCBs out as two 64-bit chunks instead. BGE_TRACE((
"bge_reg_putrcb($%p, 0x%lx, 0x%016llx:%04x:%04x:%08x)",
BGE_TRACE((
"bge_mbx_put($%p, 0x%lx, 0x%016llx)",
* Mailbox registers are nominally 64 bits on the 5701, but * the MSW isn't used. On the 5703, they're only 32 bits * anyway. So here we just write the lower(!) 32 bits - * remembering that the chip is big-endian, even though the * PCI bus is little-endian ... * Blink all three LINK LEDs on simultaneously, then all off, * then restore to automatic hardware control. This is used * in laboratory testing to trigger a logic analyser or scope. #
endif /* BGE_DEBUGGING */ * NIC on-chip memory access routines * Only 32K of NIC memory is visible at a time, controlled by the * Memory Window Base Address Register (in PCI config space). Once * this is set, the 32K region of NIC-local memory that it refers * to can be directly addressed in the upper 32K of the 64K of PCI * memory space used for the device. /* workaround for word swap error */ BGE_TRACE((
"bge_nic_get32($%p, 0x%lx) = 0x%08x",
BGE_TRACE((
"bge_nic_put32($%p, 0x%lx, 0x%08x)",
/* workaround for word swap error */ BGE_TRACE((
"bge_nic_get64($%p, 0x%lx) = 0x%016llx",
BGE_TRACE((
"bge_nic_put64($%p, 0x%lx, 0x%016llx)",
* The DDI doesn't provide get/put functions for 128 bit data * so we put RCBs out as two 64-bit chunks instead. BGE_TRACE((
"bge_nic_putrcb($%p, 0x%lx, 0x%016llx:%04x:%04x:%08x)",
* MII (PHY) register get/set access routines * These use the chip's MII auto-access method, controlled by the * MII Communication register at 0x044c, so the CPU doesn't have * to fiddle with the individual bits. BGE_TRACE((
"bge_mii_access($%p, 0x%lx, 0x%x, 0x%x)",
* Assemble the command ... * Wait for any command already in progress ... * Note: this *shouldn't* ever find that there is a command * in progress, because we already hold the <genlock> mutex. * Nonetheless, we have sometimes seen the MI_COMMS_START * bit set here -- it seems that the chip can initiate MII * accesses internally, even with polling OFF. "MI_COMMS_START set for %d us; 0x%x->0x%x",
* Drop out early if the READ FAILED bit is set -- this chip * could be a 5703/4S, with a SerDes instead of a PHY! * The PRM says to wait 5us after seeing the START bit clear * and then re-read the register to get the final value of the * data field, in order to avoid a race condition where the * START bit is clear but the data field isn't yet valid. * Note: we don't actually seem to encounter this race; * except when the START bit is seen set again (see below), * the data field doesn't change during this 5us interval. * Unfortunately, when following the PRM's instructions above, * we have occasionally seen the START bit set again(!) in the * value read after the 5us delay. This seems to be due to the * chip autonomously starting another MII access internally. * internal command, rather than the one that we thought had * just finished. So in this case, we fall back to returning * the data from the original read that showed START clear. "MI_COMMS_START set after transaction; 0x%x->0x%x",
BGE_TRACE((
"bge_mii_put16($%p, 0x%lx, 0x%x)",
* Basic SEEPROM get/set access routine * This uses the chip's SEEPROM auto-access method, controlled by the * Serial EEPROM Address/Data Registers at 0x6838/683c, so the CPU * doesn't have to fiddle with the individual bits. * The caller should hold <genlock> and *also* have already acquired * the right to access the SEEPROM, via bge_nvmem_acquire() above. * ENODATA on access timeout (maybe retryable: device may just be busy) * EPROTO on other h/w or s/w errors. * <*dp> is an input to a SEEPROM_ACCESS_WRITE operation, or an output * from a (successful) SEEPROM_ACCESS_READ. * On the newer chips that support both SEEPROM & Flash, we need * to specifically enable SEEPROM access (Flash is the default). * On older chips, we don't; SEEPROM is the only NVtype supported, * and the NVM control registers don't exist ... * Check there's no command in progress. * Note: this *shouldn't* ever find that there is a command * in progress, because we already hold the <genlock> mutex. * Also, to ensure we don't have a conflict with the chip's * internal firmware or a process accessing the same (shared) * SEEPROM through the other port of a 5704, we've already * been through the "software arbitration" protocol. * So this is just a final consistency check: we shouldn't * see EITHER the START bit (command started but not complete) * OR the COMPLETE bit (command completed but not cleared). * Assemble the command ... * By observation, a successful access takes ~20us on a 5703/4, * but apparently much longer (up to 1000us) on the obsolescent * timeouts here; but OTOH, we don't want a bogus access to lock * out interrupts for longer than necessary. So we'll allow up * All OK; read the SEEPROM data register, then write back * the value read from the address register in order to * clear the <complete> bit and leave the SEEPROM access * state machine idle, ready for the next access ... * Hmm ... what happened here? * Most likely, the user addressed a non-existent SEEPROM. 
Or * maybe the SEEPROM was busy internally (e.g. processing a write) * and didn't respond to being addressed. Either way, it's left * the SEEPROM access state machine wedged. So we'll reset it * before we leave, so it's ready for next time ... * Basic Flash get/set access routine * These use the chip's Flash auto-access method, controlled by the * Flash Access Registers at 0x7000-701c, so the CPU doesn't have to * fiddle with the individual bits. * The caller should hold <genlock> and *also* have already acquired * the right to access the Flash, via bge_nvmem_acquire() above. * ENODATA on access timeout (maybe retryable: device may just be busy) * ENODEV if the NVmem device is missing or otherwise unusable * <*dp> is an input to a NVM_FLASH_CMD_WR operation, or an output * from a (successful) NVM_FLASH_CMD_RD. * On the newer chips that support both SEEPROM & Flash, we need * to specifically disable SEEPROM access while accessing Flash. * The older chips don't support Flash, and the NVM registers don't * exist, so we shouldn't be here at all! * Assemble the command ... * All OK; read the data from the Flash read register * Hmm ... what happened here? * Most likely, the user addressed a non-existent Flash. Or * maybe the Flash was busy internally (e.g. processing a write) * and didn't respond to being addressed. Either way, there's * nothing we can do here ... * The next two functions regulate access to the NVram (if fitted). * On a 5704 (dual core) chip, there's only one SEEPROM and one Flash * (SPI) interface, but they can be accessed through either port. These * are managed by different instances of this driver and have no software * In addition (and even on a single core chip) the chip's internal * firmware can access the SEEPROM/Flash, most notably after a RESET * when it may download code to run internally. * So we need to arbitrate between these various software agents. 
For * this purpose, the chip provides the Software Arbitration Register, * which implements hardware(!) arbitration. * This functionality didn't exist on older (5700/5701) chips, so there's * nothing we can do by way of arbitration on those; also, if there's no * SEEPROM/Flash fitted (or we couldn't determine what type), there's also * The internal firmware appears to use Request 0, which is the highest * priority. So we'd like to use Request 2, leaving one higher and one * lower for any future developments ... but apparently this doesn't * always work. So for now, the code uses Request 1 ;-( * No arbitration performed, no release needed * Our own request should be present (whether or not granted) ... * ... this will make it go away. * Arbitrate for access to the NVmem, if necessary * EAGAIN if the device is in use (retryable) * ENODEV if the NVmem device is missing or otherwise unusable * Access denied: no (recognisable) device fitted * Access granted: no arbitration needed (or possible) * Access conditional: conduct arbitration protocol * We're holding the per-port mutex <genlock>, so no other * threads can be attempting to access the NVmem through *this* * port. But it could be in use by the *other* port (of a 5704), * or by the chip's internal firmware, so we have to go through * the full (hardware) arbitration protocol ... * Note that *because* we're holding <genlock>, the interrupt handler * won't be able to progress. So we're only willing to spin for a * fairly short time. Specifically: * We *must* wait long enough for the hardware to resolve all * requests and determine the winner. Fortunately, this is * "almost instantaneous", even as observed by GHz CPUs. * A successful access by another Solaris thread (via either * port) typically takes ~20us. So waiting a bit longer than * that will give a good chance of success, if the other user * *is* another thread on the other port. 
* However, the internal firmware can hold on to the NVmem * for *much* longer: at least 10 milliseconds just after a * RESET, and maybe even longer if the NVmem actually contains * code to download and run on the internal CPUs. * So, we'll allow 50us; if that's not enough then it's up to the * caller to retry later (hence the choice of return code EAGAIN). * Somebody else must be accessing the NVmem, so abandon our * attempt to take control of it. The caller can try again later ... * This code assumes that the GPIO1 bit has been wired up to the NVmem * write protect line in such a way that the NVmem is protected when * GPIO1 is an input, or is an output but driven high. Thus, to make the * NVmem writable we have to change GPIO1 to an output AND drive it low. * Note: there's only one set of GPIO pins on a 5704, even though they * can be accessed through either port. So the chip has to resolve what * happens if the two ports program a single pin differently ... the rule * it uses is that if the ports disagree about the *direction* of a pin, * "output" wins over "input", but if they disagree about its *value* as * an output, then the pin is TRISTATED instead! In such a case, no-one * wins, and the external signal does whatever the external circuitry * defines as the default -- which we've assumed is the PROTECTED state. * So, we always change GPIO1 back to being an *input* whenever we're not * specifically using it to unprotect the NVmem. This allows either port * to update the NVmem, although obviously only one at a time! * The caller should hold <genlock> and *also* have already acquired the * right to access the NVmem, via bge_nvmem_acquire() above. * Now put it all together ... 
* Try to acquire control of the NVmem; if successful, then: * unprotect it (if we want to write to it) * perform the requested access * reprotect it (after a write) * EAGAIN if the device is in use (retryable) * ENODATA on access timeout (maybe retryable: device may just be busy) * ENODEV if the NVmem device is missing or otherwise unusable * EPROTO on other h/w or s/w errors. * Attempt to get a MAC address from the SEEPROM or Flash, if any * The Broadcom chip is natively BIG-endian, so that's how the * MAC address is represented in NVmem. We may need to swap it * around on a little-endian host ... #
else /* BGE_SEE_IO32 || BGE_FLASH_IO32 */ * Dummy version for when we're not supporting NVmem access #
endif /* BGE_SEE_IO32 || BGE_FLASH_IO32 */ * Determine the type of NVmem that is (or may be) attached to this chip, * We shouldn't get here; it means we don't recognise * the chip, which means we don't know how to determine * what sort of NVmem (if any) it has. So we'll say * NONE, to disable the NVmem access code ... * These devices support *only* SEEPROMs * receive rule: direct all TCP traffic to ring RULE_MATCH_TO_RING * 1. to direct UDP traffic, set: * rulep->control = RULE_PROTO_CONTROL; * rulep->mask_value = RULE_UDP_MASK_VALUE; * 2. to direct ICMP traffic, set: * rulep->control = RULE_PROTO_CONTROL; * rulep->mask_value = RULE_ICMP_MASK_VALUE; * 3. to direct traffic by source ip, set: * rulep->control = RULE_SIP_CONTROL; * rulep->mask_value = RULE_SIP_MASK_VALUE; * set receive rule registers * Using the values captured by bge_chip_cfg_init(), and additional probes * as required, characterise the chip fully: determine the label by which * to refer to this chip, the correct settings for various registers, and * of course whether the device and/or subsystem are supported! * Check the PCI device ID to determine the generic chip type and * select parameters that depend on this. * Note: because the SPARC platforms in general don't fit the * SEEPROM 'behind' the chip, the PCI revision ID register reads * as zero - which is why we use <asic_rev> rather than <revision> * Note: in general we can't distinguish between the Copper/SerDes * versions by ID alone, as some Copper devices (e.g. some but not * all 5703Cs) have the same ID as the SerDes equivalents. So we * treat them the same here, and the MII code works out the media * Revision A0 of the 5703/5793 had various errata * that we can't or don't work around, so it's not * supported, but all later versions are * Revision A0 of the 5704/5794 had various errata * but we have workarounds, so it *is* supported. * Apart from the label, we treat this as a 5705(?) * Apart from the label, we treat this as a 5705(?) 
* Setup the default jumbo parameter. * If jumbo is enabled and this kind of chipset supports jumbo feature, * setup below jumbo specific parameters. * Identify the NV memory type: SEEPROM or Flash? * Now, we want to check whether this device is part of a * supported subsystem (e.g., on the motherboard of a Sun * Rule 1: If the Subsystem Vendor ID is "Sun", then it's OK ;-) * Rule 2: If it's on the list on known subsystems, then it's OK. * Note: 0x14e41647 should *not* appear in the list, but the code * Rule 3: If it's a Taco/ENWS motherboard device, then it's OK * Unfortunately, early SunBlade 1500s and 2500s didn't reprogram * the Subsystem Vendor ID, so it defaults to Broadcom. Therefore, * we have to check specially for the exact device paths to the * motherboard devices on those platforms ;-( * Note: we can't just use the "supported-subsystems" mechanism * above, because the entry would have to be 0x14e41647 -- which * would then accept *any* plugin card that *didn't* contain a * Now check what we've discovered: is this truly a supported * chip on (the motherboard of) a supported platform? * Possible problems here: * 1) it's a completely unheard-of chip (e.g. 5761) * 2) it's a recognised but unsupported chip (e.g. 5701, 5703C-A0) * 3) it's a chip we would support if it were on the motherboard * of a Sun platform, but this one isn't ;-( "Device 'pci%04x,%04x' not recognized (%d?)",
"Device 'pci%04x,%04x' (%d) revision %d not supported",
"%d-based subsystem 'pci%04x,%04x' not validated",
* Various registers that control the chip's internal engines (state * machines) have a <reset> and <enable> bits (fortunately, in the * same place in each such register :-). * To reset the state machine, the <reset> bit must be written with 1; * it will then read back as 1 while the reset is in progress, but * self-clear to 0 when the reset completes. * To enable a state machine, one must set the <enable> bit, which * will continue to read back as 0 until the state machine is running. * To disable a state machine, the <enable> bit must be cleared, but * it will continue to read back as 1 until the state machine actually * This routine implements polling for completion of a reset, enable * or disable operation, returning B_TRUE on success (bit reached the * required state) or B_FALSE on timeout (200*100us == 20ms). BGE_TRACE((
"bge_chip_poll_engine($%p, 0x%lx, 0x%x, 0x%x)",
* Various registers that control the chip's internal engines (state * machines) have a <reset> bit (fortunately, in the same place in * each such register :-). To reset the state machine, this bit must * be written with 1; it will then read back as 1 while the reset is * in progress, but self-clear to 0 when the reset completes. * This code sets the bit, then polls for it to read back as zero. * The return value is B_TRUE on success (reset bit cleared itself), * or B_FALSE if the state machine didn't recover :( * NOTE: the Core reset is similar to other resets, except that we * can't poll for completion, since the Core reset disables memory * access! So we just have to assume that it will all complete in * 100us. See Broadcom document 570X-PG102-R, p102, steps 4-5. BGE_TRACE((
"bge_chip_reset_engine($%p, 0x%lx)",
BGE_DEBUG((
"bge_chip_reset_engine: 0x%lx before reset = 0x%08x",
* BCM5714/5721/5751 pcie chip special case. In order to avoid * resetting PCIE block and bringing PCIE link down, bit 29 * in the register needs to be set first, and then set it again * while the reset bit is written. * See:P500 of 57xx-PG102-RDS.pdf. * Special case - causes Core reset * On SPARC v9 we want to ensure that we don't start * timing until the I/O access has actually reached * the chip, otherwise we might make the next access * too early. And we can't just force the write out * by following it with a read (even to config space) * because that would cause the fault we're trying * to avoid. Hence the need for membar_sync() here. * On some platforms,system need about 300us for /* PCI-E device need more reset time */ /* Set PCIE max payload size and clear error status. */ * Various registers that control the chip's internal engines (state * machines) have an <enable> bit (fortunately, in the same place in * each such register :-). To stop the state machine, this bit must * be written with 0, then polled to see when the state machine has * The return value is B_TRUE on success (enable bit cleared), or * B_FALSE if the state machine didn't stop :( BGE_TRACE((
"bge_chip_disable_engine($%p, 0x%lx, 0x%x)",
* Not quite like the others; it doesn't * have an <enable> bit, but instead we * have to set and then clear all the bits * Various registers that control the chip's internal engines (state * machines) have an <enable> bit (fortunately, in the same place in * each such register :-). To start the state machine, this bit must * be written with 1, then polled to see when the state machine has * The return value is B_TRUE on success (enable bit set), or * B_FALSE if the state machine didn't start :( BGE_TRACE((
"bge_chip_enable_engine($%p, 0x%lx, 0x%x)",
* Not quite like the others; it doesn't * have an <enable> bit, but instead we * have to set and then clear all the bits * Reprogram the Ethernet, Transmit, and Receive MAC * modes to match the param_* variables * Reprogram the Ethernet MAC mode ... BGE_DEBUG((
"bge_sync_mac_modes($%p) Ethernet MAC mode 0x%x => 0x%x",
* ... the Transmit MAC mode ... BGE_DEBUG((
"bge_sync_mac_modes($%p) Transmit MAC mode 0x%x => 0x%x",
* ... and the Receive MAC mode BGE_DEBUG((
"bge_sync_mac_modes($%p) Receive MAC mode 0x%x => 0x%x",
* bge_chip_sync() -- program the chip with the unicast MAC address, * the multicast hash table, the required level of promiscuity, and * the current loopback mode ... * If the TX/RX MAC engines are already running, we should stop * them (and reset the RX engine) before changing the parameters. * If they're not running, this will have no effect ... * NOTE: this is currently disabled by default because stopping * and restarting the Tx engine may cause an outgoing packet in * transit to be truncated. Also, stopping and restarting the * Rx engine seems to not work correctly on the 5705. Testing * has not (yet!) revealed any problems with NOT stopping and * restarting these engines (and Broadcom say their drivers don't * do this), but if it is found to cause problems, this variable * can be patched to re-enable the old behaviour ... * Reprogram the hashed multicast address table ... * Transform the MAC address from host to chip format, then * reprogram the transmit random backoff seed and the unicast BGE_DEBUG((
"bge_chip_sync($%p) setting MAC address %012llx",
* Set or clear the PROMISCUOUS mode bit * Sync the rest of the MAC modes too ... * Restart RX/TX MAC engines if required ... * This array defines the sequence of state machine control registers * in which the <enable> bit must be cleared to bring the chip to a * clean stop. Taken from Broadcom document 570X-PG102-R, p116. * bge_chip_stop() -- stop all chip processing * If the <fault> parameter is B_TRUE, we're stopping the chip because * we've detected a problem internally; otherwise, this is a normal * (clean) stop (at user request i.e. the last STREAM has been closed). * When the driver tries to shut down the BCM5705/5788/5721/5751/ * 5752/5714 and 5715 chipsets, the buffer manager and the * memory arbiter should not be disabled. * Finally, disable (all) MAC events & clear the MAC status * Do we need to check whether everything completed OK? * Probably not ... it always works anyway. * Poll for completion of chip's ROM firmware; also, at least on the * first time through, find and return the hardware MAC address, if any. * Step 18: put the T3_MAGIC_NUMBER into the GENCOMM port * Step 19: poll for firmware completion (GENCOMM port set * to the ones complement of T3_MAGIC_NUMBER). * While we're at it, we also read the MAC address register; * at some stage the firmware will load this with the * When both the magic number and the MAC address are set, * we're done; but we impose a time limit of one second * (1000*1000us) in case the firmware fails in some fashion * or the SEEPROM that provides that MAC address isn't fitted. * After the first time through (chip state != INITIAL), we * don't need the MAC address to be set (we've already got it * or not, from the first time), so we don't wait for it, but * we still have to wait for the T3_MAGIC_NUMBER. * Note: the magic number is only a 32-bit quantity, but the NIC * memory is 64-bit (and big-endian) internally. 
Addressing the * GENCOMM word as "the upper half of a 64-bit quantity" makes * it work correctly on both big- and little-endian hosts. BGE_DEBUG((
"bge_poll_firmware: put T3 magic 0x%llx in GENCOMM" for (i = 0; i <
1000; ++i) {
BGE_DEBUG((
"bge_poll_firmware($%p): PXE magic 0x%x after %d loops",
BGE_DEBUG((
"bge_poll_firmware: MAC %016llx, GENCOMM %016llx",
BGE_DEBUG((
"bge_chip_reset($%p, %d): current state is %d",
* Do we need to stop the chip cleanly before resetting? * Adapted from Broadcom document 570X-PG102-R, pp 102-116. * Updated to reflect Broadcom document 570X-PG104-R, pp 146-159. * Before resetting the Core clock, it is * also required to initialize the Memory Arbiter as specified in step 9 * and the Misc Host Control Register as specified in step 13 * Steps 4-5: reset Core clock & wait for completion * Steps 6-8: are done by bge_chip_cfg_init() * Step 8a: This may belong elsewhere, but BCM5721 needs * Step 9: enable MAC memory arbiter, bit30 and bit31 of 5714/5715 should * Steps 10-11: configure PIO endianness options and * enable indirect register access -- already done * Steps 12-13: enable writing to the PCI state & clock * control registers -- not required; we aren't going to * Steps 14-15: Configure DMA endianness options. See * the comments on the setting of the MHCR above. /* Wait for NVRAM init */ * Steps 16-17: poll for firmware completion * Step 18: enable external memory -- doesn't apply. * However we take the opportunity to set the MLCR anyway, as * this register also controls the SEEPROM auto-access method * which we may want to use later ... * The proper value here depends on the way the chip is wired * into the circuit board, as this register *also* controls which * of the "Miscellaneous I/O" pins are driven as outputs and the * values driven onto those pins! * See also step 74 in the PRM ... * Step 20: clear the Ethernet MAC mode register * Step 21: restore cache-line-size, latency timer, and * subsystem ID registers to their original values (not * those read into the local structure <chipid>, 'cos * that was after they were cleared by the RESET). 
* directly writable in config space, so we use the shadow * copy in "Page Zero" of register space to restore them * The SEND INDEX registers should be reset to zero by the * global chip reset; if they're not, there'll be trouble * later on -- usually in the form of an ASSERTion failure * On the first time through, save the factory-set MAC address * (if any). If bge_poll_firmware() above didn't return one * (from a chip register) consider looking in the attached NV * memory device, if any. Once we have it, we save it in both * register-image (64-bit) and byte-array forms. All-zero and * all-one addresses are not valid, and we refuse to stash those. * bge_chip_start() -- start the chip transmitting and/or receiving, * including enabling interrupts * Taken from Broadcom document 570X-PG102-R, pp 102-116. * The document specifies 95 separate steps to fully * initialise the chip!!!! * The reset code above has already got us as far as step * 21, so we continue with ... * Step 22: clear the MAC statistics block * (0x0300-0x0aff in NIC-local memory) * Step 23: clear the status block (in host memory) * Step 24: set DMA read/write control register * Step 25: Configure DMA endianness -- already done (16/17) * Step 26: Configure Host-Based Send Rings * Step 27: Indicate Host Stack Up * Step 28: Configure checksum options: * Solaris supports the hardware default checksum options * so there's nothing to do here. * Step 29: configure Timer Prescaler. The value is always the * same: the Core Clock frequency in MHz (66), minus 1, shifted * into bits 7-1. Don't set bit 0, 'cos that's the RESET bit * Steps 30-31: Configure MAC local memory pool & DMA pool registers * If the mbuf_length is specified as 0, we just leave these at * their hardware defaults, rather than explicitly setting them. * According to the Broadcom HRM, the driver should not change the parameters * when the chipset is 5705/5788/5721/5751/5714 or 5715. 
* Step 32: configure MAC memory pool watermarks * Step 33: configure DMA resource watermarks * Steps 34-36: enable buffer manager & internal h/w queues * Steps 37-39: initialise Receive Buffer (Producer) RCBs * Step 40: set Receive Buffer Descriptor Ring replenish thresholds * Steps 41-43: clear Send Ring Producer Indices and initialise * Send Producer Rings (0x0100-0x01ff in NIC-local memory) * Steps 44-45: initialise Receive Return Rings * (0x0200-0x02ff in NIC-local memory) * Step 46: initialise Receive Buffer (Producer) Ring indexes * Step 47: configure the MAC unicast address * Step 48: configure the random backoff seed * Step 96: set up multicast filters * Step 49: configure the MTU * Step 50: configure the IPG et al * Step 51: configure the default Rx Return Ring * Steps 52-54: configure Receive List Placement, * and enable Receive List Placement Statistics * Steps 55-56: enable Send Data Initiator Statistics * Steps 57-58: stop (?) the Host Coalescing Engine * Steps 59-62: initialise Host Coalescing parameters * Steps 63-64: initialise status block & statistics * The statistic block does not exist in some chipsets * Step 65: initialise Statistics Coalescing Tick Counter * Steps 66-67: initialise status block & statistics * NIC-local memory addresses * Steps 68-71: start the Host Coalescing Engine, the Receive BD * Completion Engine, the Receive List Placement Engine, and the * Receive List selector. Note: 0x3400 does not exist in BCM5714 * Step 72: Enable MAC DMA engines * Step 73: Clear & enable MAC statistics * Step 74: configure the MLCR (Miscellaneous Local Control * Register); not required, as we set up the MLCR in step 10 * (part of the reset code) above. * Step 75: clear Interrupt Mailbox 0 * Steps 76-87: Gentlemen, start your engines ... 
* Enable the DMA Completion Engine, the Write DMA Engine, * the Read DMA Engine, Receive Data Completion Engine, * the MBuf Cluster Free Engine, the Send Data Completion Engine, * the Send BD Completion Engine, the Receive BD Initiator Engine, * the Receive Data Initiator Engine, the Send Data Initiator Engine, * the Send BD Initiator Engine, and the Send BD Selector Engine. * Step 88: download firmware -- doesn't apply * Steps 89-90: enable Transmit & Receive MAC Engines * Step 91: disable auto-polling of PHY status * Step 92: configure D0 power state (not required) * Step 93: initialise LED control register () * Switch to 5700 (MAC) mode on these older chips * Step 95: set up physical layer (PHY/SerDes) * restart autoneg (if required) * Extra step (DSG): hand over all the Receive Buffers to the chip * MSI bits: The least significant MSI 16-bit word. * ISR will be triggered differently. * Extra step (DSG): select which interrupts are enabled * Program the Ethernet MAC engine to signal attention on * Link Change events, then enable interrupts on MAC, DMA, * and FLOW attention signals. * Step 97: enable PCI interrupts!!! * ========== Hardware interrupt handler ========== * Sync the status block, then atomically clear the specified bits in * the <flags-and-tag> field of the status block. * the <flags> word of the status block, returning the value of the * <tag> and the <flags> before the bits were cleared. BGE_DEBUG((
"bge_status_sync($%p, 0x%llx) returning 0x%llx",
* bge_intr() -- handle chip interrupts * GLD v2 checks that s/w setup is complete before passing * interrupts to this routine, thus eliminating the old * (and well-known) race condition around ddi_add_intr() * Check whether the chip says it's asserting #INTA; * if not, don't process or claim the interrupt. * Note that the PCI signal is active low, so the * bit is *zero* when the interrupt is asserted. * Block further PCI interrupts ... * Sync the status block and grab the flags-n-tag from it. * We count the number of interrupts where there doesn't * seem to have been a DMA update of the status block; if * it *has* been updated, the counter will be cleared in * the while() loop below ... * Tell the chip that we're processing the interrupt * Drop the mutex while we: * Receive any newly-arrived packets * Recycle any newly-finished send buffers * Tell the chip we've finished processing, and * give it the tag that we got from the status * block earlier, so that it knows just how far * we've gone. If it's got more for us to do, * it will now update the status block and try * to assert an interrupt (but we've got the * #INTA blocked at present). If we see the * update, we'll loop around to do some more. * Eventually we'll get out of here ... * Check for exceptional conditions that we need to handle * Status block not updated * Probably due to the internal status tag not * being reset. Force a status block update now; * this should ensure that we get an update and * a new interrupt. After that, we should be in * If this happens multiple times in a row, * it means DMA is just not working. Maybe * the chip's failed, or maybe there's a * problem on the PCI bus or in the host-PCI * At all events, we want to stop further * interrupts and let the recovery code take * over to see whether anything can be done * We must stop ASF heart beat before * bge_chip_stop(), otherwise some * computers (ex. 
IBM HS20 blade * Reenable assertion of #INTA, unless there's a DMA fault * ========== Factotum, implemented as a softint handler ========== * Read all the registers that show the possible * reasons for the ERROR bit to be asserted BGE_DEBUG((
"factotum($%p) flow 0x%x rdma 0x%x wdma 0x%x",
BGE_DEBUG((
"factotum($%p) tmac 0x%x rmac 0x%x rxrs 0x%08x txrs 0x%08x",
* For now, just clear all the errors ... * Handler for hardware link state change. * When this routine is called, the hardware link state has changed * and the new state is reflected in the param_* variables. Here * we must update the softstate, reprogram the MAC to match, and * record the change in the log and/or on the console. * Update the s/w link_state * Reprogram the MAC modes to match * Finally, we have to decide whether to write a message * on the console or only in the log. If the PHY has * been reprogrammed (at user request) "recently", then * the message only goes in the log. Otherwise it's an * "unexpected" event, and it goes on the console as well. * Get & clear the writable status bits in the Tx status register * (some bits are write-1-to-clear, others are just readonly). * Get & clear the ERROR and LINK_CHANGED bits from the status block * Clear any errors flagged in the status block ... * We need to check the link status if: * the status block says there's been a link change * or there's any discrepancy between the various * flags indicating the link state (link_state, * param_link_up, and the LINK STATE bit in the * Transmit MAC status register). * If <check> is false, we're sure the link hasn't changed. * If true, however, it's not yet definitive; we have to call * bge_phys_check() to determine whether the link has settled * into a new state yet ... and if it has, then call the link * state change handler. But when the chip is 5700 in Dell 6650, * even if check is false, the link may have changed. So we * have to call bge_phys_check() to determine the link state. * Factotum routine to check for Tx stall, using the 'watchdog' counter * Specific check for Tx stall ... * The 'watchdog' counter is incremented whenever a packet * is queued, reset to 1 when some (but not all) buffers * are reclaimed, reset to 0 (disabled) when all buffers * are reclaimed, and shifted left here. 
If it exceeds the * threshold value, the chip is assumed to have stalled and * is put into the ERROR state. The factotum will then reset * All of which should ensure that we don't get into a state * where packets are left pending indefinitely! * The factotum is woken up when there's something to do that we'd rather * not do from inside a hardware interrupt handler or high-level cyclic. * Its two main tasks are: * reset & restart the chip after an error * check the link status whenever necessary * Fault detected, time to reset ... * Start our ASF heartbeat counter as soon as possible. * If an error is detected, stop the chip now, marking it as * faulty, so that it will be reset next time through ... * We must stop ASF heart beat before bge_chip_stop(), * otherwise some computers (ex. IBM HS20 blade server) * If the link state changed, tell the world about it. * Note: can't do this while still holding the mutex. * High-level cyclic handler * This routine schedules a (low-level) softint callback to the * factotum, and prods the chip to update the status block (which * will cause a hardware interrupt when complete). * ========== Ioctl subfunctions ========== BGE_TRACE((
"bge_chip_peek_seeprom($%p, $%p)",
BGE_TRACE((
"bge_chip_poke_seeprom($%p, $%p)",
#
endif /* BGE_SEE_IO32 */#
endif /* BGE_FLASH_IO32 */ BGE_DEBUG((
"bge_chip_peek_mem($%p, $%p) peeked 0x%llx from $%p",
BGE_DEBUG((
"bge_chip_poke_mem($%p, $%p) poking 0x%llx at $%p",
* Validate format of ioctl * Validate request parameters * Memory-mapped I/O space * NB: all PHY registers are two bytes, but the * addresses increment in ones (word addressing). * So we scale the address here, then undo the * transformation inside the peek/poke functions. * Attached SEEPROM(s), if any. * NB: we use the high-order bits of the 'address' as * a device select to accommodate multiple SEEPROMS, * If each one is the maximum size (64kbytes), this * makes them appear contiguous. Otherwise, there may * be holes in the mapping. ENxS doesn't have any #
endif /* BGE_SEE_IO32 */ * Attached Flash device (if any); a maximum of one device * is currently supported. But it can be up to 1MB (unlike * the 64k limit on SEEPROMs) so why would you need more ;-) #
endif /* BGE_FLASH_IO32 */ * Reset and reinitialise the 570x hardware #
endif /* BGE_DEBUGGING || BGE_DO_PPIO */ * Validate format of ioctl * Validate request parameters ... * Validate format of ioctl * Validate request parameters ... #
endif /* BGE_SEE_IO32 */ * Validate format of ioctl * Validate request parameters ... #
endif /* BGE_FLASH_IO32 */ BGE_TRACE((
"bge_chip_ioctl($%p, $%p, $%p, $%p)",
#
endif /* BGE_DEBUGGING || BGE_DO_PPIO */#
endif /* BGE_SEE_IO32 */#
endif /* BGE_FLASH_IO32 */ /* a workaround word swap error */ * The driver is supposed to notify ASF that the OS is still running * every three seconds, otherwise the management server may attempt * to reboot the machine. If it hasn't actually failed, this is * not a desirable result. However, this isn't running as a real-time * thread, and even if it were, it might not be able to generate the * heartbeat in a timely manner due to system load. As it isn't a * significant strain on the machine, we will set the interval to half * This function should be placed at the earliest position of bge_attach(). * Here, we don't consider BAXTER, because BGE hasn't * supported BAXTER (that is 5752). Also, as far as I know, * BAXTER doesn't support ASF feature. /* Issue "pause firmware" command and wait for ACK */ #
endif /* BGE_IPMI_ASF */