Index: sys/conf/files =================================================================== RCS file: /bucket/users/kmacy/devel/ncvs/src/sys/conf/files,v retrieving revision 1.1243.2.16 diff -u -r1.1243.2.16 files --- sys/conf/files 14 Apr 2008 06:21:34 -0000 1.1243.2.16 +++ sys/conf/files 17 Apr 2008 06:17:38 -0000 @@ -515,7 +515,6 @@ dev/cs/if_cs_pccard.c optional cs pccard dev/cxgb/cxgb_main.c optional cxgb pci dev/cxgb/cxgb_offload.c optional cxgb pci -dev/cxgb/cxgb_l2t.c optional cxgb pci dev/cxgb/cxgb_lro.c optional cxgb pci dev/cxgb/cxgb_sge.c optional cxgb pci dev/cxgb/common/cxgb_mc5.c optional cxgb pci @@ -526,6 +525,8 @@ dev/cxgb/common/cxgb_xgmac.c optional cxgb pci dev/cxgb/common/cxgb_t3_hw.c optional cxgb pci dev/cxgb/sys/uipc_mvec.c optional cxgb pci +dev/cxgb/sys/cxgb_support.c optional cxgb pci +dev/cxgb/cxgb_t3fw.c optional cxgb cxgb_t3fw dev/cy/cy.c optional cy dev/cy/cy_isa.c optional cy isa dev/cy/cy_pci.c optional cy pci Index: sys/modules/cxgb/Makefile =================================================================== RCS file: /bucket/users/kmacy/devel/ncvs/src/sys/modules/cxgb/Makefile,v retrieving revision 1.10 diff -u -r1.10 Makefile --- sys/modules/cxgb/Makefile 9 Sep 2007 01:28:02 -0000 1.10 +++ sys/modules/cxgb/Makefile 17 Apr 2008 06:20:15 -0000 @@ -1,45 +1,5 @@ # $FreeBSD: src/sys/modules/cxgb/Makefile,v 1.10 2007/09/09 01:28:02 kmacy Exp $ +SUBDIR= cxgb +SUBDIR+= cxgb_t3fw -CXGB = ${.CURDIR}/../../dev/cxgb -.PATH: ${CXGB} ${CXGB}/common ${CXGB}/sys - -KMOD= if_cxgb -SRCS= cxgb_mc5.c cxgb_vsc8211.c cxgb_ael1002.c cxgb_mv88e1xxx.c -SRCS+= cxgb_xgmac.c cxgb_vsc7323.c cxgb_t3_hw.c cxgb_main.c -SRCS+= cxgb_sge.c cxgb_lro.c cxgb_offload.c cxgb_l2t.c -SRCS+= device_if.h bus_if.h pci_if.h opt_zero.h -SRCS+= uipc_mvec.c - -CFLAGS+= -DCONFIG_CHELSIO_T3_CORE -g -DCONFIG_DEFINED -DDEFAULT_JUMBO -I${CXGB} -#CFLAGS+= -DDEBUG -DDEBUG_PRINT -#CFLAGS+= -DINVARIANT_SUPPORT -DINVARIANTS -DWITNESS - - -.if ${MACHINE_ARCH} != "ia64" -# ld is broken on 
ia64 -t3fw-4.7.0.bin: ${CXGB}/t3fw-4.7.0.bin.gz.uu - uudecode -p < ${CXGB}/t3fw-4.7.0.bin.gz.uu \ - | gzip -dc > ${.TARGET} - -FIRMWS= t3fw-4.7.0.bin:t3fw470 -CLEANFILES+= t3fw-4.7.0.bin - -t3b_protocol_sram-1.1.0.bin: ${CXGB}/t3b_protocol_sram-1.1.0.bin.gz.uu - uudecode -p < ${CXGB}/t3b_protocol_sram-1.1.0.bin.gz.uu \ - | gzip -dc > ${.TARGET} - -FIRMWS+= t3b_protocol_sram-1.1.0.bin:t3bps110 -CLEANFILES+= t3b_protocol_sram-1.1.0.bin - -t3b_tp_eeprom-1.1.0.bin: ${CXGB}/t3b_tp_eeprom-1.1.0.bin.gz.uu - uudecode -p < ${CXGB}/t3b_tp_eeprom-1.1.0.bin.gz.uu \ - | gzip -dc > ${.TARGET} - -FIRMWS+= t3b_tp_eeprom-1.1.0.bin:t3btpe110 -CLEANFILES+= t3b_tp_eeprom-1.1.0.bin - - -.endif - - -.include +.include Index: sys/modules/cxgb/cxgb_t3fw/Makefile =================================================================== RCS file: sys/modules/cxgb/cxgb_t3fw/Makefile diff -N sys/modules/cxgb/cxgb_t3fw/Makefile --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/modules/cxgb/cxgb_t3fw/Makefile 17 Apr 2008 06:21:18 -0000 @@ -0,0 +1,8 @@ +# $FreeBSD$ + +CXGB = ${.CURDIR}/../../../dev/cxgb +.PATH: ${CXGB} + +SRCS+= cxgb_t3fw.c + +.include Index: sys/dev/cxgb/cxgb_adapter.h =================================================================== RCS file: /bucket/users/kmacy/devel/ncvs/src/sys/dev/cxgb/cxgb_adapter.h,v retrieving revision 1.20 diff -u -r1.20 cxgb_adapter.h --- sys/dev/cxgb/cxgb_adapter.h 10 Sep 2007 00:59:51 -0000 1.20 +++ sys/dev/cxgb/cxgb_adapter.h 17 Apr 2008 05:16:45 -0000 @@ -1,6 +1,6 @@ /************************************************************************** -Copyright (c) 2007, Chelsio Inc. +Copyright (c) 2007-2008, Chelsio Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -28,8 +28,8 @@ $FreeBSD: src/sys/dev/cxgb/cxgb_adapter.h,v 1.20 2007/09/10 00:59:51 kmacy Exp $ -***************************************************************************/ +***************************************************************************/ #ifndef _CXGB_ADAPTER_H_ @@ -42,25 +42,31 @@ #include #include #include +#include #include #include #include +#include #include #include + #include #include #include + #ifdef CONFIG_DEFINED #include -#include +#include +#include #include #else #include +#include #include -#include +#include #endif #define USE_SX @@ -117,8 +123,8 @@ uint8_t port_id; uint8_t tx_chan; uint8_t txpkt_intf; - uint8_t nqsets; uint8_t first_qset; + uint32_t nqsets; uint8_t hw_addr[ETHER_ADDR_LEN]; struct taskqueue *tq; @@ -126,10 +132,12 @@ struct task timer_reclaim_task; struct cdev *port_cdev; -#define PORT_NAME_LEN 32 +#define PORT_LOCK_NAME_LEN 32 #define TASKQ_NAME_LEN 32 - char lockbuf[PORT_NAME_LEN]; +#define PORT_NAME_LEN 32 + char lockbuf[PORT_LOCK_NAME_LEN]; char taskqbuf[TASKQ_NAME_LEN]; + char namebuf[PORT_NAME_LEN]; }; enum { /* adapter flags */ @@ -139,21 +147,26 @@ QUEUES_BOUND = (1 << 3), FW_UPTODATE = (1 << 4), TPS_UPTODATE = (1 << 5), + CXGB_SHUTDOWN = (1 << 6), + CXGB_OFLD_INIT = (1 << 7), + TP_PARITY_INIT = (1 << 8), }; - #define FL_Q_SIZE 4096 -#define JUMBO_Q_SIZE 512 +#define JUMBO_Q_SIZE 1024 #define RSPQ_Q_SIZE 1024 #define TX_ETH_Q_SIZE 1024 +enum { TXQ_ETH = 0, + TXQ_OFLD = 1, + TXQ_CTRL = 2, }; -/* - * Types of Tx queues in each queue set. Order here matters, do not change. - * XXX TOE is not implemented yet, so the extra queues are just placeholders. 
+/* + * work request size in bytes */ -enum { TXQ_ETH, TXQ_OFLD, TXQ_CTRL }; +#define WR_LEN (WR_FLITS * 8) +#define PIO_LEN (WR_LEN - sizeof(struct cpl_tx_pkt)) /* careful, the following are set on priv_flags and must not collide with @@ -196,12 +209,8 @@ uint32_t holdoff_tmr; uint32_t next_holdoff; uint32_t imm_data; - struct rsp_desc *desc; + uint32_t async_notif; uint32_t cntxt_id; - struct mtx lock; - struct mbuf *rx_head; /* offload packet receive queue head */ - struct mbuf *rx_tail; /* offload packet receive queue tail */ - uint32_t offload_pkts; uint32_t offload_bundles; uint32_t pure_rsps; @@ -212,9 +221,12 @@ bus_dmamap_t desc_map; struct t3_mbuf_hdr rspq_mh; + struct rsp_desc *desc; + struct mtx lock; #define RSPQ_NAME_LEN 32 char lockbuf[RSPQ_NAME_LEN]; - + uint32_t rspq_dump_start; + uint32_t rspq_dump_count; }; #ifndef DISABLE_MBUF_IOVEC @@ -231,8 +243,6 @@ uint32_t cidx; uint32_t pidx; uint32_t gen; - struct rx_desc *desc; - struct rx_sw_desc *sdesc; bus_addr_t phys_addr; uint32_t cntxt_id; uint64_t empty; @@ -240,6 +250,8 @@ bus_dmamap_t desc_map; bus_dma_tag_t entry_tag; uma_zone_t zone; + struct rx_desc *desc; + struct rx_sw_desc *sdesc; int type; }; @@ -273,8 +285,23 @@ bus_dmamap_t desc_map; bus_dma_tag_t entry_tag; struct mbuf_head sendq; + /* + * cleanq should really be an buf_ring to avoid extra + * mbuf touches + */ + struct mbuf_head cleanq; + struct buf_ring txq_mr; + struct mbuf *immpkt; + uint32_t txq_drops; + uint32_t txq_skipped; + uint32_t txq_coalesced; + uint32_t txq_enqueued; + uint32_t txq_dump_start; + uint32_t txq_dump_count; + unsigned long txq_frees; struct mtx lock; -#define TXQ_NAME_LEN 32 + struct sg_ent txq_sgl[TX_MAX_SEGS / 2 + 1]; + #define TXQ_NAME_LEN 32 char lockbuf[TXQ_NAME_LEN]; }; @@ -292,6 +319,10 @@ #define SGE_PSTAT_MAX (SGE_PSTATS_LRO_X_STREAMS+1) +#define QS_EXITING 0x1 +#define QS_RUNNING 0x2 +#define QS_BOUND 0x4 + struct sge_qset { struct sge_rspq rspq; struct sge_fl fl[SGE_RXQ_PER_SET]; @@ -301,6 +332,12 
@@ uint64_t port_stats[SGE_PSTAT_MAX]; struct port_info *port; int idx; /* qset # */ + int qs_cpuid; + int qs_flags; + struct cv qs_cv; + struct mtx qs_mtx; +#define QS_NAME_LEN 32 + char namebuf[QS_NAME_LEN]; }; struct sge { @@ -318,6 +355,8 @@ /* PCI register resources */ int regs_rid; struct resource *regs_res; + int udbs_rid; + struct resource *udbs_res; bus_space_handle_t bh; bus_space_tag_t bt; bus_size_t mmio_len; @@ -342,7 +381,15 @@ void *msix_intr_tag[SGE_QSETS]; uint8_t rxpkt_map[8]; /* maps RX_PKT interface values to port ids */ uint8_t rrss_map[SGE_QSETS]; /* revers RSS map table */ + uint16_t rspq_map[RSS_TABLE_SIZE]; /* maps 7-bit cookie to qidx */ + union { + uint8_t fill[SGE_QSETS]; + uint64_t coalesce; + } u; +#define tunq_fill u.fill +#define tunq_coalesce u.coalesce + struct filter_info *filters; /* Tasks */ @@ -371,7 +418,7 @@ struct port_info port[MAX_NPORTS]; device_t portdev[MAX_NPORTS]; - struct toedev tdev; + struct t3cdev tdev; char fw_version[64]; uint32_t open_device_map; uint32_t registered_device_map; @@ -470,10 +517,23 @@ t3_get_next_mcaddr(struct t3_rx_mode *rm) { uint8_t *macaddr = NULL; - - if (rm->idx == 0) - macaddr = rm->port->hw_addr; + struct ifnet *ifp = rm->port->ifp; + struct ifmultiaddr *ifma; + int i = 0; + + IF_ADDR_LOCK(ifp); + TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { + if (ifma->ifma_addr->sa_family != AF_LINK) + continue; + if (i == rm->idx) { + macaddr = LLADDR((struct sockaddr_dl *)ifma->ifma_addr); + break; + } + i++; + } + IF_ADDR_UNLOCK(ifp); + rm->idx++; return (macaddr); } @@ -497,7 +557,7 @@ void t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed, int duplex, int fc); void t3_sge_err_intr_handler(adapter_t *adapter); -int t3_offload_tx(struct toedev *, struct mbuf *); +int t3_offload_tx(struct t3cdev *, struct mbuf *); void t3_os_ext_intr_handler(adapter_t *adapter); void t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[]); int t3_mgmt_tx(adapter_t *adap, 
struct mbuf *m); @@ -513,18 +573,22 @@ void t3b_intr(void *data); void t3_intr_msi(void *data); void t3_intr_msix(void *data); -int t3_encap(struct port_info *, struct mbuf **, int *free); +int t3_encap(struct sge_qset *, struct mbuf **, int); int t3_sge_init_adapter(adapter_t *); +int t3_sge_reset_adapter(adapter_t *); int t3_sge_init_port(struct port_info *); void t3_sge_deinit_sw(adapter_t *); +void t3_free_tx_desc(struct sge_txq *q, int n); +void t3_free_tx_desc_all(struct sge_txq *q); void t3_rx_eth_lro(adapter_t *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad, uint32_t rss_hash, uint32_t rss_csum, int lro); void t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad); void t3_lro_flush(adapter_t *adap, struct sge_qset *qs, struct lro_state *state); -void t3_add_sysctls(adapter_t *sc); +void t3_add_attach_sysctls(adapter_t *sc); +void t3_add_configured_sysctls(adapter_t *sc); int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx, unsigned char *data); void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p); @@ -533,7 +597,7 @@ */ #define desc_reclaimable(q) ((int)((q)->processed - (q)->cleaned - TX_MAX_DESC)) -#define container_of(p, stype, field) ((stype *)(((uint8_t *)(p)) - offsetof(stype, field))) +#define container_of(p, stype, field) ((stype *)(((uint8_t *)(p)) - offsetof(stype, field))) static __inline struct sge_qset * fl_to_qset(struct sge_fl *q, int qidx) @@ -554,7 +618,7 @@ } static __inline struct adapter * -tdev2adap(struct toedev *d) +tdev2adap(struct t3cdev *d) { return container_of(d, struct adapter, tdev); } @@ -567,5 +631,13 @@ return isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT); } - +int cxgb_pcpu_enqueue_packet(struct ifnet *ifp, struct mbuf *m); +int cxgb_pcpu_start(struct ifnet *ifp, struct mbuf *m); +void cxgb_pcpu_shutdown_threads(struct adapter *sc); +void cxgb_pcpu_startup_threads(struct adapter *sc); + +int process_responses(adapter_t *adap, 
struct sge_qset *qs, int budget); +void t3_free_qset(adapter_t *sc, struct sge_qset *q); +void cxgb_start(struct ifnet *ifp); +void refill_fl_service(adapter_t *adap, struct sge_fl *fl); #endif Index: sys/dev/cxgb/cxgb_config.h =================================================================== RCS file: /bucket/users/kmacy/devel/ncvs/src/sys/dev/cxgb/cxgb_config.h,v retrieving revision 1.4 diff -u -r1.4 cxgb_config.h --- sys/dev/cxgb/cxgb_config.h 13 Jun 2007 05:35:59 -0000 1.4 +++ sys/dev/cxgb/cxgb_config.h 18 Apr 2008 02:23:09 -0000 @@ -1,6 +1,6 @@ /************************************************************************** -Copyright (c) 2007, Chelsio Inc. +Copyright (c) 2007-2008, Chelsio Inc. All rights reserved. Redistribution and use in source and binary forms, with or without @@ -31,7 +31,9 @@ ***************************************************************************/ #ifndef _CXGB_CONFIG_H_ #define _CXGB_CONFIG_H_ - +#define DISABLE_MBUF_IOVEC +#define RTALLOC2_DEFINED +#define VM_FAULT_HOLD_DEFINED #ifndef CONFIG_DEFINED #define CONFIG_CHELSIO_T3_CORE #endif Index: sys/dev/cxgb/cxgb_include.h =================================================================== RCS file: /bucket/users/kmacy/devel/ncvs/src/sys/dev/cxgb/cxgb_include.h,v retrieving revision 1.2 diff -u -r1.2 cxgb_include.h --- sys/dev/cxgb/cxgb_include.h 10 Sep 2007 00:59:51 -0000 1.2 +++ sys/dev/cxgb/cxgb_include.h 17 Apr 2008 05:17:50 -0000 @@ -1,8 +1,11 @@ /* - * $FreeBSD: src/sys/dev/cxgb/cxgb_include.h,v 1.2 2007/09/10 00:59:51 kmacy Exp $ + * $FreeBSD: src/sys/dev/cxgb/cxgb_include.h,v 1.2 2007/09/10 00:59:51 kmacy Exp $ */ - +#include +#include +#include +#include #ifdef CONFIG_DEFINED #include #include @@ -13,12 +16,8 @@ #include #include #include -#include -#include -#include #include - - +#include #else #include #include @@ -29,9 +28,8 @@ #include #include #include - -#include -#include -#include #include +#include #endif + + Index: sys/dev/cxgb/cxgb_ioctl.h 
=================================================================== RCS file: /bucket/users/kmacy/devel/ncvs/src/sys/dev/cxgb/cxgb_ioctl.h,v retrieving revision 1.5 diff -u -r1.5 cxgb_ioctl.h --- sys/dev/cxgb/cxgb_ioctl.h 17 Aug 2007 05:57:03 -0000 1.5 +++ sys/dev/cxgb/cxgb_ioctl.h 17 Apr 2008 05:18:32 -0000 @@ -1,6 +1,6 @@ /************************************************************************** -Copyright (c) 2007, Chelsio Inc. +Copyright (c) 2007-2008, Chelsio Inc. All rights reserved. Redistribution and use in source and binary forms, with or without @@ -101,15 +101,16 @@ }; struct ch_qset_params { - uint32_t qset_idx; - int32_t txq_size[3]; - int32_t rspq_size; - int32_t fl_size[2]; - int32_t intr_lat; - int32_t polling; - int32_t cong_thres; - int32_t vector; - int32_t qnum; + uint32_t qset_idx; + int32_t txq_size[3]; + int32_t rspq_size; + int32_t fl_size[2]; + int32_t intr_lat; + int32_t polling; + int32_t lro; + int32_t cong_thres; + int32_t vector; + int32_t qnum; }; struct ch_pktsched_params { @@ -260,4 +261,6 @@ #define CHELSIO_SET_FILTER _IOW('f', CH_SET_FILTER, struct ch_filter) #define CHELSIO_DEL_FILTER _IOW('f', CH_DEL_FILTER, struct ch_filter) #define CHELSIO_DEVUP _IO('f', CH_DEVUP) + +#define CHELSIO_GET_TCB _IOWR('f', CH_GET_TCB, struct ch_tcb) #endif Index: sys/dev/cxgb/cxgb_l2t.c =================================================================== RCS file: sys/dev/cxgb/cxgb_l2t.c diff -N sys/dev/cxgb/cxgb_l2t.c --- sys/dev/cxgb/cxgb_l2t.c 17 Aug 2007 05:57:03 -0000 1.3 +++ /dev/null 1 Jan 1970 00:00:00 -0000 @@ -1,668 +0,0 @@ -/************************************************************************** - -Copyright (c) 2007, Chelsio Inc. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - 2. 
Neither the name of the Chelsio Corporation nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. - -***************************************************************************/ - -#include -__FBSDID("$FreeBSD: src/sys/dev/cxgb/cxgb_l2t.c,v 1.3 2007/08/17 05:57:03 kmacy Exp $"); - -#include -#include -#include -#include -#include -#include -#include -#if __FreeBSD_version > 700000 -#include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_DEFINED -#include -#else -#include -#endif - -#define VLAN_NONE 0xfff -#define SDL(s) ((struct sockaddr_dl *)s) -#define RT_ENADDR(rt) ((u_char *)LLADDR(SDL((rt)))) -#define rt_expire rt_rmx.rmx_expire - -struct llinfo_arp { - struct callout la_timer; - struct rtentry *la_rt; - struct mbuf *la_hold; /* last packet until resolved/timeout */ - u_short la_preempt; /* countdown for pre-expiry arps */ - u_short la_asked; /* # requests sent */ -}; - -/* - * Module locking notes: There is a RW lock protecting the L2 table as a - * whole plus a spinlock per L2T entry. 
Entry lookups and allocations happen - * under the protection of the table lock, individual entry changes happen - * while holding that entry's spinlock. The table lock nests outside the - * entry locks. Allocations of new entries take the table lock as writers so - * no other lookups can happen while allocating new entries. Entry updates - * take the table lock as readers so multiple entries can be updated in - * parallel. An L2T entry can be dropped by decrementing its reference count - * and therefore can happen in parallel with entry allocation but no entry - * can change state or increment its ref count during allocation as both of - * these perform lookups. - */ - -static inline unsigned int -vlan_prio(const struct l2t_entry *e) -{ - return e->vlan >> 13; -} - -static inline unsigned int -arp_hash(u32 key, int ifindex, const struct l2t_data *d) -{ - return jhash_2words(key, ifindex, 0) & (d->nentries - 1); -} - -static inline void -neigh_replace(struct l2t_entry *e, struct rtentry *rt) -{ - RT_LOCK(rt); - RT_ADDREF(rt); - RT_UNLOCK(rt); - - if (e->neigh) { - RT_LOCK(e->neigh); - RT_REMREF(e->neigh); - RT_UNLOCK(e->neigh); - } - e->neigh = rt; -} - -/* - * Set up an L2T entry and send any packets waiting in the arp queue. The - * supplied mbuf is used for the CPL_L2T_WRITE_REQ. Must be called with the - * entry locked. 
- */ -static int -setup_l2e_send_pending(struct toedev *dev, struct mbuf *m, - struct l2t_entry *e) -{ - struct cpl_l2t_write_req *req; - - if (!m) { - if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) - return (ENOMEM); - } - /* - * XXX MH_ALIGN - */ - req = mtod(m, struct cpl_l2t_write_req *); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, e->idx)); - req->params = htonl(V_L2T_W_IDX(e->idx) | V_L2T_W_IFF(e->smt_idx) | - V_L2T_W_VLAN(e->vlan & EVL_VLID_MASK) | - V_L2T_W_PRIO(vlan_prio(e))); - - memcpy(e->dmac, RT_ENADDR(e->neigh), sizeof(e->dmac)); - memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac)); - m_set_priority(m, CPL_PRIORITY_CONTROL); - cxgb_ofld_send(dev, m); - while (e->arpq_head) { - m = e->arpq_head; - e->arpq_head = m->m_next; - m->m_next = NULL; - cxgb_ofld_send(dev, m); - } - e->arpq_tail = NULL; - e->state = L2T_STATE_VALID; - - return 0; -} - -/* - * Add a packet to the an L2T entry's queue of packets awaiting resolution. - * Must be called with the entry's lock held. 
- */ -static inline void -arpq_enqueue(struct l2t_entry *e, struct mbuf *m) -{ - m->m_next = NULL; - if (e->arpq_head) - e->arpq_tail->m_next = m; - else - e->arpq_head = m; - e->arpq_tail = m; -} - -int -t3_l2t_send_slow(struct toedev *dev, struct mbuf *m, - struct l2t_entry *e) -{ - struct rtentry *rt; - struct mbuf *m0; - - if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) - return (ENOMEM); - - rt = e->neigh; - -again: - switch (e->state) { - case L2T_STATE_STALE: /* entry is stale, kick off revalidation */ - arpresolve(rt->rt_ifp, rt, m0, rt->rt_gateway, RT_ENADDR(rt)); - mtx_lock(&e->lock); - if (e->state == L2T_STATE_STALE) - e->state = L2T_STATE_VALID; - mtx_unlock(&e->lock); - case L2T_STATE_VALID: /* fast-path, send the packet on */ - return cxgb_ofld_send(dev, m); - case L2T_STATE_RESOLVING: - mtx_lock(&e->lock); - if (e->state != L2T_STATE_RESOLVING) { // ARP already completed - mtx_unlock(&e->lock); - goto again; - } - arpq_enqueue(e, m); - mtx_unlock(&e->lock); - - if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) - return (ENOMEM); - /* - * Only the first packet added to the arpq should kick off - * resolution. However, because the m_gethdr below can fail, - * we allow each packet added to the arpq to retry resolution - * as a way of recovering from transient memory exhaustion. - * A better way would be to use a work request to retry L2T - * entries when there's no memory. 
- */ - if (arpresolve(rt->rt_ifp, rt, m0, rt->rt_gateway, RT_ENADDR(rt)) == 0) { - - mtx_lock(&e->lock); - if (e->arpq_head) - setup_l2e_send_pending(dev, m, e); - else - m_freem(m); - mtx_unlock(&e->lock); - } - } - return 0; -} - -void -t3_l2t_send_event(struct toedev *dev, struct l2t_entry *e) -{ - struct rtentry *rt; - struct mbuf *m0; - - if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) - return; - - rt = e->neigh; -again: - switch (e->state) { - case L2T_STATE_STALE: /* entry is stale, kick off revalidation */ - arpresolve(rt->rt_ifp, rt, m0, rt->rt_gateway, RT_ENADDR(rt)); - mtx_lock(&e->lock); - if (e->state == L2T_STATE_STALE) { - e->state = L2T_STATE_VALID; - } - mtx_unlock(&e->lock); - return; - case L2T_STATE_VALID: /* fast-path, send the packet on */ - return; - case L2T_STATE_RESOLVING: - mtx_lock(&e->lock); - if (e->state != L2T_STATE_RESOLVING) { // ARP already completed - mtx_unlock(&e->lock); - goto again; - } - mtx_unlock(&e->lock); - - if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) - return; - /* - * Only the first packet added to the arpq should kick off - * resolution. However, because the alloc_skb below can fail, - * we allow each packet added to the arpq to retry resolution - * as a way of recovering from transient memory exhaustion. - * A better way would be to use a work request to retry L2T - * entries when there's no memory. - */ - arpresolve(rt->rt_ifp, rt, m0, rt->rt_gateway, RT_ENADDR(rt)); - - } - return; -} -/* - * Allocate a free L2T entry. Must be called with l2t_data.lock held. 
- */ -static struct l2t_entry * -alloc_l2e(struct l2t_data *d) -{ - struct l2t_entry *end, *e, **p; - - if (!atomic_load_acq_int(&d->nfree)) - return NULL; - - /* there's definitely a free entry */ - for (e = d->rover, end = &d->l2tab[d->nentries]; e != end; ++e) - if (atomic_load_acq_int(&e->refcnt) == 0) - goto found; - - for (e = &d->l2tab[1]; atomic_load_acq_int(&e->refcnt); ++e) ; -found: - d->rover = e + 1; - atomic_add_int(&d->nfree, -1); - - /* - * The entry we found may be an inactive entry that is - * presently in the hash table. We need to remove it. - */ - if (e->state != L2T_STATE_UNUSED) { - int hash = arp_hash(e->addr, e->ifindex, d); - - for (p = &d->l2tab[hash].first; *p; p = &(*p)->next) - if (*p == e) { - *p = e->next; - break; - } - e->state = L2T_STATE_UNUSED; - } - return e; -} - -/* - * Called when an L2T entry has no more users. The entry is left in the hash - * table since it is likely to be reused but we also bump nfree to indicate - * that the entry can be reallocated for a different neighbor. We also drop - * the existing neighbor reference in case the neighbor is going away and is - * waiting on our reference. - * - * Because entries can be reallocated to other neighbors once their ref count - * drops to 0 we need to take the entry's lock to avoid races with a new - * incarnation. - */ -void -t3_l2e_free(struct l2t_data *d, struct l2t_entry *e) -{ - mtx_lock(&e->lock); - if (atomic_load_acq_int(&e->refcnt) == 0) { /* hasn't been recycled */ - if (e->neigh) { - RT_LOCK(e->neigh); - RT_REMREF(e->neigh); - RT_UNLOCK(e->neigh); - e->neigh = NULL; - } - } - mtx_unlock(&e->lock); - atomic_add_int(&d->nfree, 1); -} - -/* - * Update an L2T entry that was previously used for the same next hop as neigh. - * Must be called with softirqs disabled. 
- */ -static inline void -reuse_entry(struct l2t_entry *e, struct rtentry *neigh) -{ - struct llinfo_arp *la; - - la = (struct llinfo_arp *)neigh->rt_llinfo; - - mtx_lock(&e->lock); /* avoid race with t3_l2t_free */ - if (neigh != e->neigh) - neigh_replace(e, neigh); - - if (memcmp(e->dmac, RT_ENADDR(neigh), sizeof(e->dmac)) || - (neigh->rt_expire > time_uptime)) - e->state = L2T_STATE_RESOLVING; - else if (la->la_hold == NULL) - e->state = L2T_STATE_VALID; - else - e->state = L2T_STATE_STALE; - mtx_unlock(&e->lock); -} - -struct l2t_entry * -t3_l2t_get(struct toedev *dev, struct rtentry *neigh, - unsigned int smt_idx) -{ - struct l2t_entry *e; - struct l2t_data *d = L2DATA(dev); - u32 addr = *(u32 *) rt_key(neigh); - int ifidx = neigh->rt_ifp->if_index; - int hash = arp_hash(addr, ifidx, d); - - rw_wlock(&d->lock); - for (e = d->l2tab[hash].first; e; e = e->next) - if (e->addr == addr && e->ifindex == ifidx && - e->smt_idx == smt_idx) { - l2t_hold(d, e); - if (atomic_load_acq_int(&e->refcnt) == 1) - reuse_entry(e, neigh); - goto done; - } - - /* Need to allocate a new entry */ - e = alloc_l2e(d); - if (e) { - mtx_lock(&e->lock); /* avoid race with t3_l2t_free */ - e->next = d->l2tab[hash].first; - d->l2tab[hash].first = e; - e->state = L2T_STATE_RESOLVING; - e->addr = addr; - e->ifindex = ifidx; - e->smt_idx = smt_idx; - atomic_store_rel_int(&e->refcnt, 1); - neigh_replace(e, neigh); -#ifdef notyet - /* - * XXX need to add accessor function for vlan tag - */ - if (neigh->rt_ifp->if_vlantrunk) - e->vlan = VLAN_DEV_INFO(neigh->dev)->vlan_id; - else -#endif - e->vlan = VLAN_NONE; - mtx_unlock(&e->lock); - } -done: - rw_wunlock(&d->lock); - return e; -} - -/* - * Called when address resolution fails for an L2T entry to handle packets - * on the arpq head. If a packet specifies a failure handler it is invoked, - * otherwise the packets is sent to the TOE. - * - * XXX: maybe we should abandon the latter behavior and just require a failure - * handler. 
- */ -static void -handle_failed_resolution(struct toedev *dev, struct mbuf *arpq) -{ - - while (arpq) { - struct mbuf *m = arpq; -#ifdef notyet - struct l2t_mbuf_cb *cb = L2T_MBUF_CB(m); -#endif - arpq = m->m_next; - m->m_next = NULL; -#ifdef notyet - if (cb->arp_failure_handler) - cb->arp_failure_handler(dev, m); - else -#endif - cxgb_ofld_send(dev, m); - } - -} - -#if defined(NETEVENT) || !defined(CONFIG_CHELSIO_T3_MODULE) -/* - * Called when the host's ARP layer makes a change to some entry that is - * loaded into the HW L2 table. - */ -void -t3_l2t_update(struct toedev *dev, struct rtentry *neigh) -{ - struct l2t_entry *e; - struct mbuf *arpq = NULL; - struct l2t_data *d = L2DATA(dev); - u32 addr = *(u32 *) rt_key(neigh); - int ifidx = neigh->rt_ifp->if_index; - int hash = arp_hash(addr, ifidx, d); - struct llinfo_arp *la; - - rw_rlock(&d->lock); - for (e = d->l2tab[hash].first; e; e = e->next) - if (e->addr == addr && e->ifindex == ifidx) { - mtx_lock(&e->lock); - goto found; - } - rw_runlock(&d->lock); - return; - -found: - rw_runlock(&d->lock); - if (atomic_load_acq_int(&e->refcnt)) { - if (neigh != e->neigh) - neigh_replace(e, neigh); - - la = (struct llinfo_arp *)neigh->rt_llinfo; - if (e->state == L2T_STATE_RESOLVING) { - - if (la->la_asked >= 5 /* arp_maxtries */) { - arpq = e->arpq_head; - e->arpq_head = e->arpq_tail = NULL; - } else if (la->la_hold == NULL) - setup_l2e_send_pending(dev, NULL, e); - } else { - e->state = (la->la_hold == NULL) ? - L2T_STATE_VALID : L2T_STATE_STALE; - if (memcmp(e->dmac, RT_ENADDR(neigh), 6)) - setup_l2e_send_pending(dev, NULL, e); - } - } - mtx_unlock(&e->lock); - - if (arpq) - handle_failed_resolution(dev, arpq); -} -#else -/* - * Called from a kprobe, interrupts are off. 
- */ -void -t3_l2t_update(struct toedev *dev, struct rtentry *neigh) -{ - struct l2t_entry *e; - struct l2t_data *d = L2DATA(dev); - u32 addr = *(u32 *) rt_key(neigh); - int ifidx = neigh->dev->ifindex; - int hash = arp_hash(addr, ifidx, d); - - rw_rlock(&d->lock); - for (e = d->l2tab[hash].first; e; e = e->next) - if (e->addr == addr && e->ifindex == ifidx) { - mtx_lock(&e->lock); - if (atomic_load_acq_int(&e->refcnt)) { - if (neigh != e->neigh) - neigh_replace(e, neigh); - e->tdev = dev; - mod_timer(&e->update_timer, jiffies + 1); - } - mtx_unlock(&e->lock); - break; - } - rw_runlock(&d->lock); -} - -static void -update_timer_cb(unsigned long data) -{ - struct mbuf *arpq = NULL; - struct l2t_entry *e = (struct l2t_entry *)data; - struct rtentry *neigh = e->neigh; - struct toedev *dev = e->tdev; - - barrier(); - if (!atomic_load_acq_int(&e->refcnt)) - return; - - rw_rlock(&neigh->lock); - mtx_lock(&e->lock); - - if (atomic_load_acq_int(&e->refcnt)) { - if (e->state == L2T_STATE_RESOLVING) { - if (neigh->nud_state & NUD_FAILED) { - arpq = e->arpq_head; - e->arpq_head = e->arpq_tail = NULL; - } else if (neigh_is_connected(neigh) && e->arpq_head) - setup_l2e_send_pending(dev, NULL, e); - } else { - e->state = neigh_is_connected(neigh) ? 
- L2T_STATE_VALID : L2T_STATE_STALE; - if (memcmp(e->dmac, RT_ENADDR(neigh), sizeof(e->dmac))) - setup_l2e_send_pending(dev, NULL, e); - } - } - mtx_unlock(&e->lock); - rw_runlock(&neigh->lock); - - if (arpq) - handle_failed_resolution(dev, arpq); -} -#endif - -struct l2t_data * -t3_init_l2t(unsigned int l2t_capacity) -{ - struct l2t_data *d; - int i, size = sizeof(*d) + l2t_capacity * sizeof(struct l2t_entry); - - d = cxgb_alloc_mem(size); - if (!d) - return NULL; - - d->nentries = l2t_capacity; - d->rover = &d->l2tab[1]; /* entry 0 is not used */ - atomic_store_rel_int(&d->nfree, l2t_capacity - 1); - rw_init(&d->lock, "L2T"); - - for (i = 0; i < l2t_capacity; ++i) { - d->l2tab[i].idx = i; - d->l2tab[i].state = L2T_STATE_UNUSED; - mtx_init(&d->l2tab[i].lock, "L2TAB", NULL, MTX_DEF); - atomic_store_rel_int(&d->l2tab[i].refcnt, 0); -#ifndef NETEVENT -#ifdef CONFIG_CHELSIO_T3_MODULE - setup_timer(&d->l2tab[i].update_timer, update_timer_cb, - (unsigned long)&d->l2tab[i]); -#endif -#endif - } - return d; -} - -void -t3_free_l2t(struct l2t_data *d) -{ -#ifndef NETEVENT -#ifdef CONFIG_CHELSIO_T3_MODULE - int i; - - /* Stop all L2T timers */ - for (i = 0; i < d->nentries; ++i) - del_timer_sync(&d->l2tab[i].update_timer); -#endif -#endif - cxgb_free_mem(d); -} - -#ifdef CONFIG_PROC_FS -#include -#include -#include - -static inline void * -l2t_get_idx(struct seq_file *seq, loff_t pos) -{ - struct l2t_data *d = seq->private; - - return pos >= d->nentries ? NULL : &d->l2tab[pos]; -} - -static void * -l2t_seq_start(struct seq_file *seq, loff_t *pos) -{ - return *pos ? 
l2t_get_idx(seq, *pos) : SEQ_START_TOKEN; -} - -static void * -l2t_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - v = l2t_get_idx(seq, *pos + 1); - if (v) - ++*pos; - return v; -} - -static void -l2t_seq_stop(struct seq_file *seq, void *v) -{ -} - -static char -l2e_state(const struct l2t_entry *e) -{ - switch (e->state) { - case L2T_STATE_VALID: return 'V'; /* valid, fast-path entry */ - case L2T_STATE_STALE: return 'S'; /* needs revalidation, but usable */ - case L2T_STATE_RESOLVING: - return e->arpq_head ? 'A' : 'R'; - default: - return 'U'; - } -} - -static int -l2t_seq_show(struct seq_file *seq, void *v) -{ - if (v == SEQ_START_TOKEN) - seq_puts(seq, "Index IP address Ethernet address VLAN " - "Prio State Users SMTIDX Port\n"); - else { - char ip[20]; - struct l2t_entry *e = v; - - mtx_lock(&e->lock); - sprintf(ip, "%u.%u.%u.%u", NIPQUAD(e->addr)); - seq_printf(seq, "%-5u %-15s %02x:%02x:%02x:%02x:%02x:%02x %4d" - " %3u %c %7u %4u %s\n", - e->idx, ip, e->dmac[0], e->dmac[1], e->dmac[2], - e->dmac[3], e->dmac[4], e->dmac[5], - e->vlan & EVL_VLID_MASK, vlan_prio(e), - l2e_state(e), atomic_load_acq_int(&e->refcnt), e->smt_idx, - e->neigh ? e->neigh->dev->name : ""); - mtx_unlock(&e->lock); - } - return 0; -} - -#endif Index: sys/dev/cxgb/cxgb_l2t.h =================================================================== RCS file: sys/dev/cxgb/cxgb_l2t.h diff -N sys/dev/cxgb/cxgb_l2t.h --- sys/dev/cxgb/cxgb_l2t.h 17 Aug 2007 05:57:03 -0000 1.2 +++ /dev/null 1 Jan 1970 00:00:00 -0000 @@ -1,165 +0,0 @@ -/************************************************************************** - -Copyright (c) 2007, Chelsio Inc. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - 2. 
Neither the name of the Chelsio Corporation nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. - -$FreeBSD: src/sys/dev/cxgb/cxgb_l2t.h,v 1.2 2007/08/17 05:57:03 kmacy Exp $ - -***************************************************************************/ -#ifndef _CHELSIO_L2T_H -#define _CHELSIO_L2T_H - -#include -#include - -#if __FreeBSD_version > 700000 -#include -#else -#define rwlock mtx -#define rw_wlock(x) mtx_lock((x)) -#define rw_wunlock(x) mtx_unlock((x)) -#define rw_rlock(x) mtx_lock((x)) -#define rw_runlock(x) mtx_unlock((x)) -#define rw_init(x, str) mtx_init((x), (str), NULL, MTX_DEF) -#define rw_destroy(x) mtx_destroy((x)) -#endif - -enum { - L2T_STATE_VALID, /* entry is up to date */ - L2T_STATE_STALE, /* entry may be used but needs revalidation */ - L2T_STATE_RESOLVING, /* entry needs address resolution */ - L2T_STATE_UNUSED /* entry not in use */ -}; - -/* - * Each L2T entry plays multiple roles. First of all, it keeps state for the - * corresponding entry of the HW L2 table and maintains a queue of offload - * packets awaiting address resolution. 
Second, it is a node of a hash table - * chain, where the nodes of the chain are linked together through their next - * pointer. Finally, each node is a bucket of a hash table, pointing to the - * first element in its chain through its first pointer. - */ -struct l2t_entry { - uint16_t state; /* entry state */ - uint16_t idx; /* entry index */ - uint32_t addr; /* dest IP address */ - int ifindex; /* neighbor's net_device's ifindex */ - uint16_t smt_idx; /* SMT index */ - uint16_t vlan; /* VLAN TCI (id: bits 0-11, prio: 13-15 */ - struct rtentry *neigh; /* associated neighbour */ - struct l2t_entry *first; /* start of hash chain */ - struct l2t_entry *next; /* next l2t_entry on chain */ - struct mbuf *arpq_head; /* queue of packets awaiting resolution */ - struct mbuf *arpq_tail; - struct mtx lock; - volatile uint32_t refcnt; /* entry reference count */ - uint8_t dmac[6]; /* neighbour's MAC address */ -#ifndef NETEVENT -#ifdef CONFIG_CHELSIO_T3_MODULE - struct timer_list update_timer; - struct toedev *tdev; -#endif -#endif -}; - -struct l2t_data { - unsigned int nentries; /* number of entries */ - struct l2t_entry *rover; /* starting point for next allocation */ - volatile uint32_t nfree; /* number of free entries */ - struct rwlock lock; - struct l2t_entry l2tab[0]; -}; - -typedef void (*arp_failure_handler_func)(struct toedev *dev, - struct mbuf *m); - -/* - * Callback stored in an skb to handle address resolution failure. - */ -struct l2t_mbuf_cb { - arp_failure_handler_func arp_failure_handler; -}; - -/* - * XXX - */ -#define L2T_MBUF_CB(skb) ((struct l2t_mbuf_cb *)(skb)->cb) - - -static __inline void set_arp_failure_handler(struct mbuf *m, - arp_failure_handler_func hnd) -{ -#if 0 - L2T_SKB_CB(skb)->arp_failure_handler = hnd; -#endif - panic("implement me"); -} - -/* - * Getting to the L2 data from an offload device. 
- */ -#define L2DATA(dev) ((dev)->l2opt) - -void t3_l2e_free(struct l2t_data *d, struct l2t_entry *e); -void t3_l2t_update(struct toedev *dev, struct rtentry *ifp); -struct l2t_entry *t3_l2t_get(struct toedev *dev, struct rtentry *neigh, - unsigned int smt_idx); -int t3_l2t_send_slow(struct toedev *dev, struct mbuf *m, - struct l2t_entry *e); -void t3_l2t_send_event(struct toedev *dev, struct l2t_entry *e); -struct l2t_data *t3_init_l2t(unsigned int l2t_capacity); -void t3_free_l2t(struct l2t_data *d); - -#ifdef CONFIG_PROC_FS -int t3_l2t_proc_setup(struct proc_dir_entry *dir, struct l2t_data *d); -void t3_l2t_proc_free(struct proc_dir_entry *dir); -#else -#define l2t_proc_setup(dir, d) 0 -#define l2t_proc_free(dir) -#endif - -int cxgb_ofld_send(struct toedev *dev, struct mbuf *m); - -static inline int l2t_send(struct toedev *dev, struct mbuf *m, - struct l2t_entry *e) -{ - if (__predict_true(e->state == L2T_STATE_VALID)) - return cxgb_ofld_send(dev, m); - return t3_l2t_send_slow(dev, m, e); -} - -static inline void l2t_release(struct l2t_data *d, struct l2t_entry *e) -{ - if (atomic_fetchadd_int(&e->refcnt, -1) == 1) - t3_l2e_free(d, e); -} - -static inline void l2t_hold(struct l2t_data *d, struct l2t_entry *e) -{ - if (atomic_fetchadd_int(&e->refcnt, 1) == 1) /* 0 -> 1 transition */ - atomic_add_int(&d->nfree, 1); -} - -#endif Index: sys/dev/cxgb/cxgb_lro.c =================================================================== RCS file: /bucket/users/kmacy/devel/ncvs/src/sys/dev/cxgb/cxgb_lro.c,v retrieving revision 1.8 diff -u -r1.8 cxgb_lro.c --- sys/dev/cxgb/cxgb_lro.c 25 Aug 2007 21:07:36 -0000 1.8 +++ sys/dev/cxgb/cxgb_lro.c 17 Apr 2008 05:26:58 -0000 @@ -28,8 +28,8 @@ ***************************************************************************/ #include -__FBSDID("$FreeBSD: src/sys/dev/cxgb/cxgb_lro.c,v 1.8 2007/08/25 21:07:36 kmacy Exp $"); +__FBSDID("$FreeBSD: src/sys/dev/cxgb/cxgb_lro.c,v 1.8 2007/08/25 21:07:36 kmacy Exp $"); #include #include Index: 
sys/dev/cxgb/cxgb_main.c =================================================================== RCS file: /bucket/users/kmacy/devel/ncvs/src/sys/dev/cxgb/cxgb_main.c,v retrieving revision 1.36.2.1 diff -u -r1.36.2.1 cxgb_main.c --- sys/dev/cxgb/cxgb_main.c 5 Feb 2008 22:20:15 -0000 1.36.2.1 +++ sys/dev/cxgb/cxgb_main.c 18 Apr 2008 01:48:55 -0000 @@ -1,6 +1,6 @@ /************************************************************************** -Copyright (c) 2007, Chelsio Inc. +Copyright (c) 2007-2008, Chelsio Inc. All rights reserved. Redistribution and use in source and binary forms, with or without @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -49,8 +50,10 @@ #include #include #include +#include #include #include +#include #include #include @@ -82,6 +85,10 @@ #include #endif +#ifdef IFNET_MULTIQUEUE +#include +#endif + static int cxgb_setup_msix(adapter_t *, int); static void cxgb_teardown_msix(adapter_t *); static void cxgb_init(void *); @@ -89,8 +96,6 @@ static void cxgb_stop_locked(struct port_info *); static void cxgb_set_rxmode(struct port_info *); static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t); -static void cxgb_start(struct ifnet *); -static void cxgb_start_proc(void *, int ncount); static int cxgb_media_change(struct ifnet *); static void cxgb_media_status(struct ifnet *, struct ifmediareq *); static int setup_sge_qsets(adapter_t *); @@ -114,11 +119,8 @@ static int cxgb_get_regs_len(void); static int offload_open(struct port_info *pi); static void touch_bars(device_t dev); - -#ifdef notyet -static int offload_close(struct toedev *tdev); -#endif - +static int offload_close(struct t3cdev *tdev); +static void cxgb_link_start(struct port_info *p); static device_method_t cxgb_controller_methods[] = { DEVMETHOD(device_probe, cxgb_controller_probe), @@ -180,7 +182,6 @@ #define SGE_MSIX_COUNT (SGE_QSETS + 1) -extern int collapse_mbufs; /* * The driver uses the best interrupt scheme available on a platform in the * order 
MSI-X, MSI, legacy pin interrupts. This parameter determines which @@ -210,11 +211,26 @@ * The driver uses an auto-queue algorithm by default. * To disable it and force a single queue-set per port, use singleq = 1. */ -static int singleq = 1; +static int singleq = 0; TUNABLE_INT("hw.cxgb.singleq", &singleq); SYSCTL_UINT(_hw_cxgb, OID_AUTO, singleq, CTLFLAG_RDTUN, &singleq, 0, "use a single queue-set per port"); + +/* + * The driver only schedules a firmware update when the version check requires one. + * To request an update even if the firmware is up to date, use force_fw_update = 1. + */ +static int force_fw_update = 0; +TUNABLE_INT("hw.cxgb.force_fw_update", &force_fw_update); +SYSCTL_UINT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0, + "update firmware even if up to date"); + +int cxgb_use_16k_clusters = 1; +TUNABLE_INT("hw.cxgb.use_16k_clusters", &cxgb_use_16k_clusters); +SYSCTL_UINT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN, + &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue "); + enum { MAX_TXQ_ENTRIES = 16384, MAX_CTRL_TXQ_ENTRIES = 1024, @@ -273,10 +289,35 @@ {0, 0, 0, NULL} }; - static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset); -static inline char + +void +cxgb_log_tcb(struct adapter *sc, unsigned int tid) +{ + char buf[TCB_SIZE]; + uint64_t *tcb = (uint64_t *)buf; + int i, error; + struct mc7 *mem = &sc->cm; + + error = t3_mc7_bd_read(mem, tid*TCB_SIZE/8, TCB_SIZE/8, tcb); + if (error) + printf("cxgb_tcb_log failed\n"); + + CTR1(KTR_CXGB, "TCB tid=%u", tid); + for (i = 0; i < TCB_SIZE / 32; i++) { + CTR5(KTR_CXGB, "%1d: %08x %08x %08x %08x", + i, (uint32_t)tcb[1], (uint32_t)(tcb[1] >> 32), + (uint32_t)tcb[0], (uint32_t)(tcb[0] >> 32)); + tcb += 2; + CTR4(KTR_CXGB, " %08x %08x %08x %08x", + (uint32_t)tcb[1], (uint32_t)(tcb[1] >> 32), + (uint32_t)tcb[0], (uint32_t)(tcb[0] >> 32)); + tcb += 2; + } +} + +static __inline char t3rev2char(struct adapter *adapter) { char rev = 'z'; @@ -347,14 +388,13 @@ return
(BUS_PROBE_DEFAULT); } -#define FW_FNAME "t3fw%d%d%d" -#define TPEEPROM_NAME "t3%ctpe%d%d%d" -#define TPSRAM_NAME "t3%cps%d%d%d" +#define FW_FNAME "cxgb_t3fw" +#define TPEEPROM_NAME "t3b_tp_eeprom" +#define TPSRAM_NAME "t3b_protocol_sram" static int upgrade_fw(adapter_t *sc) { - char buf[32]; #ifdef FIRMWARE_LATEST const struct firmware *fw; #else @@ -362,16 +402,11 @@ #endif int status; - snprintf(&buf[0], sizeof(buf), FW_FNAME, FW_VERSION_MAJOR, - FW_VERSION_MINOR, FW_VERSION_MICRO); - - fw = firmware_get(buf); - - if (fw == NULL) { - device_printf(sc->dev, "Could not find firmware image %s\n", buf); + if ((fw = firmware_get(FW_FNAME)) == NULL) { + device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME); return (ENOENT); } else - device_printf(sc->dev, "updating firmware on card with %s\n", buf); + device_printf(sc->dev, "updating firmware on card\n"); status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize); device_printf(sc->dev, "firmware update returned %s %d\n", (status == 0) ? 
"success" : "fail", status); @@ -392,7 +427,8 @@ int port_qsets = 1; #ifdef MSI_SUPPORTED int msi_needed, reg; -#endif +#endif + int must_load = 0; sc = device_get_softc(dev); sc->dev = dev; sc->msi_count = 0; @@ -429,9 +465,16 @@ sc->regs_rid = PCIR_BAR(0); if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->regs_rid, RF_ACTIVE)) == NULL) { - device_printf(dev, "Cannot allocate BAR\n"); + device_printf(dev, "Cannot allocate BAR region 0\n"); return (ENXIO); } + sc->udbs_rid = PCIR_BAR(2); + if ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, + &sc->udbs_rid, RF_ACTIVE)) == NULL) { + device_printf(dev, "Cannot allocate BAR region 1\n"); + error = ENXIO; + goto out; + } snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d", device_get_unit(dev)); @@ -444,7 +487,7 @@ snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d", device_get_unit(dev)); - MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_DEF); + MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN); MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF); MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF); @@ -457,7 +500,7 @@ error = ENODEV; goto out; } - /* Allocate the BAR for doing MSI-X. If it succeeds, try to allocate + /* Allocate the BAR for doing MSI-X. If it succeeds, try to allocate * enough messages for the queue sets. If that fails, try falling * back to MSI. If that fails, then try falling back to the legacy * interrupt pin model. 
@@ -506,7 +549,9 @@ sc->cxgb_intr = t3b_intr; } - + if ((sc->flags & USING_MSIX) && !singleq) + port_qsets = min((SGE_QSETS/(sc)->params.nports), mp_ncpus); + /* Create a private taskqueue thread for handling driver events */ #ifdef TASKQUEUE_CURRENT sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT, @@ -529,7 +574,7 @@ /* Create a periodic callout for checking adapter status */ callout_init(&sc->cxgb_tick_ch, TRUE); - if (t3_check_fw_version(sc) != 0) { + if ((t3_check_fw_version(sc, &must_load) != 0 && must_load) || force_fw_update) { /* * Warn user that a firmware update will be attempted in init. */ @@ -540,7 +585,7 @@ sc->flags |= FW_UPTODATE; } - if (t3_check_tpsram_version(sc) != 0) { + if (t3_check_tpsram_version(sc, &must_load) != 0 && must_load) { /* * Warn user that a firmware update will be attempted in init. */ @@ -551,9 +596,6 @@ sc->flags |= TPS_UPTODATE; } - if ((sc->flags & USING_MSIX) && !singleq) - port_qsets = min((SGE_QSETS/(sc)->params.nports), mp_ncpus); - /* * Create a child device for each MAC. The ethernet attachment * will be done in these children. @@ -574,6 +616,7 @@ pi->tx_chan = i >= ai->nports0; pi->txpkt_intf = pi->tx_chan ? 
2 * (i - ai->nports0) + 1 : 2 * i; sc->rxpkt_map[pi->txpkt_intf] = i; + sc->port[i].tx_chan = i >= ai->nports0; sc->portdev[i] = child; device_set_softc(child, pi); } @@ -603,7 +646,9 @@ G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers)); - t3_add_sysctls(sc); + device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]); + callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc); + t3_add_attach_sysctls(sc); out: if (error) cxgb_free(sc); @@ -629,9 +674,14 @@ int i; ADAPTER_LOCK(sc); - /* - * drops the lock - */ + sc->flags |= CXGB_SHUTDOWN; + ADAPTER_UNLOCK(sc); + cxgb_pcpu_shutdown_threads(sc); + ADAPTER_LOCK(sc); + +/* + * drops the lock + */ cxgb_down_locked(sc); #ifdef MSI_SUPPORTED @@ -646,17 +696,13 @@ bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid, sc->msix_regs_res); } - - if (sc->tq != NULL) { - taskqueue_drain(sc->tq, &sc->ext_intr_task); - taskqueue_drain(sc->tq, &sc->tick_task); - } + t3_sge_deinit_sw(sc); /* * Wait for last callout */ - tsleep(&sc, 0, "cxgb unload", 3*hz); + DELAY(hz*100); for (i = 0; i < (sc)->params.nports; ++i) { if (sc->portdev[i] != NULL) @@ -664,22 +710,33 @@ } bus_generic_detach(sc->dev); - if (sc->tq != NULL) + if (sc->tq != NULL) { taskqueue_free(sc->tq); -#ifdef notyet + sc->tq = NULL; + } + if (is_offload(sc)) { cxgb_adapter_unofld(sc); if (isset(&sc->open_device_map, OFFLOAD_DEVMAP_BIT)) offload_close(&sc->tdev); - } -#endif - - t3_free_sge_resources(sc); + else + printf("cxgb_free: DEVMAP_BIT not set\n"); + } else + printf("not offloading set\n"); +#ifdef notyet + /* XXX need to handle unload in TOM */ + if (sc->flags & CXGB_OFLD_INIT) + cxgb_offload_deactivate(sc); +#endif free(sc->filters, M_DEVBUF); t3_sge_free(sc); cxgb_offload_exit(); - + + if (sc->udbs_res != NULL) + bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid, + sc->udbs_res); + if (sc->regs_res != NULL) bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid, sc->regs_res); @@ -688,8 +745,6 @@ 
MTX_DESTROY(&sc->sge.reg_lock); MTX_DESTROY(&sc->elmer_lock); ADAPTER_LOCK_DEINIT(sc); - - return; } /** @@ -792,10 +847,8 @@ return (EINVAL); } sc->msix_irq_rid[k] = rid; - printf("setting up interrupt for port=%d\n", - qs->port->port_id); if (bus_setup_intr(sc->dev, sc->msix_irq_res[k], - INTR_MPSAFE|INTR_TYPE_NET, + INTR_MPSAFE|INTR_TYPE_NET, #ifdef INTR_FILTERS NULL, #endif @@ -804,10 +857,17 @@ "interrupt for message %d\n", rid); return (EINVAL); } +#ifdef IFNET_MULTIQUEUE + if (singleq == 0) { + int vector = rman_get_start(sc->msix_irq_res[k]); + if (bootverbose) + device_printf(sc->dev, "binding vector=%d to cpu=%d\n", vector, k % mp_ncpus); + intr_bind(vector, k % mp_ncpus); + } +#endif } } - return (0); } @@ -816,10 +876,11 @@ { struct port_info *p; char buf[80]; - + const char *desc; + p = device_get_softc(dev); - - snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, p->port_type->desc); + desc = p->phy.desc; + snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc); device_set_desc_copy(dev, buf); return (0); } @@ -861,9 +922,11 @@ struct port_info *p; struct ifnet *ifp; int err, media_flags; + struct adapter *sc; + p = device_get_softc(dev); - + sc = p->adapter; snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d", device_get_unit(device_get_parent(dev)), p->port_id); PORT_LOCK_INIT(p, p->lockbuf); @@ -884,10 +947,17 @@ ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = cxgb_ioctl; ifp->if_start = cxgb_start; + +#if 0 +#ifdef IFNET_MULTIQUEUE + ifp->if_flags |= IFF_MULTIQ; + ifp->if_mq_start = cxgb_pcpu_start; +#endif +#endif ifp->if_timer = 0; /* Disable ifnet watchdog */ ifp->if_watchdog = NULL; - ifp->if_snd.ifq_drv_maxlen = TX_ETH_Q_SIZE; + ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN; IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen); IFQ_SET_READY(&ifp->if_snd); @@ -916,14 +986,14 @@ } ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change, cxgb_media_status); - - if (!strcmp(p->port_type->desc, "10GBASE-CX4")) 
{ + + if (!strcmp(p->phy.desc, "10GBASE-CX4")) { media_flags = IFM_ETHER | IFM_10G_CX4 | IFM_FDX; - } else if (!strcmp(p->port_type->desc, "10GBASE-SR")) { + } else if (!strcmp(p->phy.desc, "10GBASE-SR")) { media_flags = IFM_ETHER | IFM_10G_SR | IFM_FDX; - } else if (!strcmp(p->port_type->desc, "10GBASE-XR")) { + } else if (!strcmp(p->phy.desc, "10GBASE-R")) { media_flags = IFM_ETHER | IFM_10G_LR | IFM_FDX; - } else if (!strcmp(p->port_type->desc, "10/100/1000BASE-T")) { + } else if (!strcmp(p->phy.desc, "10/100/1000BASE-T")) { ifmedia_add(&p->media, IFM_ETHER | IFM_10_T, 0, NULL); ifmedia_add(&p->media, IFM_ETHER | IFM_10_T | IFM_FDX, 0, NULL); @@ -935,7 +1005,7 @@ 0, NULL); media_flags = 0; } else { - printf("unsupported media type %s\n", p->port_type->desc); + printf("unsupported media type %s\n", p->phy.desc); return (ENXIO); } if (media_flags) { @@ -956,19 +1026,14 @@ /* Create a port for handling TX without starvation */ p->tq = taskqueue_create_fast(p->taskqbuf, M_NOWAIT, taskqueue_thread_enqueue, &p->tq); -#endif - - if (p->tq == NULL) { - device_printf(dev, "failed to allocate port task queue\n"); - return (ENOMEM); - } - taskqueue_start_threads(&p->tq, 1, PI_NET, "%s taskq", - device_get_nameunit(dev)); - - TASK_INIT(&p->start_task, 0, cxgb_start_proc, ifp); - +#endif + /* Get the latest mac address, User can use a LAA */ + bcopy(IF_LLADDR(p->ifp), p->hw_addr, ETHER_ADDR_LEN); t3_sge_init_port(p); - +#if defined(LINK_ATTACH) + cxgb_link_start(p); + t3_link_changed(sc, p->port_id); +#endif return (0); } @@ -991,6 +1056,9 @@ } ether_ifdetach(p->ifp); + printf("waiting for callout to stop ..."); + DELAY(1000000); + printf("done\n"); /* * the lock may be acquired in ifdetach */ @@ -1007,7 +1075,14 @@ t3_fatal_err(struct adapter *sc) { u_int fw_status[4]; - + int i = 0; + + /* + * We don't know which tcb caused the error so we just hope it was one of the first ten :-/ + */ + for (i = 0; i < 10; i++) + cxgb_log_tcb(sc, i); + if (sc->flags & FULL_INIT_DONE) { 
t3_sge_stop(sc); t3_write_reg(sc, A_XGM_TX_CTRL, 0); @@ -1017,9 +1092,14 @@ t3_intr_disable(sc); } device_printf(sc->dev,"encountered fatal error, operation suspended\n"); - if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status)) + if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status)) { + device_printf(sc->dev, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n", fw_status[0], fw_status[1], fw_status[2], fw_status[3]); + + CTR4(KTR_CXGB, "FW_ status: 0x%x, 0x%x, 0x%x, 0x%x\n", + fw_status[0], fw_status[1], fw_status[2], fw_status[3]); + } } int @@ -1092,7 +1172,7 @@ * t3_os_link_changed - handle link status changes * @adapter: the adapter associated with the link change * @port_id: the port index whose limk status has changed - * @link_stat: the new status of the link + * @link_status: the new status of the link * @speed: the new speed setting * @duplex: the new duplex setting * @fc: the new flow-control setting @@ -1108,17 +1188,21 @@ struct port_info *pi = &adapter->port[port_id]; struct cmac *mac = &adapter->port[port_id].mac; - if ((pi->ifp->if_flags & IFF_UP) == 0) - return; - if (link_status) { - t3_mac_enable(mac, MAC_DIRECTION_RX); + DELAY(10); + t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX); + /* Clear errors created by MAC enable */ + t3_set_reg_field(adapter, + A_XGM_STAT_CTRL + pi->mac.offset, + F_CLRSTATS, 1); if_link_state_change(pi->ifp, LINK_STATE_UP); + } else { - if_link_state_change(pi->ifp, LINK_STATE_DOWN); pi->phy.ops->power_down(&pi->phy, 1); t3_mac_disable(mac, MAC_DIRECTION_RX); t3_link_start(&pi->phy, mac, &pi->link_config); + t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX); + if_link_state_change(pi->ifp, LINK_STATE_DOWN); } } @@ -1184,6 +1268,84 @@ t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX); } + +static int +await_mgmt_replies(struct adapter *adap, unsigned long init_cnt, + unsigned long n) +{ + int attempts = 5; + + while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) { + if (!--attempts) + return (ETIMEDOUT); + 
t3_os_sleep(10); + } + return 0; +} + +static int +init_tp_parity(struct adapter *adap) +{ + int i; + struct mbuf *m; + struct cpl_set_tcb_field *greq; + unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts; + + t3_tp_set_offload_mode(adap, 1); + + for (i = 0; i < 16; i++) { + struct cpl_smt_write_req *req; + + m = m_gethdr(M_WAITOK, MT_DATA); + req = mtod(m, struct cpl_smt_write_req *); + m->m_len = m->m_pkthdr.len = sizeof(*req); + memset(req, 0, sizeof(*req)); + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i)); + req->iff = i; + t3_mgmt_tx(adap, m); + } + + for (i = 0; i < 2048; i++) { + struct cpl_l2t_write_req *req; + + m = m_gethdr(M_WAITOK, MT_DATA); + req = mtod(m, struct cpl_l2t_write_req *); + m->m_len = m->m_pkthdr.len = sizeof(*req); + memset(req, 0, sizeof(*req)); + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i)); + req->params = htonl(V_L2T_W_IDX(i)); + t3_mgmt_tx(adap, m); + } + + for (i = 0; i < 2048; i++) { + struct cpl_rte_write_req *req; + + m = m_gethdr(M_WAITOK, MT_DATA); + req = mtod(m, struct cpl_rte_write_req *); + m->m_len = m->m_pkthdr.len = sizeof(*req); + memset(req, 0, sizeof(*req)); + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i)); + req->l2t_idx = htonl(V_L2T_W_IDX(i)); + t3_mgmt_tx(adap, m); + } + + m = m_gethdr(M_WAITOK, MT_DATA); + greq = mtod(m, struct cpl_set_tcb_field *); + m->m_len = m->m_pkthdr.len = sizeof(*greq); + memset(greq, 0, sizeof(*greq)); + greq->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0)); + greq->mask = htobe64(1); + t3_mgmt_tx(adap, m); + + i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1); + t3_tp_set_offload_mode(adap, 0); + return (i); +} + /** * setup_rss - configure Receive Side Steering (per-queue connection demux) * @adap: the 
adapter @@ -1213,11 +1375,9 @@ nq[pi->tx_chan] += pi->nqsets; } - nq[0] = max(nq[0], 1U); - nq[1] = max(nq[1], 1U); for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) { - rspq_map[i] = i % nq[0]; - rspq_map[i + RSS_TABLE_SIZE / 2] = (i % nq[1]) + nq[0]; + rspq_map[i] = nq[0] ? i % nq[0] : 0; + rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0; } /* Calculate the reverse RSS map table */ for (i = 0; i < RSS_TABLE_SIZE; ++i) @@ -1226,7 +1386,8 @@ t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN | F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN | - V_RRCPLCPUSIZE(6), cpus, rspq_map); + F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ, + cpus, rspq_map); } @@ -1235,13 +1396,11 @@ * after dealing with any active network taps. */ static inline int -offload_tx(struct toedev *tdev, struct mbuf *m) +offload_tx(struct t3cdev *tdev, struct mbuf *m) { int ret; - critical_enter(); ret = t3_offload_tx(tdev, m); - critical_exit(); return (ret); } @@ -1256,6 +1415,8 @@ return (ENOMEM); req = mtod(m, struct cpl_smt_write_req *); + m->m_pkthdr.len = m->m_len = sizeof(struct cpl_smt_write_req); + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, idx)); req->mtu_idx = NMTUS - 1; /* should be 0 but there's a T3 bug */ @@ -1317,6 +1478,7 @@ { int i, j; + cxgb_pcpu_startup_threads(sc); for (i = 0; i < (sc)->params.nports; ++i) { const struct port_info *pi = adap2pinfo(sc, i); @@ -1337,7 +1499,6 @@ struct firmware *tpeeprom; #endif - char buf[64]; uint32_t version; unsigned int major, minor; int ret, len; @@ -1352,13 +1513,10 @@ rev = t3rev2char(adap); - snprintf(buf, sizeof(buf), TPEEPROM_NAME, rev, - TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO); - - tpeeprom = firmware_get(buf); + tpeeprom = firmware_get(TPEEPROM_NAME); if (tpeeprom == NULL) { device_printf(adap->dev, "could not load TP EEPROM: unable to load %s\n", - buf); + TPEEPROM_NAME); return; } @@ -1369,7 +1527,7 @@ goto 
release_tpeeprom; if (len != TP_SRAM_LEN) { - device_printf(adap->dev, "%s length is wrong len=%d expected=%d\n", buf, len, TP_SRAM_LEN); + device_printf(adap->dev, "%s length is wrong len=%d expected=%d\n", TPEEPROM_NAME, len, TP_SRAM_LEN); return; } @@ -1397,7 +1555,6 @@ #else struct firmware *tpsram; #endif - char buf[64]; int ret; char rev; @@ -1407,16 +1564,12 @@ update_tpeeprom(adap); - snprintf(buf, sizeof(buf), TPSRAM_NAME, rev, - TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO); - - tpsram = firmware_get(buf); + tpsram = firmware_get(TPSRAM_NAME); if (tpsram == NULL){ - device_printf(adap->dev, "could not load TP SRAM: unable to load %s\n", - buf); + device_printf(adap->dev, "could not load TP SRAM\n"); return (EINVAL); } else - device_printf(adap->dev, "updating TP SRAM with %s\n", buf); + device_printf(adap->dev, "updating TP SRAM\n"); ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize); if (ret) @@ -1458,6 +1611,7 @@ if (err) goto out; + t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT); t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12)); err = setup_sge_qsets(sc); @@ -1465,6 +1619,7 @@ goto out; setup_rss(sc); + t3_add_configured_sysctls(sc); sc->flags |= FULL_INIT_DONE; } @@ -1497,8 +1652,18 @@ t3_sge_start(sc); t3_intr_enable(sc); + if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) && + is_offload(sc) && init_tp_parity(sc) == 0) + sc->flags |= TP_PARITY_INIT; + + if (sc->flags & TP_PARITY_INIT) { + t3_write_reg(sc, A_TP_INT_CAUSE, + F_CMCACHEPERR | F_ARPLUTPERR); + t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff); + } + + if (!(sc->flags & QUEUES_BOUND)) { - printf("bind qsets\n"); bind_qsets(sc); sc->flags |= QUEUES_BOUND; } @@ -1516,7 +1681,6 @@ static void cxgb_down_locked(struct adapter *sc) { - int i; t3_sge_stop(sc); t3_intr_disable(sc); @@ -1533,38 +1697,44 @@ sc->irq_res = NULL; } - if (sc->flags & USING_MSIX) + if (sc->flags & USING_MSIX) cxgb_teardown_msix(sc); - ADAPTER_UNLOCK(sc); - + + 
callout_stop(&sc->cxgb_tick_ch); + callout_stop(&sc->sge_timer_ch); callout_drain(&sc->cxgb_tick_ch); callout_drain(&sc->sge_timer_ch); if (sc->tq != NULL) { + printf("draining slow intr\n"); + taskqueue_drain(sc->tq, &sc->slow_intr_task); - for (i = 0; i < sc->params.nports; i++) - taskqueue_drain(sc->tq, &sc->port[i].timer_reclaim_task); + printf("draining ext intr\n"); + taskqueue_drain(sc->tq, &sc->ext_intr_task); + printf("draining tick task\n"); + taskqueue_drain(sc->tq, &sc->tick_task); } -#ifdef notyet - - if (sc->port[i].tq != NULL) -#endif - + ADAPTER_UNLOCK(sc); } static int offload_open(struct port_info *pi) { struct adapter *adapter = pi->adapter; - struct toedev *tdev = TOEDEV(pi->ifp); + struct t3cdev *tdev = &adapter->tdev; + int adap_up = adapter->open_device_map & PORT_MASK; int err = 0; + CTR1(KTR_CXGB, "device_map=0x%x", adapter->open_device_map); if (atomic_cmpset_int(&adapter->open_device_map, - (adapter->open_device_map & ~OFFLOAD_DEVMAP_BIT), - (adapter->open_device_map | OFFLOAD_DEVMAP_BIT)) == 0) + (adapter->open_device_map & ~(1<open_device_map | (1<open_device_map, OFFLOAD_DEVMAP_BIT)) + printf("offload_open: DEVMAP_BIT did not get set 0x%x\n", adapter->open_device_map); ADAPTER_LOCK(pi->adapter); if (!adap_up) err = cxgb_up(adapter); @@ -1573,10 +1743,7 @@ return (err); t3_tp_set_offload_mode(adapter, 1); - tdev->lldev = adapter->port[0].ifp; - err = cxgb_offload_activate(adapter); - if (err) - goto out; + tdev->lldev = pi->ifp; init_port_mtus(adapter); t3_load_mtus(adapter, adapter->params.mtus, adapter->params.a_wnd, @@ -1584,11 +1751,10 @@ adapter->params.rev == 0 ? 
adapter->port[0].ifp->if_mtu : 0xffff); init_smt(adapter); - +#ifdef TOE_ENABLED /* Call back all registered clients */ cxgb_add_clients(tdev); - -out: +#endif /* restore them in case the offload module has changed them */ if (err) { t3_tp_set_offload_mode(adapter, 0); @@ -1597,29 +1763,31 @@ } return (err); } -#ifdef notyet + static int -offload_close(struct toedev *tdev) +offload_close(struct t3cdev *tdev) { struct adapter *adapter = tdev2adap(tdev); if (!isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT)) return (0); - +#ifdef TOE_ENABLED /* Call back all registered clients */ cxgb_remove_clients(tdev); +#endif tdev->lldev = NULL; cxgb_set_dummy_ops(tdev); t3_tp_set_offload_mode(adapter, 0); clrbit(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT); + ADAPTER_LOCK(adapter); if (!adapter->open_device_map) - cxgb_down(adapter); - - cxgb_offload_deactivate(adapter); + cxgb_down_locked(adapter); + else + ADAPTER_UNLOCK(adapter); return (0); } -#endif + static void cxgb_init(void *arg) @@ -1649,7 +1817,6 @@ } if (p->adapter->open_device_map == 0) { t3_intr_clear(sc); - t3_sge_init_adapter(sc); } setbit(&p->adapter->open_device_map, p->port_id); ADAPTER_UNLOCK(p->adapter); @@ -1660,15 +1827,16 @@ log(LOG_WARNING, "Could not initialize offload capabilities\n"); } +#if !defined(LINK_ATTACH) cxgb_link_start(p); t3_link_changed(sc, p->port_id); +#endif ifp->if_baudrate = p->link_config.speed * 1000000; device_printf(sc->dev, "enabling interrupts on port=%d\n", p->port_id); t3_port_intr_enable(sc, p->port_id); - callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz, - cxgb_tick, sc); + t3_sge_reset_adapter(sc); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; @@ -1680,35 +1848,53 @@ struct t3_rx_mode rm; struct cmac *mac = &p->mac; - PORT_LOCK_ASSERT_OWNED(p); - t3_init_rx_mode(&rm, p); + mtx_lock(&p->adapter->mdio_lock); t3_mac_set_rx_mode(mac, &rm); + mtx_unlock(&p->adapter->mdio_lock); } static void -cxgb_stop_locked(struct 
port_info *p) +cxgb_stop_locked(struct port_info *pi) { struct ifnet *ifp; - PORT_LOCK_ASSERT_OWNED(p); - ADAPTER_LOCK_ASSERT_NOTOWNED(p->adapter); + PORT_LOCK_ASSERT_OWNED(pi); + ADAPTER_LOCK_ASSERT_NOTOWNED(pi->adapter); - ifp = p->ifp; - - t3_port_intr_disable(p->adapter, p->port_id); + ifp = pi->ifp; + t3_port_intr_disable(pi->adapter, pi->port_id); ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); - p->phy.ops->power_down(&p->phy, 1); - t3_mac_disable(&p->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX); - ADAPTER_LOCK(p->adapter); - clrbit(&p->adapter->open_device_map, p->port_id); + /* disable pause frames */ + t3_set_reg_field(pi->adapter, A_XGM_TX_CFG + pi->mac.offset, + F_TXPAUSEEN, 0); - - if (p->adapter->open_device_map == 0) { - cxgb_down_locked(p->adapter); + /* Reset RX FIFO HWM */ + t3_set_reg_field(pi->adapter, A_XGM_RXFIFO_CFG + pi->mac.offset, + V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0); + + + ADAPTER_LOCK(pi->adapter); + clrbit(&pi->adapter->open_device_map, pi->port_id); + + if (pi->adapter->open_device_map == 0) { + cxgb_down_locked(pi->adapter); } else - ADAPTER_UNLOCK(p->adapter); + ADAPTER_UNLOCK(pi->adapter); + +#if !defined(LINK_ATTACH) + DELAY(100); + + /* Wait for TXFIFO empty */ + t3_wait_op_done(pi->adapter, A_XGM_TXFIFO_CFG + pi->mac.offset, + F_TXFIFO_EMPTY, 1, 20, 5); + + DELAY(100); + t3_mac_disable(&pi->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX); + + pi->phy.ops->power_down(&pi->phy, 1); +#endif } @@ -1724,7 +1910,6 @@ PORT_LOCK(p); ifp->if_mtu = mtu; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - callout_stop(&p->adapter->cxgb_tick_ch); cxgb_stop_locked(p); cxgb_init_locked(p); } @@ -1750,19 +1935,18 @@ error = cxgb_set_mtu(p, ifr->ifr_mtu); break; case SIOCSIFADDR: - case SIOCGIFADDR: - PORT_LOCK(p); if (ifa->ifa_addr->sa_family == AF_INET) { ifp->if_flags |= IFF_UP; - if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) + if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { + PORT_LOCK(p); cxgb_init_locked(p); + PORT_UNLOCK(p); + } 
arp_ifinit(ifp, ifa); } else error = ether_ioctl(ifp, command, data); - PORT_UNLOCK(p); break; case SIOCSIFFLAGS: - callout_drain(&p->adapter->cxgb_tick_ch); PORT_LOCK(p); if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { @@ -1776,13 +1960,13 @@ } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) cxgb_stop_locked(p); + PORT_UNLOCK(p); + break; + case SIOCADDMULTI: + case SIOCDELMULTI: if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - adapter_t *sc = p->adapter; - callout_reset(&sc->cxgb_tick_ch, - sc->params.stats_update_period * hz, - cxgb_tick, sc); + cxgb_set_rxmode(p); } - PORT_UNLOCK(p); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: @@ -1831,122 +2015,6 @@ } static int -cxgb_start_tx(struct ifnet *ifp, uint32_t txmax) -{ - struct sge_qset *qs; - struct sge_txq *txq; - struct port_info *p = ifp->if_softc; - struct mbuf *m = NULL; - int err, in_use_init, free; - - if (!p->link_config.link_ok) - return (ENXIO); - - if (IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - return (ENOBUFS); - - qs = &p->adapter->sge.qs[p->first_qset]; - txq = &qs->txq[TXQ_ETH]; - err = 0; - - if (txq->flags & TXQ_TRANSMITTING) - return (EINPROGRESS); - - mtx_lock(&txq->lock); - txq->flags |= TXQ_TRANSMITTING; - in_use_init = txq->in_use; - while ((txq->in_use - in_use_init < txmax) && - (txq->size > txq->in_use + TX_MAX_DESC)) { - free = 0; - IFQ_DRV_DEQUEUE(&ifp->if_snd, m); - if (m == NULL) - break; - /* - * Convert chain to M_IOVEC - */ - KASSERT((m->m_flags & M_IOVEC) == 0, ("IOVEC set too early")); -#ifdef notyet - m0 = m; - if (collapse_mbufs && m->m_pkthdr.len > MCLBYTES && - cxgb_m_collapse(m, TX_MAX_SEGS, &m0) == EFBIG) { - if ((m0 = m_defrag(m, M_NOWAIT)) != NULL) { - m = m0; - cxgb_m_collapse(m, TX_MAX_SEGS, &m0); - } else - break; - } - m = m0; -#endif - if ((err = t3_encap(p, &m, &free)) != 0) - break; - BPF_MTAP(ifp, m); - if (free) - m_freem(m); - } - txq->flags &= ~TXQ_TRANSMITTING; - mtx_unlock(&txq->lock); - - if (__predict_false(err)) { - if (err == ENOMEM) { - 
ifp->if_drv_flags |= IFF_DRV_OACTIVE; - IFQ_LOCK(&ifp->if_snd); - IFQ_DRV_PREPEND(&ifp->if_snd, m); - IFQ_UNLOCK(&ifp->if_snd); - } - } - if (err == 0 && m == NULL) - err = ENOBUFS; - else if ((err == 0) && (txq->size <= txq->in_use + TX_MAX_DESC) && - (ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { - ifp->if_drv_flags |= IFF_DRV_OACTIVE; - err = ENOSPC; - } - return (err); -} - -static void -cxgb_start_proc(void *arg, int ncount) -{ - struct ifnet *ifp = arg; - struct port_info *pi = ifp->if_softc; - struct sge_qset *qs; - struct sge_txq *txq; - int error; - - qs = &pi->adapter->sge.qs[pi->first_qset]; - txq = &qs->txq[TXQ_ETH]; - - do { - if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC >> 2) - taskqueue_enqueue(pi->tq, &txq->qreclaim_task); - - error = cxgb_start_tx(ifp, TX_START_MAX_DESC); - } while (error == 0); -} - -static void -cxgb_start(struct ifnet *ifp) -{ - struct port_info *pi = ifp->if_softc; - struct sge_qset *qs; - struct sge_txq *txq; - int err; - - qs = &pi->adapter->sge.qs[pi->first_qset]; - txq = &qs->txq[TXQ_ETH]; - - if (desc_reclaimable(txq) > TX_CLEAN_MAX_DESC >> 2) - taskqueue_enqueue(pi->tq, - &txq->qreclaim_task); - - err = cxgb_start_tx(ifp, TX_START_MAX_DESC); - - if (err == 0) - taskqueue_enqueue(pi->tq, &pi->start_task); -} - - -static int cxgb_media_change(struct ifnet *ifp) { if_printf(ifp, "media change not supported\n"); @@ -2025,7 +2093,7 @@ for (i = 0; i < (sc)->params.nports; ++i) { struct port_info *p = &sc->port[i]; - if (!(p->port_type->caps & SUPPORTED_IRQ)) + if (!(p->phy.caps & SUPPORTED_IRQ)) t3_link_changed(sc, i); p->ifp->if_baudrate = p->link_config.speed * 1000000; } @@ -2036,11 +2104,17 @@ { int i; + if(adapter->flags & CXGB_SHUTDOWN) + return; + for_each_port(adapter, i) { struct port_info *p = &adapter->port[i]; struct ifnet *ifp = p->ifp; int status; - + + if(adapter->flags & CXGB_SHUTDOWN) + return; + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) continue; @@ -2071,11 +2145,11 @@ { adapter_t *sc = (adapter_t 
*)arg; + if(sc->flags & CXGB_SHUTDOWN) + return; + taskqueue_enqueue(sc->tq, &sc->tick_task); - - if (sc->open_device_map != 0) - callout_reset(&sc->cxgb_tick_ch, sc->params.stats_update_period * hz, - cxgb_tick, sc); + callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc); } static void @@ -2084,17 +2158,20 @@ adapter_t *sc = (adapter_t *)arg; const struct adapter_params *p = &sc->params; + if(sc->flags & CXGB_SHUTDOWN) + return; + ADAPTER_LOCK(sc); if (p->linkpoll_period) check_link_status(sc); /* - * adapter lock can currently only be acquire after the + * adapter lock can currently only be acquired after the * port lock */ ADAPTER_UNLOCK(sc); - if (p->rev == T3_REV_B2 && p->nports < 4) + if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map) check_t3b2_mac(sc); } @@ -2262,7 +2339,7 @@ } case CHELSIO_GET_SGE_CONTEXT: { struct ch_cntxt *ecntxt = (struct ch_cntxt *)data; - mtx_lock(&sc->sge.reg_lock); + mtx_lock_spin(&sc->sge.reg_lock); switch (ecntxt->cntxt_type) { case CNTXT_TYPE_EGRESS: error = t3_sge_read_ecntxt(sc, ecntxt->cntxt_id, @@ -2284,7 +2361,7 @@ error = EINVAL; break; } - mtx_unlock(&sc->sge.reg_lock); + mtx_unlock_spin(&sc->sge.reg_lock); break; } case CHELSIO_GET_SGE_DESC: { @@ -2302,7 +2379,8 @@ case CHELSIO_SET_QSET_PARAMS: { struct qset_params *q; struct ch_qset_params *t = (struct ch_qset_params *)data; - + int i; + if (t->qset_idx >= SGE_QSETS) return (EINVAL); if (!in_range(t->intr_lat, 0, M_NEWTIMER) || @@ -2318,6 +2396,18 @@ MAX_RX_JUMBO_BUFFERS) || !in_range(t->rspq_size, MIN_RSPQ_ENTRIES, MAX_RSPQ_ENTRIES)) return (EINVAL); + + if ((sc->flags & FULL_INIT_DONE) && t->lro > 0) + for_each_port(sc, i) { + pi = adap2pinfo(sc, i); + if (t->qset_idx >= pi->first_qset && + t->qset_idx < pi->first_qset + pi->nqsets +#if 0 + && !pi->rx_csum_offload +#endif + ) + return (EINVAL); /* positive errno, matching the other returns in this case */ + } if ((sc->flags & FULL_INIT_DONE) && (t->rspq_size >= 0 || t->fl_size[0] >= 0 || t->fl_size[1] >= 0 || t->txq_size[0] >= 0 || @@ -2470,7 +2560,7 @@ * Read 256
bytes at a time as len can be large and we don't * want to use huge intermediate buffers. */ - useraddr = (uint8_t *)(t + 1); /* advance to start of buffer */ + useraddr = (uint8_t *)t->buf; while (t->len) { unsigned int chunk = min(t->len, sizeof(buf)); @@ -2626,3 +2716,6 @@ reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1), XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1)); } + + +MODULE_DEPEND(if_cxgb, cxgb_t3fw, 1, 1, 1); Index: sys/dev/cxgb/cxgb_multiq.c =================================================================== RCS file: sys/dev/cxgb/cxgb_multiq.c diff -N sys/dev/cxgb/cxgb_multiq.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/dev/cxgb/cxgb_multiq.c 17 Apr 2008 06:04:15 -0000 @@ -0,0 +1,799 @@ +/************************************************************************** + +Copyright (c) 2007-2008, Chelsio Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +2. Neither the name of the Chelsio Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +***************************************************************************/ + +#define DEBUG_BUFRING + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include +#include +#include + +#include +#include + +#ifdef CONFIG_DEFINED +#include +#include +#else +#include +#include +#endif + +extern struct sysctl_oid_list sysctl__hw_cxgb_children; +static int cxgb_pcpu_tx_coalesce = 0; +TUNABLE_INT("hw.cxgb.tx_coalesce", &cxgb_pcpu_tx_coalesce); +SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce, CTLFLAG_RDTUN, &cxgb_pcpu_tx_coalesce, 0, + "coalesce small packets into a single work request"); + +static int sleep_ticks = 1; +TUNABLE_INT("hw.cxgb.sleep_ticks", &sleep_ticks); +SYSCTL_UINT(_hw_cxgb, OID_AUTO, sleep_ticks, CTLFLAG_RDTUN, &sleep_ticks, 0, + "ticks to sleep between checking pcpu queues"); + +int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE; +TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size); +SYSCTL_UINT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0, + "size of per-queue mbuf ring"); + + +static inline int32_t 
cxgb_pcpu_calc_cookie(struct ifnet *ifp, struct mbuf *immpkt); +static void cxgb_pcpu_start_proc(void *arg); +#ifdef IFNET_MULTIQUEUE +static int cxgb_pcpu_cookie_to_qidx(struct port_info *, uint32_t cookie); +#endif +static int cxgb_tx(struct sge_qset *qs, uint32_t txmax); + + +static inline int +cxgb_pcpu_enqueue_packet_(struct sge_qset *qs, struct mbuf *m) +{ + struct sge_txq *txq; + int err = 0; + +#ifndef IFNET_MULTIQUEUE + panic("not expecting enqueue without multiqueue"); +#endif + KASSERT(m != NULL, ("null mbuf")); + KASSERT(m->m_type == MT_DATA, ("bad mbuf type %d", m->m_type)); + if (qs->qs_flags & QS_EXITING) { + m_freem(m); + return (ENXIO); + } + txq = &qs->txq[TXQ_ETH]; + err = buf_ring_enqueue(&txq->txq_mr, m); + if (err) { + txq->txq_drops++; + m_freem(m); + } + if ((qs->txq[TXQ_ETH].flags & TXQ_TRANSMITTING) == 0) + wakeup(qs); + + return (err); +} + +int +cxgb_pcpu_enqueue_packet(struct ifnet *ifp, struct mbuf *m) +{ + struct port_info *pi = ifp->if_softc; + struct sge_qset *qs; + int err = 0, qidx; +#ifdef IFNET_MULTIQUEUE + int32_t calc_cookie; + + calc_cookie = m->m_pkthdr.rss_hash; + qidx = cxgb_pcpu_cookie_to_qidx(pi, calc_cookie); +#else + qidx = 0; +#endif + qs = &pi->adapter->sge.qs[qidx]; + + err = cxgb_pcpu_enqueue_packet_(qs, m); + + return (err); +} + +static int +cxgb_dequeue_packet(struct sge_txq *txq, struct mbuf **m_vec) +{ + struct mbuf *m; + struct sge_qset *qs; + int count, size, coalesced; + struct adapter *sc; +#ifndef IFNET_MULTIQUEUE + struct port_info *pi = txq->port; + + if (txq->immpkt != NULL) + panic("immediate packet set"); + mtx_assert(&txq->lock, MA_OWNED); + + IFQ_DRV_DEQUEUE(&pi->ifp->if_snd, m); + if (m == NULL) + return (0); + + m_vec[0] = m; + return (1); +#endif + + coalesced = count = size = 0; + qs = txq_to_qset(txq, TXQ_ETH); + if (qs->qs_flags & QS_EXITING) + return (0); + + if (txq->immpkt != NULL) { + DPRINTF("immediate packet\n"); + m_vec[0] = txq->immpkt; + txq->immpkt = NULL; + return (1); + } + sc = 
qs->port->adapter; + + m = buf_ring_dequeue(&txq->txq_mr); + if (m == NULL) + return (0); + + count = 1; + KASSERT(m->m_type == MT_DATA, + ("m=%p is bad mbuf type %d from ring cons=%d prod=%d", m, + m->m_type, txq->txq_mr.br_cons, txq->txq_mr.br_prod)); + m_vec[0] = m; + if (m->m_pkthdr.tso_segsz > 0 || m->m_pkthdr.len > TX_WR_SIZE_MAX || + m->m_next != NULL || (cxgb_pcpu_tx_coalesce == 0)) { + return (count); + } + + size = m->m_pkthdr.len; + for (m = buf_ring_peek(&txq->txq_mr); m != NULL; + m = buf_ring_peek(&txq->txq_mr)) { + + if (m->m_pkthdr.tso_segsz > 0 || + size + m->m_pkthdr.len > TX_WR_SIZE_MAX || m->m_next != NULL) + break; + + buf_ring_dequeue(&txq->txq_mr); + size += m->m_pkthdr.len; + m_vec[count++] = m; + + if (count == TX_WR_COUNT_MAX) + break; + + coalesced++; + } + txq->txq_coalesced += coalesced; + + return (count); +} + +static int32_t +cxgb_pcpu_get_cookie(struct ifnet *ifp, struct in6_addr *lip, uint16_t lport, struct in6_addr *rip, uint16_t rport, int ipv6) +{ + uint32_t base; + uint8_t buf[36]; + int count; + int32_t cookie; + + critical_enter(); + /* + * Can definitely bypass bcopy XXX + */ + if (ipv6 == 0) { + count = 12; + bcopy(rip, &buf[0], 4); + bcopy(lip, &buf[4], 4); + bcopy(&rport, &buf[8], 2); + bcopy(&lport, &buf[10], 2); + } else { + count = 36; + bcopy(rip, &buf[0], 16); + bcopy(lip, &buf[16], 16); + bcopy(&rport, &buf[32], 2); + bcopy(&lport, &buf[34], 2); + } + + base = 0xffffffff; + base = update_crc32(base, buf, count); + base = sctp_csum_finalize(base); + + /* + * Indirection table is 128 bits + * -> cookie indexes into indirection table which maps connection to queue + * -> RSS map maps queue to CPU + */ + cookie = (base & (RSS_TABLE_SIZE-1)); + critical_exit(); + + return (cookie); +} + +static int32_t +cxgb_pcpu_calc_cookie(struct ifnet *ifp, struct mbuf *immpkt) +{ + struct in6_addr lip, rip; + uint16_t lport, rport; + struct ether_header *eh; + int32_t cookie; + struct ip *ip; + struct ip6_hdr *ip6; + struct tcphdr 
*th; + struct udphdr *uh; + struct sctphdr *sh; + uint8_t *next, proto; + int etype; + + if (immpkt == NULL) + return -1; + +#if 1 + /* + * XXX perf test + */ + return (0); +#endif + rport = lport = 0; + cookie = -1; + next = NULL; + eh = mtod(immpkt, struct ether_header *); + etype = ntohs(eh->ether_type); + + switch (etype) { + case ETHERTYPE_IP: + ip = (struct ip *)(eh + 1); + next = (uint8_t *)(ip + 1); + bcopy(&ip->ip_src, &lip, 4); + bcopy(&ip->ip_dst, &rip, 4); + proto = ip->ip_p; + break; + case ETHERTYPE_IPV6: + ip6 = (struct ip6_hdr *)(eh + 1); + next = (uint8_t *)(ip6 + 1); + bcopy(&ip6->ip6_src, &lip, sizeof(struct in6_addr)); + bcopy(&ip6->ip6_dst, &rip, sizeof(struct in6_addr)); + if (ip6->ip6_nxt == IPPROTO_HOPOPTS) { + struct ip6_hbh *hbh; + + hbh = (struct ip6_hbh *)(ip6 + 1); + proto = hbh->ip6h_nxt; + } else + proto = ip6->ip6_nxt; + break; + case ETHERTYPE_ARP: + default: + /* + * Default to queue zero + */ + proto = cookie = 0; + } + if (proto) { + switch (proto) { + case IPPROTO_TCP: + th = (struct tcphdr *)next; + lport = th->th_sport; + rport = th->th_dport; + break; + case IPPROTO_UDP: + uh = (struct udphdr *)next; + lport = uh->uh_sport; + rport = uh->uh_dport; + break; + case IPPROTO_SCTP: + sh = (struct sctphdr *)next; + lport = sh->src_port; + rport = sh->dest_port; + break; + default: + /* nothing to do */ + break; + } + } + + if (cookie) + cookie = cxgb_pcpu_get_cookie(ifp, &lip, lport, &rip, rport, (etype == ETHERTYPE_IPV6)); + + return (cookie); +} + +static void +cxgb_pcpu_free(struct sge_qset *qs) +{ + struct mbuf *m; + struct sge_txq *txq = &qs->txq[TXQ_ETH]; + + mtx_lock(&txq->lock); + while ((m = mbufq_dequeue(&txq->sendq)) != NULL) + m_freem(m); + while ((m = buf_ring_dequeue(&txq->txq_mr)) != NULL) + m_freem(m); + + t3_free_tx_desc_all(txq); + mtx_unlock(&txq->lock); +} + +static int +cxgb_pcpu_reclaim_tx(struct sge_txq *txq) +{ + int reclaimable; + struct sge_qset *qs = txq_to_qset(txq, TXQ_ETH); + +#ifdef notyet + 
KASSERT(qs->qs_cpuid == curcpu, ("cpu qset mismatch cpuid=%d curcpu=%d", + qs->qs_cpuid, curcpu)); +#endif + mtx_assert(&txq->lock, MA_OWNED); + + reclaimable = desc_reclaimable(txq); + if (reclaimable == 0) + return (0); + + t3_free_tx_desc(txq, reclaimable); + + txq->cleaned += reclaimable; + txq->in_use -= reclaimable; + if (isset(&qs->txq_stopped, TXQ_ETH)) + clrbit(&qs->txq_stopped, TXQ_ETH); + + return (reclaimable); +} + +static int +cxgb_pcpu_start_(struct sge_qset *qs, struct mbuf *immpkt, int tx_flush) +{ + int i, err, initerr, flush, reclaimed, stopped; + struct port_info *pi; + struct sge_txq *txq; + adapter_t *sc; + uint32_t max_desc; + + pi = qs->port; + initerr = err = i = reclaimed = 0; + sc = pi->adapter; + txq = &qs->txq[TXQ_ETH]; + + mtx_assert(&txq->lock, MA_OWNED); + + retry: + if (!pi->link_config.link_ok) + initerr = ENXIO; + else if (qs->qs_flags & QS_EXITING) + initerr = ENXIO; + else if ((pi->ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) + initerr = ENXIO; + else if ((pi->ifp->if_flags & IFF_UP) == 0) + initerr = ENXIO; + else if (immpkt) { + + if (!buf_ring_empty(&txq->txq_mr)) + initerr = cxgb_pcpu_enqueue_packet_(qs, immpkt); + else + txq->immpkt = immpkt; + + immpkt = NULL; + } + if (initerr && initerr != ENOBUFS) { + if (cxgb_debug) + log(LOG_WARNING, "cxgb link down\n"); + if (immpkt) + m_freem(immpkt); + return (initerr); + } + + if ((tx_flush && (desc_reclaimable(txq) > 0)) || + (desc_reclaimable(txq) > (TX_ETH_Q_SIZE>>1))) { + int reclaimed = 0; + + if (cxgb_debug) { + device_printf(qs->port->adapter->dev, + "cpuid=%d curcpu=%d reclaimable=%d txq=%p txq->cidx=%d txq->pidx=%d ", + qs->qs_cpuid, curcpu, desc_reclaimable(txq), + txq, txq->cidx, txq->pidx); + } + reclaimed = cxgb_pcpu_reclaim_tx(txq); + if (cxgb_debug) + printf("reclaimed=%d\n", reclaimed); + } + + stopped = isset(&qs->txq_stopped, TXQ_ETH); + flush = (((!buf_ring_empty(&txq->txq_mr) || (!IFQ_DRV_IS_EMPTY(&pi->ifp->if_snd))) && !stopped) || txq->immpkt); + max_desc = 
tx_flush ? TX_ETH_Q_SIZE : TX_START_MAX_DESC; + + if (cxgb_debug) + DPRINTF("stopped=%d flush=%d max_desc=%d\n", + stopped, flush, max_desc); + + err = flush ? cxgb_tx(qs, max_desc) : ENOSPC; + + + if ((tx_flush && flush && err == 0) && + (!buf_ring_empty(&txq->txq_mr) || + !IFQ_DRV_IS_EMPTY(&pi->ifp->if_snd))) { + struct thread *td = curthread; + + if (++i > 1) { + thread_lock(td); + sched_prio(td, PRI_MIN_TIMESHARE); + thread_unlock(td); + } + if (i > 50) { + if (cxgb_debug) + device_printf(qs->port->adapter->dev, + "exceeded max enqueue tries\n"); + return (EBUSY); + } + goto retry; + } + err = (initerr != 0) ? initerr : err; + + return (err); +} + +int +cxgb_pcpu_start(struct ifnet *ifp, struct mbuf *immpkt) +{ + uint32_t cookie; + int err, qidx, locked, resid; + struct port_info *pi; + struct sge_qset *qs; + struct sge_txq *txq = NULL /* gcc is dumb */; + struct adapter *sc; + + pi = ifp->if_softc; + sc = pi->adapter; + qs = NULL; + qidx = resid = err = cookie = locked = 0; + +#ifdef IFNET_MULTIQUEUE + if (immpkt && (immpkt->m_pkthdr.rss_hash != 0)) { + cookie = immpkt->m_pkthdr.rss_hash; + qidx = cxgb_pcpu_cookie_to_qidx(pi, cookie); + DPRINTF("hash=0x%x qidx=%d cpu=%d\n", immpkt->m_pkthdr.rss_hash, qidx, curcpu); + qs = &pi->adapter->sge.qs[qidx]; + } else +#endif + qs = &pi->adapter->sge.qs[pi->first_qset]; + + txq = &qs->txq[TXQ_ETH]; + + if (((sc->tunq_coalesce == 0) || + (buf_ring_count(&txq->txq_mr) >= TX_WR_COUNT_MAX) || + (cxgb_pcpu_tx_coalesce == 0)) && mtx_trylock(&txq->lock)) { + if (cxgb_debug) + printf("doing immediate transmit\n"); + + txq->flags |= TXQ_TRANSMITTING; + err = cxgb_pcpu_start_(qs, immpkt, FALSE); + txq->flags &= ~TXQ_TRANSMITTING; + resid = (buf_ring_count(&txq->txq_mr) > 64) || (desc_reclaimable(txq) > 64); + mtx_unlock(&txq->lock); + } else if (immpkt) { + if (cxgb_debug) + printf("deferred coalesce=%jx ring_count=%d mtx_owned=%d\n", + sc->tunq_coalesce, buf_ring_count(&txq->txq_mr), mtx_owned(&txq->lock)); + err = 
cxgb_pcpu_enqueue_packet_(qs, immpkt); + } + + if (resid && (txq->flags & TXQ_TRANSMITTING) == 0) + wakeup(qs); + + return ((err == ENOSPC) ? 0 : err); +} + +void +cxgb_start(struct ifnet *ifp) +{ + struct port_info *p = ifp->if_softc; + + if (!p->link_config.link_ok) + return; + + if (IFQ_DRV_IS_EMPTY(&ifp->if_snd)) + return; + + cxgb_pcpu_start(ifp, NULL); +} + +static void +cxgb_pcpu_start_proc(void *arg) +{ + struct sge_qset *qs = arg; + struct thread *td; + struct sge_txq *txq = &qs->txq[TXQ_ETH]; + int idleticks, err = 0; +#ifdef notyet + struct adapter *sc = qs->port->adapter; +#endif + td = curthread; + + sleep_ticks = max(hz/1000, 1); + qs->qs_flags |= QS_RUNNING; + thread_lock(td); + sched_bind(td, qs->qs_cpuid); + thread_unlock(td); + + DELAY(qs->qs_cpuid*100000); + if (bootverbose) + printf("bound to %d running on %d\n", qs->qs_cpuid, curcpu); + + for (;;) { + if (qs->qs_flags & QS_EXITING) + break; + + if ((qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { /* bitwise test of the RUNNING flag, not logical AND */ + idleticks = hz; + if (!buf_ring_empty(&txq->txq_mr) || + !mbufq_empty(&txq->sendq)) + cxgb_pcpu_free(qs); + goto done; + } else + idleticks = sleep_ticks; + if (mtx_trylock(&txq->lock)) { + txq->flags |= TXQ_TRANSMITTING; + err = cxgb_pcpu_start_(qs, NULL, TRUE); + txq->flags &= ~TXQ_TRANSMITTING; + mtx_unlock(&txq->lock); + } else + err = EINPROGRESS; +#ifdef notyet + if (mtx_trylock(&qs->rspq.lock)) { + process_responses(sc, qs, -1); + + refill_fl_service(sc, &qs->fl[0]); + refill_fl_service(sc, &qs->fl[1]); + t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) | + V_NEWTIMER(qs->rspq.next_holdoff) | V_NEWINDEX(qs->rspq.cidx)); + + mtx_unlock(&qs->rspq.lock); + } +#endif + if ((!buf_ring_empty(&txq->txq_mr)) && err == 0) { + if (cxgb_debug) + printf("head=%p cons=%d prod=%d\n", + txq->sendq.head, txq->txq_mr.br_cons, + txq->txq_mr.br_prod); + continue; + } + done: + tsleep(qs, 1, "cxgbidle", idleticks); /* hz while the interface is not running, sleep_ticks otherwise */ + } + + if (bootverbose) + device_printf(qs->port->adapter->dev, "exiting thread for
cpu%d\n", qs->qs_cpuid); + + + cxgb_pcpu_free(qs); + t3_free_qset(qs->port->adapter, qs); + + qs->qs_flags &= ~QS_RUNNING; + kthread_exit(0); +} + +#ifdef IFNET_MULTIQUEUE +static int +cxgb_pcpu_cookie_to_qidx(struct port_info *pi, uint32_t cookie) +{ + int qidx; + uint32_t tmp; + + /* + * Will probably need to be changed for 4-port XXX + */ + tmp = pi->tx_chan ? cookie : cookie & ((RSS_TABLE_SIZE>>1)-1); + DPRINTF(" tmp=%d ", tmp); + qidx = (tmp & (pi->nqsets -1)) + pi->first_qset; + + return (qidx); +} +#endif + +void +cxgb_pcpu_startup_threads(struct adapter *sc) +{ + int i, j, nqsets; + struct proc *p; + + + for (i = 0; i < (sc)->params.nports; ++i) { + struct port_info *pi = adap2pinfo(sc, i); + +#ifdef IFNET_MULTIQUEUE + nqsets = pi->nqsets; +#else + nqsets = 1; +#endif + for (j = 0; j < nqsets; ++j) { + struct sge_qset *qs; + + qs = &sc->sge.qs[pi->first_qset + j]; + qs->port = pi; + qs->qs_cpuid = ((pi->first_qset + j) % mp_ncpus); + device_printf(sc->dev, "starting thread for %d\n", + qs->qs_cpuid); + + kthread_create(cxgb_pcpu_start_proc, qs, &p, + RFNOWAIT, 0, "cxgbsp"); + DELAY(200); + } + } +} + +void +cxgb_pcpu_shutdown_threads(struct adapter *sc) +{ + int i, j; + int nqsets; + + for (i = 0; i < sc->params.nports; i++) { + struct port_info *pi = &sc->port[i]; + int first = pi->first_qset; + +#ifdef IFNET_MULTIQUEUE + nqsets = pi->nqsets; +#else + nqsets = 1; +#endif + for (j = 0; j < nqsets; j++) { + struct sge_qset *qs = &sc->sge.qs[first + j]; + + qs->qs_flags |= QS_EXITING; + wakeup(qs); + tsleep(&sc, PRI_MIN_TIMESHARE, "cxgb unload 0", hz>>2); + while (qs->qs_flags & QS_RUNNING) { + qs->qs_flags |= QS_EXITING; + device_printf(sc->dev, "qset thread %d still running - sleeping\n", first + j); + tsleep(&sc, PRI_MIN_TIMESHARE, "cxgb unload 1", 2*hz); + } + } + } +} + +static __inline void +check_pkt_coalesce(struct sge_qset *qs) +{ + struct adapter *sc; + struct sge_txq *txq; + + txq = &qs->txq[TXQ_ETH]; + sc = qs->port->adapter; + + if 
(sc->tunq_fill[qs->idx] && (txq->in_use < (txq->size - (txq->size>>2)))) + sc->tunq_fill[qs->idx] = 0; + else if (!sc->tunq_fill[qs->idx] && (txq->in_use > (txq->size - (txq->size>>2)))) + sc->tunq_fill[qs->idx] = 1; +} + +static int +cxgb_tx(struct sge_qset *qs, uint32_t txmax) +{ + struct sge_txq *txq; + struct ifnet *ifp = qs->port->ifp; + int i, err, in_use_init, count; + struct mbuf *m_vec[TX_WR_COUNT_MAX]; + + txq = &qs->txq[TXQ_ETH]; + ifp = qs->port->ifp; + in_use_init = txq->in_use; + err = 0; + + for (i = 0; i < TX_WR_COUNT_MAX; i++) + m_vec[i] = NULL; + + mtx_assert(&txq->lock, MA_OWNED); + while ((txq->in_use - in_use_init < txmax) && + (txq->size > txq->in_use + TX_MAX_DESC)) { + check_pkt_coalesce(qs); + count = cxgb_dequeue_packet(txq, m_vec); + if (count == 0) { + err = ENOBUFS; + break; + } + ETHER_BPF_MTAP(ifp, m_vec[0]); + + if ((err = t3_encap(qs, m_vec, count)) != 0) + break; + txq->txq_enqueued += count; + m_vec[0] = NULL; + } +#if 0 /* !MULTIQ */ + if (__predict_false(err)) { + if (err == ENOMEM) { + ifp->if_drv_flags |= IFF_DRV_OACTIVE; + IFQ_LOCK(&ifp->if_snd); + IFQ_DRV_PREPEND(&ifp->if_snd, m_vec[0]); + IFQ_UNLOCK(&ifp->if_snd); + } + } + else if ((err == 0) && (txq->size <= txq->in_use + TX_MAX_DESC) && + (ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { + setbit(&qs->txq_stopped, TXQ_ETH); + ifp->if_drv_flags |= IFF_DRV_OACTIVE; + err = ENOSPC; + } +#else + if ((err == 0) && (txq->size <= txq->in_use + TX_MAX_DESC)) { + err = ENOSPC; + setbit(&qs->txq_stopped, TXQ_ETH); + } + if (err == ENOMEM) { + int i; + /* + * Sub-optimal :-/ + */ + printf("ENOMEM!!!"); + for (i = 0; i < count; i++) + m_freem(m_vec[i]); + } +#endif + return (err); +} + Index: sys/dev/cxgb/cxgb_offload.c =================================================================== RCS file: /bucket/users/kmacy/devel/ncvs/src/sys/dev/cxgb/cxgb_offload.c,v retrieving revision 1.8 diff -u -r1.8 cxgb_offload.c --- sys/dev/cxgb/cxgb_offload.c 17 Aug 2007 05:57:04 -0000 1.8 +++ 
sys/dev/cxgb/cxgb_offload.c 17 Apr 2008 06:05:23 -0000 @@ -31,7 +31,7 @@ #include -__FBSDID("$FreeBSD: src/sys/dev/cxgb/cxgb_offload.c,v 1.8 2007/08/17 05:57:04 kmacy Exp $"); +__FBSDID("$FreeBSD$"); #include #include @@ -52,8 +52,10 @@ #include #include #include +#include #include #include +#include #ifdef CONFIG_DEFINED #include @@ -61,37 +63,48 @@ #include #endif -#include #include -/* - * XXX - */ -#define LOG_NOTICE 2 -#define BUG_ON(...) #define VALIDATE_TID 0 - +MALLOC_DEFINE(M_CXGB, "cxgb", "Chelsio 10 Gigabit Ethernet and services"); TAILQ_HEAD(, cxgb_client) client_list; -TAILQ_HEAD(, toedev) ofld_dev_list; -TAILQ_HEAD(, adapter) adapter_list; +TAILQ_HEAD(, t3cdev) ofld_dev_list; + static struct mtx cxgb_db_lock; -static struct rwlock adapter_list_lock; -static const unsigned int MAX_ATIDS = 64 * 1024; -static const unsigned int ATID_BASE = 0x100000; static int inited = 0; static inline int -offload_activated(struct toedev *tdev) +offload_activated(struct t3cdev *tdev) { struct adapter *adapter = tdev2adap(tdev); return (isset(&adapter->open_device_map, OFFLOAD_DEVMAP_BIT)); } +static inline void +register_tdev(struct t3cdev *tdev) +{ + static int unit; + + mtx_lock(&cxgb_db_lock); + snprintf(tdev->name, sizeof(tdev->name), "ofld_dev%d", unit++); + TAILQ_INSERT_TAIL(&ofld_dev_list, tdev, entry); + mtx_unlock(&cxgb_db_lock); +} + +static inline void +unregister_tdev(struct t3cdev *tdev) +{ + mtx_lock(&cxgb_db_lock); + TAILQ_REMOVE(&ofld_dev_list, tdev, entry); + mtx_unlock(&cxgb_db_lock); +} + +#ifdef TOE_ENABLED /** * cxgb_register_client - register an offload client * @client: the client @@ -102,15 +115,19 @@ void cxgb_register_client(struct cxgb_client *client) { - struct toedev *tdev; + struct t3cdev *tdev; mtx_lock(&cxgb_db_lock); TAILQ_INSERT_TAIL(&client_list, client, client_entry); if (client->add) { - TAILQ_FOREACH(tdev, &ofld_dev_list, ofld_entry) { - if (offload_activated(tdev)) + TAILQ_FOREACH(tdev, &ofld_dev_list, entry) { + if 
(offload_activated(tdev)) { client->add(tdev); + } else + CTR1(KTR_CXGB, + "cxgb_register_client: %p not activated", tdev); + } } mtx_unlock(&cxgb_db_lock); @@ -126,13 +143,13 @@ void cxgb_unregister_client(struct cxgb_client *client) { - struct toedev *tdev; + struct t3cdev *tdev; mtx_lock(&cxgb_db_lock); TAILQ_REMOVE(&client_list, client, client_entry); if (client->remove) { - TAILQ_FOREACH(tdev, &ofld_dev_list, ofld_entry) { + TAILQ_FOREACH(tdev, &ofld_dev_list, entry) { if (offload_activated(tdev)) client->remove(tdev); } @@ -147,7 +164,7 @@ * Call backs all registered clients once a offload device is activated */ void -cxgb_add_clients(struct toedev *tdev) +cxgb_add_clients(struct t3cdev *tdev) { struct cxgb_client *client; @@ -166,7 +183,7 @@ * Call backs all registered clients once a offload device is deactivated */ void -cxgb_remove_clients(struct toedev *tdev) +cxgb_remove_clients(struct t3cdev *tdev) { struct cxgb_client *client; @@ -177,283 +194,24 @@ } mtx_unlock(&cxgb_db_lock); } +#endif -static int -is_offloading(struct ifnet *ifp) -{ - struct adapter *adapter; - int port; - - rw_rlock(&adapter_list_lock); - TAILQ_FOREACH(adapter, &adapter_list, adapter_entry) { - for_each_port(adapter, port) { - if (ifp == adapter->port[port].ifp) { - rw_runlock(&adapter_list_lock); - return 1; - } - } - } - rw_runlock(&adapter_list_lock); - return 0; -} - -static struct ifnet * -get_iff_from_mac(adapter_t *adapter, const uint8_t *mac, unsigned int vlan) -{ -#ifdef notyet - int i; - - for_each_port(adapter, i) { - const struct vlan_group *grp; - const struct port_info *p = &adapter->port[i]; - struct ifnet *ifnet = p->ifp; - - if (!memcmp(p->hw_addr, mac, ETHER_ADDR_LEN)) { - if (vlan && vlan != EVL_VLID_MASK) { - grp = p->vlan_grp; - dev = grp ? 
grp->vlan_devices[vlan] : NULL; - } else - while (dev->master) - dev = dev->master; - return dev; - } - } -#endif - return NULL; -} - -static inline void -failover_fixup(adapter_t *adapter, int port) -{ - if (adapter->params.rev == 0) { - struct ifnet *ifp = adapter->port[port].ifp; - struct cmac *mac = &adapter->port[port].mac; - if (!(ifp->if_flags & IFF_UP)) { - /* Failover triggered by the interface ifdown */ - t3_write_reg(adapter, A_XGM_TX_CTRL + mac->offset, - F_TXEN); - t3_read_reg(adapter, A_XGM_TX_CTRL + mac->offset); - } else { - /* Failover triggered by the interface link down */ - t3_write_reg(adapter, A_XGM_RX_CTRL + mac->offset, 0); - t3_read_reg(adapter, A_XGM_RX_CTRL + mac->offset); - t3_write_reg(adapter, A_XGM_RX_CTRL + mac->offset, - F_RXEN); - } - } -} - -static int -cxgb_ulp_iscsi_ctl(adapter_t *adapter, unsigned int req, void *data) -{ - int ret = 0; - struct ulp_iscsi_info *uiip = data; - - switch (req) { - case ULP_ISCSI_GET_PARAMS: - uiip->llimit = t3_read_reg(adapter, A_ULPRX_ISCSI_LLIMIT); - uiip->ulimit = t3_read_reg(adapter, A_ULPRX_ISCSI_ULIMIT); - uiip->tagmask = t3_read_reg(adapter, A_ULPRX_ISCSI_TAGMASK); - /* - * On tx, the iscsi pdu has to be <= tx page size and has to - * fit into the Tx PM FIFO. - */ - uiip->max_txsz = min(adapter->params.tp.tx_pg_size, - t3_read_reg(adapter, A_PM1_TX_CFG) >> 17); - /* on rx, the iscsi pdu has to be < rx page size and the - whole pdu + cpl headers has to fit into one sge buffer */ - uiip->max_rxsz = - (unsigned int)min(adapter->params.tp.rx_pg_size, - (adapter->sge.qs[0].fl[1].buf_size - - sizeof(struct cpl_rx_data) * 2 - - sizeof(struct cpl_rx_data_ddp)) ); - break; - case ULP_ISCSI_SET_PARAMS: - t3_write_reg(adapter, A_ULPRX_ISCSI_TAGMASK, uiip->tagmask); - break; - default: - ret = (EOPNOTSUPP); - } - return ret; -} - -/* Response queue used for RDMA events. 
*/ -#define ASYNC_NOTIF_RSPQ 0 - -static int -cxgb_rdma_ctl(adapter_t *adapter, unsigned int req, void *data) +/** + * cxgb_ofld_recv - process n received offload packets + * @dev: the offload device + * @m: an array of offload packets + * @n: the number of offload packets + * + * Process an array of ingress offload packets. Each packet is forwarded + * to any active network taps and then passed to the offload device's receive + * method. We optimize passing packets to the receive method by passing + * it the whole array at once except when there are active taps. + */ +int +cxgb_ofld_recv(struct t3cdev *dev, struct mbuf **m, int n) { - int ret = 0; - - switch (req) { - case RDMA_GET_PARAMS: { - struct rdma_info *req = data; - - req->udbell_physbase = rman_get_start(adapter->regs_res); - req->udbell_len = rman_get_size(adapter->regs_res); - req->tpt_base = t3_read_reg(adapter, A_ULPTX_TPT_LLIMIT); - req->tpt_top = t3_read_reg(adapter, A_ULPTX_TPT_ULIMIT); - req->pbl_base = t3_read_reg(adapter, A_ULPTX_PBL_LLIMIT); - req->pbl_top = t3_read_reg(adapter, A_ULPTX_PBL_ULIMIT); - req->rqt_base = t3_read_reg(adapter, A_ULPRX_RQ_LLIMIT); - req->rqt_top = t3_read_reg(adapter, A_ULPRX_RQ_ULIMIT); - req->kdb_addr = (void *)(rman_get_start(adapter->regs_res) + A_SG_KDOORBELL); - break; - } - case RDMA_CQ_OP: { - struct rdma_cq_op *req = data; - /* may be called in any context */ - mtx_lock(&adapter->sge.reg_lock); - ret = t3_sge_cqcntxt_op(adapter, req->id, req->op, - req->credits); - mtx_unlock(&adapter->sge.reg_lock); - break; - } - case RDMA_GET_MEM: { - struct ch_mem_range *t = data; - struct mc7 *mem; - - if ((t->addr & 7) || (t->len & 7)) - return (EINVAL); - if (t->mem_id == MEM_CM) - mem = &adapter->cm; - else if (t->mem_id == MEM_PMRX) - mem = &adapter->pmrx; - else if (t->mem_id == MEM_PMTX) - mem = &adapter->pmtx; - else - return (EINVAL); - - ret = t3_mc7_bd_read(mem, t->addr/8, t->len/8, (u64 *)t->buf); - if (ret) - return (ret); - break; - } - case RDMA_CQ_SETUP: 
{ - struct rdma_cq_setup *req = data; - - mtx_lock(&adapter->sge.reg_lock); - ret = t3_sge_init_cqcntxt(adapter, req->id, req->base_addr, - req->size, ASYNC_NOTIF_RSPQ, - req->ovfl_mode, req->credits, - req->credit_thres); - mtx_unlock(&adapter->sge.reg_lock); - break; - } - case RDMA_CQ_DISABLE: - mtx_lock(&adapter->sge.reg_lock); - ret = t3_sge_disable_cqcntxt(adapter, *(unsigned int *)data); - mtx_unlock(&adapter->sge.reg_lock); - break; - case RDMA_CTRL_QP_SETUP: { - struct rdma_ctrlqp_setup *req = data; - - mtx_lock(&adapter->sge.reg_lock); - ret = t3_sge_init_ecntxt(adapter, FW_RI_SGEEC_START, 0, - SGE_CNTXT_RDMA, ASYNC_NOTIF_RSPQ, - req->base_addr, req->size, - FW_RI_TID_START, 1, 0); - mtx_unlock(&adapter->sge.reg_lock); - break; - } - default: - ret = EOPNOTSUPP; - } - return (ret); -} - -static int -cxgb_offload_ctl(struct toedev *tdev, unsigned int req, void *data) -{ - struct adapter *adapter = tdev2adap(tdev); - struct tid_range *tid; - struct mtutab *mtup; - struct iff_mac *iffmacp; - struct ddp_params *ddpp; - struct adap_ports *ports; - int port; - - switch (req) { - case GET_MAX_OUTSTANDING_WR: - *(unsigned int *)data = FW_WR_NUM; - break; - case GET_WR_LEN: - *(unsigned int *)data = WR_FLITS; - break; - case GET_TX_MAX_CHUNK: - *(unsigned int *)data = 1 << 20; /* 1MB */ - break; - case GET_TID_RANGE: - tid = data; - tid->num = t3_mc5_size(&adapter->mc5) - - adapter->params.mc5.nroutes - - adapter->params.mc5.nfilters - - adapter->params.mc5.nservers; - tid->base = 0; - break; - case GET_STID_RANGE: - tid = data; - tid->num = adapter->params.mc5.nservers; - tid->base = t3_mc5_size(&adapter->mc5) - tid->num - - adapter->params.mc5.nfilters - - adapter->params.mc5.nroutes; - break; - case GET_L2T_CAPACITY: - *(unsigned int *)data = 2048; - break; - case GET_MTUS: - mtup = data; - mtup->size = NMTUS; - mtup->mtus = adapter->params.mtus; - break; - case GET_IFF_FROM_MAC: - iffmacp = data; - iffmacp->dev = get_iff_from_mac(adapter, iffmacp->mac_addr, - 
iffmacp->vlan_tag & EVL_VLID_MASK); - break; - case GET_DDP_PARAMS: - ddpp = data; - ddpp->llimit = t3_read_reg(adapter, A_ULPRX_TDDP_LLIMIT); - ddpp->ulimit = t3_read_reg(adapter, A_ULPRX_TDDP_ULIMIT); - ddpp->tag_mask = t3_read_reg(adapter, A_ULPRX_TDDP_TAGMASK); - break; - case GET_PORTS: - ports = data; - ports->nports = adapter->params.nports; - for_each_port(adapter, port) - ports->lldevs[port] = adapter->port[port].ifp; - break; - case FAILOVER: - port = *(int *)data; - t3_port_failover(adapter, port); - failover_fixup(adapter, port); - break; - case FAILOVER_DONE: - port = *(int *)data; - t3_failover_done(adapter, port); - break; - case FAILOVER_CLEAR: - t3_failover_clear(adapter); - break; - case ULP_ISCSI_GET_PARAMS: - case ULP_ISCSI_SET_PARAMS: - if (!offload_running(adapter)) - return (EAGAIN); - return cxgb_ulp_iscsi_ctl(adapter, req, data); - case RDMA_GET_PARAMS: - case RDMA_CQ_OP: - case RDMA_CQ_SETUP: - case RDMA_CQ_DISABLE: - case RDMA_CTRL_QP_SETUP: - case RDMA_GET_MEM: - if (!offload_running(adapter)) - return (EAGAIN); - return cxgb_rdma_ctl(adapter, req, data); - default: - return (EOPNOTSUPP); - } - return 0; + return dev->recv(dev, m, n); } /* @@ -462,199 +220,28 @@ * normal to get offload packets at this stage. */ static int -rx_offload_blackhole(struct toedev *dev, struct mbuf **m, int n) +rx_offload_blackhole(struct t3cdev *dev, struct mbuf **m, int n) { - CH_ERR(tdev2adap(dev), "%d unexpected offload packets, first data 0x%x\n", - n, *mtod(m[0], uint32_t *)); while (n--) m_freem(m[n]); return 0; } static void -dummy_neigh_update(struct toedev *dev, struct rtentry *neigh) +dummy_neigh_update(struct t3cdev *dev, struct rtentry *neigh, uint8_t *enaddr, + struct sockaddr *sa) { } void -cxgb_set_dummy_ops(struct toedev *dev) +cxgb_set_dummy_ops(struct t3cdev *dev) { dev->recv = rx_offload_blackhole; - dev->neigh_update = dummy_neigh_update; -} - -/* - * Free an active-open TID. 
- */ -void * -cxgb_free_atid(struct toedev *tdev, int atid) -{ - struct tid_info *t = &(TOE_DATA(tdev))->tid_maps; - union active_open_entry *p = atid2entry(t, atid); - void *ctx = p->toe_tid.ctx; - - mtx_lock(&t->atid_lock); - p->next = t->afree; - t->afree = p; - t->atids_in_use--; - mtx_lock(&t->atid_lock); - - return ctx; -} - -/* - * Free a server TID and return it to the free pool. - */ -void -cxgb_free_stid(struct toedev *tdev, int stid) -{ - struct tid_info *t = &(TOE_DATA(tdev))->tid_maps; - union listen_entry *p = stid2entry(t, stid); - - mtx_lock(&t->stid_lock); - p->next = t->sfree; - t->sfree = p; - t->stids_in_use--; - mtx_unlock(&t->stid_lock); -} - -void -cxgb_insert_tid(struct toedev *tdev, struct cxgb_client *client, - void *ctx, unsigned int tid) -{ - struct tid_info *t = &(TOE_DATA(tdev))->tid_maps; - - t->tid_tab[tid].client = client; - t->tid_tab[tid].ctx = ctx; - atomic_add_int(&t->tids_in_use, 1); -} - -/* - * Populate a TID_RELEASE WR. The mbuf must be already propely sized. 
- */ -static inline void -mk_tid_release(struct mbuf *m, unsigned int tid) -{ - struct cpl_tid_release *req; - - m_set_priority(m, CPL_PRIORITY_SETUP); - req = mtod(m, struct cpl_tid_release *); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid)); -} - -static void -t3_process_tid_release_list(void *data, int pending) -{ - struct mbuf *m; - struct toedev *tdev = data; - struct toe_data *td = TOE_DATA(tdev); - - mtx_lock(&td->tid_release_lock); - while (td->tid_release_list) { - struct toe_tid_entry *p = td->tid_release_list; - - td->tid_release_list = (struct toe_tid_entry *)p->ctx; - mtx_unlock(&td->tid_release_lock); - m = m_get(M_WAIT, MT_DATA); - mk_tid_release(m, p - td->tid_maps.tid_tab); - cxgb_ofld_send(tdev, m); - p->ctx = NULL; - mtx_lock(&td->tid_release_lock); - } - mtx_unlock(&td->tid_release_lock); -} - -/* use ctx as a next pointer in the tid release list */ -void -cxgb_queue_tid_release(struct toedev *tdev, unsigned int tid) -{ - struct toe_data *td = TOE_DATA(tdev); - struct toe_tid_entry *p = &td->tid_maps.tid_tab[tid]; - - mtx_lock(&td->tid_release_lock); - p->ctx = td->tid_release_list; - td->tid_release_list = p; - - if (!p->ctx) - taskqueue_enqueue(tdev->adapter->tq, &td->tid_release_task); - - mtx_unlock(&td->tid_release_lock); -} - -/* - * Remove a tid from the TID table. A client may defer processing its last - * CPL message if it is locked at the time it arrives, and while the message - * sits in the client's backlog the TID may be reused for another connection. - * To handle this we atomically switch the TID association if it still points - * to the original client context. 
- */ -void -cxgb_remove_tid(struct toedev *tdev, void *ctx, unsigned int tid) -{ - struct tid_info *t = &(TOE_DATA(tdev))->tid_maps; - - BUG_ON(tid >= t->ntids); - if (tdev->type == T3A) - atomic_cmpset_ptr((uintptr_t *)&t->tid_tab[tid].ctx, (long)NULL, (long)ctx); - else { - struct mbuf *m; - - m = m_get(M_NOWAIT, MT_DATA); - if (__predict_true(m != NULL)) { - mk_tid_release(m, tid); - cxgb_ofld_send(tdev, m); - t->tid_tab[tid].ctx = NULL; - } else - cxgb_queue_tid_release(tdev, tid); - } - atomic_add_int(&t->tids_in_use, -1); -} - -int -cxgb_alloc_atid(struct toedev *tdev, struct cxgb_client *client, - void *ctx) -{ - int atid = -1; - struct tid_info *t = &(TOE_DATA(tdev))->tid_maps; - - mtx_lock(&t->atid_lock); - if (t->afree) { - union active_open_entry *p = t->afree; - - atid = (p - t->atid_tab) + t->atid_base; - t->afree = p->next; - p->toe_tid.ctx = ctx; - p->toe_tid.client = client; - t->atids_in_use++; - } - mtx_unlock(&t->atid_lock); - return atid; -} - -int -cxgb_alloc_stid(struct toedev *tdev, struct cxgb_client *client, - void *ctx) -{ - int stid = -1; - struct tid_info *t = &(TOE_DATA(tdev))->tid_maps; - - mtx_lock(&t->stid_lock); - if (t->sfree) { - union listen_entry *p = t->sfree; - - stid = (p - t->stid_tab) + t->stid_base; - t->sfree = p->next; - p->toe_tid.ctx = ctx; - p->toe_tid.client = client; - t->stids_in_use++; - } - mtx_unlock(&t->stid_lock); - return stid; + dev->arp_update = dummy_neigh_update; } static int -do_smt_write_rpl(struct toedev *dev, struct mbuf *m) +do_smt_write_rpl(struct t3cdev *dev, struct mbuf *m) { struct cpl_smt_write_rpl *rpl = cplhdr(m); @@ -667,7 +254,7 @@ } static int -do_l2t_write_rpl(struct toedev *dev, struct mbuf *m) +do_l2t_write_rpl(struct t3cdev *dev, struct mbuf *m) { struct cpl_l2t_write_rpl *rpl = cplhdr(m); @@ -680,146 +267,20 @@ } static int -do_act_open_rpl(struct toedev *dev, struct mbuf *m) -{ - struct cpl_act_open_rpl *rpl = cplhdr(m); - unsigned int atid = G_TID(ntohl(rpl->atid)); - struct 
toe_tid_entry *toe_tid; - - toe_tid = lookup_atid(&(TOE_DATA(dev))->tid_maps, atid); - if (toe_tid->ctx && toe_tid->client && toe_tid->client->handlers && - toe_tid->client->handlers[CPL_ACT_OPEN_RPL]) { - return toe_tid->client->handlers[CPL_ACT_OPEN_RPL] (dev, m, - toe_tid->ctx); - } else { - log(LOG_ERR, "%s: received clientless CPL command 0x%x\n", - dev->name, CPL_ACT_OPEN_RPL); - return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG; - } -} - -static int -do_stid_rpl(struct toedev *dev, struct mbuf *m) -{ - union opcode_tid *p = cplhdr(m); - unsigned int stid = G_TID(ntohl(p->opcode_tid)); - struct toe_tid_entry *toe_tid; - - toe_tid = lookup_stid(&(TOE_DATA(dev))->tid_maps, stid); - if (toe_tid->ctx && toe_tid->client->handlers && - toe_tid->client->handlers[p->opcode]) { - return toe_tid->client->handlers[p->opcode] (dev, m, toe_tid->ctx); - } else { - log(LOG_ERR, "%s: received clientless CPL command 0x%x\n", - dev->name, p->opcode); - return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG; - } -} - -static int -do_hwtid_rpl(struct toedev *dev, struct mbuf *m) +do_rte_write_rpl(struct t3cdev *dev, struct mbuf *m) { - union opcode_tid *p = cplhdr(m); - unsigned int hwtid; - struct toe_tid_entry *toe_tid; - - printf("do_hwtid_rpl m=%p\n", m); - return (0); - - - hwtid = G_TID(ntohl(p->opcode_tid)); + struct cpl_rte_write_rpl *rpl = cplhdr(m); - toe_tid = lookup_tid(&(TOE_DATA(dev))->tid_maps, hwtid); - if (toe_tid->ctx && toe_tid->client->handlers && - toe_tid->client->handlers[p->opcode]) { - return toe_tid->client->handlers[p->opcode] - (dev, m, toe_tid->ctx); - } else { - log(LOG_ERR, "%s: received clientless CPL command 0x%x\n", - dev->name, p->opcode); - return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG; - } -} - -static int -do_cr(struct toedev *dev, struct mbuf *m) -{ - struct cpl_pass_accept_req *req = cplhdr(m); - unsigned int stid = G_PASS_OPEN_TID(ntohl(req->tos_tid)); - struct toe_tid_entry *toe_tid; - - toe_tid = lookup_stid(&(TOE_DATA(dev))->tid_maps, stid); - if (toe_tid->ctx 
&& toe_tid->client->handlers && - toe_tid->client->handlers[CPL_PASS_ACCEPT_REQ]) { - return toe_tid->client->handlers[CPL_PASS_ACCEPT_REQ] - (dev, m, toe_tid->ctx); - } else { - log(LOG_ERR, "%s: received clientless CPL command 0x%x\n", - dev->name, CPL_PASS_ACCEPT_REQ); - return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG; - } -} - -static int -do_abort_req_rss(struct toedev *dev, struct mbuf *m) -{ - union opcode_tid *p = cplhdr(m); - unsigned int hwtid = G_TID(ntohl(p->opcode_tid)); - struct toe_tid_entry *toe_tid; - - toe_tid = lookup_tid(&(TOE_DATA(dev))->tid_maps, hwtid); - if (toe_tid->ctx && toe_tid->client->handlers && - toe_tid->client->handlers[p->opcode]) { - return toe_tid->client->handlers[p->opcode] - (dev, m, toe_tid->ctx); - } else { - struct cpl_abort_req_rss *req = cplhdr(m); - struct cpl_abort_rpl *rpl; - - struct mbuf *m = m_get(M_NOWAIT, MT_DATA); - if (!m) { - log(LOG_NOTICE, "do_abort_req_rss: couldn't get mbuf!\n"); - goto out; - } - - m_set_priority(m, CPL_PRIORITY_DATA); -#if 0 - __skb_put(skb, sizeof(struct cpl_abort_rpl)); -#endif - rpl = cplhdr(m); - rpl->wr.wr_hi = - htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL)); - rpl->wr.wr_lo = htonl(V_WR_TID(GET_TID(req))); - OPCODE_TID(rpl) = - htonl(MK_OPCODE_TID(CPL_ABORT_RPL, GET_TID(req))); - rpl->cmd = req->status; - cxgb_ofld_send(dev, m); - out: - return CPL_RET_BUF_DONE; - } -} + if (rpl->status != CPL_ERR_NONE) + log(LOG_ERR, + "Unexpected L2T_WRITE_RPL status %u for entry %u\n", + rpl->status, GET_TID(rpl)); -static int -do_act_establish(struct toedev *dev, struct mbuf *m) -{ - struct cpl_act_establish *req = cplhdr(m); - unsigned int atid = G_PASS_OPEN_TID(ntohl(req->tos_tid)); - struct toe_tid_entry *toe_tid; - - toe_tid = lookup_atid(&(TOE_DATA(dev))->tid_maps, atid); - if (toe_tid->ctx && toe_tid->client->handlers && - toe_tid->client->handlers[CPL_ACT_ESTABLISH]) { - return toe_tid->client->handlers[CPL_ACT_ESTABLISH] - (dev, m, toe_tid->ctx); - } else { - log(LOG_ERR, "%s: received 
clientless CPL command 0x%x\n", - dev->name, CPL_PASS_ACCEPT_REQ); - return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG; - } + return CPL_RET_BUF_DONE; } static int -do_set_tcb_rpl(struct toedev *dev, struct mbuf *m) +do_set_tcb_rpl(struct t3cdev *dev, struct mbuf *m) { struct cpl_set_tcb_rpl *rpl = cplhdr(m); @@ -831,7 +292,7 @@ } static int -do_trace(struct toedev *dev, struct mbuf *m) +do_trace(struct t3cdev *dev, struct mbuf *m) { #if 0 struct cpl_trace_pkt *p = cplhdr(m); @@ -846,283 +307,37 @@ return 0; } -static int -do_term(struct toedev *dev, struct mbuf *m) -{ - unsigned int hwtid = ntohl(m_get_priority(m)) >> 8 & 0xfffff; - unsigned int opcode = G_OPCODE(ntohl(m->m_pkthdr.csum_data)); - struct toe_tid_entry *toe_tid; - - toe_tid = lookup_tid(&(TOE_DATA(dev))->tid_maps, hwtid); - if (toe_tid->ctx && toe_tid->client->handlers && - toe_tid->client->handlers[opcode]) { - return toe_tid->client->handlers[opcode](dev, m, toe_tid->ctx); - } else { - log(LOG_ERR, "%s: received clientless CPL command 0x%x\n", - dev->name, opcode); - return CPL_RET_BUF_DONE | CPL_RET_BAD_MSG; - } - return (0); -} - -#if defined(FOO) -#include -#include -#include -#include - -static int (*orig_arp_constructor)(struct ifnet *); - -static void -neigh_suspect(struct ifnet *neigh) -{ - struct hh_cache *hh; - - neigh->output = neigh->ops->output; - - for (hh = neigh->hh; hh; hh = hh->hh_next) - hh->hh_output = neigh->ops->output; -} - -static void -neigh_connect(struct ifnet *neigh) -{ - struct hh_cache *hh; - - neigh->output = neigh->ops->connected_output; - - for (hh = neigh->hh; hh; hh = hh->hh_next) - hh->hh_output = neigh->ops->hh_output; -} - -static inline int -neigh_max_probes(const struct neighbour *n) -{ - const struct neigh_parms *p = n->parms; - return (n->nud_state & NUD_PROBE ? 
- p->ucast_probes : - p->ucast_probes + p->app_probes + p->mcast_probes); -} - -static void -neigh_timer_handler_offload(unsigned long arg) -{ - unsigned long now, next; - struct neighbour *neigh = (struct neighbour *)arg; - unsigned state; - int notify = 0; - - write_lock(&neigh->lock); - - state = neigh->nud_state; - now = jiffies; - next = now + HZ; - - if (!(state & NUD_IN_TIMER)) { -#ifndef CONFIG_SMP - log(LOG_WARNING, "neigh: timer & !nud_in_timer\n"); -#endif - goto out; - } - - if (state & NUD_REACHABLE) { - if (time_before_eq(now, - neigh->confirmed + - neigh->parms->reachable_time)) { - next = neigh->confirmed + neigh->parms->reachable_time; - } else if (time_before_eq(now, - neigh->used + - neigh->parms->delay_probe_time)) { - neigh->nud_state = NUD_DELAY; - neigh->updated = jiffies; - neigh_suspect(neigh); - next = now + neigh->parms->delay_probe_time; - } else { - neigh->nud_state = NUD_STALE; - neigh->updated = jiffies; - neigh_suspect(neigh); - cxgb_neigh_update(neigh); - } - } else if (state & NUD_DELAY) { - if (time_before_eq(now, - neigh->confirmed + - neigh->parms->delay_probe_time)) { - neigh->nud_state = NUD_REACHABLE; - neigh->updated = jiffies; - neigh_connect(neigh); - cxgb_neigh_update(neigh); - next = neigh->confirmed + neigh->parms->reachable_time; - } else { - neigh->nud_state = NUD_PROBE; - neigh->updated = jiffies; - atomic_set_int(&neigh->probes, 0); - next = now + neigh->parms->retrans_time; - } - } else { - /* NUD_PROBE|NUD_INCOMPLETE */ - next = now + neigh->parms->retrans_time; - } - /* - * Needed for read of probes - */ - mb(); - if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) && - neigh->probes >= neigh_max_probes(neigh)) { - struct mbuf *m; - - neigh->nud_state = NUD_FAILED; - neigh->updated = jiffies; - notify = 1; - cxgb_neigh_update(neigh); - NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed); - - /* It is very thin place. report_unreachable is very - complicated routine. Particularly, it can hit the same - neighbour entry! 
- So that, we try to be accurate and avoid dead loop. --ANK - */ - while (neigh->nud_state == NUD_FAILED && - (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) { - write_unlock(&neigh->lock); - neigh->ops->error_report(neigh, skb); - write_lock(&neigh->lock); - } - skb_queue_purge(&neigh->arp_queue); - } - - if (neigh->nud_state & NUD_IN_TIMER) { - if (time_before(next, jiffies + HZ/2)) - next = jiffies + HZ/2; - if (!mod_timer(&neigh->timer, next)) - neigh_hold(neigh); - } - if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) { - struct mbuf *m = skb_peek(&neigh->arp_queue); - - write_unlock(&neigh->lock); - neigh->ops->solicit(neigh, skb); - atomic_add_int(&neigh->probes, 1); - if (m) - m_free(m); - } else { -out: - write_unlock(&neigh->lock); - } - -#ifdef CONFIG_ARPD - if (notify && neigh->parms->app_probes) - neigh_app_notify(neigh); -#endif - neigh_release(neigh); -} - -static int -arp_constructor_offload(struct neighbour *neigh) -{ - if (neigh->ifp && is_offloading(neigh->ifp)) - neigh->timer.function = neigh_timer_handler_offload; - return orig_arp_constructor(neigh); -} - -/* - * This must match exactly the signature of neigh_update for jprobes to work. - * It runs from a trap handler with interrupts off so don't disable BH. 
- */ -static int -neigh_update_offload(struct neighbour *neigh, const u8 *lladdr, - u8 new, u32 flags) -{ - write_lock(&neigh->lock); - cxgb_neigh_update(neigh); - write_unlock(&neigh->lock); - jprobe_return(); - /* NOTREACHED */ - return 0; -} - -static struct jprobe neigh_update_jprobe = { - .entry = (kprobe_opcode_t *) neigh_update_offload, - .kp.addr = (kprobe_opcode_t *) neigh_update -}; - -#ifdef MODULE_SUPPORT -static int -prepare_arp_with_t3core(void) -{ - int err; - - err = register_jprobe(&neigh_update_jprobe); - if (err) { - log(LOG_ERR, "Could not install neigh_update jprobe, " - "error %d\n", err); - return err; - } - - orig_arp_constructor = arp_tbl.constructor; - arp_tbl.constructor = arp_constructor_offload; - - return 0; -} - -static void -restore_arp_sans_t3core(void) -{ - arp_tbl.constructor = orig_arp_constructor; - unregister_jprobe(&neigh_update_jprobe); -} - -#else /* Module suport */ -static inline int -prepare_arp_with_t3core(void) -{ - return 0; -} - -static inline void -restore_arp_sans_t3core(void) -{} -#endif -#endif /* * Process a received packet with an unknown/unexpected CPL opcode. */ static int -do_bad_cpl(struct toedev *dev, struct mbuf *m) +do_bad_cpl(struct t3cdev *dev, struct mbuf *m) { log(LOG_ERR, "%s: received bad CPL command 0x%x\n", dev->name, - *mtod(m, uint32_t *)); + 0xFF & *mtod(m, uint32_t *)); + kdb_backtrace(); return (CPL_RET_BUF_DONE | CPL_RET_BAD_MSG); } /* * Handlers for each CPL opcode */ -static cpl_handler_func cpl_handlers[NUM_CPL_CMDS]; - -/* - * Add a new handler to the CPL dispatch table. A NULL handler may be supplied - * to unregister an existing handler. - */ -void -t3_register_cpl_handler(unsigned int opcode, cpl_handler_func h) -{ - if (opcode < NUM_CPL_CMDS) - cpl_handlers[opcode] = h ? h : do_bad_cpl; - else - log(LOG_ERR, "T3C: handler registration for " - "opcode %x failed\n", opcode); -} +static cpl_handler_func cpl_handlers[256]; /* - * TOEDEV's receive method. + * T3CDEV's receive method. 
*/ int -process_rx(struct toedev *dev, struct mbuf **m, int n) +process_rx(struct t3cdev *dev, struct mbuf **m, int n) { while (n--) { struct mbuf *m0 = *m++; - unsigned int opcode = G_OPCODE(ntohl(m0->m_pkthdr.csum_data)); - int ret = cpl_handlers[opcode] (dev, m0); + unsigned int opcode = G_OPCODE(ntohl(m0->m_pkthdr.csum_data)); + int ret; + + DPRINTF("processing op=0x%x m=%p data=%p\n", opcode, m0, m0->m_data); + + ret = cpl_handlers[opcode] (dev, m0); #if VALIDATE_TID if (ret & CPL_RET_UNKNOWN_TID) { @@ -1140,138 +355,17 @@ } /* - * Sends an sk_buff to a T3C driver after dealing with any active network taps. - */ -int -cxgb_ofld_send(struct toedev *dev, struct mbuf *m) -{ - int r; - - critical_enter(); - r = dev->send(dev, m); - critical_exit(); - return r; -} - - -/** - * cxgb_ofld_recv - process n received offload packets - * @dev: the offload device - * @m: an array of offload packets - * @n: the number of offload packets - * - * Process an array of ingress offload packets. Each packet is forwarded - * to any active network taps and then passed to the offload device's receive - * method. We optimize passing packets to the receive method by passing - * it the whole array at once except when there are active taps. + * Add a new handler to the CPL dispatch table. A NULL handler may be supplied + * to unregister an existing handler. 
*/ -int -cxgb_ofld_recv(struct toedev *dev, struct mbuf **m, int n) -{ - -#if defined(CONFIG_CHELSIO_T3) - if (likely(!netdev_nit)) - return dev->recv(dev, skb, n); - - for ( ; n; n--, skb++) { - skb[0]->dev = dev->lldev; - dev_queue_xmit_nit(skb[0], dev->lldev); - skb[0]->dev = NULL; - dev->recv(dev, skb, 1); - } - return 0; -#else - return dev->recv(dev, m, n); -#endif -} - -void -cxgb_neigh_update(struct rtentry *rt) -{ - - if (is_offloading(rt->rt_ifp)) { - struct toedev *tdev = TOEDEV(rt->rt_ifp); - - BUG_ON(!tdev); - t3_l2t_update(tdev, rt); - } -} - -static void -set_l2t_ix(struct toedev *tdev, u32 tid, struct l2t_entry *e) -{ - struct mbuf *m; - struct cpl_set_tcb_field *req; - - m = m_gethdr(M_NOWAIT, MT_DATA); - if (!m) { - log(LOG_ERR, "%s: cannot allocate mbuf!\n", __FUNCTION__); - return; - } - - m_set_priority(m, CPL_PRIORITY_CONTROL); - req = mtod(m, struct cpl_set_tcb_field *); - req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid)); - req->reply = 0; - req->cpu_idx = 0; - req->word = htons(W_TCB_L2T_IX); - req->mask = htobe64(V_TCB_L2T_IX(M_TCB_L2T_IX)); - req->val = htobe64(V_TCB_L2T_IX(e->idx)); - tdev->send(tdev, m); -} - void -cxgb_redirect(struct rtentry *old, struct rtentry *new) +t3_register_cpl_handler(unsigned int opcode, cpl_handler_func h) { - struct ifnet *olddev, *newdev; - struct tid_info *ti; - struct toedev *tdev; - u32 tid; - int update_tcb; - struct l2t_entry *e; - struct toe_tid_entry *te; - - olddev = old->rt_ifp; - newdev = new->rt_ifp; - if (!is_offloading(olddev)) - return; - if (!is_offloading(newdev)) { - log(LOG_WARNING, "%s: Redirect to non-offload" - "device ignored.\n", __FUNCTION__); - return; - } - tdev = TOEDEV(olddev); - BUG_ON(!tdev); - if (tdev != TOEDEV(newdev)) { - log(LOG_WARNING, "%s: Redirect to different " - "offload device ignored.\n", __FUNCTION__); - return; - } - - /* Add new L2T entry */ - e = t3_l2t_get(tdev, new, ((struct port_info 
*)new->rt_ifp->if_softc)->port_id); - if (!e) { - log(LOG_ERR, "%s: couldn't allocate new l2t entry!\n", - __FUNCTION__); - return; - } - - /* Walk tid table and notify clients of dst change. */ - ti = &(TOE_DATA(tdev))->tid_maps; - for (tid=0; tid < ti->ntids; tid++) { - te = lookup_tid(ti, tid); - BUG_ON(!te); - if (te->ctx &&