? foo ? np.out Index: cxgb_main.c =================================================================== RCS file: /home/ncvs/src/sys/dev/cxgb/cxgb_main.c,v retrieving revision 1.12 diff -d -u -r1.12 cxgb_main.c --- cxgb_main.c 12 Apr 2007 03:07:24 -0000 1.12 +++ cxgb_main.c 12 Apr 2007 07:04:08 -0000 @@ -56,8 +56,6 @@ #include #include - - #include #include #include @@ -85,6 +83,8 @@ #include #include +#include + #ifdef PRIV_SUPPORTED #include @@ -532,6 +532,11 @@ { int i; + /* + * XXX need to drain the ifq by hand until + * it is taught about mbuf iovecs + */ + callout_drain(&sc->cxgb_tick_ch); t3_sge_deinit_sw(sc); @@ -1188,6 +1193,9 @@ int flags, error = 0; uint32_t mask; + /* + * XXX need to check that we aren't in the middle of an unload + */ switch (command) { case SIOCSIFMTU: if ((ifr->ifr_mtu < ETHERMIN) || @@ -1212,7 +1220,6 @@ error = ether_ioctl(ifp, command, data); break; case SIOCSIFFLAGS: - if (ifp->if_flags & IFF_UP) { PORT_LOCK(p); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { @@ -1294,7 +1301,7 @@ struct sge_qset *qs; struct sge_txq *txq; struct port_info *p = ifp->if_softc; - struct mbuf *m = NULL; + struct mbuf *m0, *m = NULL; int err, in_use_init; @@ -1315,6 +1322,35 @@ IFQ_DRV_DEQUEUE(&ifp->if_snd, m); if (m == NULL) break; + /* + * Convert chain to M_IOVEC + */ + KASSERT((m->m_flags & M_IOVEC) == 0, ("IOVEC set too early")); + m0 = m; +#ifdef INVARIANTS + /* + * Clean up after net stack sloppiness + * before calling m_sanity + */ + m0 = m->m_next; + while (m0) { + m0->m_flags &= ~M_PKTHDR; + m0 = m0->m_next; + } + m_sanity(m0, 0); + m0 = m; +#endif +#if 1 + if (m->m_pkthdr.len > MCLBYTES && + m_collapse(m, TX_MAX_SEGS, &m0) == EFBIG) { + if ((m0 = m_defrag(m, M_NOWAIT)) != NULL) { + m = m0; + m_collapse(m, TX_MAX_SEGS, &m0); + } else + break; + } + m = m0; +#endif if ((err = t3_encap(p, &m)) != 0) break; BPF_MTAP(ifp, m); Index: cxgb_sge.c =================================================================== RCS file: /home/ncvs/src/sys/dev/cxgb/cxgb_sge.c,v retrieving revision 1.10 diff -d -u -r1.10 cxgb_sge.c --- cxgb_sge.c 12 Apr 2007 04:48:54 -0000 1.10 +++ cxgb_sge.c 12 Apr 2007 07:04:09 -0000 @@ -113,6 +113,7 @@ } __packed; #define RX_SW_DESC_MAP_CREATED (1 << 0) +#define TX_SW_DESC_MAP_CREATED (1 << 1) #define RX_SW_DESC_INUSE (1 << 3) #define TX_SW_DESC_MAPPED (1 << 4) @@ -269,7 +270,7 @@ * * Return a packet containing the immediate data of the given response. */ -static __inline int +static __inline void get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m, void *cl) { int len; @@ -280,28 +281,19 @@ * would be a firmware bug */ if (sopeop == RSPQ_NSOP_NEOP || sopeop == RSPQ_SOP) - return (0); - - - len = G_RSPD_LEN(ntohl(resp->len_cq)); + return; - if (m) { - - - switch (sopeop) { - case RSPQ_SOP_EOP: - m = m_gethdr(M_NOWAIT, MT_DATA); - m->m_len = m->m_pkthdr.len = len; - memcpy(m->m_data, resp->imm_data, IMMED_PKT_SIZE); - MH_ALIGN(m, IMMED_PKT_SIZE); - break; - case RSPQ_EOP: - memcpy(cl, resp->imm_data, len); - m_iovappend(m, cl, MSIZE, len, 0); - break; - } + len = G_RSPD_LEN(ntohl(resp->len_cq)); + switch (sopeop) { + case RSPQ_SOP_EOP: + m->m_len = m->m_pkthdr.len = len; + memcpy(m->m_data, resp->imm_data, len); + break; + case RSPQ_EOP: + memcpy(cl, resp->imm_data, len); + m_iovappend(m, cl, MSIZE, len, 0); + break; } - return (m != NULL); } @@ -722,7 +714,7 @@ mtx_unlock(&txq->lock); for (i = 0; i < n; i++) { - m_freem(m_vec[i]); + m_freem_vec(m_vec[i]); } } @@ -734,7 +726,7 @@ mtx_unlock(&txq->lock); for (i = 0; i < n; i++) { - m_freem(m_vec[i]); + m_freem_vec(m_vec[i]); } } @@ -825,7 +817,7 @@ flits = sgl_len(nsegs) + 2; #ifdef TSO_SUPPORTED - if (m->m_pkthdr.tso_segsz) + if (m->m_pkthdr.csum_flags & (CSUM_TSO)) flits++; #endif return flits_to_desc(flits); @@ -840,7 +832,16 @@ m0 = *m; pktlen = m0->m_pkthdr.len; - err = bus_dmamap_load_mbuf_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0); + + if ((stx->flags & TX_SW_DESC_MAP_CREATED) == 0) { + if ((err = bus_dmamap_create(txq->entry_tag, 0, &stx->map))) { + log(LOG_WARNING, "bus_dmamap_create failed %d\n", err); + return (err); + } + stx->flags |= TX_SW_DESC_MAP_CREATED; + } + err = bus_dmamap_load_mvec_sg(txq->entry_tag, stx->map, m0, segs, nsegs, 0); +#ifdef DEBUG if (err) { int n = 0; struct mbuf *mtmp = m0; @@ -848,12 +849,10 @@ n++; mtmp = mtmp->m_next; } -#ifdef DEBUG printf("map_mbufs: bus_dmamap_load_mbuf_sg failed with %d - pkthdr.len==%d nmbufs=%d\n", err, m0->m_pkthdr.len, n); -#endif } - +#endif if (err == EFBIG) { /* Too many segments, try to defrag */ m0 = m_defrag(m0, M_NOWAIT); @@ -873,7 +872,7 @@ if (err) { if (cxgb_debug) printf("map failure err=%d pktlen=%d\n", err, pktlen); - m_freem(m0); + m_freem_vec(m0); *m = NULL; return (err); } @@ -1002,7 +1001,6 @@ cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); if (m0->m_pkthdr.csum_flags & (CSUM_TSO)) tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz); - #endif if (tso_info) { int eth_type; @@ -1010,7 +1008,7 @@ struct ip *ip; struct tcphdr *tcp; uint8_t *pkthdr, tmp[TCPPKTHDRSIZE]; /* is this too large for the stack? */ - + txd->flit[2] = 0; cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO); hdr->cntrl = htonl(cntrl); @@ -1032,12 +1030,11 @@ } tcp = (struct tcphdr *)((uint8_t *)ip + sizeof(*ip)); - + tso_info |= V_LSO_ETH_TYPE(eth_type) | V_LSO_IPHDR_WORDS(ip->ip_hl) | V_LSO_TCPHDR_WORDS(tcp->th_off); hdr->lso_info = htonl(tso_info); - flits = 3; } else { cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT); @@ -1051,7 +1048,7 @@ memcpy(&txd->flit[2], m0->m_data, mlen); else m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]); - + flits = (mlen + 7) / 8 + 2; cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) | V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | @@ -1430,7 +1427,6 @@ printf("error %d from alloc ring tx %i\n", ret, i); goto err; } - q->txq[i].gen = 1; q->txq[i].size = p->txq_size[i]; mtx_init(&q->txq[i].lock, "t3 txq lock", NULL, MTX_DEF); @@ -1755,9 +1751,13 @@ m->m_flags |= M_VLANTAG; } #endif - m->m_pkthdr.rcvif = ifp; + m->m_pkthdr.rcvif = ifp; + m->m_pkthdr.header = m->m_data + sizeof(*cpl) + ethpad; m_explode(m); + /* + * adjust after conversion to mbuf chain + */ m_adj(m, sizeof(*cpl) + ethpad); (*ifp->if_input)(ifp, m); Index: sys/mvec.h =================================================================== RCS file: /home/ncvs/src/sys/dev/cxgb/sys/mvec.h,v retrieving revision 1.3 diff -d -u -r1.3 mvec.h --- sys/mvec.h 11 Apr 2007 13:47:24 -0000 1.3 +++ sys/mvec.h 12 Apr 2007 07:04:09 -0000 @@ -34,9 +34,9 @@ #define mtomv(m) ((struct mbuf_vec *)((m)->m_pktdat)) -#define M_IOVEC 0x40000 /* mbuf immediate data area is used for cluster ptrs */ -#define MAX_MBUF_IOV 12 -#define MBUF_IOV_TYPE_MASK ((1<<3)-1) +#define M_IOVEC 0x100000 /* mbuf immediate data area is used for cluster ptrs */ +#define MAX_MBUF_IOV 7 +#define MBUF_IOV_TYPE_MASK ((1<<3)-1) #define mbuf_vec_set_type(mv, i, type) \ (mv)->mv_vec[(i)].mi_flags = (((mv)->mv_vec[(i)].mi_flags \ & ~MBUF_IOV_TYPE_MASK) | type) @@ -46,16 +46,17 @@ struct mbuf_iovec { - uint32_t mi_flags; /* per-cluster flags */ - uint16_t mi_size; /* length of clusters */ - uint16_t mi_offset; /* data offsets of clusters */ - caddr_t mi_base; /* pointers to clusters */ + uint16_t mi_flags; /* per-cluster flags */ + uint16_t mi_len; /* length of cluster */ + uint32_t mi_offset; /* data offsets into cluster */ + uint8_t *mi_base; /* pointers to cluster */ + volatile uint32_t *mi_refcnt; /* refcnt for cluster */ }; /* - * m_pktdat == 200 bytes on 64-bit arches, need to stay below that + * m_pktdat == 184 bytes on 64-bit arches, need to stay below that * - * 12*16 + 8 == 200 + * 7*24 + 8 == 176 */ struct mbuf_vec { uint16_t mv_first; /* first valid cluster */ @@ -74,12 +75,13 @@ { struct mbuf_vec *mv = mtomv(m); - mv->mv_first = mv->mv_count = 0; + mv->mv_first = mv->mv_count = 0; + m->m_pkthdr.len = m->m_len = 0; m->m_flags |= M_IOVEC; } static __inline void -m_iovappend(struct mbuf *m, void *cl, int size, int len, int offset) +m_iovappend(struct mbuf *m, uint8_t *cl, int size, int len, int offset) { struct mbuf_vec *mv = mtomv(m); struct mbuf_iovec *iov; @@ -90,12 +92,12 @@ panic("invalid flags in %s", __func__); if (mv->mv_count == 0) - m->m_data = cl; + m->m_data = cl + offset; iov = &mv->mv_vec[idx]; iov->mi_flags = m_gettype(size); iov->mi_base = cl; - iov->mi_size = len; + iov->mi_len = len; iov->mi_offset = offset; m->m_pkthdr.len += len; m->m_len += len; @@ -104,7 +106,19 @@ static __inline int m_explode(struct mbuf *m) -{ +{ +#ifdef INVARIANTS + struct mbuf *m0 = m->m_next; + struct m_tag *foo; + foo = m_tag_locate(m, 5000, 5, NULL); + + while (m0) { + KASSERT((m0->m_flags & M_PKTHDR) == 0, + ("pkthdr set on intermediate mbuf")); + m0 = m0->m_next; + + } +#endif if ((m->m_flags & M_IOVEC) == 0) return (0); @@ -114,29 +128,32 @@ static __inline int m_collapse(struct mbuf *m, int maxbufs, struct mbuf **mnew) { - /* - * Add checks here - */ - + if (m->m_next == NULL) { + *mnew = m; + return (0); + } return _m_collapse(m, maxbufs, mnew); } +static __inline struct mbuf * +m_free_vec(struct mbuf *m) +{ + struct mbuf *n = m->m_next; + + if (m->m_flags & M_IOVEC) + mb_free_vec(m); + else if (m->m_flags & M_EXT) + mb_free_ext(m); + else + uma_zfree(zone_mbuf, m); + return (n); +} + static __inline void m_freem_vec(struct mbuf *m) { - struct mbuf *n; - - while (m != NULL) { - n = m->m_next; - - if (m->m_flags & M_IOVEC) - mb_free_vec(m); - else if (m->m_flags & M_EXT) - mb_free_ext(m); - else - uma_zfree(zone_mbuf, m); - m = n; - } + while (m != NULL) + m = m_free_vec(m); } @@ -144,4 +161,39 @@ bus_dmamap_load_mvec_sg(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m0, bus_dma_segment_t *segs, int *nsegs, int flags); + +static __inline uma_zone_t +m_getzonefromtype(int type) +{ + uma_zone_t zone; + + switch (type) { + case EXT_MBUF: + zone = zone_mbuf; + break; + case EXT_CLUSTER: + zone = zone_clust; + break; +#if MJUMPAGESIZE != MCLBYTES + case EXT_JUMBOP: + zone = zone_jumbop; + break; +#endif + case EXT_JUMBO9: + zone = zone_jumbo9; + break; + case EXT_JUMBO16: + zone = zone_jumbo16; + break; +#ifndef PACKET_ZONE_DISABLED + case EXT_PACKET: + zone = zone_pack; + break; +#endif + default: + panic("%s: invalid cluster type %d", __func__, type); + } + return (zone); +} + #endif Index: sys/uipc_mvec.c =================================================================== RCS file: /home/ncvs/src/sys/dev/cxgb/sys/uipc_mvec.c,v retrieving revision 1.4 diff -d -u -r1.4 uipc_mvec.c --- sys/uipc_mvec.c 9 Apr 2007 17:05:54 -0000 1.4 +++ sys/uipc_mvec.c 12 Apr 2007 07:04:09 -0000 @@ -43,124 +43,309 @@ #include +#ifdef DEBUG +#define DPRINTF printf +#else +#define DPRINTF(...) +#endif + +#ifdef INVARIANTS +#define M_SANITY m_sanity +#else +#define M_SANITY(a, b) +#endif + +#define MAX_BUFS 36 +#define MAX_HVEC 8 + +struct mbuf_ext { + struct mbuf *me_m; + caddr_t me_base; + volatile u_int *me_refcnt; + int me_flags; + uint32_t me_offset; +}; + int _m_explode(struct mbuf *m) { - int i, offset, type; - void *cl; + int i, offset, type, first, len; + uint8_t *cl; struct mbuf *m0, *head = NULL; struct mbuf_vec *mv; - + +#ifdef INVARIANTS + len = m->m_len; + m0 = m->m_next; + while (m0) { + KASSERT((m0->m_flags & M_PKTHDR) == 0, + ("pkthdr set on intermediate mbuf - pre")); + len += m0->m_len; + m0 = m0->m_next; + + } + if (len != m->m_pkthdr.len) + panic("at start len=%d pktlen=%d", len, m->m_pkthdr.len); +#endif mv = mtomv(m); - for (i = mv->mv_count + mv->mv_first - 1; - i > mv->mv_first; i--) { + first = mv->mv_first; + for (i = mv->mv_count + first - 1; i > first; i--) { + type = mbuf_vec_get_type(mv, i); cl = mv->mv_vec[i].mi_base; - if ((m0 = m_get(M_NOWAIT, MT_DATA)) == NULL) { - m_freem(head); - return (ENOMEM); + offset = mv->mv_vec[i].mi_offset; + len = mv->mv_vec[i].mi_len; + if (__predict_false(type == EXT_MBUF)) { + m0 = (struct mbuf *)cl; + KASSERT((m0->m_flags & M_EXT) == 0, ("M_EXT set on mbuf")); + m0->m_len = len; + m0->m_data = cl + offset; + goto skip_cluster; + + } else if ((m0 = m_get(M_NOWAIT, MT_DATA)) == NULL) { + /* + * Check for extra memory leaks + */ + m_freem(head); + return (ENOMEM); } m0->m_flags = 0; - type = mbuf_vec_get_type(mv, i); - m_cljset(m0, (uint8_t *)cl, type); - m0->m_len = mv->mv_vec[i].mi_size; - - offset = mv->mv_vec[i].mi_offset; + m_cljset(m0, (uint8_t *)cl, type); + m0->m_len = mv->mv_vec[i].mi_len; if (offset) - m_adj(m, offset); - + m_adj(m0, offset); + skip_cluster: m0->m_next = head; m->m_len -= m0->m_len; head = m0; } - offset = mv->mv_vec[0].mi_offset; - cl = mv->mv_vec[0].mi_base; - type = mbuf_vec_get_type(mv, 0); - m->m_flags &= ~(M_IOVEC); + offset = mv->mv_vec[first].mi_offset; + cl = mv->mv_vec[first].mi_base; + type = mbuf_vec_get_type(mv, first); + m->m_flags &= ~(M_IOVEC); m_cljset(m, cl, type); if (offset) m_adj(m, offset); m->m_next = head; - + head = m; + M_SANITY(m, 0); +#ifdef INVARIANTS + len = head->m_len; + m = m->m_next; + while (m) { + KASSERT((m->m_flags & M_PKTHDR) == 0, + ("pkthdr set on intermediate mbuf - post")); + len += m->m_len; + m = m->m_next; + + } + if (len != head->m_pkthdr.len) + panic("len=%d pktlen=%d", len, head->m_pkthdr.len); + + { + struct m_tag *foo; + foo = m_tag_locate(head, 5000, 5, NULL); + } +#endif return (0); } -#define MAX_BUFS 36 +static __inline int +m_vectorize(struct mbuf *m, int max, struct mbuf **vec, int *count) +{ + int i, error = 0; + + for (i = 0; i < max; i++) { + if (m == NULL) + break; +#ifndef PACKET_ZONE_DISABLED + if ((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_PACKET)) + return (EINVAL); +#endif +#ifndef SFBUF_SUPPORT + if ((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_SFBUF)) + return (EINVAL); +#endif + if (m->m_len == 0) + DPRINTF("m=%p is len=0\n", m); + M_SANITY(m, 0); + vec[i] = m; + m = m->m_next; + } + if (m) + error = EFBIG; + + *count = i; + + return (error); +} + +static __inline int +m_findmbufs(struct mbuf **ivec, int maxbufs, struct mbuf_ext *ovec, int osize, int *ocount) +{ + int i, j, nhbufsneed, nhbufs; + struct mbuf *m; + + nhbufsneed = min(((maxbufs - 1)/MAX_MBUF_IOV) + 1, osize); + ovec[0].me_m = NULL; + + for (nhbufs = j = i = 0; i < maxbufs && nhbufs < nhbufsneed; i++) { + if ((ivec[i]->m_flags & M_EXT) == 0) + continue; + m = ivec[i]; + ovec[nhbufs].me_m = m; + ovec[nhbufs].me_base = m->m_ext.ext_buf; + ovec[nhbufs].me_refcnt = m->m_ext.ref_cnt; + ovec[nhbufs].me_offset = (m->m_data - m->m_ext.ext_buf); + ovec[nhbufs].me_flags = m->m_ext.ext_type; + nhbufs++; + } + if (nhbufs == 0) { + if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) + goto m_getfail; + ovec[nhbufs].me_m = m; + nhbufs = 1; + } + while (nhbufs < nhbufsneed) { + if ((m = m_get(M_NOWAIT, MT_DATA)) == NULL) + goto m_getfail; + ovec[nhbufs].me_m = m; + nhbufs++; + } + /* + * Copy over packet header to new head of chain + */ + if (ovec[0].me_m != ivec[0]) { + ovec[0].me_m->m_flags |= M_PKTHDR; + memcpy(&ovec[0].me_m->m_pkthdr, &ivec[0]->m_pkthdr, sizeof(struct pkthdr)); + SLIST_INIT(&ivec[0]->m_pkthdr.tags); + } + *ocount = nhbufs; + return (0); +m_getfail: + for (i = 0; i < nhbufs; i++) + if ((ovec[i].me_m->m_flags & M_EXT) == 0) + uma_zfree(zone_mbuf, ovec[i].me_m); + return (ENOMEM); + +} + +static __inline void +m_setiovec(struct mbuf_iovec *mi, struct mbuf *m, struct mbuf_ext *extvec, int *me_index, + int max_me_index) +{ + int idx = *me_index; + + mi->mi_len = m->m_len; + if (idx < max_me_index && extvec[idx].me_m == m) { + struct mbuf_ext *me = &extvec[idx]; + (*me_index)++; + mi->mi_base = me->me_base; + mi->mi_refcnt = me->me_refcnt; + mi->mi_offset = me->me_offset; + mi->mi_flags = me->me_flags; + } else if (m->m_flags & M_EXT) { + mi->mi_base = m->m_ext.ext_buf; + mi->mi_refcnt = m->m_ext.ref_cnt; + mi->mi_offset = + (m->m_data - m->m_ext.ext_buf); + mi->mi_flags = m->m_ext.ext_type; + } else { + KASSERT(m->m_len < 256, ("mbuf too large len=%d", + m->m_len)); + mi->mi_base = (uint8_t *)m; + mi->mi_refcnt = NULL; + mi->mi_offset = + (m->m_data - (caddr_t)m); + mi->mi_flags = EXT_MBUF; + } + DPRINTF("type=%d len=%d refcnt=%p cl=%p offset=0x%x\n", + mi->mi_flags, mi->mi_len, mi->mi_refcnt, mi->mi_base, + mi->mi_offset); +} int _m_collapse(struct mbuf *m, int maxbufs, struct mbuf **mnew) { - struct mbuf *m0, *lvec[MAX_BUFS]; - struct mbuf **mnext, **vec = &lvec[0]; + struct mbuf *m0, *lmvec[MAX_BUFS]; + struct mbuf **mnext; + struct mbuf **vec = &lmvec[0]; struct mbuf *mhead = NULL; struct mbuf_vec *mv; - int i, j, max; - - if (maxbufs > MAX_BUFS) + int err, i, j, max, len, nhbufs; + struct mbuf_ext dvec[MAX_HVEC]; + int hidx = 0, dvecidx; + + M_SANITY(m, 0); + if (maxbufs > MAX_BUFS) { if ((vec = malloc(maxbufs * sizeof(struct mbuf *), M_DEVBUF, M_NOWAIT)) == NULL) return (ENOMEM); - - m0 = m; - for (i = 0; i < maxbufs; i++) { - if (m0 == NULL) - goto batch; - vec[i] = m0; - m0 = m0->m_next; } - if (i == maxbufs) - return (EFBIG); -batch: - max = i; - i = 0; - m0 = NULL; - mnext = NULL; - while (i < max) { - if ((vec[i]->m_flags & M_EXT) == 0) { - m0 = m_get(M_NOWAIT, MT_DATA); - } else { - m0 = vec[i]; - m0->m_flags = (vec[i]->m_flags & ~M_EXT); - } + if ((err = m_vectorize(m, maxbufs, vec, &max)) != 0) + return (err); + if ((err = m_findmbufs(vec, max, dvec, MAX_HVEC, &nhbufs)) != 0) + return (err); + + KASSERT(max > 0, ("invalid mbuf count")); + KASSERT(nhbufs > 0, ("invalid header mbuf count")); + + + mhead = m0 = dvec[0].me_m; + + DPRINTF("nbufs=%d nhbufs=%d\n", max, nhbufs); + for (hidx = dvecidx = i = 0, mnext = NULL; i < max; hidx++) { + m0 = dvec[hidx].me_m; + m0->m_flags &= ~M_EXT; m0->m_flags |= M_IOVEC; - if (m0 == NULL) - goto m_getfail; - if (i == 0) - mhead = m0; - if (mnext) + + if (mnext) *mnext = m0; + mv = mtomv(m0); - mv->mv_count = mv->mv_first = 0; - for (j = 0; j < MAX_MBUF_IOV; j++, i++) { - if (vec[i]->m_flags & M_EXT) { - mv->mv_vec[j].mi_base = vec[i]->m_ext.ext_buf; - mv->mv_vec[j].mi_offset = - (vec[i]->m_ext.ext_buf - vec[i]->m_data); - mv->mv_vec[j].mi_size = vec[i]->m_ext.ext_size; - mv->mv_vec[j].mi_flags = vec[i]->m_ext.ext_type; - } else { - mv->mv_vec[j].mi_base = (caddr_t)vec[i]; - mv->mv_vec[j].mi_offset = - ((caddr_t)vec[i] - vec[i]->m_data); - mv->mv_vec[j].mi_size = MSIZE; - mv->mv_vec[j].mi_flags = EXT_MBUF; - } + len = mv->mv_first = 0; + for (j = 0; j < MAX_MBUF_IOV && i < max; j++, i++) { + struct mbuf_iovec *mi = &mv->mv_vec[j]; + + DPRINTF("mi=%p i=%d dvecidx=%d\n", mi, i, dvecidx); + m_setiovec(mi, vec[i], dvec, &dvecidx, nhbufs); + len += mi->mi_len; } + m0->m_data = mv->mv_vec[0].mi_base + mv->mv_vec[0].mi_offset; + mv->mv_count = j; + m0->m_len = len; mnext = &m0->m_next; + DPRINTF("count=%d len=%d\n", j, len); } + /* + * Terminate chain + */ + m0->m_next = NULL; - mhead->m_flags |= (m0->m_flags & M_PKTHDR); + /* + * Free all mbufs not used by the mbuf iovec chain + */ + for (i = 0; i < max; i++) + if (m->m_flags & M_EXT) { + m->m_flags &= ~M_EXT; + DPRINTF("freeing m=%p\n", vec[i]); + uma_zfree(zone_mbuf, vec[i]); + } +#ifdef INVARIANTS + len = mhead->m_len; + m = mhead->m_next; + while (m) { + KASSERT((m->m_flags & M_PKTHDR) == 0, + ("pkthdr set on intermediate mbuf")); + len += m->m_len; + m = m->m_next; + } + KASSERT(len == mhead->m_pkthdr.len, + ("len=%d pktlen=%d nbufs=%d", len, mhead->m_pkthdr.len, max)); +#endif *mnew = mhead; return (0); - -m_getfail: - m0 = mhead; - while (mhead) { - mhead = m0->m_next; - uma_zfree(zone_mbuf, m0); - } - return (ENOMEM); } void @@ -170,34 +355,42 @@ int i; KASSERT((m->m_flags & (M_EXT|M_IOVEC)) == M_IOVEC, - ("%s: M_IOVEC not set", __func__)); + ("%s: M_EXT set", __func__)); mv = mtomv(m); KASSERT(mv->mv_count <= MAX_MBUF_IOV, ("%s: mi_count too large %d", __func__, mv->mv_count)); + DPRINTF("count=%d len=%d\n", mv->mv_count, m->m_len); for (i = mv->mv_first; i < mv->mv_count; i++) { uma_zone_t zone = NULL; - int *refcnt; + volatile int *refcnt = mv->mv_vec[i].mi_refcnt; int type = mbuf_vec_get_type(mv, i); void *cl = mv->mv_vec[i].mi_base; - int size = mv->mv_vec[i].mi_size; - - zone = m_getzone(size); - refcnt = uma_find_refcnt(zone, cl); - if (*refcnt != 1 && atomic_fetchadd_int(refcnt, -1) != 1) + + if (refcnt && *refcnt != 1 && atomic_fetchadd_int(refcnt, -1) != 1) continue; - + + DPRINTF("freeing idx=%d refcnt=%p type=%d cl=%p\n", i, refcnt, type, cl); switch (type) { +#ifndef PACKET_ZONE_DISABLED case EXT_PACKET: /* The packet zone is special. */ + panic("EXT_PACKET should not be in an mbuf iovec"); if (*refcnt == 0) *refcnt = 1; uma_zfree(zone_pack, m); return; /* Job done. */ +#endif + case EXT_MBUF: + DPRINTF("freeing iovec mbuf m=%p\n", cl); + uma_zfree(zone_mbuf, cl); + continue; case EXT_CLUSTER: case EXT_JUMBOP: case EXT_JUMBO9: case EXT_JUMBO16: + zone = m_getzonefromtype(type); + DPRINTF("freeing cl=%p type=%d\n", cl, type); uma_zfree(zone, cl); continue; case EXT_SFBUF: @@ -224,16 +417,18 @@ } } /* - * Free this mbuf back to the mbuf zone with all m_ext + * Free this mbuf back to the mbuf zone with all iovec * information purged. */ - m->m_flags &= ~M_IOVEC; + DPRINTF("freeing parent mbuf m=%p\n", m); uma_zfree(zone_mbuf, m); } + #if (!defined(__sparc64__) && !defined(__sun4v__)) struct mvec_sg_cb_arg { + bus_dma_segment_t *segs; int error; - bus_dma_segment_t seg; + int index; int nseg; }; @@ -263,9 +458,9 @@ struct mvec_sg_cb_arg *cb_arg = arg; cb_arg->error = error; - cb_arg->seg = segs[0]; + cb_arg->segs[cb_arg->index] = segs[0]; cb_arg->nseg = nseg; - + KASSERT(nseg == 1, ("nseg=%d", nseg)); } int @@ -273,19 +468,21 @@ bus_dma_segment_t *segs, int *nsegs, int flags) { int error; - struct mbuf_vec *mv; struct mvec_sg_cb_arg cb_arg; - - M_ASSERTPKTHDR(m0); + M_ASSERTPKTHDR(m0); + + if ((m0->m_flags & M_IOVEC) == 0) + return (bus_dmamap_load_mbuf_sg(dmat, map, m0, segs, nsegs, flags)); + flags |= BUS_DMA_NOWAIT; *nsegs = 0; error = 0; - if (m0->m_pkthdr.len <= - dmat->maxsize) { + if (m0->m_pkthdr.len <= dmat->maxsize) { struct mbuf *m; - + cb_arg.segs = segs; for (m = m0; m != NULL && error == 0; m = m->m_next) { + struct mbuf_vec *mv; int count, first, i; if (!(m->m_len > 0)) continue; @@ -293,15 +490,25 @@ mv = mtomv(m); count = mv->mv_count; first = mv->mv_first; + KASSERT(count <= MAX_MBUF_IOV, ("count=%d too large", count)); for (i = first; i < count; i++) { - void *data = mv->mv_vec[i].mi_base; - int size = mv->mv_vec[i].mi_size; - - cb_arg.seg = *segs; + void *data = mv->mv_vec[i].mi_base + mv->mv_vec[i].mi_offset; + int size = mv->mv_vec[i].mi_len; + + if (size == 0) + continue; + DPRINTF("mapping data=%p size=%d\n", data, size); + cb_arg.index = *nsegs; error = bus_dmamap_load(dmat, map, data, size, mvec_cb, &cb_arg, flags); - segs++; - *nsegs++; + (*nsegs)++; + + if (*nsegs >= dmat->nsegments) { + DPRINTF("*nsegs=%d dmat->nsegments=%d index=%d\n", + *nsegs, dmat->nsegments, cb_arg.index); + error = EFBIG; + goto err_out; + } if (error || cb_arg.error) goto err_out; } @@ -309,9 +516,10 @@ } else { error = EINVAL; } - +#if 0 /* XXX FIXME: Having to increment nsegs is really annoying */ ++*nsegs; +#endif CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d", __func__, dmat, dmat->flags, error, *nsegs); return (error); @@ -322,4 +530,4 @@ return (error); } -#endif /* !__sparc64__ */ +#endif /* !__sparc64__ && !__sun4v__ */