Index: sys/sys/mbuf.h =================================================================== RCS file: /home/kmacy/devel/ncvs/src/sys/sys/mbuf.h,v retrieving revision 1.215 diff -d -u -r1.215 mbuf.h --- sys/sys/mbuf.h 24 Aug 2007 00:53:53 -0000 1.215 +++ sys/sys/mbuf.h 6 Oct 2007 20:18:07 -0000 @@ -44,6 +44,21 @@ #endif #endif + +/* + * Network buffer allocation zones + * + * The rest of it is defined in kern/kern_mbuf.c + */ + +extern uma_zone_t zone_mbuf; +extern uma_zone_t zone_clust; +extern uma_zone_t zone_pack; +extern uma_zone_t zone_jumbop; +extern uma_zone_t zone_jumbo9; +extern uma_zone_t zone_jumbo16; +extern uma_zone_t zone_ext_refcnt; + /* * Mbufs are of a single size, MSIZE (sys/param.h), which includes overhead. * An mbuf may add a single "mbuf cluster" of size MCLBYTES (also in @@ -60,6 +75,50 @@ #ifdef _KERNEL /*- + * mbuf external reference count management macros. + * + * MEXT_IS_REF(m): true if (m) is not the only mbuf referencing + * the external buffer ext_buf. + * + * MEXT_REM_REF(m): remove reference to m_ext object. + * + * MEXT_ADD_REF(m): add reference to m_ext object already + * referred to by (m). XXX Note that it is VERY important that you + * always set the second mbuf's m_ext.ref_cnt to point to the first + * one's (i.e., n->m_ext.ref_cnt = m->m_ext.ref_cnt) AFTER you run + * MEXT_ADD_REF(m). This is because m might have a lazy initialized + * ref_cnt (NULL) before this is run and it will only be looked up + * from here. We should make MEXT_ADD_REF() always take two mbufs + * as arguments so that it can take care of this itself. + */ +static __inline uma_zone_t m_getrefcntzonefromtype(int type); + +#define MEXT_IS_REF(m) (((m)->m_ext.ref_cnt != NULL) \ + && (*((m)->m_ext.ref_cnt) > 1)) + +#define MEXT_REM_REF(m) do { \ + KASSERT((m)->m_ext.ref_cnt != NULL, ("m_ext refcnt lazy NULL")); \ + KASSERT(*((m)->m_ext.ref_cnt) > 0, ("m_ext refcnt < 0")); \ + atomic_subtract_int((m)->m_ext.ref_cnt, 1); \ +} while(0) + +#define MEXT_ADD_REF(m) do { \ + if ((m)->m_ext.ref_cnt == NULL) { \ + KASSERT((m)->m_ext.ext_type == EXT_CLUSTER || \ + (m)->m_ext.ext_type == EXT_PACKET || \ + (m)->m_ext.ext_type == EXT_JUMBOP || \ + (m)->m_ext.ext_type == EXT_JUMBO9 || \ + (m)->m_ext.ext_type == EXT_JUMBO16, \ + ("Unexpected mbuf type has lazy refcnt %d", \ + (m)->m_ext.ext_type)); \ + (m)->m_ext.ref_cnt = (u_int *)uma_find_refcnt( \ + m_getzone((m)->m_ext.ext_size), (m)->m_ext.ext_buf); \ + *((m)->m_ext.ref_cnt) = 2; \ + } else \ + atomic_add_int((m)->m_ext.ref_cnt, 1); \ +} while (0) + +/*- * Macros for type conversion: * mtod(m, t) -- Convert mbuf pointer to data pointer of correct type. * dtom(x) -- Convert data pointer within mbuf to mbuf pointer (XXX). @@ -327,20 +386,6 @@ #define MBUF_CHECKSLEEP(how) #endif -/* - * Network buffer allocation API - * - * The rest of it is defined in kern/kern_mbuf.c - */ - -extern uma_zone_t zone_mbuf; -extern uma_zone_t zone_clust; -extern uma_zone_t zone_pack; -extern uma_zone_t zone_jumbop; -extern uma_zone_t zone_jumbo9; -extern uma_zone_t zone_jumbo16; -extern uma_zone_t zone_ext_refcnt; - static __inline struct mbuf *m_getcl(int how, short type, int flags); static __inline struct mbuf *m_get(int how, short type); static __inline struct mbuf *m_gethdr(int how, short type); @@ -414,6 +459,38 @@ return (zone); } +static __inline uma_zone_t +m_getrefcntzonefromtype(int type) +{ + uma_zone_t zone; + + switch (type) { + case EXT_MBUF: + zone = zone_mbuf; + break; + case EXT_CLUSTER: + zone = zone_clust; + break; + case EXT_PACKET: + zone = zone_clust; + break; + case EXT_JUMBOP: + zone = zone_jumbop; + break; + case EXT_JUMBO9: + zone = zone_jumbo9; + break; + case EXT_JUMBO16: + zone = zone_jumbo16; + break; + default: + panic("%s: m_getjcl: invalid cluster type %d", __func__, type); + } + + return (zone); +} + + static __inline struct mbuf * m_get(int how, short type) { @@ -571,13 +648,12 @@ break; } + m->m_flags |= M_EXT; m->m_data = m->m_ext.ext_buf = cl; m->m_ext.ext_free = m->m_ext.ext_args = NULL; m->m_ext.ext_size = size; m->m_ext.ext_type = type; - m->m_ext.ref_cnt = uma_find_refcnt(zone, cl); - m->m_flags |= M_EXT; - + m->m_ext.ref_cnt = NULL; /* lazy assignment */ } static __inline void @@ -615,8 +691,8 @@ * whether M_EXT is set). */ #define M_WRITABLE(m) (!((m)->m_flags & M_RDONLY) && \ - (!(((m)->m_flags & M_EXT)) || \ - (*((m)->m_ext.ref_cnt) == 1)) ) \ + (!(((m)->m_flags & M_EXT)) || \ + !MEXT_IS_REF(m))) \ /* Check if the supplied mbuf has a packet header, or else panic. */ #define M_ASSERTPKTHDR(m) \ Index: sys/kern/kern_mbuf.c =================================================================== RCS file: /home/kmacy/devel/ncvs/src/sys/kern/kern_mbuf.c,v retrieving revision 1.30 diff -d -u -r1.30 kern_mbuf.c --- sys/kern/kern_mbuf.c 4 Jun 2007 18:25:07 -0000 1.30 +++ sys/kern/kern_mbuf.c 24 Sep 2007 00:57:40 -0000 @@ -368,7 +368,6 @@ KASSERT(m->m_ext.ext_args == NULL, ("%s: ext_args != NULL", __func__)); KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES", __func__)); KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET", __func__)); - KASSERT(*m->m_ext.ref_cnt == 1, ("%s: ref_cnt != 1", __func__)); #ifdef INVARIANTS trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg); #endif @@ -394,7 +393,6 @@ mb_ctor_clust(void *mem, int size, void *arg, int how) { struct mbuf *m; - u_int *refcnt; int type; uma_zone_t zone; @@ -425,10 +423,8 @@ break; } - m = (struct mbuf *)arg; - refcnt = uma_find_refcnt(zone, mem); - *refcnt = 1; - if (m != NULL) { + if (arg != NULL) { + m = (struct mbuf *)arg; m->m_ext.ext_buf = (caddr_t)mem; m->m_data = m->m_ext.ext_buf; m->m_flags |= M_EXT; @@ -436,7 +432,7 @@ m->m_ext.ext_args = NULL; m->m_ext.ext_size = size; m->m_ext.ext_type = type; - m->m_ext.ref_cnt = refcnt; + m->m_ext.ref_cnt = NULL; /* lazy assignment */ } return (0); @@ -527,7 +523,8 @@ m->m_len = 0; m->m_flags = (flags | M_EXT); m->m_type = type; - + m->m_ext.ref_cnt = NULL; /* Lazy counter assign */ + if (flags & M_PKTHDR) { m->m_pkthdr.rcvif = NULL; m->m_pkthdr.len = 0; Index: sys/kern/uipc_mbuf.c =================================================================== RCS file: /home/kmacy/devel/ncvs/src/sys/kern/uipc_mbuf.c,v retrieving revision 1.173 diff -d -u -r1.173 uipc_mbuf.c --- sys/kern/uipc_mbuf.c 16 May 2007 20:41:07 -0000 1.173 +++ sys/kern/uipc_mbuf.c 6 Oct 2007 19:38:59 -0000 @@ -211,16 +211,35 @@ void mb_free_ext(struct mbuf *m) { + int dofree; + u_int cnt; + KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__)); - KASSERT(m->m_ext.ref_cnt != NULL, ("%s: ref_cnt not set", __func__)); + /* Account for lazy ref count assign. */ + if (m->m_ext.ref_cnt == NULL) + dofree = 1; + else + dofree = 0; + + /* + * This is tricky. We need to make sure to decrement the + * refcount in a safe way but to also clean up if we're the + * last reference. This method seems to do it without race. + */ + while (dofree == 0) { + cnt = *(m->m_ext.ref_cnt); + if (atomic_cmpset_int(m->m_ext.ref_cnt, cnt, cnt - 1)) { + if (cnt == 1) + dofree = 1; + break; + } + } + /* Free attached storage if this mbuf is the only reference to it. */ - if (*(m->m_ext.ref_cnt) == 1 || - atomic_fetchadd_int(m->m_ext.ref_cnt, -1) == 1) { + if (dofree) { switch (m->m_ext.ext_type) { case EXT_PACKET: /* The packet zone is special. */ - if (*(m->m_ext.ref_cnt) == 0) - *(m->m_ext.ref_cnt) = 1; uma_zfree(zone_pack, m); return; /* Job done. */ case EXT_CLUSTER: @@ -235,35 +254,23 @@ case EXT_JUMBO16: uma_zfree(zone_jumbo16, m->m_ext.ext_buf); break; - case EXT_SFBUF: - case EXT_NET_DRV: - case EXT_MOD_TYPE: - case EXT_DISPOSABLE: - *(m->m_ext.ref_cnt) = 0; - uma_zfree(zone_ext_refcnt, __DEVOLATILE(u_int *, - m->m_ext.ref_cnt)); - /* FALLTHROUGH */ - case EXT_EXTREF: + default: KASSERT(m->m_ext.ext_free != NULL, - ("%s: ext_free not set", __func__)); + ("%s: external free pointer not set", __func__)); (*(m->m_ext.ext_free))(m->m_ext.ext_buf, m->m_ext.ext_args); - break; - default: - KASSERT(m->m_ext.ext_type == 0, - ("%s: unknown ext_type", __func__)); + if (m->m_ext.ext_type != EXT_EXTREF) { + if (m->m_ext.ref_cnt != NULL) { + *(m->m_ext.ref_cnt) = 0; + uma_zfree(zone_ext_refcnt, __DEVOLATILE(u_int *, + m->m_ext.ref_cnt)); + m->m_ext.ref_cnt = NULL; + } + } } } - /* - * Free this mbuf back to the mbuf zone with all m_ext - * information purged. - */ + m->m_ext.ext_buf = NULL; - m->m_ext.ext_free = NULL; - m->m_ext.ext_args = NULL; - m->m_ext.ref_cnt = NULL; - m->m_ext.ext_size = 0; - m->m_ext.ext_type = 0; m->m_flags &= ~M_EXT; uma_zfree(zone_mbuf, m); } @@ -276,13 +283,9 @@ mb_dupcl(struct mbuf *n, struct mbuf *m) { KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__)); - KASSERT(m->m_ext.ref_cnt != NULL, ("%s: ref_cnt not set", __func__)); KASSERT((n->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__)); - if (*(m->m_ext.ref_cnt) == 1) - *(m->m_ext.ref_cnt) += 1; - else - atomic_add_int(m->m_ext.ref_cnt, 1); + MEXT_ADD_REF(m); n->m_ext.ext_buf = m->m_ext.ext_buf; n->m_ext.ext_free = m->m_ext.ext_free; n->m_ext.ext_args = m->m_ext.ext_args;