Index: tcp_offload.c =================================================================== RCS file: tcp_offload.c diff -N tcp_offload.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ tcp_offload.c 16 Dec 2007 19:30:37 -0000 @@ -0,0 +1,94 @@ +/*- + * Copyright (c) 2007, Chelsio Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Neither the name of the Chelsio Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD: src/sys/netinet/tcp_ofld.c,v 1.2 2007/12/12 23:31:49 kmacy Exp $"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +/* Hand so's connect to the TOE device on the route to nam; returns tod_connect()'s result, or EHOSTUNREACH/EINVAL/EPERM when it cannot be offloaded. */ +int +tcp_offload_connect(struct socket *so, struct sockaddr *nam) +{ + struct ifnet *ifp; + struct toedev *tdev; + struct rtentry *rt; + int error; + + /* + * Look up the route used for the connection to + * determine if it uses an interface capable of + * offloading the connection. + */ + rt = rtalloc1(nam, 1 /*report*/, 0 /*ignflags*/); + if (rt) + RT_UNLOCK(rt); + else + return (EHOSTUNREACH); + + ifp = rt->rt_ifp; + if ((ifp->if_capenable & IFCAP_TOE) == 0) { + error = EINVAL; + goto fail; + } + + tdev = TOEDEV(ifp); + if (tdev == NULL) { + error = EPERM; + goto fail; + } + + if (tdev->tod_can_offload(tdev, so) == 0) { + error = EPERM; + goto fail; + } + + return (tdev->tod_connect(tdev, so, rt, nam)); +fail: + RTFREE(rt); /* rt was unlocked after the lookup; RTFREE_LOCKED needs the lock held */ + return (error); +} Index: tcp_offload.h =================================================================== RCS file: tcp_offload.h diff -N tcp_offload.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ tcp_offload.h 16 Dec 2007 19:30:51 -0000 @@ -0,0 +1,328 @@ +/*- + * Copyright (c) 2007, Chelsio Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Neither the name of the Chelsio Corporation nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD: src/sys/netinet/tcp_ofld.h,v 1.3 2007/12/13 01:24:20 kmacy Exp $ + */ + +#ifndef _NETINET_TCP_OFFLOAD_H_ +#define _NETINET_TCP_OFFLOAD_H_ + +#ifndef _KERNEL +#error "no user-serviceable parts inside" +#endif + +/* + * A driver publishes that it provides offload services + * by setting IFCAP_TOE in the ifnet. The offload connect + * will bypass any further work if the interface that a + * connection would use does not support TCP offload. + * + * The TOE API assumes that the tcp offload engine can offload the + * entire connection from set up to teardown, with some provision + * being made to allow the software stack to handle time wait. If + * the device does not meet these criteria, it is the driver's responsibility + * to overload the functions that it needs to in tcp_usrreqs and make + * its own calls to tcp_output if it needs to do so. + * + * There is currently no provision for the device advertising the congestion + * control algorithms it supports as there is currently no API for querying + * an operating system for the protocols that it has loaded. This is a desirable + * future extension. 
+ * + * + * + * It is assumed that individuals deploying TOE will want connections + * to be offloaded without software changes so all connections on an + * interface providing TOE are offloaded unless the SO_NO_OFFLOAD + * flag is set on the socket. + * + * + * The toe_usrreqs structure constitutes the TOE driver's + * interface to the TCP stack for functionality that doesn't + * interact directly with userspace. If one wants to provide + * (optional) functionality to do zero-copy to/from + * userspace one still needs to override soreceive/sosend + * with functions that fault in and pin the user buffers. + * + * + tu_send + * - tells the driver that new data may have been added to the + * socket's send buffer - the driver should not fail if the + * buffer is in fact unchanged + * - the driver is responsible for providing credits (bytes in the send window) + * back to the socket by calling sbdrop() as segments are acknowledged. + * - The driver expects the inpcb lock to be held - the driver is expected + * not to drop the lock. Hence the driver is not allowed to acquire the + * pcbinfo lock during this call. + * + * + tu_rcvd + * - returns credits to the driver and triggers window updates + * to the peer (a credit as used here is a byte in the peer's receive window) + * - the driver is expected to determine how many bytes have been + * consumed and credit that back to the card so that it can grow + * the window again by maintaining its own state between invocations. + * - In principle this could be used to shrink the window as well as + * grow the window, although it is not used for that now. + * - this function needs to correctly handle being called any number of + * times without any bytes being consumed from the receive buffer. + * - The driver expects the inpcb lock to be held - the driver is expected + * not to drop the lock. Hence the driver is not allowed to acquire the + * pcbinfo lock during this call. 
+ * + * + tu_disconnect + * - tells the driver to send FIN to peer + * - driver is expected to send the remaining data and then do a clean half close + * - disconnect implies at least half-close so only send, reset, and detach + * are legal + * - the driver is expected to handle transition through the shutdown + * state machine and allow the stack to support SO_LINGER. + * - The driver expects the inpcb lock to be held - the driver is expected + * not to drop the lock. Hence the driver is not allowed to acquire the + * pcbinfo lock during this call. + * + * + tu_reset + * - closes the connection and sends a RST to peer + * - driver is expected to trigger an RST and detach the toepcb + * - no further calls are legal after reset + * - The driver expects the inpcb lock to be held - the driver is expected + * not to drop the lock. Hence the driver is not allowed to acquire the + * pcbinfo lock during this call. + * + * The following fields in the tcpcb are expected to be referenced by the driver: + * + iss + * + rcv_nxt + * + rcv_wnd + * + snd_isn + * + snd_max + * + snd_nxt + * + snd_una + * + t_flags + * + t_inpcb + * + t_maxseg + * + t_toe + * + * The following fields in the inpcb are expected to be referenced by the driver: + * + inp_lport + * + inp_fport + * + inp_laddr + * + inp_faddr + * + inp_socket + * + inp_ip_tos + * + * The following fields in the socket are expected to be referenced by the + * driver: + * + so_comp + * + so_error + * + so_linger + * + so_options + * + so_rcv + * + so_snd + * + so_state + * + so_timeo + * + * These functions all return 0 on success and can return the following errors + * as appropriate: + * + EPERM: + * + ENOBUFS: memory allocation failed + * + EMSGSIZE: MTU changed during the call + * + EHOSTDOWN: + * + EHOSTUNREACH: + * + ENETDOWN: + * + ENETUNREACH: the peer is no longer reachable + * + * + tu_detach + * - tells driver that the socket is going away so disconnect + * the toepcb and free appropriate resources + * - allows 
the driver to cleanly handle the case of connection state + * outliving the socket + * - no further calls are legal after detach + * - the driver is expected to provide its own synchronization between + * detach and receiving new data. + * + * + tu_syncache_event + * - even if it is not actually needed, the driver is expected to + * call syncache_add for the initial SYN and then syncache_expand + * for the SYN,ACK + * - tells driver that a connection either has not been added or has + * been dropped from the syncache + * - the driver is expected to maintain state that lives outside the + * software stack so the syncache needs to be able to notify the + * toe driver that the software stack is not going to create a connection + * for a received SYN + * - The driver is responsible for any synchronization required between + * the syncache dropping an entry and the driver processing the SYN,ACK. + * + */ +struct toe_usrreqs { + int (*tu_send)(struct tcpcb *tp); + int (*tu_rcvd)(struct tcpcb *tp); + int (*tu_disconnect)(struct tcpcb *tp); + int (*tu_reset)(struct tcpcb *tp); + void (*tu_detach)(struct tcpcb *tp); + void (*tu_syncache_event)(int event, void *toep); +}; + +#define TOE_SC_ENTRY_PRESENT 1 /* 4-tuple already present */ +#define TOE_SC_DROP 2 /* connection was timed out */ + +/* + * Because listen is a one-to-many relationship (a socket can be listening + * on all interfaces on a machine some of which may be using different TCP + * offload devices), listen uses a publish/subscribe mechanism. The TCP + * offload driver registers a listen notification function with the stack. + * When a listen socket is created all TCP offload devices are notified + * so that they can do the appropriate set up to offload connections on the + * port to which the socket is bound. 
When the listen socket is closed, + * the offload devices are notified so that they will stop listening on that + * port and free any associated resources as well as sending RSTs on any + * connections in the SYN_RCVD state. + * + */ + +typedef void (*tcp_offload_listen_start_fn)(void *, struct tcpcb *); +typedef void (*tcp_offload_listen_stop_fn)(void *, struct tcpcb *); + +EVENTHANDLER_DECLARE(tcp_offload_listen_start, tcp_offload_listen_start_fn); +EVENTHANDLER_DECLARE(tcp_offload_listen_stop, tcp_offload_listen_stop_fn); + +/* + * Check if the socket can be offloaded by the following steps: + * - determine the egress interface + * - check the interface for TOE capability and TOE is enabled + * - check if the device has resources to offload the connection + */ +int tcp_offload_connect(struct socket *so, struct sockaddr *nam); + +/* + * The tcp_gen_* routines are wrappers around the toe_usrreqs calls, + * in the non-offloaded case they translate to tcp_output. + * + * Listen is a special case because it is a 1 to many relationship + * and there can be more than one offload driver in the system. + */ + +/* + * Connection is offloaded + */ +#define tp_offload(tp) ((tp)->t_flags & TF_TOE) +/* + * The socket has not been marked as "do not offload" + */ +#define SO_OFFLOADABLE(so) ((so->so_options & SO_NO_OFFLOAD) == 0) + +static __inline int +tcp_gen_connect(struct socket *so, struct sockaddr *nam) +{ + struct tcpcb *tp = sototcpcb(so); + int error; + + /* + * If offload has been disabled for this socket or the + * connection cannot be offloaded just call tcp_output + * to start the TCP state machine. 
+ */ +#ifndef TCP_OFFLOAD_DISABLE + if (!SO_OFFLOADABLE(so) || (error = tcp_offload_connect(so, nam)) != 0) +#endif + error = tcp_output(tp); + return (error); +} + +static __inline int +tcp_gen_send(struct tcpcb *tp) +{ + +#ifndef TCP_OFFLOAD_DISABLE + if (tp_offload(tp)) + return (tp->t_tu->tu_send(tp)); +#endif + return (tcp_output(tp)); +} + +static __inline int +tcp_gen_rcvd(struct tcpcb *tp) +{ + +#ifndef TCP_OFFLOAD_DISABLE + if (tp_offload(tp)) + return (tp->t_tu->tu_rcvd(tp)); +#endif + return (tcp_output(tp)); +} + +static __inline int +tcp_gen_disconnect(struct tcpcb *tp) +{ + +#ifndef TCP_OFFLOAD_DISABLE + if (tp_offload(tp)) + return (tp->t_tu->tu_disconnect(tp)); +#endif + return (tcp_output(tp)); +} + +static __inline int +tcp_gen_reset(struct tcpcb *tp) +{ + +#ifndef TCP_OFFLOAD_DISABLE + if (tp_offload(tp)) + return (tp->t_tu->tu_reset(tp)); +#endif + return (tcp_output(tp)); +} + +static __inline void +tcp_gen_detach(struct tcpcb *tp) +{ + +#ifndef TCP_OFFLOAD_DISABLE + if (tp_offload(tp)) + tp->t_tu->tu_detach(tp); +#endif +} + +static __inline void +tcp_gen_listen_open(struct tcpcb *tp) +{ + +#ifndef TCP_OFFLOAD_DISABLE + if (SO_OFFLOADABLE(tp->t_inpcb->inp_socket)) + EVENTHANDLER_INVOKE(tcp_offload_listen_start, tp); +#endif +} + +static __inline void +tcp_gen_listen_close(struct tcpcb *tp) +{ + +#ifndef TCP_OFFLOAD_DISABLE + EVENTHANDLER_INVOKE(tcp_offload_listen_stop, tp); +#endif +} + +#undef tp_offload +#undef SO_OFFLOADABLE +#endif /* _NETINET_TCP_OFFLOAD_H_ */ Index: tcp_ofld.c =================================================================== RCS file: tcp_ofld.c diff -N tcp_ofld.c --- tcp_ofld.c 12 Dec 2007 23:31:49 -0000 1.2 +++ /dev/null 1 Jan 1970 00:00:00 -0000 @@ -1,128 +0,0 @@ -/*- - * Copyright (c) 2007, Chelsio Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. 
Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Neither the name of the Chelsio Corporation nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include -__FBSDID("$FreeBSD: src/sys/netinet/tcp_ofld.c,v 1.2 2007/12/12 23:31:49 kmacy Exp $"); - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -int -ofld_connect(struct socket *so, struct sockaddr *nam) -{ - struct ifnet *ifp; - struct toedev *tdev; - struct rtentry *rt; - int error; - - rt = rtalloc1(nam, 1, 0); - if (rt) - RT_UNLOCK(rt); - else - return (EHOSTUNREACH); - - ifp = rt->rt_ifp; - tdev = TOEDEV(ifp); - if (tdev == NULL) - return (EINVAL); - - if (tdev->tod_can_offload(tdev, so) == 0) - return (EINVAL); - - if ((error = tdev->tod_connect(tdev, so, ifp))) - return (error); - - return (0); -} - -int -ofld_send(struct tcpcb *tp) -{ - - return (tp->t_tu->tu_send(tp)); -} - -int -ofld_rcvd(struct tcpcb *tp) -{ - - return (tp->t_tu->tu_rcvd(tp)); -} - -int -ofld_disconnect(struct tcpcb *tp) -{ - - return (tp->t_tu->tu_disconnect(tp)); -} - -int -ofld_abort(struct tcpcb *tp) -{ - - return (tp->t_tu->tu_abort(tp)); -} - -void -ofld_detach(struct tcpcb *tp) -{ - - tp->t_tu->tu_detach(tp); -} - -void -ofld_listen_open(struct tcpcb *tp) -{ - - EVENTHANDLER_INVOKE(ofld_listen, OFLD_LISTEN_OPEN, tp); -} - -void -ofld_listen_close(struct tcpcb *tp) -{ - - EVENTHANDLER_INVOKE(ofld_listen, OFLD_LISTEN_CLOSE, tp); -} Index: tcp_ofld.h =================================================================== RCS file: tcp_ofld.h diff -N tcp_ofld.h --- tcp_ofld.h 13 Dec 2007 01:24:20 -0000 1.3 +++ /dev/null 1 Jan 1970 00:00:00 -0000 @@ -1,203 +0,0 @@ -/*- - * Copyright (c) 2007, Chelsio Inc. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. 
Neither the name of the Chelsio Corporation nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- * - * $FreeBSD: src/sys/netinet/tcp_ofld.h,v 1.3 2007/12/13 01:24:20 kmacy Exp $ - */ - -#ifndef _NETINET_TCP_OFLD_H_ -#define _NETINET_TCP_OFLD_H_ - -#ifndef _KERNEL -#error "no user-serviceable parts inside" -#endif - -#define SC_ENTRY_PRESENT 1 -#define SC_DROP 2 - -#define tp_offload(tp) ((tp)->t_flags & TF_TOE) -#define SO_OFFLOADABLE(so) ((so->so_options & SO_NO_OFFLOAD) == 0) - -int ofld_connect(struct socket *so, struct sockaddr *nam); -int ofld_can_offload(struct tcpcb *tp, struct sockaddr *nam); - -int ofld_abort(struct tcpcb *tp); -int ofld_disconnect(struct tcpcb *tp); -int ofld_send(struct tcpcb *tp); -int ofld_rcvd(struct tcpcb *tp); -void ofld_detach(struct tcpcb *tp); -void ofld_listen_close(struct tcpcb *tp); -void ofld_listen_open(struct tcpcb *tp); - -#ifndef DISABLE_TCP_OFFLOAD -static __inline int -tcp_gen_connect(struct socket *so, struct sockaddr *nam) -{ - struct tcpcb *tp = sototcpcb(so); - int error; - - if (!SO_OFFLOADABLE(so) || (error = ofld_connect(so, nam)) != 0) - error = tcp_output(tp); - return (error); -} - -static __inline int -tcp_gen_disconnect(struct tcpcb *tp) -{ - int error; - - if (tp_offload(tp)) - error = ofld_disconnect(tp); - else - error = tcp_output(tp); - return (error); -} - -static __inline int -tcp_gen_abort(struct tcpcb *tp) -{ - int error; - - if (tp_offload(tp)) - error = ofld_abort(tp); - else - error = tcp_output(tp); - return (error); -} - -static __inline int -tcp_gen_send(struct tcpcb *tp) -{ - int error; - - if (tp_offload(tp)) - error = ofld_send(tp); - else - error = tcp_output(tp); - return (error); -} - -static __inline int -tcp_gen_rcvd(struct tcpcb *tp) -{ - int error; - - if (tp_offload(tp)) - error = ofld_rcvd(tp); - else - error = tcp_output(tp); - return (error); -} - -static __inline void -tcp_gen_listen_open(struct tcpcb *tp) -{ - - if (SO_OFFLOADABLE(tp->t_inpcb->inp_socket)) - ofld_listen_open(tp); -} - -static __inline void -tcp_gen_listen_close(struct tcpcb *tp) -{ - 
ofld_listen_close(tp); -} - -static __inline void -tcp_gen_detach(struct tcpcb *tp) -{ - if (tp_offload(tp)) - ofld_detach(tp); -} - -#else - -static __inline int -tcp_gen_connect(struct socket *so, struct sockaddr *nam) -{ - - return (tcp_output(tp)); -} - -static __inline int -tcp_gen_disconnect(struct tcpcb *tp) -{ - - return (tcp_output(tp)); -} - -static __inline int -tcp_gen_abort(struct tcpcb *tp) -{ - - return (tcp_output(tp)); -} - -static __inline int -tcp_gen_send(struct tcpcb *tp) -{ - - return (tcp_output(tp)); -} - -static __inline int -tcp_gen_rcvd(struct tcpcb *tp) -{ - - return (tcp_output(tp)); -} - -static __inline void -tcp_gen_listen_open(struct tcpcb *tp) -{ -} - -static __inline void -tcp_gen_listen_close(struct tcpcb *tp) -{ -} - -static __inline void -tcp_gen_detach(struct tcpcb *tp) -{ -} - -#endif - -struct toe_usrreqs { - int (*tu_send)(struct tcpcb *tp); - int (*tu_rcvd)(struct tcpcb *tp); - int (*tu_disconnect)(struct tcpcb *tp); - int (*tu_abort)(struct tcpcb *tp); - void (*tu_detach)(struct tcpcb *tp); - void (*tu_syncache_event)(int event, void *toep); -}; - -#define OFLD_LISTEN_OPEN 1 -#define OFLD_LISTEN_CLOSE 2 -typedef void (*ofld_listen_fn)(void *, int, struct tcpcb *); -EVENTHANDLER_DECLARE(ofld_listen, ofld_listen_fn); - -#endif Index: tcp_subr.c =================================================================== RCS file: /home/kmacy/devel/ncvs/src/sys/netinet/tcp_subr.c,v retrieving revision 1.303 diff -d -u -r1.303 tcp_subr.c --- tcp_subr.c 28 Nov 2007 13:23:50 -0000 1.303 +++ tcp_subr.c 16 Dec 2007 20:01:28 -0000 @@ -85,6 +85,7 @@ #include #include #include +#include #ifdef INET6 #include #endif @@ -592,6 +593,7 @@ return (NULL); tp = &tm->tcb; tp->t_timers = &tm->tt; + /* LIST_INIT(&tp->t_segq); */ /* XXX covered by M_ZERO */ tp->t_maxseg = tp->t_maxopd = #ifdef INET6 @@ -651,7 +653,7 @@ if (TCPS_HAVERCVDSYN(tp->t_state)) { tp->t_state = TCPS_CLOSED; - (void) tcp_output(tp); + (void) tcp_gen_reset(tp); 
tcpstat.tcps_drops++; } else tcpstat.tcps_conndrops++; @@ -749,6 +751,9 @@ tp->t_segqlen--; tcp_reass_qsize--; } + /* disconnect offload device, if any */ + tcp_gen_detach(tp); + tcp_free_sackholes(tp); inp->inp_ppcb = NULL; tp->t_inpcb = NULL; @@ -768,6 +773,9 @@ INP_INFO_WLOCK_ASSERT(&tcbinfo); INP_LOCK_ASSERT(inp); + if (tp->t_state == TCPS_LISTEN) + tcp_gen_listen_close(tp); + in_pcbdrop(inp); tcpstat.tcps_closed++; KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL")); @@ -1562,7 +1570,7 @@ tp->snd_recover = tp->snd_max; if (tp->t_flags & TF_SACK_PERMIT) EXIT_FASTRECOVERY(tp); - tcp_output(tp); + tcp_gen_send(tp); return (inp); } Index: tcp_syncache.c =================================================================== RCS file: /home/kmacy/devel/ncvs/src/sys/netinet/tcp_syncache.c,v retrieving revision 1.139 diff -d -u -r1.139 tcp_syncache.c --- tcp_syncache.c 12 Dec 2007 20:35:59 -0000 1.139 +++ tcp_syncache.c 15 Dec 2007 23:50:10 -0000 @@ -78,7 +78,7 @@ #include #include #include -#include +#include #ifdef INET6 #include #endif @@ -136,7 +136,7 @@ #define SCF_UNREACH 0x10 /* icmp unreachable received */ #define SCF_SIGNATURE 0x20 /* send MD5 digests */ #define SCF_SACK 0x80 /* send SACK option */ -#ifndef DISABLE_TCP_OFFLOAD +#ifndef TCP_OFFLOAD_DISABLE struct toe_usrreqs *sc_tu; /* TOE operations */ void *sc_toepcb; /* TOE protocol block */ #endif @@ -145,6 +145,13 @@ #endif }; +#ifdef TCP_OFFLOAD_DISABLE +#define TOEPCB_ISSET(sc) (0) +#else +#define TOEPCB_ISSET(sc) ((sc)->sc_toepcb != NULL) +#endif + + struct syncache_head { struct mtx sch_mtx; TAILQ_HEAD(sch_head, syncache) sch_bucket; @@ -358,9 +365,9 @@ TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash); sch->sch_length--; -#ifndef DISABLE_TCP_OFFLOAD +#ifndef TCP_OFFLOAD_DISABLE if (sc->sc_tu) - sc->sc_tu->tu_syncache_event(SC_DROP, sc->sc_toepcb); + sc->sc_tu->tu_syncache_event(TOE_SC_DROP, sc->sc_toepcb); #endif syncache_free(sc); tcp_syncache.cache_count--; @@ -878,7 +885,7 @@ * Segment 
validation: * ACK must match our initial sequence number + 1 (the SYN|ACK). */ - if (th->th_ack != sc->sc_iss + 1 && sc->sc_toepcb == NULL) { + if (th->th_ack != sc->sc_iss + 1 && !TOEPCB_ISSET(sc)) { if ((s = tcp_log_addrs(inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: ACK %u != ISS+1 %u, segment " "rejected\n", s, __func__, th->th_ack, sc->sc_iss); @@ -889,7 +896,7 @@ * number + 1 (the SYN) because we didn't ACK any data that * may have come with the SYN. */ - if (th->th_seq != sc->sc_irs + 1 && sc->sc_toepcb == NULL) { + if (th->th_seq != sc->sc_irs + 1 && !TOEPCB_ISSET(sc)) { if ((s = tcp_log_addrs(inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: SEQ %u != IRS+1 %u, segment " "rejected\n", s, __func__, th->th_seq, sc->sc_irs); @@ -907,7 +914,7 @@ * must be equal to what we actually sent in the SYN|ACK. */ if ((to->to_flags & TOF_TS) && to->to_tsecr != sc->sc_ts && - sc->sc_toepcb == NULL) { + !TOEPCB_ISSET(sc)) { if ((s = tcp_log_addrs(inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: TSECR %u != TS %u, " "segment rejected\n", @@ -1028,9 +1035,9 @@ sc = syncache_lookup(inc, &sch); /* returns locked entry */ SCH_LOCK_ASSERT(sch); if (sc != NULL) { -#ifndef DISABLE_TCP_OFFLOAD +#ifndef TCP_OFFLOAD_DISABLE if (sc->sc_tu) - sc->sc_tu->tu_syncache_event(SC_ENTRY_PRESENT, + sc->sc_tu->tu_syncache_event(TOE_SC_ENTRY_PRESENT, sc->sc_toepcb); #endif tcpstat.tcps_sc_dupsyn++; @@ -1067,7 +1074,7 @@ s, __func__); free(s, M_TCPLOG); } - if ((sc->sc_toepcb == NULL) && syncache_respond(sc) == 0) { + if (!TOEPCB_ISSET(sc) && syncache_respond(sc) == 0) { sc->sc_rxmits = 0; syncache_timeout(sc, sch, 1); tcpstat.tcps_sndacks++; @@ -1116,7 +1123,7 @@ sc->sc_ip_tos = ip_tos; sc->sc_ip_ttl = ip_ttl; } -#ifndef DISABLE_TCP_OFFLOAD +#ifndef TCP_OFFLOAD_DISABLE sc->sc_tu = tu; sc->sc_toepcb = toepcb; #endif @@ -1211,7 +1218,7 @@ /* * Do a standard 3-way handshake. 
*/ - if (sc->sc_toepcb || syncache_respond(sc) == 0) { + if (TOEPCB_ISSET(sc) || syncache_respond(sc) == 0) { if (tcp_syncookies && tcp_syncookiesonly && sc != &scs) syncache_free(sc); else if (sc != &scs) Index: tcp_usrreq.c =================================================================== RCS file: /home/kmacy/devel/ncvs/src/sys/netinet/tcp_usrreq.c,v retrieving revision 1.164 diff -d -u -r1.164 tcp_usrreq.c --- tcp_usrreq.c 19 Oct 2007 08:53:14 -0000 1.164 +++ tcp_usrreq.c 14 Dec 2007 20:16:03 -0000 @@ -85,6 +85,7 @@ #ifdef TCPDEBUG #include #endif +#include /* * TCP protocol interface to socket abstraction. @@ -385,6 +386,7 @@ if (error == 0) { tp->t_state = TCPS_LISTEN; solisten_proto(so, backlog); + tcp_gen_listen_open(tp); } SOCK_UNLOCK(so); @@ -476,7 +478,7 @@ TCPDEBUG1(); if ((error = tcp_connect(tp, nam, td)) != 0) goto out; - error = tcp_output(tp); + error = tcp_gen_connect(so, nam); out: TCPDEBUG2(PRU_CONNECT); INP_UNLOCK(inp); @@ -528,7 +530,7 @@ inp->inp_vflag &= ~INP_IPV6; if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0) goto out; - error = tcp_output(tp); + error = tcp_gen_connect(so, nam); goto out; } inp->inp_vflag &= ~INP_IPV4; @@ -536,7 +538,7 @@ inp->inp_inc.inc_isipv6 = 1; if ((error = tcp6_connect(tp, nam, td)) != 0) goto out; - error = tcp_output(tp); + error = tcp_gen_connect(so, nam); out: TCPDEBUG2(PRU_CONNECT); @@ -703,7 +705,7 @@ TCPDEBUG1(); socantsendmore(so); tcp_usrclosed(tp); - error = tcp_output(tp); + error = tcp_gen_disconnect(tp); out: TCPDEBUG2(PRU_SHUTDOWN); @@ -733,7 +735,7 @@ } tp = intotcpcb(inp); TCPDEBUG1(); - tcp_output(tp); + tcp_gen_rcvd(tp); out: TCPDEBUG2(PRU_RCVD); @@ -838,7 +840,7 @@ if (tp != NULL) { if (flags & PRUS_MORETOCOME) tp->t_flags |= TF_MORETOCOME; - error = tcp_output(tp); + error = tcp_gen_send(tp); if (flags & PRUS_MORETOCOME) tp->t_flags &= ~TF_MORETOCOME; } @@ -889,7 +891,7 @@ } tp->snd_up = tp->snd_una + so->so_snd.sb_cc; tp->t_flags |= TF_FORCEDATA; - error = tcp_output(tp); + 
error = tcp_gen_send(tp); tp->t_flags &= ~TF_FORCEDATA; } out: @@ -1489,7 +1491,7 @@ sbflush(&so->so_rcv); tcp_usrclosed(tp); if (!(inp->inp_vflag & INP_DROPPED)) - tcp_output(tp); + tcp_gen_disconnect(tp); } } @@ -1511,8 +1513,9 @@ INP_LOCK_ASSERT(tp->t_inpcb); switch (tp->t_state) { - case TCPS_CLOSED: case TCPS_LISTEN: + tcp_gen_listen_close(tp); + case TCPS_CLOSED: tp->t_state = TCPS_CLOSED; tp = tcp_close(tp); /*