kernel - network protocol thread routing
author Matthew Dillon <dillon@apollo.backplane.com>
Thu, 9 Sep 2010 08:32:07 +0000 (01:32 -0700)
committer Matthew Dillon <dillon@apollo.backplane.com>
Thu, 9 Sep 2010 08:32:07 +0000 (01:32 -0700)
* ip_input() now calls ip_mport() unconditionally and physically compares
  the port to &curthread->td_msgport.  If they do not match, the packet
  will be forwarded to the correct protocol thread.

* ip6_input() now unconditionally calls sw6->pr_soport() on the last
  received header and forwards the packet to the correct protocol thread.

  udp6, tcp6, and rip6 are now marked as terminal protocols (PR_LASTHDR).
  Though I'm not sure this is correct, they must be marked this way for
  the packet to be forwarded to the correct protocol thread.

  This fixes an assertion panic when tcp6_input() calls tcp_input().
  tcp_input() is expecting to be run on the same protocol thread assigned
  to its socket, otherwise callout timers and other entities will get
  confused.

sys/net/netisr.h
sys/netinet/ip_input.c
sys/netinet6/in6_proto.c
sys/netinet6/ip6_input.c
sys/netinet6/ip6_var.h

index 4ace6aa..74c79a2 100644 (file)
@@ -133,6 +133,7 @@ typedef __boolean_t (*msg_predicate_fn_t)(struct netmsg *);
 struct netmsg_packet {
     struct netmsg      nm_netmsg;
     struct mbuf                *nm_packet;
+    int                        nm_nxt;
 };
 
 struct netmsg_pr_timeout {
index bbef673..428f217 100644 (file)
@@ -451,10 +451,10 @@ ip_input(struct mbuf *m)
        u_short sum;
        struct in_addr pkt_dst;
        boolean_t using_srcrt = FALSE;          /* forward (by PFIL_HOOKS) */
-       boolean_t needredispatch = FALSE;
        struct in_addr odst;                    /* original dst address(NAT) */
        struct m_tag *mtag;
        struct sockaddr_in *next_hop = NULL;
+       lwkt_port_t port;
 #ifdef FAST_IPSEC
        struct tdb_ident *tdbi;
        struct secpolicy *sp;
@@ -463,6 +463,22 @@ ip_input(struct mbuf *m)
 
        M_ASSERTPKTHDR(m);
 
+       /*
+        * This does necessary pullups and figures out the protocol
+        * port.  If the packet is really badly formed it will blow
+        * it away and return NULL.
+        *
+        * We do not necessarily make use of the port (forwarding,
+        * defragmentation, etc).
+        */
+       port = ip_mport(&m, IP_MPORT_IN);
+       if (port == NULL)
+               return;
+       ip = mtod(m, struct ip *);
+
+       /*
+        * Pull out certain tags
+        */
        if (m->m_pkthdr.fw_flags & IPFORWARD_MBUF_TAGGED) {
                /* Next hop */
                mtag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
@@ -481,7 +497,6 @@ ip_input(struct mbuf *m)
 
        /* length checks already done in ip_mport() */
        KASSERT(m->m_len >= sizeof(struct ip), ("IP header not in one mbuf"));
-       ip = mtod(m, struct ip *);
 
        if (IP_VHL_V(ip->ip_vhl) != IPVERSION) {
                ipstat.ips_badvers++;
@@ -610,7 +625,6 @@ iphack:
                return;
        }
        if (m->m_pkthdr.fw_flags & FW_MBUF_REDISPATCH) {
-               needredispatch = TRUE;
                m->m_pkthdr.fw_flags &= ~FW_MBUF_REDISPATCH;
        }
 pass:
@@ -866,8 +880,6 @@ ours:
 
                /* Get the header length of the reassembled packet */
                hlen = IP_VHL_HL(ip->ip_vhl) << 2;
-
-               needredispatch = TRUE;
        } else {
                ip->ip_len -= hlen;
        }
@@ -925,19 +937,16 @@ DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/
 #endif /* FAST_IPSEC */
 
        /*
-        * NOTE: ip_len in host form and adjusted down by hlen for
+        * NOTE: ip_len is now in host form and adjusted down by hlen for
         *       protocol processing.
+        *
+        * We must forward the packet to the correct protocol thread if
+        * we are not already in it.
         */
        ipstat.ips_delivered++;
-       if (needredispatch) {
-               struct netmsg_packet *pmsg;
-               lwkt_port_t port;
 
-               ip->ip_off = htons(ip->ip_off);
-               ip->ip_len = htons(ip->ip_len + hlen);
-               port = ip_mport_in(&m);
-               if (port == NULL)
-                       return;
+       if (port != &curthread->td_msgport) {
+               struct netmsg_packet *pmsg;
 
                pmsg = &m->m_hdr.mh_netmsg;
                netmsg_init(&pmsg->nm_netmsg, NULL, &netisr_apanic_rport,
@@ -945,9 +954,6 @@ DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/
                pmsg->nm_packet = m;
                pmsg->nm_netmsg.nm_lmsg.u.ms_result = hlen;
 
-               ip = mtod(m, struct ip *);
-               ip->ip_len = ntohs(ip->ip_len) - hlen;
-               ip->ip_off = ntohs(ip->ip_off);
                lwkt_sendmsg(port, &pmsg->nm_netmsg.nm_lmsg);
        } else {
                transport_processing_oncpu(m, hlen, ip);
index 4e8e8eb..aeb6361 100644 (file)
@@ -163,13 +163,13 @@ struct ip6protosw inet6sw[] = {
   ip6_init,    0,              frag6_slowtimo, frag6_drain,
   &nousrreqs,
 },
-{ SOCK_DGRAM,  &inet6domain,   IPPROTO_UDP,    PR_ATOMIC|PR_ADDR,
+{ SOCK_DGRAM,  &inet6domain,   IPPROTO_UDP,    PR_ATOMIC|PR_ADDR|PR_LASTHDR,
   udp6_input,  0,              udp6_ctlinput,  ip6_ctloutput,
   cpu0_soport, cpu0_ctlport,
   0,           0,              0,              0,
   &udp6_usrreqs,
 },
-{ SOCK_STREAM, &inet6domain,   IPPROTO_TCP,    PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN,
+{ SOCK_STREAM, &inet6domain,   IPPROTO_TCP,    PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN|PR_LASTHDR,
   tcp6_input,  0,              tcp6_ctlinput,  tcp_ctloutput,
   tcp6_soport, cpu0_ctlport,
 #ifdef INET    /* don't call initialization and timeout routines twice */
@@ -179,7 +179,7 @@ struct ip6protosw inet6sw[] = {
 #endif
   &tcp6_usrreqs,
 },
-{ SOCK_RAW,    &inet6domain,   IPPROTO_RAW,    PR_ATOMIC|PR_ADDR,
+{ SOCK_RAW,    &inet6domain,   IPPROTO_RAW,    PR_ATOMIC|PR_ADDR|PR_LASTHDR,
   rip6_input,  rip6_output,    rip6_ctlinput,  rip6_ctloutput,
   cpu0_soport, cpu0_ctlport,
   0,           0,              0,              0,
index 79885d8..fa325f5 100644 (file)
@@ -86,9 +86,6 @@
 #include <sys/proc.h>
 #include <sys/priv.h>
 
-#include <sys/thread2.h>
-#include <sys/msgport2.h>
-
 #include <net/if.h>
 #include <net/if_types.h>
 #include <net/if_dl.h>
 #include <net/netisr.h>
 #include <net/pfil.h>
 
+#include <sys/thread2.h>
+#include <sys/msgport2.h>
+#include <net/netmsg2.h>
+
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #ifdef INET
@@ -164,6 +165,7 @@ static void ip6_input(struct netmsg *msg);
 #ifdef PULLDOWN_TEST
 static struct mbuf *ip6_pullexthdr (struct mbuf *, size_t, int);
 #endif
+static void transport6_processing_handler(netmsg_t netmsg);
 
 /*
  * IP6 initialization: fill in IP6 protocol switch table.
@@ -796,6 +798,8 @@ hbhcheck:
 
        rh_present = 0;
        while (nxt != IPPROTO_DONE) {
+               struct ip6protosw *sw6;
+
                if (ip6_hdrnestlimit && (++nest > ip6_hdrnestlimit)) {
                        ip6stat.ip6s_toomanyhdr++;
                        in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
@@ -837,20 +841,41 @@ hbhcheck:
                        }
                }
 
+               sw6 = &inet6sw[ip6_protox[nxt]];
 #ifdef IPSEC
                /*
                 * enforce IPsec policy checking if we are seeing last header.
                 * note that we do not visit this with protocols with pcb layer
                 * code - like udp/tcp/raw ip.
                 */
-               if ((inet6sw[ip6_protox[nxt]].pr_flags & PR_LASTHDR) &&
-                   ipsec6_in_reject(m, NULL)) {
+               if ((sw6->pr_flags & PR_LASTHDR) && ipsec6_in_reject(m, NULL)) {
                        ipsec6stat.in_polvio++;
                        goto bad;
                }
 #endif
-
-               nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt);
+               /*
+                * If this is a terminal header forward to the port, otherwise
+                * process synchronously for more headers.
+                */
+               if (sw6->pr_flags & PR_LASTHDR) {
+                       struct netmsg_packet *pmsg;
+                       lwkt_port_t port;
+
+                       port = sw6->pr_soport(NULL, NULL, &m);
+                       KKASSERT(port != NULL);
+                       pmsg = &m->m_hdr.mh_netmsg;
+                       netmsg_init(&pmsg->nm_netmsg, NULL,
+                                   &netisr_apanic_rport,
+                                   MSGF_MPSAFE, transport6_processing_handler);
+                       pmsg->nm_packet = m;
+                       pmsg->nm_nxt = nxt;
+                       pmsg->nm_netmsg.nm_lmsg.u.ms_result = off;
+                       lwkt_sendmsg(port, &pmsg->nm_netmsg.nm_lmsg);
+                       /* done with m */
+                       nxt = IPPROTO_DONE;
+               } else {
+                       nxt = sw6->pr_input(&m, &off, nxt);
+               }
        }
        goto bad2;
 bad:
@@ -861,6 +886,28 @@ bad2:
 }
 
 /*
+ * We have to call the pr_input() function from the correct protocol
+ * thread.  The sw6->pr_soport() request at the end of ip6_input()
+ * returns the port and we forward a netmsg to the port to execute
+ * this function.
+ */
+static void
+transport6_processing_handler(netmsg_t netmsg)
+{
+       struct netmsg_packet *pmsg = (struct netmsg_packet *)netmsg;
+       struct ip6protosw *sw6;
+       int hlen;
+       int nxt;
+
+       sw6 = &inet6sw[ip6_protox[pmsg->nm_nxt]];
+       hlen = pmsg->nm_netmsg.nm_lmsg.u.ms_result;
+
+       nxt = sw6->pr_input(&pmsg->nm_packet, &hlen, pmsg->nm_nxt);
+       KKASSERT(nxt == IPPROTO_DONE);
+       /* netmsg was embedded in the mbuf, do not reply! */
+}
+
+/*
  * set/grab in6_ifaddr correspond to IPv6 destination address.
  * XXX backward compatibility wrapper
  */
index 5837810..dad1195 100644 (file)
@@ -290,6 +290,10 @@ struct ip6aux {
 #define        IPV6_FORWARDING         0x02    /* most of IPv6 header exists */
 #define        IPV6_MINMTU             0x04    /* use minimum MTU (IPV6_USE_MIN_MTU) */
 
+/* direction passed to ip_mport as last parameter */
+#define IP6_MPORT_IN           0 /* Find lwkt port for incoming packets */
+#define IP6_MPORT_OUT          1 /* Find lwkt port for outgoing packets */
+
 extern struct  ip6stat ip6stat;        /* statistics */
 extern u_int32_t ip6_id;               /* fragment identifier */
 extern int     ip6_defhlim;            /* default hop limit */