From: Matthew Dillon Date: Fri, 27 Jun 2014 17:24:24 +0000 (-0700) Subject: kernel - Force manual hash calculation for IP fragments X-Git-Url: https://gitweb.dragonflybsd.org/~nant/dragonfly.git/commitdiff_plain/aecff6d16ba3e705aa6b0aaae2be1f9ef0e4e37c kernel - Force manual hash calculation for IP fragments * Hardware Toeplitz hashes do not always distinguish IP fragments from full IP packets. A fragmented IP packet will typically have (src,dst,srcport,dstport) in the first fragment, but only (src,dst) in the remaining fragments. The HW might not understand this and may generate hashes that prevent the fragments from being directed to the same cpu. * Force a manual recalculation of the hash (ignore the HW hash) for any IP fragment. That is, any IP packet with the IP_MF bit set or with a non-zero offset (masked by IP_OFFMASK). * Direct all IP fragments to a target cpu based on the hash. We previously directed all IP fragments to cpu 0. * NOTE: The IP fragment handling code in the kernel is still serialized with a lock. This will be addressed in a later commit. However, fragments which are piped into PF will now be properly distributed across available cpus. Submitted-by: sephe --- diff --git a/sys/netinet/ip_demux.c b/sys/netinet/ip_demux.c index 7c446c5bcf..782efce6dc 100644 --- a/sys/netinet/ip_demux.c +++ b/sys/netinet/ip_demux.c @@ -289,11 +289,9 @@ ip_hashfn(struct mbuf **mptr, int hoff, int dir) ip = mtodoff(m, struct ip *, hoff); iphlen = ip->ip_hl << 2; - /* - * XXX generic packet handling defrag on CPU 0 for now. 
- */ if (ntohs(ip->ip_off) & (IP_MF | IP_OFFMASK)) { - hash = 0; + hash = toeplitz_hash(toeplitz_rawhash_addr( + ip->ip_src.s_addr, ip->ip_dst.s_addr)); goto back; } diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index b231569e4d..ef83fffc0c 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -425,7 +425,6 @@ ip_input(struct mbuf *m) int hlen, checkif; u_short sum; struct in_addr pkt_dst; - boolean_t check_msgport = FALSE; boolean_t using_srcrt = FALSE; /* forward (by PFIL_HOOKS) */ struct in_addr odst; /* original dst address(NAT) */ struct m_tag *mtag; @@ -443,37 +442,31 @@ ip_input(struct mbuf *m) * This routine is called from numerous places which may not have * characterized the packet. */ - if ((m->m_flags & M_HASH) == 0) { - atomic_add_long(&ip_hash_count, 1); - ip_hashfn(&m, 0, IP_MPORT_IN); - if (m == NULL) - return; - KKASSERT(m->m_flags & M_HASH); - check_msgport = TRUE; - } ip = mtod(m, struct ip *); - if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || ntohs(ip->ip_off) & (IP_MF | IP_OFFMASK)) { /* - * XXX handle multicast and fragment on CPU0 for now. - * - * This could happen for IP packets hashed by hardwares - * using RSS: - * - Hardware may not differentiate multicast IP packets - * from unicast IP packets. - * - Hardware may not differentiate IP fragments from - * unfragmented IP packets. + * Force hash recalculation for fragments and multicast + * packets; hardware may not do it correctly. 
+ * XXX add flag to indicate the hash is from hardware */ - m->m_pkthdr.hash = 0; - check_msgport = TRUE; + m->m_flags &= ~M_HASH; } + if ((m->m_flags & M_HASH) == 0) { + ip_hashfn(&m, 0, IP_MPORT_IN); + if (m == NULL) + return; + KKASSERT(m->m_flags & M_HASH); - if (check_msgport && - &curthread->td_msgport != netisr_hashport(m->m_pkthdr.hash)) { - netisr_queue(NETISR_IP, m); - /* Requeued to other netisr msgport; done */ - return; + if (&curthread->td_msgport != + netisr_hashport(m->m_pkthdr.hash)) { + netisr_queue(NETISR_IP, m); + /* Requeued to other netisr msgport; done */ + return; + } + + /* mbuf could have been changed */ + ip = mtod(m, struct ip *); } /*