From d371a63a9934c68bb05ffcc6d301211b1fa9345a Mon Sep 17 00:00:00 2001 From: Jeffrey Hsu Date: Mon, 8 Mar 2004 19:44:32 +0000 Subject: [PATCH] Partition the TCP connection table. --- sys/netinet/ip_demux.c | 20 ++++++- sys/netinet/tcp_input.c | 19 +++++-- sys/netinet/tcp_subr.c | 116 ++++++++++++++++++++++++--------------- sys/netinet/tcp_usrreq.c | 7 ++- sys/netinet/tcp_var.h | 6 +- sys/netinet/udp_var.h | 4 +- 6 files changed, 115 insertions(+), 57 deletions(-) diff --git a/sys/netinet/ip_demux.c b/sys/netinet/ip_demux.c index b3d3abf8ae..5fbc419440 100644 --- a/sys/netinet/ip_demux.c +++ b/sys/netinet/ip_demux.c @@ -2,7 +2,7 @@ * Copyright (c) 2003 Jeffrey Hsu * All rights reserved. * - * $DragonFly: src/sys/netinet/ip_demux.c,v 1.6 2004/03/06 01:58:55 hsu Exp $ + * $DragonFly: src/sys/netinet/ip_demux.c,v 1.7 2004/03/08 19:44:32 hsu Exp $ */ #include "opt_inet.h" @@ -212,6 +212,24 @@ udp_soport(struct socket *so, struct sockaddr *nam) inp->inp_fport)].td_msgport); } +/* + * Map a network address to a processor. + */ +int +tcp_addrcpu(in_addr_t src, in_port_t sport, in_addr_t dst, in_port_t dport) +{ + return (INP_MPORT_HASH(src, dst, sport, dport)); +} + +int +udp_addrcpu(in_addr_t src, in_port_t sport, in_addr_t dst, in_port_t dport) +{ + if (IN_MULTICAST(ntohl(dst))) + return (0); + else + return (INP_MPORT_HASH(src, dst, sport, dport)); +} + void tcp_thread_init(void) { diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 6b28fc9642..ba002d745c 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -33,7 +33,7 @@ * * @(#)tcp_input.c 8.12 (Berkeley) 5/24/95 * $FreeBSD: src/sys/netinet/tcp_input.c,v 1.107.2.38 2003/05/21 04:46:41 cjc Exp $ - * $DragonFly: src/sys/netinet/tcp_input.c,v 1.18 2004/03/08 00:39:00 hsu Exp $ + * $DragonFly: src/sys/netinet/tcp_input.c,v 1.19 2004/03/08 19:44:32 hsu Exp $ */ #include "opt_ipfw.h" /* for ipfw_fwd */ @@ -161,7 +161,7 @@ SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD, &tcp_reass_overflows, 0, "Global number of TCP Segment Reassembly Queue Overflows"); -struct inpcbinfo tcbinfo; +struct inpcbinfo tcbinfo[MAXCPU]; static void tcp_dooptions(struct tcpopt *, u_char *, int, int); static void tcp_pulloutofband(struct socket *, @@ -401,6 +401,7 @@ tcp_input(m, off0, proto) struct rmxp_tao tao_noncached; /* in case there's no cached entry */ struct sockaddr_in *next_hop = NULL; int rstreason; /* For badport_bandlim accounting purposes */ + int cpu; struct ip6_hdr *ip6 = NULL; #ifdef INET6 int isipv6; @@ -569,12 +570,18 @@ findpcb: * Transparently forwarded. Pretend to be the destination. * already got one like this? */ - inp = in_pcblookup_hash(&tcbinfo, ip->ip_src, th->th_sport, + inp = in_pcblookup_hash(&tcbinfo[mycpu->gd_cpuid], + ip->ip_src, th->th_sport, ip->ip_dst, th->th_dport, 0, m->m_pkthdr.rcvif); if (!inp) { /* It's new. Try find the ambushing socket. */ - inp = in_pcblookup_hash(&tcbinfo, + cpu = tcp_addrcpu(ip->ip_src.s_addr, th->th_sport, + next_hop->sin_addr.s_addr, + next_hop->sin_port ? + ntohs(next_hop->sin_port) : + th->th_dport); + inp = in_pcblookup_hash(&tcbinfo[cpu], ip->ip_src, th->th_sport, next_hop->sin_addr, next_hop->sin_port ? @@ -584,12 +591,12 @@ findpcb: } } else { if (isipv6) - inp = in6_pcblookup_hash(&tcbinfo, + inp = in6_pcblookup_hash(&tcbinfo[0], &ip6->ip6_src, th->th_sport, &ip6->ip6_dst, th->th_dport, 1, m->m_pkthdr.rcvif); else - inp = in_pcblookup_hash(&tcbinfo, + inp = in_pcblookup_hash(&tcbinfo[mycpu->gd_cpuid], ip->ip_src, th->th_sport, ip->ip_dst, th->th_dport, 1, m->m_pkthdr.rcvif); diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index eafb05e2bd..2995fa623f 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -32,7 +32,7 @@ * * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95 * $FreeBSD: src/sys/netinet/tcp_subr.c,v 1.73.2.31 2003/01/24 05:11:34 sam Exp $ - * $DragonFly: src/sys/netinet/tcp_subr.c,v 1.13 2004/03/06 05:00:41 hsu Exp $ + * $DragonFly: src/sys/netinet/tcp_subr.c,v 1.14 2004/03/08 19:44:32 hsu Exp $ */ #include "opt_compat.h" @@ -142,8 +142,9 @@ static int do_tcpdrain = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0, "Enable tcp_drain routine for extra help when low on mbufs"); +/* XXX JH */ SYSCTL_INT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_RD, - &tcbinfo.ipi_count, 0, "Number of active PCBs"); + &tcbinfo[0].ipi_count, 0, "Number of active PCBs"); static int icmp_may_rst = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_RW, &icmp_may_rst, 0, @@ -219,7 +220,13 @@ struct inp_tp { void tcp_init() { + struct inpcbporthead *porthashbase; + u_long porthashmask; + struct inpcbhead *bindhashbase; + u_long bindhashmask; + struct vm_zone *ipi_zone; int hashsize = TCBHASHSIZE; + int cpu; tcp_ccgen = 1; tcp_cleartaocache(); @@ -233,18 +240,27 @@ tcp_init() tcp_rexmit_min = TCPTV_MIN; tcp_rexmit_slop = TCPTV_CPU_VAR; - LIST_INIT(&tcbinfo.listhead); TUNABLE_INT_FETCH("net.inet.tcp.tcbhashsize", &hashsize); if (!powerof2(hashsize)) { printf("WARNING: TCB hash size not a power of 2\n"); hashsize = 512; /* safe default */ } tcp_tcbhashsize = hashsize; - tcbinfo.hashbase = hashinit(hashsize, M_PCB, &tcbinfo.hashmask); - tcbinfo.porthashbase = hashinit(hashsize, M_PCB, &tcbinfo.porthashmask); - tcbinfo.bindhashbase = hashinit(hashsize, M_PCB, &tcbinfo.bindhashmask); - tcbinfo.ipi_zone = zinit("tcpcb", sizeof(struct inp_tp), maxsockets, - ZONE_INTERRUPT, 0); + porthashbase = hashinit(hashsize, M_PCB, &porthashmask); + bindhashbase = hashinit(hashsize, M_PCB, &bindhashmask); + ipi_zone = zinit("tcpcb", sizeof(struct inp_tp), maxsockets, + ZONE_INTERRUPT, 0); + + for (cpu = 0; cpu < ncpus2; cpu++) { + LIST_INIT(&tcbinfo[cpu].listhead); + tcbinfo[cpu].hashbase = hashinit(hashsize, M_PCB, + &tcbinfo[cpu].hashmask); + tcbinfo[cpu].porthashbase = porthashbase; + tcbinfo[cpu].porthashmask = porthashmask; + tcbinfo[cpu].bindhashbase = bindhashbase; + tcbinfo[cpu].bindhashmask = bindhashmask; + tcbinfo[cpu].ipi_zone = ipi_zone; + } tcp_reass_maxseg = nmbclusters / 16; TUNABLE_INT_FETCH("net.inet.tcp.reass.maxsegments", @@ -772,11 +788,13 @@ tcp_close(tp) void tcp_drain() { - if (do_tcpdrain) - { - struct inpcb *inpb; - struct tcpcb *tcpb; - struct tseg_qent *te; + struct inpcb *inpb; + struct tcpcb *tcpb; + struct tseg_qent *te; + int cpu; + + if (!do_tcpdrain) + return; /* * Walk the tcpbs, if existing, and flush the reassembly queue, @@ -786,10 +804,11 @@ tcp_drain() * where we're really low on mbufs, this is potentially * usefull. */ - LIST_FOREACH(inpb, &tcbinfo.listhead, inp_list) { + for (cpu = 0; cpu < ncpus2; cpu++) { + LIST_FOREACH(inpb, &tcbinfo[cpu].listhead, inp_list) { if ((tcpb = intotcpcb(inpb))) { while ((te = LIST_FIRST(&tcpb->t_segq)) - != NULL) { + != NULL) { LIST_REMOVE(te, tqe_q); m_freem(te->tqe_m); FREE(te, M_TSEGQ); @@ -797,7 +816,6 @@ tcp_drain() } } } - } } @@ -852,7 +870,7 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS) * resource-intensive to repeat twice on every request. */ if (req->oldptr == 0) { - n = tcbinfo.ipi_count; + n = tcbinfo[mycpu->gd_cpuid].ipi_count; req->oldidx = 2 * (sizeof xig) + (n + n/8) * sizeof(struct xtcpcb); return 0; @@ -865,8 +883,8 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS) * OK, now we're committed to doing something. */ s = splnet(); - gencnt = tcbinfo.ipi_gencnt; - n = tcbinfo.ipi_count; + gencnt = tcbinfo[mycpu->gd_cpuid].ipi_gencnt; + n = tcbinfo[mycpu->gd_cpuid].ipi_count; splx(s); xig.xig_len = sizeof xig; @@ -882,8 +900,8 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS) return ENOMEM; s = splnet(); - for (inp = LIST_FIRST(&tcbinfo.listhead), i = 0; inp && i < n; - inp = LIST_NEXT(inp, inp_list)) { + for (inp = LIST_FIRST(&tcbinfo[mycpu->gd_cpuid].listhead), i = 0; + inp && i < n; inp = LIST_NEXT(inp, inp_list)) { if (inp->inp_gencnt <= gencnt && !prison_xinpcb(req->td, inp)) inp_list[i++] = inp; } @@ -918,9 +936,9 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS) * might be necessary to retry. */ s = splnet(); - xig.xig_gen = tcbinfo.ipi_gencnt; + xig.xig_gen = tcbinfo[mycpu->gd_cpuid].ipi_gencnt; xig.xig_sogen = so_gencnt; - xig.xig_count = tcbinfo.ipi_count; + xig.xig_count = tcbinfo[mycpu->gd_cpuid].ipi_count; splx(s); error = SYSCTL_OUT(req, &xig, sizeof xig); } @@ -936,6 +954,7 @@ tcp_getcred(SYSCTL_HANDLER_ARGS) { struct sockaddr_in addrs[2]; struct inpcb *inp; + int cpu; int error, s; error = suser(req->td); @@ -945,8 +964,10 @@ tcp_getcred(SYSCTL_HANDLER_ARGS) if (error) return (error); s = splnet(); - inp = in_pcblookup_hash(&tcbinfo, addrs[1].sin_addr, addrs[1].sin_port, - addrs[0].sin_addr, addrs[0].sin_port, 0, NULL); + cpu = tcp_addrcpu(addrs[1].sin_addr.s_addr, addrs[1].sin_port, + addrs[0].sin_addr.s_addr, addrs[0].sin_port); + inp = in_pcblookup_hash(&tcbinfo[cpu], addrs[1].sin_addr, + addrs[1].sin_port, addrs[0].sin_addr, addrs[0].sin_port, 0, NULL); if (inp == NULL || inp->inp_socket == NULL) { error = ENOENT; goto out; @@ -981,18 +1002,19 @@ tcp6_getcred(SYSCTL_HANDLER_ARGS) return (EINVAL); } s = splnet(); - if (mapped == 1) - inp = in_pcblookup_hash(&tcbinfo, - *(struct in_addr *)&addrs[1].sin6_addr.s6_addr[12], - addrs[1].sin6_port, - *(struct in_addr *)&addrs[0].sin6_addr.s6_addr[12], - addrs[0].sin6_port, - 0, NULL); - else - inp = in6_pcblookup_hash(&tcbinfo, &addrs[1].sin6_addr, - addrs[1].sin6_port, - &addrs[0].sin6_addr, addrs[0].sin6_port, - 0, NULL); + if (mapped == 1) { + inp = in_pcblookup_hash(&tcbinfo[0], + *(struct in_addr *)&addrs[1].sin6_addr.s6_addr[12], + addrs[1].sin6_port, + *(struct in_addr *)&addrs[0].sin6_addr.s6_addr[12], + addrs[0].sin6_port, + 0, NULL); + } else { + inp = in6_pcblookup_hash(&tcbinfo[0], + &addrs[1].sin6_addr, addrs[1].sin6_port, + &addrs[0].sin6_addr, addrs[0].sin6_port, + 0, NULL); + } if (inp == NULL || inp->inp_socket == NULL) { error = ENOENT; goto out; @@ -1023,6 +1045,7 @@ tcp_ctlinput(cmd, sa, vip) struct tcpcb *tp; void (*notify) (struct inpcb *, int) = tcp_notify; tcp_seq icmp_seq; + int cpu; int s; faddr = ((struct sockaddr_in *)sa)->sin_addr; @@ -1047,7 +1070,9 @@ tcp_ctlinput(cmd, sa, vip) s = splnet(); th = (struct tcphdr *)((caddr_t)ip + (IP_VHL_HL(ip->ip_vhl) << 2)); - inp = in_pcblookup_hash(&tcbinfo, faddr, th->th_dport, + cpu = tcp_addrcpu(faddr.s_addr, th->th_dport, + ip->ip_src.s_addr, th->th_sport); + inp = in_pcblookup_hash(&tcbinfo[cpu], faddr, th->th_dport, ip->ip_src, th->th_sport, 0, NULL); if (inp != NULL && inp->inp_socket != NULL) { icmp_seq = htonl(th->th_seq); @@ -1068,9 +1093,11 @@ tcp_ctlinput(cmd, sa, vip) syncache_unreach(&inc, th); } splx(s); - } else - in_pcbnotifyall(&tcbinfo.listhead, faddr, inetctlerrmap[cmd], - notify); + } else { + for (cpu = 0; cpu < ncpus2; cpu++) + in_pcbnotifyall(&tcbinfo[cpu].listhead, faddr, + inetctlerrmap[cmd], notify); + } } #ifdef INET6 @@ -1132,7 +1159,7 @@ tcp6_ctlinput(cmd, sa, d) bzero(&th, sizeof(th)); m_copydata(m, off, sizeof(*thp), (caddr_t)&th); - in6_pcbnotify(&tcbinfo.listhead, sa, th.th_dport, + in6_pcbnotify(&tcbinfo[0].listhead, sa, th.th_dport, (struct sockaddr *)ip6cp->ip6c_src, th.th_sport, cmd, notify); @@ -1143,9 +1170,8 @@ tcp6_ctlinput(cmd, sa, d) inc.inc_isipv6 = 1; syncache_unreach(&inc, &th); } else - in6_pcbnotify(&tcbinfo.listhead, sa, 0, - (const struct sockaddr *)sa6_src, - 0, cmd, notify); + in6_pcbnotify(&tcbinfo[0].listhead, sa, 0, + (const struct sockaddr *)sa6_src, 0, cmd, notify); } #endif /* INET6 */ diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index 5a2edad8c3..200a956a36 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -32,7 +32,7 @@ * * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94 * $FreeBSD: src/sys/netinet/tcp_usrreq.c,v 1.51.2.17 2002/10/11 11:46:44 ume Exp $ - * $DragonFly: src/sys/netinet/tcp_usrreq.c,v 1.7 2004/03/05 16:57:15 hsu Exp $ + * $DragonFly: src/sys/netinet/tcp_usrreq.c,v 1.8 2004/03/08 19:44:32 hsu Exp $ */ #include "opt_ipsec.h" @@ -43,6 +43,9 @@ #include #include #include +#include +#include + #include #ifdef INET6 #include @@ -1019,7 +1022,7 @@ tcp_attach(struct socket *so, struct pru_attach_info *ai) if (error) return (error); } - error = in_pcballoc(so, &tcbinfo); + error = in_pcballoc(so, &tcbinfo[mycpu->gd_cpuid]); if (error) return (error); inp = sotoinpcb(so); diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 6ae332d0e8..d876efd24b 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -33,7 +33,7 @@ * * @(#)tcp_var.h 8.4 (Berkeley) 5/24/95 * $FreeBSD: src/sys/netinet/tcp_var.h,v 1.56.2.13 2003/02/03 02:34:07 hsu Exp $ - * $DragonFly: src/sys/netinet/tcp_var.h,v 1.13 2004/03/08 00:39:00 hsu Exp $ + * $DragonFly: src/sys/netinet/tcp_var.h,v 1.14 2004/03/08 19:44:32 hsu Exp $ */ #ifndef _NETINET_TCP_VAR_H_ @@ -456,7 +456,7 @@ struct xtcpcb { SYSCTL_DECL(_net_inet_tcp); #endif -extern struct inpcbinfo tcbinfo; +extern struct inpcbinfo tcbinfo[]; extern struct tcpstat tcpstat; /* tcp statistics */ extern int tcp_mssdflt; /* XXX */ extern int tcp_delack_enabled; @@ -465,6 +465,8 @@ extern int path_mtu_discovery; extern int ss_fltsz; extern int ss_fltsz_local; +int tcp_addrcpu(in_addr_t faddr, in_port_t fport, + in_addr_t laddr, in_port_t lport); void tcp_canceltimers (struct tcpcb *); struct tcpcb * tcp_close (struct tcpcb *); diff --git a/sys/netinet/udp_var.h b/sys/netinet/udp_var.h index 921731d4ce..830ab4a36b 100644 --- a/sys/netinet/udp_var.h +++ b/sys/netinet/udp_var.h @@ -32,7 +32,7 @@ * * @(#)udp_var.h 8.1 (Berkeley) 6/10/93 * $FreeBSD: src/sys/netinet/udp_var.h,v 1.22.2.1 2001/02/18 07:12:25 luigi Exp $ - * $DragonFly: src/sys/netinet/udp_var.h,v 1.6 2004/03/06 05:00:41 hsu Exp $ + * $DragonFly: src/sys/netinet/udp_var.h,v 1.7 2004/03/08 19:44:32 hsu Exp $ */ #ifndef _NETINET_UDP_VAR_H_ @@ -105,6 +105,8 @@ extern u_long udp_recvspace; extern struct udpstat udpstat; extern int log_in_vain; +int udp_addrcpu (in_addr_t src, in_port_t sport, + in_addr_t dst, in_port_t dport); void udp_ctlinput (int, struct sockaddr *, void *); void udp_init (void); void udp_thread_init (void); -- 2.41.0