From d31e82302ae39c4b008419f7fda0c1ac5f927f3b Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Sun, 21 Sep 2014 21:00:32 +0800 Subject: [PATCH] inpcb: Save local group index So that the caller, e.g. UDP protocol, could redistribute the inpcb accordingly. We keep the local group sorted by the inpcb local group index in ascending order. This eases the multi-process userland application which uses SO_REUSEPORT sockets and binds process to the owner cpu of the SO_REUSEPORT socket: If we didn't sort the local group by the inpcb local group index and one of the processes owning an inpcb in this local group restarted, e.g. crashed and restarted by watchdog, other processes owning an inpcb in this local group would have to detect that event, refetch their socket's owner cpu, and re-bind. --- sys/netinet/in_pcb.c | 57 +++++++++++++++++++++++++++++++++++++++++++- sys/netinet/in_pcb.h | 1 + 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index 1d0fc7243b..c79ef8bd3a 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -285,6 +285,7 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo) inp = kmalloc(pcbinfo->ipi_size, M_PCB, M_WAITOK|M_ZERO|M_NULLOK); if (inp == NULL) return (ENOMEM); + inp->inp_lgrpindex = -1; inp->inp_gencnt = ++pcbinfo->ipi_gencnt; inp->inp_pcbinfo = pcbinfo; inp->inp_socket = so; @@ -1803,6 +1804,7 @@ in_pcbinslocalgrphash_oncpu(struct inpcb *inp, struct inpcbinfo *pcbinfo) struct inp_localgrphead *hdr; struct inp_localgroup *grp, *grp_alloc = NULL; struct ucred *cred; + int i, idx; ASSERT_PCBINFO_TOKEN_HELD(pcbinfo); @@ -1930,7 +1932,59 @@ again: KASSERT(grp->il_inpcnt < grp->il_inpsiz, ("invalid local group size %d and count %d", grp->il_inpsiz, grp->il_inpcnt)); - grp->il_inp[grp->il_inpcnt] = inp; + + /* + * Keep the local group sorted by the inpcb local group index + * in ascending order. 
+ * + * This eases the multi-process userland application which uses + * SO_REUSEPORT sockets and binds process to the owner cpu of + * the SO_REUSEPORT socket: + * If we didn't sort the local group by the inpcb local group + * index and one of the process owning an inpcb in this local + * group restarted, e.g. crashed and restarted by watchdog, + * other processes owning a inpcb in this local group would have + * to detect that event, refetch its socket's owner cpu, and + * re-bind. + */ + idx = grp->il_inpcnt; + for (i = 0; i < idx; ++i) { + struct inpcb *oinp = grp->il_inp[i]; + + if (oinp->inp_lgrpindex > i) { + if (inp->inp_lgrpindex < 0) { + inp->inp_lgrpindex = i; + } else if (inp->inp_lgrpindex != i) { + if (bootverbose) { + kprintf("inp %p: grpidx %d, " + "assigned to %d, cpu%d\n", + inp, inp->inp_lgrpindex, i, + mycpuid); + } + } + grp->il_inp[i] = inp; + + /* Pull down inpcbs */ + for (; i < grp->il_inpcnt; ++i) { + struct inpcb *oinp1 = grp->il_inp[i + 1]; + + grp->il_inp[i + 1] = oinp; + oinp = oinp1; + } + grp->il_inpcnt++; + return; + } + } + + if (inp->inp_lgrpindex < 0) { + inp->inp_lgrpindex = idx; + } else if (inp->inp_lgrpindex != idx) { + if (bootverbose) { + kprintf("inp %p: grpidx %d, assigned to %d, cpu%d\n", + inp, inp->inp_lgrpindex, idx, mycpuid); + } + } + grp->il_inp[idx] = inp; grp->il_inpcnt++; } @@ -2048,6 +2102,7 @@ in_pcbremwildcardhash(struct inpcb *inp) KASSERT(inp->inp_flags & INP_WILDCARD, ("inp not wildcard")); in_pcbremwildcardhash_oncpu(inp, pcbinfo); + inp->inp_lgrpindex = -1; inp->inp_flags &= ~INP_WILDCARD; } diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index fc8c9c6e5e..11c70aea4b 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -191,6 +191,7 @@ struct inpcb { LIST_ENTRY(inpcb) inp_hash; /* hash list */ LIST_ENTRY(inpcb) inp_list; /* list for all PCBs of this proto */ u_int32_t inp_flow; + int inp_lgrpindex; /* local group index */ /* local and foreign ports, local and foreign addr */ struct 
in_conninfo inp_inc; -- 2.41.0