From 0f7585238463a8fc3f76acd17deef393fa6efb84 Mon Sep 17 00:00:00 2001 From: Sepherosa Ziehau Date: Sun, 30 Nov 2008 16:56:16 +0800 Subject: [PATCH] Defer various TCP timer functions from callout threads to TCP threads; mainly to avoid possible threading races, e.g. when output processing blocks the current thread. To save space, only one netmsg is used and is embedded in tcpcb. The timer functions that need to be carried out are indicated by the tasks field in the netmsg. Reviewed-by: dillon@ Discussed-with: dillon@ With-input-from: hsu@ Tested-by: hasso@ --- sys/netinet/tcp_debug.c | 2 + sys/netinet/tcp_subr.c | 17 +++ sys/netinet/tcp_syncache.c | 1 + sys/netinet/tcp_timer.c | 241 +++++++++++++++++++++++++++++++------ sys/netinet/tcp_timer.h | 12 ++ sys/netinet/tcp_usrreq.c | 3 + sys/netinet/tcp_var.h | 5 +- 7 files changed, 240 insertions(+), 41 deletions(-) diff --git a/sys/netinet/tcp_debug.c b/sys/netinet/tcp_debug.c index 0300b282ca..59f57c2ce5 100644 --- a/sys/netinet/tcp_debug.c +++ b/sys/netinet/tcp_debug.c @@ -56,6 +56,8 @@ #include #include +#include + #include #include #include diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index f52320d689..00bd9429d9 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -295,6 +295,7 @@ struct inp_tp { struct tcpcb tcb; struct callout inp_tp_rexmt, inp_tp_persist, inp_tp_keep, inp_tp_2msl; struct callout inp_tp_delack; + struct netmsg_tcp_timer inp_tp_timermsg; }; #undef ALIGNMENT #undef ALIGNM1 @@ -705,6 +706,19 @@ tcp_newtcpcb(struct inpcb *inp) callout_init(tp->tt_2msl = &it->inp_tp_2msl); callout_init(tp->tt_delack = &it->inp_tp_delack); + tp->tt_msg = &it->inp_tp_timermsg; + if (isipv6) { + /* Don't mess with IPv6; always create timer message */ + tcp_create_timermsg(tp); + } else { + /* + * Zero out timer message. We don't create it here, + * since the current CPU may not be the owner of this + * inpcb. 
+ */ + bzero(tp->tt_msg, sizeof(*tp->tt_msg)); + } + if (tcp_do_rfc1323) tp->t_flags = (TF_REQ_SCALE | TF_REQ_TSTMP); if (tcp_do_rfc1644) @@ -965,6 +979,9 @@ no_valid_rt: inp->inp_ppcb = NULL; soisdisconnected(so); + + tcp_destroy_timermsg(tp); + /* * Discard the inp. In the SMP case a wildcard inp's hash (created * by a listen socket or an INADDR_ANY udp socket) is replicated diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index 64e2d0f8e1..0f7cd34dce 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -791,6 +791,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) */ if (sc->sc_rxtslot != 0) tp->snd_cwnd = tp->t_maxseg; + tcp_create_timermsg(tp); callout_reset(tp->tt_keep, tcp_keepinit, tcp_timer_keep, tp); tcpstat.tcps_accepts++; diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c index 92332c1436..cdba12f92a 100644 --- a/sys/netinet/tcp_timer.c +++ b/sys/netinet/tcp_timer.c @@ -83,10 +83,12 @@ #include #include #include +#include #include /* before tcp_seq.h, for tcp_random18() */ #include +#include #include #include @@ -105,6 +107,30 @@ #include #endif +#define TCP_TIMER_REXMT 0x01 +#define TCP_TIMER_PERSIST 0x02 +#define TCP_TIMER_KEEP 0x04 +#define TCP_TIMER_2MSL 0x08 +#define TCP_TIMER_DELACK 0x10 + +static struct tcpcb *tcp_timer_rexmt_handler(struct tcpcb *); +static struct tcpcb *tcp_timer_persist_handler(struct tcpcb *); +static struct tcpcb *tcp_timer_keep_handler(struct tcpcb *); +static struct tcpcb *tcp_timer_2msl_handler(struct tcpcb *); +static struct tcpcb *tcp_timer_delack_handler(struct tcpcb *); + +static const struct tcp_timer { + uint32_t tt_task; + struct tcpcb *(*tt_handler)(struct tcpcb *); +} tcp_timer_handlers[] = { + { TCP_TIMER_DELACK, tcp_timer_delack_handler }, + { TCP_TIMER_PERSIST, tcp_timer_persist_handler }, + { TCP_TIMER_REXMT, tcp_timer_rexmt_handler }, + { TCP_TIMER_KEEP, tcp_timer_keep_handler }, + { TCP_TIMER_2MSL, tcp_timer_2msl_handler }, + 
{ 0, NULL } +}; + static int sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS) { @@ -190,6 +216,22 @@ tcp_canceltimers(struct tcpcb *tp) callout_stop(tp->tt_rexmt); } +/* + * Caller should be in critical section + */ +static void +tcp_send_timermsg(struct tcpcb *tp, uint32_t task) +{ + struct netmsg_tcp_timer *tmsg = tp->tt_msg; + + KKASSERT(tmsg != NULL && tmsg->tt_cpuid == mycpuid && + tmsg->tt_tcb != NULL); + + tmsg->tt_tasks |= task; + if (tmsg->tt_nmsg.nm_lmsg.ms_flags & MSGF_DONE) + lwkt_sendmsg(tcp_cport(mycpuid), &tmsg->tt_nmsg.nm_lmsg); +} + int tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] = { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 }; @@ -198,6 +240,16 @@ int tcp_backoff[TCP_MAXRXTSHIFT + 1] = static int tcp_totbackoff = 511; /* sum of tcp_backoff[] */ +/* Caller should be in critical section */ +static struct tcpcb * +tcp_timer_delack_handler(struct tcpcb *tp) +{ + tp->t_flags |= TF_ACKNOW; + tcpstat.tcps_delack++; + tcp_output(tp); + return tp; +} + /* * TCP timer processing. */ @@ -212,28 +264,21 @@ tcp_timer_delack(void *xtp) return; } callout_deactivate(tp->tt_delack); - - tp->t_flags |= TF_ACKNOW; - tcpstat.tcps_delack++; - tcp_output(tp); + tcp_send_timermsg(tp, TCP_TIMER_DELACK); crit_exit(); } -void -tcp_timer_2msl(void *xtp) +/* Caller should be in critical section */ +static struct tcpcb * +tcp_timer_2msl_handler(struct tcpcb *tp) { - struct tcpcb *tp = xtp; #ifdef TCPDEBUG int ostate; +#endif +#ifdef TCPDEBUG ostate = tp->t_state; #endif - crit_enter(); - if (callout_pending(tp->tt_2msl) || !callout_active(tp->tt_2msl)) { - crit_exit(); - return; - } - callout_deactivate(tp->tt_2msl); /* * 2 MSL timeout in shutdown went off. 
If we're closed but * still waiting for peer to close and connection has been idle @@ -251,25 +296,36 @@ tcp_timer_2msl(void *xtp) if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); #endif - crit_exit(); + return tp; } void -tcp_timer_keep(void *xtp) +tcp_timer_2msl(void *xtp) { struct tcpcb *tp = xtp; + + crit_enter(); + if (callout_pending(tp->tt_2msl) || !callout_active(tp->tt_2msl)) { + crit_exit(); + return; + } + callout_deactivate(tp->tt_2msl); + tcp_send_timermsg(tp, TCP_TIMER_2MSL); + crit_exit(); +} + +/* Caller should be in critical section */ +static struct tcpcb * +tcp_timer_keep_handler(struct tcpcb *tp) +{ struct tcptemp *t_template; #ifdef TCPDEBUG int ostate; +#endif +#ifdef TCPDEBUG ostate = tp->t_state; #endif - crit_enter(); - if (callout_pending(tp->tt_keep) || !callout_active(tp->tt_keep)) { - crit_exit(); - return; - } - callout_deactivate(tp->tt_keep); /* * Keep-alive timer went off; send something * or drop connection if idle for too long. 
@@ -310,8 +366,7 @@ tcp_timer_keep(void *xtp) if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); #endif - crit_exit(); - return; + return tp; dropit: tcpstat.tcps_keepdrops++; @@ -321,24 +376,35 @@ dropit: if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); #endif - crit_exit(); + return tp; } void -tcp_timer_persist(void *xtp) +tcp_timer_keep(void *xtp) { struct tcpcb *tp = xtp; -#ifdef TCPDEBUG - int ostate; - ostate = tp->t_state; -#endif crit_enter(); - if (callout_pending(tp->tt_persist) || !callout_active(tp->tt_persist)){ + if (callout_pending(tp->tt_keep) || !callout_active(tp->tt_keep)) { crit_exit(); return; } - callout_deactivate(tp->tt_persist); + callout_deactivate(tp->tt_keep); + tcp_send_timermsg(tp, TCP_TIMER_KEEP); + crit_exit(); +} + +/* Caller should be in critical section */ +static struct tcpcb * +tcp_timer_persist_handler(struct tcpcb *tp) +{ +#ifdef TCPDEBUG + int ostate; +#endif + +#ifdef TCPDEBUG + ostate = tp->t_state; +#endif /* * Persistance timer into zero window. * Force a byte to be output, if possible. 
@@ -368,6 +434,21 @@ out: if (tp && tp->t_inpcb->inp_socket->so_options & SO_DEBUG) tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); #endif + return tp; +} + +void +tcp_timer_persist(void *xtp) +{ + struct tcpcb *tp = xtp; + + crit_enter(); + if (callout_pending(tp->tt_persist) || !callout_active(tp->tt_persist)){ + crit_exit(); + return; + } + callout_deactivate(tp->tt_persist); + tcp_send_timermsg(tp, TCP_TIMER_PERSIST); crit_exit(); } @@ -414,22 +495,18 @@ tcp_revert_congestion_state(struct tcpcb *tp) #endif } -void -tcp_timer_rexmt(void *xtp) +/* Caller should be in critical section */ +static struct tcpcb * +tcp_timer_rexmt_handler(struct tcpcb *tp) { - struct tcpcb *tp = xtp; int rexmt; #ifdef TCPDEBUG int ostate; +#endif +#ifdef TCPDEBUG ostate = tp->t_state; #endif - crit_enter(); - if (callout_pending(tp->tt_rexmt) || !callout_active(tp->tt_rexmt)) { - crit_exit(); - return; - } - callout_deactivate(tp->tt_rexmt); /* * Retransmission timer went off. Message has not * been acked within retransmit interval. 
Back off @@ -545,5 +622,89 @@ out: if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); #endif + return tp; +} + +void +tcp_timer_rexmt(void *xtp) +{ + struct tcpcb *tp = xtp; + + crit_enter(); + if (callout_pending(tp->tt_rexmt) || !callout_active(tp->tt_rexmt)) { + crit_exit(); + return; + } + callout_deactivate(tp->tt_rexmt); + tcp_send_timermsg(tp, TCP_TIMER_REXMT); + crit_exit(); +} + +static void +tcp_timer_handler(struct netmsg *nmsg) +{ + struct netmsg_tcp_timer *tmsg = (struct netmsg_tcp_timer *)nmsg; + const struct tcp_timer *tt; + struct tcpcb *tp; + uint32_t tasks; + + crit_enter(); + + KKASSERT(tmsg->tt_cpuid == mycpuid && tmsg->tt_tcb != NULL); + tp = tmsg->tt_tcb; + + /* Save pending tasks and reset the tasks in message */ + tasks = tmsg->tt_tasks; + tmsg->tt_tasks = 0; + + /* Reply ASAP */ + lwkt_replymsg(&tmsg->tt_nmsg.nm_lmsg, 0); + + for (tt = tcp_timer_handlers; tt->tt_handler != NULL; ++tt) { + if ((tasks & tt->tt_task) == 0) + continue; + + tp = tt->tt_handler(tp); + if (tp == NULL) + break; + + tasks &= ~tt->tt_task; + if (tasks == 0) /* nothing left to do */ + break; + } + + crit_exit(); +} + +void +tcp_create_timermsg(struct tcpcb *tp) +{ + struct netmsg_tcp_timer *tmsg = tp->tt_msg; + + netmsg_init(&tmsg->tt_nmsg, &netisr_adone_rport, MSGF_DROPABLE, + tcp_timer_handler); + tmsg->tt_cpuid = mycpuid; + tmsg->tt_tcb = tp; + tmsg->tt_tasks = 0; +} + +void +tcp_destroy_timermsg(struct tcpcb *tp) +{ + struct netmsg_tcp_timer *tmsg = tp->tt_msg; + + if (tmsg == NULL || /* listen socket */ + tmsg->tt_tcb == NULL) /* only tcp_attach() is called */ + return; + + KKASSERT(tmsg->tt_cpuid == mycpuid); + crit_enter(); + if ((tmsg->tt_nmsg.nm_lmsg.ms_flags & MSGF_DONE) == 0) { + /* + * This message is still pending to be processed; + * drop it. 
+ */ + lwkt_dropmsg(&tmsg->tt_nmsg.nm_lmsg); + } crit_exit(); } diff --git a/sys/netinet/tcp_timer.h b/sys/netinet/tcp_timer.h index 94683fd9d0..92dd1baf6c 100644 --- a/sys/netinet/tcp_timer.h +++ b/sys/netinet/tcp_timer.h @@ -129,6 +129,15 @@ static char *tcptimers[] = } while(0) #ifdef _KERNEL + +struct tcpcb; +struct netmsg_tcp_timer { + struct netmsg tt_nmsg; + struct tcpcb *tt_tcb; + int tt_cpuid; + uint32_t tt_tasks; +}; + extern int tcp_keepinit; /* time to establish connection */ extern int tcp_keepidle; /* time before keepalive probes begin */ extern int tcp_keepintvl; /* time between keepalive probes */ @@ -147,6 +156,9 @@ void tcp_timer_persist (void *xtp); void tcp_timer_rexmt (void *xtp); void tcp_timer_delack (void *xtp); +void tcp_create_timermsg(struct tcpcb *); +void tcp_destroy_timermsg(struct tcpcb *); + #endif /* _KERNEL */ #endif /* !_NETINET_TCP_TIMER_H_ */ diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index e3859129ab..89a7b935b5 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -360,6 +360,7 @@ tcp_usr_listen(struct socket *so, struct thread *td) } tp->t_state = TCPS_LISTEN; + tp->tt_msg = NULL; /* Catch any invalid timer usage */ #ifdef SMP /* * We have to set the flag because we can't have other cpus @@ -926,6 +927,8 @@ tcp_connect_oncpu(struct tcpcb *tp, struct sockaddr_in *sin, inp->inp_cpcbinfo = &tcbinfo[mycpu->gd_cpuid]; in_pcbinsconnhash(inp); + tcp_create_timermsg(tp); + /* Compute window scaling to request. */ while (tp->request_r_scale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.ssb_hiwat) diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 5b76b25752..00defcd308 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -126,6 +126,8 @@ struct scoreboard { struct sackblock *lastfound; /* search hint */ }; +struct netmsg_tcp_timer; + /* * Tcp control block, one per tcp; fields: * Organized for 16 byte cacheline efficiency. 
@@ -134,12 +136,13 @@ struct tcpcb { struct tsegqe_head t_segq; int t_dupacks; /* consecutive dup acks recd */ int tt_cpu; /* sanity check the cpu */ - struct callout *tt_rexmt; /* retransmit timer */ + struct callout *tt_rexmt; /* retransmit timer */ struct callout *tt_persist; /* retransmit persistence */ struct callout *tt_keep; /* keepalive */ struct callout *tt_2msl; /* 2*msl TIME_WAIT timer */ struct callout *tt_delack; /* delayed ACK timer */ + struct netmsg_tcp_timer *tt_msg; /* timer message */ struct inpcb *t_inpcb; /* back pointer to internet pcb */ int t_state; /* state of this connection */ -- 2.41.0