tp->t_rxtcur, tcp_timer_rexmt);
}
sowwakeup(so);
- if (so->so_snd.ssb_cc > 0)
- tcp_output(tp);
+ if (so->so_snd.ssb_cc > 0 &&
+ !tcp_output_pending(tp))
+ tcp_output_fair(tp);
return(IPPROTO_DONE);
}
} else if (tiwin == tp->snd_wnd &&
/*
* Return any desired output.
*/
- if (needoutput || (tp->t_flags & TF_ACKNOW))
- tcp_output(tp);
+ if ((tp->t_flags & TF_ACKNOW) ||
+ (needoutput && tcp_sack_report_needed(tp))) {
+ tcp_output_cancel(tp);
+ tcp_output_fair(tp);
+ } else if (needoutput && !tcp_output_pending(tp)) {
+ tcp_output_fair(tp);
+ }
tcp_sack_report_cleanup(tp);
return(IPPROTO_DONE);
#include <net/if_var.h>
#include <net/route.h>
+#include <net/netmsg2.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tso, CTLFLAG_RW,
&tcp_do_tso, 0, "Enable TCP Segmentation Offload (TSO)");
+static int tcp_fairsend = 4; /* segment budget checked by tcp_output(); <= 0 never yields */
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, fairsend, CTLFLAG_RW,
+ &tcp_fairsend, 0,
+ "Amount of segments sent before yield to other senders or receivers");
+
static void tcp_idle_cwnd_validate(struct tcpcb *);
static int tcp_tso_getsize(struct tcpcb *tp, u_int *segsz, u_int *hlen);
+static void tcp_output_sched(struct tcpcb *tp);
/*
* Tcp output routine: figure out what should be sent and send it.
boolean_t can_tso = FALSE, use_tso;
boolean_t report_sack, idle_cwv = FALSE;
u_int segsz, tso_hlen, tso_lenmax = 0;
+ int segcnt = 0;
+ boolean_t need_sched = FALSE;
KKASSERT(so->so_port == &curthread->td_msgport);
if (len > segsz) {
if (!use_tso) {
len = segsz;
+ ++segcnt;
} else {
+ int nsegs;
+
if (__predict_false(tso_lenmax < segsz))
tso_lenmax = segsz << 1;
* big-small-...).
*/
len = min(len, tso_lenmax);
- len = (min(len, (IP_MAXPACKET - tso_hlen)) / segsz) *
- segsz;
- if (len <= segsz)
+ nsegs = min(len, (IP_MAXPACKET - tso_hlen)) / segsz;
+ KKASSERT(nsegs > 0);
+
+ len = nsegs * segsz;
+
+ if (len <= segsz) {
use_tso = FALSE;
+ ++segcnt;
+ } else {
+ segcnt += nsegs;
+ }
}
sendalot = TRUE;
} else {
use_tso = FALSE;
+ if (len > 0)
+ ++segcnt;
}
if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.ssb_cc))
flags &= ~TH_FIN;
return (0);
send:
+ if (need_sched && len > 0) {
+ tcp_output_sched(tp);
+ return 0;
+ }
+
/*
* Before ESTABLISHED, force sending of initial options
* unless TCP set not to do any options.
tp->t_flags &= ~(TF_ACKNOW | TF_XMITNOW);
if (tcp_delack_enabled)
tcp_callout_stop(tp, tp->tt_delack);
- if (sendalot)
+ if (sendalot) {
+ if (tcp_fairsend > 0 && (tp->t_flags & TF_FAIRSEND) &&
+ segcnt >= tcp_fairsend)
+ need_sched = TRUE;
goto again;
+ }
return (0);
}
*hlen0 = hlen;
return 0;
}
+
+/*
+ * Handler for the deferred-send message (tt_sndmore): replies to the
+ * message and then continues output on the tcpcb attached to it.
+ */
+static void
+tcp_output_sched_handler(netmsg_t nmsg)
+{
+	struct tcpcb *tp;
+
+	/* The owning tcpcb was stored in the message by tcp_output_init(). */
+	tp = nmsg->lmsg.u.ms_resultp;
+
+	/*
+	 * Reply before doing any output; presumably this lets
+	 * tcp_output_sched() queue the same message again if this
+	 * pass runs out of its segment budget too -- TODO confirm.
+	 */
+	crit_enter();
+	lwkt_replymsg(&nmsg->lmsg, 0);
+	crit_exit();
+
+	tcp_output_fair(tp);
+}
+
+/*
+ * Set up the tcpcb's deferred-send message: bind it to
+ * tcp_output_sched_handler() and record the tcpcb in the message
+ * so the handler can find it.  tp->tt_sndmore must already point
+ * at the message storage.
+ */
+void
+tcp_output_init(struct tcpcb *tp)
+{
+	struct netmsg_base *sndmore = tp->tt_sndmore;
+
+	netmsg_init(sndmore, NULL, &netisr_adone_rport, MSGF_DROPABLE,
+	    tcp_output_sched_handler);
+	sndmore->lmsg.u.ms_resultp = tp;
+}
+
+/*
+ * Cancel a deferred send: if the tcpcb's tt_sndmore message has not
+ * been marked MSGF_DONE, drop it.  Does nothing otherwise.
+ */
+void
+tcp_output_cancel(struct tcpcb *tp)
+{
+	struct netmsg_base *sndmore;
+
+	crit_enter();
+	sndmore = tp->tt_sndmore;
+	if (!(sndmore->lmsg.ms_flags & MSGF_DONE)) {
+		/* Still waiting to be processed; take it back. */
+		lwkt_dropmsg(&sndmore->lmsg);
+	}
+	crit_exit();
+}
+
+/*
+ * Report whether a deferred send is outstanding for this tcpcb.
+ *
+ * Returns TRUE while the tt_sndmore message does not have MSGF_DONE
+ * set, FALSE once it does.
+ */
+boolean_t
+tcp_output_pending(struct tcpcb *tp)
+{
+	return (tp->tt_sndmore->lmsg.ms_flags & MSGF_DONE) ? FALSE : TRUE;
+}
+
+/*
+ * Schedule a deferred send: queue the tcpcb's tt_sndmore message to
+ * the current CPU's netisr port, unless it is already outstanding
+ * (MSGF_DONE clear).
+ */
+static void
+tcp_output_sched(struct tcpcb *tp)
+{
+	struct netmsg_base *sndmore;
+
+	crit_enter();
+	sndmore = tp->tt_sndmore;
+	if ((sndmore->lmsg.ms_flags & MSGF_DONE) != 0) {
+		lwkt_sendmsg(netisr_portfn(mycpuid), &sndmore->lmsg);
+	}
+	crit_exit();
+}
+
+/*
+ * Run tcp_output() with TF_FAIRSEND set for the duration of the call,
+ * which makes tcp_output() apply the tcp_fairsend segment limit.
+ *
+ * Returns whatever tcp_output() returns.
+ */
+int
+tcp_output_fair(struct tcpcb *tp)
+{
+	int error;
+
+	tp->t_flags |= TF_FAIRSEND;
+	error = tcp_output(tp);
+	tp->t_flags &= ~TF_FAIRSEND;
+
+	return error;
+}
}
/*
+ * Tell whether a SACK report is needed for this connection.
+ *
+ * Returns TRUE if any of TSACK_F_DUPSEG, TSACK_F_ENCLOSESEG or
+ * TSACK_F_SACKLEFT is set in tp->sack_flags, or if the bounds of
+ * tp->reportblk differ; FALSE otherwise.
+ */
+boolean_t
+tcp_sack_report_needed(const struct tcpcb *tp)
+{
+	if (tp->sack_flags &
+	    (TSACK_F_DUPSEG | TSACK_F_ENCLOSESEG | TSACK_F_SACKLEFT))
+		return TRUE;
+
+	/* start != end: presumably a non-empty block waiting to be reported */
+	if (tp->reportblk.rblk_start != tp->reportblk.rblk_end)
+		return TRUE;
+
+	return FALSE;
+}
+
+/*
* Returns 0 if not D-SACK block,
* 1 if D-SACK,
* 2 if duplicate of out-of-order D-SACK block.
struct tcp_callout inp_tp_2msl;
struct tcp_callout inp_tp_delack;
struct netmsg_tcp_timer inp_tp_timermsg;
+ struct netmsg_base inp_tp_sndmore;
};
#undef ALIGNMENT
#undef ALIGNM1
inp->inp_ip_ttl = ip_defttl;
inp->inp_ppcb = tp;
tcp_sack_tcpcb_init(tp);
+
+ tp->tt_sndmore = &it->inp_tp_sndmore;
+ tcp_output_init(tp);
+
return (tp); /* XXX */
}
/* note: pcb detached later on */
tcp_destroy_timermsg(tp);
+ tcp_output_cancel(tp);
if (tp->t_flags & TF_LISTEN)
syncache_destroy(tp);
socantsendmore(so);
tp = tcp_usrclosed(tp);
}
- if (tp != NULL) {
+ if (tp != NULL && !tcp_output_pending(tp)) {
if (flags & PRUS_MORETOCOME)
tp->t_flags |= TF_MORETOCOME;
- error = tcp_output(tp);
+ error = tcp_output_fair(tp);
if (flags & PRUS_MORETOCOME)
tp->t_flags &= ~TF_MORETOCOME;
}
};
struct netmsg_tcp_timer;
+struct netmsg_base;
/*
* Tcp control block, one per tcp; fields:
struct tcp_callout *tt_delack; /* delayed ACK timer */
struct netmsg_tcp_timer *tt_msg; /* timer message */
+ struct netmsg_base *tt_sndmore;/* defer sending (fair send) */
+
struct inpcb *t_inpcb; /* back pointer to internet pcb */
int t_state; /* state of this connection */
u_int t_flags;
#define TF_UNUSED009 0x01000000
#define TF_FORCE 0x02000000 /* Set if forcing out a byte */
#define TF_ONOUTPUTQ 0x04000000 /* on t_outputq list */
-#define TF_UNUSED002 0x08000000
+#define TF_FAIRSEND 0x08000000 /* set while tcp_output_fair() runs tcp_output() */
#define TF_UNUSED003 0x10000000
#define TF_UNUSED004 0x20000000
#define TF_KEEPALIVE 0x40000000 /* temporary keepalive */
void tcp_mtudisc (struct inpcb *, int);
struct tcpcb *
tcp_newtcpcb (struct inpcb *);
-int tcp_output (struct tcpcb *);
+int tcp_output(struct tcpcb *);
+int tcp_output_fair(struct tcpcb *);
+void tcp_output_init(struct tcpcb *);
+void tcp_output_cancel(struct tcpcb *);
+boolean_t
+ tcp_output_pending(struct tcpcb *);
void tcp_quench (struct inpcb *, int);
void tcp_respond (struct tcpcb *, void *,
struct tcphdr *, struct mbuf *, tcp_seq, tcp_seq, int);
void tcp_sack_destroy(struct scoreboard *scb);
void tcp_sack_discard(struct tcpcb *tp);
void tcp_sack_report_cleanup(struct tcpcb *tp);
+boolean_t
+ tcp_sack_report_needed(const struct tcpcb *tp);
int tcp_sack_ndsack_blocks(const struct raw_sackblock *blocks,
const int numblocks, tcp_seq snd_una);
void tcp_sack_fill_report(struct tcpcb *tp, u_char *opt, u_int *plen);