Commit | Line | Data |
---|---|---|
86e10434 | 1 | /* |
66d6c637 JH |
2 | * Copyright (c) 2003, 2004 Jeffrey M. Hsu. All rights reserved. |
3 | * Copyright (c) 2003, 2004 The DragonFly Project. All rights reserved. | |
f23061d4 | 4 | * |
66d6c637 JH |
5 | * This code is derived from software contributed to The DragonFly Project |
6 | * by Jeffrey M. Hsu. | |
f23061d4 | 7 | * |
66d6c637 JH |
8 | * Redistribution and use in source and binary forms, with or without |
9 | * modification, are permitted provided that the following conditions | |
10 | * are met: | |
11 | * 1. Redistributions of source code must retain the above copyright | |
12 | * notice, this list of conditions and the following disclaimer. | |
13 | * 2. Redistributions in binary form must reproduce the above copyright | |
14 | * notice, this list of conditions and the following disclaimer in the | |
15 | * documentation and/or other materials provided with the distribution. | |
16 | * 3. Neither the name of The DragonFly Project nor the names of its | |
17 | * contributors may be used to endorse or promote products derived | |
18 | * from this software without specific, prior written permission. | |
f23061d4 | 19 | * |
66d6c637 JH |
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS | |
23 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | |
24 | * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | |
25 | * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, | |
26 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
27 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED | |
28 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
29 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT | |
30 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
31 | * SUCH DAMAGE. | |
32 | */ | |
33 | ||
66d6c637 | 34 | /* |
86e10434 JH |
35 | * All advertising materials mentioning features or use of this software |
36 | * must display the following acknowledgement: | |
37 | * This product includes software developed by Jeffrey M. Hsu. | |
38 | * | |
984263bc MD |
39 | * Copyright (c) 2001 Networks Associates Technologies, Inc. |
40 | * All rights reserved. | |
41 | * | |
42 | * This software was developed for the FreeBSD Project by Jonathan Lemon | |
43 | * and NAI Labs, the Security Research Division of Network Associates, Inc. | |
44 | * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the | |
45 | * DARPA CHATS research program. | |
46 | * | |
47 | * Redistribution and use in source and binary forms, with or without | |
48 | * modification, are permitted provided that the following conditions | |
49 | * are met: | |
50 | * 1. Redistributions of source code must retain the above copyright | |
51 | * notice, this list of conditions and the following disclaimer. | |
52 | * 2. Redistributions in binary form must reproduce the above copyright | |
53 | * notice, this list of conditions and the following disclaimer in the | |
54 | * documentation and/or other materials provided with the distribution. | |
55 | * 3. The name of the author may not be used to endorse or promote | |
56 | * products derived from this software without specific prior written | |
57 | * permission. | |
58 | * | |
59 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | |
60 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
61 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
62 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | |
63 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
64 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
65 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
66 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
67 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
68 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
69 | * SUCH DAMAGE. | |
70 | * | |
71 | * $FreeBSD: src/sys/netinet/tcp_syncache.c,v 1.5.2.14 2003/02/24 04:02:27 silby Exp $ | |
72 | */ | |
73 | ||
b1992928 | 74 | #include "opt_inet.h" |
984263bc | 75 | #include "opt_inet6.h" |
984263bc MD |
76 | |
77 | #include <sys/param.h> | |
78 | #include <sys/systm.h> | |
79 | #include <sys/kernel.h> | |
80 | #include <sys/sysctl.h> | |
81 | #include <sys/malloc.h> | |
82 | #include <sys/mbuf.h> | |
83 | #include <sys/md5.h> | |
84 | #include <sys/proc.h> /* for proc0 declaration */ | |
85 | #include <sys/random.h> | |
86 | #include <sys/socket.h> | |
87 | #include <sys/socketvar.h> | |
3f9db7f8 | 88 | #include <sys/in_cksum.h> |
984263bc | 89 | |
00943fd6 | 90 | #include <sys/msgport2.h> |
4599cf19 | 91 | #include <net/netmsg2.h> |
5337421c | 92 | #include <net/netisr2.h> |
00943fd6 | 93 | |
984263bc MD |
94 | #include <net/if.h> |
95 | #include <net/route.h> | |
96 | ||
97 | #include <netinet/in.h> | |
98 | #include <netinet/in_systm.h> | |
99 | #include <netinet/ip.h> | |
100 | #include <netinet/in_var.h> | |
101 | #include <netinet/in_pcb.h> | |
102 | #include <netinet/ip_var.h> | |
984263bc | 103 | #include <netinet/ip6.h> |
61896e3c | 104 | #ifdef INET6 |
984263bc MD |
105 | #include <netinet/icmp6.h> |
106 | #include <netinet6/nd6.h> | |
61896e3c | 107 | #endif |
984263bc MD |
108 | #include <netinet6/ip6_var.h> |
109 | #include <netinet6/in6_pcb.h> | |
984263bc MD |
110 | #include <netinet/tcp.h> |
111 | #include <netinet/tcp_fsm.h> | |
112 | #include <netinet/tcp_seq.h> | |
113 | #include <netinet/tcp_timer.h> | |
a48c5dd5 | 114 | #include <netinet/tcp_timer2.h> |
984263bc | 115 | #include <netinet/tcp_var.h> |
984263bc | 116 | #include <netinet6/tcp6_var.h> |
984263bc | 117 | |
984263bc MD |
118 | static int tcp_syncookies = 1; |
119 | SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies, CTLFLAG_RW, | |
f23061d4 | 120 | &tcp_syncookies, 0, |
984263bc MD |
121 | "Use TCP SYN cookies if the syncache overflows"); |
122 | ||
123 | static void syncache_drop(struct syncache *, struct syncache_head *); | |
124 | static void syncache_free(struct syncache *); | |
125 | static void syncache_insert(struct syncache *, struct syncache_head *); | |
4accef2f SZ |
126 | static struct syncache *syncache_lookup(struct in_conninfo *, |
127 | struct syncache_head **); | |
984263bc | 128 | static int syncache_respond(struct syncache *, struct mbuf *); |
7e31206a SZ |
129 | static struct socket *syncache_socket(struct syncache *, struct socket *, |
130 | struct mbuf *); | |
984263bc MD |
131 | static void syncache_timer(void *); |
132 | static u_int32_t syncookie_generate(struct syncache *); | |
133 | static struct syncache *syncookie_lookup(struct in_conninfo *, | |
134 | struct tcphdr *, struct socket *); | |
135 | ||
136 | /* | |
137 | * Transmit the SYN,ACK fewer times than TCP_MAXRXTSHIFT specifies. | |
60536dc2 SZ |
138 | * 4 retransmits correspond to a timeout of (3 + 3 + 3 + 3 + 3 == 15) seconds, |
139 | * or (1 + 1 + 2 + 4 + 8 == 16) seconds if RFC6298 is used; the odds are that | |
140 | * the user has given up attempting to connect by then. | |
984263bc | 141 | */ |
#define SYNCACHE_MAXREXMTS		4

/*
 * Default hash table geometry (arbitrary values).  syncache_init() loads
 * them into tcp_syncache before consulting the loader tunables.
 */
#define TCP_SYNCACHE_HASHSIZE		512	/* hash buckets per table */
#define TCP_SYNCACHE_BUCKETLIMIT	30	/* entries per hash bucket */
147 | ||
4599cf19 | 148 | static void syncache_timer_handler(netmsg_t); |
950b8840 | 149 | static int syncache_sysctl_count(SYSCTL_HANDLER_ARGS); |
00943fd6 | 150 | |
984263bc | 151 | struct tcp_syncache { |
984263bc MD |
152 | u_int hashsize; |
153 | u_int hashmask; | |
154 | u_int bucket_limit; | |
984263bc MD |
155 | u_int cache_limit; |
156 | u_int rexmt_limit; | |
157 | u_int hash_secret; | |
984263bc MD |
158 | }; |
159 | static struct tcp_syncache tcp_syncache; | |
160 | ||
186d7dce | 161 | struct syncache_timerq { |
011ef42c | 162 | TAILQ_HEAD(, syncache) list; |
186d7dce SZ |
163 | struct callout timeo; |
164 | struct netmsg_base nm; | |
165 | }; | |
166 | ||
00943fd6 JH |
167 | struct tcp_syncache_percpu { |
168 | struct syncache_head *hashbase; | |
169 | u_int cache_count; | |
186d7dce | 170 | struct syncache_timerq timerq[SYNCACHE_MAXREXMTS + 1]; |
a31a8e3c SZ |
171 | }; |
172 | ||
173 | static struct tcp_syncache_percpu *tcp_syncache_percpu[MAXCPU]; | |
00943fd6 | 174 | |
984263bc MD |
175 | SYSCTL_NODE(_net_inet_tcp, OID_AUTO, syncache, CTLFLAG_RW, 0, "TCP SYN cache"); |
176 | ||
177 | SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, bucketlimit, CTLFLAG_RD, | |
178 | &tcp_syncache.bucket_limit, 0, "Per-bucket hash limit for syncache"); | |
179 | ||
180 | SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, cachelimit, CTLFLAG_RD, | |
181 | &tcp_syncache.cache_limit, 0, "Overall entry limit for syncache"); | |
182 | ||
950b8840 SZ |
183 | SYSCTL_PROC(_net_inet_tcp_syncache, OID_AUTO, count, (CTLTYPE_INT | CTLFLAG_RD), |
184 | 0, 0, syncache_sysctl_count, "I", "Current number of entries in syncache"); | |
984263bc MD |
185 | |
186 | SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, hashsize, CTLFLAG_RD, | |
187 | &tcp_syncache.hashsize, 0, "Size of TCP syncache hashtable"); | |
188 | ||
189 | SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, rexmtlimit, CTLFLAG_RW, | |
190 | &tcp_syncache.rexmt_limit, 0, "Limit on SYN/ACK retransmissions"); | |
191 | ||
192 | static MALLOC_DEFINE(M_SYNCACHE, "syncache", "TCP syncache"); | |
193 | ||
/*
 * Bucket index for an IPv4 connection: the hash secret XORed with the
 * foreign address, the foreign address shifted right by 16, and both
 * ports, then reduced with `mask'.  Every macro argument is parenthesized
 * so that an expression may be passed safely.
 */
#define SYNCACHE_HASH(inc, mask)					\
	((tcp_syncache.hash_secret ^					\
	  (inc)->inc_faddr.s_addr ^					\
	  ((inc)->inc_faddr.s_addr >> 16) ^				\
	  (inc)->inc_fport ^ (inc)->inc_lport) & (mask))

/*
 * Bucket index for an IPv6 connection: as above, but mixing in the first
 * and last 32-bit words of the foreign address.
 */
#define SYNCACHE_HASH6(inc, mask)					\
	((tcp_syncache.hash_secret ^					\
	  (inc)->inc6_faddr.s6_addr32[0] ^				\
	  (inc)->inc6_faddr.s6_addr32[3] ^				\
	  (inc)->inc_fport ^ (inc)->inc_lport) & (mask))

/* True when two IPv4 endpoint pairs have equal ports and addresses. */
#define ENDPTS_EQ(a, b) (						\
	(a)->ie_fport == (b)->ie_fport &&				\
	(a)->ie_lport == (b)->ie_lport &&				\
	(a)->ie_faddr.s_addr == (b)->ie_faddr.s_addr &&			\
	(a)->ie_laddr.s_addr == (b)->ie_laddr.s_addr			\
)

/* True when two endpoint structures are bytewise identical. */
#define ENDPTS6_EQ(a, b)	(memcmp((a), (b), sizeof(*(a))) == 0)
214 | ||
d5082e3d SZ |
215 | static __inline int |
216 | syncache_rto(int slot) | |
217 | { | |
218 | if (tcp_low_rtobase) | |
219 | return (TCPTV_RTOBASE * tcp_syn_backoff_low[slot]); | |
220 | else | |
221 | return (TCPTV_RTOBASE * tcp_syn_backoff[slot]); | |
222 | } | |
223 | ||
00943fd6 JH |
224 | static __inline void |
225 | syncache_timeout(struct tcp_syncache_percpu *syncache_percpu, | |
226 | struct syncache *sc, int slot) | |
227 | { | |
186d7dce | 228 | struct syncache_timerq *tq; |
9d173e54 SZ |
229 | int rto; |
230 | ||
95a16627 SZ |
231 | KASSERT(slot <= SYNCACHE_MAXREXMTS, |
232 | ("syncache: invalid slot %d", slot)); | |
233 | ||
be34e534 SZ |
234 | if (slot > 0) { |
235 | /* | |
d5082e3d SZ |
236 | * Record the time that we spent in SYN|ACK |
237 | * retransmition. | |
238 | * | |
be34e534 SZ |
239 | * Needed by RFC3390 and RFC6298. |
240 | */ | |
d5082e3d | 241 | sc->sc_rxtused += syncache_rto(slot - 1); |
be34e534 | 242 | } |
00943fd6 | 243 | sc->sc_rxtslot = slot; |
9d173e54 | 244 | |
d5082e3d | 245 | rto = syncache_rto(slot); |
9d173e54 SZ |
246 | sc->sc_rxttime = ticks + rto; |
247 | ||
186d7dce SZ |
248 | tq = &syncache_percpu->timerq[slot]; |
249 | TAILQ_INSERT_TAIL(&tq->list, sc, sc_timerq); | |
250 | if (!callout_active(&tq->timeo)) | |
251 | callout_reset(&tq->timeo, rto, syncache_timer, &tq->nm); | |
00943fd6 | 252 | } |
984263bc MD |
253 | |
254 | static void | |
255 | syncache_free(struct syncache *sc) | |
256 | { | |
257 | struct rtentry *rt; | |
61896e3c JH |
258 | #ifdef INET6 |
259 | const boolean_t isipv6 = sc->sc_inc.inc_isipv6; | |
260 | #else | |
261 | const boolean_t isipv6 = FALSE; | |
262 | #endif | |
984263bc MD |
263 | |
264 | if (sc->sc_ipopts) | |
f23061d4 | 265 | m_free(sc->sc_ipopts); |
a5263048 JH |
266 | |
267 | rt = isipv6 ? sc->sc_route6.ro_rt : sc->sc_route.ro_rt; | |
984263bc MD |
268 | if (rt != NULL) { |
269 | /* | |
a5263048 | 270 | * If this is the only reference to a protocol-cloned |
984263bc MD |
271 | * route, remove it immediately. |
272 | */ | |
91499441 SZ |
273 | if ((rt->rt_flags & (RTF_WASCLONED | RTF_LLINFO)) == |
274 | RTF_WASCLONED && rt->rt_refcnt == 1) { | |
f23061d4 JH |
275 | rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, |
276 | rt_mask(rt), rt->rt_flags, NULL); | |
91499441 | 277 | } |
984263bc MD |
278 | RTFREE(rt); |
279 | } | |
9f42c129 | 280 | kfree(sc, M_SYNCACHE); |
984263bc MD |
281 | } |
282 | ||
a31a8e3c SZ |
283 | static void |
284 | syncache_init_dispatch(netmsg_t nm) | |
285 | { | |
286 | struct tcp_syncache_percpu *syncache_percpu; | |
287 | int i; | |
288 | ||
289 | ASSERT_NETISR_NCPUS(mycpuid); | |
290 | ||
291 | syncache_percpu = kmalloc(sizeof(*syncache_percpu), M_SYNCACHE, | |
3a4ff7da | 292 | M_WAITOK | M_ZERO); |
a31a8e3c SZ |
293 | |
294 | /* Allocate the hash table. */ | |
3a4ff7da MD |
295 | syncache_percpu->hashbase = kmalloc(tcp_syncache.hashsize * |
296 | sizeof(struct syncache_head), | |
297 | M_SYNCACHE, M_WAITOK | M_ZERO); | |
a31a8e3c SZ |
298 | |
299 | /* Initialize the hash buckets. */ | |
300 | for (i = 0; i < tcp_syncache.hashsize; i++) { | |
301 | struct syncache_head *bucket; | |
302 | ||
303 | bucket = &syncache_percpu->hashbase[i]; | |
304 | TAILQ_INIT(&bucket->sch_bucket); | |
305 | bucket->sch_length = 0; | |
306 | } | |
307 | ||
308 | for (i = 0; i <= SYNCACHE_MAXREXMTS; i++) { | |
309 | struct syncache_timerq *tq = | |
310 | &syncache_percpu->timerq[i]; | |
311 | ||
312 | /* Initialize the timer queues. */ | |
313 | TAILQ_INIT(&tq->list); | |
314 | callout_init_mp(&tq->timeo); | |
315 | ||
316 | netmsg_init(&tq->nm, NULL, &netisr_adone_rport, | |
317 | MSGF_PRIORITY, syncache_timer_handler); | |
318 | tq->nm.lmsg.u.ms_result = i; | |
319 | } | |
320 | ||
321 | tcp_syncache_percpu[mycpuid] = syncache_percpu; | |
322 | ||
323 | netisr_forwardmsg(&nm->base, mycpuid + 1); | |
324 | } | |
325 | ||
984263bc MD |
326 | void |
327 | syncache_init(void) | |
328 | { | |
a31a8e3c | 329 | struct netmsg_base nm; |
984263bc | 330 | |
984263bc MD |
331 | tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE; |
332 | tcp_syncache.bucket_limit = TCP_SYNCACHE_BUCKETLIMIT; | |
333 | tcp_syncache.cache_limit = | |
334 | tcp_syncache.hashsize * tcp_syncache.bucket_limit; | |
335 | tcp_syncache.rexmt_limit = SYNCACHE_MAXREXMTS; | |
0ced1954 | 336 | tcp_syncache.hash_secret = karc4random(); |
984263bc | 337 | |
f23061d4 | 338 | TUNABLE_INT_FETCH("net.inet.tcp.syncache.hashsize", |
984263bc | 339 | &tcp_syncache.hashsize); |
f23061d4 | 340 | TUNABLE_INT_FETCH("net.inet.tcp.syncache.cachelimit", |
984263bc | 341 | &tcp_syncache.cache_limit); |
f23061d4 | 342 | TUNABLE_INT_FETCH("net.inet.tcp.syncache.bucketlimit", |
984263bc MD |
343 | &tcp_syncache.bucket_limit); |
344 | if (!powerof2(tcp_syncache.hashsize)) { | |
a6ec04bc | 345 | kprintf("WARNING: syncache hash size is not a power of 2.\n"); |
984263bc | 346 | tcp_syncache.hashsize = 512; /* safe default */ |
f23061d4 | 347 | } |
984263bc MD |
348 | tcp_syncache.hashmask = tcp_syncache.hashsize - 1; |
349 | ||
a31a8e3c SZ |
350 | netmsg_init(&nm, NULL, &curthread->td_msgport, 0, |
351 | syncache_init_dispatch); | |
352 | netisr_domsg_global(&nm); | |
984263bc MD |
353 | } |
354 | ||
355 | static void | |
f3f70f0d | 356 | syncache_insert(struct syncache *sc, struct syncache_head *sch) |
984263bc | 357 | { |
00943fd6 | 358 | struct tcp_syncache_percpu *syncache_percpu; |
984263bc | 359 | struct syncache *sc2; |
d982be66 | 360 | int i; |
984263bc | 361 | |
a31a8e3c | 362 | syncache_percpu = tcp_syncache_percpu[mycpu->gd_cpuid]; |
00943fd6 | 363 | |
984263bc MD |
364 | /* |
365 | * Make sure that we don't overflow the per-bucket | |
366 | * limit or the total cache size limit. | |
367 | */ | |
984263bc MD |
368 | if (sch->sch_length >= tcp_syncache.bucket_limit) { |
369 | /* | |
370 | * The bucket is full, toss the oldest element. | |
371 | */ | |
372 | sc2 = TAILQ_FIRST(&sch->sch_bucket); | |
21b7d68b SZ |
373 | if (sc2->sc_tp != NULL) |
374 | sc2->sc_tp->ts_recent = ticks; | |
984263bc MD |
375 | syncache_drop(sc2, sch); |
376 | tcpstat.tcps_sc_bucketoverflow++; | |
00943fd6 | 377 | } else if (syncache_percpu->cache_count >= tcp_syncache.cache_limit) { |
984263bc MD |
378 | /* |
379 | * The cache is full. Toss the oldest entry in the | |
380 | * entire cache. This is the front entry in the | |
381 | * first non-empty timer queue with the largest | |
382 | * timeout value. | |
383 | */ | |
384 | for (i = SYNCACHE_MAXREXMTS; i >= 0; i--) { | |
186d7dce | 385 | sc2 = TAILQ_FIRST(&syncache_percpu->timerq[i].list); |
984263bc MD |
386 | if (sc2 != NULL) |
387 | break; | |
388 | } | |
21b7d68b SZ |
389 | if (sc2->sc_tp != NULL) |
390 | sc2->sc_tp->ts_recent = ticks; | |
984263bc MD |
391 | syncache_drop(sc2, NULL); |
392 | tcpstat.tcps_sc_cacheoverflow++; | |
393 | } | |
394 | ||
395 | /* Initialize the entry's timer. */ | |
00943fd6 | 396 | syncache_timeout(syncache_percpu, sc, 0); |
984263bc MD |
397 | |
398 | /* Put it into the bucket. */ | |
399 | TAILQ_INSERT_TAIL(&sch->sch_bucket, sc, sc_hash); | |
400 | sch->sch_length++; | |
00943fd6 | 401 | syncache_percpu->cache_count++; |
984263bc | 402 | tcpstat.tcps_sc_added++; |
984263bc MD |
403 | } |
404 | ||
e5fe3477 | 405 | void |
02ad2f0b | 406 | syncache_destroy(struct tcpcb *tp, struct tcpcb *tp_inh) |
e5fe3477 MD |
407 | { |
408 | struct tcp_syncache_percpu *syncache_percpu; | |
e5fe3477 MD |
409 | int i; |
410 | ||
96fef49f SZ |
411 | ASSERT_NETISR_NCPUS(mycpuid); |
412 | ||
a31a8e3c | 413 | syncache_percpu = tcp_syncache_percpu[mycpu->gd_cpuid]; |
e5fe3477 | 414 | for (i = 0; i < tcp_syncache.hashsize; i++) { |
1af94f56 SZ |
415 | struct syncache_head *bucket; |
416 | struct syncache *sc; | |
417 | ||
e5fe3477 MD |
418 | bucket = &syncache_percpu->hashbase[i]; |
419 | TAILQ_FOREACH(sc, &bucket->sch_bucket, sc_hash) { | |
7123bbff | 420 | if (sc->sc_tp == tp) |
02ad2f0b | 421 | sc->sc_tp = tp_inh; |
e5fe3477 MD |
422 | } |
423 | } | |
e5fe3477 MD |
424 | } |
425 | ||
984263bc | 426 | static void |
f3f70f0d | 427 | syncache_drop(struct syncache *sc, struct syncache_head *sch) |
984263bc | 428 | { |
00943fd6 | 429 | struct tcp_syncache_percpu *syncache_percpu; |
61896e3c JH |
430 | #ifdef INET6 |
431 | const boolean_t isipv6 = sc->sc_inc.inc_isipv6; | |
432 | #else | |
433 | const boolean_t isipv6 = FALSE; | |
434 | #endif | |
984263bc | 435 | |
a31a8e3c | 436 | syncache_percpu = tcp_syncache_percpu[mycpu->gd_cpuid]; |
00943fd6 | 437 | |
984263bc | 438 | if (sch == NULL) { |
61896e3c | 439 | if (isipv6) { |
00943fd6 | 440 | sch = &syncache_percpu->hashbase[ |
984263bc | 441 | SYNCACHE_HASH6(&sc->sc_inc, tcp_syncache.hashmask)]; |
61896e3c | 442 | } else { |
00943fd6 | 443 | sch = &syncache_percpu->hashbase[ |
984263bc MD |
444 | SYNCACHE_HASH(&sc->sc_inc, tcp_syncache.hashmask)]; |
445 | } | |
446 | } | |
447 | ||
984263bc MD |
448 | TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash); |
449 | sch->sch_length--; | |
00943fd6 | 450 | syncache_percpu->cache_count--; |
984263bc | 451 | |
e5fe3477 MD |
452 | /* |
453 | * Cleanup | |
454 | */ | |
5c778c08 | 455 | sc->sc_tp = NULL; |
e5fe3477 | 456 | |
00943fd6 JH |
457 | /* |
458 | * Remove the entry from the syncache timer/timeout queue. Note | |
459 | * that we do not try to stop any running timer since we do not know | |
460 | * whether the timer's message is in-transit or not. Since timeouts | |
461 | * are fairly long, taking an unneeded callout does not detrimentally | |
462 | * effect performance. | |
463 | */ | |
186d7dce SZ |
464 | TAILQ_REMOVE(&syncache_percpu->timerq[sc->sc_rxtslot].list, sc, |
465 | sc_timerq); | |
984263bc MD |
466 | |
467 | syncache_free(sc); | |
468 | } | |
469 | ||
470 | /* | |
00943fd6 JH |
471 | * Place a timeout message on the TCP thread's message queue. |
472 | * This routine runs in soft interrupt context. | |
473 | * | |
474 | * Invariant: whenever this routine is called, the callout must | |
475 | * have been active. Note that the callout is not deactivated until | |
476 | * after the message has been processed in syncache_timer_handler() below. | |
477 | */ | |
478 | static void | |
479 | syncache_timer(void *p) | |
480 | { | |
95a16627 | 481 | struct netmsg_base *msg = p; |
00943fd6 | 482 | |
d7471b53 SZ |
483 | KKASSERT(mycpuid < netisr_ncpus); |
484 | ||
485 | crit_enter(); | |
95a16627 SZ |
486 | if (msg->lmsg.ms_flags & MSGF_DONE) |
487 | netisr_sendmsg_oncpu(msg); | |
d7471b53 | 488 | crit_exit(); |
00943fd6 JH |
489 | } |
490 | ||
491 | /* | |
492 | * Service a timer message queued by timer expiration. | |
493 | * This routine runs in the TCP protocol thread. | |
494 | * | |
984263bc MD |
495 | * Walk the timer queues, looking for SYN,ACKs that need to be retransmitted. |
496 | * If we have retransmitted an entry the maximum number of times, expire it. | |
00943fd6 JH |
497 | * |
498 | * When we finish processing timed-out entries, we restart the timer if there | |
499 | * are any entries still on the queue and deactivate it otherwise. Only after | |
500 | * a timer has been deactivated here can it be restarted by syncache_timeout(). | |
984263bc | 501 | */ |
4599cf19 | 502 | static void |
002c1265 | 503 | syncache_timer_handler(netmsg_t msg) |
984263bc | 504 | { |
00943fd6 | 505 | struct tcp_syncache_percpu *syncache_percpu; |
011ef42c | 506 | struct syncache *nsc; |
186d7dce | 507 | struct syncache_timerq *tq; |
00943fd6 | 508 | int slot; |
984263bc | 509 | |
96fef49f SZ |
510 | ASSERT_NETISR_NCPUS(mycpuid); |
511 | ||
d7471b53 SZ |
512 | /* Reply ASAP. */ |
513 | crit_enter(); | |
514 | netisr_replymsg(&msg->base, 0); | |
515 | crit_exit(); | |
516 | ||
a31a8e3c | 517 | syncache_percpu = tcp_syncache_percpu[mycpu->gd_cpuid]; |
984263bc | 518 | |
95a16627 SZ |
519 | slot = msg->lmsg.u.ms_result; |
520 | KASSERT(slot <= SYNCACHE_MAXREXMTS, | |
521 | ("syncache: invalid slot %d", slot)); | |
186d7dce | 522 | tq = &syncache_percpu->timerq[slot]; |
c1d0893d | 523 | |
011ef42c SZ |
524 | nsc = TAILQ_FIRST(&tq->list); |
525 | while (nsc != NULL) { | |
526 | struct syncache *sc; | |
c1d0893d | 527 | |
011ef42c | 528 | if (ticks < nsc->sc_rxttime) |
00943fd6 | 529 | break; /* finished because timerq sorted by time */ |
011ef42c SZ |
530 | |
531 | sc = nsc; | |
e5fe3477 | 532 | if (sc->sc_tp == NULL) { |
011ef42c | 533 | nsc = TAILQ_NEXT(sc, sc_timerq); |
e5fe3477 MD |
534 | syncache_drop(sc, NULL); |
535 | tcpstat.tcps_sc_stale++; | |
536 | continue; | |
537 | } | |
984263bc MD |
538 | if (slot == SYNCACHE_MAXREXMTS || |
539 | slot >= tcp_syncache.rexmt_limit || | |
96a3f37a | 540 | sc->sc_tp->t_inpcb->inp_gencnt != sc->sc_inp_gencnt) { |
011ef42c | 541 | nsc = TAILQ_NEXT(sc, sc_timerq); |
984263bc MD |
542 | syncache_drop(sc, NULL); |
543 | tcpstat.tcps_sc_stale++; | |
544 | continue; | |
545 | } | |
546 | /* | |
547 | * syncache_respond() may call back into the syncache to | |
548 | * to modify another entry, so do not obtain the next | |
549 | * entry on the timer chain until it has completed. | |
550 | */ | |
f23061d4 | 551 | syncache_respond(sc, NULL); |
984263bc | 552 | tcpstat.tcps_sc_retransmitted++; |
011ef42c SZ |
553 | nsc = TAILQ_NEXT(sc, sc_timerq); |
554 | TAILQ_REMOVE(&tq->list, sc, sc_timerq); | |
00943fd6 | 555 | syncache_timeout(syncache_percpu, sc, slot + 1); |
984263bc | 556 | } |
c1d0893d | 557 | |
011ef42c SZ |
558 | if (nsc != NULL) { |
559 | callout_reset(&tq->timeo, nsc->sc_rxttime - ticks, | |
186d7dce | 560 | syncache_timer, &tq->nm); |
c1d0893d | 561 | } else { |
186d7dce | 562 | callout_deactivate(&tq->timeo); |
c1d0893d | 563 | } |
984263bc MD |
564 | } |
565 | ||
566 | /* | |
567 | * Find an entry in the syncache. | |
568 | */ | |
4accef2f | 569 | static struct syncache * |
f3f70f0d | 570 | syncache_lookup(struct in_conninfo *inc, struct syncache_head **schp) |
984263bc | 571 | { |
00943fd6 | 572 | struct tcp_syncache_percpu *syncache_percpu; |
984263bc MD |
573 | struct syncache *sc; |
574 | struct syncache_head *sch; | |
984263bc | 575 | |
a31a8e3c | 576 | syncache_percpu = tcp_syncache_percpu[mycpu->gd_cpuid]; |
984263bc MD |
577 | #ifdef INET6 |
578 | if (inc->inc_isipv6) { | |
00943fd6 | 579 | sch = &syncache_percpu->hashbase[ |
984263bc MD |
580 | SYNCACHE_HASH6(inc, tcp_syncache.hashmask)]; |
581 | *schp = sch; | |
d982be66 JH |
582 | TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) |
583 | if (ENDPTS6_EQ(&inc->inc_ie, &sc->sc_inc.inc_ie)) | |
984263bc | 584 | return (sc); |
984263bc MD |
585 | } else |
586 | #endif | |
587 | { | |
00943fd6 | 588 | sch = &syncache_percpu->hashbase[ |
984263bc MD |
589 | SYNCACHE_HASH(inc, tcp_syncache.hashmask)]; |
590 | *schp = sch; | |
984263bc MD |
591 | TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) { |
592 | #ifdef INET6 | |
593 | if (sc->sc_inc.inc_isipv6) | |
594 | continue; | |
595 | #endif | |
d982be66 | 596 | if (ENDPTS_EQ(&inc->inc_ie, &sc->sc_inc.inc_ie)) |
984263bc | 597 | return (sc); |
984263bc | 598 | } |
984263bc MD |
599 | } |
600 | return (NULL); | |
601 | } | |
602 | ||
603 | /* | |
604 | * This function is called when we get a RST for a | |
605 | * non-existent connection, so that we can see if the | |
606 | * connection is in the syn cache. If it is, zap it. | |
607 | */ | |
608 | void | |
f3f70f0d | 609 | syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th) |
984263bc MD |
610 | { |
611 | struct syncache *sc; | |
612 | struct syncache_head *sch; | |
613 | ||
96fef49f SZ |
614 | ASSERT_NETISR_NCPUS(mycpuid); |
615 | ||
984263bc | 616 | sc = syncache_lookup(inc, &sch); |
c1d0893d | 617 | if (sc == NULL) { |
984263bc | 618 | return; |
c1d0893d | 619 | } |
984263bc MD |
620 | /* |
621 | * If the RST bit is set, check the sequence number to see | |
622 | * if this is a valid reset segment. | |
623 | * RFC 793 page 37: | |
624 | * In all states except SYN-SENT, all reset (RST) segments | |
625 | * are validated by checking their SEQ-fields. A reset is | |
626 | * valid if its sequence number is in the window. | |
627 | * | |
628 | * The sequence number in the reset segment is normally an | |
629 | * echo of our outgoing acknowlegement numbers, but some hosts | |
630 | * send a reset with the sequence number at the rightmost edge | |
631 | * of our receive window, and we have to handle this case. | |
632 | */ | |
633 | if (SEQ_GEQ(th->th_seq, sc->sc_irs) && | |
634 | SEQ_LEQ(th->th_seq, sc->sc_irs + sc->sc_wnd)) { | |
635 | syncache_drop(sc, sch); | |
636 | tcpstat.tcps_sc_reset++; | |
637 | } | |
638 | } | |
639 | ||
640 | void | |
f3f70f0d | 641 | syncache_badack(struct in_conninfo *inc) |
984263bc MD |
642 | { |
643 | struct syncache *sc; | |
644 | struct syncache_head *sch; | |
645 | ||
96fef49f SZ |
646 | ASSERT_NETISR_NCPUS(mycpuid); |
647 | ||
984263bc MD |
648 | sc = syncache_lookup(inc, &sch); |
649 | if (sc != NULL) { | |
650 | syncache_drop(sc, sch); | |
651 | tcpstat.tcps_sc_badack++; | |
652 | } | |
653 | } | |
654 | ||
655 | void | |
ffb15150 | 656 | syncache_unreach(struct in_conninfo *inc, const struct tcphdr *th) |
984263bc MD |
657 | { |
658 | struct syncache *sc; | |
659 | struct syncache_head *sch; | |
660 | ||
96fef49f SZ |
661 | ASSERT_NETISR_NCPUS(mycpuid); |
662 | ||
984263bc MD |
663 | /* we are called at splnet() here */ |
664 | sc = syncache_lookup(inc, &sch); | |
665 | if (sc == NULL) | |
666 | return; | |
667 | ||
668 | /* If the sequence number != sc_iss, then it's a bogus ICMP msg */ | |
669 | if (ntohl(th->th_seq) != sc->sc_iss) | |
670 | return; | |
671 | ||
672 | /* | |
673 | * If we've rertransmitted 3 times and this is our second error, | |
674 | * we remove the entry. Otherwise, we allow it to continue on. | |
675 | * This prevents us from incorrectly nuking an entry during a | |
676 | * spurious network outage. | |
677 | * | |
678 | * See tcp_notify(). | |
679 | */ | |
680 | if ((sc->sc_flags & SCF_UNREACH) == 0 || sc->sc_rxtslot < 3) { | |
681 | sc->sc_flags |= SCF_UNREACH; | |
682 | return; | |
683 | } | |
684 | syncache_drop(sc, sch); | |
685 | tcpstat.tcps_sc_unreach++; | |
686 | } | |
687 | ||
688 | /* | |
689 | * Build a new TCP socket structure from a syncache entry. | |
48e7b118 MD |
690 | * |
691 | * This is called from the context of the SYN+ACK | |
984263bc MD |
692 | */ |
693 | static struct socket * | |
7e31206a | 694 | syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) |
984263bc | 695 | { |
ed894f8c | 696 | struct inpcb *inp = NULL, *linp; |
984263bc | 697 | struct socket *so; |
2ce132be | 698 | struct tcpcb *tp, *ltp; |
48e7b118 | 699 | lwkt_port_t port; |
61896e3c JH |
700 | #ifdef INET6 |
701 | const boolean_t isipv6 = sc->sc_inc.inc_isipv6; | |
702 | #else | |
703 | const boolean_t isipv6 = FALSE; | |
704 | #endif | |
88da6203 | 705 | struct sockaddr_in sin_faddr; |
68dc4251 | 706 | struct sockaddr_in6 sin6_faddr; |
88da6203 SZ |
707 | struct sockaddr *faddr; |
708 | ||
76a9ffca SZ |
709 | KASSERT(m->m_flags & M_HASH, ("mbuf has no hash")); |
710 | ||
88da6203 | 711 | if (isipv6) { |
68dc4251 SZ |
712 | faddr = (struct sockaddr *)&sin6_faddr; |
713 | sin6_faddr.sin6_family = AF_INET6; | |
714 | sin6_faddr.sin6_len = sizeof(sin6_faddr); | |
715 | sin6_faddr.sin6_addr = sc->sc_inc.inc6_faddr; | |
716 | sin6_faddr.sin6_port = sc->sc_inc.inc_fport; | |
717 | sin6_faddr.sin6_flowinfo = sin6_faddr.sin6_scope_id = 0; | |
88da6203 | 718 | } else { |
88da6203 SZ |
719 | faddr = (struct sockaddr *)&sin_faddr; |
720 | sin_faddr.sin_family = AF_INET; | |
721 | sin_faddr.sin_len = sizeof(sin_faddr); | |
722 | sin_faddr.sin_addr = sc->sc_inc.inc_faddr; | |
723 | sin_faddr.sin_port = sc->sc_inc.inc_fport; | |
724 | bzero(sin_faddr.sin_zero, sizeof(sin_faddr.sin_zero)); | |
725 | } | |
984263bc MD |
726 | |
727 | /* | |
728 | * Ok, create the full blown connection, and set things up | |
729 | * as they would have been set up if we had created the | |
730 | * connection when the SYN arrived. If we can't create | |
731 | * the connection, abort it. | |
0ce0603e MD |
732 | * |
733 | * Set the protocol processing port for the socket to the current | |
734 | * port (that the connection came in on). | |
37e299d5 SZ |
735 | * |
736 | * NOTE: | |
737 | * We don't keep a reference on the new socket, since its | |
738 | * destruction will run in this thread (netisrN); there is no | |
739 | * race here. | |
984263bc | 740 | */ |
37e299d5 SZ |
741 | so = sonewconn_faddr(lso, SS_ISCONNECTED, faddr, |
742 | FALSE /* don't ref */); | |
984263bc MD |
743 | if (so == NULL) { |
744 | /* | |
745 | * Drop the connection; we will send a RST if the peer | |
746 | * retransmits the ACK, | |
747 | */ | |
748 | tcpstat.tcps_listendrop++; | |
749 | goto abort; | |
750 | } | |
751 | ||
984263bc MD |
752 | /* |
753 | * Insert new socket into hash list. | |
754 | */ | |
48e7b118 | 755 | inp = so->so_pcb; |
984263bc | 756 | inp->inp_inc.inc_isipv6 = sc->sc_inc.inc_isipv6; |
61896e3c | 757 | if (isipv6) { |
984263bc MD |
758 | inp->in6p_laddr = sc->sc_inc.inc6_laddr; |
759 | } else { | |
727ccde8 | 760 | KASSERT(INP_ISIPV4(inp), ("not inet pcb")); |
984263bc | 761 | inp->inp_laddr = sc->sc_inc.inc_laddr; |
984263bc | 762 | } |
984263bc | 763 | inp->inp_lport = sc->sc_inc.inc_lport; |
05e43c26 | 764 | |
0ce0603e | 765 | linp = lso->so_pcb; |
4a43469a SZ |
766 | ltp = intotcpcb(linp); |
767 | ||
768 | tcp_pcbport_insert(ltp, inp); | |
769 | ||
61896e3c | 770 | if (isipv6) { |
984263bc | 771 | struct in6_addr laddr6; |
984263bc MD |
772 | /* |
773 | * Inherit socket options from the listening socket. | |
774 | * Note that in6p_inputopts are not (and should not be) | |
775 | * copied, since it stores previously received options and is | |
776 | * used to detect if each new option is different than the | |
777 | * previous one and hence should be passed to a user. | |
f23061d4 | 778 | * If we copied in6p_inputopts, a user would not be able to |
984263bc MD |
779 | * receive options just after calling the accept system call. |
780 | */ | |
ed894f8c JH |
781 | inp->inp_flags |= linp->inp_flags & INP_CONTROLOPTS; |
782 | if (linp->in6p_outputopts) | |
984263bc | 783 | inp->in6p_outputopts = |
ed894f8c | 784 | ip6_copypktopts(linp->in6p_outputopts, M_INTWAIT); |
984263bc MD |
785 | inp->in6p_route = sc->sc_route6; |
786 | sc->sc_route6.ro_rt = NULL; | |
787 | ||
984263bc MD |
788 | laddr6 = inp->in6p_laddr; |
789 | if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) | |
790 | inp->in6p_laddr = sc->sc_inc.inc6_laddr; | |
68dc4251 | 791 | if (in6_pcbconnect(inp, faddr, &thread0)) { |
984263bc | 792 | inp->in6p_laddr = laddr6; |
984263bc MD |
793 | goto abort; |
794 | } | |
7f20f9e1 | 795 | port = tcp6_addrport(); |
61896e3c | 796 | } else { |
984263bc | 797 | struct in_addr laddr; |
984263bc | 798 | |
7e31206a | 799 | inp->inp_options = ip_srcroute(m); |
984263bc MD |
800 | if (inp->inp_options == NULL) { |
801 | inp->inp_options = sc->sc_ipopts; | |
802 | sc->sc_ipopts = NULL; | |
803 | } | |
804 | inp->inp_route = sc->sc_route; | |
805 | sc->sc_route.ro_rt = NULL; | |
806 | ||
984263bc MD |
807 | laddr = inp->inp_laddr; |
808 | if (inp->inp_laddr.s_addr == INADDR_ANY) | |
809 | inp->inp_laddr = sc->sc_inc.inc_laddr; | |
88da6203 | 810 | if (in_pcbconnect(inp, faddr, &thread0)) { |
984263bc | 811 | inp->inp_laddr = laddr; |
984263bc MD |
812 | goto abort; |
813 | } | |
76a9ffca SZ |
814 | |
815 | inp->inp_flags |= INP_HASH; | |
816 | inp->inp_hashval = m->m_pkthdr.hash; | |
7f20f9e1 | 817 | port = netisr_hashport(inp->inp_hashval); |
984263bc MD |
818 | } |
819 | ||
48e7b118 MD |
820 | /* |
821 | * The current port should be in the context of the SYN+ACK and | |
822 | * so should match the tcp address port. | |
48e7b118 | 823 | */ |
b8c1d2bd SZ |
824 | KASSERT(port == &curthread->td_msgport, |
825 | ("TCP PORT MISMATCH %p vs %p\n", port, &curthread->td_msgport)); | |
48e7b118 | 826 | |
984263bc | 827 | tp = intotcpcb(inp); |
b2bc8a6a | 828 | TCP_STATE_CHANGE(tp, TCPS_SYN_RECEIVED); |
984263bc MD |
829 | tp->iss = sc->sc_iss; |
830 | tp->irs = sc->sc_irs; | |
831 | tcp_rcvseqinit(tp); | |
832 | tcp_sendseqinit(tp); | |
df1d2774 | 833 | tp->snd_wnd = sc->sc_sndwnd; |
984263bc MD |
834 | tp->snd_wl1 = sc->sc_irs; |
835 | tp->rcv_up = sc->sc_irs + 1; | |
836 | tp->rcv_wnd = sc->sc_wnd; | |
837 | tp->rcv_adv += tp->rcv_wnd; | |
838 | ||
61896e3c | 839 | tp->t_flags = sototcpcb(lso)->t_flags & (TF_NOPUSH | TF_NODELAY); |
984263bc MD |
840 | if (sc->sc_flags & SCF_NOOPT) |
841 | tp->t_flags |= TF_NOOPT; | |
842 | if (sc->sc_flags & SCF_WINSCALE) { | |
61896e3c | 843 | tp->t_flags |= TF_REQ_SCALE | TF_RCVD_SCALE; |
df1d2774 | 844 | tp->snd_scale = sc->sc_requested_s_scale; |
984263bc MD |
845 | tp->request_r_scale = sc->sc_request_r_scale; |
846 | } | |
847 | if (sc->sc_flags & SCF_TIMESTAMP) { | |
61896e3c | 848 | tp->t_flags |= TF_REQ_TSTMP | TF_RCVD_TSTMP; |
984263bc MD |
849 | tp->ts_recent = sc->sc_tsrecent; |
850 | tp->ts_recent_age = ticks; | |
851 | } | |
91489f6b JH |
852 | if (sc->sc_flags & SCF_SACK_PERMITTED) |
853 | tp->t_flags |= TF_SACK_PERMITTED; | |
984263bc | 854 | |
b1992928 MD |
855 | #ifdef TCP_SIGNATURE |
856 | if (sc->sc_flags & SCF_SIGNATURE) | |
857 | tp->t_flags |= TF_SIGNATURE; | |
858 | #endif /* TCP_SIGNATURE */ | |
859 | ||
d5082e3d | 860 | tp->t_rxtsyn = sc->sc_rxtused; |
9a6e3e53 | 861 | tcp_rmx_init(tp, sc->sc_peer_mss); |
984263bc | 862 | |
2ce132be SZ |
863 | /* |
864 | * Inherit some properties from the listen socket | |
865 | */ | |
2ce132be | 866 | tp->t_keepinit = ltp->t_keepinit; |
7ea3a353 | 867 | tp->t_keepidle = ltp->t_keepidle; |
5d61ded3 SZ |
868 | tp->t_keepintvl = ltp->t_keepintvl; |
869 | tp->t_keepcnt = ltp->t_keepcnt; | |
870 | tp->t_maxidle = ltp->t_maxidle; | |
2ce132be | 871 | |
48e7b118 | 872 | tcp_create_timermsg(tp, port); |
2ce132be | 873 | tcp_callout_reset(tp, tp->tt_keep, tp->t_keepinit, tcp_timer_keep); |
984263bc MD |
874 | |
875 | tcpstat.tcps_accepts++; | |
876 | return (so); | |
877 | ||
878 | abort: | |
879 | if (so != NULL) | |
94aba184 | 880 | soabort_direct(so); |
984263bc MD |
881 | return (NULL); |
882 | } | |
883 | ||
884 | /* | |
885 | * This function gets called when we receive an ACK for a | |
886 | * socket in the LISTEN state. We look up the connection | |
887 | * in the syncache, and if its there, we pull it out of | |
888 | * the cache and turn it into a full-blown connection in | |
889 | * the SYN-RECEIVED state. | |
890 | */ | |
891 | int | |
f3f70f0d SW |
892 | syncache_expand(struct in_conninfo *inc, struct tcphdr *th, struct socket **sop, |
893 | struct mbuf *m) | |
984263bc MD |
894 | { |
895 | struct syncache *sc; | |
896 | struct syncache_head *sch; | |
897 | struct socket *so; | |
898 | ||
96fef49f SZ |
899 | ASSERT_NETISR_NCPUS(mycpuid); |
900 | ||
984263bc MD |
901 | sc = syncache_lookup(inc, &sch); |
902 | if (sc == NULL) { | |
903 | /* | |
f23061d4 | 904 | * There is no syncache entry, so see if this ACK is |
984263bc MD |
905 | * a returning syncookie. To do this, first: |
906 | * A. See if this socket has had a syncache entry dropped in | |
907 | * the past. We don't want to accept a bogus syncookie | |
f23061d4 | 908 | * if we've never received a SYN. |
984263bc MD |
909 | * B. check that the syncookie is valid. If it is, then |
910 | * cobble up a fake syncache entry, and return. | |
911 | */ | |
912 | if (!tcp_syncookies) | |
913 | return (0); | |
914 | sc = syncookie_lookup(inc, th, *sop); | |
915 | if (sc == NULL) | |
916 | return (0); | |
917 | sch = NULL; | |
918 | tcpstat.tcps_sc_recvcookie++; | |
919 | } | |
920 | ||
921 | /* | |
922 | * If seg contains an ACK, but not for our SYN/ACK, send a RST. | |
923 | */ | |
924 | if (th->th_ack != sc->sc_iss + 1) | |
925 | return (0); | |
926 | ||
7e31206a | 927 | so = syncache_socket(sc, *sop, m); |
984263bc MD |
928 | if (so == NULL) { |
929 | #if 0 | |
930 | resetandabort: | |
931 | /* XXXjlemon check this - is this correct? */ | |
f23061d4 | 932 | tcp_respond(NULL, m, m, th, |
61896e3c | 933 | th->th_seq + tlen, (tcp_seq)0, TH_RST | TH_ACK); |
984263bc MD |
934 | #endif |
935 | m_freem(m); /* XXX only needed for above */ | |
936 | tcpstat.tcps_sc_aborted++; | |
937 | } else { | |
984263bc MD |
938 | tcpstat.tcps_sc_completed++; |
939 | } | |
940 | if (sch == NULL) | |
941 | syncache_free(sc); | |
942 | else | |
943 | syncache_drop(sc, sch); | |
944 | *sop = so; | |
945 | return (1); | |
946 | } | |
947 | ||
948 | /* | |
949 | * Given a LISTEN socket and an inbound SYN request, add | |
950 | * this to the syn cache, and send back a segment: | |
951 | * <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK> | |
952 | * to the source. | |
953 | * | |
954 | * IMPORTANT NOTE: We do _NOT_ ACK data that might accompany the SYN. | |
955 | * Doing so would require that we hold onto the data and deliver it | |
956 | * to the application. However, if we are the target of a SYN-flood | |
957 | * DoS attack, an attacker could send data which would eventually | |
958 | * consume all available buffer space if it were ACKed. By not ACKing | |
959 | * the data, we avoid this DoS scenario. | |
960 | */ | |
961 | int | |
f3f70f0d | 962 | syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, |
b09567cc | 963 | struct socket *so, struct mbuf *m) |
984263bc | 964 | { |
00943fd6 | 965 | struct tcp_syncache_percpu *syncache_percpu; |
984263bc | 966 | struct tcpcb *tp; |
984263bc MD |
967 | struct syncache *sc = NULL; |
968 | struct syncache_head *sch; | |
969 | struct mbuf *ipopts = NULL; | |
913d40d1 | 970 | int win; |
984263bc | 971 | |
96fef49f | 972 | ASSERT_NETISR_NCPUS(mycpuid); |
76a9ffca SZ |
973 | KASSERT(m->m_flags & M_HASH, ("mbuf has no hash")); |
974 | ||
a31a8e3c | 975 | syncache_percpu = tcp_syncache_percpu[mycpu->gd_cpuid]; |
984263bc MD |
976 | tp = sototcpcb(so); |
977 | ||
978 | /* | |
979 | * Remember the IP options, if any. | |
980 | */ | |
981 | #ifdef INET6 | |
982 | if (!inc->inc_isipv6) | |
983 | #endif | |
7e31206a | 984 | ipopts = ip_srcroute(m); |
984263bc MD |
985 | |
986 | /* | |
987 | * See if we already have an entry for this connection. | |
988 | * If we do, resend the SYN,ACK, and reset the retransmit timer. | |
989 | * | |
990 | * XXX | |
91489f6b JH |
991 | * The syncache should be re-initialized with the contents |
992 | * of the new SYN which may have different options. | |
984263bc MD |
993 | */ |
994 | sc = syncache_lookup(inc, &sch); | |
995 | if (sc != NULL) { | |
76a9ffca SZ |
996 | KASSERT(sc->sc_flags & SCF_HASH, ("syncache has no hash")); |
997 | KASSERT(sc->sc_hashval == m->m_pkthdr.hash, | |
998 | ("syncache/mbuf hash mismatches")); | |
999 | ||
984263bc MD |
1000 | tcpstat.tcps_sc_dupsyn++; |
1001 | if (ipopts) { | |
1002 | /* | |
1003 | * If we were remembering a previous source route, | |
1004 | * forget it and use the new one we've been given. | |
1005 | */ | |
1006 | if (sc->sc_ipopts) | |
f23061d4 | 1007 | m_free(sc->sc_ipopts); |
984263bc MD |
1008 | sc->sc_ipopts = ipopts; |
1009 | } | |
1010 | /* | |
1011 | * Update timestamp if present. | |
1012 | */ | |
1013 | if (sc->sc_flags & SCF_TIMESTAMP) | |
1014 | sc->sc_tsrecent = to->to_tsval; | |
91489f6b JH |
1015 | |
1016 | /* Just update the TOF_SACK_PERMITTED for now. */ | |
1017 | if (tcp_do_sack && (to->to_flags & TOF_SACK_PERMITTED)) | |
1018 | sc->sc_flags |= SCF_SACK_PERMITTED; | |
1019 | else | |
1020 | sc->sc_flags &= ~SCF_SACK_PERMITTED; | |
1021 | ||
df1d2774 SZ |
1022 | /* Update initial send window */ |
1023 | sc->sc_sndwnd = th->th_win; | |
1024 | ||
984263bc MD |
1025 | /* |
1026 | * PCB may have changed, pick up new values. | |
1027 | */ | |
1028 | sc->sc_tp = tp; | |
1029 | sc->sc_inp_gencnt = tp->t_inpcb->inp_gencnt; | |
1030 | if (syncache_respond(sc, m) == 0) { | |
186d7dce SZ |
1031 | TAILQ_REMOVE( |
1032 | &syncache_percpu->timerq[sc->sc_rxtslot].list, | |
1033 | sc, sc_timerq); | |
00943fd6 | 1034 | syncache_timeout(syncache_percpu, sc, sc->sc_rxtslot); |
f23061d4 | 1035 | tcpstat.tcps_sndacks++; |
984263bc MD |
1036 | tcpstat.tcps_sndtotal++; |
1037 | } | |
984263bc MD |
1038 | return (1); |
1039 | } | |
1040 | ||
984263bc MD |
1041 | /* |
1042 | * Fill in the syncache values. | |
1043 | */ | |
9f42c129 | 1044 | sc = kmalloc(sizeof(struct syncache), M_SYNCACHE, M_WAITOK|M_ZERO); |
984263bc MD |
1045 | sc->sc_inp_gencnt = tp->t_inpcb->inp_gencnt; |
1046 | sc->sc_ipopts = ipopts; | |
1047 | sc->sc_inc.inc_fport = inc->inc_fport; | |
1048 | sc->sc_inc.inc_lport = inc->inc_lport; | |
e5fe3477 | 1049 | sc->sc_tp = tp; |
984263bc MD |
1050 | #ifdef INET6 |
1051 | sc->sc_inc.inc_isipv6 = inc->inc_isipv6; | |
1052 | if (inc->inc_isipv6) { | |
1053 | sc->sc_inc.inc6_faddr = inc->inc6_faddr; | |
1054 | sc->sc_inc.inc6_laddr = inc->inc6_laddr; | |
1055 | sc->sc_route6.ro_rt = NULL; | |
1056 | } else | |
1057 | #endif | |
1058 | { | |
1059 | sc->sc_inc.inc_faddr = inc->inc_faddr; | |
1060 | sc->sc_inc.inc_laddr = inc->inc_laddr; | |
1061 | sc->sc_route.ro_rt = NULL; | |
1062 | } | |
1063 | sc->sc_irs = th->th_seq; | |
76a9ffca SZ |
1064 | sc->sc_flags = SCF_HASH; |
1065 | sc->sc_hashval = m->m_pkthdr.hash; | |
984263bc MD |
1066 | sc->sc_peer_mss = to->to_flags & TOF_MSS ? to->to_mss : 0; |
1067 | if (tcp_syncookies) | |
1068 | sc->sc_iss = syncookie_generate(sc); | |
1069 | else | |
0ced1954 | 1070 | sc->sc_iss = karc4random(); |
984263bc | 1071 | |
6d49aa6f MD |
1072 | /* Initial receive window: clip ssb_space to [0 .. TCP_MAXWIN] */ |
1073 | win = ssb_space(&so->so_rcv); | |
984263bc MD |
1074 | win = imax(win, 0); |
1075 | win = imin(win, TCP_MAXWIN); | |
1076 | sc->sc_wnd = win; | |
1077 | ||
1078 | if (tcp_do_rfc1323) { | |
1079 | /* | |
1080 | * A timestamp received in a SYN makes | |
1081 | * it ok to send timestamp requests and replies. | |
1082 | */ | |
1083 | if (to->to_flags & TOF_TS) { | |
1084 | sc->sc_tsrecent = to->to_tsval; | |
1085 | sc->sc_flags |= SCF_TIMESTAMP; | |
1086 | } | |
1087 | if (to->to_flags & TOF_SCALE) { | |
46e92930 | 1088 | int wscale = TCP_MIN_WINSHIFT; |
984263bc MD |
1089 | |
1090 | /* Compute proper scaling value from buffer space */ | |
1091 | while (wscale < TCP_MAX_WINSHIFT && | |
46e92930 | 1092 | (TCP_MAXWIN << wscale) < so->so_rcv.ssb_hiwat) { |
984263bc | 1093 | wscale++; |
46e92930 | 1094 | } |
984263bc MD |
1095 | sc->sc_request_r_scale = wscale; |
1096 | sc->sc_requested_s_scale = to->to_requested_s_scale; | |
1097 | sc->sc_flags |= SCF_WINSCALE; | |
1098 | } | |
1099 | } | |
91489f6b JH |
1100 | if (tcp_do_sack && (to->to_flags & TOF_SACK_PERMITTED)) |
1101 | sc->sc_flags |= SCF_SACK_PERMITTED; | |
984263bc MD |
1102 | if (tp->t_flags & TF_NOOPT) |
1103 | sc->sc_flags = SCF_NOOPT; | |
b1992928 MD |
1104 | #ifdef TCP_SIGNATURE |
1105 | /* | |
1106 | * If listening socket requested TCP digests, and received SYN | |
1107 | * contains the option, flag this in the syncache so that | |
1108 | * syncache_respond() will do the right thing with the SYN+ACK. | |
1109 | * XXX Currently we always record the option by default and will | |
1110 | * attempt to use it in syncache_respond(). | |
1111 | */ | |
1112 | if (to->to_flags & TOF_SIGNATURE) | |
1113 | sc->sc_flags = SCF_SIGNATURE; | |
1114 | #endif /* TCP_SIGNATURE */ | |
df1d2774 | 1115 | sc->sc_sndwnd = th->th_win; |
984263bc | 1116 | |
984263bc MD |
1117 | if (syncache_respond(sc, m) == 0) { |
1118 | syncache_insert(sc, sch); | |
1119 | tcpstat.tcps_sndacks++; | |
1120 | tcpstat.tcps_sndtotal++; | |
1121 | } else { | |
1122 | syncache_free(sc); | |
1123 | tcpstat.tcps_sc_dropped++; | |
1124 | } | |
984263bc MD |
1125 | return (1); |
1126 | } | |
1127 | ||
1128 | static int | |
f3f70f0d | 1129 | syncache_respond(struct syncache *sc, struct mbuf *m) |
984263bc MD |
1130 | { |
1131 | u_int8_t *optp; | |
1132 | int optlen, error; | |
1133 | u_int16_t tlen, hlen, mssopt; | |
1134 | struct ip *ip = NULL; | |
1135 | struct rtentry *rt; | |
1136 | struct tcphdr *th; | |
984263bc | 1137 | struct ip6_hdr *ip6 = NULL; |
61896e3c JH |
1138 | #ifdef INET6 |
1139 | const boolean_t isipv6 = sc->sc_inc.inc_isipv6; | |
1140 | #else | |
1141 | const boolean_t isipv6 = FALSE; | |
984263bc MD |
1142 | #endif |
1143 | ||
61896e3c | 1144 | if (isipv6) { |
984263bc MD |
1145 | rt = tcp_rtlookup6(&sc->sc_inc); |
1146 | if (rt != NULL) | |
1147 | mssopt = rt->rt_ifp->if_mtu - | |
1148 | (sizeof(struct ip6_hdr) + sizeof(struct tcphdr)); | |
f23061d4 | 1149 | else |
984263bc MD |
1150 | mssopt = tcp_v6mssdflt; |
1151 | hlen = sizeof(struct ip6_hdr); | |
61896e3c | 1152 | } else { |
984263bc MD |
1153 | rt = tcp_rtlookup(&sc->sc_inc); |
1154 | if (rt != NULL) | |
1155 | mssopt = rt->rt_ifp->if_mtu - | |
1156 | (sizeof(struct ip) + sizeof(struct tcphdr)); | |
f23061d4 | 1157 | else |
984263bc MD |
1158 | mssopt = tcp_mssdflt; |
1159 | hlen = sizeof(struct ip); | |
1160 | } | |
1161 | ||
1162 | /* Compute the size of the TCP options. */ | |
1163 | if (sc->sc_flags & SCF_NOOPT) { | |
1164 | optlen = 0; | |
1165 | } else { | |
1166 | optlen = TCPOLEN_MAXSEG + | |
1167 | ((sc->sc_flags & SCF_WINSCALE) ? 4 : 0) + | |
1168 | ((sc->sc_flags & SCF_TIMESTAMP) ? TCPOLEN_TSTAMP_APPA : 0) + | |
91489f6b | 1169 | ((sc->sc_flags & SCF_SACK_PERMITTED) ? |
f23061d4 | 1170 | TCPOLEN_SACK_PERMITTED_ALIGNED : 0); |
b1992928 | 1171 | #ifdef TCP_SIGNATURE |
4931a889 SZ |
1172 | optlen += ((sc->sc_flags & SCF_SIGNATURE) ? |
1173 | (TCPOLEN_SIGNATURE + 2) : 0); | |
b1992928 | 1174 | #endif /* TCP_SIGNATURE */ |
984263bc MD |
1175 | } |
1176 | tlen = hlen + sizeof(struct tcphdr) + optlen; | |
1177 | ||
1178 | /* | |
1179 | * XXX | |
1180 | * assume that the entire packet will fit in a header mbuf | |
1181 | */ | |
1182 | KASSERT(max_linkhdr + tlen <= MHLEN, ("syncache: mbuf too small")); | |
1183 | ||
1184 | /* | |
1185 | * XXX shouldn't this reuse the mbuf if possible ? | |
1186 | * Create the IP+TCP header from scratch. | |
1187 | */ | |
1188 | if (m) | |
1189 | m_freem(m); | |
1190 | ||
b5523eac | 1191 | m = m_gethdr(M_NOWAIT, MT_HEADER); |
984263bc MD |
1192 | if (m == NULL) |
1193 | return (ENOBUFS); | |
1194 | m->m_data += max_linkhdr; | |
1195 | m->m_len = tlen; | |
1196 | m->m_pkthdr.len = tlen; | |
1197 | m->m_pkthdr.rcvif = NULL; | |
4cc8caef SZ |
1198 | if (tcp_prio_synack) |
1199 | m->m_flags |= M_PRIO; | |
984263bc | 1200 | |
61896e3c | 1201 | if (isipv6) { |
984263bc MD |
1202 | ip6 = mtod(m, struct ip6_hdr *); |
1203 | ip6->ip6_vfc = IPV6_VERSION; | |
1204 | ip6->ip6_nxt = IPPROTO_TCP; | |
1205 | ip6->ip6_src = sc->sc_inc.inc6_laddr; | |
1206 | ip6->ip6_dst = sc->sc_inc.inc6_faddr; | |
1207 | ip6->ip6_plen = htons(tlen - hlen); | |
1208 | /* ip6_hlim is set after checksum */ | |
1209 | /* ip6_flow = ??? */ | |
1210 | ||
1211 | th = (struct tcphdr *)(ip6 + 1); | |
61896e3c | 1212 | } else { |
984263bc MD |
1213 | ip = mtod(m, struct ip *); |
1214 | ip->ip_v = IPVERSION; | |
1215 | ip->ip_hl = sizeof(struct ip) >> 2; | |
8a93af2a | 1216 | ip->ip_len = htons(tlen); |
984263bc MD |
1217 | ip->ip_id = 0; |
1218 | ip->ip_off = 0; | |
1219 | ip->ip_sum = 0; | |
1220 | ip->ip_p = IPPROTO_TCP; | |
1221 | ip->ip_src = sc->sc_inc.inc_laddr; | |
1222 | ip->ip_dst = sc->sc_inc.inc_faddr; | |
1223 | ip->ip_ttl = sc->sc_tp->t_inpcb->inp_ip_ttl; /* XXX */ | |
1224 | ip->ip_tos = sc->sc_tp->t_inpcb->inp_ip_tos; /* XXX */ | |
1225 | ||
1226 | /* | |
61896e3c JH |
1227 | * See if we should do MTU discovery. Route lookups are |
1228 | * expensive, so we will only unset the DF bit if: | |
984263bc MD |
1229 | * |
1230 | * 1) path_mtu_discovery is disabled | |
1231 | * 2) the SCF_UNREACH flag has been set | |
1232 | */ | |
1233 | if (path_mtu_discovery | |
8a93af2a MD |
1234 | && ((sc->sc_flags & SCF_UNREACH) == 0)) |
1235 | { | |
1236 | ip->ip_off |= htons(IP_DF); | |
984263bc MD |
1237 | } |
1238 | ||
1239 | th = (struct tcphdr *)(ip + 1); | |
1240 | } | |
1241 | th->th_sport = sc->sc_inc.inc_lport; | |
1242 | th->th_dport = sc->sc_inc.inc_fport; | |
1243 | ||
1244 | th->th_seq = htonl(sc->sc_iss); | |
1245 | th->th_ack = htonl(sc->sc_irs + 1); | |
1246 | th->th_off = (sizeof(struct tcphdr) + optlen) >> 2; | |
1247 | th->th_x2 = 0; | |
61896e3c | 1248 | th->th_flags = TH_SYN | TH_ACK; |
984263bc MD |
1249 | th->th_win = htons(sc->sc_wnd); |
1250 | th->th_urp = 0; | |
1251 | ||
1252 | /* Tack on the TCP options. */ | |
1253 | if (optlen == 0) | |
1254 | goto no_options; | |
1255 | optp = (u_int8_t *)(th + 1); | |
1256 | *optp++ = TCPOPT_MAXSEG; | |
1257 | *optp++ = TCPOLEN_MAXSEG; | |
1258 | *optp++ = (mssopt >> 8) & 0xff; | |
1259 | *optp++ = mssopt & 0xff; | |
1260 | ||
1261 | if (sc->sc_flags & SCF_WINSCALE) { | |
1262 | *((u_int32_t *)optp) = htonl(TCPOPT_NOP << 24 | | |
1263 | TCPOPT_WINDOW << 16 | TCPOLEN_WINDOW << 8 | | |
1264 | sc->sc_request_r_scale); | |
1265 | optp += 4; | |
1266 | } | |
1267 | ||
1268 | if (sc->sc_flags & SCF_TIMESTAMP) { | |
1269 | u_int32_t *lp = (u_int32_t *)(optp); | |
1270 | ||
1271 | /* Form timestamp option as shown in appendix A of RFC 1323. */ | |
1272 | *lp++ = htonl(TCPOPT_TSTAMP_HDR); | |
1273 | *lp++ = htonl(ticks); | |
1274 | *lp = htonl(sc->sc_tsrecent); | |
1275 | optp += TCPOLEN_TSTAMP_APPA; | |
1276 | } | |
1277 | ||
b1992928 MD |
1278 | #ifdef TCP_SIGNATURE |
1279 | /* | |
1280 | * Handle TCP-MD5 passive opener response. | |
1281 | */ | |
1282 | if (sc->sc_flags & SCF_SIGNATURE) { | |
1283 | u_int8_t *bp = optp; | |
1284 | int i; | |
1285 | ||
1286 | *bp++ = TCPOPT_SIGNATURE; | |
1287 | *bp++ = TCPOLEN_SIGNATURE; | |
1288 | for (i = 0; i < TCP_SIGLEN; i++) | |
1289 | *bp++ = 0; | |
1290 | tcpsignature_compute(m, 0, optlen, | |
1291 | optp + 2, IPSEC_DIR_OUTBOUND); | |
1292 | *bp++ = TCPOPT_NOP; | |
1293 | *bp++ = TCPOPT_EOL; | |
1294 | optp += TCPOLEN_SIGNATURE + 2; | |
4931a889 | 1295 | } |
b1992928 MD |
1296 | #endif /* TCP_SIGNATURE */ |
1297 | ||
91489f6b JH |
1298 | if (sc->sc_flags & SCF_SACK_PERMITTED) { |
1299 | *((u_int32_t *)optp) = htonl(TCPOPT_SACK_PERMITTED_ALIGNED); | |
1300 | optp += TCPOLEN_SACK_PERMITTED_ALIGNED; | |
1301 | } | |
1302 | ||
61896e3c JH |
1303 | no_options: |
1304 | if (isipv6) { | |
984263bc MD |
1305 | struct route_in6 *ro6 = &sc->sc_route6; |
1306 | ||
1307 | th->th_sum = 0; | |
1308 | th->th_sum = in6_cksum(m, IPPROTO_TCP, hlen, tlen - hlen); | |
1309 | ip6->ip6_hlim = in6_selecthlim(NULL, | |
1310 | ro6->ro_rt ? ro6->ro_rt->rt_ifp : NULL); | |
1311 | error = ip6_output(m, NULL, ro6, 0, NULL, NULL, | |
1312 | sc->sc_tp->t_inpcb); | |
61896e3c | 1313 | } else { |
f23061d4 JH |
1314 | th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, |
1315 | htons(tlen - hlen + IPPROTO_TCP)); | |
984263bc MD |
1316 | m->m_pkthdr.csum_flags = CSUM_TCP; |
1317 | m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); | |
7df36335 | 1318 | m->m_pkthdr.csum_thlen = sizeof(struct tcphdr) + optlen; |
76a9ffca SZ |
1319 | KASSERT(sc->sc_flags & SCF_HASH, ("syncache has no hash")); |
1320 | m_sethash(m, sc->sc_hashval); | |
1dbb3516 SZ |
1321 | error = ip_output(m, sc->sc_ipopts, &sc->sc_route, |
1322 | IP_DEBUGROUTE, NULL, sc->sc_tp->t_inpcb); | |
984263bc MD |
1323 | } |
1324 | return (error); | |
1325 | } | |
1326 | ||
1327 | /* | |
1328 | * cookie layers: | |
1329 | * | |
1330 | * |. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .| | |
1331 | * | peer iss | | |
1332 | * | MD5(laddr,faddr,secret,lport,fport) |. . . . . . .| | |
1333 | * | 0 |(A)| | | |
1334 | * (A): peer mss index | |
1335 | */ | |
1336 | ||
1337 | /* | |
1338 | * The values below are chosen to minimize the size of the tcp_secret | |
1339 | * table, as well as providing roughly a 16 second lifetime for the cookie. | |
1340 | */ | |
1341 | ||
1342 | #define SYNCOOKIE_WNDBITS 5 /* exposed bits for window indexing */ | |
1343 | #define SYNCOOKIE_TIMESHIFT 1 /* scale ticks to window time units */ | |
1344 | ||
1345 | #define SYNCOOKIE_WNDMASK ((1 << SYNCOOKIE_WNDBITS) - 1) | |
1346 | #define SYNCOOKIE_NSECRETS (1 << SYNCOOKIE_WNDBITS) | |
1347 | #define SYNCOOKIE_TIMEOUT \ | |
1348 | (hz * (1 << SYNCOOKIE_WNDBITS) / (1 << SYNCOOKIE_TIMESHIFT)) | |
f23061d4 | 1349 | #define SYNCOOKIE_DATAMASK ((3 << SYNCOOKIE_WNDBITS) | SYNCOOKIE_WNDMASK) |
984263bc MD |
1350 | |
1351 | static struct { | |
1352 | u_int32_t ts_secbits[4]; | |
1353 | u_int ts_expire; | |
1354 | } tcp_secret[SYNCOOKIE_NSECRETS]; | |
1355 | ||
1356 | static int tcp_msstab[] = { 0, 536, 1460, 8960 }; | |
1357 | ||
1358 | static MD5_CTX syn_ctx; | |
1359 | ||
1360 | #define MD5Add(v) MD5Update(&syn_ctx, (u_char *)&v, sizeof(v)) | |
1361 | ||
1362 | struct md5_add { | |
1363 | u_int32_t laddr, faddr; | |
1364 | u_int32_t secbits[4]; | |
1365 | u_int16_t lport, fport; | |
1366 | }; | |
1367 | ||
1368 | #ifdef CTASSERT | |
1369 | CTASSERT(sizeof(struct md5_add) == 28); | |
1370 | #endif | |
1371 | ||
1372 | /* | |
1373 | * Consider the problem of a recreated (and retransmitted) cookie. If the | |
f23061d4 JH |
1374 | * original SYN was accepted, the connection is established. The second |
1375 | * SYN is inflight, and if it arrives with an ISN that falls within the | |
1376 | * receive window, the connection is killed. | |
984263bc MD |
1377 | * |
1378 | * However, since cookies have other problems, this may not be worth | |
1379 | * worrying about. | |
1380 | */ | |
1381 | ||
1382 | static u_int32_t | |
1383 | syncookie_generate(struct syncache *sc) | |
1384 | { | |
1385 | u_int32_t md5_buffer[4]; | |
1386 | u_int32_t data; | |
1387 | int idx, i; | |
1388 | struct md5_add add; | |
61896e3c JH |
1389 | #ifdef INET6 |
1390 | const boolean_t isipv6 = sc->sc_inc.inc_isipv6; | |
1391 | #else | |
1392 | const boolean_t isipv6 = FALSE; | |
1393 | #endif | |
984263bc MD |
1394 | |
1395 | idx = ((ticks << SYNCOOKIE_TIMESHIFT) / hz) & SYNCOOKIE_WNDMASK; | |
1396 | if (tcp_secret[idx].ts_expire < ticks) { | |
1397 | for (i = 0; i < 4; i++) | |
0ced1954 | 1398 | tcp_secret[idx].ts_secbits[i] = karc4random(); |
984263bc MD |
1399 | tcp_secret[idx].ts_expire = ticks + SYNCOOKIE_TIMEOUT; |
1400 | } | |
b370aff7 | 1401 | for (data = NELEM(tcp_msstab) - 1; data > 0; data--) |
984263bc MD |
1402 | if (tcp_msstab[data] <= sc->sc_peer_mss) |
1403 | break; | |
1404 | data = (data << SYNCOOKIE_WNDBITS) | idx; | |
1405 | data ^= sc->sc_irs; /* peer's iss */ | |
1406 | MD5Init(&syn_ctx); | |
61896e3c | 1407 | if (isipv6) { |
984263bc MD |
1408 | MD5Add(sc->sc_inc.inc6_laddr); |
1409 | MD5Add(sc->sc_inc.inc6_faddr); | |
1410 | add.laddr = 0; | |
1411 | add.faddr = 0; | |
61896e3c | 1412 | } else { |
984263bc MD |
1413 | add.laddr = sc->sc_inc.inc_laddr.s_addr; |
1414 | add.faddr = sc->sc_inc.inc_faddr.s_addr; | |
1415 | } | |
1416 | add.lport = sc->sc_inc.inc_lport; | |
1417 | add.fport = sc->sc_inc.inc_fport; | |
1418 | add.secbits[0] = tcp_secret[idx].ts_secbits[0]; | |
1419 | add.secbits[1] = tcp_secret[idx].ts_secbits[1]; | |
1420 | add.secbits[2] = tcp_secret[idx].ts_secbits[2]; | |
1421 | add.secbits[3] = tcp_secret[idx].ts_secbits[3]; | |
1422 | MD5Add(add); | |
1423 | MD5Final((u_char *)&md5_buffer, &syn_ctx); | |
1424 | data ^= (md5_buffer[0] & ~SYNCOOKIE_WNDMASK); | |
1425 | return (data); | |
1426 | } | |
1427 | ||
1428 | static struct syncache * | |
f3f70f0d | 1429 | syncookie_lookup(struct in_conninfo *inc, struct tcphdr *th, struct socket *so) |
984263bc MD |
1430 | { |
1431 | u_int32_t md5_buffer[4]; | |
1432 | struct syncache *sc; | |
1433 | u_int32_t data; | |
1434 | int wnd, idx; | |
1435 | struct md5_add add; | |
1436 | ||
1437 | data = (th->th_ack - 1) ^ (th->th_seq - 1); /* remove ISS */ | |
1438 | idx = data & SYNCOOKIE_WNDMASK; | |
1439 | if (tcp_secret[idx].ts_expire < ticks || | |
1440 | sototcpcb(so)->ts_recent + SYNCOOKIE_TIMEOUT < ticks) | |
1441 | return (NULL); | |
1442 | MD5Init(&syn_ctx); | |
1443 | #ifdef INET6 | |
1444 | if (inc->inc_isipv6) { | |
1445 | MD5Add(inc->inc6_laddr); | |
1446 | MD5Add(inc->inc6_faddr); | |
1447 | add.laddr = 0; | |
1448 | add.faddr = 0; | |
1449 | } else | |
1450 | #endif | |
1451 | { | |
1452 | add.laddr = inc->inc_laddr.s_addr; | |
1453 | add.faddr = inc->inc_faddr.s_addr; | |
1454 | } | |
1455 | add.lport = inc->inc_lport; | |
1456 | add.fport = inc->inc_fport; | |
1457 | add.secbits[0] = tcp_secret[idx].ts_secbits[0]; | |
1458 | add.secbits[1] = tcp_secret[idx].ts_secbits[1]; | |
1459 | add.secbits[2] = tcp_secret[idx].ts_secbits[2]; | |
1460 | add.secbits[3] = tcp_secret[idx].ts_secbits[3]; | |
1461 | MD5Add(add); | |
1462 | MD5Final((u_char *)&md5_buffer, &syn_ctx); | |
1463 | data ^= md5_buffer[0]; | |
f23061d4 | 1464 | if (data & ~SYNCOOKIE_DATAMASK) |
984263bc MD |
1465 | return (NULL); |
1466 | data = data >> SYNCOOKIE_WNDBITS; | |
1467 | ||
984263bc MD |
1468 | /* |
1469 | * Fill in the syncache values. | |
1470 | * XXX duplicate code from syncache_add | |
1471 | */ | |
9f42c129 | 1472 | sc = kmalloc(sizeof(struct syncache), M_SYNCACHE, M_WAITOK|M_ZERO); |
984263bc MD |
1473 | sc->sc_ipopts = NULL; |
1474 | sc->sc_inc.inc_fport = inc->inc_fport; | |
1475 | sc->sc_inc.inc_lport = inc->inc_lport; | |
1476 | #ifdef INET6 | |
1477 | sc->sc_inc.inc_isipv6 = inc->inc_isipv6; | |
1478 | if (inc->inc_isipv6) { | |
1479 | sc->sc_inc.inc6_faddr = inc->inc6_faddr; | |
1480 | sc->sc_inc.inc6_laddr = inc->inc6_laddr; | |
1481 | sc->sc_route6.ro_rt = NULL; | |
1482 | } else | |
1483 | #endif | |
1484 | { | |
1485 | sc->sc_inc.inc_faddr = inc->inc_faddr; | |
1486 | sc->sc_inc.inc_laddr = inc->inc_laddr; | |
1487 | sc->sc_route.ro_rt = NULL; | |
1488 | } | |
1489 | sc->sc_irs = th->th_seq - 1; | |
1490 | sc->sc_iss = th->th_ack - 1; | |
6d49aa6f | 1491 | wnd = ssb_space(&so->so_rcv); |
984263bc MD |
1492 | wnd = imax(wnd, 0); |
1493 | wnd = imin(wnd, TCP_MAXWIN); | |
1494 | sc->sc_wnd = wnd; | |
1495 | sc->sc_flags = 0; | |
1496 | sc->sc_rxtslot = 0; | |
1497 | sc->sc_peer_mss = tcp_msstab[data]; | |
1498 | return (sc); | |
1499 | } | |
950b8840 SZ |
1500 | |
1501 | static int | |
1502 | syncache_sysctl_count(SYSCTL_HANDLER_ARGS) | |
1503 | { | |
1504 | u_int count = 0; | |
1505 | int cpu; | |
1506 | ||
1507 | for (cpu = 0; cpu < netisr_ncpus; ++cpu) | |
1508 | count += tcp_syncache_percpu[cpu]->cache_count; | |
1509 | return sysctl_handle_int(oidp, &count, 0, req); | |
1510 | } |