| 1 | /* |
| 2 | * Copyright (c) 2003, 2004 Jeffrey M. Hsu. All rights reserved. |
| 3 | * Copyright (c) 2003, 2004 The DragonFly Project. All rights reserved. |
| 4 | * |
| 5 | * This code is derived from software contributed to The DragonFly Project |
| 6 | * by Jeffrey M. Hsu. |
| 7 | * |
| 8 | * Redistribution and use in source and binary forms, with or without |
| 9 | * modification, are permitted provided that the following conditions |
| 10 | * are met: |
| 11 | * 1. Redistributions of source code must retain the above copyright |
| 12 | * notice, this list of conditions and the following disclaimer. |
| 13 | * 2. Redistributions in binary form must reproduce the above copyright |
| 14 | * notice, this list of conditions and the following disclaimer in the |
| 15 | * documentation and/or other materials provided with the distribution. |
| 16 | * 3. Neither the name of The DragonFly Project nor the names of its |
| 17 | * contributors may be used to endorse or promote products derived |
| 18 | * from this software without specific, prior written permission. |
| 19 | * |
| 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 21 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
| 23 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
| 24 | * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
| 25 | * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, |
| 26 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 27 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED |
| 28 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| 29 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
| 30 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 31 | * SUCH DAMAGE. |
| 32 | */ |
| 33 | |
| 34 | /* |
| 35 | * All advertising materials mentioning features or use of this software |
| 36 | * must display the following acknowledgement: |
| 37 | * This product includes software developed by Jeffrey M. Hsu. |
| 38 | * |
| 39 | * Copyright (c) 2001 Networks Associates Technologies, Inc. |
| 40 | * All rights reserved. |
| 41 | * |
| 42 | * This software was developed for the FreeBSD Project by Jonathan Lemon |
| 43 | * and NAI Labs, the Security Research Division of Network Associates, Inc. |
| 44 | * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the |
| 45 | * DARPA CHATS research program. |
| 46 | * |
| 47 | * Redistribution and use in source and binary forms, with or without |
| 48 | * modification, are permitted provided that the following conditions |
| 49 | * are met: |
| 50 | * 1. Redistributions of source code must retain the above copyright |
| 51 | * notice, this list of conditions and the following disclaimer. |
| 52 | * 2. Redistributions in binary form must reproduce the above copyright |
| 53 | * notice, this list of conditions and the following disclaimer in the |
| 54 | * documentation and/or other materials provided with the distribution. |
| 55 | * 3. The name of the author may not be used to endorse or promote |
| 56 | * products derived from this software without specific prior written |
| 57 | * permission. |
| 58 | * |
| 59 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
| 60 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 61 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 62 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
| 63 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 64 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 65 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 66 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 67 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 68 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 69 | * SUCH DAMAGE. |
| 70 | * |
| 71 | * $FreeBSD: src/sys/netinet/tcp_syncache.c,v 1.5.2.14 2003/02/24 04:02:27 silby Exp $ |
| 72 | * $DragonFly: src/sys/netinet/tcp_syncache.c,v 1.35 2008/11/22 11:03:35 sephe Exp $ |
| 73 | */ |
| 74 | |
| 75 | #include "opt_inet6.h" |
| 76 | #include "opt_ipsec.h" |
| 77 | |
| 78 | #include <sys/param.h> |
| 79 | #include <sys/systm.h> |
| 80 | #include <sys/kernel.h> |
| 81 | #include <sys/sysctl.h> |
| 82 | #include <sys/malloc.h> |
| 83 | #include <sys/mbuf.h> |
| 84 | #include <sys/md5.h> |
| 85 | #include <sys/proc.h> /* for proc0 declaration */ |
| 86 | #include <sys/random.h> |
| 87 | #include <sys/socket.h> |
| 88 | #include <sys/socketvar.h> |
| 89 | #include <sys/in_cksum.h> |
| 90 | |
| 91 | #include <sys/msgport2.h> |
| 92 | #include <net/netmsg2.h> |
| 93 | |
| 94 | #include <net/if.h> |
| 95 | #include <net/route.h> |
| 96 | |
| 97 | #include <netinet/in.h> |
| 98 | #include <netinet/in_systm.h> |
| 99 | #include <netinet/ip.h> |
| 100 | #include <netinet/in_var.h> |
| 101 | #include <netinet/in_pcb.h> |
| 102 | #include <netinet/ip_var.h> |
| 103 | #include <netinet/ip6.h> |
| 104 | #ifdef INET6 |
| 105 | #include <netinet/icmp6.h> |
| 106 | #include <netinet6/nd6.h> |
| 107 | #endif |
| 108 | #include <netinet6/ip6_var.h> |
| 109 | #include <netinet6/in6_pcb.h> |
| 110 | #include <netinet/tcp.h> |
| 111 | #include <netinet/tcp_fsm.h> |
| 112 | #include <netinet/tcp_seq.h> |
| 113 | #include <netinet/tcp_timer.h> |
| 114 | #include <netinet/tcp_timer2.h> |
| 115 | #include <netinet/tcp_var.h> |
| 116 | #include <netinet6/tcp6_var.h> |
| 117 | |
| 118 | #ifdef IPSEC |
| 119 | #include <netinet6/ipsec.h> |
| 120 | #ifdef INET6 |
| 121 | #include <netinet6/ipsec6.h> |
| 122 | #endif |
| 123 | #include <netproto/key/key.h> |
| 124 | #endif /*IPSEC*/ |
| 125 | |
| 126 | #ifdef FAST_IPSEC |
| 127 | #include <netproto/ipsec/ipsec.h> |
| 128 | #ifdef INET6 |
| 129 | #include <netproto/ipsec/ipsec6.h> |
| 130 | #endif |
| 131 | #include <netproto/ipsec/key.h> |
| 132 | #define IPSEC |
| 133 | #endif /*FAST_IPSEC*/ |
| 134 | |
| 135 | #include <vm/vm_zone.h> |
| 136 | |
/*
 * Run-time switch for SYN cookies, exported read-write as the
 * net.inet.tcp.syncookies sysctl.  Defaults to enabled (1).
 * syncache_expand() consults it before attempting a cookie lookup.
 */
static int tcp_syncookies = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies, CTLFLAG_RW,
    &tcp_syncookies, 0,
    "Use TCP SYN cookies if the syncache overflows");
| 141 | |
/*
 * Forward declarations for the syncache and syncookie routines used below.
 * Note that syncache_lookup() is the only one declared without `static'.
 */
static void syncache_drop(struct syncache *, struct syncache_head *);
static void syncache_free(struct syncache *);
static void syncache_insert(struct syncache *, struct syncache_head *);
struct syncache *syncache_lookup(struct in_conninfo *, struct syncache_head **);
static int syncache_respond(struct syncache *, struct mbuf *);
static struct socket *syncache_socket(struct syncache *, struct socket *,
    struct mbuf *);
static void syncache_timer(void *);
static u_int32_t syncookie_generate(struct syncache *);
static struct syncache *syncookie_lookup(struct in_conninfo *,
    struct tcphdr *, struct socket *);
| 153 | |
/*
 * Transmit the SYN,ACK fewer times than TCP_MAXRXTSHIFT specifies.
 * 3 retransmits corresponds to a timeout of (1 + 2 + 4 + 8 == 15) seconds,
 * the odds are that the user has given up attempting to connect by then.
 *
 * This value also sizes the per-CPU timer queue arrays, which have
 * SYNCACHE_MAXREXMTS + 1 slots.
 */
#define SYNCACHE_MAXREXMTS 3

/*
 * Arbitrary default values: the number of hash buckets per CPU (must be a
 * power of 2, see syncache_init()) and the maximum number of entries
 * allowed in a single bucket.
 */
#define TCP_SYNCACHE_HASHSIZE 512
#define TCP_SYNCACHE_BUCKETLIMIT 30
| 164 | |
/*
 * Message sent to a TCP protocol thread port when a syncache retransmit
 * timer fires; it is dispatched to syncache_timer_handler().
 */
struct netmsg_sc_timer {
	struct netmsg nm_netmsg;	/* set up with netmsg_init() */
	struct msgrec *nm_mrec;		/* back pointer to containing msgrec */
};
| 169 | |
/*
 * One record per CPU and per timer slot: the timer message itself plus
 * the port it is sent to and the timer queue slot it services.
 */
struct msgrec {
	struct netmsg_sc_timer msg;	/* timer message for this slot */
	lwkt_port_t port;		/* constant after init */
	int slot;			/* constant after init */
};

/* Handler installed on every timer message by syncache_init(). */
static void syncache_timer_handler(netmsg_t);
| 177 | |
/*
 * Global syncache configuration shared by all CPUs.  Filled in by
 * syncache_init().
 */
struct tcp_syncache {
	struct vm_zone *zone;	/* zone from which syncache entries come */
	u_int hashsize;		/* number of hash buckets per CPU */
	u_int hashmask;		/* hashsize - 1 */
	u_int bucket_limit;	/* max entries per hash bucket */
	u_int cache_limit;	/* compared against the per-CPU cache_count */
	u_int rexmt_limit;	/* max SYN,ACK retransmissions */
	u_int hash_secret;	/* random value mixed into the bucket hash */
};
static struct tcp_syncache tcp_syncache;
| 188 | |
/*
 * Per-CPU syncache state, indexed by mycpu->gd_cpuid.  There is one timer
 * queue, one callout and one message record per retransmit slot.
 */
struct tcp_syncache_percpu {
	struct syncache_head *hashbase;		/* hash table, hashsize buckets */
	u_int cache_count;			/* entries cached on this CPU */
	TAILQ_HEAD(, syncache) timerq[SYNCACHE_MAXREXMTS + 1];
	struct callout tt_timerq[SYNCACHE_MAXREXMTS + 1];
	struct msgrec mrec[SYNCACHE_MAXREXMTS + 1];
};
static struct tcp_syncache_percpu tcp_syncache_percpu[MAXCPU];

/* Reply port given to every timer message; see syncache_init(). */
static struct lwkt_port syncache_null_rport;
| 199 | |
/* The net.inet.tcp.syncache sysctl subtree. */
SYSCTL_NODE(_net_inet_tcp, OID_AUTO, syncache, CTLFLAG_RW, 0, "TCP SYN cache");

/* Read-only views of the sizing parameters set up in syncache_init(). */
SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, bucketlimit, CTLFLAG_RD,
    &tcp_syncache.bucket_limit, 0, "Per-bucket hash limit for syncache");

SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, cachelimit, CTLFLAG_RD,
    &tcp_syncache.cache_limit, 0, "Overall entry limit for syncache");

/*
 * XXX JH
 * Disabled: struct tcp_syncache has no cache_count member; the counter
 * lives in struct tcp_syncache_percpu.
 */
#if 0
SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, count, CTLFLAG_RD,
    &tcp_syncache.cache_count, 0, "Current number of entries in syncache");
#endif

SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, hashsize, CTLFLAG_RD,
    &tcp_syncache.hashsize, 0, "Size of TCP syncache hashtable");

/* The retransmit limit is the only read-write knob in this subtree. */
SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, rexmtlimit, CTLFLAG_RW,
    &tcp_syncache.rexmt_limit, 0, "Limit on SYN/ACK retransmissions");

/* Malloc type used for the per-CPU hash tables. */
static MALLOC_DEFINE(M_SYNCACHE, "syncache", "TCP syncache");
| 221 | |
/*
 * Hash an IPv4 connection to a bucket index: XOR the random secret from
 * tcp_syncache with the foreign address (and its upper 16 bits) and both
 * ports, then reduce with `mask'.
 *
 * Every macro argument is parenthesized so that compound expressions
 * (e.g. `a | b' as the mask) are evaluated as a unit.
 */
#define SYNCACHE_HASH(inc, mask)					\
	((tcp_syncache.hash_secret ^					\
	  (inc)->inc_faddr.s_addr ^					\
	  ((inc)->inc_faddr.s_addr >> 16) ^				\
	  (inc)->inc_fport ^ (inc)->inc_lport) & (mask))

/*
 * IPv6 counterpart of SYNCACHE_HASH: uses the first and last 32-bit words
 * of the foreign address.
 */
#define SYNCACHE_HASH6(inc, mask)					\
	((tcp_syncache.hash_secret ^					\
	  (inc)->inc6_faddr.s6_addr32[0] ^				\
	  (inc)->inc6_faddr.s6_addr32[3] ^				\
	  (inc)->inc_fport ^ (inc)->inc_lport) & (mask))

/* True when two IPv4 endpoint records name the same 4-tuple. */
#define ENDPTS_EQ(a, b) (						\
	(a)->ie_fport == (b)->ie_fport &&				\
	(a)->ie_lport == (b)->ie_lport &&				\
	(a)->ie_faddr.s_addr == (b)->ie_faddr.s_addr &&			\
	(a)->ie_laddr.s_addr == (b)->ie_laddr.s_addr			\
)

/* True when two endpoint records are byte-for-byte identical. */
#define ENDPTS6_EQ(a, b) (memcmp((a), (b), sizeof(*(a))) == 0)
| 242 | |
| 243 | static __inline void |
| 244 | syncache_timeout(struct tcp_syncache_percpu *syncache_percpu, |
| 245 | struct syncache *sc, int slot) |
| 246 | { |
| 247 | sc->sc_rxtslot = slot; |
| 248 | sc->sc_rxttime = ticks + TCPTV_RTOBASE * tcp_backoff[slot]; |
| 249 | TAILQ_INSERT_TAIL(&syncache_percpu->timerq[slot], sc, sc_timerq); |
| 250 | if (!callout_active(&syncache_percpu->tt_timerq[slot])) { |
| 251 | callout_reset(&syncache_percpu->tt_timerq[slot], |
| 252 | TCPTV_RTOBASE * tcp_backoff[slot], |
| 253 | syncache_timer, |
| 254 | &syncache_percpu->mrec[slot]); |
| 255 | } |
| 256 | } |
| 257 | |
| 258 | static void |
| 259 | syncache_free(struct syncache *sc) |
| 260 | { |
| 261 | struct rtentry *rt; |
| 262 | #ifdef INET6 |
| 263 | const boolean_t isipv6 = sc->sc_inc.inc_isipv6; |
| 264 | #else |
| 265 | const boolean_t isipv6 = FALSE; |
| 266 | #endif |
| 267 | |
| 268 | if (sc->sc_ipopts) |
| 269 | m_free(sc->sc_ipopts); |
| 270 | |
| 271 | rt = isipv6 ? sc->sc_route6.ro_rt : sc->sc_route.ro_rt; |
| 272 | if (rt != NULL) { |
| 273 | /* |
| 274 | * If this is the only reference to a protocol-cloned |
| 275 | * route, remove it immediately. |
| 276 | */ |
| 277 | if ((rt->rt_flags & RTF_WASCLONED) && rt->rt_refcnt == 1) |
| 278 | rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, |
| 279 | rt_mask(rt), rt->rt_flags, NULL); |
| 280 | RTFREE(rt); |
| 281 | } |
| 282 | |
| 283 | zfree(tcp_syncache.zone, sc); |
| 284 | } |
| 285 | |
| 286 | void |
| 287 | syncache_init(void) |
| 288 | { |
| 289 | int i, cpu; |
| 290 | |
| 291 | tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE; |
| 292 | tcp_syncache.bucket_limit = TCP_SYNCACHE_BUCKETLIMIT; |
| 293 | tcp_syncache.cache_limit = |
| 294 | tcp_syncache.hashsize * tcp_syncache.bucket_limit; |
| 295 | tcp_syncache.rexmt_limit = SYNCACHE_MAXREXMTS; |
| 296 | tcp_syncache.hash_secret = karc4random(); |
| 297 | |
| 298 | TUNABLE_INT_FETCH("net.inet.tcp.syncache.hashsize", |
| 299 | &tcp_syncache.hashsize); |
| 300 | TUNABLE_INT_FETCH("net.inet.tcp.syncache.cachelimit", |
| 301 | &tcp_syncache.cache_limit); |
| 302 | TUNABLE_INT_FETCH("net.inet.tcp.syncache.bucketlimit", |
| 303 | &tcp_syncache.bucket_limit); |
| 304 | if (!powerof2(tcp_syncache.hashsize)) { |
| 305 | kprintf("WARNING: syncache hash size is not a power of 2.\n"); |
| 306 | tcp_syncache.hashsize = 512; /* safe default */ |
| 307 | } |
| 308 | tcp_syncache.hashmask = tcp_syncache.hashsize - 1; |
| 309 | |
| 310 | lwkt_initport_replyonly_null(&syncache_null_rport); |
| 311 | |
| 312 | for (cpu = 0; cpu < ncpus2; cpu++) { |
| 313 | struct tcp_syncache_percpu *syncache_percpu; |
| 314 | |
| 315 | syncache_percpu = &tcp_syncache_percpu[cpu]; |
| 316 | /* Allocate the hash table. */ |
| 317 | MALLOC(syncache_percpu->hashbase, struct syncache_head *, |
| 318 | tcp_syncache.hashsize * sizeof(struct syncache_head), |
| 319 | M_SYNCACHE, M_WAITOK); |
| 320 | |
| 321 | /* Initialize the hash buckets. */ |
| 322 | for (i = 0; i < tcp_syncache.hashsize; i++) { |
| 323 | struct syncache_head *bucket; |
| 324 | |
| 325 | bucket = &syncache_percpu->hashbase[i]; |
| 326 | TAILQ_INIT(&bucket->sch_bucket); |
| 327 | bucket->sch_length = 0; |
| 328 | } |
| 329 | |
| 330 | for (i = 0; i <= SYNCACHE_MAXREXMTS; i++) { |
| 331 | /* Initialize the timer queues. */ |
| 332 | TAILQ_INIT(&syncache_percpu->timerq[i]); |
| 333 | callout_init(&syncache_percpu->tt_timerq[i]); |
| 334 | |
| 335 | syncache_percpu->mrec[i].slot = i; |
| 336 | syncache_percpu->mrec[i].port = tcp_cport(cpu); |
| 337 | syncache_percpu->mrec[i].msg.nm_mrec = |
| 338 | &syncache_percpu->mrec[i]; |
| 339 | netmsg_init(&syncache_percpu->mrec[i].msg.nm_netmsg, |
| 340 | &syncache_null_rport, 0, |
| 341 | syncache_timer_handler); |
| 342 | } |
| 343 | } |
| 344 | |
| 345 | /* |
| 346 | * Allocate the syncache entries. Allow the zone to allocate one |
| 347 | * more entry than cache limit, so a new entry can bump out an |
| 348 | * older one. |
| 349 | */ |
| 350 | tcp_syncache.zone = zinit("syncache", sizeof(struct syncache), |
| 351 | tcp_syncache.cache_limit * ncpus2, ZONE_INTERRUPT, 0); |
| 352 | tcp_syncache.cache_limit -= 1; |
| 353 | } |
| 354 | |
| 355 | static void |
| 356 | syncache_insert(struct syncache *sc, struct syncache_head *sch) |
| 357 | { |
| 358 | struct tcp_syncache_percpu *syncache_percpu; |
| 359 | struct syncache *sc2; |
| 360 | int i; |
| 361 | |
| 362 | syncache_percpu = &tcp_syncache_percpu[mycpu->gd_cpuid]; |
| 363 | |
| 364 | /* |
| 365 | * Make sure that we don't overflow the per-bucket |
| 366 | * limit or the total cache size limit. |
| 367 | */ |
| 368 | if (sch->sch_length >= tcp_syncache.bucket_limit) { |
| 369 | /* |
| 370 | * The bucket is full, toss the oldest element. |
| 371 | */ |
| 372 | sc2 = TAILQ_FIRST(&sch->sch_bucket); |
| 373 | sc2->sc_tp->ts_recent = ticks; |
| 374 | syncache_drop(sc2, sch); |
| 375 | tcpstat.tcps_sc_bucketoverflow++; |
| 376 | } else if (syncache_percpu->cache_count >= tcp_syncache.cache_limit) { |
| 377 | /* |
| 378 | * The cache is full. Toss the oldest entry in the |
| 379 | * entire cache. This is the front entry in the |
| 380 | * first non-empty timer queue with the largest |
| 381 | * timeout value. |
| 382 | */ |
| 383 | for (i = SYNCACHE_MAXREXMTS; i >= 0; i--) { |
| 384 | sc2 = TAILQ_FIRST(&syncache_percpu->timerq[i]); |
| 385 | if (sc2 != NULL) |
| 386 | break; |
| 387 | } |
| 388 | sc2->sc_tp->ts_recent = ticks; |
| 389 | syncache_drop(sc2, NULL); |
| 390 | tcpstat.tcps_sc_cacheoverflow++; |
| 391 | } |
| 392 | |
| 393 | /* Initialize the entry's timer. */ |
| 394 | syncache_timeout(syncache_percpu, sc, 0); |
| 395 | |
| 396 | /* Put it into the bucket. */ |
| 397 | TAILQ_INSERT_TAIL(&sch->sch_bucket, sc, sc_hash); |
| 398 | sch->sch_length++; |
| 399 | syncache_percpu->cache_count++; |
| 400 | tcpstat.tcps_sc_added++; |
| 401 | } |
| 402 | |
| 403 | static void |
| 404 | syncache_drop(struct syncache *sc, struct syncache_head *sch) |
| 405 | { |
| 406 | struct tcp_syncache_percpu *syncache_percpu; |
| 407 | #ifdef INET6 |
| 408 | const boolean_t isipv6 = sc->sc_inc.inc_isipv6; |
| 409 | #else |
| 410 | const boolean_t isipv6 = FALSE; |
| 411 | #endif |
| 412 | |
| 413 | syncache_percpu = &tcp_syncache_percpu[mycpu->gd_cpuid]; |
| 414 | |
| 415 | if (sch == NULL) { |
| 416 | if (isipv6) { |
| 417 | sch = &syncache_percpu->hashbase[ |
| 418 | SYNCACHE_HASH6(&sc->sc_inc, tcp_syncache.hashmask)]; |
| 419 | } else { |
| 420 | sch = &syncache_percpu->hashbase[ |
| 421 | SYNCACHE_HASH(&sc->sc_inc, tcp_syncache.hashmask)]; |
| 422 | } |
| 423 | } |
| 424 | |
| 425 | TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash); |
| 426 | sch->sch_length--; |
| 427 | syncache_percpu->cache_count--; |
| 428 | |
| 429 | /* |
| 430 | * Remove the entry from the syncache timer/timeout queue. Note |
| 431 | * that we do not try to stop any running timer since we do not know |
| 432 | * whether the timer's message is in-transit or not. Since timeouts |
| 433 | * are fairly long, taking an unneeded callout does not detrimentally |
| 434 | * effect performance. |
| 435 | */ |
| 436 | TAILQ_REMOVE(&syncache_percpu->timerq[sc->sc_rxtslot], sc, sc_timerq); |
| 437 | |
| 438 | syncache_free(sc); |
| 439 | } |
| 440 | |
| 441 | /* |
| 442 | * Place a timeout message on the TCP thread's message queue. |
| 443 | * This routine runs in soft interrupt context. |
| 444 | * |
| 445 | * An invariant is for this routine to be called, the callout must |
| 446 | * have been active. Note that the callout is not deactivated until |
| 447 | * after the message has been processed in syncache_timer_handler() below. |
| 448 | */ |
| 449 | static void |
| 450 | syncache_timer(void *p) |
| 451 | { |
| 452 | struct netmsg_sc_timer *msg = p; |
| 453 | |
| 454 | lwkt_sendmsg(msg->nm_mrec->port, &msg->nm_netmsg.nm_lmsg); |
| 455 | } |
| 456 | |
/*
 * Service a timer message queued by timer expiration.
 * This routine runs in the TCP protocol thread.
 *
 * Walk the timer queues, looking for SYN,ACKs that need to be retransmitted.
 * If we have retransmitted an entry the maximum number of times, expire it.
 *
 * When we finish processing timed-out entries, we restart the timer if there
 * are any entries still on the queue and deactivate it otherwise.  Only after
 * a timer has been deactivated here can it be restarted by syncache_timeout().
 */
static void
syncache_timer_handler(netmsg_t netmsg)
{
	struct tcp_syncache_percpu *syncache_percpu;
	struct syncache *sc, *nsc;
	struct inpcb *inp;
	int slot;

	/* The message's msgrec tells us which timer queue fired. */
	slot = ((struct netmsg_sc_timer *)netmsg)->nm_mrec->slot;
	syncache_percpu = &tcp_syncache_percpu[mycpu->gd_cpuid];

	nsc = TAILQ_FIRST(&syncache_percpu->timerq[slot]);
	while (nsc != NULL) {
		if (ticks < nsc->sc_rxttime)
			break;	/* finished because timerq sorted by time */
		sc = nsc;
		inp = sc->sc_tp->t_inpcb;
		/*
		 * Expire the entry when it sits in the last slot, when the
		 * configured retransmit limit has been reached, or when the
		 * owning pcb's generation count no longer matches the one
		 * recorded in the entry.
		 */
		if (slot == SYNCACHE_MAXREXMTS ||
		    slot >= tcp_syncache.rexmt_limit ||
		    inp->inp_gencnt != sc->sc_inp_gencnt) {
			/* Fetch the successor before the entry is freed. */
			nsc = TAILQ_NEXT(sc, sc_timerq);
			syncache_drop(sc, NULL);
			tcpstat.tcps_sc_stale++;
			continue;
		}
		/*
		 * syncache_respond() may call back into the syncache
		 * to modify another entry, so do not obtain the next
		 * entry on the timer chain until it has completed.
		 */
		syncache_respond(sc, NULL);
		nsc = TAILQ_NEXT(sc, sc_timerq);
		tcpstat.tcps_sc_retransmitted++;
		/* Move the entry to the next (longer) timeout slot. */
		TAILQ_REMOVE(&syncache_percpu->timerq[slot], sc, sc_timerq);
		syncache_timeout(syncache_percpu, sc, slot + 1);
	}
	/* Re-arm for the first unexpired entry, or go idle. */
	if (nsc != NULL)
		callout_reset(&syncache_percpu->tt_timerq[slot],
		    nsc->sc_rxttime - ticks, syncache_timer,
		    &syncache_percpu->mrec[slot]);
	else
		callout_deactivate(&syncache_percpu->tt_timerq[slot]);

	lwkt_replymsg(&netmsg->nm_lmsg, 0);
}
| 513 | |
| 514 | /* |
| 515 | * Find an entry in the syncache. |
| 516 | */ |
| 517 | struct syncache * |
| 518 | syncache_lookup(struct in_conninfo *inc, struct syncache_head **schp) |
| 519 | { |
| 520 | struct tcp_syncache_percpu *syncache_percpu; |
| 521 | struct syncache *sc; |
| 522 | struct syncache_head *sch; |
| 523 | |
| 524 | syncache_percpu = &tcp_syncache_percpu[mycpu->gd_cpuid]; |
| 525 | #ifdef INET6 |
| 526 | if (inc->inc_isipv6) { |
| 527 | sch = &syncache_percpu->hashbase[ |
| 528 | SYNCACHE_HASH6(inc, tcp_syncache.hashmask)]; |
| 529 | *schp = sch; |
| 530 | TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) |
| 531 | if (ENDPTS6_EQ(&inc->inc_ie, &sc->sc_inc.inc_ie)) |
| 532 | return (sc); |
| 533 | } else |
| 534 | #endif |
| 535 | { |
| 536 | sch = &syncache_percpu->hashbase[ |
| 537 | SYNCACHE_HASH(inc, tcp_syncache.hashmask)]; |
| 538 | *schp = sch; |
| 539 | TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) { |
| 540 | #ifdef INET6 |
| 541 | if (sc->sc_inc.inc_isipv6) |
| 542 | continue; |
| 543 | #endif |
| 544 | if (ENDPTS_EQ(&inc->inc_ie, &sc->sc_inc.inc_ie)) |
| 545 | return (sc); |
| 546 | } |
| 547 | } |
| 548 | return (NULL); |
| 549 | } |
| 550 | |
| 551 | /* |
| 552 | * This function is called when we get a RST for a |
| 553 | * non-existent connection, so that we can see if the |
| 554 | * connection is in the syn cache. If it is, zap it. |
| 555 | */ |
| 556 | void |
| 557 | syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th) |
| 558 | { |
| 559 | struct syncache *sc; |
| 560 | struct syncache_head *sch; |
| 561 | |
| 562 | sc = syncache_lookup(inc, &sch); |
| 563 | if (sc == NULL) |
| 564 | return; |
| 565 | /* |
| 566 | * If the RST bit is set, check the sequence number to see |
| 567 | * if this is a valid reset segment. |
| 568 | * RFC 793 page 37: |
| 569 | * In all states except SYN-SENT, all reset (RST) segments |
| 570 | * are validated by checking their SEQ-fields. A reset is |
| 571 | * valid if its sequence number is in the window. |
| 572 | * |
| 573 | * The sequence number in the reset segment is normally an |
| 574 | * echo of our outgoing acknowlegement numbers, but some hosts |
| 575 | * send a reset with the sequence number at the rightmost edge |
| 576 | * of our receive window, and we have to handle this case. |
| 577 | */ |
| 578 | if (SEQ_GEQ(th->th_seq, sc->sc_irs) && |
| 579 | SEQ_LEQ(th->th_seq, sc->sc_irs + sc->sc_wnd)) { |
| 580 | syncache_drop(sc, sch); |
| 581 | tcpstat.tcps_sc_reset++; |
| 582 | } |
| 583 | } |
| 584 | |
| 585 | void |
| 586 | syncache_badack(struct in_conninfo *inc) |
| 587 | { |
| 588 | struct syncache *sc; |
| 589 | struct syncache_head *sch; |
| 590 | |
| 591 | sc = syncache_lookup(inc, &sch); |
| 592 | if (sc != NULL) { |
| 593 | syncache_drop(sc, sch); |
| 594 | tcpstat.tcps_sc_badack++; |
| 595 | } |
| 596 | } |
| 597 | |
| 598 | void |
| 599 | syncache_unreach(struct in_conninfo *inc, struct tcphdr *th) |
| 600 | { |
| 601 | struct syncache *sc; |
| 602 | struct syncache_head *sch; |
| 603 | |
| 604 | /* we are called at splnet() here */ |
| 605 | sc = syncache_lookup(inc, &sch); |
| 606 | if (sc == NULL) |
| 607 | return; |
| 608 | |
| 609 | /* If the sequence number != sc_iss, then it's a bogus ICMP msg */ |
| 610 | if (ntohl(th->th_seq) != sc->sc_iss) |
| 611 | return; |
| 612 | |
| 613 | /* |
| 614 | * If we've rertransmitted 3 times and this is our second error, |
| 615 | * we remove the entry. Otherwise, we allow it to continue on. |
| 616 | * This prevents us from incorrectly nuking an entry during a |
| 617 | * spurious network outage. |
| 618 | * |
| 619 | * See tcp_notify(). |
| 620 | */ |
| 621 | if ((sc->sc_flags & SCF_UNREACH) == 0 || sc->sc_rxtslot < 3) { |
| 622 | sc->sc_flags |= SCF_UNREACH; |
| 623 | return; |
| 624 | } |
| 625 | syncache_drop(sc, sch); |
| 626 | tcpstat.tcps_sc_unreach++; |
| 627 | } |
| 628 | |
| 629 | /* |
| 630 | * Build a new TCP socket structure from a syncache entry. |
| 631 | */ |
| 632 | static struct socket * |
| 633 | syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) |
| 634 | { |
| 635 | struct inpcb *inp = NULL, *linp; |
| 636 | struct socket *so; |
| 637 | struct tcpcb *tp; |
| 638 | #ifdef INET6 |
| 639 | const boolean_t isipv6 = sc->sc_inc.inc_isipv6; |
| 640 | #else |
| 641 | const boolean_t isipv6 = FALSE; |
| 642 | #endif |
| 643 | |
| 644 | /* |
| 645 | * Ok, create the full blown connection, and set things up |
| 646 | * as they would have been set up if we had created the |
| 647 | * connection when the SYN arrived. If we can't create |
| 648 | * the connection, abort it. |
| 649 | */ |
| 650 | so = sonewconn(lso, SS_ISCONNECTED); |
| 651 | if (so == NULL) { |
| 652 | /* |
| 653 | * Drop the connection; we will send a RST if the peer |
| 654 | * retransmits the ACK, |
| 655 | */ |
| 656 | tcpstat.tcps_listendrop++; |
| 657 | goto abort; |
| 658 | } |
| 659 | |
| 660 | inp = so->so_pcb; |
| 661 | |
| 662 | /* |
| 663 | * Insert new socket into hash list. |
| 664 | */ |
| 665 | inp->inp_inc.inc_isipv6 = sc->sc_inc.inc_isipv6; |
| 666 | if (isipv6) { |
| 667 | inp->in6p_laddr = sc->sc_inc.inc6_laddr; |
| 668 | } else { |
| 669 | #ifdef INET6 |
| 670 | inp->inp_vflag &= ~INP_IPV6; |
| 671 | inp->inp_vflag |= INP_IPV4; |
| 672 | #endif |
| 673 | inp->inp_laddr = sc->sc_inc.inc_laddr; |
| 674 | } |
| 675 | inp->inp_lport = sc->sc_inc.inc_lport; |
| 676 | if (in_pcbinsporthash(inp) != 0) { |
| 677 | /* |
| 678 | * Undo the assignments above if we failed to |
| 679 | * put the PCB on the hash lists. |
| 680 | */ |
| 681 | if (isipv6) |
| 682 | inp->in6p_laddr = kin6addr_any; |
| 683 | else |
| 684 | inp->inp_laddr.s_addr = INADDR_ANY; |
| 685 | inp->inp_lport = 0; |
| 686 | goto abort; |
| 687 | } |
| 688 | linp = so->so_pcb; |
| 689 | #ifdef IPSEC |
| 690 | /* copy old policy into new socket's */ |
| 691 | if (ipsec_copy_policy(linp->inp_sp, inp->inp_sp)) |
| 692 | kprintf("syncache_expand: could not copy policy\n"); |
| 693 | #endif |
| 694 | if (isipv6) { |
| 695 | struct in6_addr laddr6; |
| 696 | struct sockaddr_in6 sin6; |
| 697 | /* |
| 698 | * Inherit socket options from the listening socket. |
| 699 | * Note that in6p_inputopts are not (and should not be) |
| 700 | * copied, since it stores previously received options and is |
| 701 | * used to detect if each new option is different than the |
| 702 | * previous one and hence should be passed to a user. |
| 703 | * If we copied in6p_inputopts, a user would not be able to |
| 704 | * receive options just after calling the accept system call. |
| 705 | */ |
| 706 | inp->inp_flags |= linp->inp_flags & INP_CONTROLOPTS; |
| 707 | if (linp->in6p_outputopts) |
| 708 | inp->in6p_outputopts = |
| 709 | ip6_copypktopts(linp->in6p_outputopts, M_INTWAIT); |
| 710 | inp->in6p_route = sc->sc_route6; |
| 711 | sc->sc_route6.ro_rt = NULL; |
| 712 | |
| 713 | sin6.sin6_family = AF_INET6; |
| 714 | sin6.sin6_len = sizeof sin6; |
| 715 | sin6.sin6_addr = sc->sc_inc.inc6_faddr; |
| 716 | sin6.sin6_port = sc->sc_inc.inc_fport; |
| 717 | sin6.sin6_flowinfo = sin6.sin6_scope_id = 0; |
| 718 | laddr6 = inp->in6p_laddr; |
| 719 | if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) |
| 720 | inp->in6p_laddr = sc->sc_inc.inc6_laddr; |
| 721 | if (in6_pcbconnect(inp, (struct sockaddr *)&sin6, &thread0)) { |
| 722 | inp->in6p_laddr = laddr6; |
| 723 | goto abort; |
| 724 | } |
| 725 | } else { |
| 726 | struct in_addr laddr; |
| 727 | struct sockaddr_in sin; |
| 728 | |
| 729 | inp->inp_options = ip_srcroute(m); |
| 730 | if (inp->inp_options == NULL) { |
| 731 | inp->inp_options = sc->sc_ipopts; |
| 732 | sc->sc_ipopts = NULL; |
| 733 | } |
| 734 | inp->inp_route = sc->sc_route; |
| 735 | sc->sc_route.ro_rt = NULL; |
| 736 | |
| 737 | sin.sin_family = AF_INET; |
| 738 | sin.sin_len = sizeof sin; |
| 739 | sin.sin_addr = sc->sc_inc.inc_faddr; |
| 740 | sin.sin_port = sc->sc_inc.inc_fport; |
| 741 | bzero(sin.sin_zero, sizeof sin.sin_zero); |
| 742 | laddr = inp->inp_laddr; |
| 743 | if (inp->inp_laddr.s_addr == INADDR_ANY) |
| 744 | inp->inp_laddr = sc->sc_inc.inc_laddr; |
| 745 | if (in_pcbconnect(inp, (struct sockaddr *)&sin, &thread0)) { |
| 746 | inp->inp_laddr = laddr; |
| 747 | goto abort; |
| 748 | } |
| 749 | } |
| 750 | |
| 751 | tp = intotcpcb(inp); |
| 752 | tp->t_state = TCPS_SYN_RECEIVED; |
| 753 | tp->iss = sc->sc_iss; |
| 754 | tp->irs = sc->sc_irs; |
| 755 | tcp_rcvseqinit(tp); |
| 756 | tcp_sendseqinit(tp); |
| 757 | tp->snd_wl1 = sc->sc_irs; |
| 758 | tp->rcv_up = sc->sc_irs + 1; |
| 759 | tp->rcv_wnd = sc->sc_wnd; |
| 760 | tp->rcv_adv += tp->rcv_wnd; |
| 761 | |
| 762 | tp->t_flags = sototcpcb(lso)->t_flags & (TF_NOPUSH | TF_NODELAY); |
| 763 | if (sc->sc_flags & SCF_NOOPT) |
| 764 | tp->t_flags |= TF_NOOPT; |
| 765 | if (sc->sc_flags & SCF_WINSCALE) { |
| 766 | tp->t_flags |= TF_REQ_SCALE | TF_RCVD_SCALE; |
| 767 | tp->requested_s_scale = sc->sc_requested_s_scale; |
| 768 | tp->request_r_scale = sc->sc_request_r_scale; |
| 769 | } |
| 770 | if (sc->sc_flags & SCF_TIMESTAMP) { |
| 771 | tp->t_flags |= TF_REQ_TSTMP | TF_RCVD_TSTMP; |
| 772 | tp->ts_recent = sc->sc_tsrecent; |
| 773 | tp->ts_recent_age = ticks; |
| 774 | } |
| 775 | if (sc->sc_flags & SCF_CC) { |
| 776 | /* |
| 777 | * Initialization of the tcpcb for transaction; |
| 778 | * set SND.WND = SEG.WND, |
| 779 | * initialize CCsend and CCrecv. |
| 780 | */ |
| 781 | tp->t_flags |= TF_REQ_CC | TF_RCVD_CC; |
| 782 | tp->cc_send = sc->sc_cc_send; |
| 783 | tp->cc_recv = sc->sc_cc_recv; |
| 784 | } |
| 785 | if (sc->sc_flags & SCF_SACK_PERMITTED) |
| 786 | tp->t_flags |= TF_SACK_PERMITTED; |
| 787 | |
| 788 | tcp_mss(tp, sc->sc_peer_mss); |
| 789 | |
| 790 | /* |
| 791 | * If the SYN,ACK was retransmitted, reset cwnd to 1 segment. |
| 792 | */ |
| 793 | if (sc->sc_rxtslot != 0) |
| 794 | tp->snd_cwnd = tp->t_maxseg; |
| 795 | tcp_create_timermsg(tp); |
| 796 | tcp_callout_reset(tp, tp->tt_keep, tcp_keepinit, tcp_timer_keep); |
| 797 | |
| 798 | tcpstat.tcps_accepts++; |
| 799 | return (so); |
| 800 | |
| 801 | abort: |
| 802 | if (so != NULL) |
| 803 | soaborta(so); |
| 804 | return (NULL); |
| 805 | } |
| 806 | |
| 807 | /* |
| 808 | * This function gets called when we receive an ACK for a |
| 809 | * socket in the LISTEN state. We look up the connection |
| 810 | * in the syncache, and if its there, we pull it out of |
| 811 | * the cache and turn it into a full-blown connection in |
| 812 | * the SYN-RECEIVED state. |
| 813 | */ |
| 814 | int |
| 815 | syncache_expand(struct in_conninfo *inc, struct tcphdr *th, struct socket **sop, |
| 816 | struct mbuf *m) |
| 817 | { |
| 818 | struct syncache *sc; |
| 819 | struct syncache_head *sch; |
| 820 | struct socket *so; |
| 821 | |
| 822 | sc = syncache_lookup(inc, &sch); |
| 823 | if (sc == NULL) { |
| 824 | /* |
| 825 | * There is no syncache entry, so see if this ACK is |
| 826 | * a returning syncookie. To do this, first: |
| 827 | * A. See if this socket has had a syncache entry dropped in |
| 828 | * the past. We don't want to accept a bogus syncookie |
| 829 | * if we've never received a SYN. |
| 830 | * B. check that the syncookie is valid. If it is, then |
| 831 | * cobble up a fake syncache entry, and return. |
| 832 | */ |
| 833 | if (!tcp_syncookies) |
| 834 | return (0); |
| 835 | sc = syncookie_lookup(inc, th, *sop); |
| 836 | if (sc == NULL) |
| 837 | return (0); |
| 838 | sch = NULL; |
| 839 | tcpstat.tcps_sc_recvcookie++; |
| 840 | } |
| 841 | |
| 842 | /* |
| 843 | * If seg contains an ACK, but not for our SYN/ACK, send a RST. |
| 844 | */ |
| 845 | if (th->th_ack != sc->sc_iss + 1) |
| 846 | return (0); |
| 847 | |
| 848 | so = syncache_socket(sc, *sop, m); |
| 849 | if (so == NULL) { |
| 850 | #if 0 |
| 851 | resetandabort: |
| 852 | /* XXXjlemon check this - is this correct? */ |
| 853 | tcp_respond(NULL, m, m, th, |
| 854 | th->th_seq + tlen, (tcp_seq)0, TH_RST | TH_ACK); |
| 855 | #endif |
| 856 | m_freem(m); /* XXX only needed for above */ |
| 857 | tcpstat.tcps_sc_aborted++; |
| 858 | } else { |
| 859 | tcpstat.tcps_sc_completed++; |
| 860 | } |
| 861 | if (sch == NULL) |
| 862 | syncache_free(sc); |
| 863 | else |
| 864 | syncache_drop(sc, sch); |
| 865 | *sop = so; |
| 866 | return (1); |
| 867 | } |
| 868 | |
| 869 | /* |
| 870 | * Given a LISTEN socket and an inbound SYN request, add |
| 871 | * this to the syn cache, and send back a segment: |
| 872 | * <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK> |
| 873 | * to the source. |
| 874 | * |
| 875 | * IMPORTANT NOTE: We do _NOT_ ACK data that might accompany the SYN. |
| 876 | * Doing so would require that we hold onto the data and deliver it |
| 877 | * to the application. However, if we are the target of a SYN-flood |
| 878 | * DoS attack, an attacker could send data which would eventually |
| 879 | * consume all available buffer space if it were ACKed. By not ACKing |
| 880 | * the data, we avoid this DoS scenario. |
| 881 | */ |
| 882 | int |
| 883 | syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, |
| 884 | struct socket **sop, struct mbuf *m) |
| 885 | { |
| 886 | struct tcp_syncache_percpu *syncache_percpu; |
| 887 | struct tcpcb *tp; |
| 888 | struct socket *so; |
| 889 | struct syncache *sc = NULL; |
| 890 | struct syncache_head *sch; |
| 891 | struct mbuf *ipopts = NULL; |
| 892 | struct rmxp_tao *taop; |
| 893 | int win; |
| 894 | |
| 895 | syncache_percpu = &tcp_syncache_percpu[mycpu->gd_cpuid]; |
| 896 | so = *sop; |
| 897 | tp = sototcpcb(so); |
| 898 | |
| 899 | /* |
| 900 | * Remember the IP options, if any. |
| 901 | */ |
| 902 | #ifdef INET6 |
| 903 | if (!inc->inc_isipv6) |
| 904 | #endif |
| 905 | ipopts = ip_srcroute(m); |
| 906 | |
| 907 | /* |
| 908 | * See if we already have an entry for this connection. |
| 909 | * If we do, resend the SYN,ACK, and reset the retransmit timer. |
| 910 | * |
| 911 | * XXX |
| 912 | * The syncache should be re-initialized with the contents |
| 913 | * of the new SYN which may have different options. |
| 914 | */ |
| 915 | sc = syncache_lookup(inc, &sch); |
| 916 | if (sc != NULL) { |
| 917 | tcpstat.tcps_sc_dupsyn++; |
| 918 | if (ipopts) { |
| 919 | /* |
| 920 | * If we were remembering a previous source route, |
| 921 | * forget it and use the new one we've been given. |
| 922 | */ |
| 923 | if (sc->sc_ipopts) |
| 924 | m_free(sc->sc_ipopts); |
| 925 | sc->sc_ipopts = ipopts; |
| 926 | } |
| 927 | /* |
| 928 | * Update timestamp if present. |
| 929 | */ |
| 930 | if (sc->sc_flags & SCF_TIMESTAMP) |
| 931 | sc->sc_tsrecent = to->to_tsval; |
| 932 | |
| 933 | /* Just update the TOF_SACK_PERMITTED for now. */ |
| 934 | if (tcp_do_sack && (to->to_flags & TOF_SACK_PERMITTED)) |
| 935 | sc->sc_flags |= SCF_SACK_PERMITTED; |
| 936 | else |
| 937 | sc->sc_flags &= ~SCF_SACK_PERMITTED; |
| 938 | |
| 939 | /* |
| 940 | * PCB may have changed, pick up new values. |
| 941 | */ |
| 942 | sc->sc_tp = tp; |
| 943 | sc->sc_inp_gencnt = tp->t_inpcb->inp_gencnt; |
| 944 | if (syncache_respond(sc, m) == 0) { |
| 945 | TAILQ_REMOVE(&syncache_percpu->timerq[sc->sc_rxtslot], |
| 946 | sc, sc_timerq); |
| 947 | syncache_timeout(syncache_percpu, sc, sc->sc_rxtslot); |
| 948 | tcpstat.tcps_sndacks++; |
| 949 | tcpstat.tcps_sndtotal++; |
| 950 | } |
| 951 | *sop = NULL; |
| 952 | return (1); |
| 953 | } |
| 954 | |
| 955 | /* |
| 956 | * This allocation is guaranteed to succeed because we |
| 957 | * preallocate one more syncache entry than cache_limit. |
| 958 | */ |
| 959 | sc = zalloc(tcp_syncache.zone); |
| 960 | |
| 961 | /* |
| 962 | * Fill in the syncache values. |
| 963 | */ |
| 964 | sc->sc_tp = tp; |
| 965 | sc->sc_inp_gencnt = tp->t_inpcb->inp_gencnt; |
| 966 | sc->sc_ipopts = ipopts; |
| 967 | sc->sc_inc.inc_fport = inc->inc_fport; |
| 968 | sc->sc_inc.inc_lport = inc->inc_lport; |
| 969 | #ifdef INET6 |
| 970 | sc->sc_inc.inc_isipv6 = inc->inc_isipv6; |
| 971 | if (inc->inc_isipv6) { |
| 972 | sc->sc_inc.inc6_faddr = inc->inc6_faddr; |
| 973 | sc->sc_inc.inc6_laddr = inc->inc6_laddr; |
| 974 | sc->sc_route6.ro_rt = NULL; |
| 975 | } else |
| 976 | #endif |
| 977 | { |
| 978 | sc->sc_inc.inc_faddr = inc->inc_faddr; |
| 979 | sc->sc_inc.inc_laddr = inc->inc_laddr; |
| 980 | sc->sc_route.ro_rt = NULL; |
| 981 | } |
| 982 | sc->sc_irs = th->th_seq; |
| 983 | sc->sc_flags = 0; |
| 984 | sc->sc_peer_mss = to->to_flags & TOF_MSS ? to->to_mss : 0; |
| 985 | if (tcp_syncookies) |
| 986 | sc->sc_iss = syncookie_generate(sc); |
| 987 | else |
| 988 | sc->sc_iss = karc4random(); |
| 989 | |
| 990 | /* Initial receive window: clip ssb_space to [0 .. TCP_MAXWIN] */ |
| 991 | win = ssb_space(&so->so_rcv); |
| 992 | win = imax(win, 0); |
| 993 | win = imin(win, TCP_MAXWIN); |
| 994 | sc->sc_wnd = win; |
| 995 | |
| 996 | if (tcp_do_rfc1323) { |
| 997 | /* |
| 998 | * A timestamp received in a SYN makes |
| 999 | * it ok to send timestamp requests and replies. |
| 1000 | */ |
| 1001 | if (to->to_flags & TOF_TS) { |
| 1002 | sc->sc_tsrecent = to->to_tsval; |
| 1003 | sc->sc_flags |= SCF_TIMESTAMP; |
| 1004 | } |
| 1005 | if (to->to_flags & TOF_SCALE) { |
| 1006 | int wscale = 0; |
| 1007 | |
| 1008 | /* Compute proper scaling value from buffer space */ |
| 1009 | while (wscale < TCP_MAX_WINSHIFT && |
| 1010 | (TCP_MAXWIN << wscale) < so->so_rcv.ssb_hiwat) |
| 1011 | wscale++; |
| 1012 | sc->sc_request_r_scale = wscale; |
| 1013 | sc->sc_requested_s_scale = to->to_requested_s_scale; |
| 1014 | sc->sc_flags |= SCF_WINSCALE; |
| 1015 | } |
| 1016 | } |
| 1017 | if (tcp_do_rfc1644) { |
| 1018 | /* |
| 1019 | * A CC or CC.new option received in a SYN makes |
| 1020 | * it ok to send CC in subsequent segments. |
| 1021 | */ |
| 1022 | if (to->to_flags & (TOF_CC | TOF_CCNEW)) { |
| 1023 | sc->sc_cc_recv = to->to_cc; |
| 1024 | sc->sc_cc_send = CC_INC(tcp_ccgen); |
| 1025 | sc->sc_flags |= SCF_CC; |
| 1026 | } |
| 1027 | } |
| 1028 | if (tcp_do_sack && (to->to_flags & TOF_SACK_PERMITTED)) |
| 1029 | sc->sc_flags |= SCF_SACK_PERMITTED; |
| 1030 | if (tp->t_flags & TF_NOOPT) |
| 1031 | sc->sc_flags = SCF_NOOPT; |
| 1032 | |
| 1033 | /* |
| 1034 | * XXX |
| 1035 | * We have the option here of not doing TAO (even if the segment |
| 1036 | * qualifies) and instead fall back to a normal 3WHS via the syncache. |
| 1037 | * This allows us to apply synflood protection to TAO-qualifying SYNs |
| 1038 | * also. However, there should be a hueristic to determine when to |
| 1039 | * do this, and is not present at the moment. |
| 1040 | */ |
| 1041 | |
| 1042 | /* |
| 1043 | * Perform TAO test on incoming CC (SEG.CC) option, if any. |
| 1044 | * - compare SEG.CC against cached CC from the same host, if any. |
| 1045 | * - if SEG.CC > chached value, SYN must be new and is accepted |
| 1046 | * immediately: save new CC in the cache, mark the socket |
| 1047 | * connected, enter ESTABLISHED state, turn on flag to |
| 1048 | * send a SYN in the next segment. |
| 1049 | * A virtual advertised window is set in rcv_adv to |
| 1050 | * initialize SWS prevention. Then enter normal segment |
| 1051 | * processing: drop SYN, process data and FIN. |
| 1052 | * - otherwise do a normal 3-way handshake. |
| 1053 | */ |
| 1054 | taop = tcp_gettaocache(&sc->sc_inc); |
| 1055 | if (to->to_flags & TOF_CC) { |
| 1056 | if ((tp->t_flags & TF_NOPUSH) && |
| 1057 | sc->sc_flags & SCF_CC && |
| 1058 | taop != NULL && taop->tao_cc != 0 && |
| 1059 | CC_GT(to->to_cc, taop->tao_cc)) { |
| 1060 | sc->sc_rxtslot = 0; |
| 1061 | so = syncache_socket(sc, *sop, m); |
| 1062 | if (so != NULL) { |
| 1063 | taop->tao_cc = to->to_cc; |
| 1064 | *sop = so; |
| 1065 | } |
| 1066 | syncache_free(sc); |
| 1067 | return (so != NULL); |
| 1068 | } |
| 1069 | } else { |
| 1070 | /* |
| 1071 | * No CC option, but maybe CC.NEW: invalidate cached value. |
| 1072 | */ |
| 1073 | if (taop != NULL) |
| 1074 | taop->tao_cc = 0; |
| 1075 | } |
| 1076 | /* |
| 1077 | * TAO test failed or there was no CC option, |
| 1078 | * do a standard 3-way handshake. |
| 1079 | */ |
| 1080 | if (syncache_respond(sc, m) == 0) { |
| 1081 | syncache_insert(sc, sch); |
| 1082 | tcpstat.tcps_sndacks++; |
| 1083 | tcpstat.tcps_sndtotal++; |
| 1084 | } else { |
| 1085 | syncache_free(sc); |
| 1086 | tcpstat.tcps_sc_dropped++; |
| 1087 | } |
| 1088 | *sop = NULL; |
| 1089 | return (1); |
| 1090 | } |
| 1091 | |
| 1092 | static int |
| 1093 | syncache_respond(struct syncache *sc, struct mbuf *m) |
| 1094 | { |
| 1095 | u_int8_t *optp; |
| 1096 | int optlen, error; |
| 1097 | u_int16_t tlen, hlen, mssopt; |
| 1098 | struct ip *ip = NULL; |
| 1099 | struct rtentry *rt; |
| 1100 | struct tcphdr *th; |
| 1101 | struct ip6_hdr *ip6 = NULL; |
| 1102 | #ifdef INET6 |
| 1103 | const boolean_t isipv6 = sc->sc_inc.inc_isipv6; |
| 1104 | #else |
| 1105 | const boolean_t isipv6 = FALSE; |
| 1106 | #endif |
| 1107 | |
| 1108 | if (isipv6) { |
| 1109 | rt = tcp_rtlookup6(&sc->sc_inc); |
| 1110 | if (rt != NULL) |
| 1111 | mssopt = rt->rt_ifp->if_mtu - |
| 1112 | (sizeof(struct ip6_hdr) + sizeof(struct tcphdr)); |
| 1113 | else |
| 1114 | mssopt = tcp_v6mssdflt; |
| 1115 | hlen = sizeof(struct ip6_hdr); |
| 1116 | } else { |
| 1117 | rt = tcp_rtlookup(&sc->sc_inc); |
| 1118 | if (rt != NULL) |
| 1119 | mssopt = rt->rt_ifp->if_mtu - |
| 1120 | (sizeof(struct ip) + sizeof(struct tcphdr)); |
| 1121 | else |
| 1122 | mssopt = tcp_mssdflt; |
| 1123 | hlen = sizeof(struct ip); |
| 1124 | } |
| 1125 | |
| 1126 | /* Compute the size of the TCP options. */ |
| 1127 | if (sc->sc_flags & SCF_NOOPT) { |
| 1128 | optlen = 0; |
| 1129 | } else { |
| 1130 | optlen = TCPOLEN_MAXSEG + |
| 1131 | ((sc->sc_flags & SCF_WINSCALE) ? 4 : 0) + |
| 1132 | ((sc->sc_flags & SCF_TIMESTAMP) ? TCPOLEN_TSTAMP_APPA : 0) + |
| 1133 | ((sc->sc_flags & SCF_CC) ? TCPOLEN_CC_APPA * 2 : 0) + |
| 1134 | ((sc->sc_flags & SCF_SACK_PERMITTED) ? |
| 1135 | TCPOLEN_SACK_PERMITTED_ALIGNED : 0); |
| 1136 | } |
| 1137 | tlen = hlen + sizeof(struct tcphdr) + optlen; |
| 1138 | |
| 1139 | /* |
| 1140 | * XXX |
| 1141 | * assume that the entire packet will fit in a header mbuf |
| 1142 | */ |
| 1143 | KASSERT(max_linkhdr + tlen <= MHLEN, ("syncache: mbuf too small")); |
| 1144 | |
| 1145 | /* |
| 1146 | * XXX shouldn't this reuse the mbuf if possible ? |
| 1147 | * Create the IP+TCP header from scratch. |
| 1148 | */ |
| 1149 | if (m) |
| 1150 | m_freem(m); |
| 1151 | |
| 1152 | m = m_gethdr(MB_DONTWAIT, MT_HEADER); |
| 1153 | if (m == NULL) |
| 1154 | return (ENOBUFS); |
| 1155 | m->m_data += max_linkhdr; |
| 1156 | m->m_len = tlen; |
| 1157 | m->m_pkthdr.len = tlen; |
| 1158 | m->m_pkthdr.rcvif = NULL; |
| 1159 | |
| 1160 | if (isipv6) { |
| 1161 | ip6 = mtod(m, struct ip6_hdr *); |
| 1162 | ip6->ip6_vfc = IPV6_VERSION; |
| 1163 | ip6->ip6_nxt = IPPROTO_TCP; |
| 1164 | ip6->ip6_src = sc->sc_inc.inc6_laddr; |
| 1165 | ip6->ip6_dst = sc->sc_inc.inc6_faddr; |
| 1166 | ip6->ip6_plen = htons(tlen - hlen); |
| 1167 | /* ip6_hlim is set after checksum */ |
| 1168 | /* ip6_flow = ??? */ |
| 1169 | |
| 1170 | th = (struct tcphdr *)(ip6 + 1); |
| 1171 | } else { |
| 1172 | ip = mtod(m, struct ip *); |
| 1173 | ip->ip_v = IPVERSION; |
| 1174 | ip->ip_hl = sizeof(struct ip) >> 2; |
| 1175 | ip->ip_len = tlen; |
| 1176 | ip->ip_id = 0; |
| 1177 | ip->ip_off = 0; |
| 1178 | ip->ip_sum = 0; |
| 1179 | ip->ip_p = IPPROTO_TCP; |
| 1180 | ip->ip_src = sc->sc_inc.inc_laddr; |
| 1181 | ip->ip_dst = sc->sc_inc.inc_faddr; |
| 1182 | ip->ip_ttl = sc->sc_tp->t_inpcb->inp_ip_ttl; /* XXX */ |
| 1183 | ip->ip_tos = sc->sc_tp->t_inpcb->inp_ip_tos; /* XXX */ |
| 1184 | |
| 1185 | /* |
| 1186 | * See if we should do MTU discovery. Route lookups are |
| 1187 | * expensive, so we will only unset the DF bit if: |
| 1188 | * |
| 1189 | * 1) path_mtu_discovery is disabled |
| 1190 | * 2) the SCF_UNREACH flag has been set |
| 1191 | */ |
| 1192 | if (path_mtu_discovery |
| 1193 | && ((sc->sc_flags & SCF_UNREACH) == 0)) { |
| 1194 | ip->ip_off |= IP_DF; |
| 1195 | } |
| 1196 | |
| 1197 | th = (struct tcphdr *)(ip + 1); |
| 1198 | } |
| 1199 | th->th_sport = sc->sc_inc.inc_lport; |
| 1200 | th->th_dport = sc->sc_inc.inc_fport; |
| 1201 | |
| 1202 | th->th_seq = htonl(sc->sc_iss); |
| 1203 | th->th_ack = htonl(sc->sc_irs + 1); |
| 1204 | th->th_off = (sizeof(struct tcphdr) + optlen) >> 2; |
| 1205 | th->th_x2 = 0; |
| 1206 | th->th_flags = TH_SYN | TH_ACK; |
| 1207 | th->th_win = htons(sc->sc_wnd); |
| 1208 | th->th_urp = 0; |
| 1209 | |
| 1210 | /* Tack on the TCP options. */ |
| 1211 | if (optlen == 0) |
| 1212 | goto no_options; |
| 1213 | optp = (u_int8_t *)(th + 1); |
| 1214 | *optp++ = TCPOPT_MAXSEG; |
| 1215 | *optp++ = TCPOLEN_MAXSEG; |
| 1216 | *optp++ = (mssopt >> 8) & 0xff; |
| 1217 | *optp++ = mssopt & 0xff; |
| 1218 | |
| 1219 | if (sc->sc_flags & SCF_WINSCALE) { |
| 1220 | *((u_int32_t *)optp) = htonl(TCPOPT_NOP << 24 | |
| 1221 | TCPOPT_WINDOW << 16 | TCPOLEN_WINDOW << 8 | |
| 1222 | sc->sc_request_r_scale); |
| 1223 | optp += 4; |
| 1224 | } |
| 1225 | |
| 1226 | if (sc->sc_flags & SCF_TIMESTAMP) { |
| 1227 | u_int32_t *lp = (u_int32_t *)(optp); |
| 1228 | |
| 1229 | /* Form timestamp option as shown in appendix A of RFC 1323. */ |
| 1230 | *lp++ = htonl(TCPOPT_TSTAMP_HDR); |
| 1231 | *lp++ = htonl(ticks); |
| 1232 | *lp = htonl(sc->sc_tsrecent); |
| 1233 | optp += TCPOLEN_TSTAMP_APPA; |
| 1234 | } |
| 1235 | |
| 1236 | /* |
| 1237 | * Send CC and CC.echo if we received CC from our peer. |
| 1238 | */ |
| 1239 | if (sc->sc_flags & SCF_CC) { |
| 1240 | u_int32_t *lp = (u_int32_t *)(optp); |
| 1241 | |
| 1242 | *lp++ = htonl(TCPOPT_CC_HDR(TCPOPT_CC)); |
| 1243 | *lp++ = htonl(sc->sc_cc_send); |
| 1244 | *lp++ = htonl(TCPOPT_CC_HDR(TCPOPT_CCECHO)); |
| 1245 | *lp = htonl(sc->sc_cc_recv); |
| 1246 | optp += TCPOLEN_CC_APPA * 2; |
| 1247 | } |
| 1248 | |
| 1249 | if (sc->sc_flags & SCF_SACK_PERMITTED) { |
| 1250 | *((u_int32_t *)optp) = htonl(TCPOPT_SACK_PERMITTED_ALIGNED); |
| 1251 | optp += TCPOLEN_SACK_PERMITTED_ALIGNED; |
| 1252 | } |
| 1253 | |
| 1254 | no_options: |
| 1255 | if (isipv6) { |
| 1256 | struct route_in6 *ro6 = &sc->sc_route6; |
| 1257 | |
| 1258 | th->th_sum = 0; |
| 1259 | th->th_sum = in6_cksum(m, IPPROTO_TCP, hlen, tlen - hlen); |
| 1260 | ip6->ip6_hlim = in6_selecthlim(NULL, |
| 1261 | ro6->ro_rt ? ro6->ro_rt->rt_ifp : NULL); |
| 1262 | error = ip6_output(m, NULL, ro6, 0, NULL, NULL, |
| 1263 | sc->sc_tp->t_inpcb); |
| 1264 | } else { |
| 1265 | th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, |
| 1266 | htons(tlen - hlen + IPPROTO_TCP)); |
| 1267 | m->m_pkthdr.csum_flags = CSUM_TCP; |
| 1268 | m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); |
| 1269 | error = ip_output(m, sc->sc_ipopts, &sc->sc_route, |
| 1270 | IP_DEBUGROUTE, NULL, sc->sc_tp->t_inpcb); |
| 1271 | } |
| 1272 | return (error); |
| 1273 | } |
| 1274 | |
/*
 * cookie layers:
 *
 *	|. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .|
 *	| peer iss                                                      |
 *	| MD5(laddr,faddr,secret,lport,fport)             |. . . . . . .|
 *	|                     0                           |(A)|         |
 * (A): peer mss index
 *
 * The layers are XORed together to form the cookie.  The low
 * SYNCOOKIE_WNDBITS bits of the bottom layer hold the index of the
 * tcp_secret[] slot that was used, and the two bits above them hold
 * the tcp_msstab[] index (A).
 */

/*
 * The values below are chosen to minimize the size of the tcp_secret
 * table, as well as providing roughly a 16 second lifetime for the cookie.
 */

#define SYNCOOKIE_WNDBITS	5	/* exposed bits for window indexing */
#define SYNCOOKIE_TIMESHIFT	1	/* scale ticks to window time units */

/* Mask selecting the secret-slot index bits of a cookie. */
#define SYNCOOKIE_WNDMASK	((1 << SYNCOOKIE_WNDBITS) - 1)
/* Number of entries in tcp_secret[], one per slot index value. */
#define SYNCOOKIE_NSECRETS	(1 << SYNCOOKIE_WNDBITS)
/* Lifetime of a secret in ticks: hz * 32 / 2, i.e. 16 seconds. */
#define SYNCOOKIE_TIMEOUT \
    (hz * (1 << SYNCOOKIE_WNDBITS) / (1 << SYNCOOKIE_TIMESHIFT))
/* Bits that may remain set once the hash is removed: mss index + slot. */
#define SYNCOOKIE_DATAMASK	((3 << SYNCOOKIE_WNDBITS) | SYNCOOKIE_WNDMASK)

/*
 * Per-slot secrets mixed into the cookie hash.  ts_expire is compared
 * against `ticks'; an expired slot gets fresh random bits when the
 * generator next uses it, and cookies naming an expired slot are rejected.
 */
static struct {
	u_int32_t	ts_secbits[4];
	u_int		ts_expire;
} tcp_secret[SYNCOOKIE_NSECRETS];

/*
 * MSS values representable in a cookie, indexed by the 2-bit mss index.
 * The generator picks the largest entry not exceeding the peer's MSS
 * (entry 0 if none qualifies).
 */
static int tcp_msstab[] = { 0, 536, 1460, 8960 };

/*
 * Single MD5 context shared by cookie generation and validation.
 * NOTE(review): file-scope state with no locking visible here -- confirm
 * that callers are serialized.
 */
static MD5_CTX syn_ctx;

/* Feed the raw bytes of the lvalue `v' into syn_ctx. */
#define MD5Add(v)	MD5Update(&syn_ctx, (u_char *)&v, sizeof(v))

/*
 * Fixed-layout block hashed for every cookie.  For IPv6 the two address
 * fields are zeroed and the full addresses are hashed separately first.
 */
struct md5_add {
	u_int32_t laddr, faddr;
	u_int32_t secbits[4];
	u_int16_t lport, fport;
};

/* The block is hashed as raw bytes, so it must not contain padding. */
#ifdef CTASSERT
CTASSERT(sizeof(struct md5_add) == 28);
#endif
| 1319 | |
| 1320 | /* |
| 1321 | * Consider the problem of a recreated (and retransmitted) cookie. If the |
| 1322 | * original SYN was accepted, the connection is established. The second |
| 1323 | * SYN is inflight, and if it arrives with an ISN that falls within the |
| 1324 | * receive window, the connection is killed. |
| 1325 | * |
| 1326 | * However, since cookies have other problems, this may not be worth |
| 1327 | * worrying about. |
| 1328 | */ |
| 1329 | |
| 1330 | static u_int32_t |
| 1331 | syncookie_generate(struct syncache *sc) |
| 1332 | { |
| 1333 | u_int32_t md5_buffer[4]; |
| 1334 | u_int32_t data; |
| 1335 | int idx, i; |
| 1336 | struct md5_add add; |
| 1337 | #ifdef INET6 |
| 1338 | const boolean_t isipv6 = sc->sc_inc.inc_isipv6; |
| 1339 | #else |
| 1340 | const boolean_t isipv6 = FALSE; |
| 1341 | #endif |
| 1342 | |
| 1343 | idx = ((ticks << SYNCOOKIE_TIMESHIFT) / hz) & SYNCOOKIE_WNDMASK; |
| 1344 | if (tcp_secret[idx].ts_expire < ticks) { |
| 1345 | for (i = 0; i < 4; i++) |
| 1346 | tcp_secret[idx].ts_secbits[i] = karc4random(); |
| 1347 | tcp_secret[idx].ts_expire = ticks + SYNCOOKIE_TIMEOUT; |
| 1348 | } |
| 1349 | for (data = sizeof(tcp_msstab) / sizeof(int) - 1; data > 0; data--) |
| 1350 | if (tcp_msstab[data] <= sc->sc_peer_mss) |
| 1351 | break; |
| 1352 | data = (data << SYNCOOKIE_WNDBITS) | idx; |
| 1353 | data ^= sc->sc_irs; /* peer's iss */ |
| 1354 | MD5Init(&syn_ctx); |
| 1355 | if (isipv6) { |
| 1356 | MD5Add(sc->sc_inc.inc6_laddr); |
| 1357 | MD5Add(sc->sc_inc.inc6_faddr); |
| 1358 | add.laddr = 0; |
| 1359 | add.faddr = 0; |
| 1360 | } else { |
| 1361 | add.laddr = sc->sc_inc.inc_laddr.s_addr; |
| 1362 | add.faddr = sc->sc_inc.inc_faddr.s_addr; |
| 1363 | } |
| 1364 | add.lport = sc->sc_inc.inc_lport; |
| 1365 | add.fport = sc->sc_inc.inc_fport; |
| 1366 | add.secbits[0] = tcp_secret[idx].ts_secbits[0]; |
| 1367 | add.secbits[1] = tcp_secret[idx].ts_secbits[1]; |
| 1368 | add.secbits[2] = tcp_secret[idx].ts_secbits[2]; |
| 1369 | add.secbits[3] = tcp_secret[idx].ts_secbits[3]; |
| 1370 | MD5Add(add); |
| 1371 | MD5Final((u_char *)&md5_buffer, &syn_ctx); |
| 1372 | data ^= (md5_buffer[0] & ~SYNCOOKIE_WNDMASK); |
| 1373 | return (data); |
| 1374 | } |
| 1375 | |
| 1376 | static struct syncache * |
| 1377 | syncookie_lookup(struct in_conninfo *inc, struct tcphdr *th, struct socket *so) |
| 1378 | { |
| 1379 | u_int32_t md5_buffer[4]; |
| 1380 | struct syncache *sc; |
| 1381 | u_int32_t data; |
| 1382 | int wnd, idx; |
| 1383 | struct md5_add add; |
| 1384 | |
| 1385 | data = (th->th_ack - 1) ^ (th->th_seq - 1); /* remove ISS */ |
| 1386 | idx = data & SYNCOOKIE_WNDMASK; |
| 1387 | if (tcp_secret[idx].ts_expire < ticks || |
| 1388 | sototcpcb(so)->ts_recent + SYNCOOKIE_TIMEOUT < ticks) |
| 1389 | return (NULL); |
| 1390 | MD5Init(&syn_ctx); |
| 1391 | #ifdef INET6 |
| 1392 | if (inc->inc_isipv6) { |
| 1393 | MD5Add(inc->inc6_laddr); |
| 1394 | MD5Add(inc->inc6_faddr); |
| 1395 | add.laddr = 0; |
| 1396 | add.faddr = 0; |
| 1397 | } else |
| 1398 | #endif |
| 1399 | { |
| 1400 | add.laddr = inc->inc_laddr.s_addr; |
| 1401 | add.faddr = inc->inc_faddr.s_addr; |
| 1402 | } |
| 1403 | add.lport = inc->inc_lport; |
| 1404 | add.fport = inc->inc_fport; |
| 1405 | add.secbits[0] = tcp_secret[idx].ts_secbits[0]; |
| 1406 | add.secbits[1] = tcp_secret[idx].ts_secbits[1]; |
| 1407 | add.secbits[2] = tcp_secret[idx].ts_secbits[2]; |
| 1408 | add.secbits[3] = tcp_secret[idx].ts_secbits[3]; |
| 1409 | MD5Add(add); |
| 1410 | MD5Final((u_char *)&md5_buffer, &syn_ctx); |
| 1411 | data ^= md5_buffer[0]; |
| 1412 | if (data & ~SYNCOOKIE_DATAMASK) |
| 1413 | return (NULL); |
| 1414 | data = data >> SYNCOOKIE_WNDBITS; |
| 1415 | |
| 1416 | /* |
| 1417 | * This allocation is guaranteed to succeed because we |
| 1418 | * preallocate one more syncache entry than cache_limit. |
| 1419 | */ |
| 1420 | sc = zalloc(tcp_syncache.zone); |
| 1421 | |
| 1422 | /* |
| 1423 | * Fill in the syncache values. |
| 1424 | * XXX duplicate code from syncache_add |
| 1425 | */ |
| 1426 | sc->sc_ipopts = NULL; |
| 1427 | sc->sc_inc.inc_fport = inc->inc_fport; |
| 1428 | sc->sc_inc.inc_lport = inc->inc_lport; |
| 1429 | #ifdef INET6 |
| 1430 | sc->sc_inc.inc_isipv6 = inc->inc_isipv6; |
| 1431 | if (inc->inc_isipv6) { |
| 1432 | sc->sc_inc.inc6_faddr = inc->inc6_faddr; |
| 1433 | sc->sc_inc.inc6_laddr = inc->inc6_laddr; |
| 1434 | sc->sc_route6.ro_rt = NULL; |
| 1435 | } else |
| 1436 | #endif |
| 1437 | { |
| 1438 | sc->sc_inc.inc_faddr = inc->inc_faddr; |
| 1439 | sc->sc_inc.inc_laddr = inc->inc_laddr; |
| 1440 | sc->sc_route.ro_rt = NULL; |
| 1441 | } |
| 1442 | sc->sc_irs = th->th_seq - 1; |
| 1443 | sc->sc_iss = th->th_ack - 1; |
| 1444 | wnd = ssb_space(&so->so_rcv); |
| 1445 | wnd = imax(wnd, 0); |
| 1446 | wnd = imin(wnd, TCP_MAXWIN); |
| 1447 | sc->sc_wnd = wnd; |
| 1448 | sc->sc_flags = 0; |
| 1449 | sc->sc_rxtslot = 0; |
| 1450 | sc->sc_peer_mss = tcp_msstab[data]; |
| 1451 | return (sc); |
| 1452 | } |