bge: Avoid accessing invalid internal memory region on BCM5906
[dragonfly.git] / sys / netinet6 / ipsec.c
1 /*      $FreeBSD: src/sys/netinet6/ipsec.c,v 1.3.2.12 2003/05/06 06:46:58 suz Exp $     */
2 /*      $KAME: ipsec.c,v 1.103 2001/05/24 07:14:18 sakane Exp $ */
3
4 /*
5  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the project nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32
33 /*
34  * IPsec controller part.
35  */
36
37 #include "opt_inet.h"
38 #include "opt_inet6.h"
39 #include "opt_ipsec.h"
40
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/malloc.h>
44 #include <sys/mbuf.h>
45 #include <sys/domain.h>
46 #include <sys/protosw.h>
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49 #include <sys/errno.h>
50 #include <sys/time.h>
51 #include <sys/kernel.h>
52 #include <sys/syslog.h>
53 #include <sys/sysctl.h>
54 #include <sys/proc.h>
55 #include <sys/in_cksum.h>
56 #include <sys/thread2.h>
57
58 #include <net/if.h>
59 #include <net/route.h>
60
61 #include <netinet/in.h>
62 #include <netinet/in_systm.h>
63 #include <netinet/ip.h>
64 #include <netinet/ip_var.h>
65 #include <netinet/in_var.h>
66 #include <netinet/udp.h>
67 #include <netinet/udp_var.h>
68 #include <netinet/ip_ecn.h>
69 #ifdef INET6
70 #include <netinet6/ip6_ecn.h>
71 #endif
72 #include <netinet/tcp.h>
73 #include <netinet/udp.h>
74
75 #include <netinet/ip6.h>
76 #ifdef INET6
77 #include <netinet6/ip6_var.h>
78 #endif
79 #include <netinet/in_pcb.h>
80 #ifdef INET6
81 #include <netinet/icmp6.h>
82 #endif
83
84 #include <netinet6/ipsec.h>
85 #ifdef INET6
86 #include <netinet6/ipsec6.h>
87 #endif
88 #include <netinet6/ah.h>
89 #ifdef INET6
90 #include <netinet6/ah6.h>
91 #endif
92 #ifdef IPSEC_ESP
93 #include <netinet6/esp.h>
94 #ifdef INET6
95 #include <netinet6/esp6.h>
96 #endif
97 #endif
98 #include <netinet6/ipcomp.h>
99 #ifdef INET6
100 #include <netinet6/ipcomp6.h>
101 #endif
102 #include <netproto/key/key.h>
103 #include <netproto/key/keydb.h>
104 #include <netproto/key/key_debug.h>
105
106 #include <net/net_osdep.h>
107
/*
 * Debug switch shared by the IPv4 and IPv6 code in this file; it starts
 * enabled only when the kernel is built with IPSEC_DEBUG.
 */
#ifdef IPSEC_DEBUG
int ipsec_debug = 1;
#else
int ipsec_debug = 0;
#endif

/*
 * IPv4 IPsec statistics and tunables.  The initial values are the
 * compiled-in defaults; the SYSCTL declarations in this file export them.
 */
struct ipsecstat ipsecstat;
int ip4_ah_cleartos = 1;
int ip4_ah_offsetmask = 0;      /* maybe IP_DF? */
int ip4_ipsec_dfbit = 0;        /* DF bit on encap. 0: clear 1: set 2: copy */
/*
 * Default levels for ESP and AH.
 * NOTE(review): "trans"/"net" presumably mean transport/tunnel mode -- confirm.
 */
int ip4_esp_trans_deflev = IPSEC_LEVEL_USE;
int ip4_esp_net_deflev = IPSEC_LEVEL_USE;
int ip4_ah_trans_deflev = IPSEC_LEVEL_USE;
int ip4_ah_net_deflev = IPSEC_LEVEL_USE;
/* System default policy, handed out when no SPD entry matches a packet. */
struct secpolicy ip4_def_policy;
int ip4_ipsec_ecn = 0;          /* ECN ignore(-1)/forbidden(0)/allowed(1) */
int ip4_esp_randpad = -1;
124 int ip4_esp_randpad = -1;
125
/* Declare the sysctl parent nodes when the SYSCTL_DECL macro is available. */
#ifdef SYSCTL_DECL
SYSCTL_DECL(_net_inet_ipsec);
#ifdef INET6
SYSCTL_DECL(_net_inet6_ipsec6);
#endif
#endif

/*
 * net.inet.ipsec
 *
 * The statistics structure is exported read-only (CTLFLAG_RD); every other
 * entry is a read/write integer (CTLFLAG_RW) backed by one of the IPv4
 * globals, or by the shared ipsec_debug switch.
 */
SYSCTL_STRUCT(_net_inet_ipsec, IPSECCTL_STATS,
        stats, CTLFLAG_RD,      &ipsecstat,     ipsecstat, "");
SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_POLICY,
        def_policy, CTLFLAG_RW, &ip4_def_policy.policy, 0, "");
SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_ESP_TRANSLEV, esp_trans_deflev,
        CTLFLAG_RW, &ip4_esp_trans_deflev,      0, "");
SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_ESP_NETLEV, esp_net_deflev,
        CTLFLAG_RW, &ip4_esp_net_deflev,        0, "");
SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_AH_TRANSLEV, ah_trans_deflev,
        CTLFLAG_RW, &ip4_ah_trans_deflev,       0, "");
SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_AH_NETLEV, ah_net_deflev,
        CTLFLAG_RW, &ip4_ah_net_deflev, 0, "");
SYSCTL_INT(_net_inet_ipsec, IPSECCTL_AH_CLEARTOS,
        ah_cleartos, CTLFLAG_RW,        &ip4_ah_cleartos,       0, "");
SYSCTL_INT(_net_inet_ipsec, IPSECCTL_AH_OFFSETMASK,
        ah_offsetmask, CTLFLAG_RW,      &ip4_ah_offsetmask,     0, "");
SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DFBIT,
        dfbit, CTLFLAG_RW,      &ip4_ipsec_dfbit,       0, "");
SYSCTL_INT(_net_inet_ipsec, IPSECCTL_ECN,
        ecn, CTLFLAG_RW,        &ip4_ipsec_ecn, 0, "");
SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEBUG,
        debug, CTLFLAG_RW,      &ipsec_debug,   0, "");
SYSCTL_INT(_net_inet_ipsec, IPSECCTL_ESP_RANDPAD,
        esp_randpad, CTLFLAG_RW,        &ip4_esp_randpad,       0, "");
158
#ifdef INET6
/*
 * IPv6 counterparts of the IPv4 statistics and tunables.  The statistics
 * reuse struct ipsecstat; the initial values are the compiled-in defaults.
 */
struct ipsecstat ipsec6stat;
int ip6_esp_trans_deflev = IPSEC_LEVEL_USE;
int ip6_esp_net_deflev = IPSEC_LEVEL_USE;
int ip6_ah_trans_deflev = IPSEC_LEVEL_USE;
int ip6_ah_net_deflev = IPSEC_LEVEL_USE;
/* System default policy, handed out when no SPD entry matches a packet. */
struct secpolicy ip6_def_policy;
int ip6_ipsec_ecn = 0;          /* ECN ignore(-1)/forbidden(0)/allowed(1) */
int ip6_esp_randpad = -1;

/*
 * net.inet6.ipsec6
 *
 * Statistics are read-only; the remaining entries are read/write integers.
 * The "debug" entry points at the same ipsec_debug variable that the IPv4
 * tree exports, so both names change one switch.
 */
SYSCTL_STRUCT(_net_inet6_ipsec6, IPSECCTL_STATS,
        stats, CTLFLAG_RD, &ipsec6stat, ipsecstat, "");
SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_POLICY,
        def_policy, CTLFLAG_RW, &ip6_def_policy.policy, 0, "");
SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_ESP_TRANSLEV, esp_trans_deflev,
        CTLFLAG_RW, &ip6_esp_trans_deflev,      0, "");
SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_ESP_NETLEV, esp_net_deflev,
        CTLFLAG_RW, &ip6_esp_net_deflev,        0, "");
SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_AH_TRANSLEV, ah_trans_deflev,
        CTLFLAG_RW, &ip6_ah_trans_deflev,       0, "");
SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_AH_NETLEV, ah_net_deflev,
        CTLFLAG_RW, &ip6_ah_net_deflev, 0, "");
SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_ECN,
        ecn, CTLFLAG_RW,        &ip6_ipsec_ecn, 0, "");
SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEBUG,
        debug, CTLFLAG_RW,      &ipsec_debug,   0, "");
SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_ESP_RANDPAD,
        esp_randpad, CTLFLAG_RW,        &ip6_esp_randpad,       0, "");
#endif /* INET6 */
189
/*
 * Forward declarations of the file-local helpers.  IPv4-only and IPv6-only
 * helpers are guarded by INET / INET6 respectively.
 */

/* Helpers that fill a security policy index (spidx) from a packet. */
static int ipsec_setspidx_mbuf
        (struct secpolicyindex *, u_int, u_int, struct mbuf *, int);
static int ipsec4_setspidx_inpcb (struct mbuf *, struct inpcb *pcb);
#ifdef INET6
static int ipsec6_setspidx_in6pcb (struct mbuf *, struct in6pcb *pcb);
#endif
static int ipsec_setspidx (struct mbuf *, struct secpolicyindex *, int);
static void ipsec4_get_ulp (struct mbuf *m, struct secpolicyindex *, int);
static int ipsec4_setspidx_ipaddr (struct mbuf *, struct secpolicyindex *);
#ifdef INET6
static void ipsec6_get_ulp (struct mbuf *m, struct secpolicyindex *, int);
static int ipsec6_setspidx_ipaddr (struct mbuf *, struct secpolicyindex *);
#endif
/* Per-PCB policy management. */
static struct inpcbpolicy *ipsec_newpcbpolicy (void);
static void ipsec_delpcbpolicy (struct inpcbpolicy *);
static struct secpolicy *ipsec_deepcopy_policy (struct secpolicy *src);
static int ipsec_set_policy (struct secpolicy **pcb_sp,
        int optname, caddr_t request, size_t len, int priv);
static int ipsec_get_policy (struct secpolicy *pcb_sp, struct mbuf **mp);
/* Miscellaneous helpers defined later in the file. */
static void vshiftl (unsigned char *, int, int);
static int ipsec_in_reject (struct secpolicy *, struct mbuf *);
static size_t ipsec_hdrsiz (struct secpolicy *);
#ifdef INET
static struct mbuf *ipsec4_splithdr (struct mbuf *);
#endif
#ifdef INET6
static struct mbuf *ipsec6_splithdr (struct mbuf *);
#endif
#ifdef INET
static int ipsec4_encapsulate (struct mbuf *, struct secasvar *);
#endif
#ifdef INET6
static int ipsec6_encapsulate (struct mbuf *, struct secasvar *);
#endif
224
225 /*
226  * For OUTBOUND packet having a socket. Searching SPD for packet,
227  * and return a pointer to SP.
228  * OUT: NULL:   no apropreate SP found, the following value is set to error.
229  *              0       : bypass
230  *              EACCES  : discard packet.
231  *              ENOENT  : ipsec_acquire() in progress, maybe.
232  *              others  : error occured.
233  *      others: a pointer to SP
234  *
235  * NOTE: IPv6 mapped adddress concern is implemented here.
236  */
237 struct secpolicy *
238 ipsec4_getpolicybysock(struct mbuf *m, u_int dir, struct socket *so, int *error)
239 {
240         struct inpcbpolicy *pcbsp = NULL;
241         struct secpolicy *currsp = NULL;        /* policy on socket */
242         struct secpolicy *kernsp = NULL;        /* policy on kernel */
243
244         /* sanity check */
245         if (m == NULL || so == NULL || error == NULL)
246                 panic("ipsec4_getpolicybysock: NULL pointer was passed.");
247
248         switch (so->so_proto->pr_domain->dom_family) {
249         case AF_INET:
250                 /* set spidx in pcb */
251                 *error = ipsec4_setspidx_inpcb(m, so->so_pcb);
252                 break;
253 #ifdef INET6
254         case AF_INET6:
255                 /* set spidx in pcb */
256                 *error = ipsec6_setspidx_in6pcb(m, so->so_pcb);
257                 break;
258 #endif
259         default:
260                 panic("ipsec4_getpolicybysock: unsupported address family");
261         }
262         if (*error)
263                 return NULL;
264         switch (so->so_proto->pr_domain->dom_family) {
265         case AF_INET:
266                 pcbsp = sotoinpcb(so)->inp_sp;
267                 break;
268 #ifdef INET6
269         case AF_INET6:
270                 pcbsp = sotoin6pcb(so)->in6p_sp;
271                 break;
272 #endif
273         }
274
275         /* sanity check */
276         if (pcbsp == NULL)
277                 panic("ipsec4_getpolicybysock: pcbsp is NULL.");
278
279         switch (dir) {
280         case IPSEC_DIR_INBOUND:
281                 currsp = pcbsp->sp_in;
282                 break;
283         case IPSEC_DIR_OUTBOUND:
284                 currsp = pcbsp->sp_out;
285                 break;
286         default:
287                 panic("ipsec4_getpolicybysock: illegal direction.");
288         }
289
290         /* sanity check */
291         if (currsp == NULL)
292                 panic("ipsec4_getpolicybysock: currsp is NULL.");
293
294         lwkt_gettoken(&key_token);
295
296         /* when privilieged socket */
297         if (pcbsp->priv) {
298                 switch (currsp->policy) {
299                 case IPSEC_POLICY_BYPASS:
300                         currsp->refcnt++;
301                         *error = 0;
302                         lwkt_reltoken(&key_token);
303                         return currsp;
304
305                 case IPSEC_POLICY_ENTRUST:
306                         /* look for a policy in SPD */
307                         kernsp = key_allocsp(&currsp->spidx, dir);
308
309                         /* SP found */
310                         if (kernsp != NULL) {
311                                 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
312                                         kprintf("DP ipsec4_getpolicybysock called "
313                                                "to allocate SP:%p\n", kernsp));
314                                 *error = 0;
315                                 lwkt_reltoken(&key_token);
316                                 return kernsp;
317                         }
318
319                         /* no SP found */
320                         if (ip4_def_policy.policy != IPSEC_POLICY_DISCARD
321                          && ip4_def_policy.policy != IPSEC_POLICY_NONE) {
322                                 ipseclog((LOG_INFO,
323                                     "fixed system default policy: %d->%d\n",
324                                     ip4_def_policy.policy, IPSEC_POLICY_NONE));
325                                 ip4_def_policy.policy = IPSEC_POLICY_NONE;
326                         }
327                         ip4_def_policy.refcnt++;
328                         *error = 0;
329                         lwkt_reltoken(&key_token);
330                         return &ip4_def_policy;
331                         
332                 case IPSEC_POLICY_IPSEC:
333                         currsp->refcnt++;
334                         *error = 0;
335                         lwkt_reltoken(&key_token);
336                         return currsp;
337
338                 default:
339                         ipseclog((LOG_ERR, "ipsec4_getpolicybysock: "
340                               "Invalid policy for PCB %d\n", currsp->policy));
341                         *error = EINVAL;
342                         lwkt_reltoken(&key_token);
343                         return NULL;
344                 }
345                 /* NOTREACHED */
346         }
347
348         /* when non-privilieged socket */
349         /* look for a policy in SPD */
350         kernsp = key_allocsp(&currsp->spidx, dir);
351
352         /* SP found */
353         if (kernsp != NULL) {
354                 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
355                         kprintf("DP ipsec4_getpolicybysock called "
356                                "to allocate SP:%p\n", kernsp));
357                 *error = 0;
358                 lwkt_reltoken(&key_token);
359                 return kernsp;
360         }
361
362         /* no SP found */
363         switch (currsp->policy) {
364         case IPSEC_POLICY_BYPASS:
365                 ipseclog((LOG_ERR, "ipsec4_getpolicybysock: "
366                        "Illegal policy for non-priviliged defined %d\n",
367                         currsp->policy));
368                 *error = EINVAL;
369                 lwkt_reltoken(&key_token);
370                 return NULL;
371
372         case IPSEC_POLICY_ENTRUST:
373                 if (ip4_def_policy.policy != IPSEC_POLICY_DISCARD
374                  && ip4_def_policy.policy != IPSEC_POLICY_NONE) {
375                         ipseclog((LOG_INFO,
376                             "fixed system default policy: %d->%d\n",
377                             ip4_def_policy.policy, IPSEC_POLICY_NONE));
378                         ip4_def_policy.policy = IPSEC_POLICY_NONE;
379                 }
380                 ip4_def_policy.refcnt++;
381                 *error = 0;
382                 lwkt_reltoken(&key_token);
383                 return &ip4_def_policy;
384
385         case IPSEC_POLICY_IPSEC:
386                 currsp->refcnt++;
387                 *error = 0;
388                 lwkt_reltoken(&key_token);
389                 return currsp;
390
391         default:
392                 ipseclog((LOG_ERR, "ipsec4_getpolicybysock: "
393                    "Invalid policy for PCB %d\n", currsp->policy));
394                 *error = EINVAL;
395                 lwkt_reltoken(&key_token);
396                 return NULL;
397         }
398         /* NOTREACHED */
399 }
400
401 /*
402  * For FORWADING packet or OUTBOUND without a socket. Searching SPD for packet,
403  * and return a pointer to SP.
404  * OUT: positive: a pointer to the entry for security policy leaf matched.
405  *      NULL:   no apropreate SP found, the following value is set to error.
406  *              0       : bypass
407  *              EACCES  : discard packet.
408  *              ENOENT  : ipsec_acquire() in progress, maybe.
409  *              others  : error occured.
410  */
411 struct secpolicy *
412 ipsec4_getpolicybyaddr(struct mbuf *m, u_int dir, int flag, int *error)
413 {
414         struct secpolicy *sp = NULL;
415         struct secpolicyindex spidx;
416
417         /* sanity check */
418         if (m == NULL || error == NULL)
419                 panic("ipsec4_getpolicybyaddr: NULL pointer was passed.");
420
421         bzero(&spidx, sizeof(spidx));
422
423         /* make a index to look for a policy */
424         *error = ipsec_setspidx_mbuf(&spidx, dir, AF_INET, m,
425                                      (flag & IP_FORWARDING) ? 0 : 1);
426
427         if (*error != 0)
428                 return NULL;
429
430         lwkt_gettoken(&key_token);
431         sp = key_allocsp(&spidx, dir);
432
433         /* SP found */
434         if (sp != NULL) {
435                 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
436                         kprintf("DP ipsec4_getpolicybyaddr called "
437                                "to allocate SP:%p\n", sp));
438                 *error = 0;
439                 lwkt_reltoken(&key_token);
440                 return sp;
441         }
442
443         /* no SP found */
444         if (ip4_def_policy.policy != IPSEC_POLICY_DISCARD
445          && ip4_def_policy.policy != IPSEC_POLICY_NONE) {
446                 ipseclog((LOG_INFO, "fixed system default policy:%d->%d\n",
447                         ip4_def_policy.policy,
448                         IPSEC_POLICY_NONE));
449                 ip4_def_policy.policy = IPSEC_POLICY_NONE;
450         }
451         ip4_def_policy.refcnt++;
452         *error = 0;
453         lwkt_reltoken(&key_token);
454         return &ip4_def_policy;
455 }
456
457 #ifdef INET6
458 /*
459  * For OUTBOUND packet having a socket. Searching SPD for packet,
460  * and return a pointer to SP.
461  * OUT: NULL:   no apropreate SP found, the following value is set to error.
462  *              0       : bypass
463  *              EACCES  : discard packet.
464  *              ENOENT  : ipsec_acquire() in progress, maybe.
465  *              others  : error occured.
466  *      others: a pointer to SP
467  */
468 struct secpolicy *
469 ipsec6_getpolicybysock(struct mbuf *m, u_int dir, struct socket *so, int *error)
470 {
471         struct inpcbpolicy *pcbsp = NULL;
472         struct secpolicy *currsp = NULL;        /* policy on socket */
473         struct secpolicy *kernsp = NULL;        /* policy on kernel */
474
475         /* sanity check */
476         if (m == NULL || so == NULL || error == NULL)
477                 panic("ipsec6_getpolicybysock: NULL pointer was passed.");
478
479 #ifdef DIAGNOSTIC
480         if (so->so_proto->pr_domain->dom_family != AF_INET6)
481                 panic("ipsec6_getpolicybysock: socket domain != inet6");
482 #endif
483
484         lwkt_gettoken(&key_token);
485
486         /* set spidx in pcb */
487         ipsec6_setspidx_in6pcb(m, so->so_pcb);
488         pcbsp = sotoin6pcb(so)->in6p_sp;
489
490         /* sanity check */
491         if (pcbsp == NULL)
492                 panic("ipsec6_getpolicybysock: pcbsp is NULL.");
493
494         switch (dir) {
495         case IPSEC_DIR_INBOUND:
496                 currsp = pcbsp->sp_in;
497                 break;
498         case IPSEC_DIR_OUTBOUND:
499                 currsp = pcbsp->sp_out;
500                 break;
501         default:
502                 panic("ipsec6_getpolicybysock: illegal direction.");
503         }
504
505         /* sanity check */
506         if (currsp == NULL)
507                 panic("ipsec6_getpolicybysock: currsp is NULL.");
508
509         /* when privilieged socket */
510         if (pcbsp->priv) {
511                 switch (currsp->policy) {
512                 case IPSEC_POLICY_BYPASS:
513                         currsp->refcnt++;
514                         *error = 0;
515                         break;
516                 case IPSEC_POLICY_ENTRUST:
517                         /* look for a policy in SPD */
518                         kernsp = key_allocsp(&currsp->spidx, dir);
519
520                         /* SP found */
521                         if (kernsp != NULL) {
522                                 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
523                                         kprintf("DP ipsec6_getpolicybysock called "
524                                                "to allocate SP:%p\n", kernsp));
525                                 *error = 0;
526                                 currsp = kernsp;
527                                 break;
528                         }
529
530                         /* no SP found */
531                         if (ip6_def_policy.policy != IPSEC_POLICY_DISCARD
532                          && ip6_def_policy.policy != IPSEC_POLICY_NONE) {
533                                 ipseclog((LOG_INFO,
534                                     "fixed system default policy: %d->%d\n",
535                                     ip6_def_policy.policy, IPSEC_POLICY_NONE));
536                                 ip6_def_policy.policy = IPSEC_POLICY_NONE;
537                         }
538                         currsp = &ip6_def_policy;
539                         currsp->refcnt++;
540                         *error = 0;
541                         break;
542                 case IPSEC_POLICY_IPSEC:
543                         currsp->refcnt++;
544                         *error = 0;
545                         break;
546                 default:
547                         ipseclog((LOG_ERR, "ipsec6_getpolicybysock: "
548                             "Invalid policy for PCB %d\n", currsp->policy));
549                         *error = EINVAL;
550                         lwkt_reltoken(&key_token);
551                         currsp = NULL;
552                         break;
553                 }
554                 lwkt_reltoken(&key_token);
555                 return currsp;
556                 /* NOTREACHED */
557         }
558
559         /* when non-privilieged socket */
560         /* look for a policy in SPD */
561         kernsp = key_allocsp(&currsp->spidx, dir);
562
563         /* SP found */
564         if (kernsp != NULL) {
565                 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
566                         kprintf("DP ipsec6_getpolicybysock called "
567                                "to allocate SP:%p\n", kernsp));
568                 *error = 0;
569                 lwkt_reltoken(&key_token);
570                 return kernsp;
571         }
572
573         /* no SP found */
574         switch (currsp->policy) {
575         case IPSEC_POLICY_BYPASS:
576                 ipseclog((LOG_ERR, "ipsec6_getpolicybysock: "
577                     "Illegal policy for non-priviliged defined %d\n",
578                     currsp->policy));
579                 *error = EINVAL;
580                 currsp = NULL;
581                 break;
582         case IPSEC_POLICY_ENTRUST:
583                 if (ip6_def_policy.policy != IPSEC_POLICY_DISCARD
584                  && ip6_def_policy.policy != IPSEC_POLICY_NONE) {
585                         ipseclog((LOG_INFO,
586                             "fixed system default policy: %d->%d\n",
587                             ip6_def_policy.policy, IPSEC_POLICY_NONE));
588                         ip6_def_policy.policy = IPSEC_POLICY_NONE;
589                 }
590                 currsp = &ip6_def_policy;
591                 currsp->refcnt++;
592                 *error = 0;
593                 break;
594         case IPSEC_POLICY_IPSEC:
595                 currsp->refcnt++;
596                 *error = 0;
597                 break;
598         default:
599                 ipseclog((LOG_ERR,
600                     "ipsec6_policybysock: Invalid policy for PCB %d\n",
601                     currsp->policy));
602                 *error = EINVAL;
603                 currsp = NULL;
604                 break;
605         }
606         lwkt_reltoken(&key_token);
607         return currsp;
608 }
609
610 /*
611  * For FORWADING packet or OUTBOUND without a socket. Searching SPD for packet,
612  * and return a pointer to SP.
613  * `flag' means that packet is to be forwarded whether or not.
614  *      flag = 1: forwad
615  * OUT: positive: a pointer to the entry for security policy leaf matched.
616  *      NULL:   no apropreate SP found, the following value is set to error.
617  *              0       : bypass
618  *              EACCES  : discard packet.
619  *              ENOENT  : ipsec_acquire() in progress, maybe.
620  *              others  : error occured.
621  */
622 #ifndef IP_FORWARDING
623 #define IP_FORWARDING 1
624 #endif
625
626 struct secpolicy *
627 ipsec6_getpolicybyaddr(struct mbuf *m, u_int dir, int flag, int *error)
628 {
629         struct secpolicy *sp = NULL;
630         struct secpolicyindex spidx;
631
632         /* sanity check */
633         if (m == NULL || error == NULL)
634                 panic("ipsec6_getpolicybyaddr: NULL pointer was passed.");
635         bzero(&spidx, sizeof(spidx));
636
637         lwkt_gettoken(&key_token);
638
639         /* make a index to look for a policy */
640         *error = ipsec_setspidx_mbuf(&spidx, dir, AF_INET6, m,
641                                      (flag & IP_FORWARDING) ? 0 : 1);
642         if (*error != 0) {
643                 lwkt_reltoken(&key_token);
644                 return NULL;
645         }
646
647         sp = key_allocsp(&spidx, dir);
648
649         /* SP found */
650         if (sp != NULL) {
651                 KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
652                         kprintf("DP ipsec6_getpolicybyaddr called "
653                                "to allocate SP:%p\n", sp));
654                 *error = 0;
655                 lwkt_reltoken(&key_token);
656                 return sp;
657         }
658
659         /* no SP found */
660         if (ip6_def_policy.policy != IPSEC_POLICY_DISCARD
661          && ip6_def_policy.policy != IPSEC_POLICY_NONE) {
662                 ipseclog((LOG_INFO, "fixed system default policy: %d->%d\n",
663                     ip6_def_policy.policy, IPSEC_POLICY_NONE));
664                 ip6_def_policy.policy = IPSEC_POLICY_NONE;
665         }
666         sp = &ip6_def_policy;
667         sp->refcnt++;
668         *error = 0;
669         lwkt_reltoken(&key_token);
670
671         return sp;
672 }
673 #endif /* INET6 */
674
675 /*
676  * set IP address into spidx from mbuf.
677  * When Forwarding packet and ICMP echo reply, this function is used.
678  *
679  * IN:  get the followings from mbuf.
680  *      protocol family, src, dst, next protocol
681  * OUT:
682  *      0:      success.
683  *      other:  failure, and set errno.
684  */
685 int
686 ipsec_setspidx_mbuf(struct secpolicyindex *spidx, u_int dir, u_int family,
687                     struct mbuf *m, int needport)
688 {
689         int error;
690
691         /* sanity check */
692         if (spidx == NULL || m == NULL)
693                 panic("ipsec_setspidx_mbuf: NULL pointer was passed.");
694
695         bzero(spidx, sizeof(*spidx));
696
697         error = ipsec_setspidx(m, spidx, needport);
698         if (error)
699                 goto bad;
700         spidx->dir = dir;
701
702         return 0;
703
704 bad:
705         /* XXX initialize */
706         bzero(spidx, sizeof(*spidx));
707         return EINVAL;
708 }
709
710 static int
711 ipsec4_setspidx_inpcb(struct mbuf *m, struct inpcb *pcb)
712 {
713         struct secpolicyindex *spidx;
714         int error;
715
716         /* sanity check */
717         if (pcb == NULL)
718                 panic("ipsec4_setspidx_inpcb: no PCB found.");
719         if (pcb->inp_sp == NULL)
720                 panic("ipsec4_setspidx_inpcb: no inp_sp found.");
721         if (pcb->inp_sp->sp_out == NULL || pcb->inp_sp->sp_in == NULL)
722                 panic("ipsec4_setspidx_inpcb: no sp_in/out found.");
723
724         bzero(&pcb->inp_sp->sp_in->spidx, sizeof(*spidx));
725         bzero(&pcb->inp_sp->sp_out->spidx, sizeof(*spidx));
726
727         spidx = &pcb->inp_sp->sp_in->spidx;
728         error = ipsec_setspidx(m, spidx, 1);
729         if (error)
730                 goto bad;
731         spidx->dir = IPSEC_DIR_INBOUND;
732
733         spidx = &pcb->inp_sp->sp_out->spidx;
734         error = ipsec_setspidx(m, spidx, 1);
735         if (error)
736                 goto bad;
737         spidx->dir = IPSEC_DIR_OUTBOUND;
738
739         return 0;
740
741 bad:
742         bzero(&pcb->inp_sp->sp_in->spidx, sizeof(*spidx));
743         bzero(&pcb->inp_sp->sp_out->spidx, sizeof(*spidx));
744         return error;
745 }
746
747 #ifdef INET6
748 static int
749 ipsec6_setspidx_in6pcb(struct mbuf *m, struct in6pcb *pcb)
750 {
751         struct secpolicyindex *spidx;
752         int error;
753
754         /* sanity check */
755         if (pcb == NULL)
756                 panic("ipsec6_setspidx_in6pcb: no PCB found.");
757         if (pcb->in6p_sp == NULL)
758                 panic("ipsec6_setspidx_in6pcb: no in6p_sp found.");
759         if (pcb->in6p_sp->sp_out == NULL || pcb->in6p_sp->sp_in == NULL)
760                 panic("ipsec6_setspidx_in6pcb: no sp_in/out found.");
761
762         bzero(&pcb->in6p_sp->sp_in->spidx, sizeof(*spidx));
763         bzero(&pcb->in6p_sp->sp_out->spidx, sizeof(*spidx));
764
765         spidx = &pcb->in6p_sp->sp_in->spidx;
766         error = ipsec_setspidx(m, spidx, 1);
767         if (error)
768                 goto bad;
769         spidx->dir = IPSEC_DIR_INBOUND;
770
771         spidx = &pcb->in6p_sp->sp_out->spidx;
772         error = ipsec_setspidx(m, spidx, 1);
773         if (error)
774                 goto bad;
775         spidx->dir = IPSEC_DIR_OUTBOUND;
776
777         return 0;
778
779 bad:
780         bzero(&pcb->in6p_sp->sp_in->spidx, sizeof(*spidx));
781         bzero(&pcb->in6p_sp->sp_out->spidx, sizeof(*spidx));
782         return error;
783 }
784 #endif
785
786 /*
787  * configure security policy index (src/dst/proto/sport/dport)
788  * by looking at the content of mbuf.
789  * the caller is responsible for error recovery (like clearing up spidx).
790  */
791 static int
792 ipsec_setspidx(struct mbuf *m, struct secpolicyindex *spidx, int needport)
793 {
794         struct ip *ip = NULL;
795         struct ip ipbuf;
796         u_int v;
797         struct mbuf *n;
798         int len;
799         int error;
800
801         if (m == NULL)
802                 panic("ipsec_setspidx: m == 0 passed.");
803
804         /*
805          * validate m->m_pkthdr.len.  we see incorrect length if we
806          * mistakenly call this function with inconsistent mbuf chain
807          * (like 4.4BSD tcp/udp processing).  XXX should we panic here?
808          */
809         len = 0;
810         for (n = m; n; n = n->m_next)
811                 len += n->m_len;
812         if (m->m_pkthdr.len != len) {
813                 KEYDEBUG(KEYDEBUG_IPSEC_DUMP,
814                         kprintf("ipsec_setspidx: "
815                                "total of m_len(%d) != pkthdr.len(%d), "
816                                "ignored.\n",
817                                 len, m->m_pkthdr.len));
818                 return EINVAL;
819         }
820
821         if (m->m_pkthdr.len < sizeof(struct ip)) {
822                 KEYDEBUG(KEYDEBUG_IPSEC_DUMP,
823                         kprintf("ipsec_setspidx: "
824                             "pkthdr.len(%d) < sizeof(struct ip), ignored.\n",
825                             m->m_pkthdr.len));
826                 return EINVAL;
827         }
828
829         if (m->m_len >= sizeof(*ip))
830                 ip = mtod(m, struct ip *);
831         else {
832                 m_copydata(m, 0, sizeof(ipbuf), (caddr_t)&ipbuf);
833                 ip = &ipbuf;
834         }
835 #ifdef _IP_VHL
836         v = _IP_VHL_V(ip->ip_vhl);
837 #else
838         v = ip->ip_v;
839 #endif
840         switch (v) {
841         case 4:
842                 error = ipsec4_setspidx_ipaddr(m, spidx);
843                 if (error)
844                         return error;
845                 ipsec4_get_ulp(m, spidx, needport);
846                 return 0;
847 #ifdef INET6
848         case 6:
849                 if (m->m_pkthdr.len < sizeof(struct ip6_hdr)) {
850                         KEYDEBUG(KEYDEBUG_IPSEC_DUMP,
851                                 kprintf("ipsec_setspidx: "
852                                     "pkthdr.len(%d) < sizeof(struct ip6_hdr), "
853                                     "ignored.\n", m->m_pkthdr.len));
854                         return EINVAL;
855                 }
856                 error = ipsec6_setspidx_ipaddr(m, spidx);
857                 if (error)
858                         return error;
859                 ipsec6_get_ulp(m, spidx, needport);
860                 return 0;
861 #endif
862         default:
863                 KEYDEBUG(KEYDEBUG_IPSEC_DUMP,
864                         kprintf("ipsec_setspidx: "
865                             "unknown IP version %u, ignored.\n", v));
866                 return EINVAL;
867         }
868 }
869
870 static void
871 ipsec4_get_ulp(struct mbuf *m, struct secpolicyindex *spidx, int needport)
872 {
873         struct ip ip;
874         struct ip6_ext ip6e;
875         u_int8_t nxt;
876         int off;
877         struct tcphdr th;
878         struct udphdr uh;
879
880         /* sanity check */
881         if (m == NULL)
882                 panic("ipsec4_get_ulp: NULL pointer was passed.");
883         if (m->m_pkthdr.len < sizeof(ip))
884                 panic("ipsec4_get_ulp: too short");
885
886         /* set default */
887         spidx->ul_proto = IPSEC_ULPROTO_ANY;
888         ((struct sockaddr_in *)&spidx->src)->sin_port = IPSEC_PORT_ANY;
889         ((struct sockaddr_in *)&spidx->dst)->sin_port = IPSEC_PORT_ANY;
890
891         m_copydata(m, 0, sizeof(ip), (caddr_t)&ip);
892         /* ip_input() flips it into host endian XXX need more checking */
893         if (ip.ip_off & (IP_MF | IP_OFFMASK))
894                 return;
895
896         nxt = ip.ip_p;
897 #ifdef _IP_VHL
898         off = _IP_VHL_HL(ip->ip_vhl) << 2;
899 #else
900         off = ip.ip_hl << 2;
901 #endif
902         while (off < m->m_pkthdr.len) {
903                 switch (nxt) {
904                 case IPPROTO_TCP:
905                         spidx->ul_proto = nxt;
906                         if (!needport)
907                                 return;
908                         if (off + sizeof(struct tcphdr) > m->m_pkthdr.len)
909                                 return;
910                         m_copydata(m, off, sizeof(th), (caddr_t)&th);
911                         ((struct sockaddr_in *)&spidx->src)->sin_port =
912                             th.th_sport;
913                         ((struct sockaddr_in *)&spidx->dst)->sin_port =
914                             th.th_dport;
915                         return;
916                 case IPPROTO_UDP:
917                         spidx->ul_proto = nxt;
918                         if (!needport)
919                                 return;
920                         if (off + sizeof(struct udphdr) > m->m_pkthdr.len)
921                                 return;
922                         m_copydata(m, off, sizeof(uh), (caddr_t)&uh);
923                         ((struct sockaddr_in *)&spidx->src)->sin_port =
924                             uh.uh_sport;
925                         ((struct sockaddr_in *)&spidx->dst)->sin_port =
926                             uh.uh_dport;
927                         return;
928                 case IPPROTO_AH:
929                         if (off + sizeof(ip6e) > m->m_pkthdr.len)
930                                 return;
931                         m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e);
932                         off += (ip6e.ip6e_len + 2) << 2;
933                         nxt = ip6e.ip6e_nxt;
934                         break;
935                 case IPPROTO_ICMP:
936                 default:
937                         /* XXX intermediate headers??? */
938                         spidx->ul_proto = nxt;
939                         return;
940                 }
941         }
942 }
943
944 /* assumes that m is sane */
945 static int
946 ipsec4_setspidx_ipaddr(struct mbuf *m, struct secpolicyindex *spidx)
947 {
948         struct ip *ip = NULL;
949         struct ip ipbuf;
950         struct sockaddr_in *sin;
951
952         if (m->m_len >= sizeof(*ip))
953                 ip = mtod(m, struct ip *);
954         else {
955                 m_copydata(m, 0, sizeof(ipbuf), (caddr_t)&ipbuf);
956                 ip = &ipbuf;
957         }
958
959         sin = (struct sockaddr_in *)&spidx->src;
960         bzero(sin, sizeof(*sin));
961         sin->sin_family = AF_INET;
962         sin->sin_len = sizeof(struct sockaddr_in);
963         bcopy(&ip->ip_src, &sin->sin_addr, sizeof(ip->ip_src));
964         spidx->prefs = sizeof(struct in_addr) << 3;
965
966         sin = (struct sockaddr_in *)&spidx->dst;
967         bzero(sin, sizeof(*sin));
968         sin->sin_family = AF_INET;
969         sin->sin_len = sizeof(struct sockaddr_in);
970         bcopy(&ip->ip_dst, &sin->sin_addr, sizeof(ip->ip_dst));
971         spidx->prefd = sizeof(struct in_addr) << 3;
972         return 0;
973 }
974
975 #ifdef INET6
976 static void
977 ipsec6_get_ulp(struct mbuf *m, struct secpolicyindex *spidx, int needport)
978 {
979         int off, nxt;
980         struct tcphdr th;
981         struct udphdr uh;
982
983         /* sanity check */
984         if (m == NULL)
985                 panic("ipsec6_get_ulp: NULL pointer was passed.");
986
987         KEYDEBUG(KEYDEBUG_IPSEC_DUMP,
988                 kprintf("ipsec6_get_ulp:\n"); kdebug_mbuf(m));
989
990         /* set default */
991         spidx->ul_proto = IPSEC_ULPROTO_ANY;
992         ((struct sockaddr_in6 *)&spidx->src)->sin6_port = IPSEC_PORT_ANY;
993         ((struct sockaddr_in6 *)&spidx->dst)->sin6_port = IPSEC_PORT_ANY;
994
995         nxt = -1;
996         off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt);
997         if (off < 0 || m->m_pkthdr.len < off)
998                 return;
999
1000         switch (nxt) {
1001         case IPPROTO_TCP:
1002                 spidx->ul_proto = nxt;
1003                 if (!needport)
1004                         break;
1005                 if (off + sizeof(struct tcphdr) > m->m_pkthdr.len)
1006                         break;
1007                 m_copydata(m, off, sizeof(th), (caddr_t)&th);
1008                 ((struct sockaddr_in6 *)&spidx->src)->sin6_port = th.th_sport;
1009                 ((struct sockaddr_in6 *)&spidx->dst)->sin6_port = th.th_dport;
1010                 break;
1011         case IPPROTO_UDP:
1012                 spidx->ul_proto = nxt;
1013                 if (!needport)
1014                         break;
1015                 if (off + sizeof(struct udphdr) > m->m_pkthdr.len)
1016                         break;
1017                 m_copydata(m, off, sizeof(uh), (caddr_t)&uh);
1018                 ((struct sockaddr_in6 *)&spidx->src)->sin6_port = uh.uh_sport;
1019                 ((struct sockaddr_in6 *)&spidx->dst)->sin6_port = uh.uh_dport;
1020                 break;
1021         case IPPROTO_ICMPV6:
1022         default:
1023                 /* XXX intermediate headers??? */
1024                 spidx->ul_proto = nxt;
1025                 break;
1026         }
1027 }
1028
1029 /* assumes that m is sane */
1030 static int
1031 ipsec6_setspidx_ipaddr(struct mbuf *m, struct secpolicyindex *spidx)
1032 {
1033         struct ip6_hdr *ip6 = NULL;
1034         struct ip6_hdr ip6buf;
1035         struct sockaddr_in6 *sin6;
1036
1037         if (m->m_len >= sizeof(*ip6))
1038                 ip6 = mtod(m, struct ip6_hdr *);
1039         else {
1040                 m_copydata(m, 0, sizeof(ip6buf), (caddr_t)&ip6buf);
1041                 ip6 = &ip6buf;
1042         }
1043
1044         sin6 = (struct sockaddr_in6 *)&spidx->src;
1045         bzero(sin6, sizeof(*sin6));
1046         sin6->sin6_family = AF_INET6;
1047         sin6->sin6_len = sizeof(struct sockaddr_in6);
1048         bcopy(&ip6->ip6_src, &sin6->sin6_addr, sizeof(ip6->ip6_src));
1049         if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
1050                 sin6->sin6_addr.s6_addr16[1] = 0;
1051                 sin6->sin6_scope_id = ntohs(ip6->ip6_src.s6_addr16[1]);
1052         }
1053         spidx->prefs = sizeof(struct in6_addr) << 3;
1054
1055         sin6 = (struct sockaddr_in6 *)&spidx->dst;
1056         bzero(sin6, sizeof(*sin6));
1057         sin6->sin6_family = AF_INET6;
1058         sin6->sin6_len = sizeof(struct sockaddr_in6);
1059         bcopy(&ip6->ip6_dst, &sin6->sin6_addr, sizeof(ip6->ip6_dst));
1060         if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) {
1061                 sin6->sin6_addr.s6_addr16[1] = 0;
1062                 sin6->sin6_scope_id = ntohs(ip6->ip6_dst.s6_addr16[1]);
1063         }
1064         spidx->prefd = sizeof(struct in6_addr) << 3;
1065
1066         return 0;
1067 }
1068 #endif
1069
1070 static struct inpcbpolicy *
1071 ipsec_newpcbpolicy(void)
1072 {
1073         struct inpcbpolicy *p;
1074
1075         p = (struct inpcbpolicy *)kmalloc(sizeof(*p), M_SECA, M_NOWAIT);
1076         return p;
1077 }
1078
1079 static void
1080 ipsec_delpcbpolicy(struct inpcbpolicy *p)
1081 {
1082         kfree(p, M_SECA);
1083 }
1084
1085 /* initialize policy in PCB */
1086 int
1087 ipsec_init_policy(struct socket *so, struct inpcbpolicy **pcb_sp)
1088 {
1089         struct inpcbpolicy *new;
1090
1091         /* sanity check. */
1092         if (so == NULL || pcb_sp == NULL)
1093                 panic("ipsec_init_policy: NULL pointer was passed.");
1094
1095         lwkt_gettoken(&key_token);
1096
1097         new = ipsec_newpcbpolicy();
1098         if (new == NULL) {
1099                 ipseclog((LOG_DEBUG, "ipsec_init_policy: No more memory.\n"));
1100                 lwkt_reltoken(&key_token);
1101                 return ENOBUFS;
1102         }
1103         bzero(new, sizeof(*new));
1104
1105         if (so->so_cred != 0 && so->so_cred->cr_uid == 0)
1106                 new->priv = 1;
1107         else
1108                 new->priv = 0;
1109
1110         if ((new->sp_in = key_newsp()) == NULL) {
1111                 ipsec_delpcbpolicy(new);
1112                 lwkt_reltoken(&key_token);
1113                 return ENOBUFS;
1114         }
1115         new->sp_in->state = IPSEC_SPSTATE_ALIVE;
1116         new->sp_in->policy = IPSEC_POLICY_ENTRUST;
1117
1118         if ((new->sp_out = key_newsp()) == NULL) {
1119                 key_freesp(new->sp_in);
1120                 ipsec_delpcbpolicy(new);
1121                 lwkt_reltoken(&key_token);
1122                 return ENOBUFS;
1123         }
1124         new->sp_out->state = IPSEC_SPSTATE_ALIVE;
1125         new->sp_out->policy = IPSEC_POLICY_ENTRUST;
1126
1127         *pcb_sp = new;
1128         lwkt_reltoken(&key_token);
1129
1130         return 0;
1131 }
1132
1133 /* copy old ipsec policy into new */
1134 int
1135 ipsec_copy_policy(struct inpcbpolicy *old, struct inpcbpolicy *new)
1136 {
1137         struct secpolicy *sp;
1138
1139         lwkt_gettoken(&key_token);
1140         sp = ipsec_deepcopy_policy(old->sp_in);
1141         if (sp) {
1142                 key_freesp(new->sp_in);
1143                 new->sp_in = sp;
1144         } else {
1145                 lwkt_reltoken(&key_token);
1146                 return ENOBUFS;
1147         }
1148
1149         sp = ipsec_deepcopy_policy(old->sp_out);
1150         if (sp) {
1151                 key_freesp(new->sp_out);
1152                 new->sp_out = sp;
1153         } else {
1154                 lwkt_reltoken(&key_token);
1155                 return ENOBUFS;
1156         }
1157
1158         new->priv = old->priv;
1159         lwkt_reltoken(&key_token);
1160
1161         return 0;
1162 }
1163
1164 /* deep-copy a policy in PCB */
1165 static struct secpolicy *
1166 ipsec_deepcopy_policy(struct secpolicy *src)
1167 {
1168         struct ipsecrequest *newchain = NULL;
1169         struct ipsecrequest *p;
1170         struct ipsecrequest **q;
1171         struct ipsecrequest *r;
1172         struct secpolicy *dst;
1173
1174         lwkt_gettoken(&key_token);
1175         dst = key_newsp();
1176         if (src == NULL || dst == NULL) {
1177                 lwkt_reltoken(&key_token);
1178                 return NULL;
1179         }
1180
1181         /*
1182          * deep-copy IPsec request chain.  This is required since struct
1183          * ipsecrequest is not reference counted.
1184          */
1185         q = &newchain;
1186         for (p = src->req; p; p = p->next) {
1187                 *q = (struct ipsecrequest *)kmalloc(sizeof(struct ipsecrequest),
1188                         M_SECA, M_NOWAIT | M_ZERO);
1189                 if (*q == NULL)
1190                         goto fail;
1191                 (*q)->next = NULL;
1192
1193                 (*q)->saidx.proto = p->saidx.proto;
1194                 (*q)->saidx.mode = p->saidx.mode;
1195                 (*q)->level = p->level;
1196                 (*q)->saidx.reqid = p->saidx.reqid;
1197
1198                 bcopy(&p->saidx.src, &(*q)->saidx.src, sizeof((*q)->saidx.src));
1199                 bcopy(&p->saidx.dst, &(*q)->saidx.dst, sizeof((*q)->saidx.dst));
1200
1201                 (*q)->sav = NULL;
1202                 (*q)->sp = dst;
1203
1204                 q = &((*q)->next);
1205         }
1206
1207         dst->req = newchain;
1208         dst->state = src->state;
1209         dst->policy = src->policy;
1210         /* do not touch the refcnt fields */
1211         lwkt_reltoken(&key_token);
1212
1213         return dst;
1214
1215 fail:
1216         lwkt_reltoken(&key_token);
1217         for (p = newchain; p; p = r) {
1218                 r = p->next;
1219                 kfree(p, M_SECA);
1220                 p = NULL;
1221         }
1222         return NULL;
1223 }
1224
1225 /* set policy and ipsec request if present. */
1226 static int
1227 ipsec_set_policy(struct secpolicy **pcb_sp, int optname, caddr_t request,
1228                  size_t len, int priv)
1229 {
1230         struct sadb_x_policy *xpl;
1231         struct secpolicy *newsp = NULL;
1232         int error;
1233
1234         /* sanity check. */
1235         if (pcb_sp == NULL || *pcb_sp == NULL || request == NULL)
1236                 return EINVAL;
1237         if (len < sizeof(*xpl))
1238                 return EINVAL;
1239         xpl = (struct sadb_x_policy *)request;
1240
1241         KEYDEBUG(KEYDEBUG_IPSEC_DUMP,
1242                 kprintf("ipsec_set_policy: passed policy\n");
1243                 kdebug_sadb_x_policy((struct sadb_ext *)xpl));
1244
1245         /* check policy type */
1246         /* ipsec_set_policy() accepts IPSEC, ENTRUST and BYPASS. */
1247         if (xpl->sadb_x_policy_type == IPSEC_POLICY_DISCARD
1248          || xpl->sadb_x_policy_type == IPSEC_POLICY_NONE)
1249                 return EINVAL;
1250
1251         /* check privileged socket */
1252         if (priv == 0 && xpl->sadb_x_policy_type == IPSEC_POLICY_BYPASS)
1253                 return EACCES;
1254
1255         /* allocation new SP entry */
1256         if ((newsp = key_msg2sp(xpl, len, &error)) == NULL)
1257                 return error;
1258
1259         newsp->state = IPSEC_SPSTATE_ALIVE;
1260
1261         /* clear old SP and set new SP */
1262         key_freesp(*pcb_sp);
1263         *pcb_sp = newsp;
1264         KEYDEBUG(KEYDEBUG_IPSEC_DUMP,
1265                 kprintf("ipsec_set_policy: new policy\n");
1266                 kdebug_secpolicy(newsp));
1267
1268         return 0;
1269 }
1270
1271 static int
1272 ipsec_get_policy(struct secpolicy *pcb_sp, struct mbuf **mp)
1273 {
1274
1275         /* sanity check. */
1276         if (pcb_sp == NULL || mp == NULL)
1277                 return EINVAL;
1278
1279         *mp = key_sp2msg(pcb_sp);
1280         if (!*mp) {
1281                 ipseclog((LOG_DEBUG, "ipsec_get_policy: No more memory.\n"));
1282                 return ENOBUFS;
1283         }
1284
1285         KKASSERT((*mp)->m_type == MT_DATA);
1286         KEYDEBUG(KEYDEBUG_IPSEC_DUMP,
1287                 kprintf("ipsec_get_policy:\n");
1288                 kdebug_mbuf(*mp));
1289
1290         return 0;
1291 }
1292
1293 int
1294 ipsec4_set_policy(struct inpcb *inp, int optname, caddr_t request, size_t len,
1295                   int priv)
1296 {
1297         struct sadb_x_policy *xpl;
1298         struct secpolicy **pcb_sp;
1299         int error;
1300
1301         /* sanity check. */
1302         if (inp == NULL || request == NULL)
1303                 return EINVAL;
1304         if (len < sizeof(*xpl))
1305                 return EINVAL;
1306         xpl = (struct sadb_x_policy *)request;
1307
1308         lwkt_gettoken(&key_token);
1309         /* select direction */
1310         switch (xpl->sadb_x_policy_dir) {
1311         case IPSEC_DIR_INBOUND:
1312                 pcb_sp = &inp->inp_sp->sp_in;
1313                 break;
1314         case IPSEC_DIR_OUTBOUND:
1315                 pcb_sp = &inp->inp_sp->sp_out;
1316                 break;
1317         default:
1318                 ipseclog((LOG_ERR, "ipsec4_set_policy: invalid direction=%u\n",
1319                         xpl->sadb_x_policy_dir));
1320                 lwkt_reltoken(&key_token);
1321                 return EINVAL;
1322         }
1323         error = ipsec_set_policy(pcb_sp, optname, request, len, priv);
1324         lwkt_reltoken(&key_token);
1325         return error;
1326 }
1327
1328 int
1329 ipsec4_get_policy(struct inpcb *inp, caddr_t request, size_t len,
1330                   struct mbuf **mp)
1331 {
1332         struct sadb_x_policy *xpl;
1333         struct secpolicy *pcb_sp;
1334         int error;
1335
1336         /* sanity check. */
1337         if (inp == NULL || request == NULL || mp == NULL)
1338                 return EINVAL;
1339         if (inp->inp_sp == NULL)
1340                 panic("policy in PCB is NULL");
1341         if (len < sizeof(*xpl))
1342                 return EINVAL;
1343         xpl = (struct sadb_x_policy *)request;
1344
1345         lwkt_gettoken(&key_token);
1346
1347         /* select direction */
1348         switch (xpl->sadb_x_policy_dir) {
1349         case IPSEC_DIR_INBOUND:
1350                 pcb_sp = inp->inp_sp->sp_in;
1351                 break;
1352         case IPSEC_DIR_OUTBOUND:
1353                 pcb_sp = inp->inp_sp->sp_out;
1354                 break;
1355         default:
1356                 ipseclog((LOG_ERR, "ipsec4_set_policy: invalid direction=%u\n",
1357                         xpl->sadb_x_policy_dir));
1358                 lwkt_reltoken(&key_token);
1359                 return EINVAL;
1360         }
1361         error = ipsec_get_policy(pcb_sp, mp);
1362         lwkt_reltoken(&key_token);
1363         return error;
1364 }
1365
1366 /* delete policy in PCB */
1367 int
1368 ipsec4_delete_pcbpolicy(struct inpcb *inp)
1369 {
1370         struct inpcbpolicy *isp;
1371
1372         /* sanity check. */
1373         if (inp == NULL)
1374                 panic("ipsec4_delete_pcbpolicy: NULL pointer was passed.");
1375
1376         lwkt_gettoken(&key_token);
1377
1378         if ((isp = inp->inp_sp) == NULL) {
1379                 lwkt_reltoken(&key_token);
1380                 return 0;
1381         }
1382
1383         if (isp->sp_in != NULL) {
1384                 key_freesp(isp->sp_in);
1385                 isp->sp_in = NULL;
1386         }
1387
1388         if (isp->sp_out != NULL) {
1389                 key_freesp(isp->sp_out);
1390                 isp->sp_out = NULL;
1391         }
1392         KKASSERT(inp->inp_sp == isp);
1393         inp->inp_sp = NULL;
1394         ipsec_delpcbpolicy(isp);
1395         lwkt_reltoken(&key_token);
1396
1397         return 0;
1398 }
1399
1400 #ifdef INET6
1401 int
1402 ipsec6_set_policy(struct in6pcb *in6p, int optname, caddr_t request, size_t len,
1403                   int priv)
1404 {
1405         struct sadb_x_policy *xpl;
1406         struct secpolicy **pcb_sp;
1407         int error;
1408
1409         /* sanity check. */
1410         if (in6p == NULL || request == NULL)
1411                 return EINVAL;
1412         if (len < sizeof(*xpl))
1413                 return EINVAL;
1414         xpl = (struct sadb_x_policy *)request;
1415
1416         lwkt_gettoken(&key_token);
1417
1418         /* select direction */
1419         switch (xpl->sadb_x_policy_dir) {
1420         case IPSEC_DIR_INBOUND:
1421                 pcb_sp = &in6p->in6p_sp->sp_in;
1422                 break;
1423         case IPSEC_DIR_OUTBOUND:
1424                 pcb_sp = &in6p->in6p_sp->sp_out;
1425                 break;
1426         default:
1427                 ipseclog((LOG_ERR, "ipsec6_set_policy: invalid direction=%u\n",
1428                         xpl->sadb_x_policy_dir));
1429                 lwkt_reltoken(&key_token);
1430                 return EINVAL;
1431         }
1432
1433         error = ipsec_set_policy(pcb_sp, optname, request, len, priv);
1434         lwkt_reltoken(&key_token);
1435         return error;
1436 }
1437
1438 int
1439 ipsec6_get_policy(struct in6pcb *in6p, caddr_t request, size_t len,
1440                   struct mbuf **mp)
1441 {
1442         struct sadb_x_policy *xpl;
1443         struct secpolicy *pcb_sp;
1444         int error;
1445
1446         /* sanity check. */
1447         if (in6p == NULL || request == NULL || mp == NULL)
1448                 return EINVAL;
1449         if (in6p->in6p_sp == NULL)
1450                 panic("policy in PCB is NULL");
1451         if (len < sizeof(*xpl))
1452                 return EINVAL;
1453         xpl = (struct sadb_x_policy *)request;
1454
1455         lwkt_gettoken(&key_token);
1456
1457         /* select direction */
1458         switch (xpl->sadb_x_policy_dir) {
1459         case IPSEC_DIR_INBOUND:
1460                 pcb_sp = in6p->in6p_sp->sp_in;
1461                 break;
1462         case IPSEC_DIR_OUTBOUND:
1463                 pcb_sp = in6p->in6p_sp->sp_out;
1464                 break;
1465         default:
1466                 ipseclog((LOG_ERR, "ipsec6_set_policy: invalid direction=%u\n",
1467                         xpl->sadb_x_policy_dir));
1468                 lwkt_reltoken(&key_token);
1469                 return EINVAL;
1470         }
1471
1472         error = ipsec_get_policy(pcb_sp, mp);
1473         lwkt_reltoken(&key_token);
1474         return error;
1475 }
1476
1477 int
1478 ipsec6_delete_pcbpolicy(struct in6pcb *in6p)
1479 {
1480         struct inpcbpolicy *isp;
1481
1482         /* sanity check. */
1483         if (in6p == NULL)
1484                 panic("ipsec6_delete_pcbpolicy: NULL pointer was passed.");
1485
1486         lwkt_gettoken(&key_token);
1487
1488         if ((isp = in6p->in6p_sp) == NULL) {
1489                 lwkt_reltoken(&key_token);
1490                 return 0;
1491         }
1492
1493         if (isp->sp_in != NULL) {
1494                 key_freesp(isp->sp_in);
1495                 isp->sp_in = NULL;
1496         }
1497
1498         if (isp->sp_out != NULL) {
1499                 key_freesp(isp->sp_out);
1500                 isp->sp_out = NULL;
1501         }
1502         KKASSERT(in6p->in6p_sp == isp);
1503         in6p->in6p_sp = NULL;
1504         ipsec_delpcbpolicy(isp);
1505         lwkt_reltoken(&key_token);
1506
1507         return 0;
1508 }
1509 #endif
1510
1511 /*
1512  * return current level.
1513  * Either IPSEC_LEVEL_USE or IPSEC_LEVEL_REQUIRE are always returned.
1514  */
1515 u_int
1516 ipsec_get_reqlevel(struct ipsecrequest *isr)
1517 {
1518         u_int level = 0;
1519         u_int esp_trans_deflev, esp_net_deflev, ah_trans_deflev, ah_net_deflev;
1520
1521         /* sanity check */
1522         if (isr == NULL || isr->sp == NULL)
1523                 panic("ipsec_get_reqlevel: NULL pointer is passed.");
1524         if (((struct sockaddr *)&isr->sp->spidx.src)->sa_family
1525                         != ((struct sockaddr *)&isr->sp->spidx.dst)->sa_family)
1526                 panic("ipsec_get_reqlevel: family mismatched.");
1527
1528 /* XXX note that we have ipseclog() expanded here - code sync issue */
1529 #define IPSEC_CHECK_DEFAULT(lev) \
1530         (((lev) != IPSEC_LEVEL_USE && (lev) != IPSEC_LEVEL_REQUIRE            \
1531                         && (lev) != IPSEC_LEVEL_UNIQUE)                       \
1532                 ? (ipsec_debug                                                \
1533                         ? log(LOG_INFO, "fixed system default level " #lev ":%d->%d\n",\
1534                                 (lev), IPSEC_LEVEL_REQUIRE)                   \
1535                         : 0),                                                 \
1536                         (lev) = IPSEC_LEVEL_REQUIRE,                          \
1537                         (lev)                                                 \
1538                 : (lev))
1539
1540         /* set default level */
1541         switch (((struct sockaddr *)&isr->sp->spidx.src)->sa_family) {
1542 #ifdef INET
1543         case AF_INET:
1544                 esp_trans_deflev = IPSEC_CHECK_DEFAULT(ip4_esp_trans_deflev);
1545                 esp_net_deflev = IPSEC_CHECK_DEFAULT(ip4_esp_net_deflev);
1546                 ah_trans_deflev = IPSEC_CHECK_DEFAULT(ip4_ah_trans_deflev);
1547                 ah_net_deflev = IPSEC_CHECK_DEFAULT(ip4_ah_net_deflev);
1548                 break;
1549 #endif
1550 #ifdef INET6
1551         case AF_INET6:
1552                 esp_trans_deflev = IPSEC_CHECK_DEFAULT(ip6_esp_trans_deflev);
1553                 esp_net_deflev = IPSEC_CHECK_DEFAULT(ip6_esp_net_deflev);
1554                 ah_trans_deflev = IPSEC_CHECK_DEFAULT(ip6_ah_trans_deflev);
1555                 ah_net_deflev = IPSEC_CHECK_DEFAULT(ip6_ah_net_deflev);
1556                 break;
1557 #endif /* INET6 */
1558         default:
1559                 panic("key_get_reqlevel: Unknown family. %d",
1560                         ((struct sockaddr *)&isr->sp->spidx.src)->sa_family);
1561         }
1562
1563 #undef IPSEC_CHECK_DEFAULT
1564
1565         /* set level */
1566         switch (isr->level) {
1567         case IPSEC_LEVEL_DEFAULT:
1568                 switch (isr->saidx.proto) {
1569                 case IPPROTO_ESP:
1570                         if (isr->saidx.mode == IPSEC_MODE_TUNNEL)
1571                                 level = esp_net_deflev;
1572                         else
1573                                 level = esp_trans_deflev;
1574                         break;
1575                 case IPPROTO_AH:
1576                         if (isr->saidx.mode == IPSEC_MODE_TUNNEL)
1577                                 level = ah_net_deflev;
1578                         else
1579                                 level = ah_trans_deflev;
1580                 case IPPROTO_IPCOMP:
1581                         /*
1582                          * we don't really care, as IPcomp document says that
1583                          * we shouldn't compress small packets
1584                          */
1585                         level = IPSEC_LEVEL_USE;
1586                         break;
1587                 default:
1588                         panic("ipsec_get_reqlevel: "
1589                                 "Illegal protocol defined %u",
1590                                 isr->saidx.proto);
1591                 }
1592                 break;
1593
1594         case IPSEC_LEVEL_USE:
1595         case IPSEC_LEVEL_REQUIRE:
1596                 level = isr->level;
1597                 break;
1598         case IPSEC_LEVEL_UNIQUE:
1599                 level = IPSEC_LEVEL_REQUIRE;
1600                 break;
1601
1602         default:
1603                 panic("ipsec_get_reqlevel: Illegal IPsec level %u",
1604                         isr->level);
1605         }
1606
1607         return level;
1608 }
1609
1610 /*
1611  * Check AH/ESP integrity.
1612  * OUT:
1613  *      0: valid
1614  *      1: invalid
1615  */
1616 static int
1617 ipsec_in_reject(struct secpolicy *sp, struct mbuf *m)
1618 {
1619         struct ipsecrequest *isr;
1620         u_int level;
1621         int need_auth, need_conf, need_icv;
1622
1623         KEYDEBUG(KEYDEBUG_IPSEC_DATA,
1624                 kprintf("ipsec_in_reject: using SP\n");
1625                 kdebug_secpolicy(sp));
1626
1627         /* check policy */
1628         switch (sp->policy) {
1629         case IPSEC_POLICY_DISCARD:
1630                 return 1;
1631         case IPSEC_POLICY_BYPASS:
1632         case IPSEC_POLICY_NONE:
1633                 return 0;
1634         
1635         case IPSEC_POLICY_IPSEC:
1636                 break;
1637
1638         case IPSEC_POLICY_ENTRUST:
1639         default:
1640                 panic("ipsec_hdrsiz: Invalid policy found. %d", sp->policy);
1641         }
1642
1643         need_auth = 0;
1644         need_conf = 0;
1645         need_icv = 0;
1646
1647         /* XXX should compare policy against ipsec header history */
1648
1649         for (isr = sp->req; isr != NULL; isr = isr->next) {
1650
1651                 /* get current level */
1652                 level = ipsec_get_reqlevel(isr);
1653
1654                 switch (isr->saidx.proto) {
1655                 case IPPROTO_ESP:
1656                         if (level == IPSEC_LEVEL_REQUIRE) {
1657                                 need_conf++;
1658
1659                                 if (isr->sav != NULL
1660                                  && isr->sav->flags == SADB_X_EXT_NONE
1661                                  && isr->sav->alg_auth != SADB_AALG_NONE)
1662                                         need_icv++;
1663                         }
1664                         break;
1665                 case IPPROTO_AH:
1666                         if (level == IPSEC_LEVEL_REQUIRE) {
1667                                 need_auth++;
1668                                 need_icv++;
1669                         }
1670                         break;
1671                 case IPPROTO_IPCOMP:
1672                         /*
1673                          * we don't really care, as IPcomp document says that
1674                          * we shouldn't compress small packets, IPComp policy
1675                          * should always be treated as being in "use" level.
1676                          */
1677                         break;
1678                 }
1679         }
1680
1681         KEYDEBUG(KEYDEBUG_IPSEC_DUMP,
1682                 kprintf("ipsec_in_reject: auth:%d conf:%d icv:%d m_flags:%x\n",
1683                         need_auth, need_conf, need_icv, m->m_flags));
1684
1685         if ((need_conf && !(m->m_flags & M_DECRYPTED))
1686          || (!need_auth && need_icv && !(m->m_flags & M_AUTHIPDGM))
1687          || (need_auth && !(m->m_flags & M_AUTHIPHDR)))
1688                 return 1;
1689
1690         return 0;
1691 }
1692
1693 /*
1694  * Check AH/ESP integrity.
1695  * This function is called from tcp_input(), udp_input(),
1696  * and {ah,esp}4_input for tunnel mode
1697  */
1698 int
1699 ipsec4_in_reject_so(struct mbuf *m, struct socket *so)
1700 {
1701         struct secpolicy *sp = NULL;
1702         int error;
1703         int result;
1704
1705         /* sanity check */
1706         if (m == NULL)
1707                 return 0;       /* XXX should be panic ? */
1708
1709         /* get SP for this packet.
1710          * When we are called from ip_forward(), we call
1711          * ipsec4_getpolicybyaddr() with IP_FORWARDING flag.
1712          */
1713         lwkt_gettoken(&key_token);
1714         if (so == NULL)
1715                 sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_INBOUND, IP_FORWARDING, &error);
1716         else
1717                 sp = ipsec4_getpolicybysock(m, IPSEC_DIR_INBOUND, so, &error);
1718
1719         if (sp == NULL) {
1720                 lwkt_reltoken(&key_token);
1721                 return 0;
1722         }
1723
1724         result = ipsec_in_reject(sp, m);
1725         KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
1726                 kprintf("DP ipsec4_in_reject_so call free SP:%p\n", sp));
1727         key_freesp(sp);
1728         lwkt_reltoken(&key_token);
1729
1730         return result;
1731 }
1732
1733 int
1734 ipsec4_in_reject(struct mbuf *m, struct inpcb *inp)
1735 {
1736         if (inp == NULL)
1737                 return ipsec4_in_reject_so(m, NULL);
1738         if (inp->inp_socket)
1739                 return ipsec4_in_reject_so(m, inp->inp_socket);
1740         else
1741                 panic("ipsec4_in_reject: invalid inpcb/socket");
1742 }
1743
1744 #ifdef INET6
1745 /*
1746  * Check AH/ESP integrity.
1747  * This function is called from tcp6_input(), udp6_input(),
1748  * and {ah,esp}6_input for tunnel mode
1749  */
1750 int
1751 ipsec6_in_reject_so(struct mbuf *m, struct socket *so)
1752 {
1753         struct secpolicy *sp = NULL;
1754         int error;
1755         int result;
1756
1757         /* sanity check */
1758         if (m == NULL)
1759                 return 0;       /* XXX should be panic ? */
1760
1761         /* get SP for this packet.
1762          * When we are called from ip_forward(), we call
1763          * ipsec6_getpolicybyaddr() with IP_FORWARDING flag.
1764          */
1765         lwkt_gettoken(&key_token);
1766         if (so == NULL)
1767                 sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_INBOUND, IP_FORWARDING, &error);
1768         else
1769                 sp = ipsec6_getpolicybysock(m, IPSEC_DIR_INBOUND, so, &error);
1770
1771         if (sp == NULL) {
1772                 lwkt_reltoken(&key_token);
1773                 return 0;       /* XXX should be panic ? */
1774         }
1775
1776         result = ipsec_in_reject(sp, m);
1777         KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
1778                 kprintf("DP ipsec6_in_reject_so call free SP:%p\n", sp));
1779         key_freesp(sp);
1780         lwkt_reltoken(&key_token);
1781
1782         return result;
1783 }
1784
1785 int
1786 ipsec6_in_reject(struct mbuf *m, struct in6pcb *in6p)
1787 {
1788         if (in6p == NULL)
1789                 return ipsec6_in_reject_so(m, NULL);
1790         if (in6p->in6p_socket)
1791                 return ipsec6_in_reject_so(m, in6p->in6p_socket);
1792         else
1793                 panic("ipsec6_in_reject: invalid in6p/socket");
1794 }
1795 #endif
1796
1797 /*
1798  * compute the byte size to be occupied by IPsec header.
1799  * in case it is tunneled, it includes the size of outer IP header.
1800  * NOTE: SP passed is free in this function.
1801  */
1802 static size_t
1803 ipsec_hdrsiz(struct secpolicy *sp)
1804 {
1805         struct ipsecrequest *isr;
1806         size_t siz, clen;
1807
1808         KEYDEBUG(KEYDEBUG_IPSEC_DATA,
1809                 kprintf("ipsec_hdrsiz: using SP\n");
1810                 kdebug_secpolicy(sp));
1811
1812         /* check policy */
1813         switch (sp->policy) {
1814         case IPSEC_POLICY_DISCARD:
1815         case IPSEC_POLICY_BYPASS:
1816         case IPSEC_POLICY_NONE:
1817                 return 0;
1818         
1819         case IPSEC_POLICY_IPSEC:
1820                 break;
1821
1822         case IPSEC_POLICY_ENTRUST:
1823         default:
1824                 panic("ipsec_hdrsiz: Invalid policy found. %d", sp->policy);
1825         }
1826
1827         siz = 0;
1828
1829         for (isr = sp->req; isr != NULL; isr = isr->next) {
1830
1831                 clen = 0;
1832
1833                 switch (isr->saidx.proto) {
1834                 case IPPROTO_ESP:
1835 #ifdef IPSEC_ESP
1836                         clen = esp_hdrsiz(isr);
1837 #else
1838                         clen = 0;       /* XXX */
1839 #endif
1840                         break;
1841                 case IPPROTO_AH:
1842                         clen = ah_hdrsiz(isr);
1843                         break;
1844                 case IPPROTO_IPCOMP:
1845                         clen = sizeof(struct ipcomp);
1846                         break;
1847                 }
1848
1849                 if (isr->saidx.mode == IPSEC_MODE_TUNNEL) {
1850                         switch (((struct sockaddr *)&isr->saidx.dst)->sa_family) {
1851                         case AF_INET:
1852                                 clen += sizeof(struct ip);
1853                                 break;
1854 #ifdef INET6
1855                         case AF_INET6:
1856                                 clen += sizeof(struct ip6_hdr);
1857                                 break;
1858 #endif
1859                         default:
1860                                 ipseclog((LOG_ERR, "ipsec_hdrsiz: "
1861                                     "unknown AF %d in IPsec tunnel SA\n",
1862                                     ((struct sockaddr *)&isr->saidx.dst)->sa_family));
1863                                 break;
1864                         }
1865                 }
1866                 siz += clen;
1867         }
1868
1869         return siz;
1870 }
1871
1872 /* This function is called from ip_forward() and ipsec4_hdrsize_tcp(). */
1873 size_t
1874 ipsec4_hdrsiz(struct mbuf *m, u_int dir, struct inpcb *inp)
1875 {
1876         struct secpolicy *sp = NULL;
1877         int error;
1878         size_t size;
1879
1880         /* sanity check */
1881         if (m == NULL)
1882                 return 0;       /* XXX should be panic ? */
1883         if (inp != NULL && inp->inp_socket == NULL)
1884                 panic("ipsec4_hdrsize: why is socket NULL but there is PCB.");
1885
1886         /* get SP for this packet.
1887          * When we are called from ip_forward(), we call
1888          * ipsec4_getpolicybyaddr() with IP_FORWARDING flag.
1889          */
1890         lwkt_gettoken(&key_token);
1891         if (inp == NULL)
1892                 sp = ipsec4_getpolicybyaddr(m, dir, IP_FORWARDING, &error);
1893         else
1894                 sp = ipsec4_getpolicybysock(m, dir, inp->inp_socket, &error);
1895
1896         if (sp == NULL) {
1897                 lwkt_reltoken(&key_token);
1898                 return 0;       /* XXX should be panic ? */
1899         }
1900
1901         size = ipsec_hdrsiz(sp);
1902         KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
1903                 kprintf("DP ipsec4_hdrsiz call free SP:%p\n", sp));
1904         KEYDEBUG(KEYDEBUG_IPSEC_DATA,
1905                 kprintf("ipsec4_hdrsiz: size:%lu.\n", (unsigned long)size));
1906         key_freesp(sp);
1907         lwkt_reltoken(&key_token);
1908
1909         return size;
1910 }
1911
1912 #ifdef INET6
1913 /* This function is called from ipsec6_hdrsize_tcp(),
1914  * and maybe from ip6_forward.()
1915  */
1916 size_t
1917 ipsec6_hdrsiz(struct mbuf *m, u_int dir, struct in6pcb *in6p)
1918 {
1919         struct secpolicy *sp = NULL;
1920         int error;
1921         size_t size;
1922
1923         /* sanity check */
1924         if (m == NULL)
1925                 return 0;       /* XXX shoud be panic ? */
1926         if (in6p != NULL && in6p->in6p_socket == NULL)
1927                 panic("ipsec6_hdrsize: why is socket NULL but there is PCB.");
1928
1929         /* get SP for this packet */
1930         /* XXX Is it right to call with IP_FORWARDING. */
1931         lwkt_gettoken(&key_token);
1932         if (in6p == NULL)
1933                 sp = ipsec6_getpolicybyaddr(m, dir, IP_FORWARDING, &error);
1934         else
1935                 sp = ipsec6_getpolicybysock(m, dir, in6p->in6p_socket, &error);
1936
1937         if (sp == NULL) {
1938                 lwkt_reltoken(&key_token);
1939                 return 0;
1940         }
1941         size = ipsec_hdrsiz(sp);
1942         KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
1943                 kprintf("DP ipsec6_hdrsiz call free SP:%p\n", sp));
1944         KEYDEBUG(KEYDEBUG_IPSEC_DATA,
1945                 kprintf("ipsec6_hdrsiz: size:%lu.\n", (unsigned long)size));
1946         key_freesp(sp);
1947         lwkt_reltoken(&key_token);
1948
1949         return size;
1950 }
1951 #endif /* INET6 */
1952
1953 #ifdef INET
1954 /*
1955  * encapsulate for ipsec tunnel.
1956  * ip->ip_src must be fixed later on.
1957  */
1958 static int
1959 ipsec4_encapsulate(struct mbuf *m, struct secasvar *sav)
1960 {
1961         struct ip *oip;
1962         struct ip *ip;
1963         size_t hlen;
1964         size_t plen;
1965
1966         /* can't tunnel between different AFs */
1967         if (((struct sockaddr *)&sav->sah->saidx.src)->sa_family
1968                 != ((struct sockaddr *)&sav->sah->saidx.dst)->sa_family
1969          || ((struct sockaddr *)&sav->sah->saidx.src)->sa_family != AF_INET) {
1970                 m_freem(m);
1971                 return EINVAL;
1972         }
1973 #if 0
1974         /* XXX if the dst is myself, perform nothing. */
1975         if (key_ismyaddr((struct sockaddr *)&sav->sah->saidx.dst)) {
1976                 m_freem(m);
1977                 return EINVAL;
1978         }
1979 #endif
1980
1981         if (m->m_len < sizeof(*ip))
1982                 panic("ipsec4_encapsulate: assumption failed (first mbuf length)");
1983
1984         ip = mtod(m, struct ip *);
1985 #ifdef _IP_VHL
1986         hlen = _IP_VHL_HL(ip->ip_vhl) << 2;
1987 #else
1988         hlen = ip->ip_hl << 2;
1989 #endif
1990
1991         if (m->m_len != hlen)
1992                 panic("ipsec4_encapsulate: assumption failed (first mbuf length)");
1993
1994         /* generate header checksum */
1995         ip->ip_sum = 0;
1996 #ifdef _IP_VHL
1997         if (ip->ip_vhl == IP_VHL_BORING)
1998                 ip->ip_sum = in_cksum_hdr(ip);
1999         else
2000                 ip->ip_sum = in_cksum(m, hlen);
2001 #else
2002         ip->ip_sum = in_cksum(m, hlen);
2003 #endif
2004
2005         plen = m->m_pkthdr.len;
2006
2007         /*
2008          * grow the mbuf to accomodate the new IPv4 header.
2009          * NOTE: IPv4 options will never be copied.
2010          */
2011         if (M_LEADINGSPACE(m->m_next) < hlen) {
2012                 struct mbuf *n;
2013                 MGET(n, MB_DONTWAIT, MT_DATA);
2014                 if (!n) {
2015                         m_freem(m);
2016                         return ENOBUFS;
2017                 }
2018                 n->m_len = hlen;
2019                 n->m_next = m->m_next;
2020                 m->m_next = n;
2021                 m->m_pkthdr.len += hlen;
2022                 oip = mtod(n, struct ip *);
2023         } else {
2024                 m->m_next->m_len += hlen;
2025                 m->m_next->m_data -= hlen;
2026                 m->m_pkthdr.len += hlen;
2027                 oip = mtod(m->m_next, struct ip *);
2028         }
2029         ip = mtod(m, struct ip *);
2030         ovbcopy((caddr_t)ip, (caddr_t)oip, hlen);
2031         m->m_len = sizeof(struct ip);
2032         m->m_pkthdr.len -= (hlen - sizeof(struct ip));
2033
2034         /* construct new IPv4 header. see RFC 2401 5.1.2.1 */
2035         /* ECN consideration. */
2036         ip_ecn_ingress(ip4_ipsec_ecn, &ip->ip_tos, &oip->ip_tos);
2037 #ifdef _IP_VHL
2038         ip->ip_vhl = IP_MAKE_VHL(IPVERSION, sizeof(struct ip) >> 2);
2039 #else
2040         ip->ip_hl = sizeof(struct ip) >> 2;
2041 #endif
2042         ip->ip_off &= htons(~IP_OFFMASK);
2043         ip->ip_off &= htons(~IP_MF);
2044         switch (ip4_ipsec_dfbit) {
2045         case 0: /* clear DF bit */
2046                 ip->ip_off &= htons(~IP_DF);
2047                 break;
2048         case 1: /* set DF bit */
2049                 ip->ip_off |= htons(IP_DF);
2050                 break;
2051         default:        /* copy DF bit */
2052                 break;
2053         }
2054         ip->ip_p = IPPROTO_IPIP;
2055         if (plen + sizeof(struct ip) < IP_MAXPACKET)
2056                 ip->ip_len = htons(plen + sizeof(struct ip));
2057         else {
2058                 ipseclog((LOG_ERR, "IPv4 ipsec: size exceeds limit: "
2059                         "leave ip_len as is (invalid packet)\n"));
2060         }
2061 #ifdef RANDOM_IP_ID
2062         ip->ip_id = ip_randomid();
2063 #else
2064         ip->ip_id = htons(ip_id++);
2065 #endif
2066         bcopy(&((struct sockaddr_in *)&sav->sah->saidx.src)->sin_addr,
2067                 &ip->ip_src, sizeof(ip->ip_src));
2068         bcopy(&((struct sockaddr_in *)&sav->sah->saidx.dst)->sin_addr,
2069                 &ip->ip_dst, sizeof(ip->ip_dst));
2070         ip->ip_ttl = IPDEFTTL;
2071
2072         /* XXX Should ip_src be updated later ? */
2073
2074         return 0;
2075 }
2076 #endif /* INET */
2077
2078 #ifdef INET6
2079 static int
2080 ipsec6_encapsulate(struct mbuf *m, struct secasvar *sav)
2081 {
2082         struct ip6_hdr *oip6;
2083         struct ip6_hdr *ip6;
2084         size_t plen;
2085
2086         /* can't tunnel between different AFs */
2087         if (((struct sockaddr *)&sav->sah->saidx.src)->sa_family
2088                 != ((struct sockaddr *)&sav->sah->saidx.dst)->sa_family
2089          || ((struct sockaddr *)&sav->sah->saidx.src)->sa_family != AF_INET6) {
2090                 m_freem(m);
2091                 return EINVAL;
2092         }
2093 #if 0
2094         /* XXX if the dst is myself, perform nothing. */
2095         if (key_ismyaddr((struct sockaddr *)&sav->sah->saidx.dst)) {
2096                 m_freem(m);
2097                 return EINVAL;
2098         }
2099 #endif
2100
2101         plen = m->m_pkthdr.len;
2102
2103         /*
2104          * grow the mbuf to accomodate the new IPv6 header.
2105          */
2106         if (m->m_len != sizeof(struct ip6_hdr))
2107                 panic("ipsec6_encapsulate: assumption failed (first mbuf length)");
2108         if (M_LEADINGSPACE(m->m_next) < sizeof(struct ip6_hdr)) {
2109                 struct mbuf *n;
2110                 MGET(n, MB_DONTWAIT, MT_DATA);
2111                 if (!n) {
2112                         m_freem(m);
2113                         return ENOBUFS;
2114                 }
2115                 n->m_len = sizeof(struct ip6_hdr);
2116                 n->m_next = m->m_next;
2117                 m->m_next = n;
2118                 m->m_pkthdr.len += sizeof(struct ip6_hdr);
2119                 oip6 = mtod(n, struct ip6_hdr *);
2120         } else {
2121                 m->m_next->m_len += sizeof(struct ip6_hdr);
2122                 m->m_next->m_data -= sizeof(struct ip6_hdr);
2123                 m->m_pkthdr.len += sizeof(struct ip6_hdr);
2124                 oip6 = mtod(m->m_next, struct ip6_hdr *);
2125         }
2126         ip6 = mtod(m, struct ip6_hdr *);
2127         ovbcopy((caddr_t)ip6, (caddr_t)oip6, sizeof(struct ip6_hdr));
2128
2129         /* Fake link-local scope-class addresses */
2130         if (IN6_IS_SCOPE_LINKLOCAL(&oip6->ip6_src))
2131                 oip6->ip6_src.s6_addr16[1] = 0;
2132         if (IN6_IS_SCOPE_LINKLOCAL(&oip6->ip6_dst))
2133                 oip6->ip6_dst.s6_addr16[1] = 0;
2134
2135         /* construct new IPv6 header. see RFC 2401 5.1.2.2 */
2136         /* ECN consideration. */
2137         ip6_ecn_ingress(ip6_ipsec_ecn, &ip6->ip6_flow, &oip6->ip6_flow);
2138         if (plen < IPV6_MAXPACKET - sizeof(struct ip6_hdr))
2139                 ip6->ip6_plen = htons(plen);
2140         else {
2141                 /* ip6->ip6_plen will be updated in ip6_output() */
2142         }
2143         ip6->ip6_nxt = IPPROTO_IPV6;
2144         bcopy(&((struct sockaddr_in6 *)&sav->sah->saidx.src)->sin6_addr,
2145                 &ip6->ip6_src, sizeof(ip6->ip6_src));
2146         bcopy(&((struct sockaddr_in6 *)&sav->sah->saidx.dst)->sin6_addr,
2147                 &ip6->ip6_dst, sizeof(ip6->ip6_dst));
2148         ip6->ip6_hlim = IPV6_DEFHLIM;
2149
2150         /* XXX Should ip6_src be updated later ? */
2151
2152         return 0;
2153 }
2154 #endif /* INET6 */
2155
2156 /*
2157  * Check the variable replay window.
2158  * ipsec_chkreplay() performs replay check before ICV verification.
2159  * ipsec_updatereplay() updates replay bitmap.  This must be called after
2160  * ICV verification (it also performs replay check, which is usually done
2161  * beforehand).
2162  * 0 (zero) is returned if packet disallowed, 1 if packet permitted.
2163  *
2164  * based on RFC 2401.
2165  */
2166 int
2167 ipsec_chkreplay(u_int32_t seq, struct secasvar *sav)
2168 {
2169         const struct secreplay *replay;
2170         u_int32_t diff;
2171         int fr;
2172         u_int32_t wsizeb;       /* constant: bits of window size */
2173         int frlast;             /* constant: last frame */
2174
2175         /* sanity check */
2176         if (sav == NULL)
2177                 panic("ipsec_chkreplay: NULL pointer was passed.");
2178
2179         replay = sav->replay;
2180
2181         if (replay->wsize == 0)
2182                 return 1;       /* no need to check replay. */
2183
2184         /* constant */
2185         frlast = replay->wsize - 1;
2186         wsizeb = replay->wsize << 3;
2187
2188         /* sequence number of 0 is invalid */
2189         if (seq == 0)
2190                 return 0;
2191
2192         /* first time is always okay */
2193         if (replay->count == 0)
2194                 return 1;
2195
2196         if (seq > replay->lastseq) {
2197                 /* larger sequences are okay */
2198                 return 1;
2199         } else {
2200                 /* seq is equal or less than lastseq. */
2201                 diff = replay->lastseq - seq;
2202
2203                 /* over range to check, i.e. too old or wrapped */
2204                 if (diff >= wsizeb)
2205                         return 0;
2206
2207                 fr = frlast - diff / 8;
2208
2209                 /* this packet already seen ? */
2210                 if ((replay->bitmap)[fr] & (1 << (diff % 8)))
2211                         return 0;
2212
2213                 /* out of order but good */
2214                 return 1;
2215         }
2216 }
2217
2218 /*
2219  * check replay counter whether to update or not.
2220  * OUT: 0:      OK
2221  *      1:      NG
2222  */
2223 int
2224 ipsec_updatereplay(u_int32_t seq, struct secasvar *sav)
2225 {
2226         struct secreplay *replay;
2227         u_int32_t diff;
2228         int fr;
2229         u_int32_t wsizeb;       /* constant: bits of window size */
2230         int frlast;             /* constant: last frame */
2231
2232         /* sanity check */
2233         if (sav == NULL)
2234                 panic("ipsec_chkreplay: NULL pointer was passed.");
2235
2236         replay = sav->replay;
2237
2238         if (replay->wsize == 0)
2239                 goto ok;        /* no need to check replay. */
2240
2241         /* constant */
2242         frlast = replay->wsize - 1;
2243         wsizeb = replay->wsize << 3;
2244
2245         /* sequence number of 0 is invalid */
2246         if (seq == 0)
2247                 return 1;
2248
2249         /* first time */
2250         if (replay->count == 0) {
2251                 replay->lastseq = seq;
2252                 bzero(replay->bitmap, replay->wsize);
2253                 (replay->bitmap)[frlast] = 1;
2254                 goto ok;
2255         }
2256
2257         if (seq > replay->lastseq) {
2258                 /* seq is larger than lastseq. */
2259                 diff = seq - replay->lastseq;
2260
2261                 /* new larger sequence number */
2262                 if (diff < wsizeb) {
2263                         /* In window */
2264                         /* set bit for this packet */
2265                         vshiftl(replay->bitmap, diff, replay->wsize);
2266                         (replay->bitmap)[frlast] |= 1;
2267                 } else {
2268                         /* this packet has a "way larger" */
2269                         bzero(replay->bitmap, replay->wsize);
2270                         (replay->bitmap)[frlast] = 1;
2271                 }
2272                 replay->lastseq = seq;
2273
2274                 /* larger is good */
2275         } else {
2276                 /* seq is equal or less than lastseq. */
2277                 diff = replay->lastseq - seq;
2278
2279                 /* over range to check, i.e. too old or wrapped */
2280                 if (diff >= wsizeb)
2281                         return 1;
2282
2283                 fr = frlast - diff / 8;
2284
2285                 /* this packet already seen ? */
2286                 if ((replay->bitmap)[fr] & (1 << (diff % 8)))
2287                         return 1;
2288
2289                 /* mark as seen */
2290                 (replay->bitmap)[fr] |= (1 << (diff % 8));
2291
2292                 /* out of order but good */
2293         }
2294
2295 ok:
2296         if (replay->count == ~0) {
2297
2298                 /* set overflow flag */
2299                 replay->overflow++;
2300
2301                 /* don't increment, no more packets accepted */
2302                 if ((sav->flags & SADB_X_EXT_CYCSEQ) == 0)
2303                         return 1;
2304
2305                 ipseclog((LOG_WARNING, "replay counter made %d cycle. %s\n",
2306                     replay->overflow, ipsec_logsastr(sav)));
2307         }
2308
2309         replay->count++;
2310
2311         return 0;
2312 }
2313
/*
 * Shift a variable-length byte buffer to the left, i.e. towards
 * bitmap[0].  Bits shifted out of bitmap[0] are lost and zeros are
 * shifted into the last byte.
 * IN:	bitmap:	pointer to the buffer
 *	nbit:	the number of bits to shift.
 *	wsize:	buffer size (bytes).
 */
static void
vshiftl(unsigned char *bitmap, int nbit, int wsize)
{
	int remaining, step, idx;
	unsigned char carry;

	/* shift in passes of at most 8 bits each */
	for (remaining = nbit; remaining > 0; remaining -= 8) {
		step = (remaining < 8) ? remaining : 8;
		bitmap[0] <<= step;
		for (idx = 1; idx < wsize; idx++) {
			/* top bits of this byte move into the previous one */
			carry = (bitmap[idx] >> (8 - step));
			bitmap[idx] <<= step;
			bitmap[idx - 1] |= carry;
		}
	}
}
2338
2339 const char *
2340 ipsec4_logpacketstr(struct ip *ip, u_int32_t spi)
2341 {
2342         static char buf[256];
2343         char *p;
2344         u_int8_t *s, *d;
2345
2346         s = (u_int8_t *)(&ip->ip_src);
2347         d = (u_int8_t *)(&ip->ip_dst);
2348
2349         p = buf;
2350         ksnprintf(buf, sizeof(buf), "packet(SPI=%u ", (u_int32_t)ntohl(spi));
2351         while (p && *p)
2352                 p++;
2353         ksnprintf(p, sizeof(buf) - (p - buf), "src=%u.%u.%u.%u",
2354                 s[0], s[1], s[2], s[3]);
2355         while (p && *p)
2356                 p++;
2357         ksnprintf(p, sizeof(buf) - (p - buf), " dst=%u.%u.%u.%u",
2358                 d[0], d[1], d[2], d[3]);
2359         while (p && *p)
2360                 p++;
2361         ksnprintf(p, sizeof(buf) - (p - buf), ")");
2362
2363         return buf;
2364 }
2365
2366 #ifdef INET6
2367 const char *
2368 ipsec6_logpacketstr(struct ip6_hdr *ip6, u_int32_t spi)
2369 {
2370         static char buf[256];
2371         char *p;
2372
2373         p = buf;
2374         ksnprintf(buf, sizeof(buf), "packet(SPI=%u ", (u_int32_t)ntohl(spi));
2375         while (p && *p)
2376                 p++;
2377         ksnprintf(p, sizeof(buf) - (p - buf), "src=%s",
2378                 ip6_sprintf(&ip6->ip6_src));
2379         while (p && *p)
2380                 p++;
2381         ksnprintf(p, sizeof(buf) - (p - buf), " dst=%s",
2382                 ip6_sprintf(&ip6->ip6_dst));
2383         while (p && *p)
2384                 p++;
2385         ksnprintf(p, sizeof(buf) - (p - buf), ")");
2386
2387         return buf;
2388 }
2389 #endif /* INET6 */
2390
2391 const char *
2392 ipsec_logsastr(struct secasvar *sav)
2393 {
2394         static char buf[256];
2395         char *p;
2396         struct secasindex *saidx = &sav->sah->saidx;
2397
2398         /* validity check */
2399         if (((struct sockaddr *)&sav->sah->saidx.src)->sa_family
2400                         != ((struct sockaddr *)&sav->sah->saidx.dst)->sa_family)
2401                 panic("ipsec_logsastr: family mismatched.");
2402
2403         p = buf;
2404         ksnprintf(buf, sizeof(buf), "SA(SPI=%u ", (u_int32_t)ntohl(sav->spi));
2405         while (p && *p)
2406                 p++;
2407         if (((struct sockaddr *)&saidx->src)->sa_family == AF_INET) {
2408                 u_int8_t *s, *d;
2409                 s = (u_int8_t *)&((struct sockaddr_in *)&saidx->src)->sin_addr;
2410                 d = (u_int8_t *)&((struct sockaddr_in *)&saidx->dst)->sin_addr;
2411                 ksnprintf(p, sizeof(buf) - (p - buf),
2412                         "src=%d.%d.%d.%d dst=%d.%d.%d.%d",
2413                         s[0], s[1], s[2], s[3], d[0], d[1], d[2], d[3]);
2414         }
2415 #ifdef INET6
2416         else if (((struct sockaddr *)&saidx->src)->sa_family == AF_INET6) {
2417                 ksnprintf(p, sizeof(buf) - (p - buf),
2418                         "src=%s",
2419                         ip6_sprintf(&((struct sockaddr_in6 *)&saidx->src)->sin6_addr));
2420                 while (p && *p)
2421                         p++;
2422                 ksnprintf(p, sizeof(buf) - (p - buf),
2423                         " dst=%s",
2424                         ip6_sprintf(&((struct sockaddr_in6 *)&saidx->dst)->sin6_addr));
2425         }
2426 #endif
2427         while (p && *p)
2428                 p++;
2429         ksnprintf(p, sizeof(buf) - (p - buf), ")");
2430
2431         return buf;
2432 }
2433
2434 void
2435 ipsec_dumpmbuf(struct mbuf *m)
2436 {
2437         int totlen;
2438         int i;
2439         u_char *p;
2440
2441         totlen = 0;
2442         kprintf("---\n");
2443         while (m) {
2444                 p = mtod(m, u_char *);
2445                 for (i = 0; i < m->m_len; i++) {
2446                         kprintf("%02x ", p[i]);
2447                         totlen++;
2448                         if (totlen % 16 == 0)
2449                                 kprintf("\n");
2450                 }
2451                 m = m->m_next;
2452         }
2453         if (totlen % 16 != 0)
2454                 kprintf("\n");
2455         kprintf("---\n");
2456 }
2457
2458 #ifdef INET
2459 /*
2460  * IPsec output logic for IPv4.
2461  */
2462 int
2463 ipsec4_output(struct ipsec_output_state *state, struct secpolicy *sp, int flags)
2464 {
2465         struct ip *ip = NULL;
2466         struct ipsecrequest *isr = NULL;
2467         struct secasindex saidx;
2468         int error;
2469         struct sockaddr_in *dst4;
2470         struct sockaddr_in *sin;
2471
2472         if (!state)
2473                 panic("state == NULL in ipsec4_output");
2474         if (!state->m)
2475                 panic("state->m == NULL in ipsec4_output");
2476         if (!state->ro)
2477                 panic("state->ro == NULL in ipsec4_output");
2478         if (!state->dst)
2479                 panic("state->dst == NULL in ipsec4_output");
2480
2481         KEYDEBUG(KEYDEBUG_IPSEC_DATA,
2482                 kprintf("ipsec4_output: applyed SP\n");
2483                 kdebug_secpolicy(sp));
2484
2485         for (isr = sp->req; isr != NULL; isr = isr->next) {
2486
2487 #if 0   /* give up to check restriction of transport mode */
2488         /* XXX but should be checked somewhere */
2489                 /*
2490                  * some of the IPsec operation must be performed only in
2491                  * originating case.
2492                  */
2493                 if (isr->saidx.mode == IPSEC_MODE_TRANSPORT
2494                  && (flags & IP_FORWARDING))
2495                         continue;
2496 #endif
2497
2498                 /* make SA index for search proper SA */
2499                 ip = mtod(state->m, struct ip *);
2500                 bcopy(&isr->saidx, &saidx, sizeof(saidx));
2501                 saidx.mode = isr->saidx.mode;
2502                 saidx.reqid = isr->saidx.reqid;
2503                 sin = (struct sockaddr_in *)&saidx.src;
2504                 if (sin->sin_len == 0) {
2505                         sin->sin_len = sizeof(*sin);
2506                         sin->sin_family = AF_INET;
2507                         sin->sin_port = IPSEC_PORT_ANY;
2508                         bcopy(&ip->ip_src, &sin->sin_addr,
2509                             sizeof(sin->sin_addr));
2510                 }
2511                 sin = (struct sockaddr_in *)&saidx.dst;
2512                 if (sin->sin_len == 0) {
2513                         sin->sin_len = sizeof(*sin);
2514                         sin->sin_family = AF_INET;
2515                         sin->sin_port = IPSEC_PORT_ANY;
2516                         bcopy(&ip->ip_dst, &sin->sin_addr,
2517                             sizeof(sin->sin_addr));
2518                 }
2519
2520                 if ((error = key_checkrequest(isr, &saidx)) != 0) {
2521                         /*
2522                          * IPsec processing is required, but no SA found.
2523                          * I assume that key_acquire() had been called
2524                          * to get/establish the SA. Here I discard
2525                          * this packet because it is responsibility for
2526                          * upper layer to retransmit the packet.
2527                          */
2528                         ipsecstat.out_nosa++;
2529                         goto bad;
2530                 }
2531
2532                 /* validity check */
2533                 if (isr->sav == NULL) {
2534                         switch (ipsec_get_reqlevel(isr)) {
2535                         case IPSEC_LEVEL_USE:
2536                                 continue;
2537                         case IPSEC_LEVEL_REQUIRE:
2538                                 /* must be not reached here. */
2539                                 panic("ipsec4_output: no SA found, but required.");
2540                         }
2541                 }
2542
2543                 /*
2544                  * If there is no valid SA, we give up to process any
2545                  * more.  In such a case, the SA's status is changed
2546                  * from DYING to DEAD after allocating.  If a packet
2547                  * send to the receiver by dead SA, the receiver can
2548                  * not decode a packet because SA has been dead.
2549                  */
2550                 if (isr->sav->state != SADB_SASTATE_MATURE
2551                  && isr->sav->state != SADB_SASTATE_DYING) {
2552                         ipsecstat.out_nosa++;
2553                         error = EINVAL;
2554                         goto bad;
2555                 }
2556
2557                 /*
2558                  * There may be the case that SA status will be changed when
2559                  * we are refering to one. So calling crit_enter().
2560                  */
2561                 crit_enter();
2562
2563                 if (isr->saidx.mode == IPSEC_MODE_TUNNEL) {
2564                         /*
2565                          * build IPsec tunnel.
2566                          */
2567                         /* XXX should be processed with other familiy */
2568                         if (((struct sockaddr *)&isr->sav->sah->saidx.src)->sa_family != AF_INET) {
2569                                 ipseclog((LOG_ERR, "ipsec4_output: "
2570                                     "family mismatched between inner and outer spi=%u\n",
2571                                     (u_int32_t)ntohl(isr->sav->spi)));
2572                                 crit_exit();
2573                                 error = EAFNOSUPPORT;
2574                                 goto bad;
2575                         }
2576
2577                         state->m = ipsec4_splithdr(state->m);
2578                         if (!state->m) {
2579                                 crit_exit();
2580                                 error = ENOMEM;
2581                                 goto bad;
2582                         }
2583                         error = ipsec4_encapsulate(state->m, isr->sav);
2584                         crit_exit();
2585                         if (error) {
2586                                 state->m = NULL;
2587                                 goto bad;
2588                         }
2589                         ip = mtod(state->m, struct ip *);
2590
2591                         state->ro = &isr->sav->sah->sa_route;
2592                         state->dst = (struct sockaddr *)&state->ro->ro_dst;
2593                         dst4 = (struct sockaddr_in *)state->dst;
2594                         if (state->ro->ro_rt
2595                          && ((state->ro->ro_rt->rt_flags & RTF_UP) == 0
2596                           || dst4->sin_addr.s_addr != ip->ip_dst.s_addr)) {
2597                                 RTFREE(state->ro->ro_rt);
2598                                 state->ro->ro_rt = NULL;
2599                         }
2600                         if (state->ro->ro_rt == 0) {
2601                                 dst4->sin_family = AF_INET;
2602                                 dst4->sin_len = sizeof(*dst4);
2603                                 dst4->sin_addr = ip->ip_dst;
2604                                 rtalloc(state->ro);
2605                         }
2606                         if (state->ro->ro_rt == 0) {
2607                                 ipstat.ips_noroute++;
2608                                 error = EHOSTUNREACH;
2609                                 goto bad;
2610                         }
2611
2612                         /* adjust state->dst if tunnel endpoint is offlink */
2613                         if (state->ro->ro_rt->rt_flags & RTF_GATEWAY) {
2614                                 state->dst = (struct sockaddr *)state->ro->ro_rt->rt_gateway;
2615                                 dst4 = (struct sockaddr_in *)state->dst;
2616                         }
2617                 } else
2618                         crit_exit();
2619
2620                 state->m = ipsec4_splithdr(state->m);
2621                 if (!state->m) {
2622                         error = ENOMEM;
2623                         goto bad;
2624                 }
2625                 switch (isr->saidx.proto) {
2626                 case IPPROTO_ESP:
2627 #ifdef IPSEC_ESP
2628                         if ((error = esp4_output(state->m, isr)) != 0) {
2629                                 state->m = NULL;
2630                                 goto bad;
2631                         }
2632                         break;
2633 #else
2634                         m_freem(state->m);
2635                         state->m = NULL;
2636                         error = EINVAL;
2637                         goto bad;
2638 #endif
2639                 case IPPROTO_AH:
2640                         if ((error = ah4_output(state->m, isr)) != 0) {
2641                                 state->m = NULL;
2642                                 goto bad;
2643                         }
2644                         break;
2645                 case IPPROTO_IPCOMP:
2646                         if ((error = ipcomp4_output(state->m, isr)) != 0) {
2647                                 state->m = NULL;
2648                                 goto bad;
2649                         }
2650                         break;
2651                 default:
2652                         ipseclog((LOG_ERR,
2653                             "ipsec4_output: unknown ipsec protocol %d\n",
2654                             isr->saidx.proto));
2655                         m_freem(state->m);
2656                         state->m = NULL;
2657                         error = EINVAL;
2658                         goto bad;
2659                 }
2660
2661                 if (state->m == 0) {
2662                         error = ENOMEM;
2663                         goto bad;
2664                 }
2665                 ip = mtod(state->m, struct ip *);
2666         }
2667
2668         return 0;
2669
2670 bad:
2671         m_freem(state->m);
2672         state->m = NULL;
2673         return error;
2674 }
2675 #endif
2676
2677 #ifdef INET6
2678 /*
2679  * IPsec output logic for IPv6, transport mode.
2680  */
2681 int
2682 ipsec6_output_trans(struct ipsec_output_state *state, u_char *nexthdrp,
2683                     struct mbuf *mprev, struct secpolicy *sp, int flags,
2684                     int *tun)
2685 {
2686         struct ip6_hdr *ip6;
2687         struct ipsecrequest *isr = NULL;
2688         struct secasindex saidx;
2689         int error = 0;
2690         int plen;
2691         struct sockaddr_in6 *sin6;
2692
2693         if (!state)
2694                 panic("state == NULL in ipsec6_output_trans");
2695         if (!state->m)
2696                 panic("state->m == NULL in ipsec6_output_trans");
2697         if (!nexthdrp)
2698                 panic("nexthdrp == NULL in ipsec6_output_trans");
2699         if (!mprev)
2700                 panic("mprev == NULL in ipsec6_output_trans");
2701         if (!sp)
2702                 panic("sp == NULL in ipsec6_output_trans");
2703         if (!tun)
2704                 panic("tun == NULL in ipsec6_output_trans");
2705
2706         KEYDEBUG(KEYDEBUG_IPSEC_DATA,
2707                 kprintf("ipsec6_output_trans: applyed SP\n");
2708                 kdebug_secpolicy(sp));
2709
2710         lwkt_gettoken(&key_token);
2711
2712         *tun = 0;
2713         for (isr = sp->req; isr; isr = isr->next) {
2714                 if (isr->saidx.mode == IPSEC_MODE_TUNNEL) {
2715                         /* the rest will be handled by ipsec6_output_tunnel() */
2716                         break;
2717                 }
2718
2719                 /* make SA index for search proper SA */
2720                 ip6 = mtod(state->m, struct ip6_hdr *);
2721                 bcopy(&isr->saidx, &saidx, sizeof(saidx));
2722                 saidx.mode = isr->saidx.mode;
2723                 saidx.reqid = isr->saidx.reqid;
2724                 sin6 = (struct sockaddr_in6 *)&saidx.src;
2725                 if (sin6->sin6_len == 0) {
2726                         sin6->sin6_len = sizeof(*sin6);
2727                         sin6->sin6_family = AF_INET6;
2728                         sin6->sin6_port = IPSEC_PORT_ANY;
2729                         bcopy(&ip6->ip6_src, &sin6->sin6_addr,
2730                             sizeof(ip6->ip6_src));
2731                         if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
2732                                 /* fix scope id for comparing SPD */
2733                                 sin6->sin6_addr.s6_addr16[1] = 0;
2734                                 sin6->sin6_scope_id = ntohs(ip6->ip6_src.s6_addr16[1]);
2735                         }
2736                 }
2737                 sin6 = (struct sockaddr_in6 *)&saidx.dst;
2738                 if (sin6->sin6_len == 0) {
2739                         sin6->sin6_len = sizeof(*sin6);
2740                         sin6->sin6_family = AF_INET6;
2741                         sin6->sin6_port = IPSEC_PORT_ANY;
2742                         bcopy(&ip6->ip6_dst, &sin6->sin6_addr,
2743                             sizeof(ip6->ip6_dst));
2744                         if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) {
2745                                 /* fix scope id for comparing SPD */
2746                                 sin6->sin6_addr.s6_addr16[1] = 0;
2747                                 sin6->sin6_scope_id = ntohs(ip6->ip6_dst.s6_addr16[1]);
2748                         }
2749                 }
2750
2751                 if (key_checkrequest(isr, &saidx) == ENOENT) {
2752                         /*
2753                          * IPsec processing is required, but no SA found.
2754                          * I assume that key_acquire() had been called
2755                          * to get/establish the SA. Here I discard
2756                          * this packet because it is responsibility for
2757                          * upper layer to retransmit the packet.
2758                          */
2759                         ipsec6stat.out_nosa++;
2760                         error = ENOENT;
2761
2762                         /*
2763                          * Notify the fact that the packet is discarded
2764                          * to ourselves. I believe this is better than
2765                          * just silently discarding. (jinmei@kame.net)
2766                          * XXX: should we restrict the error to TCP packets?
2767                          * XXX: should we directly notify sockets via
2768                          *      kpfctlinputs?
2769                          */
2770                         icmp6_error(state->m, ICMP6_DST_UNREACH,
2771                                     ICMP6_DST_UNREACH_ADMIN, 0);
2772                         state->m = NULL; /* icmp6_error freed the mbuf */
2773                         goto bad;
2774                 }
2775
2776                 /* validity check */
2777                 if (isr->sav == NULL) {
2778                         switch (ipsec_get_reqlevel(isr)) {
2779                         case IPSEC_LEVEL_USE:
2780                                 continue;
2781                         case IPSEC_LEVEL_REQUIRE:
2782                                 /* must be not reached here. */
2783                                 panic("ipsec6_output_trans: no SA found, but required.");
2784                         }
2785                 }
2786
2787                 /*
2788                  * If there is no valid SA, we give up to process.
2789                  * see same place at ipsec4_output().
2790                  */
2791                 if (isr->sav->state != SADB_SASTATE_MATURE
2792                  && isr->sav->state != SADB_SASTATE_DYING) {
2793                         ipsec6stat.out_nosa++;
2794                         error = EINVAL;
2795                         goto bad;
2796                 }
2797
2798                 switch (isr->saidx.proto) {
2799                 case IPPROTO_ESP:
2800 #ifdef IPSEC_ESP
2801                         error = esp6_output(state->m, nexthdrp, mprev->m_next, isr);
2802 #else
2803                         m_freem(state->m);
2804                         error = EINVAL;
2805 #endif
2806                         break;
2807                 case IPPROTO_AH:
2808                         error = ah6_output(state->m, nexthdrp, mprev->m_next, isr);
2809                         break;
2810                 case IPPROTO_IPCOMP:
2811                         error = ipcomp6_output(state->m, nexthdrp, mprev->m_next, isr);
2812                         break;
2813                 default:
2814                         ipseclog((LOG_ERR, "ipsec6_output_trans: "
2815                             "unknown ipsec protocol %d\n", isr->saidx.proto));
2816                         m_freem(state->m);
2817                         ipsec6stat.out_inval++;
2818                         error = EINVAL;
2819                         break;
2820                 }
2821                 if (error) {
2822                         state->m = NULL;
2823                         goto bad;
2824                 }
2825                 plen = state->m->m_pkthdr.len - sizeof(struct ip6_hdr);
2826                 if (plen > IPV6_MAXPACKET) {
2827                         ipseclog((LOG_ERR, "ipsec6_output_trans: "
2828                             "IPsec with IPv6 jumbogram is not supported\n"));
2829                         ipsec6stat.out_inval++;
2830                         error = EINVAL; /* XXX */
2831                         goto bad;
2832                 }
2833                 ip6 = mtod(state->m, struct ip6_hdr *);
2834                 ip6->ip6_plen = htons(plen);
2835         }
2836
2837         /* if we have more to go, we need a tunnel mode processing */
2838         if (isr != NULL)
2839                 *tun = 1;
2840         lwkt_reltoken(&key_token);
2841         return 0;
2842
2843 bad:
2844         lwkt_reltoken(&key_token);
2845         m_freem(state->m);
2846         state->m = NULL;
2847         return error;
2848 }
2849
2850 /*
2851  * IPsec output logic for IPv6, tunnel mode.
2852  */
2853 int
2854 ipsec6_output_tunnel(struct ipsec_output_state *state, struct secpolicy *sp,
2855                      int flags)
2856 {
2857         struct ip6_hdr *ip6;
2858         struct ipsecrequest *isr = NULL;
2859         struct secasindex saidx;
2860         int error = 0;
2861         int plen;
2862         struct sockaddr_in6* dst6;
2863
2864         if (!state)
2865                 panic("state == NULL in ipsec6_output_tunnel");
2866         if (!state->m)
2867                 panic("state->m == NULL in ipsec6_output_tunnel");
2868         if (!sp)
2869                 panic("sp == NULL in ipsec6_output_tunnel");
2870
2871         KEYDEBUG(KEYDEBUG_IPSEC_DATA,
2872                 kprintf("ipsec6_output_tunnel: applyed SP\n");
2873                 kdebug_secpolicy(sp));
2874
2875         /*
2876          * transport mode ipsec (before the 1st tunnel mode) is already
2877          * processed by ipsec6_output_trans().
2878          */
2879         lwkt_gettoken(&key_token);
2880         for (isr = sp->req; isr; isr = isr->next) {
2881                 if (isr->saidx.mode == IPSEC_MODE_TUNNEL)
2882                         break;
2883         }
2884
2885         for (/* already initialized */; isr; isr = isr->next) {
2886                 if (isr->saidx.mode == IPSEC_MODE_TUNNEL) {
2887                         /* When tunnel mode, SA peers must be specified. */
2888                         bcopy(&isr->saidx, &saidx, sizeof(saidx));
2889                 } else {
2890                         /* make SA index to look for a proper SA */
2891                         struct sockaddr_in6 *sin6;
2892
2893                         bzero(&saidx, sizeof(saidx));
2894                         saidx.proto = isr->saidx.proto;
2895                         saidx.mode = isr->saidx.mode;
2896                         saidx.reqid = isr->saidx.reqid;
2897
2898                         ip6 = mtod(state->m, struct ip6_hdr *);
2899                         sin6 = (struct sockaddr_in6 *)&saidx.src;
2900                         if (sin6->sin6_len == 0) {
2901                                 sin6->sin6_len = sizeof(*sin6);
2902                                 sin6->sin6_family = AF_INET6;
2903                                 sin6->sin6_port = IPSEC_PORT_ANY;
2904                                 bcopy(&ip6->ip6_src, &sin6->sin6_addr,
2905                                     sizeof(ip6->ip6_src));
2906                                 if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
2907                                         /* fix scope id for comparing SPD */
2908                                         sin6->sin6_addr.s6_addr16[1] = 0;
2909                                         sin6->sin6_scope_id = ntohs(ip6->ip6_src.s6_addr16[1]);
2910                                 }
2911                         }
2912                         sin6 = (struct sockaddr_in6 *)&saidx.dst;
2913                         if (sin6->sin6_len == 0) {
2914                                 sin6->sin6_len = sizeof(*sin6);
2915                                 sin6->sin6_family = AF_INET6;
2916                                 sin6->sin6_port = IPSEC_PORT_ANY;
2917                                 bcopy(&ip6->ip6_dst, &sin6->sin6_addr,
2918                                     sizeof(ip6->ip6_dst));
2919                                 if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) {
2920                                         /* fix scope id for comparing SPD */
2921                                         sin6->sin6_addr.s6_addr16[1] = 0;
2922                                         sin6->sin6_scope_id = ntohs(ip6->ip6_dst.s6_addr16[1]);
2923                                 }
2924                         }
2925                 }
2926
2927                 if (key_checkrequest(isr, &saidx) == ENOENT) {
2928                         /*
2929                          * IPsec processing is required, but no SA found.
2930                          * I assume that key_acquire() had been called
2931                          * to get/establish the SA. Here I discard
2932                          * this packet because it is responsibility for
2933                          * upper layer to retransmit the packet.
2934                          */
2935                         ipsec6stat.out_nosa++;
2936                         error = ENOENT;
2937                         goto bad;
2938                 }
2939
2940                 /* validity check */
2941                 if (isr->sav == NULL) {
2942                         switch (ipsec_get_reqlevel(isr)) {
2943                         case IPSEC_LEVEL_USE:
2944                                 continue;
2945                         case IPSEC_LEVEL_REQUIRE:
2946                                 /* must be not reached here. */
2947                                 panic("ipsec6_output_tunnel: no SA found, but required.");
2948                         }
2949                 }
2950
2951                 /*
2952                  * If there is no valid SA, we give up to process.
2953                  * see same place at ipsec4_output().
2954                  */
2955                 if (isr->sav->state != SADB_SASTATE_MATURE
2956                  && isr->sav->state != SADB_SASTATE_DYING) {
2957                         ipsec6stat.out_nosa++;
2958                         error = EINVAL;
2959                         goto bad;
2960                 }
2961
2962                 /*
2963                  * There may be the case that SA status will be changed when
2964                  * we are refering to one. So calling crit_enter().
2965                  */
2966                 crit_enter();
2967
2968                 if (isr->saidx.mode == IPSEC_MODE_TUNNEL) {
2969                         /*
2970                          * build IPsec tunnel.
2971                          */
2972                         /* XXX should be processed with other familiy */
2973                         if (((struct sockaddr *)&isr->sav->sah->saidx.src)->sa_family != AF_INET6) {
2974                                 ipseclog((LOG_ERR, "ipsec6_output_tunnel: "
2975                                     "family mismatched between inner and outer, spi=%u\n",
2976                                     (u_int32_t)ntohl(isr->sav->spi)));
2977                                 crit_exit();
2978                                 ipsec6stat.out_inval++;
2979                                 error = EAFNOSUPPORT;
2980                                 goto bad;
2981                         }
2982
2983                         state->m = ipsec6_splithdr(state->m);
2984                         if (!state->m) {
2985                                 crit_exit();
2986                                 ipsec6stat.out_nomem++;
2987                                 error = ENOMEM;
2988                                 goto bad;
2989                         }
2990                         error = ipsec6_encapsulate(state->m, isr->sav);
2991                         crit_exit();
2992                         if (error) {
2993                                 state->m = 0;
2994                                 goto bad;
2995                         }
2996                         ip6 = mtod(state->m, struct ip6_hdr *);
2997
2998                         state->ro = &isr->sav->sah->sa_route;
2999                         state->dst = (struct sockaddr *)&state->ro->ro_dst;
3000                         dst6 = (struct sockaddr_in6 *)state->dst;
3001                         if (state->ro->ro_rt
3002                          && ((state->ro->ro_rt->rt_flags & RTF_UP) == 0
3003                           || !IN6_ARE_ADDR_EQUAL(&dst6->sin6_addr, &ip6->ip6_dst))) {
3004                                 RTFREE(state->ro->ro_rt);
3005                                 state->ro->ro_rt = NULL;
3006                         }
3007                         if (state->ro->ro_rt == 0) {
3008                                 bzero(dst6, sizeof(*dst6));
3009                                 dst6->sin6_family = AF_INET6;
3010                                 dst6->sin6_len = sizeof(*dst6);
3011                                 dst6->sin6_addr = ip6->ip6_dst;
3012                                 rtalloc(state->ro);
3013                         }
3014                         if (state->ro->ro_rt == 0) {
3015                                 ip6stat.ip6s_noroute++;
3016                                 ipsec6stat.out_noroute++;
3017                                 error = EHOSTUNREACH;
3018                                 goto bad;
3019                         }
3020
3021                         /* adjust state->dst if tunnel endpoint is offlink */
3022                         if (state->ro->ro_rt->rt_flags & RTF_GATEWAY) {
3023                                 state->dst = (struct sockaddr *)state->ro->ro_rt->rt_gateway;
3024                                 dst6 = (struct sockaddr_in6 *)state->dst;
3025                         }
3026                 } else
3027                         crit_exit();
3028
3029                 state->m = ipsec6_splithdr(state->m);
3030                 if (!state->m) {
3031                         ipsec6stat.out_nomem++;
3032                         error = ENOMEM;
3033                         goto bad;
3034                 }
3035                 ip6 = mtod(state->m, struct ip6_hdr *);
3036                 switch (isr->saidx.proto) {
3037                 case IPPROTO_ESP:
3038 #ifdef IPSEC_ESP
3039                         error = esp6_output(state->m, &ip6->ip6_nxt, state->m->m_next, isr);
3040 #else
3041                         m_freem(state->m);
3042                         error = EINVAL;
3043 #endif
3044                         break;
3045                 case IPPROTO_AH:
3046                         error = ah6_output(state->m, &ip6->ip6_nxt, state->m->m_next, isr);
3047                         break;
3048                 case IPPROTO_IPCOMP:
3049                         /* XXX code should be here */
3050                         /* FALLTHROUGH */
3051                 default:
3052                         ipseclog((LOG_ERR, "ipsec6_output_tunnel: "
3053                             "unknown ipsec protocol %d\n", isr->saidx.proto));
3054                         m_freem(state->m);
3055                         ipsec6stat.out_inval++;
3056                         error = EINVAL;
3057                         break;
3058                 }
3059                 if (error) {
3060                         state->m = NULL;
3061                         goto bad;
3062                 }
3063                 plen = state->m->m_pkthdr.len - sizeof(struct ip6_hdr);
3064                 if (plen > IPV6_MAXPACKET) {
3065                         ipseclog((LOG_ERR, "ipsec6_output_tunnel: "
3066                             "IPsec with IPv6 jumbogram is not supported\n"));
3067                         ipsec6stat.out_inval++;
3068                         error = EINVAL; /* XXX */
3069                         goto bad;
3070                 }
3071                 ip6 = mtod(state->m, struct ip6_hdr *);
3072                 ip6->ip6_plen = htons(plen);
3073         }
3074         lwkt_reltoken(&key_token);
3075
3076         return 0;
3077
3078 bad:
3079         lwkt_reltoken(&key_token);
3080         m_freem(state->m);
3081         state->m = NULL;
3082         return error;
3083 }
3084 #endif /* INET6 */
3085
3086 #ifdef INET
3087 /*
3088  * Chop IP header and option off from the payload.
3089  */
3090 static struct mbuf *
3091 ipsec4_splithdr(struct mbuf *m)
3092 {
3093         struct mbuf *mh;
3094         struct ip *ip;
3095         int hlen;
3096
3097         if (m->m_len < sizeof(struct ip))
3098                 panic("ipsec4_splithdr: first mbuf too short");
3099         ip = mtod(m, struct ip *);
3100 #ifdef _IP_VHL
3101         hlen = _IP_VHL_HL(ip->ip_vhl) << 2;
3102 #else
3103         hlen = ip->ip_hl << 2;
3104 #endif
3105         if (m->m_len > hlen) {
3106                 MGETHDR(mh, MB_DONTWAIT, MT_HEADER);
3107                 if (!mh) {
3108                         m_freem(m);
3109                         return NULL;
3110                 }
3111                 M_MOVE_PKTHDR(mh, m);
3112                 MH_ALIGN(mh, hlen);
3113                 m->m_len -= hlen;
3114                 m->m_data += hlen;
3115                 mh->m_next = m;
3116                 m = mh;
3117                 m->m_len = hlen;
3118                 bcopy((caddr_t)ip, mtod(m, caddr_t), hlen);
3119         } else if (m->m_len < hlen) {
3120                 m = m_pullup(m, hlen);
3121                 if (!m)
3122                         return NULL;
3123         }
3124         return m;
3125 }
3126 #endif
3127
3128 #ifdef INET6
3129 static struct mbuf *
3130 ipsec6_splithdr(struct mbuf *m)
3131 {
3132         struct mbuf *mh;
3133         struct ip6_hdr *ip6;
3134         int hlen;
3135
3136         if (m->m_len < sizeof(struct ip6_hdr))
3137                 panic("ipsec6_splithdr: first mbuf too short");
3138         ip6 = mtod(m, struct ip6_hdr *);
3139         hlen = sizeof(struct ip6_hdr);
3140         if (m->m_len > hlen) {
3141                 MGETHDR(mh, MB_DONTWAIT, MT_HEADER);
3142                 if (!mh) {
3143                         m_freem(m);
3144                         return NULL;
3145                 }
3146                 M_MOVE_PKTHDR(mh, m);
3147                 MH_ALIGN(mh, hlen);
3148                 m->m_len -= hlen;
3149                 m->m_data += hlen;
3150                 mh->m_next = m;
3151                 m = mh;
3152                 m->m_len = hlen;
3153                 bcopy((caddr_t)ip6, mtod(m, caddr_t), hlen);
3154         } else if (m->m_len < hlen) {
3155                 m = m_pullup(m, hlen);
3156                 if (!m)
3157                         return NULL;
3158         }
3159         return m;
3160 }
3161 #endif
3162
3163 /* validate inbound IPsec tunnel packet. */
3164 int
3165 ipsec4_tunnel_validate(struct mbuf *m, /* no pullup permitted, m->m_len >= ip */
3166                        int off, u_int nxt0, struct secasvar *sav)
3167 {
3168         u_int8_t nxt = nxt0 & 0xff;
3169         struct sockaddr_in *sin;
3170         struct sockaddr_in osrc, odst, isrc, idst;
3171         int hlen;
3172         struct secpolicy *sp;
3173         struct ip *oip;
3174
3175 #ifdef DIAGNOSTIC
3176         if (m->m_len < sizeof(struct ip))
3177                 panic("too short mbuf on ipsec4_tunnel_validate");
3178 #endif
3179         if (nxt != IPPROTO_IPV4)
3180                 return 0;
3181         if (m->m_pkthdr.len < off + sizeof(struct ip))
3182                 return 0;
3183         /* do not decapsulate if the SA is for transport mode only */
3184         if (sav->sah->saidx.mode == IPSEC_MODE_TRANSPORT)
3185                 return 0;
3186
3187         oip = mtod(m, struct ip *);
3188 #ifdef _IP_VHL
3189         hlen = _IP_VHL_HL(oip->ip_vhl) << 2;
3190 #else
3191         hlen = oip->ip_hl << 2;
3192 #endif
3193         if (hlen != sizeof(struct ip))
3194                 return 0;
3195
3196         /* AF_INET6 should be supported, but at this moment we don't. */
3197         sin = (struct sockaddr_in *)&sav->sah->saidx.dst;
3198         if (sin->sin_family != AF_INET)
3199                 return 0;
3200         if (bcmp(&oip->ip_dst, &sin->sin_addr, sizeof(oip->ip_dst)) != 0)
3201                 return 0;
3202
3203         /* XXX slow */
3204         bzero(&osrc, sizeof(osrc));
3205         bzero(&odst, sizeof(odst));
3206         bzero(&isrc, sizeof(isrc));
3207         bzero(&idst, sizeof(idst));
3208         osrc.sin_family = odst.sin_family = isrc.sin_family = idst.sin_family =
3209             AF_INET;
3210         osrc.sin_len = odst.sin_len = isrc.sin_len = idst.sin_len =
3211             sizeof(struct sockaddr_in);
3212         osrc.sin_addr = oip->ip_src;
3213         odst.sin_addr = oip->ip_dst;
3214         m_copydata(m, off + offsetof(struct ip, ip_src), sizeof(isrc.sin_addr),
3215             (caddr_t)&isrc.sin_addr);
3216         m_copydata(m, off + offsetof(struct ip, ip_dst), sizeof(idst.sin_addr),
3217             (caddr_t)&idst.sin_addr);
3218
3219         /*
3220          * RFC2401 5.2.1 (b): (assume that we are using tunnel mode)
3221          * - if the inner destination is multicast address, there can be
3222          *   multiple permissible inner source address.  implementation
3223          *   may want to skip verification of inner source address against
3224          *   SPD selector.
3225          * - if the inner protocol is ICMP, the packet may be an error report
3226          *   from routers on the other side of the VPN cloud (R in the
3227          *   following diagram).  in this case, we cannot verify inner source
3228          *   address against SPD selector.
3229          *      me -- gw === gw -- R -- you
3230          *
3231          * we consider the first bullet to be users responsibility on SPD entry
3232          * configuration (if you need to encrypt multicast traffic, set
3233          * the source range of SPD selector to 0.0.0.0/0, or have explicit
3234          * address ranges for possible senders).
3235          * the second bullet is not taken care of (yet).
3236          *
3237          * therefore, we do not do anything special about inner source.
3238          */
3239
3240         lwkt_gettoken(&key_token);
3241         sp = key_gettunnel((struct sockaddr *)&osrc, (struct sockaddr *)&odst,
3242                            (struct sockaddr *)&isrc, (struct sockaddr *)&idst);
3243         if (sp) {
3244                 key_freesp(sp);
3245                 lwkt_reltoken(&key_token);
3246                 return 1;
3247         } else{
3248                 lwkt_reltoken(&key_token);
3249                 return 0;
3250         }
3251 }
3252
3253 #ifdef INET6
3254 /* validate inbound IPsec tunnel packet. */
3255 int
3256 ipsec6_tunnel_validate(struct mbuf *m, /* no pullup permitted, m->m_len >= ip */
3257                        int off, u_int nxt0, struct secasvar *sav)
3258 {
3259         u_int8_t nxt = nxt0 & 0xff;
3260         struct sockaddr_in6 *sin6;
3261         struct sockaddr_in6 osrc, odst, isrc, idst;
3262         struct secpolicy *sp;
3263         struct ip6_hdr *oip6;
3264
3265 #ifdef DIAGNOSTIC
3266         if (m->m_len < sizeof(struct ip6_hdr))
3267                 panic("too short mbuf on ipsec6_tunnel_validate");
3268 #endif
3269         if (nxt != IPPROTO_IPV6)
3270                 return 0;
3271         if (m->m_pkthdr.len < off + sizeof(struct ip6_hdr))
3272                 return 0;
3273         /* do not decapsulate if the SA is for transport mode only */
3274         if (sav->sah->saidx.mode == IPSEC_MODE_TRANSPORT)
3275                 return 0;
3276
3277         oip6 = mtod(m, struct ip6_hdr *);
3278         /* AF_INET should be supported, but at this moment we don't. */
3279         sin6 = (struct sockaddr_in6 *)&sav->sah->saidx.dst;
3280         if (sin6->sin6_family != AF_INET6)
3281                 return 0;
3282         if (!IN6_ARE_ADDR_EQUAL(&oip6->ip6_dst, &sin6->sin6_addr))
3283                 return 0;
3284
3285         /* XXX slow */
3286         bzero(&osrc, sizeof(osrc));
3287         bzero(&odst, sizeof(odst));
3288         bzero(&isrc, sizeof(isrc));
3289         bzero(&idst, sizeof(idst));
3290         osrc.sin6_family = odst.sin6_family = isrc.sin6_family =
3291             idst.sin6_family = AF_INET6;
3292         osrc.sin6_len = odst.sin6_len = isrc.sin6_len = idst.sin6_len =
3293             sizeof(struct sockaddr_in6);
3294         osrc.sin6_addr = oip6->ip6_src;
3295         odst.sin6_addr = oip6->ip6_dst;
3296         m_copydata(m, off + offsetof(struct ip6_hdr, ip6_src),
3297             sizeof(isrc.sin6_addr), (caddr_t)&isrc.sin6_addr);
3298         m_copydata(m, off + offsetof(struct ip6_hdr, ip6_dst),
3299             sizeof(idst.sin6_addr), (caddr_t)&idst.sin6_addr);
3300
3301         /*
3302          * regarding to inner source address validation, see a long comment
3303          * in ipsec4_tunnel_validate.
3304          */
3305
3306         lwkt_gettoken(&key_token);
3307         sp = key_gettunnel((struct sockaddr *)&osrc, (struct sockaddr *)&odst,
3308                            (struct sockaddr *)&isrc, (struct sockaddr *)&idst);
3309         /*
3310          * when there is no suitable inbound policy for the packet of the ipsec
3311          * tunnel mode, the kernel never decapsulate the tunneled packet
3312          * as the ipsec tunnel mode even when the system wide policy is "none".
3313          * then the kernel leaves the generic tunnel module to process this
3314          * packet.  if there is no rule of the generic tunnel, the packet
3315          * is rejected and the statistics will be counted up.
3316          */
3317         if (sp) {
3318                 key_freesp(sp);
3319                 lwkt_reltoken(&key_token);
3320                 return 1;
3321         } else {
3322                 lwkt_reltoken(&key_token);
3323                 return 0;
3324         }
3325 }
3326 #endif
3327
3328 /*
3329  * Make a mbuf chain for encryption.
3330  * If the original mbuf chain contains a mbuf with a cluster,
3331  * allocate a new cluster and copy the data to the new cluster.
3332  * XXX: this hack is inefficient, but is necessary to handle cases
3333  * of TCP retransmission...
3334  */
3335 struct mbuf *
3336 ipsec_copypkt(struct mbuf *m)
3337 {
3338         struct mbuf *n, **mpp, *mnew;
3339
3340         for (n = m, mpp = &m; n; n = n->m_next) {
3341                 if (n->m_flags & M_EXT) {
3342                         /*
3343                          * Make a copy only if there are more than one
3344                          * references to the cluster.
3345                          * XXX: is this approach effective?
3346                          */
3347                         if (m_sharecount(n) > 1) {
3348                                 int remain, copied;
3349                                 struct mbuf *mm;
3350
3351                                 if (n->m_flags & M_PKTHDR) {
3352                                         MGETHDR(mnew, MB_DONTWAIT, MT_HEADER);
3353                                         if (mnew == NULL)
3354                                                 goto fail;
3355                                         if (!m_dup_pkthdr(mnew, n, MB_DONTWAIT)) {
3356                                                 m_free(mnew);
3357                                                 goto fail;
3358                                         }
3359                                 }
3360                                 else {
3361                                         MGET(mnew, MB_DONTWAIT, MT_DATA);
3362                                         if (mnew == NULL)
3363                                                 goto fail;
3364                                 }
3365                                 mnew->m_len = 0;
3366                                 mm = mnew;
3367
3368                                 /*
3369                                  * Copy data. If we don't have enough space to
3370                                  * store the whole data, allocate a cluster
3371                                  * or additional mbufs.
3372                                  * XXX: we don't use m_copyback(), since the
3373                                  * function does not use clusters and thus is
3374                                  * inefficient.
3375                                  */
3376                                 remain = n->m_len;
3377                                 copied = 0;
3378                                 while (1) {
3379                                         int len;
3380                                         struct mbuf *mn;
3381
3382                                         if (remain <= (mm->m_flags & M_PKTHDR ? MHLEN : MLEN))
3383                                                 len = remain;
3384                                         else { /* allocate a cluster */
3385                                                 MCLGET(mm, MB_DONTWAIT);
3386                                                 if (!(mm->m_flags & M_EXT)) {
3387                                                         m_free(mm);
3388                                                         goto fail;
3389                                                 }
3390                                                 len = remain < MCLBYTES ?
3391                                                         remain : MCLBYTES;
3392                                         }
3393
3394                                         bcopy(n->m_data + copied, mm->m_data,
3395                                               len);
3396
3397                                         copied += len;
3398                                         remain -= len;
3399                                         mm->m_len = len;
3400
3401                                         if (remain <= 0) /* completed? */
3402                                                 break;
3403
3404                                         /* need another mbuf */
3405                                         MGETHDR(mn, MB_DONTWAIT, MT_HEADER);
3406                                         if (mn == NULL)
3407                                                 goto fail;
3408                                         mn->m_pkthdr.rcvif = NULL;
3409                                         mm->m_next = mn;
3410                                         mm = mn;
3411                                 }
3412
3413                                 /* adjust chain */
3414                                 mm->m_next = m_free(n);
3415                                 n = mm;
3416                                 *mpp = mnew;
3417                                 mpp = &n->m_next;
3418
3419                                 continue;
3420                         }
3421                 }
3422                 *mpp = n;
3423                 mpp = &n->m_next;
3424         }
3425
3426         return (m);
3427 fail:
3428         m_freem(m);
3429         return (NULL);
3430 }
3431
3432 void
3433 ipsec_delaux(struct mbuf *m)
3434 {
3435         struct m_tag *tag;
3436
3437         while ((tag = m_tag_find(m, PACKET_TAG_IPSEC_HISTORY, NULL)) != NULL)
3438                 m_tag_delete(m, tag);
3439 }
3440
3441 int
3442 ipsec_addhist(struct mbuf *m, int proto, u_int32_t spi)
3443 {
3444         struct m_tag *tag;
3445         struct ipsec_history *p;
3446
3447         tag = m_tag_get(PACKET_TAG_IPSEC_HISTORY,
3448                         sizeof (struct ipsec_history), MB_DONTWAIT);
3449         if (tag == NULL)
3450                 return ENOBUFS;
3451         p = (struct ipsec_history *)m_tag_data(tag);
3452         bzero(p, sizeof(*p));
3453         p->ih_proto = proto;
3454         p->ih_spi = spi;
3455         m_tag_prepend(m, tag);
3456         return 0;
3457 }
3458
3459 struct ipsec_history *
3460 ipsec_gethist(struct mbuf *m, int *lenp)
3461 {
3462         struct m_tag *tag;
3463
3464         tag = m_tag_find(m, PACKET_TAG_IPSEC_HISTORY, NULL);
3465         if (tag == NULL)
3466                 return NULL;
3467         /* XXX NB: noone uses this so fake it */
3468         if (lenp)
3469                 *lenp = sizeof (struct ipsec_history);
3470         return ((struct ipsec_history *)(tag+1));
3471 }