Merge bmake-20230909
[freebsd.git] / sys / netpfil / pf / pf_ioctl.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2001 Daniel Hartmeier
5  * Copyright (c) 2002,2003 Henning Brauer
6  * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  *    - Redistributions of source code must retain the above copyright
14  *      notice, this list of conditions and the following disclaimer.
15  *    - Redistributions in binary form must reproduce the above
16  *      copyright notice, this list of conditions and the following
17  *      disclaimer in the documentation and/or other materials provided
18  *      with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Effort sponsored in part by the Defense Advanced Research Projects
34  * Agency (DARPA) and Air Force Research Laboratory, Air Force
35  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
36  *
37  *      $OpenBSD: pf_ioctl.c,v 1.213 2009/02/15 21:46:12 mbalmer Exp $
38  */
39
40 #include <sys/cdefs.h>
41 #include "opt_inet.h"
42 #include "opt_inet6.h"
43 #include "opt_bpf.h"
44 #include "opt_pf.h"
45
46 #include <sys/param.h>
47 #include <sys/_bitset.h>
48 #include <sys/bitset.h>
49 #include <sys/bus.h>
50 #include <sys/conf.h>
51 #include <sys/endian.h>
52 #include <sys/fcntl.h>
53 #include <sys/filio.h>
54 #include <sys/hash.h>
55 #include <sys/interrupt.h>
56 #include <sys/jail.h>
57 #include <sys/kernel.h>
58 #include <sys/kthread.h>
59 #include <sys/lock.h>
60 #include <sys/mbuf.h>
61 #include <sys/module.h>
62 #include <sys/nv.h>
63 #include <sys/proc.h>
64 #include <sys/sdt.h>
65 #include <sys/smp.h>
66 #include <sys/socket.h>
67 #include <sys/sysctl.h>
68 #include <sys/md5.h>
69 #include <sys/ucred.h>
70
71 #include <net/if.h>
72 #include <net/if_var.h>
73 #include <net/if_private.h>
74 #include <net/vnet.h>
75 #include <net/route.h>
76 #include <net/pfil.h>
77 #include <net/pfvar.h>
78 #include <net/if_pfsync.h>
79 #include <net/if_pflog.h>
80
81 #include <netinet/in.h>
82 #include <netinet/ip.h>
83 #include <netinet/ip_var.h>
84 #include <netinet6/ip6_var.h>
85 #include <netinet/ip_icmp.h>
86 #include <netpfil/pf/pf_nv.h>
87
88 #ifdef INET6
89 #include <netinet/ip6.h>
90 #endif /* INET6 */
91
92 #ifdef ALTQ
93 #include <net/altq/altq.h>
94 #endif
95
96 SDT_PROBE_DEFINE3(pf, ioctl, ioctl, error, "int", "int", "int");
97 SDT_PROBE_DEFINE3(pf, ioctl, function, error, "char *", "int", "int");
98 SDT_PROBE_DEFINE2(pf, ioctl, addrule, error, "int", "int");
99 SDT_PROBE_DEFINE2(pf, ioctl, nvchk, error, "int", "int");
100
101 static struct pf_kpool  *pf_get_kpool(const char *, u_int32_t, u_int8_t,
102                             u_int32_t, u_int8_t, u_int8_t, u_int8_t);
103
104 static void              pf_mv_kpool(struct pf_kpalist *, struct pf_kpalist *);
105 static void              pf_empty_kpool(struct pf_kpalist *);
106 static int               pfioctl(struct cdev *, u_long, caddr_t, int,
107                             struct thread *);
108 static int               pf_begin_eth(uint32_t *, const char *);
109 static void              pf_rollback_eth_cb(struct epoch_context *);
110 static int               pf_rollback_eth(uint32_t, const char *);
111 static int               pf_commit_eth(uint32_t, const char *);
112 static void              pf_free_eth_rule(struct pf_keth_rule *);
113 #ifdef ALTQ
114 static int               pf_begin_altq(u_int32_t *);
115 static int               pf_rollback_altq(u_int32_t);
116 static int               pf_commit_altq(u_int32_t);
117 static int               pf_enable_altq(struct pf_altq *);
118 static int               pf_disable_altq(struct pf_altq *);
119 static uint16_t          pf_qname2qid(const char *);
120 static void              pf_qid_unref(uint16_t);
121 #endif /* ALTQ */
122 static int               pf_begin_rules(u_int32_t *, int, const char *);
123 static int               pf_rollback_rules(u_int32_t, int, char *);
124 static int               pf_setup_pfsync_matching(struct pf_kruleset *);
125 static void              pf_hash_rule_rolling(MD5_CTX *, struct pf_krule *);
126 static void              pf_hash_rule(struct pf_krule *);
127 static void              pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *);
128 static int               pf_commit_rules(u_int32_t, int, char *);
129 static int               pf_addr_setup(struct pf_kruleset *,
130                             struct pf_addr_wrap *, sa_family_t);
131 static void              pf_addr_copyout(struct pf_addr_wrap *);
132 static void              pf_src_node_copy(const struct pf_ksrc_node *,
133                             struct pf_src_node *);
134 #ifdef ALTQ
135 static int               pf_export_kaltq(struct pf_altq *,
136                             struct pfioc_altq_v1 *, size_t);
137 static int               pf_import_kaltq(struct pfioc_altq_v1 *,
138                             struct pf_altq *, size_t);
139 #endif /* ALTQ */
140
141 VNET_DEFINE(struct pf_krule,    pf_default_rule);
142
143 static __inline int             pf_krule_compare(struct pf_krule *,
144                                     struct pf_krule *);
145
146 RB_GENERATE(pf_krule_global, pf_krule, entry_global, pf_krule_compare);
147
148 #ifdef ALTQ
149 VNET_DEFINE_STATIC(int,         pf_altq_running);
150 #define V_pf_altq_running       VNET(pf_altq_running)
151 #endif
152
153 #define TAGID_MAX        50000
154 struct pf_tagname {
155         TAILQ_ENTRY(pf_tagname) namehash_entries;
156         TAILQ_ENTRY(pf_tagname) taghash_entries;
157         char                    name[PF_TAG_NAME_SIZE];
158         uint16_t                tag;
159         int                     ref;
160 };
161
162 struct pf_tagset {
163         TAILQ_HEAD(, pf_tagname)        *namehash;
164         TAILQ_HEAD(, pf_tagname)        *taghash;
165         unsigned int                     mask;
166         uint32_t                         seed;
167         BITSET_DEFINE(, TAGID_MAX)       avail;
168 };
169
170 VNET_DEFINE(struct pf_tagset, pf_tags);
171 #define V_pf_tags       VNET(pf_tags)
172 static unsigned int     pf_rule_tag_hashsize;
173 #define PF_RULE_TAG_HASH_SIZE_DEFAULT   128
174 SYSCTL_UINT(_net_pf, OID_AUTO, rule_tag_hashsize, CTLFLAG_RDTUN,
175     &pf_rule_tag_hashsize, PF_RULE_TAG_HASH_SIZE_DEFAULT,
176     "Size of pf(4) rule tag hashtable");
177
178 #ifdef ALTQ
179 VNET_DEFINE(struct pf_tagset, pf_qids);
180 #define V_pf_qids       VNET(pf_qids)
181 static unsigned int     pf_queue_tag_hashsize;
182 #define PF_QUEUE_TAG_HASH_SIZE_DEFAULT  128
183 SYSCTL_UINT(_net_pf, OID_AUTO, queue_tag_hashsize, CTLFLAG_RDTUN,
184     &pf_queue_tag_hashsize, PF_QUEUE_TAG_HASH_SIZE_DEFAULT,
185     "Size of pf(4) queue tag hashtable");
186 #endif
187 VNET_DEFINE(uma_zone_t,  pf_tag_z);
188 #define V_pf_tag_z               VNET(pf_tag_z)
189 static MALLOC_DEFINE(M_PFALTQ, "pf_altq", "pf(4) altq configuration db");
190 static MALLOC_DEFINE(M_PFRULE, "pf_rule", "pf(4) rules");
191
192 #if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE)
193 #error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE
194 #endif
195
196 VNET_DEFINE_STATIC(bool, pf_filter_local) = false;
197 #define V_pf_filter_local       VNET(pf_filter_local)
198 SYSCTL_BOOL(_net_pf, OID_AUTO, filter_local, CTLFLAG_VNET | CTLFLAG_RW,
199     &VNET_NAME(pf_filter_local), false,
200     "Enable filtering for packets delivered to local network stack");
201
202 static void              pf_init_tagset(struct pf_tagset *, unsigned int *,
203                             unsigned int);
204 static void              pf_cleanup_tagset(struct pf_tagset *);
205 static uint16_t          tagname2hashindex(const struct pf_tagset *, const char *);
206 static uint16_t          tag2hashindex(const struct pf_tagset *, uint16_t);
207 static u_int16_t         tagname2tag(struct pf_tagset *, const char *);
208 static u_int16_t         pf_tagname2tag(const char *);
209 static void              tag_unref(struct pf_tagset *, u_int16_t);
210
211 #define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x
212
213 struct cdev *pf_dev;
214
/*
 * XXX - These are new and need to be checked when moving to a new version
 */
218 static void              pf_clear_all_states(void);
219 static unsigned int      pf_clear_states(const struct pf_kstate_kill *);
220 static void              pf_killstates(struct pf_kstate_kill *,
221                             unsigned int *);
222 static int               pf_killstates_row(struct pf_kstate_kill *,
223                             struct pf_idhash *);
224 static int               pf_killstates_nv(struct pfioc_nv *);
225 static int               pf_clearstates_nv(struct pfioc_nv *);
226 static int               pf_getstate(struct pfioc_nv *);
227 static int               pf_getstatus(struct pfioc_nv *);
228 static int               pf_clear_tables(void);
229 static void              pf_clear_srcnodes(struct pf_ksrc_node *);
230 static void              pf_kill_srcnodes(struct pfioc_src_node_kill *);
231 static int               pf_keepcounters(struct pfioc_nv *);
232 static void              pf_tbladdr_copyout(struct pf_addr_wrap *);
233
234 /*
235  * Wrapper functions for pfil(9) hooks
236  */
237 static pfil_return_t pf_eth_check_in(struct mbuf **m, struct ifnet *ifp,
238     int flags, void *ruleset __unused, struct inpcb *inp);
239 static pfil_return_t pf_eth_check_out(struct mbuf **m, struct ifnet *ifp,
240     int flags, void *ruleset __unused, struct inpcb *inp);
241 #ifdef INET
242 static pfil_return_t pf_check_in(struct mbuf **m, struct ifnet *ifp,
243     int flags, void *ruleset __unused, struct inpcb *inp);
244 static pfil_return_t pf_check_out(struct mbuf **m, struct ifnet *ifp,
245     int flags, void *ruleset __unused, struct inpcb *inp);
246 #endif
247 #ifdef INET6
248 static pfil_return_t pf_check6_in(struct mbuf **m, struct ifnet *ifp,
249     int flags, void *ruleset __unused, struct inpcb *inp);
250 static pfil_return_t pf_check6_out(struct mbuf **m, struct ifnet *ifp,
251     int flags, void *ruleset __unused, struct inpcb *inp);
252 #endif
253
254 static void             hook_pf_eth(void);
255 static void             hook_pf(void);
256 static void             dehook_pf_eth(void);
257 static void             dehook_pf(void);
258 static int              shutdown_pf(void);
259 static int              pf_load(void);
260 static void             pf_unload(void);
261
262 static struct cdevsw pf_cdevsw = {
263         .d_ioctl =      pfioctl,
264         .d_name =       PF_NAME,
265         .d_version =    D_VERSION,
266 };
267
268 VNET_DEFINE_STATIC(bool, pf_pfil_hooked);
269 #define V_pf_pfil_hooked        VNET(pf_pfil_hooked)
270 VNET_DEFINE_STATIC(bool, pf_pfil_eth_hooked);
271 #define V_pf_pfil_eth_hooked    VNET(pf_pfil_eth_hooked)
272
273 /*
274  * We need a flag that is neither hooked nor running to know when
275  * the VNET is "valid".  We primarily need this to control (global)
276  * external event, e.g., eventhandlers.
277  */
278 VNET_DEFINE(int, pf_vnet_active);
279 #define V_pf_vnet_active        VNET(pf_vnet_active)
280
281 int pf_end_threads;
282 struct proc *pf_purge_proc;
283
284 VNET_DEFINE(struct rmlock, pf_rules_lock);
285 VNET_DEFINE_STATIC(struct sx, pf_ioctl_lock);
286 #define V_pf_ioctl_lock         VNET(pf_ioctl_lock)
287 struct sx                       pf_end_lock;
288
289 /* pfsync */
290 VNET_DEFINE(pfsync_state_import_t *, pfsync_state_import_ptr);
291 VNET_DEFINE(pfsync_insert_state_t *, pfsync_insert_state_ptr);
292 VNET_DEFINE(pfsync_update_state_t *, pfsync_update_state_ptr);
293 VNET_DEFINE(pfsync_delete_state_t *, pfsync_delete_state_ptr);
294 VNET_DEFINE(pfsync_clear_states_t *, pfsync_clear_states_ptr);
295 VNET_DEFINE(pfsync_defer_t *, pfsync_defer_ptr);
296 pfsync_detach_ifnet_t *pfsync_detach_ifnet_ptr;
297
298 /* pflog */
299 pflog_packet_t                  *pflog_packet_ptr = NULL;
300
/*
 * Copy a user-provided string, returning an error if truncation would occur.
 * Avoid scanning past "sz" bytes in the source string since there's no
 * guarantee that it's nul-terminated.
 */
static int
pf_user_strcpy(char *dst, const char *src, size_t sz)
{
	size_t i;

	/* Locate the terminator, looking at no more than sz bytes. */
	for (i = 0; i < sz; i++) {
		if (src[i] == '\0')
			break;
	}
	if (i == sz)
		return (EINVAL);	/* unterminated or would truncate */
	memcpy(dst, src, i + 1);	/* i < sz; includes the NUL */
	return (0);
}
314
/*
 * Per-VNET pf initialization: set up the rule sets, the default rule and
 * its counters, the default state timeouts, the status counters and the
 * software interrupt used for deferred packet transmission.
 */
static void
pfattach_vnet(void)
{
	u_int32_t *my_timeout = V_pf_default_rule.timeout;

	/* Start from a clean per-VNET status block. */
	bzero(&V_pf_status, sizeof(V_pf_status));

	pf_initialize();
	pfr_initialize();
	pfi_initialize_vnet();
	pf_normalize_init();
	pf_syncookies_init();

	V_pf_limits[PF_LIMIT_STATES].limit = PFSTATE_HIWAT;
	V_pf_limits[PF_LIMIT_SRC_NODES].limit = PFSNODE_HIWAT;

	RB_INIT(&V_pf_anchors);
	pf_init_kruleset(&pf_main_ruleset);

	pf_init_keth(V_pf_keth);

	/* default rule should never be garbage collected */
	V_pf_default_rule.entries.tqe_prev = &V_pf_default_rule.entries.tqe_next;
#ifdef PF_DEFAULT_TO_DROP
	V_pf_default_rule.action = PF_DROP;
#else
	V_pf_default_rule.action = PF_PASS;
#endif
	V_pf_default_rule.nr = -1;
	V_pf_default_rule.rtableid = -1;

	pf_counter_u64_init(&V_pf_default_rule.evaluations, M_WAITOK);
	for (int i = 0; i < 2; i++) {
		pf_counter_u64_init(&V_pf_default_rule.packets[i], M_WAITOK);
		pf_counter_u64_init(&V_pf_default_rule.bytes[i], M_WAITOK);
	}
	V_pf_default_rule.states_cur = counter_u64_alloc(M_WAITOK);
	V_pf_default_rule.states_tot = counter_u64_alloc(M_WAITOK);
	V_pf_default_rule.src_nodes = counter_u64_alloc(M_WAITOK);

	V_pf_default_rule.timestamp = uma_zalloc_pcpu(pf_timestamp_pcpu_zone,
	    M_WAITOK | M_ZERO);

#ifdef PF_WANT_32_TO_64_COUNTER
	/* Marker entries inserted into the global kif/rule lists. */
	V_pf_kifmarker = malloc(sizeof(*V_pf_kifmarker), PFI_MTYPE, M_WAITOK | M_ZERO);
	V_pf_rulemarker = malloc(sizeof(*V_pf_rulemarker), M_PFRULE, M_WAITOK | M_ZERO);
	PF_RULES_WLOCK();
	LIST_INSERT_HEAD(&V_pf_allkiflist, V_pf_kifmarker, pfik_allkiflist);
	LIST_INSERT_HEAD(&V_pf_allrulelist, &V_pf_default_rule, allrulelist);
	V_pf_allrulecount++;
	LIST_INSERT_HEAD(&V_pf_allrulelist, V_pf_rulemarker, allrulelist);
	PF_RULES_WUNLOCK();
#endif

	/* initialize default timeouts */
	my_timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL;
	my_timeout[PFTM_TCP_OPENING] = PFTM_TCP_OPENING_VAL;
	my_timeout[PFTM_TCP_ESTABLISHED] = PFTM_TCP_ESTABLISHED_VAL;
	my_timeout[PFTM_TCP_CLOSING] = PFTM_TCP_CLOSING_VAL;
	my_timeout[PFTM_TCP_FIN_WAIT] = PFTM_TCP_FIN_WAIT_VAL;
	my_timeout[PFTM_TCP_CLOSED] = PFTM_TCP_CLOSED_VAL;
	my_timeout[PFTM_UDP_FIRST_PACKET] = PFTM_UDP_FIRST_PACKET_VAL;
	my_timeout[PFTM_UDP_SINGLE] = PFTM_UDP_SINGLE_VAL;
	my_timeout[PFTM_UDP_MULTIPLE] = PFTM_UDP_MULTIPLE_VAL;
	my_timeout[PFTM_ICMP_FIRST_PACKET] = PFTM_ICMP_FIRST_PACKET_VAL;
	my_timeout[PFTM_ICMP_ERROR_REPLY] = PFTM_ICMP_ERROR_REPLY_VAL;
	my_timeout[PFTM_OTHER_FIRST_PACKET] = PFTM_OTHER_FIRST_PACKET_VAL;
	my_timeout[PFTM_OTHER_SINGLE] = PFTM_OTHER_SINGLE_VAL;
	my_timeout[PFTM_OTHER_MULTIPLE] = PFTM_OTHER_MULTIPLE_VAL;
	my_timeout[PFTM_FRAG] = PFTM_FRAG_VAL;
	my_timeout[PFTM_INTERVAL] = PFTM_INTERVAL_VAL;
	my_timeout[PFTM_SRC_NODE] = PFTM_SRC_NODE_VAL;
	my_timeout[PFTM_TS_DIFF] = PFTM_TS_DIFF_VAL;
	my_timeout[PFTM_ADAPTIVE_START] = PFSTATE_ADAPT_START;
	my_timeout[PFTM_ADAPTIVE_END] = PFSTATE_ADAPT_END;

	V_pf_status.debug = PF_DEBUG_URGENT;
	/*
	 * XXX This is different than in OpenBSD where reassembly is enabled by
	 * default. In FreeBSD we expect people to still use scrub rules and
	 * switch to the new syntax later. Only when they switch they must
	 * explicitly enable reassembly. We could change the default once the
	 * scrub rule functionality is hopefully removed some day in future.
	 */
	V_pf_status.reass = 0;

	V_pf_pfil_hooked = false;
	V_pf_pfil_eth_hooked = false;

	/* XXX do our best to avoid a conflict */
	V_pf_status.hostid = arc4random();

	for (int i = 0; i < PFRES_MAX; i++)
		V_pf_status.counters[i] = counter_u64_alloc(M_WAITOK);
	for (int i = 0; i < KLCNT_MAX; i++)
		V_pf_status.lcounters[i] = counter_u64_alloc(M_WAITOK);
	for (int i = 0; i < FCNT_MAX; i++)
		pf_counter_u64_init(&V_pf_status.fcounters[i], M_WAITOK);
	for (int i = 0; i < SCNT_MAX; i++)
		V_pf_status.scounters[i] = counter_u64_alloc(M_WAITOK);

	if (swi_add(&V_pf_swi_ie, "pf send", pf_intr, curvnet, SWI_NET,
	    INTR_MPSAFE, &V_pf_swi_cookie) != 0)
		/* XXXGL: leaked all above. */
		return;
}
421
/*
 * Find the address pool of the rule identified by anchor path, rule action
 * (mapped to a ruleset number), rule number and the active/inactive list
 * selector.  With "r_last" set, the last rule of the selected queue is
 * taken instead of matching rule_number.  With "check_ticket" set, "ticket"
 * must match the selected list's current ticket.  Returns NULL when no such
 * rule exists.
 */
static struct pf_kpool *
pf_get_kpool(const char *anchor, u_int32_t ticket, u_int8_t rule_action,
    u_int32_t rule_number, u_int8_t r_last, u_int8_t active,
    u_int8_t check_ticket)
{
	struct pf_kruleset	*ruleset;
	struct pf_krule		*rule;
	int			 rs_num;

	ruleset = pf_find_kruleset(anchor);
	if (ruleset == NULL)
		return (NULL);
	rs_num = pf_get_ruleset_number(rule_action);
	if (rs_num >= PF_RULESET_MAX)
		return (NULL);
	if (active) {
		if (check_ticket && ticket !=
		    ruleset->rules[rs_num].active.ticket)
			return (NULL);
		if (r_last)
			rule = TAILQ_LAST(ruleset->rules[rs_num].active.ptr,
			    pf_krulequeue);
		else
			rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr);
	} else {
		if (check_ticket && ticket !=
		    ruleset->rules[rs_num].inactive.ticket)
			return (NULL);
		if (r_last)
			rule = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr,
			    pf_krulequeue);
		else
			rule = TAILQ_FIRST(ruleset->rules[rs_num].inactive.ptr);
	}
	if (!r_last) {
		/* Walk forward to the rule with the requested number. */
		while ((rule != NULL) && (rule->nr != rule_number))
			rule = TAILQ_NEXT(rule, entries);
	}
	if (rule == NULL)
		return (NULL);

	return (&rule->rpool);
}
465
466 static void
467 pf_mv_kpool(struct pf_kpalist *poola, struct pf_kpalist *poolb)
468 {
469         struct pf_kpooladdr     *mv_pool_pa;
470
471         while ((mv_pool_pa = TAILQ_FIRST(poola)) != NULL) {
472                 TAILQ_REMOVE(poola, mv_pool_pa, entries);
473                 TAILQ_INSERT_TAIL(poolb, mv_pool_pa, entries);
474         }
475 }
476
477 static void
478 pf_empty_kpool(struct pf_kpalist *poola)
479 {
480         struct pf_kpooladdr *pa;
481
482         while ((pa = TAILQ_FIRST(poola)) != NULL) {
483                 switch (pa->addr.type) {
484                 case PF_ADDR_DYNIFTL:
485                         pfi_dynaddr_remove(pa->addr.p.dyn);
486                         break;
487                 case PF_ADDR_TABLE:
488                         /* XXX: this could be unfinished pooladdr on pabuf */
489                         if (pa->addr.p.tbl != NULL)
490                                 pfr_detach_table(pa->addr.p.tbl);
491                         break;
492                 }
493                 if (pa->kif)
494                         pfi_kkif_unref(pa->kif);
495                 TAILQ_REMOVE(poola, pa, entries);
496                 free(pa, M_PFRULE);
497         }
498 }
499
/*
 * Move a rule from its rule queue onto the per-VNET list of unlinked rules
 * for deferred destruction, setting PFRULE_REFS on the way out.  Both the
 * rules write lock and the unlinked-rules lock must be held.
 */
static void
pf_unlink_rule_locked(struct pf_krulequeue *rulequeue, struct pf_krule *rule)
{

	PF_RULES_WASSERT();
	PF_UNLNKDRULES_ASSERT();

	TAILQ_REMOVE(rulequeue, rule, entries);

	rule->rule_ref |= PFRULE_REFS;
	TAILQ_INSERT_TAIL(&V_pf_unlinked_rules, rule, entries);
}
512
/*
 * As pf_unlink_rule_locked(), but acquires and releases the unlinked-rules
 * lock around the operation.  Rules write lock must be held.
 */
static void
pf_unlink_rule(struct pf_krulequeue *rulequeue, struct pf_krule *rule)
{

	PF_RULES_WASSERT();

	PF_UNLNKDRULES_LOCK();
	pf_unlink_rule_locked(rulequeue, rule);
	PF_UNLNKDRULES_UNLOCK();
}
523
/*
 * Release everything an Ethernet rule holds: tag references, ALTQ queue
 * reference, interface references, attached tables, counters and the rule
 * memory itself.  A NULL rule is silently ignored.  Rules write lock must
 * be held.
 */
static void
pf_free_eth_rule(struct pf_keth_rule *rule)
{
	PF_RULES_WASSERT();

	if (rule == NULL)
		return;

	if (rule->tag)
		tag_unref(&V_pf_tags, rule->tag);
	if (rule->match_tag)
		tag_unref(&V_pf_tags, rule->match_tag);
#ifdef ALTQ
	pf_qid_unref(rule->qid);
#endif

	if (rule->bridge_to)
		pfi_kkif_unref(rule->bridge_to);
	if (rule->kif)
		pfi_kkif_unref(rule->kif);

	/* Only table-type addresses hold a table reference. */
	if (rule->ipsrc.addr.type == PF_ADDR_TABLE)
		pfr_detach_table(rule->ipsrc.addr.p.tbl);
	if (rule->ipdst.addr.type == PF_ADDR_TABLE)
		pfr_detach_table(rule->ipdst.addr.p.tbl);

	counter_u64_free(rule->evaluations);
	for (int i = 0; i < 2; i++) {
		counter_u64_free(rule->packets[i]);
		counter_u64_free(rule->bytes[i]);
	}
	uma_zfree_pcpu(pf_timestamp_pcpu_zone, rule->timestamp);
	pf_keth_anchor_remove(rule);

	free(rule, M_PFRULE);
}
560
/*
 * Release all resources attached to a rule: tag references, ALTQ queue
 * IDs, dynamic addresses or tables on both address sides, the overload
 * table, the interface reference and the rule's address pool, then free
 * the rule itself.  Rules write lock and the pf config lock must be held.
 */
void
pf_free_rule(struct pf_krule *rule)
{

	PF_RULES_WASSERT();
	PF_CONFIG_ASSERT();

	if (rule->tag)
		tag_unref(&V_pf_tags, rule->tag);
	if (rule->match_tag)
		tag_unref(&V_pf_tags, rule->match_tag);
#ifdef ALTQ
	/* pqid may alias qid; avoid dropping the same reference twice. */
	if (rule->pqid != rule->qid)
		pf_qid_unref(rule->pqid);
	pf_qid_unref(rule->qid);
#endif
	switch (rule->src.addr.type) {
	case PF_ADDR_DYNIFTL:
		pfi_dynaddr_remove(rule->src.addr.p.dyn);
		break;
	case PF_ADDR_TABLE:
		pfr_detach_table(rule->src.addr.p.tbl);
		break;
	}
	switch (rule->dst.addr.type) {
	case PF_ADDR_DYNIFTL:
		pfi_dynaddr_remove(rule->dst.addr.p.dyn);
		break;
	case PF_ADDR_TABLE:
		pfr_detach_table(rule->dst.addr.p.tbl);
		break;
	}
	if (rule->overload_tbl)
		pfr_detach_table(rule->overload_tbl);
	if (rule->kif)
		pfi_kkif_unref(rule->kif);
	pf_kanchor_remove(rule);
	pf_empty_kpool(&rule->rpool.list);

	pf_krule_free(rule);
}
602
/*
 * Initialize a tag set.  The hash size comes from the tunable, which is
 * forced back to default_size when it is zero or not a power of two (the
 * mask-based hashing below requires a power of two).  All tag IDs start
 * out available.
 */
static void
pf_init_tagset(struct pf_tagset *ts, unsigned int *tunable_size,
    unsigned int default_size)
{
	unsigned int i;
	unsigned int hashsize;

	if (*tunable_size == 0 || !powerof2(*tunable_size))
		*tunable_size = default_size;

	hashsize = *tunable_size;
	ts->namehash = mallocarray(hashsize, sizeof(*ts->namehash), M_PFHASH,
	    M_WAITOK);
	ts->taghash = mallocarray(hashsize, sizeof(*ts->taghash), M_PFHASH,
	    M_WAITOK);
	ts->mask = hashsize - 1;
	/* Random seed makes the name hash distribution unpredictable. */
	ts->seed = arc4random();
	for (i = 0; i < hashsize; i++) {
		TAILQ_INIT(&ts->namehash[i]);
		TAILQ_INIT(&ts->taghash[i]);
	}
	/* Mark all tag IDs as available. */
	BIT_FILL(TAGID_MAX, &ts->avail);
}
626
/*
 * Free every tag entry of a tag set and both of its hash tables.
 */
static void
pf_cleanup_tagset(struct pf_tagset *ts)
{
	unsigned int i;
	unsigned int hashsize;
	struct pf_tagname *t, *tmp;

	/*
	 * Only need to clean up one of the hashes as each tag is hashed
	 * into each table.
	 */
	hashsize = ts->mask + 1;
	for (i = 0; i < hashsize; i++)
		TAILQ_FOREACH_SAFE(t, &ts->namehash[i], namehash_entries, tmp)
			uma_zfree(V_pf_tag_z, t);

	free(ts->namehash, M_PFHASH);
	free(ts->taghash, M_PFHASH);
}
646
647 static uint16_t
648 tagname2hashindex(const struct pf_tagset *ts, const char *tagname)
649 {
650         size_t len;
651
652         len = strnlen(tagname, PF_TAG_NAME_SIZE - 1);
653         return (murmur3_32_hash(tagname, len, ts->seed) & ts->mask);
654 }
655
/*
 * Hash a tag ID into the tag hash: the ID modulo the table size (mask is
 * size - 1, a power of two).
 */
static uint16_t
tag2hashindex(const struct pf_tagset *ts, uint16_t tag)
{

	return (tag & ts->mask);
}
662
663 static u_int16_t
664 tagname2tag(struct pf_tagset *ts, const char *tagname)
665 {
666         struct pf_tagname       *tag;
667         u_int32_t                index;
668         u_int16_t                new_tagid;
669
670         PF_RULES_WASSERT();
671
672         index = tagname2hashindex(ts, tagname);
673         TAILQ_FOREACH(tag, &ts->namehash[index], namehash_entries)
674                 if (strcmp(tagname, tag->name) == 0) {
675                         tag->ref++;
676                         return (tag->tag);
677                 }
678
679         /*
680          * new entry
681          *
682          * to avoid fragmentation, we do a linear search from the beginning
683          * and take the first free slot we find.
684          */
685         new_tagid = BIT_FFS(TAGID_MAX, &ts->avail);
686         /*
687          * Tags are 1-based, with valid tags in the range [1..TAGID_MAX].
688          * BIT_FFS() returns a 1-based bit number, with 0 indicating no bits
689          * set.  It may also return a bit number greater than TAGID_MAX due
690          * to rounding of the number of bits in the vector up to a multiple
691          * of the vector word size at declaration/allocation time.
692          */
693         if ((new_tagid == 0) || (new_tagid > TAGID_MAX))
694                 return (0);
695
696         /* Mark the tag as in use.  Bits are 0-based for BIT_CLR() */
697         BIT_CLR(TAGID_MAX, new_tagid - 1, &ts->avail);
698
699         /* allocate and fill new struct pf_tagname */
700         tag = uma_zalloc(V_pf_tag_z, M_NOWAIT);
701         if (tag == NULL)
702                 return (0);
703         strlcpy(tag->name, tagname, sizeof(tag->name));
704         tag->tag = new_tagid;
705         tag->ref = 1;
706
707         /* Insert into namehash */
708         TAILQ_INSERT_TAIL(&ts->namehash[index], tag, namehash_entries);
709
710         /* Insert into taghash */
711         index = tag2hashindex(ts, new_tagid);
712         TAILQ_INSERT_TAIL(&ts->taghash[index], tag, taghash_entries);
713
714         return (tag->tag);
715 }
716
/*
 * Drop one reference on tag ID "tag".  On the last reference the entry is
 * removed from both hashes, its ID is returned to the available set and
 * the name entry is freed.  Rules write lock must be held.
 */
static void
tag_unref(struct pf_tagset *ts, u_int16_t tag)
{
	struct pf_tagname	*t;
	uint16_t		 index;

	PF_RULES_WASSERT();

	index = tag2hashindex(ts, tag);
	TAILQ_FOREACH(t, &ts->taghash[index], taghash_entries)
		if (tag == t->tag) {
			if (--t->ref == 0) {
				TAILQ_REMOVE(&ts->taghash[index], t,
				    taghash_entries);
				index = tagname2hashindex(ts, t->name);
				TAILQ_REMOVE(&ts->namehash[index], t,
				    namehash_entries);
				/* Bits are 0-based for BIT_SET() */
				BIT_SET(TAGID_MAX, tag - 1, &ts->avail);
				uma_zfree(V_pf_tag_z, t);
			}
			break;
		}
}
741
/*
 * Convenience wrapper: look up or allocate a tag ID in the per-VNET rule
 * tag set.
 */
static uint16_t
pf_tagname2tag(const char *tagname)
{
	return (tagname2tag(&V_pf_tags, tagname));
}
747
748 static int
749 pf_begin_eth(uint32_t *ticket, const char *anchor)
750 {
751         struct pf_keth_rule *rule, *tmp;
752         struct pf_keth_ruleset *rs;
753
754         PF_RULES_WASSERT();
755
756         rs = pf_find_or_create_keth_ruleset(anchor);
757         if (rs == NULL)
758                 return (EINVAL);
759
760         /* Purge old inactive rules. */
761         TAILQ_FOREACH_SAFE(rule, rs->inactive.rules, entries,
762             tmp) {
763                 TAILQ_REMOVE(rs->inactive.rules, rule,
764                     entries);
765                 pf_free_eth_rule(rule);
766         }
767
768         *ticket = ++rs->inactive.ticket;
769         rs->inactive.open = 1;
770
771         return (0);
772 }
773
/*
 * Epoch(9) callback performing the deferred rollback of an Ethernet
 * ruleset.  The ruleset's VNET is entered explicitly because epoch
 * callbacks run outside the originating VNET context.
 */
static void
pf_rollback_eth_cb(struct epoch_context *ctx)
{
	struct pf_keth_ruleset *rs;

	rs = __containerof(ctx, struct pf_keth_ruleset, epoch_ctx);

	CURVNET_SET(rs->vnet);

	PF_RULES_WLOCK();
	pf_rollback_eth(rs->inactive.ticket,
	    rs->anchor ? rs->anchor->path : "");
	PF_RULES_WUNLOCK();

	CURVNET_RESTORE();
}
790
/*
 * Abort an open Ethernet rules transaction: free all rules gathered on the
 * inactive list and close it.  A mismatched ticket or an already-closed
 * transaction is treated as a successful no-op.  Rules write lock must be
 * held.
 */
static int
pf_rollback_eth(uint32_t ticket, const char *anchor)
{
	struct pf_keth_rule *rule, *tmp;
	struct pf_keth_ruleset *rs;

	PF_RULES_WASSERT();

	rs = pf_find_keth_ruleset(anchor);
	if (rs == NULL)
		return (EINVAL);

	if (!rs->inactive.open ||
	    ticket != rs->inactive.ticket)
		return (0);

	/* Purge old inactive rules. */
	TAILQ_FOREACH_SAFE(rule, rs->inactive.rules, entries,
	    tmp) {
		TAILQ_REMOVE(rs->inactive.rules, rule, entries);
		pf_free_eth_rule(rule);
	}

	rs->inactive.open = 0;

	/* The ruleset may now be unused; drop it if so. */
	pf_remove_if_empty_keth_ruleset(rs);

	return (0);
}
820
/*
 * Point the skip target for criterion "i" of every rule between head[i]
 * and the current rule at "cur", then advance head[i] to cur.
 */
#define PF_SET_SKIP_STEPS(i)					\
	do {							\
		while (head[i] != cur) {			\
			head[i]->skip[i].ptr = cur;		\
			head[i] = TAILQ_NEXT(head[i], entries); \
		}						\
	} while (0)

/*
 * Compute the skip steps of an Ethernet rule queue.  For each rule and
 * each match criterion, the skip pointer names the next rule where that
 * criterion changes, so evaluation can jump over runs of rules that would
 * fail the same test.
 */
static void
pf_eth_calc_skip_steps(struct pf_keth_ruleq *rules)
{
	struct pf_keth_rule *cur, *prev, *head[PFE_SKIP_COUNT];
	int i;

	cur = TAILQ_FIRST(rules);
	prev = cur;
	for (i = 0; i < PFE_SKIP_COUNT; ++i)
		head[i] = cur;
	while (cur != NULL) {
		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
			PF_SET_SKIP_STEPS(PFE_SKIP_IFP);
		if (cur->direction != prev->direction)
			PF_SET_SKIP_STEPS(PFE_SKIP_DIR);
		if (cur->proto != prev->proto)
			PF_SET_SKIP_STEPS(PFE_SKIP_PROTO);
		if (memcmp(&cur->src, &prev->src, sizeof(cur->src)) != 0)
			PF_SET_SKIP_STEPS(PFE_SKIP_SRC_ADDR);
		if (memcmp(&cur->dst, &prev->dst, sizeof(cur->dst)) != 0)
			PF_SET_SKIP_STEPS(PFE_SKIP_DST_ADDR);
		if (cur->ipsrc.neg != prev->ipsrc.neg ||
		    pf_addr_wrap_neq(&cur->ipsrc.addr, &prev->ipsrc.addr))
			PF_SET_SKIP_STEPS(PFE_SKIP_SRC_IP_ADDR);
		if (cur->ipdst.neg != prev->ipdst.neg ||
		    pf_addr_wrap_neq(&cur->ipdst.addr, &prev->ipdst.addr))
			PF_SET_SKIP_STEPS(PFE_SKIP_DST_IP_ADDR);

		prev = cur;
		cur = TAILQ_NEXT(cur, entries);
	}
	/* Terminate all remaining skip chains at the end of the queue. */
	for (i = 0; i < PFE_SKIP_COUNT; ++i)
		PF_SET_SKIP_STEPS(i);
}
863
/*
 * Atomically activate the staged (inactive) Ethernet ruleset identified
 * by 'ticket' in 'anchor'.  The previously active rules end up on the
 * inactive list and are freed later from an epoch callback, once no
 * reader can still hold a reference to them.  Returns EINVAL for an
 * unknown anchor, EBUSY on a closed transaction or ticket mismatch.
 */
static int
pf_commit_eth(uint32_t ticket, const char *anchor)
{
	struct pf_keth_ruleq *rules;
	struct pf_keth_ruleset *rs;

	rs = pf_find_keth_ruleset(anchor);
	if (rs == NULL) {
		return (EINVAL);
	}

	if (!rs->inactive.open ||
	    ticket != rs->inactive.ticket)
		return (EBUSY);

	PF_RULES_WASSERT();

	/* Skip steps must be ready before readers can see the new list. */
	pf_eth_calc_skip_steps(rs->inactive.rules);

	/* Publish the new list with a single atomic pointer store;
	 * the old active list becomes the inactive one. */
	rules = rs->active.rules;
	ck_pr_store_ptr(&rs->active.rules, rs->inactive.rules);
	rs->inactive.rules = rules;
	rs->inactive.ticket = rs->active.ticket;

	/* Clean up inactive rules (i.e. previously active rules), only when
	 * we're sure they're no longer used. */
	NET_EPOCH_CALL(pf_rollback_eth_cb, &rs->epoch_ctx);

	return (0);
}
894
895 #ifdef ALTQ
896 static uint16_t
897 pf_qname2qid(const char *qname)
898 {
899         return (tagname2tag(&V_pf_qids, qname));
900 }
901
/*
 * Release one reference on a queue ID in the per-vnet tag allocator
 * (counterpart of pf_qname2qid()).
 */
static void
pf_qid_unref(uint16_t qid)
{
	tag_unref(&V_pf_qids, qid);
}
907
908 static int
909 pf_begin_altq(u_int32_t *ticket)
910 {
911         struct pf_altq  *altq, *tmp;
912         int              error = 0;
913
914         PF_RULES_WASSERT();
915
916         /* Purge the old altq lists */
917         TAILQ_FOREACH_SAFE(altq, V_pf_altq_ifs_inactive, entries, tmp) {
918                 if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
919                         /* detach and destroy the discipline */
920                         error = altq_remove(altq);
921                 }
922                 free(altq, M_PFALTQ);
923         }
924         TAILQ_INIT(V_pf_altq_ifs_inactive);
925         TAILQ_FOREACH_SAFE(altq, V_pf_altqs_inactive, entries, tmp) {
926                 pf_qid_unref(altq->qid);
927                 free(altq, M_PFALTQ);
928         }
929         TAILQ_INIT(V_pf_altqs_inactive);
930         if (error)
931                 return (error);
932         *ticket = ++V_ticket_altqs_inactive;
933         V_altqs_inactive_open = 1;
934         return (0);
935 }
936
937 static int
938 pf_rollback_altq(u_int32_t ticket)
939 {
940         struct pf_altq  *altq, *tmp;
941         int              error = 0;
942
943         PF_RULES_WASSERT();
944
945         if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive)
946                 return (0);
947         /* Purge the old altq lists */
948         TAILQ_FOREACH_SAFE(altq, V_pf_altq_ifs_inactive, entries, tmp) {
949                 if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
950                         /* detach and destroy the discipline */
951                         error = altq_remove(altq);
952                 }
953                 free(altq, M_PFALTQ);
954         }
955         TAILQ_INIT(V_pf_altq_ifs_inactive);
956         TAILQ_FOREACH_SAFE(altq, V_pf_altqs_inactive, entries, tmp) {
957                 pf_qid_unref(altq->qid);
958                 free(altq, M_PFALTQ);
959         }
960         TAILQ_INIT(V_pf_altqs_inactive);
961         V_altqs_inactive_open = 0;
962         return (error);
963 }
964
/*
 * Commit the ALTQ transaction matching 'ticket': swap the inactive
 * lists in as active, attach and (if pf_altq_running) enable the new
 * disciplines, then detach and free the previously active entries.
 * Returns EBUSY on a ticket mismatch, otherwise the first error seen.
 *
 * NOTE(review): an altq_pfattach()/pf_enable_altq() failure returns
 * early with the swap already done, the old lists still unpurged on the
 * inactive queues and V_altqs_inactive_open left set — confirm this is
 * the intended recovery state.
 */
static int
pf_commit_altq(u_int32_t ticket)
{
	struct pf_altqqueue	*old_altqs, *old_altq_ifs;
	struct pf_altq		*altq, *tmp;
	int			 err, error = 0;

	PF_RULES_WASSERT();

	if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive)
		return (EBUSY);

	/* swap altqs, keep the old. */
	old_altqs = V_pf_altqs_active;
	old_altq_ifs = V_pf_altq_ifs_active;
	V_pf_altqs_active = V_pf_altqs_inactive;
	V_pf_altq_ifs_active = V_pf_altq_ifs_inactive;
	V_pf_altqs_inactive = old_altqs;
	V_pf_altq_ifs_inactive = old_altq_ifs;
	V_ticket_altqs_active = V_ticket_altqs_inactive;

	/* Attach new disciplines */
	TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) {
		if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
			/* attach the discipline */
			error = altq_pfattach(altq);
			if (error == 0 && V_pf_altq_running)
				error = pf_enable_altq(altq);
			if (error != 0)
				return (error);
		}
	}

	/* Purge the old altq lists */
	TAILQ_FOREACH_SAFE(altq, V_pf_altq_ifs_inactive, entries, tmp) {
		if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
			/* detach and destroy the discipline */
			if (V_pf_altq_running)
				error = pf_disable_altq(altq);
			/* Keep the first error but finish tearing down. */
			err = altq_pfdetach(altq);
			if (err != 0 && error == 0)
				error = err;
			err = altq_remove(altq);
			if (err != 0 && error == 0)
				error = err;
		}
		free(altq, M_PFALTQ);
	}
	TAILQ_INIT(V_pf_altq_ifs_inactive);
	TAILQ_FOREACH_SAFE(altq, V_pf_altqs_inactive, entries, tmp) {
		pf_qid_unref(altq->qid);
		free(altq, M_PFALTQ);
	}
	TAILQ_INIT(V_pf_altqs_inactive);

	V_altqs_inactive_open = 0;
	return (error);
}
1023
1024 static int
1025 pf_enable_altq(struct pf_altq *altq)
1026 {
1027         struct ifnet            *ifp;
1028         struct tb_profile        tb;
1029         int                      error = 0;
1030
1031         if ((ifp = ifunit(altq->ifname)) == NULL)
1032                 return (EINVAL);
1033
1034         if (ifp->if_snd.altq_type != ALTQT_NONE)
1035                 error = altq_enable(&ifp->if_snd);
1036
1037         /* set tokenbucket regulator */
1038         if (error == 0 && ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
1039                 tb.rate = altq->ifbandwidth;
1040                 tb.depth = altq->tbrsize;
1041                 error = tbr_set(&ifp->if_snd, &tb);
1042         }
1043
1044         return (error);
1045 }
1046
1047 static int
1048 pf_disable_altq(struct pf_altq *altq)
1049 {
1050         struct ifnet            *ifp;
1051         struct tb_profile        tb;
1052         int                      error;
1053
1054         if ((ifp = ifunit(altq->ifname)) == NULL)
1055                 return (EINVAL);
1056
1057         /*
1058          * when the discipline is no longer referenced, it was overridden
1059          * by a new one.  if so, just return.
1060          */
1061         if (altq->altq_disc != ifp->if_snd.altq_disc)
1062                 return (0);
1063
1064         error = altq_disable(&ifp->if_snd);
1065
1066         if (error == 0) {
1067                 /* clear tokenbucket regulator */
1068                 tb.rate = 0;
1069                 error = tbr_set(&ifp->if_snd, &tb);
1070         }
1071
1072         return (error);
1073 }
1074
/*
 * Re-bind one copied altq entry while rebuilding the configuration
 * after an interface arrival/departure.  If the named interface cannot
 * be found — or 'remove' indicates that 'ifp' itself is going away —
 * the entry is only marked PFALTQ_FLAG_IF_REMOVED instead of being
 * attached.  On failure the altq is freed here; on success the caller
 * keeps ownership.
 *
 * NOTE(review): if altq_add() succeeds but the ticket check then turns
 * the result into EBUSY, the discipline created by altq_add() is not
 * torn down before the free — confirm whether that can leak.
 */
static int
pf_altq_ifnet_event_add(struct ifnet *ifp, int remove, u_int32_t ticket,
    struct pf_altq *altq)
{
	struct ifnet	*ifp1;
	int		 error = 0;

	/* Deactivate the interface in question */
	altq->local_flags &= ~PFALTQ_FLAG_IF_REMOVED;
	if ((ifp1 = ifunit(altq->ifname)) == NULL ||
	    (remove && ifp1 == ifp)) {
		altq->local_flags |= PFALTQ_FLAG_IF_REMOVED;
	} else {
		error = altq_add(ifp1, altq);

		/* The transaction may have been closed under us. */
		if (ticket != V_ticket_altqs_inactive)
			error = EBUSY;

		if (error)
			free(altq, M_PFALTQ);
	}

	return (error);
}
1099
/*
 * Interface attach/detach hook: rebuild the ALTQ configuration so that
 * entries bound to 'ifp' are attached (arrival) or marked removed
 * (departure, 'remove' != 0).  Implemented as a regular transaction:
 * any open userland transaction is rolled back, the active set is
 * copied into a fresh inactive set with each entry re-evaluated against
 * the event, and the result is committed (or rolled back on error).
 */
void
pf_altq_ifnet_event(struct ifnet *ifp, int remove)
{
	struct pf_altq	*a1, *a2, *a3;
	u_int32_t	 ticket;
	int		 error = 0;

	/*
	 * No need to re-evaluate the configuration for events on interfaces
	 * that do not support ALTQ, as it's not possible for such
	 * interfaces to be part of the configuration.
	 */
	if (!ALTQ_IS_READY(&ifp->if_snd))
		return;

	/* Interrupt userland queue modifications */
	if (V_altqs_inactive_open)
		pf_rollback_altq(V_ticket_altqs_inactive);

	/* Start new altq ruleset */
	if (pf_begin_altq(&ticket))
		return;

	/* Copy the current active set */
	TAILQ_FOREACH(a1, V_pf_altq_ifs_active, entries) {
		a2 = malloc(sizeof(*a2), M_PFALTQ, M_NOWAIT);
		if (a2 == NULL) {
			error = ENOMEM;
			break;
		}
		bcopy(a1, a2, sizeof(struct pf_altq));

		error = pf_altq_ifnet_event_add(ifp, remove, ticket, a2);
		if (error)
			break;

		TAILQ_INSERT_TAIL(V_pf_altq_ifs_inactive, a2, entries);
	}
	if (error)
		goto out;
	/* Copy the per-queue entries, re-referencing their queue IDs. */
	TAILQ_FOREACH(a1, V_pf_altqs_active, entries) {
		a2 = malloc(sizeof(*a2), M_PFALTQ, M_NOWAIT);
		if (a2 == NULL) {
			error = ENOMEM;
			break;
		}
		bcopy(a1, a2, sizeof(struct pf_altq));

		if ((a2->qid = pf_qname2qid(a2->qname)) == 0) {
			error = EBUSY;
			free(a2, M_PFALTQ);
			break;
		}
		/* Point the copy at the copied discipline of its interface. */
		a2->altq_disc = NULL;
		TAILQ_FOREACH(a3, V_pf_altq_ifs_inactive, entries) {
			if (strncmp(a3->ifname, a2->ifname,
				IFNAMSIZ) == 0) {
				a2->altq_disc = a3->altq_disc;
				break;
			}
		}
		error = pf_altq_ifnet_event_add(ifp, remove, ticket, a2);
		if (error)
			break;

		TAILQ_INSERT_TAIL(V_pf_altqs_inactive, a2, entries);
	}

out:
	if (error != 0)
		pf_rollback_altq(ticket);
	else
		pf_commit_altq(ticket);
}
1174 #endif /* ALTQ */
1175
1176 static struct pf_krule_global *
1177 pf_rule_tree_alloc(int flags)
1178 {
1179         struct pf_krule_global *tree;
1180
1181         tree = malloc(sizeof(struct pf_krule_global), M_TEMP, flags);
1182         if (tree == NULL)
1183                 return (NULL);
1184         RB_INIT(tree);
1185         return (tree);
1186 }
1187
/*
 * Free a rule tree allocated by pf_rule_tree_alloc().  Note: only the
 * tree head itself is freed; nodes still linked into the tree are not
 * walked here.  free(NULL) is a no-op.
 */
static void
pf_rule_tree_free(struct pf_krule_global *tree)
{

	free(tree, M_TEMP);
}
1194
1195 static int
1196 pf_begin_rules(u_int32_t *ticket, int rs_num, const char *anchor)
1197 {
1198         struct pf_krule_global *tree;
1199         struct pf_kruleset      *rs;
1200         struct pf_krule         *rule;
1201
1202         PF_RULES_WASSERT();
1203
1204         if (rs_num < 0 || rs_num >= PF_RULESET_MAX)
1205                 return (EINVAL);
1206         tree = pf_rule_tree_alloc(M_NOWAIT);
1207         if (tree == NULL)
1208                 return (ENOMEM);
1209         rs = pf_find_or_create_kruleset(anchor);
1210         if (rs == NULL) {
1211                 free(tree, M_TEMP);
1212                 return (EINVAL);
1213         }
1214         pf_rule_tree_free(rs->rules[rs_num].inactive.tree);
1215         rs->rules[rs_num].inactive.tree = tree;
1216
1217         while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) {
1218                 pf_unlink_rule(rs->rules[rs_num].inactive.ptr, rule);
1219                 rs->rules[rs_num].inactive.rcount--;
1220         }
1221         *ticket = ++rs->rules[rs_num].inactive.ticket;
1222         rs->rules[rs_num].inactive.open = 1;
1223         return (0);
1224 }
1225
1226 static int
1227 pf_rollback_rules(u_int32_t ticket, int rs_num, char *anchor)
1228 {
1229         struct pf_kruleset      *rs;
1230         struct pf_krule         *rule;
1231
1232         PF_RULES_WASSERT();
1233
1234         if (rs_num < 0 || rs_num >= PF_RULESET_MAX)
1235                 return (EINVAL);
1236         rs = pf_find_kruleset(anchor);
1237         if (rs == NULL || !rs->rules[rs_num].inactive.open ||
1238             rs->rules[rs_num].inactive.ticket != ticket)
1239                 return (0);
1240         while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) {
1241                 pf_unlink_rule(rs->rules[rs_num].inactive.ptr, rule);
1242                 rs->rules[rs_num].inactive.rcount--;
1243         }
1244         rs->rules[rs_num].inactive.open = 0;
1245         return (0);
1246 }
1247
/*
 * Helpers for folding rule fields into a rolling MD5 digest.  All of
 * them expect an MD5_CTX * named 'ctx' in the caller's scope.  The
 * HTONL/HTONS variants hash integers in network byte order so the
 * digest is independent of host endianness.
 */

/* Hash a fixed-size field of *st. */
#define PF_MD5_UPD(st, elm)                                             \
		MD5Update(ctx, (u_int8_t *) &(st)->elm, sizeof((st)->elm))

/* Hash a NUL-terminated string field (terminator excluded). */
#define PF_MD5_UPD_STR(st, elm)                                         \
		MD5Update(ctx, (u_int8_t *) (st)->elm, strlen((st)->elm))

/* Hash a 32-bit field in network byte order; 'stor' is caller scratch. */
#define PF_MD5_UPD_HTONL(st, elm, stor) do {                            \
		(stor) = htonl((st)->elm);                              \
		MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int32_t));\
} while (0)

/* Hash a 16-bit field in network byte order; 'stor' is caller scratch. */
#define PF_MD5_UPD_HTONS(st, elm, stor) do {                            \
		(stor) = htons((st)->elm);                              \
		MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int16_t));\
} while (0)
1263
/*
 * Fold a rule address specification into the rolling MD5 digest:
 * address type, the fields relevant for that type only, then ports,
 * negation flag and port operator.  The field set and order define the
 * digest value — see pf_hash_rule_rolling().
 */
static void
pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr)
{
	PF_MD5_UPD(pfr, addr.type);
	switch (pfr->addr.type) {
		case PF_ADDR_DYNIFTL:
			PF_MD5_UPD(pfr, addr.v.ifname);
			PF_MD5_UPD(pfr, addr.iflags);
			break;
		case PF_ADDR_TABLE:
			PF_MD5_UPD(pfr, addr.v.tblname);
			break;
		case PF_ADDR_ADDRMASK:
			/* XXX ignore af? */
			PF_MD5_UPD(pfr, addr.v.a.addr.addr32);
			PF_MD5_UPD(pfr, addr.v.a.mask.addr32);
			break;
	}

	PF_MD5_UPD(pfr, port[0]);
	PF_MD5_UPD(pfr, port[1]);
	PF_MD5_UPD(pfr, neg);
	PF_MD5_UPD(pfr, port_op);
}
1288
/*
 * Feed every digest-relevant field of a rule into 'ctx'.  The exact set
 * and order of fields define the resulting checksum: changing either
 * changes every rule digest and breaks comparisons against digests
 * computed by other code (pf_krule_compare() matches rules by this
 * value).  Integers go through the HTONL/HTONS helpers so the digest is
 * byte-order independent.
 */
static void
pf_hash_rule_rolling(MD5_CTX *ctx, struct pf_krule *rule)
{
	u_int16_t x;
	u_int32_t y;

	pf_hash_rule_addr(ctx, &rule->src);
	pf_hash_rule_addr(ctx, &rule->dst);
	for (int i = 0; i < PF_RULE_MAX_LABEL_COUNT; i++)
		PF_MD5_UPD_STR(rule, label[i]);
	PF_MD5_UPD_STR(rule, ifname);
	PF_MD5_UPD_STR(rule, match_tagname);
	PF_MD5_UPD_HTONS(rule, match_tag, x); /* dup? */
	PF_MD5_UPD_HTONL(rule, os_fingerprint, y);
	PF_MD5_UPD_HTONL(rule, prob, y);
	PF_MD5_UPD_HTONL(rule, uid.uid[0], y);
	PF_MD5_UPD_HTONL(rule, uid.uid[1], y);
	PF_MD5_UPD(rule, uid.op);
	PF_MD5_UPD_HTONL(rule, gid.gid[0], y);
	PF_MD5_UPD_HTONL(rule, gid.gid[1], y);
	PF_MD5_UPD(rule, gid.op);
	PF_MD5_UPD_HTONL(rule, rule_flag, y);
	PF_MD5_UPD(rule, action);
	PF_MD5_UPD(rule, direction);
	PF_MD5_UPD(rule, af);
	PF_MD5_UPD(rule, quick);
	PF_MD5_UPD(rule, ifnot);
	PF_MD5_UPD(rule, match_tag_not);
	PF_MD5_UPD(rule, natpass);
	PF_MD5_UPD(rule, keep_state);
	PF_MD5_UPD(rule, proto);
	PF_MD5_UPD(rule, type);
	PF_MD5_UPD(rule, code);
	PF_MD5_UPD(rule, flags);
	PF_MD5_UPD(rule, flagset);
	PF_MD5_UPD(rule, allow_opts);
	PF_MD5_UPD(rule, rt);
	PF_MD5_UPD(rule, tos);
	PF_MD5_UPD(rule, scrub_flags);
	PF_MD5_UPD(rule, min_ttl);
	PF_MD5_UPD(rule, set_tos);
	/* Anchored rules also hash their anchor path. */
	if (rule->anchor != NULL)
		PF_MD5_UPD_STR(rule, anchor->path);
}
1333
1334 static void
1335 pf_hash_rule(struct pf_krule *rule)
1336 {
1337         MD5_CTX         ctx;
1338
1339         MD5Init(&ctx);
1340         pf_hash_rule_rolling(&ctx, rule);
1341         MD5Final(rule->md5sum, &ctx);
1342 }
1343
1344 static int
1345 pf_krule_compare(struct pf_krule *a, struct pf_krule *b)
1346 {
1347
1348         return (memcmp(a->md5sum, b->md5sum, PF_MD5_DIGEST_LENGTH));
1349 }
1350
/*
 * Commit the transaction opened by pf_begin_rules(): swap the inactive
 * ruleset in as active, carry counters over from matching old rules
 * (when keep_counters is set), recompute skip steps and free the
 * previously active rules.  Returns EINVAL for a bad ruleset number,
 * EBUSY on a missing/closed transaction or ticket mismatch, or an
 * error from pf_setup_pfsync_matching().
 */
static int
pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor)
{
	struct pf_kruleset	*rs;
	struct pf_krule		*rule, **old_array, *old_rule;
	struct pf_krulequeue	*old_rules;
	struct pf_krule_global	*old_tree;
	int			 error;
	u_int32_t		 old_rcount;

	PF_RULES_WASSERT();

	if (rs_num < 0 || rs_num >= PF_RULESET_MAX)
		return (EINVAL);
	rs = pf_find_kruleset(anchor);
	if (rs == NULL || !rs->rules[rs_num].inactive.open ||
	    ticket != rs->rules[rs_num].inactive.ticket)
		return (EBUSY);

	/* Calculate checksum for the main ruleset */
	if (rs == &pf_main_ruleset) {
		error = pf_setup_pfsync_matching(rs);
		if (error != 0)
			return (error);
	}

	/* Swap rules, keep the old. */
	old_rules = rs->rules[rs_num].active.ptr;
	old_rcount = rs->rules[rs_num].active.rcount;
	old_array = rs->rules[rs_num].active.ptr_array;
	old_tree = rs->rules[rs_num].active.tree;

	rs->rules[rs_num].active.ptr =
	    rs->rules[rs_num].inactive.ptr;
	rs->rules[rs_num].active.ptr_array =
	    rs->rules[rs_num].inactive.ptr_array;
	rs->rules[rs_num].active.tree =
	    rs->rules[rs_num].inactive.tree;
	rs->rules[rs_num].active.rcount =
	    rs->rules[rs_num].inactive.rcount;

	/* Attempt to preserve counter information. */
	if (V_pf_status.keep_counters && old_tree != NULL) {
		/* Old rules are matched presumably by MD5 digest via the
		 * tree's compare function (pf_krule_compare). */
		TAILQ_FOREACH(rule, rs->rules[rs_num].active.ptr,
		    entries) {
			old_rule = RB_FIND(pf_krule_global, old_tree, rule);
			if (old_rule == NULL) {
				continue;
			}
			pf_counter_u64_critical_enter();
			pf_counter_u64_add_protected(&rule->evaluations,
			    pf_counter_u64_fetch(&old_rule->evaluations));
			pf_counter_u64_add_protected(&rule->packets[0],
			    pf_counter_u64_fetch(&old_rule->packets[0]));
			pf_counter_u64_add_protected(&rule->packets[1],
			    pf_counter_u64_fetch(&old_rule->packets[1]));
			pf_counter_u64_add_protected(&rule->bytes[0],
			    pf_counter_u64_fetch(&old_rule->bytes[0]));
			pf_counter_u64_add_protected(&rule->bytes[1],
			    pf_counter_u64_fetch(&old_rule->bytes[1]));
			pf_counter_u64_critical_exit();
		}
	}

	rs->rules[rs_num].inactive.ptr = old_rules;
	rs->rules[rs_num].inactive.ptr_array = old_array;
	rs->rules[rs_num].inactive.tree = NULL; /* important for pf_ioctl_addrule */
	rs->rules[rs_num].inactive.rcount = old_rcount;

	rs->rules[rs_num].active.ticket =
	    rs->rules[rs_num].inactive.ticket;
	pf_calc_skip_steps(rs->rules[rs_num].active.ptr);

	/* Purge the old rule list. */
	PF_UNLNKDRULES_LOCK();
	while ((rule = TAILQ_FIRST(old_rules)) != NULL)
		pf_unlink_rule_locked(old_rules, rule);
	PF_UNLNKDRULES_UNLOCK();
	if (rs->rules[rs_num].inactive.ptr_array)
		free(rs->rules[rs_num].inactive.ptr_array, M_TEMP);
	rs->rules[rs_num].inactive.ptr_array = NULL;
	rs->rules[rs_num].inactive.rcount = 0;
	rs->rules[rs_num].inactive.open = 0;
	pf_remove_if_empty_kruleset(rs);
	free(old_tree, M_TEMP);

	return (0);
}
1439
/*
 * Rebuild the inactive rule-number -> rule index (ptr_array) for every
 * ruleset and compute a rolling MD5 checksum over all inactive rules,
 * storing it in V_pf_status.pf_chksum — presumably consumed by pfsync
 * peers to match rule configurations (confirm against pfsync).
 * Returns ENOMEM if an index allocation fails.
 */
static int
pf_setup_pfsync_matching(struct pf_kruleset *rs)
{
	MD5_CTX			 ctx;
	struct pf_krule		*rule;
	int			 rs_cnt;
	u_int8_t		 digest[PF_MD5_DIGEST_LENGTH];

	MD5Init(&ctx);
	for (rs_cnt = 0; rs_cnt < PF_RULESET_MAX; rs_cnt++) {
		/* XXX PF_RULESET_SCRUB as well? */
		if (rs_cnt == PF_RULESET_SCRUB)
			continue;

		/* Throw away any stale index before rebuilding. */
		if (rs->rules[rs_cnt].inactive.ptr_array)
			free(rs->rules[rs_cnt].inactive.ptr_array, M_TEMP);
		rs->rules[rs_cnt].inactive.ptr_array = NULL;

		if (rs->rules[rs_cnt].inactive.rcount) {
			/*
			 * NOTE(review): the element size is written as
			 * sizeof(struct pf_rule **) although the array
			 * holds rule pointers; harmless since all object
			 * pointers share a size, but sizeof(struct
			 * pf_krule *) would state the intent.
			 */
			rs->rules[rs_cnt].inactive.ptr_array =
			    mallocarray(rs->rules[rs_cnt].inactive.rcount,
			    sizeof(struct pf_rule **),
			    M_TEMP, M_NOWAIT);

			if (!rs->rules[rs_cnt].inactive.ptr_array)
				return (ENOMEM);
		}

		TAILQ_FOREACH(rule, rs->rules[rs_cnt].inactive.ptr,
		    entries) {
			pf_hash_rule_rolling(&ctx, rule);
			(rs->rules[rs_cnt].inactive.ptr_array)[rule->nr] = rule;
		}
	}

	MD5Final(digest, &ctx);
	memcpy(V_pf_status.pf_chksum, digest, sizeof(V_pf_status.pf_chksum));
	return (0);
}
1479
1480 static int
1481 pf_eth_addr_setup(struct pf_keth_ruleset *ruleset, struct pf_addr_wrap *addr)
1482 {
1483         int error = 0;
1484
1485         switch (addr->type) {
1486         case PF_ADDR_TABLE:
1487                 addr->p.tbl = pfr_eth_attach_table(ruleset, addr->v.tblname);
1488                 if (addr->p.tbl == NULL)
1489                         error = ENOMEM;
1490                 break;
1491         default:
1492                 error = EINVAL;
1493         }
1494
1495         return (error);
1496 }
1497
1498 static int
1499 pf_addr_setup(struct pf_kruleset *ruleset, struct pf_addr_wrap *addr,
1500     sa_family_t af)
1501 {
1502         int error = 0;
1503
1504         switch (addr->type) {
1505         case PF_ADDR_TABLE:
1506                 addr->p.tbl = pfr_attach_table(ruleset, addr->v.tblname);
1507                 if (addr->p.tbl == NULL)
1508                         error = ENOMEM;
1509                 break;
1510         case PF_ADDR_DYNIFTL:
1511                 error = pfi_dynaddr_setup(addr, af);
1512                 break;
1513         }
1514
1515         return (error);
1516 }
1517
1518 static void
1519 pf_addr_copyout(struct pf_addr_wrap *addr)
1520 {
1521
1522         switch (addr->type) {
1523         case PF_ADDR_DYNIFTL:
1524                 pfi_dynaddr_copyout(addr);
1525                 break;
1526         case PF_ADDR_TABLE:
1527                 pf_tbladdr_copyout(addr);
1528                 break;
1529         }
1530 }
1531
1532 static void
1533 pf_src_node_copy(const struct pf_ksrc_node *in, struct pf_src_node *out)
1534 {
1535         int     secs = time_uptime, diff;
1536
1537         bzero(out, sizeof(struct pf_src_node));
1538
1539         bcopy(&in->addr, &out->addr, sizeof(struct pf_addr));
1540         bcopy(&in->raddr, &out->raddr, sizeof(struct pf_addr));
1541
1542         if (in->rule.ptr != NULL)
1543                 out->rule.nr = in->rule.ptr->nr;
1544
1545         for (int i = 0; i < 2; i++) {
1546                 out->bytes[i] = counter_u64_fetch(in->bytes[i]);
1547                 out->packets[i] = counter_u64_fetch(in->packets[i]);
1548         }
1549
1550         out->states = in->states;
1551         out->conn = in->conn;
1552         out->af = in->af;
1553         out->ruletype = in->ruletype;
1554
1555         out->creation = secs - in->creation;
1556         if (out->expire > secs)
1557                 out->expire -= secs;
1558         else
1559                 out->expire = 0;
1560
1561         /* Adjust the connection rate estimate. */
1562         diff = secs - in->conn_rate.last;
1563         if (diff >= in->conn_rate.seconds)
1564                 out->conn_rate.count = 0;
1565         else
1566                 out->conn_rate.count -=
1567                     in->conn_rate.count * diff /
1568                     in->conn_rate.seconds;
1569 }
1570
1571 #ifdef ALTQ
1572 /*
1573  * Handle export of struct pf_kaltq to user binaries that may be using any
1574  * version of struct pf_altq.
1575  */
1576 static int
1577 pf_export_kaltq(struct pf_altq *q, struct pfioc_altq_v1 *pa, size_t ioc_size)
1578 {
1579         u_int32_t version;
1580
1581         if (ioc_size == sizeof(struct pfioc_altq_v0))
1582                 version = 0;
1583         else
1584                 version = pa->version;
1585
1586         if (version > PFIOC_ALTQ_VERSION)
1587                 return (EINVAL);
1588
1589 #define ASSIGN(x) exported_q->x = q->x
1590 #define COPY(x) \
1591         bcopy(&q->x, &exported_q->x, min(sizeof(q->x), sizeof(exported_q->x)))
1592 #define SATU16(x) (u_int32_t)uqmin((x), USHRT_MAX)
1593 #define SATU32(x) (u_int32_t)uqmin((x), UINT_MAX)
1594
1595         switch (version) {
1596         case 0: {
1597                 struct pf_altq_v0 *exported_q =
1598                     &((struct pfioc_altq_v0 *)pa)->altq;
1599
1600                 COPY(ifname);
1601
1602                 ASSIGN(scheduler);
1603                 ASSIGN(tbrsize);
1604                 exported_q->tbrsize = SATU16(q->tbrsize);
1605                 exported_q->ifbandwidth = SATU32(q->ifbandwidth);
1606
1607                 COPY(qname);
1608                 COPY(parent);
1609                 ASSIGN(parent_qid);
1610                 exported_q->bandwidth = SATU32(q->bandwidth);
1611                 ASSIGN(priority);
1612                 ASSIGN(local_flags);
1613
1614                 ASSIGN(qlimit);
1615                 ASSIGN(flags);
1616
1617                 if (q->scheduler == ALTQT_HFSC) {
1618 #define ASSIGN_OPT(x) exported_q->pq_u.hfsc_opts.x = q->pq_u.hfsc_opts.x
1619 #define ASSIGN_OPT_SATU32(x) exported_q->pq_u.hfsc_opts.x = \
1620                             SATU32(q->pq_u.hfsc_opts.x)
1621                         
1622                         ASSIGN_OPT_SATU32(rtsc_m1);
1623                         ASSIGN_OPT(rtsc_d);
1624                         ASSIGN_OPT_SATU32(rtsc_m2);
1625
1626                         ASSIGN_OPT_SATU32(lssc_m1);
1627                         ASSIGN_OPT(lssc_d);
1628                         ASSIGN_OPT_SATU32(lssc_m2);
1629
1630                         ASSIGN_OPT_SATU32(ulsc_m1);
1631                         ASSIGN_OPT(ulsc_d);
1632                         ASSIGN_OPT_SATU32(ulsc_m2);
1633
1634                         ASSIGN_OPT(flags);
1635                         
1636 #undef ASSIGN_OPT
1637 #undef ASSIGN_OPT_SATU32
1638                 } else
1639                         COPY(pq_u);
1640
1641                 ASSIGN(qid);
1642                 break;
1643         }
1644         case 1: {
1645                 struct pf_altq_v1 *exported_q =
1646                     &((struct pfioc_altq_v1 *)pa)->altq;
1647
1648                 COPY(ifname);
1649
1650                 ASSIGN(scheduler);
1651                 ASSIGN(tbrsize);
1652                 ASSIGN(ifbandwidth);
1653
1654                 COPY(qname);
1655                 COPY(parent);
1656                 ASSIGN(parent_qid);
1657                 ASSIGN(bandwidth);
1658                 ASSIGN(priority);
1659                 ASSIGN(local_flags);
1660
1661                 ASSIGN(qlimit);
1662                 ASSIGN(flags);
1663                 COPY(pq_u);
1664
1665                 ASSIGN(qid);
1666                 break;
1667         }
1668         default:
1669                 panic("%s: unhandled struct pfioc_altq version", __func__);
1670                 break;
1671         }
1672
1673 #undef ASSIGN
1674 #undef COPY
1675 #undef SATU16
1676 #undef SATU32
1677
1678         return (0);
1679 }
1680
1681 /*
1682  * Handle import to struct pf_kaltq of struct pf_altq from user binaries
1683  * that may be using any version of it.
1684  */
static int
pf_import_kaltq(struct pfioc_altq_v1 *pa, struct pf_altq *q, size_t ioc_size)
{
	u_int32_t version;

	/*
	 * The v0 ioctl structure predates the version field; recognize it
	 * by its exact size.  Anything else carries an explicit version.
	 */
	if (ioc_size == sizeof(struct pfioc_altq_v0))
		version = 0;
	else
		version = pa->version;

	/* Refuse structures from a newer, unknown ABI version. */
	if (version > PFIOC_ALTQ_VERSION)
		return (EINVAL);

#define ASSIGN(x) q->x = imported_q->x
#define COPY(x) \
	bcopy(&imported_q->x, &q->x, min(sizeof(imported_q->x), sizeof(q->x)))

	switch (version) {
	case 0: {
		struct pf_altq_v0 *imported_q =
		    &((struct pfioc_altq_v0 *)pa)->altq;

		COPY(ifname);

		ASSIGN(scheduler);
		ASSIGN(tbrsize); /* 16-bit -> 32-bit */
		ASSIGN(ifbandwidth); /* 32-bit -> 64-bit */

		COPY(qname);
		COPY(parent);
		ASSIGN(parent_qid);
		ASSIGN(bandwidth); /* 32-bit -> 64-bit */
		ASSIGN(priority);
		ASSIGN(local_flags);

		ASSIGN(qlimit);
		ASSIGN(flags);

		/*
		 * HFSC options need member-wise widening; for every other
		 * scheduler a bounded copy of the union suffices.
		 */
		if (imported_q->scheduler == ALTQT_HFSC) {
#define ASSIGN_OPT(x) q->pq_u.hfsc_opts.x = imported_q->pq_u.hfsc_opts.x

			/*
			 * The m1 and m2 parameters are being copied from
			 * 32-bit to 64-bit.
			 */
			ASSIGN_OPT(rtsc_m1);
			ASSIGN_OPT(rtsc_d);
			ASSIGN_OPT(rtsc_m2);

			ASSIGN_OPT(lssc_m1);
			ASSIGN_OPT(lssc_d);
			ASSIGN_OPT(lssc_m2);

			ASSIGN_OPT(ulsc_m1);
			ASSIGN_OPT(ulsc_d);
			ASSIGN_OPT(ulsc_m2);

			ASSIGN_OPT(flags);

#undef ASSIGN_OPT
		} else
			COPY(pq_u);

		ASSIGN(qid);
		break;
	}
	case 1: {
		/* v1 matches the kernel layout; fields copy over 1:1. */
		struct pf_altq_v1 *imported_q =
		    &((struct pfioc_altq_v1 *)pa)->altq;

		COPY(ifname);

		ASSIGN(scheduler);
		ASSIGN(tbrsize);
		ASSIGN(ifbandwidth);

		COPY(qname);
		COPY(parent);
		ASSIGN(parent_qid);
		ASSIGN(bandwidth);
		ASSIGN(priority);
		ASSIGN(local_flags);

		ASSIGN(qlimit);
		ASSIGN(flags);
		COPY(pq_u);

		ASSIGN(qid);
		break;
	}
	default:
		/* Unreachable: guarded by the version check above. */
		panic("%s: unhandled struct pfioc_altq version", __func__);
		break;
	}

#undef ASSIGN
#undef COPY

	return (0);
}
1785
1786 static struct pf_altq *
1787 pf_altq_get_nth_active(u_int32_t n)
1788 {
1789         struct pf_altq          *altq;
1790         u_int32_t                nr;
1791
1792         nr = 0;
1793         TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) {
1794                 if (nr == n)
1795                         return (altq);
1796                 nr++;
1797         }
1798
1799         TAILQ_FOREACH(altq, V_pf_altqs_active, entries) {
1800                 if (nr == n)
1801                         return (altq);
1802                 nr++;
1803         }
1804
1805         return (NULL);
1806 }
1807 #endif /* ALTQ */
1808
1809 struct pf_krule *
1810 pf_krule_alloc(void)
1811 {
1812         struct pf_krule *rule;
1813
1814         rule = malloc(sizeof(struct pf_krule), M_PFRULE, M_WAITOK | M_ZERO);
1815         mtx_init(&rule->rpool.mtx, "pf_krule_pool", NULL, MTX_DEF);
1816         rule->timestamp = uma_zalloc_pcpu(pf_timestamp_pcpu_zone,
1817             M_WAITOK | M_ZERO);
1818         return (rule);
1819 }
1820
void
pf_krule_free(struct pf_krule *rule)
{
#ifdef PF_WANT_32_TO_64_COUNTER
	bool wowned;
#endif

	/* free(NULL)-style convenience: accept a NULL rule. */
	if (rule == NULL)
		return;

#ifdef PF_WANT_32_TO_64_COUNTER
	/*
	 * Unlink from the global rule list, taking the rules write lock
	 * only if the caller does not already hold it.
	 */
	if (rule->allrulelinked) {
		wowned = PF_RULES_WOWNED();
		if (!wowned)
			PF_RULES_WLOCK();
		LIST_REMOVE(rule, allrulelist);
		V_pf_allrulecount--;
		if (!wowned)
			PF_RULES_WUNLOCK();
	}
#endif

	/* Release per-rule counters, statistics and the timestamp. */
	pf_counter_u64_deinit(&rule->evaluations);
	for (int i = 0; i < 2; i++) {
		pf_counter_u64_deinit(&rule->packets[i]);
		pf_counter_u64_deinit(&rule->bytes[i]);
	}
	counter_u64_free(rule->states_cur);
	counter_u64_free(rule->states_tot);
	counter_u64_free(rule->src_nodes);
	uma_zfree_pcpu(pf_timestamp_pcpu_zone, rule->timestamp);

	mtx_destroy(&rule->rpool.mtx);
	free(rule, M_PFRULE);
}
1856
1857 static void
1858 pf_kpooladdr_to_pooladdr(const struct pf_kpooladdr *kpool,
1859     struct pf_pooladdr *pool)
1860 {
1861
1862         bzero(pool, sizeof(*pool));
1863         bcopy(&kpool->addr, &pool->addr, sizeof(pool->addr));
1864         strlcpy(pool->ifname, kpool->ifname, sizeof(pool->ifname));
1865 }
1866
1867 static int
1868 pf_pooladdr_to_kpooladdr(const struct pf_pooladdr *pool,
1869     struct pf_kpooladdr *kpool)
1870 {
1871         int ret;
1872
1873         bzero(kpool, sizeof(*kpool));
1874         bcopy(&pool->addr, &kpool->addr, sizeof(kpool->addr));
1875         ret = pf_user_strcpy(kpool->ifname, pool->ifname,
1876             sizeof(kpool->ifname));
1877         return (ret);
1878 }
1879
1880 static void
1881 pf_pool_to_kpool(const struct pf_pool *pool, struct pf_kpool *kpool)
1882 {
1883         _Static_assert(sizeof(pool->key) == sizeof(kpool->key), "");
1884         _Static_assert(sizeof(pool->counter) == sizeof(kpool->counter), "");
1885
1886         bcopy(&pool->key, &kpool->key, sizeof(kpool->key));
1887         bcopy(&pool->counter, &kpool->counter, sizeof(kpool->counter));
1888
1889         kpool->tblidx = pool->tblidx;
1890         kpool->proxy_port[0] = pool->proxy_port[0];
1891         kpool->proxy_port[1] = pool->proxy_port[1];
1892         kpool->opts = pool->opts;
1893 }
1894
/*
 * Convert a userland struct pf_rule into the kernel's struct pf_krule.
 * Returns 0 on success or an errno value if the rule references an
 * unsupported address family or carries malformed addresses/strings.
 */
static int
pf_rule_to_krule(const struct pf_rule *rule, struct pf_krule *krule)
{
	int ret;

	/* Reject address families the kernel was not built to support. */
#ifndef INET
	if (rule->af == AF_INET) {
		return (EAFNOSUPPORT);
	}
#endif /* INET */
#ifndef INET6
	if (rule->af == AF_INET6) {
		return (EAFNOSUPPORT);
	}
#endif /* INET6 */

	/* Validate the userland-supplied rule addresses before copying. */
	ret = pf_check_rule_addr(&rule->src);
	if (ret != 0)
		return (ret);
	ret = pf_check_rule_addr(&rule->dst);
	if (ret != 0)
		return (ret);

	bcopy(&rule->src, &krule->src, sizeof(rule->src));
	bcopy(&rule->dst, &krule->dst, sizeof(rule->dst));

	/*
	 * Strings come from userspace; pf_user_strcpy() fails with an
	 * error if they are not properly NUL-terminated.  The old ABI
	 * carries a single label, stored in slot 0 of the kernel rule's
	 * label array.
	 */
	ret = pf_user_strcpy(krule->label[0], rule->label, sizeof(rule->label));
	if (ret != 0)
		return (ret);
	ret = pf_user_strcpy(krule->ifname, rule->ifname, sizeof(rule->ifname));
	if (ret != 0)
		return (ret);
	ret = pf_user_strcpy(krule->qname, rule->qname, sizeof(rule->qname));
	if (ret != 0)
		return (ret);
	ret = pf_user_strcpy(krule->pqname, rule->pqname, sizeof(rule->pqname));
	if (ret != 0)
		return (ret);
	ret = pf_user_strcpy(krule->tagname, rule->tagname,
	    sizeof(rule->tagname));
	if (ret != 0)
		return (ret);
	ret = pf_user_strcpy(krule->match_tagname, rule->match_tagname,
	    sizeof(rule->match_tagname));
	if (ret != 0)
		return (ret);
	ret = pf_user_strcpy(krule->overload_tblname, rule->overload_tblname,
	    sizeof(rule->overload_tblname));
	if (ret != 0)
		return (ret);

	pf_pool_to_kpool(&rule->rpool, &krule->rpool);

	/* Don't allow userspace to set evaluations, packets or bytes. */
	/* kif, anchor, overload_tbl are not copied over. */

	krule->os_fingerprint = rule->os_fingerprint;

	krule->rtableid = rule->rtableid;
	bcopy(rule->timeout, krule->timeout, sizeof(krule->timeout));
	krule->max_states = rule->max_states;
	krule->max_src_nodes = rule->max_src_nodes;
	krule->max_src_states = rule->max_src_states;
	krule->max_src_conn = rule->max_src_conn;
	krule->max_src_conn_rate.limit = rule->max_src_conn_rate.limit;
	krule->max_src_conn_rate.seconds = rule->max_src_conn_rate.seconds;
	krule->qid = rule->qid;
	krule->pqid = rule->pqid;
	krule->nr = rule->nr;
	krule->prob = rule->prob;
	krule->cuid = rule->cuid;
	krule->cpid = rule->cpid;

	krule->return_icmp = rule->return_icmp;
	krule->return_icmp6 = rule->return_icmp6;
	krule->max_mss = rule->max_mss;
	krule->tag = rule->tag;
	krule->match_tag = rule->match_tag;
	krule->scrub_flags = rule->scrub_flags;

	bcopy(&rule->uid, &krule->uid, sizeof(krule->uid));
	bcopy(&rule->gid, &krule->gid, sizeof(krule->gid));

	krule->rule_flag = rule->rule_flag;
	krule->action = rule->action;
	krule->direction = rule->direction;
	krule->log = rule->log;
	krule->logif = rule->logif;
	krule->quick = rule->quick;
	krule->ifnot = rule->ifnot;
	krule->match_tag_not = rule->match_tag_not;
	krule->natpass = rule->natpass;

	krule->keep_state = rule->keep_state;
	krule->af = rule->af;
	krule->proto = rule->proto;
	krule->type = rule->type;
	krule->code = rule->code;
	krule->flags = rule->flags;
	krule->flagset = rule->flagset;
	krule->min_ttl = rule->min_ttl;
	krule->allow_opts = rule->allow_opts;
	krule->rt = rule->rt;
	krule->return_ttl = rule->return_ttl;
	krule->tos = rule->tos;
	krule->set_tos = rule->set_tos;

	krule->flush = rule->flush;
	krule->prio = rule->prio;
	krule->set_prio[0] = rule->set_prio[0];
	krule->set_prio[1] = rule->set_prio[1];

	bcopy(&rule->divert, &krule->divert, sizeof(krule->divert));

	return (0);
}
2011
/*
 * Validate a fully converted kernel rule and append it to the inactive
 * (in-transaction) ruleset identified by anchor/ticket.  On success the
 * ruleset owns the rule; on failure the rule and any attached resources
 * are freed here.  Returns 0 or an errno value.
 */
static int
pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket,
    uint32_t pool_ticket, const char *anchor, const char *anchor_call,
    struct thread *td)
{
	struct pf_kruleset	*ruleset;
	struct pf_krule		*tail;
	struct pf_kpooladdr	*pa;
	struct pfi_kkif		*kif = NULL;
	int			 rs_num;
	int			 error = 0;

	/* The ICMP return type is carried in the high byte. */
	if ((rule->return_icmp >> 8) > ICMP_MAXTYPE) {
		error = EINVAL;
		goto errout_unlocked;
	}

#define ERROUT(x)	ERROUT_FUNCTION(errout, x)

	/* Do all M_WAITOK allocations before taking the locks below. */
	if (rule->ifname[0])
		kif = pf_kkif_create(M_WAITOK);
	pf_counter_u64_init(&rule->evaluations, M_WAITOK);
	for (int i = 0; i < 2; i++) {
		pf_counter_u64_init(&rule->packets[i], M_WAITOK);
		pf_counter_u64_init(&rule->bytes[i], M_WAITOK);
	}
	rule->states_cur = counter_u64_alloc(M_WAITOK);
	rule->states_tot = counter_u64_alloc(M_WAITOK);
	rule->src_nodes = counter_u64_alloc(M_WAITOK);
	rule->cuid = td->td_ucred->cr_ruid;
	rule->cpid = td->td_proc ? td->td_proc->p_pid : 0;
	TAILQ_INIT(&rule->rpool.list);

	PF_CONFIG_LOCK();
	PF_RULES_WLOCK();
#ifdef PF_WANT_32_TO_64_COUNTER
	LIST_INSERT_HEAD(&V_pf_allrulelist, rule, allrulelist);
	MPASS(!rule->allrulelinked);
	rule->allrulelinked = true;
	V_pf_allrulecount++;
#endif
	ruleset = pf_find_kruleset(anchor);
	if (ruleset == NULL)
		ERROUT(EINVAL);
	rs_num = pf_get_ruleset_number(rule->action);
	if (rs_num >= PF_RULESET_MAX)
		ERROUT(EINVAL);
	/* Both tickets must match the currently open transaction. */
	if (ticket != ruleset->rules[rs_num].inactive.ticket) {
		DPFPRINTF(PF_DEBUG_MISC,
		    ("ticket: %d != [%d]%d\n", ticket, rs_num,
		    ruleset->rules[rs_num].inactive.ticket));
		ERROUT(EBUSY);
	}
	if (pool_ticket != V_ticket_pabuf) {
		DPFPRINTF(PF_DEBUG_MISC,
		    ("pool_ticket: %d != %d\n", pool_ticket,
		    V_ticket_pabuf));
		ERROUT(EBUSY);
	}
	/*
	 * XXXMJG hack: there is no mechanism to ensure they started the
	 * transaction. Ticket checked above may happen to match by accident,
	 * even if nobody called DIOCXBEGIN, let alone this process.
	 * Partially work around it by checking if the RB tree got allocated,
	 * see pf_begin_rules.
	 */
	if (ruleset->rules[rs_num].inactive.tree == NULL) {
		ERROUT(EINVAL);
	}

	/* Number the rule after the current tail of the inactive queue. */
	tail = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr,
	    pf_krulequeue);
	if (tail)
		rule->nr = tail->nr + 1;
	else
		rule->nr = 0;
	if (rule->ifname[0]) {
		rule->kif = pfi_kkif_attach(kif, rule->ifname);
		kif = NULL; /* ownership passed to pfi_kkif_attach() */
		pfi_kkif_ref(rule->kif);
	} else
		rule->kif = NULL;

	if (rule->rtableid > 0 && rule->rtableid >= rt_numfibs)
		error = EBUSY;

#ifdef ALTQ
	/* set queue IDs */
	if (rule->qname[0] != 0) {
		if ((rule->qid = pf_qname2qid(rule->qname)) == 0)
			error = EBUSY;
		else if (rule->pqname[0] != 0) {
			if ((rule->pqid =
			    pf_qname2qid(rule->pqname)) == 0)
				error = EBUSY;
		} else
			rule->pqid = rule->qid;
	}
#endif
	/*
	 * The remaining checks accumulate into 'error' instead of
	 * bailing out immediately; the single error path below then
	 * tears the rule down once.
	 */
	if (rule->tagname[0])
		if ((rule->tag = pf_tagname2tag(rule->tagname)) == 0)
			error = EBUSY;
	if (rule->match_tagname[0])
		if ((rule->match_tag =
		    pf_tagname2tag(rule->match_tagname)) == 0)
			error = EBUSY;
	if (rule->rt && !rule->direction)
		error = EINVAL;
	if (!rule->log)
		rule->logif = 0;
	if (rule->logif >= PFLOGIFS_MAX)
		error = EINVAL;
	if (pf_addr_setup(ruleset, &rule->src.addr, rule->af))
		error = ENOMEM;
	if (pf_addr_setup(ruleset, &rule->dst.addr, rule->af))
		error = ENOMEM;
	if (pf_kanchor_setup(rule, ruleset, anchor_call))
		error = EINVAL;
	if (rule->scrub_flags & PFSTATE_SETPRIO &&
	    (rule->set_prio[0] > PF_PRIO_MAX ||
	    rule->set_prio[1] > PF_PRIO_MAX))
		error = EINVAL;
	TAILQ_FOREACH(pa, &V_pf_pabuf, entries)
		if (pa->addr.type == PF_ADDR_TABLE) {
			pa->addr.p.tbl = pfr_attach_table(ruleset,
			    pa->addr.v.tblname);
			if (pa->addr.p.tbl == NULL)
				error = ENOMEM;
		}

	rule->overload_tbl = NULL;
	if (rule->overload_tblname[0]) {
		if ((rule->overload_tbl = pfr_attach_table(ruleset,
		    rule->overload_tblname)) == NULL)
			error = EINVAL;
		else
			rule->overload_tbl->pfrkt_flags |=
			    PFR_TFLAG_ACTIVE;
	}

	/* Move the staged pool addresses onto the rule. */
	pf_mv_kpool(&V_pf_pabuf, &rule->rpool.list);
	/* Translation and route-to rules require a non-empty pool. */
	if (((((rule->action == PF_NAT) || (rule->action == PF_RDR) ||
	    (rule->action == PF_BINAT)) && rule->anchor == NULL) ||
	    (rule->rt > PF_NOPFROUTE)) &&
	    (TAILQ_FIRST(&rule->rpool.list) == NULL))
		error = EINVAL;

	if (error) {
		pf_free_rule(rule);
		rule = NULL;
		ERROUT(error);
	}

	rule->rpool.cur = TAILQ_FIRST(&rule->rpool.list);
	TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr,
	    rule, entries);
	ruleset->rules[rs_num].inactive.rcount++;

	PF_RULES_WUNLOCK();
	pf_hash_rule(rule);
	/* Duplicate rules within one transaction are rejected. */
	if (RB_INSERT(pf_krule_global, ruleset->rules[rs_num].inactive.tree, rule) != NULL) {
		PF_RULES_WLOCK();
		TAILQ_REMOVE(ruleset->rules[rs_num].inactive.ptr, rule, entries);
		ruleset->rules[rs_num].inactive.rcount--;
		pf_free_rule(rule);
		rule = NULL;
		ERROUT(EEXIST);
	}
	PF_CONFIG_UNLOCK();

	return (0);

#undef ERROUT
errout:
	PF_RULES_WUNLOCK();
	PF_CONFIG_UNLOCK();
errout_unlocked:
	pf_kkif_free(kif);
	pf_krule_free(rule);
	return (error);
}
2193
2194 static bool
2195 pf_label_match(const struct pf_krule *rule, const char *label)
2196 {
2197         int i = 0;
2198
2199         while (*rule->label[i]) {
2200                 if (strcmp(rule->label[i], label) == 0)
2201                         return (true);
2202                 i++;
2203         }
2204
2205         return (false);
2206 }
2207
/*
 * Kill the single state matching 'key' in direction 'dir'.  Returns
 * the number of states killed (0 or 1); an ambiguous key matching
 * more than one state kills nothing.
 */
static unsigned int
pf_kill_matching_state(struct pf_state_key_cmp *key, int dir)
{
	struct pf_kstate *s;
	int more = 0;

	s = pf_find_state_all(key, dir, &more);
	if (s == NULL)
		return (0);

	/* Key matched more than one state: refuse to kill any. */
	if (more) {
		PF_STATE_UNLOCK(s);
		return (0);
	}

	/*
	 * NOTE(review): no explicit unlock on this path — presumably
	 * pf_unlink_state() consumes the state lock; confirm against
	 * its contract.
	 */
	pf_unlink_state(s);
	return (1);
}
2226
/*
 * Kill every state in one id-hash row that matches the filters in
 * 'psk' (address family, protocol, src/dst address+port, route
 * address, label, interface).  Zero/empty filter fields match any
 * state.  With psk_kill_match set, the state on the opposite side of
 * a NAT translation is killed as well.  Returns the number of states
 * killed.
 */
static int
pf_killstates_row(struct pf_kstate_kill *psk, struct pf_idhash *ih)
{
	struct pf_kstate	*s;
	struct pf_state_key	*sk;
	struct pf_addr		*srcaddr, *dstaddr;
	struct pf_state_key_cmp	 match_key;
	int			 idx, killed = 0;
	unsigned int		 dir;
	u_int16_t		 srcport, dstport;
	struct pfi_kkif		*kif;

	/*
	 * pf_unlink_state() invalidates the list iterator, so after each
	 * kill we drop back here, retake the row lock and rescan the row.
	 */
relock_DIOCKILLSTATES:
	PF_HASHROW_LOCK(ih);
	LIST_FOREACH(s, &ih->states, entry) {
		/* For floating states look at the original kif. */
		kif = s->kif == V_pfi_all ? s->orig_kif : s->kif;

		/*
		 * Orient the wire key: for outbound states addr[1]/port[1]
		 * hold the source and addr[0]/port[0] the destination.
		 */
		sk = s->key[PF_SK_WIRE];
		if (s->direction == PF_OUT) {
			srcaddr = &sk->addr[1];
			dstaddr = &sk->addr[0];
			srcport = sk->port[1];
			dstport = sk->port[0];
		} else {
			srcaddr = &sk->addr[0];
			dstaddr = &sk->addr[1];
			srcport = sk->port[0];
			dstport = sk->port[1];
		}

		if (psk->psk_af && sk->af != psk->psk_af)
			continue;

		if (psk->psk_proto && psk->psk_proto != sk->proto)
			continue;

		if (! PF_MATCHA(psk->psk_src.neg, &psk->psk_src.addr.v.a.addr,
		    &psk->psk_src.addr.v.a.mask, srcaddr, sk->af))
			continue;

		if (! PF_MATCHA(psk->psk_dst.neg, &psk->psk_dst.addr.v.a.addr,
		    &psk->psk_dst.addr.v.a.mask, dstaddr, sk->af))
			continue;

		if (!  PF_MATCHA(psk->psk_rt_addr.neg,
		    &psk->psk_rt_addr.addr.v.a.addr,
		    &psk->psk_rt_addr.addr.v.a.mask,
		    &s->rt_addr, sk->af))
			continue;

		if (psk->psk_src.port_op != 0 &&
		    ! pf_match_port(psk->psk_src.port_op,
		    psk->psk_src.port[0], psk->psk_src.port[1], srcport))
			continue;

		if (psk->psk_dst.port_op != 0 &&
		    ! pf_match_port(psk->psk_dst.port_op,
		    psk->psk_dst.port[0], psk->psk_dst.port[1], dstport))
			continue;

		if (psk->psk_label[0] &&
		    ! pf_label_match(s->rule.ptr, psk->psk_label))
			continue;

		if (psk->psk_ifname[0] && strcmp(psk->psk_ifname,
		    kif->pfik_name))
			continue;

		if (psk->psk_kill_match) {
			/* Create the key to find matching states, with lock
			 * held. */

			bzero(&match_key, sizeof(match_key));

			/*
			 * Look up the counterpart from the opposite
			 * direction, using the stack-side key of an
			 * outbound state and the wire-side key otherwise.
			 */
			if (s->direction == PF_OUT) {
				dir = PF_IN;
				idx = PF_SK_STACK;
			} else {
				dir = PF_OUT;
				idx = PF_SK_WIRE;
			}

			/* Reverse addresses and ports for the lookup. */
			match_key.af = s->key[idx]->af;
			match_key.proto = s->key[idx]->proto;
			PF_ACPY(&match_key.addr[0],
			    &s->key[idx]->addr[1], match_key.af);
			match_key.port[0] = s->key[idx]->port[1];
			PF_ACPY(&match_key.addr[1],
			    &s->key[idx]->addr[0], match_key.af);
			match_key.port[1] = s->key[idx]->port[0];
		}

		pf_unlink_state(s);
		killed++;

		if (psk->psk_kill_match)
			killed += pf_kill_matching_state(&match_key, dir);

		goto relock_DIOCKILLSTATES;
	}
	PF_HASHROW_UNLOCK(ih);

	return (killed);
}
2332
2333 static int
2334 pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td)
2335 {
2336         int                      error = 0;
2337         PF_RULES_RLOCK_TRACKER;
2338
2339 #define ERROUT_IOCTL(target, x)                                 \
2340     do {                                                                \
2341             error = (x);                                                \
2342             SDT_PROBE3(pf, ioctl, ioctl, error, cmd, error, __LINE__);  \
2343             goto target;                                                \
2344     } while (0)
2345
2346
2347         /* XXX keep in sync with switch() below */
2348         if (securelevel_gt(td->td_ucred, 2))
2349                 switch (cmd) {
2350                 case DIOCGETRULES:
2351                 case DIOCGETRULENV:
2352                 case DIOCGETADDRS:
2353                 case DIOCGETADDR:
2354                 case DIOCGETSTATE:
2355                 case DIOCGETSTATENV:
2356                 case DIOCSETSTATUSIF:
2357                 case DIOCGETSTATUSNV:
2358                 case DIOCCLRSTATUS:
2359                 case DIOCNATLOOK:
2360                 case DIOCSETDEBUG:
2361                 case DIOCGETSTATES:
2362                 case DIOCGETSTATESV2:
2363                 case DIOCGETTIMEOUT:
2364                 case DIOCCLRRULECTRS:
2365                 case DIOCGETLIMIT:
2366                 case DIOCGETALTQSV0:
2367                 case DIOCGETALTQSV1:
2368                 case DIOCGETALTQV0:
2369                 case DIOCGETALTQV1:
2370                 case DIOCGETQSTATSV0:
2371                 case DIOCGETQSTATSV1:
2372                 case DIOCGETRULESETS:
2373                 case DIOCGETRULESET:
2374                 case DIOCRGETTABLES:
2375                 case DIOCRGETTSTATS:
2376                 case DIOCRCLRTSTATS:
2377                 case DIOCRCLRADDRS:
2378                 case DIOCRADDADDRS:
2379                 case DIOCRDELADDRS:
2380                 case DIOCRSETADDRS:
2381                 case DIOCRGETADDRS:
2382                 case DIOCRGETASTATS:
2383                 case DIOCRCLRASTATS:
2384                 case DIOCRTSTADDRS:
2385                 case DIOCOSFPGET:
2386                 case DIOCGETSRCNODES:
2387                 case DIOCCLRSRCNODES:
2388                 case DIOCGETSYNCOOKIES:
2389                 case DIOCIGETIFACES:
2390                 case DIOCGIFSPEEDV0:
2391                 case DIOCGIFSPEEDV1:
2392                 case DIOCSETIFFLAG:
2393                 case DIOCCLRIFFLAG:
2394                 case DIOCGETETHRULES:
2395                 case DIOCGETETHRULE:
2396                 case DIOCGETETHRULESETS:
2397                 case DIOCGETETHRULESET:
2398                         break;
2399                 case DIOCRCLRTABLES:
2400                 case DIOCRADDTABLES:
2401                 case DIOCRDELTABLES:
2402                 case DIOCRSETTFLAGS:
2403                         if (((struct pfioc_table *)addr)->pfrio_flags &
2404                             PFR_FLAG_DUMMY)
2405                                 break; /* dummy operation ok */
2406                         return (EPERM);
2407                 default:
2408                         return (EPERM);
2409                 }
2410
2411         if (!(flags & FWRITE))
2412                 switch (cmd) {
2413                 case DIOCGETRULES:
2414                 case DIOCGETADDRS:
2415                 case DIOCGETADDR:
2416                 case DIOCGETSTATE:
2417                 case DIOCGETSTATENV:
2418                 case DIOCGETSTATUSNV:
2419                 case DIOCGETSTATES:
2420                 case DIOCGETSTATESV2:
2421                 case DIOCGETTIMEOUT:
2422                 case DIOCGETLIMIT:
2423                 case DIOCGETALTQSV0:
2424                 case DIOCGETALTQSV1:
2425                 case DIOCGETALTQV0:
2426                 case DIOCGETALTQV1:
2427                 case DIOCGETQSTATSV0:
2428                 case DIOCGETQSTATSV1:
2429                 case DIOCGETRULESETS:
2430                 case DIOCGETRULESET:
2431                 case DIOCNATLOOK:
2432                 case DIOCRGETTABLES:
2433                 case DIOCRGETTSTATS:
2434                 case DIOCRGETADDRS:
2435                 case DIOCRGETASTATS:
2436                 case DIOCRTSTADDRS:
2437                 case DIOCOSFPGET:
2438                 case DIOCGETSRCNODES:
2439                 case DIOCGETSYNCOOKIES:
2440                 case DIOCIGETIFACES:
2441                 case DIOCGIFSPEEDV1:
2442                 case DIOCGIFSPEEDV0:
2443                 case DIOCGETRULENV:
2444                 case DIOCGETETHRULES:
2445                 case DIOCGETETHRULE:
2446                 case DIOCGETETHRULESETS:
2447                 case DIOCGETETHRULESET:
2448                         break;
2449                 case DIOCRCLRTABLES:
2450                 case DIOCRADDTABLES:
2451                 case DIOCRDELTABLES:
2452                 case DIOCRCLRTSTATS:
2453                 case DIOCRCLRADDRS:
2454                 case DIOCRADDADDRS:
2455                 case DIOCRDELADDRS:
2456                 case DIOCRSETADDRS:
2457                 case DIOCRSETTFLAGS:
2458                         if (((struct pfioc_table *)addr)->pfrio_flags &
2459                             PFR_FLAG_DUMMY) {
2460                                 flags |= FWRITE; /* need write lock for dummy */
2461                                 break; /* dummy operation ok */
2462                         }
2463                         return (EACCES);
2464                 default:
2465                         return (EACCES);
2466                 }
2467
2468         CURVNET_SET(TD_TO_VNET(td));
2469
2470         switch (cmd) {
2471         case DIOCSTART:
2472                 sx_xlock(&V_pf_ioctl_lock);
2473                 if (V_pf_status.running)
2474                         error = EEXIST;
2475                 else {
2476                         hook_pf();
2477                         if (! TAILQ_EMPTY(V_pf_keth->active.rules))
2478                                 hook_pf_eth();
2479                         V_pf_status.running = 1;
2480                         V_pf_status.since = time_second;
2481                         new_unrhdr64(&V_pf_stateid, time_second);
2482
2483                         DPFPRINTF(PF_DEBUG_MISC, ("pf: started\n"));
2484                 }
2485                 break;
2486
2487         case DIOCSTOP:
2488                 sx_xlock(&V_pf_ioctl_lock);
2489                 if (!V_pf_status.running)
2490                         error = ENOENT;
2491                 else {
2492                         V_pf_status.running = 0;
2493                         dehook_pf();
2494                         dehook_pf_eth();
2495                         V_pf_status.since = time_second;
2496                         DPFPRINTF(PF_DEBUG_MISC, ("pf: stopped\n"));
2497                 }
2498                 break;
2499
	case DIOCGETETHRULES: {
		struct pfioc_nv		*nv = (struct pfioc_nv *)addr;
		nvlist_t		*nvl;
		void			*packed;
		struct pf_keth_rule	*tail;
		struct pf_keth_ruleset	*rs;
		u_int32_t		 ticket, nr;
		const char		*anchor = "";

		/*
		 * Return the active ethernet ruleset's ticket and the number
		 * of rules in it ("nr") for the anchor named in the request.
		 */
		nvl = NULL;
		packed = NULL;

#define ERROUT(x)	ERROUT_IOCTL(DIOCGETETHRULES_error, x)

		/* Cap the request size before allocating for it. */
		if (nv->len > pf_ioctl_maxcount)
			ERROUT(ENOMEM);

		/* Copy the request in */
		packed = malloc(nv->len, M_NVLIST, M_WAITOK);
		if (packed == NULL)
			ERROUT(ENOMEM);

		error = copyin(nv->data, packed, nv->len);
		if (error)
			ERROUT(error);

		nvl = nvlist_unpack(packed, nv->len, 0);
		if (nvl == NULL)
			ERROUT(EBADMSG);

		if (! nvlist_exists_string(nvl, "anchor"))
			ERROUT(EBADMSG);

		anchor = nvlist_get_string(nvl, "anchor");

		rs = pf_find_keth_ruleset(anchor);

		/* Request fully parsed; release it before building the reply. */
		nvlist_destroy(nvl);
		nvl = NULL;
		free(packed, M_NVLIST);
		packed = NULL;

		if (rs == NULL)
			ERROUT(ENOENT);

		/* Reply */
		nvl = nvlist_create(0);
		if (nvl == NULL)
			ERROUT(ENOMEM);

		PF_RULES_RLOCK();

		/* Snapshot ticket and rule count under the rules read lock. */
		ticket = rs->active.ticket;
		tail = TAILQ_LAST(rs->active.rules, pf_keth_ruleq);
		if (tail)
			nr = tail->nr + 1;
		else
			nr = 0;

		PF_RULES_RUNLOCK();

		nvlist_add_number(nvl, "ticket", ticket);
		nvlist_add_number(nvl, "nr", nr);

		packed = nvlist_pack(nvl, &nv->len);
		if (packed == NULL)
			ERROUT(ENOMEM);

		/* size == 0 is a size probe: report the needed length only. */
		if (nv->size == 0)
			ERROUT(0);
		else if (nv->size < nv->len)
			ERROUT(ENOSPC);

		error = copyout(packed, nv->data, nv->len);

#undef ERROUT
DIOCGETETHRULES_error:
		free(packed, M_NVLIST);
		nvlist_destroy(nvl);
		break;
	}
2581
2582         case DIOCGETETHRULE: {
2583                 struct epoch_tracker     et;
2584                 struct pfioc_nv         *nv = (struct pfioc_nv *)addr;
2585                 nvlist_t                *nvl = NULL;
2586                 void                    *nvlpacked = NULL;
2587                 struct pf_keth_rule     *rule = NULL;
2588                 struct pf_keth_ruleset  *rs;
2589                 u_int32_t                ticket, nr;
2590                 bool                     clear = false;
2591                 const char              *anchor;
2592
2593 #define ERROUT(x)       ERROUT_IOCTL(DIOCGETETHRULE_error, x)
2594
2595                 if (nv->len > pf_ioctl_maxcount)
2596                         ERROUT(ENOMEM);
2597
2598                 nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
2599                 if (nvlpacked == NULL)
2600                         ERROUT(ENOMEM);
2601
2602                 error = copyin(nv->data, nvlpacked, nv->len);
2603                 if (error)
2604                         ERROUT(error);
2605
2606                 nvl = nvlist_unpack(nvlpacked, nv->len, 0);
2607                 if (nvl == NULL)
2608                         ERROUT(EBADMSG);
2609                 if (! nvlist_exists_number(nvl, "ticket"))
2610                         ERROUT(EBADMSG);
2611                 ticket = nvlist_get_number(nvl, "ticket");
2612                 if (! nvlist_exists_string(nvl, "anchor"))
2613                         ERROUT(EBADMSG);
2614                 anchor = nvlist_get_string(nvl, "anchor");
2615
2616                 if (nvlist_exists_bool(nvl, "clear"))
2617                         clear = nvlist_get_bool(nvl, "clear");
2618
2619                 if (clear && !(flags & FWRITE))
2620                         ERROUT(EACCES);
2621
2622                 if (! nvlist_exists_number(nvl, "nr"))
2623                         ERROUT(EBADMSG);
2624                 nr = nvlist_get_number(nvl, "nr");
2625
2626                 PF_RULES_RLOCK();
2627                 rs = pf_find_keth_ruleset(anchor);
2628                 if (rs == NULL) {
2629                         PF_RULES_RUNLOCK();
2630                         ERROUT(ENOENT);
2631                 }
2632                 if (ticket != rs->active.ticket) {
2633                         PF_RULES_RUNLOCK();
2634                         ERROUT(EBUSY);
2635                 }
2636
2637                 nvlist_destroy(nvl);
2638                 nvl = NULL;
2639                 free(nvlpacked, M_NVLIST);
2640                 nvlpacked = NULL;
2641
2642                 rule = TAILQ_FIRST(rs->active.rules);
2643                 while ((rule != NULL) && (rule->nr != nr))
2644                         rule = TAILQ_NEXT(rule, entries);
2645                 if (rule == NULL) {
2646                         PF_RULES_RUNLOCK();
2647                         ERROUT(ENOENT);
2648                 }
2649                 /* Make sure rule can't go away. */
2650                 NET_EPOCH_ENTER(et);
2651                 PF_RULES_RUNLOCK();
2652                 nvl = pf_keth_rule_to_nveth_rule(rule);
2653                 if (pf_keth_anchor_nvcopyout(rs, rule, nvl))
2654                         ERROUT(EBUSY);
2655                 NET_EPOCH_EXIT(et);
2656                 if (nvl == NULL)
2657                         ERROUT(ENOMEM);
2658
2659                 nvlpacked = nvlist_pack(nvl, &nv->len);
2660                 if (nvlpacked == NULL)
2661                         ERROUT(ENOMEM);
2662
2663                 if (nv->size == 0)
2664                         ERROUT(0);
2665                 else if (nv->size < nv->len)
2666                         ERROUT(ENOSPC);
2667
2668                 error = copyout(nvlpacked, nv->data, nv->len);
2669                 if (error == 0 && clear) {
2670                         counter_u64_zero(rule->evaluations);
2671                         for (int i = 0; i < 2; i++) {
2672                                 counter_u64_zero(rule->packets[i]);
2673                                 counter_u64_zero(rule->bytes[i]);
2674                         }
2675                 }
2676
2677 #undef ERROUT
2678 DIOCGETETHRULE_error:
2679                 free(nvlpacked, M_NVLIST);
2680                 nvlist_destroy(nvl);
2681                 break;
2682         }
2683
2684         case DIOCADDETHRULE: {
2685                 struct pfioc_nv         *nv = (struct pfioc_nv *)addr;
2686                 nvlist_t                *nvl = NULL;
2687                 void                    *nvlpacked = NULL;
2688                 struct pf_keth_rule     *rule = NULL, *tail = NULL;
2689                 struct pf_keth_ruleset  *ruleset = NULL;
2690                 struct pfi_kkif         *kif = NULL, *bridge_to_kif = NULL;
2691                 const char              *anchor = "", *anchor_call = "";
2692
2693 #define ERROUT(x)       ERROUT_IOCTL(DIOCADDETHRULE_error, x)
2694
2695                 if (nv->len > pf_ioctl_maxcount)
2696                         ERROUT(ENOMEM);
2697
2698                 nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
2699                 if (nvlpacked == NULL)
2700                         ERROUT(ENOMEM);
2701
2702                 error = copyin(nv->data, nvlpacked, nv->len);
2703                 if (error)
2704                         ERROUT(error);
2705
2706                 nvl = nvlist_unpack(nvlpacked, nv->len, 0);
2707                 if (nvl == NULL)
2708                         ERROUT(EBADMSG);
2709
2710                 if (! nvlist_exists_number(nvl, "ticket"))
2711                         ERROUT(EBADMSG);
2712
2713                 if (nvlist_exists_string(nvl, "anchor"))
2714                         anchor = nvlist_get_string(nvl, "anchor");
2715                 if (nvlist_exists_string(nvl, "anchor_call"))
2716                         anchor_call = nvlist_get_string(nvl, "anchor_call");
2717
2718                 ruleset = pf_find_keth_ruleset(anchor);
2719                 if (ruleset == NULL)
2720                         ERROUT(EINVAL);
2721
2722                 if (nvlist_get_number(nvl, "ticket") !=
2723                     ruleset->inactive.ticket) {
2724                         DPFPRINTF(PF_DEBUG_MISC,
2725                             ("ticket: %d != %d\n",
2726                             (u_int32_t)nvlist_get_number(nvl, "ticket"),
2727                             ruleset->inactive.ticket));
2728                         ERROUT(EBUSY);
2729                 }
2730
2731                 rule = malloc(sizeof(*rule), M_PFRULE, M_WAITOK);
2732                 if (rule == NULL)
2733                         ERROUT(ENOMEM);
2734                 rule->timestamp = NULL;
2735
2736                 error = pf_nveth_rule_to_keth_rule(nvl, rule);
2737                 if (error != 0)
2738                         ERROUT(error);
2739
2740                 if (rule->ifname[0])
2741                         kif = pf_kkif_create(M_WAITOK);
2742                 if (rule->bridge_to_name[0])
2743                         bridge_to_kif = pf_kkif_create(M_WAITOK);
2744                 rule->evaluations = counter_u64_alloc(M_WAITOK);
2745                 for (int i = 0; i < 2; i++) {
2746                         rule->packets[i] = counter_u64_alloc(M_WAITOK);
2747                         rule->bytes[i] = counter_u64_alloc(M_WAITOK);
2748                 }
2749                 rule->timestamp = uma_zalloc_pcpu(pf_timestamp_pcpu_zone,
2750                     M_WAITOK | M_ZERO);
2751
2752                 PF_RULES_WLOCK();
2753
2754                 if (rule->ifname[0]) {
2755                         rule->kif = pfi_kkif_attach(kif, rule->ifname);
2756                         pfi_kkif_ref(rule->kif);
2757                 } else
2758                         rule->kif = NULL;
2759                 if (rule->bridge_to_name[0]) {
2760                         rule->bridge_to = pfi_kkif_attach(bridge_to_kif,
2761                             rule->bridge_to_name);
2762                         pfi_kkif_ref(rule->bridge_to);
2763                 } else
2764                         rule->bridge_to = NULL;
2765
2766 #ifdef ALTQ
2767                 /* set queue IDs */
2768                 if (rule->qname[0] != 0) {
2769                         if ((rule->qid = pf_qname2qid(rule->qname)) == 0)
2770                                 error = EBUSY;
2771                         else
2772                                 rule->qid = rule->qid;
2773                 }
2774 #endif
2775                 if (rule->tagname[0])
2776                         if ((rule->tag = pf_tagname2tag(rule->tagname)) == 0)
2777                                 error = EBUSY;
2778                 if (rule->match_tagname[0])
2779                         if ((rule->match_tag = pf_tagname2tag(
2780                             rule->match_tagname)) == 0)
2781                                 error = EBUSY;
2782
2783                 if (error == 0 && rule->ipdst.addr.type == PF_ADDR_TABLE)
2784                         error = pf_eth_addr_setup(ruleset, &rule->ipdst.addr);
2785                 if (error == 0 && rule->ipsrc.addr.type == PF_ADDR_TABLE)
2786                         error = pf_eth_addr_setup(ruleset, &rule->ipsrc.addr);
2787
2788                 if (error) {
2789                         pf_free_eth_rule(rule);
2790                         PF_RULES_WUNLOCK();
2791                         ERROUT(error);
2792                 }
2793
2794                 if (pf_keth_anchor_setup(rule, ruleset, anchor_call)) {
2795                         pf_free_eth_rule(rule);
2796                         PF_RULES_WUNLOCK();
2797                         ERROUT(EINVAL);
2798                 }
2799
2800                 tail = TAILQ_LAST(ruleset->inactive.rules, pf_keth_ruleq);
2801                 if (tail)
2802                         rule->nr = tail->nr + 1;
2803                 else
2804                         rule->nr = 0;
2805
2806                 TAILQ_INSERT_TAIL(ruleset->inactive.rules, rule, entries);
2807
2808                 PF_RULES_WUNLOCK();
2809
2810 #undef ERROUT
2811 DIOCADDETHRULE_error:
2812                 nvlist_destroy(nvl);
2813                 free(nvlpacked, M_NVLIST);
2814                 break;
2815         }
2816
	case DIOCGETETHRULESETS: {
		struct epoch_tracker	 et;
		struct pfioc_nv		*nv = (struct pfioc_nv *)addr;
		nvlist_t		*nvl = NULL;
		void			*nvlpacked = NULL;
		struct pf_keth_ruleset	*ruleset;
		struct pf_keth_anchor	*anchor;
		int			 nr = 0;

#define ERROUT(x)	ERROUT_IOCTL(DIOCGETETHRULESETS_error, x)

		/*
		 * Count the child anchors of the ethernet ruleset named by
		 * "path" and return the count as "nr".
		 */
		if (nv->len > pf_ioctl_maxcount)
			ERROUT(ENOMEM);

		nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
		if (nvlpacked == NULL)
			ERROUT(ENOMEM);

		error = copyin(nv->data, nvlpacked, nv->len);
		if (error)
			ERROUT(error);

		nvl = nvlist_unpack(nvlpacked, nv->len, 0);
		if (nvl == NULL)
			ERROUT(EBADMSG);
		if (! nvlist_exists_string(nvl, "path"))
			ERROUT(EBADMSG);

		/* The anchor tree is walked under the network epoch. */
		NET_EPOCH_ENTER(et);

		if ((ruleset = pf_find_keth_ruleset(
		    nvlist_get_string(nvl, "path"))) == NULL) {
			NET_EPOCH_EXIT(et);
			ERROUT(ENOENT);
		}

		if (ruleset->anchor == NULL) {
			/* Root ruleset: count only top-level anchors. */
			RB_FOREACH(anchor, pf_keth_anchor_global, &V_pf_keth_anchors)
				if (anchor->parent == NULL)
					nr++;
		} else {
			/* Named anchor: count its direct children. */
			RB_FOREACH(anchor, pf_keth_anchor_node,
			    &ruleset->anchor->children)
				nr++;
		}

		NET_EPOCH_EXIT(et);

		/* Request fully parsed; reuse nvl/nvlpacked for the reply. */
		nvlist_destroy(nvl);
		nvl = NULL;
		free(nvlpacked, M_NVLIST);
		nvlpacked = NULL;

		nvl = nvlist_create(0);
		if (nvl == NULL)
			ERROUT(ENOMEM);

		nvlist_add_number(nvl, "nr", nr);

		nvlpacked = nvlist_pack(nvl, &nv->len);
		if (nvlpacked == NULL)
			ERROUT(ENOMEM);

		/* size == 0 is a size probe: report the needed length only. */
		if (nv->size == 0)
			ERROUT(0);
		else if (nv->size < nv->len)
			ERROUT(ENOSPC);

		error = copyout(nvlpacked, nv->data, nv->len);

#undef ERROUT
DIOCGETETHRULESETS_error:
		free(nvlpacked, M_NVLIST);
		nvlist_destroy(nvl);
		break;
	}
2893
	case DIOCGETETHRULESET: {
		struct epoch_tracker	 et;
		struct pfioc_nv		*nv = (struct pfioc_nv *)addr;
		nvlist_t		*nvl = NULL;
		void			*nvlpacked = NULL;
		struct pf_keth_ruleset	*ruleset;
		struct pf_keth_anchor	*anchor;
		int			 nr = 0, req_nr = 0;
		bool			 found = false;

#define ERROUT(x)	ERROUT_IOCTL(DIOCGETETHRULESET_error, x)

		/*
		 * Look up the req_nr'th child anchor of the ethernet ruleset
		 * named by "path" and return its name and path.  Companion to
		 * DIOCGETETHRULESETS, which returns the child count.
		 */
		if (nv->len > pf_ioctl_maxcount)
			ERROUT(ENOMEM);

		nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
		if (nvlpacked == NULL)
			ERROUT(ENOMEM);

		error = copyin(nv->data, nvlpacked, nv->len);
		if (error)
			ERROUT(error);

		nvl = nvlist_unpack(nvlpacked, nv->len, 0);
		if (nvl == NULL)
			ERROUT(EBADMSG);
		if (! nvlist_exists_string(nvl, "path"))
			ERROUT(EBADMSG);
		if (! nvlist_exists_number(nvl, "nr"))
			ERROUT(EBADMSG);

		req_nr = nvlist_get_number(nvl, "nr");

		/* The anchor tree is walked under the network epoch. */
		NET_EPOCH_ENTER(et);

		if ((ruleset = pf_find_keth_ruleset(
		    nvlist_get_string(nvl, "path"))) == NULL) {
			NET_EPOCH_EXIT(et);
			ERROUT(ENOENT);
		}

		/* Request fully parsed; reuse nvl/nvlpacked for the reply. */
		nvlist_destroy(nvl);
		nvl = NULL;
		free(nvlpacked, M_NVLIST);
		nvlpacked = NULL;

		nvl = nvlist_create(0);
		if (nvl == NULL) {
			NET_EPOCH_EXIT(et);
			ERROUT(ENOMEM);
		}

		if (ruleset->anchor == NULL) {
			/* Root ruleset: scan top-level anchors only. */
			RB_FOREACH(anchor, pf_keth_anchor_global,
			    &V_pf_keth_anchors) {
				if (anchor->parent == NULL && nr++ == req_nr) {
					found = true;
					break;
				}
			}
		} else {
			/* Named anchor: scan its direct children. */
			RB_FOREACH(anchor, pf_keth_anchor_node,
			     &ruleset->anchor->children) {
				if (nr++ == req_nr) {
					found = true;
					break;
				}
			}
		}

		NET_EPOCH_EXIT(et);
		if (found) {
			/*
			 * NOTE(review): anchor (and ruleset->anchor) are
			 * dereferenced after NET_EPOCH_EXIT(); presumably
			 * something else keeps them alive here -- verify.
			 */
			nvlist_add_number(nvl, "nr", nr);
			nvlist_add_string(nvl, "name", anchor->name);
			if (ruleset->anchor)
				nvlist_add_string(nvl, "path",
				    ruleset->anchor->path);
			else
				nvlist_add_string(nvl, "path", "");
		} else {
			/* req_nr out of range for this anchor's children. */
			ERROUT(EBUSY);
		}

		nvlpacked = nvlist_pack(nvl, &nv->len);
		if (nvlpacked == NULL)
			ERROUT(ENOMEM);

		/* size == 0 is a size probe: report the needed length only. */
		if (nv->size == 0)
			ERROUT(0);
		else if (nv->size < nv->len)
			ERROUT(ENOSPC);

		error = copyout(nvlpacked, nv->data, nv->len);

#undef ERROUT
DIOCGETETHRULESET_error:
		free(nvlpacked, M_NVLIST);
		nvlist_destroy(nvl);
		break;
	}
2994
	case DIOCADDRULENV: {
		struct pfioc_nv	*nv = (struct pfioc_nv *)addr;
		nvlist_t	*nvl = NULL;
		void		*nvlpacked = NULL;
		struct pf_krule	*rule = NULL;
		const char	*anchor = "", *anchor_call = "";
		uint32_t	 ticket = 0, pool_ticket = 0;

#define ERROUT(x)	ERROUT_IOCTL(DIOCADDRULENV_error, x)

		/*
		 * nvlist-encoded variant of DIOCADDRULE: decode a rule plus
		 * tickets/anchor names from the request and hand them to
		 * pf_ioctl_addrule().
		 */
		if (nv->len > pf_ioctl_maxcount)
			ERROUT(ENOMEM);

		/* M_WAITOK: malloc(9) does not return NULL here. */
		nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
		error = copyin(nv->data, nvlpacked, nv->len);
		if (error)
			ERROUT(error);

		nvl = nvlist_unpack(nvlpacked, nv->len, 0);
		if (nvl == NULL)
			ERROUT(EBADMSG);

		if (! nvlist_exists_number(nvl, "ticket"))
			ERROUT(EINVAL);
		ticket = nvlist_get_number(nvl, "ticket");

		if (! nvlist_exists_number(nvl, "pool_ticket"))
			ERROUT(EINVAL);
		pool_ticket = nvlist_get_number(nvl, "pool_ticket");

		if (! nvlist_exists_nvlist(nvl, "rule"))
			ERROUT(EINVAL);

		rule = pf_krule_alloc();
		error = pf_nvrule_to_krule(nvlist_get_nvlist(nvl, "rule"),
		    rule);
		if (error)
			ERROUT(error);

		if (nvlist_exists_string(nvl, "anchor"))
			anchor = nvlist_get_string(nvl, "anchor");
		if (nvlist_exists_string(nvl, "anchor_call"))
			anchor_call = nvlist_get_string(nvl, "anchor_call");

		/* Check for any accumulated nvlist error before committing. */
		if ((error = nvlist_error(nvl)))
			ERROUT(error);

		/* Frees rule on error */
		error = pf_ioctl_addrule(rule, ticket, pool_ticket, anchor,
		    anchor_call, td);

		nvlist_destroy(nvl);
		free(nvlpacked, M_NVLIST);
		break;
#undef ERROUT
DIOCADDRULENV_error:
		/*
		 * Error path before pf_ioctl_addrule() took ownership:
		 * the rule must be freed here.
		 */
		pf_krule_free(rule);
		nvlist_destroy(nvl);
		free(nvlpacked, M_NVLIST);

		break;
	}
	case DIOCADDRULE: {
		struct pfioc_rule	*pr = (struct pfioc_rule *)addr;
		struct pf_krule		*rule;

		/*
		 * Classic struct-based rule addition: convert the userland
		 * pf_rule into a kernel pf_krule and add it via
		 * pf_ioctl_addrule().
		 */
		rule = pf_krule_alloc();
		error = pf_rule_to_krule(&pr->rule, rule);
		if (error != 0) {
			pf_krule_free(rule);
			break;
		}

		/* Ensure the user-supplied anchor name is NUL-terminated. */
		pr->anchor[sizeof(pr->anchor) - 1] = 0;

		/* Frees rule on error */
		error = pf_ioctl_addrule(rule, pr->ticket, pr->pool_ticket,
		    pr->anchor, pr->anchor_call, td);
		break;
	}
3075
	case DIOCGETRULES: {
		struct pfioc_rule	*pr = (struct pfioc_rule *)addr;
		struct pf_kruleset	*ruleset;
		struct pf_krule		*tail;
		int			 rs_num;

		/*
		 * Return the active ruleset's ticket and rule count for the
		 * ruleset selected by anchor + rule action.
		 */

		/* Ensure the user-supplied anchor name is NUL-terminated. */
		pr->anchor[sizeof(pr->anchor) - 1] = 0;

		PF_RULES_WLOCK();
		ruleset = pf_find_kruleset(pr->anchor);
		if (ruleset == NULL) {
			PF_RULES_WUNLOCK();
			error = EINVAL;
			break;
		}
		rs_num = pf_get_ruleset_number(pr->rule.action);
		if (rs_num >= PF_RULESET_MAX) {
			PF_RULES_WUNLOCK();
			error = EINVAL;
			break;
		}
		/* nr is one past the last rule's number (i.e. rule count). */
		tail = TAILQ_LAST(ruleset->rules[rs_num].active.ptr,
		    pf_krulequeue);
		if (tail)
			pr->nr = tail->nr + 1;
		else
			pr->nr = 0;
		pr->ticket = ruleset->rules[rs_num].active.ticket;
		PF_RULES_WUNLOCK();
		break;
	}
3107
	case DIOCGETRULENV: {
		struct pfioc_nv		*nv = (struct pfioc_nv *)addr;
		nvlist_t		*nvrule = NULL;
		nvlist_t		*nvl = NULL;
		struct pf_kruleset	*ruleset;
		struct pf_krule		*rule;
		void			*nvlpacked = NULL;
		int			 rs_num, nr;
		bool			 clear_counter = false;

#define ERROUT(x)	ERROUT_IOCTL(DIOCGETRULENV_error, x)

		/*
		 * Fetch one rule (by anchor + ruleset + rule number) as an
		 * nvlist.  If "clear_counter" is set (and the caller opened
		 * the device for writing) zero the rule's counters.
		 */
		if (nv->len > pf_ioctl_maxcount)
			ERROUT(ENOMEM);

		/* Copy the request in */
		nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
		if (nvlpacked == NULL)
			ERROUT(ENOMEM);

		error = copyin(nv->data, nvlpacked, nv->len);
		if (error)
			ERROUT(error);

		nvl = nvlist_unpack(nvlpacked, nv->len, 0);
		if (nvl == NULL)
			ERROUT(EBADMSG);

		if (! nvlist_exists_string(nvl, "anchor"))
			ERROUT(EBADMSG);
		if (! nvlist_exists_number(nvl, "ruleset"))
			ERROUT(EBADMSG);
		if (! nvlist_exists_number(nvl, "ticket"))
			ERROUT(EBADMSG);
		if (! nvlist_exists_number(nvl, "nr"))
			ERROUT(EBADMSG);

		if (nvlist_exists_bool(nvl, "clear_counter"))
			clear_counter = nvlist_get_bool(nvl, "clear_counter");

		/* Clearing counters mutates state; require write access. */
		if (clear_counter && !(flags & FWRITE))
			ERROUT(EACCES);

		nr = nvlist_get_number(nvl, "nr");

		/*
		 * The write lock is held from lookup through the optional
		 * counter clear below.
		 */
		PF_RULES_WLOCK();
		ruleset = pf_find_kruleset(nvlist_get_string(nvl, "anchor"));
		if (ruleset == NULL) {
			PF_RULES_WUNLOCK();
			ERROUT(ENOENT);
		}

		rs_num = pf_get_ruleset_number(nvlist_get_number(nvl, "ruleset"));
		if (rs_num >= PF_RULESET_MAX) {
			PF_RULES_WUNLOCK();
			ERROUT(EINVAL);
		}

		/* A stale ticket means the ruleset changed under the caller. */
		if (nvlist_get_number(nvl, "ticket") !=
		    ruleset->rules[rs_num].active.ticket) {
			PF_RULES_WUNLOCK();
			ERROUT(EBUSY);
		}

		if ((error = nvlist_error(nvl))) {
			PF_RULES_WUNLOCK();
			ERROUT(error);
		}

		/* Linear scan for the requested rule number. */
		rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr);
		while ((rule != NULL) && (rule->nr != nr))
			rule = TAILQ_NEXT(rule, entries);
		if (rule == NULL) {
			PF_RULES_WUNLOCK();
			ERROUT(EBUSY);
		}

		nvrule = pf_krule_to_nvrule(rule);

		/* Request nvlist is no longer needed; build the reply. */
		nvlist_destroy(nvl);
		nvl = nvlist_create(0);
		if (nvl == NULL) {
			PF_RULES_WUNLOCK();
			ERROUT(ENOMEM);
		}
		nvlist_add_number(nvl, "nr", nr);
		nvlist_add_nvlist(nvl, "rule", nvrule);
		nvlist_destroy(nvrule);
		nvrule = NULL;
		if (pf_kanchor_nvcopyout(ruleset, rule, nvl)) {
			PF_RULES_WUNLOCK();
			ERROUT(EBUSY);
		}

		free(nvlpacked, M_NVLIST);
		nvlpacked = nvlist_pack(nvl, &nv->len);
		if (nvlpacked == NULL) {
			PF_RULES_WUNLOCK();
			ERROUT(ENOMEM);
		}

		/* size == 0 is a size probe: report the needed length only. */
		if (nv->size == 0) {
			PF_RULES_WUNLOCK();
			ERROUT(0);
		}
		else if (nv->size < nv->len) {
			PF_RULES_WUNLOCK();
			ERROUT(ENOSPC);
		}

		if (clear_counter) {
			pf_counter_u64_zero(&rule->evaluations);
			for (int i = 0; i < 2; i++) {
				pf_counter_u64_zero(&rule->packets[i]);
				pf_counter_u64_zero(&rule->bytes[i]);
			}
			counter_u64_zero(rule->states_tot);
		}
		PF_RULES_WUNLOCK();

		error = copyout(nvlpacked, nv->data, nv->len);

#undef ERROUT
DIOCGETRULENV_error:
		free(nvlpacked, M_NVLIST);
		nvlist_destroy(nvrule);
		nvlist_destroy(nvl);

		break;
	}
3238
3239         case DIOCCHANGERULE: {
3240                 struct pfioc_rule       *pcr = (struct pfioc_rule *)addr;
3241                 struct pf_kruleset      *ruleset;
3242                 struct pf_krule         *oldrule = NULL, *newrule = NULL;
3243                 struct pfi_kkif         *kif = NULL;
3244                 struct pf_kpooladdr     *pa;
3245                 u_int32_t                nr = 0;
		int			 rs_num;

		/* Ensure the anchor path from userland is NUL-terminated. */
		pcr->anchor[sizeof(pcr->anchor) - 1] = 0;

		if (pcr->action < PF_CHANGE_ADD_HEAD ||
		    pcr->action > PF_CHANGE_GET_TICKET) {
			error = EINVAL;
			break;
		}
		/* return_icmp carries the ICMP type in its high byte. */
		if (pcr->rule.return_icmp >> 8 > ICMP_MAXTYPE) {
			error = EINVAL;
			break;
		}

		if (pcr->action != PF_CHANGE_REMOVE) {
			/*
			 * Build the kernel-side rule and allocate all of its
			 * counters up front with M_WAITOK, before any locks
			 * are taken.
			 */
			newrule = pf_krule_alloc();
			error = pf_rule_to_krule(&pcr->rule, newrule);
			if (error != 0) {
				pf_krule_free(newrule);
				break;
			}

			if (newrule->ifname[0])
				kif = pf_kkif_create(M_WAITOK);
			pf_counter_u64_init(&newrule->evaluations, M_WAITOK);
			for (int i = 0; i < 2; i++) {
				pf_counter_u64_init(&newrule->packets[i], M_WAITOK);
				pf_counter_u64_init(&newrule->bytes[i], M_WAITOK);
			}
			newrule->states_cur = counter_u64_alloc(M_WAITOK);
			newrule->states_tot = counter_u64_alloc(M_WAITOK);
			newrule->src_nodes = counter_u64_alloc(M_WAITOK);
			/* Record the real uid/pid of the configuring process. */
			newrule->cuid = td->td_ucred->cr_ruid;
			newrule->cpid = td->td_proc ? td->td_proc->p_pid : 0;
			TAILQ_INIT(&newrule->rpool.list);
		}
#define	ERROUT(x)	ERROUT_IOCTL(DIOCCHANGERULE_error, x)

		PF_CONFIG_LOCK();
		PF_RULES_WLOCK();
#ifdef PF_WANT_32_TO_64_COUNTER
		if (newrule != NULL) {
			LIST_INSERT_HEAD(&V_pf_allrulelist, newrule, allrulelist);
			newrule->allrulelinked = true;
			V_pf_allrulecount++;
		}
#endif

		/*
		 * Additions must present the current pool ticket; REMOVE and
		 * GET_TICKET do not stage pool addresses and are exempt.
		 */
		if (!(pcr->action == PF_CHANGE_REMOVE ||
		    pcr->action == PF_CHANGE_GET_TICKET) &&
		    pcr->pool_ticket != V_ticket_pabuf)
			ERROUT(EBUSY);

		ruleset = pf_find_kruleset(pcr->anchor);
		if (ruleset == NULL)
			ERROUT(EINVAL);

		rs_num = pf_get_ruleset_number(pcr->rule.action);
		if (rs_num >= PF_RULESET_MAX)
			ERROUT(EINVAL);

		/*
		 * XXXMJG: there is no guarantee that the ruleset was
		 * created by the usual route of calling DIOCXBEGIN.
		 * As a result it is possible the rule tree will not
		 * be allocated yet. Hack around it by doing it here.
		 * Note it is fine to let the tree persist in case of
		 * error as it will be freed down the road on future
		 * updates (if need be).
		 */
		if (ruleset->rules[rs_num].active.tree == NULL) {
			ruleset->rules[rs_num].active.tree = pf_rule_tree_alloc(M_NOWAIT);
			if (ruleset->rules[rs_num].active.tree == NULL) {
				ERROUT(ENOMEM);
			}
		}

		/*
		 * GET_TICKET bumps and returns the ticket; every other
		 * action must quote the ticket handed out earlier.
		 */
		if (pcr->action == PF_CHANGE_GET_TICKET) {
			pcr->ticket = ++ruleset->rules[rs_num].active.ticket;
			ERROUT(0);
		} else if (pcr->ticket !=
			    ruleset->rules[rs_num].active.ticket)
				ERROUT(EINVAL);

		if (pcr->action != PF_CHANGE_REMOVE) {
			/* Attach the pre-allocated kif to the new rule. */
			if (newrule->ifname[0]) {
				newrule->kif = pfi_kkif_attach(kif,
				    newrule->ifname);
				kif = NULL;
				pfi_kkif_ref(newrule->kif);
			} else
				newrule->kif = NULL;

			if (newrule->rtableid > 0 &&
			    newrule->rtableid >= rt_numfibs)
				error = EBUSY;

#ifdef ALTQ
			/* set queue IDs */
			if (newrule->qname[0] != 0) {
				if ((newrule->qid =
				    pf_qname2qid(newrule->qname)) == 0)
					error = EBUSY;
				else if (newrule->pqname[0] != 0) {
					if ((newrule->pqid =
					    pf_qname2qid(newrule->pqname)) == 0)
						error = EBUSY;
				} else
					newrule->pqid = newrule->qid;
			}
#endif /* ALTQ */
			if (newrule->tagname[0])
				if ((newrule->tag =
				    pf_tagname2tag(newrule->tagname)) == 0)
					error = EBUSY;
			if (newrule->match_tagname[0])
				if ((newrule->match_tag = pf_tagname2tag(
				    newrule->match_tagname)) == 0)
					error = EBUSY;
			if (newrule->rt && !newrule->direction)
				error = EINVAL;
			if (!newrule->log)
				newrule->logif = 0;
			if (newrule->logif >= PFLOGIFS_MAX)
				error = EINVAL;
			if (pf_addr_setup(ruleset, &newrule->src.addr, newrule->af))
				error = ENOMEM;
			if (pf_addr_setup(ruleset, &newrule->dst.addr, newrule->af))
				error = ENOMEM;
			if (pf_kanchor_setup(newrule, ruleset, pcr->anchor_call))
				error = EINVAL;
			/* Resolve table references in the staged pool. */
			TAILQ_FOREACH(pa, &V_pf_pabuf, entries)
				if (pa->addr.type == PF_ADDR_TABLE) {
					pa->addr.p.tbl =
					    pfr_attach_table(ruleset,
					    pa->addr.v.tblname);
					if (pa->addr.p.tbl == NULL)
						error = ENOMEM;
				}

			newrule->overload_tbl = NULL;
			if (newrule->overload_tblname[0]) {
				if ((newrule->overload_tbl = pfr_attach_table(
				    ruleset, newrule->overload_tblname)) ==
				    NULL)
					error = EINVAL;
				else
					newrule->overload_tbl->pfrkt_flags |=
					    PFR_TFLAG_ACTIVE;
			}

			/* Move the staged address pool onto the new rule. */
			pf_mv_kpool(&V_pf_pabuf, &newrule->rpool.list);
			/*
			 * NAT/RDR/BINAT and route-to rules outside an anchor
			 * require a non-empty address pool.
			 */
			if (((((newrule->action == PF_NAT) ||
			    (newrule->action == PF_RDR) ||
			    (newrule->action == PF_BINAT) ||
			    (newrule->rt > PF_NOPFROUTE)) &&
			    !newrule->anchor)) &&
			    (TAILQ_FIRST(&newrule->rpool.list) == NULL))
				error = EINVAL;

			/* Errors accumulated above are handled once, here. */
			if (error) {
				pf_free_rule(newrule);
				PF_RULES_WUNLOCK();
				PF_CONFIG_UNLOCK();
				break;
			}

			newrule->rpool.cur = TAILQ_FIRST(&newrule->rpool.list);
		}
		pf_empty_kpool(&V_pf_pabuf);

		/* Locate the reference rule the change is relative to. */
		if (pcr->action == PF_CHANGE_ADD_HEAD)
			oldrule = TAILQ_FIRST(
			    ruleset->rules[rs_num].active.ptr);
		else if (pcr->action == PF_CHANGE_ADD_TAIL)
			oldrule = TAILQ_LAST(
			    ruleset->rules[rs_num].active.ptr, pf_krulequeue);
		else {
			oldrule = TAILQ_FIRST(
			    ruleset->rules[rs_num].active.ptr);
			while ((oldrule != NULL) && (oldrule->nr != pcr->nr))
				oldrule = TAILQ_NEXT(oldrule, entries);
			if (oldrule == NULL) {
				if (newrule != NULL)
					pf_free_rule(newrule);
				PF_RULES_WUNLOCK();
				PF_CONFIG_UNLOCK();
				error = EINVAL;
				break;
			}
		}

		if (pcr->action == PF_CHANGE_REMOVE) {
			pf_unlink_rule(ruleset->rules[rs_num].active.ptr,
			    oldrule);
			RB_REMOVE(pf_krule_global,
			    ruleset->rules[rs_num].active.tree, oldrule);
			ruleset->rules[rs_num].active.rcount--;
		} else {
			/* Duplicate rules are rejected via the global tree. */
			pf_hash_rule(newrule);
			if (RB_INSERT(pf_krule_global,
			    ruleset->rules[rs_num].active.tree, newrule) != NULL) {
				pf_free_rule(newrule);
				PF_RULES_WUNLOCK();
				PF_CONFIG_UNLOCK();
				error = EEXIST;
				break;
			}

			if (oldrule == NULL)
				TAILQ_INSERT_TAIL(
				    ruleset->rules[rs_num].active.ptr,
				    newrule, entries);
			else if (pcr->action == PF_CHANGE_ADD_HEAD ||
			    pcr->action == PF_CHANGE_ADD_BEFORE)
				TAILQ_INSERT_BEFORE(oldrule, newrule, entries);
			else
				TAILQ_INSERT_AFTER(
				    ruleset->rules[rs_num].active.ptr,
				    oldrule, newrule, entries);
			ruleset->rules[rs_num].active.rcount++;
		}

		/* Renumber the whole ruleset after insertion/removal. */
		nr = 0;
		TAILQ_FOREACH(oldrule,
		    ruleset->rules[rs_num].active.ptr, entries)
			oldrule->nr = nr++;

		ruleset->rules[rs_num].active.ticket++;

		pf_calc_skip_steps(ruleset->rules[rs_num].active.ptr);
		pf_remove_if_empty_kruleset(ruleset);

		PF_RULES_WUNLOCK();
		PF_CONFIG_UNLOCK();
		break;

#undef ERROUT
DIOCCHANGERULE_error:
		/* Common error exit: drop locks, free anything built above. */
		PF_RULES_WUNLOCK();
		PF_CONFIG_UNLOCK();
		pf_krule_free(newrule);
		pf_kkif_free(kif);
		break;
	}
3491
	case DIOCCLRSTATESNV: {
		/* nvlist-based state clearing; all work is in the helper. */
		error = pf_clearstates_nv((struct pfioc_nv *)addr);
		break;
	}
3496
	case DIOCKILLSTATESNV: {
		/* nvlist-based state killing; all work is in the helper. */
		error = pf_killstates_nv((struct pfioc_nv *)addr);
		break;
	}
3501
	case DIOCADDSTATE: {
		struct pfioc_state		*ps = (struct pfioc_state *)addr;
		struct pfsync_state_1301	*sp = &ps->state;

		/* Reject timeout indices outside the per-state table. */
		if (sp->timeout >= PFTM_MAX) {
			error = EINVAL;
			break;
		}
		/*
		 * State import goes through the pfsync hook; with no
		 * importer registered (presumably pfsync not loaded —
		 * confirm) the request cannot be serviced.
		 */
		if (V_pfsync_state_import_ptr != NULL) {
			PF_RULES_RLOCK();
			error = V_pfsync_state_import_ptr(
			    (union pfsync_state_union *)sp, PFSYNC_SI_IOCTL,
			    PFSYNC_MSG_VERSION_1301);
			PF_RULES_RUNLOCK();
		} else
			error = EOPNOTSUPP;
		break;
	}
3520
	case DIOCGETSTATE: {
		struct pfioc_state	*ps = (struct pfioc_state *)addr;
		struct pf_kstate	*s;

		/* Lookup by id/creatorid; a hit is returned locked. */
		s = pf_find_state_byid(ps->state.id, ps->state.creatorid);
		if (s == NULL) {
			error = ENOENT;
			break;
		}

		/* Export in the 1301 wire format this ioctl speaks. */
		pfsync_state_export((union pfsync_state_union*)&ps->state,
		    s, PFSYNC_MSG_VERSION_1301);
		PF_STATE_UNLOCK(s);
		break;
	}
3536
	case DIOCGETSTATENV: {
		/* nvlist-based single-state fetch; all work is in the helper. */
		error = pf_getstate((struct pfioc_nv *)addr);
		break;
	}
3541
	case DIOCGETSTATES: {
		struct pfioc_states	*ps = (struct pfioc_states *)addr;
		struct pf_kstate	*s;
		struct pfsync_state_1301	*pstore, *p;
		int			 i, nr;
		size_t			 slice_count = 16, count;
		void			*out;

		/*
		 * A zero/negative length is a size probe: report how much
		 * space the current state table would need and return.
		 */
		if (ps->ps_len <= 0) {
			nr = uma_zone_get_cur(V_pf_state_z);
			ps->ps_len = sizeof(struct pfsync_state_1301) * nr;
			break;
		}

		out = ps->ps_states;
		pstore = mallocarray(slice_count,
		    sizeof(struct pfsync_state_1301), M_TEMP, M_WAITOK | M_ZERO);
		nr = 0;

		/* Walk every id-hash bucket, exporting one bucket at a time. */
		for (i = 0; i <= pf_hashmask; i++) {
			struct pf_idhash *ih = &V_pf_idhash[i];

DIOCGETSTATES_retry:
			p = pstore;

			if (LIST_EMPTY(&ih->states))
				continue;

			PF_HASHROW_LOCK(ih);
			/* First pass: count live states in this bucket. */
			count = 0;
			LIST_FOREACH(s, &ih->states, entry) {
				if (s->timeout == PFTM_UNLINKED)
					continue;
				count++;
			}

			/*
			 * Scratch buffer too small for this bucket: drop the
			 * row lock before the sleeping allocation, grow the
			 * buffer and rescan the bucket from scratch.
			 */
			if (count > slice_count) {
				PF_HASHROW_UNLOCK(ih);
				free(pstore, M_TEMP);
				slice_count = count * 2;
				pstore = mallocarray(slice_count,
				    sizeof(struct pfsync_state_1301), M_TEMP,
				    M_WAITOK | M_ZERO);
				goto DIOCGETSTATES_retry;
			}

			/* Out of user-supplied space: return what we have. */
			if ((nr+count) * sizeof(*p) > ps->ps_len) {
				PF_HASHROW_UNLOCK(ih);
				goto DIOCGETSTATES_full;
			}

			/* Second pass: export the bucket into the buffer. */
			LIST_FOREACH(s, &ih->states, entry) {
				if (s->timeout == PFTM_UNLINKED)
					continue;

				pfsync_state_export((union pfsync_state_union*)p,
				    s, PFSYNC_MSG_VERSION_1301);
				p++;
				nr++;
			}
			PF_HASHROW_UNLOCK(ih);
			/* copyout may fault; the row lock is dropped first. */
			error = copyout(pstore, out,
			    sizeof(struct pfsync_state_1301) * count);
			if (error)
				break;
			out = ps->ps_states + nr;
		}
DIOCGETSTATES_full:
		/* Tell the caller how many bytes were actually produced. */
		ps->ps_len = sizeof(struct pfsync_state_1301) * nr;
		free(pstore, M_TEMP);

		break;
	}
3615
	case DIOCGETSTATESV2: {
		struct pfioc_states_v2	*ps = (struct pfioc_states_v2 *)addr;
		struct pf_kstate	*s;
		struct pf_state_export	*pstore, *p;
		int i, nr;
		size_t slice_count = 16, count;
		void *out;

		/* Refuse requests for a newer format than we implement. */
		if (ps->ps_req_version > PF_STATE_VERSION) {
			error = ENOTSUP;
			break;
		}

		/*
		 * A zero/negative length is a size probe: report how much
		 * space the current state table would need and return.
		 */
		if (ps->ps_len <= 0) {
			nr = uma_zone_get_cur(V_pf_state_z);
			ps->ps_len = sizeof(struct pf_state_export) * nr;
			break;
		}

		out = ps->ps_states;
		pstore = mallocarray(slice_count,
		    sizeof(struct pf_state_export), M_TEMP, M_WAITOK | M_ZERO);
		nr = 0;

		/* Walk every id-hash bucket, exporting one bucket at a time. */
		for (i = 0; i <= pf_hashmask; i++) {
			struct pf_idhash *ih = &V_pf_idhash[i];

DIOCGETSTATESV2_retry:
			p = pstore;

			if (LIST_EMPTY(&ih->states))
				continue;

			PF_HASHROW_LOCK(ih);
			/* First pass: count live states in this bucket. */
			count = 0;
			LIST_FOREACH(s, &ih->states, entry) {
				if (s->timeout == PFTM_UNLINKED)
					continue;
				count++;
			}

			/*
			 * Scratch buffer too small for this bucket: drop the
			 * row lock before the sleeping allocation, grow the
			 * buffer and rescan the bucket from scratch.
			 */
			if (count > slice_count) {
				PF_HASHROW_UNLOCK(ih);
				free(pstore, M_TEMP);
				slice_count = count * 2;
				pstore = mallocarray(slice_count,
				    sizeof(struct pf_state_export), M_TEMP,
				    M_WAITOK | M_ZERO);
				goto DIOCGETSTATESV2_retry;
			}

			/* Out of user-supplied space: return what we have. */
			if ((nr+count) * sizeof(*p) > ps->ps_len) {
				PF_HASHROW_UNLOCK(ih);
				goto DIOCGETSTATESV2_full;
			}

			/* Second pass: export the bucket into the buffer. */
			LIST_FOREACH(s, &ih->states, entry) {
				if (s->timeout == PFTM_UNLINKED)
					continue;

				pf_state_export(p, s);
				p++;
				nr++;
			}
			PF_HASHROW_UNLOCK(ih);
			/* copyout may fault; the row lock is dropped first. */
			error = copyout(pstore, out,
			    sizeof(struct pf_state_export) * count);
			if (error)
				break;
			out = ps->ps_states + nr;
		}
DIOCGETSTATESV2_full:
		/* Tell the caller how many bytes were actually produced. */
		ps->ps_len = nr * sizeof(struct pf_state_export);
		free(pstore, M_TEMP);

		break;
	}
3693
	case DIOCGETSTATUSNV: {
		/* nvlist-based status fetch; all work is in the helper. */
		error = pf_getstatus((struct pfioc_nv *)addr);
		break;
	}
3698
3699         case DIOCSETSTATUSIF: {
3700                 struct pfioc_if *pi = (struct pfioc_if *)addr;
3701
3702                 if (pi->ifname[0] == 0) {
3703                         bzero(V_pf_status.ifname, IFNAMSIZ);
3704                         break;
3705                 }
3706                 PF_RULES_WLOCK();
3707                 error = pf_user_strcpy(V_pf_status.ifname, pi->ifname, IFNAMSIZ);
3708                 PF_RULES_WUNLOCK();
3709                 break;
3710         }
3711
	case DIOCCLRSTATUS: {
		/* Zero every status counter group and restart the epoch. */
		PF_RULES_WLOCK();
		for (int i = 0; i < PFRES_MAX; i++)
			counter_u64_zero(V_pf_status.counters[i]);
		for (int i = 0; i < FCNT_MAX; i++)
			pf_counter_u64_zero(&V_pf_status.fcounters[i]);
		for (int i = 0; i < SCNT_MAX; i++)
			counter_u64_zero(V_pf_status.scounters[i]);
		for (int i = 0; i < KLCNT_MAX; i++)
			counter_u64_zero(V_pf_status.lcounters[i]);
		V_pf_status.since = time_second;
		/* Also reset per-interface status if a status ifname is set. */
		if (*V_pf_status.ifname)
			pfi_update_status(V_pf_status.ifname, NULL);
		PF_RULES_WUNLOCK();
		break;
	}
3728
	case DIOCNATLOOK: {
		struct pfioc_natlook	*pnl = (struct pfioc_natlook *)addr;
		struct pf_state_key	*sk;
		struct pf_kstate	*state;
		struct pf_state_key_cmp	 key;
		int			 m = 0, direction = pnl->direction;
		int			 sidx, didx;

		/* NATLOOK src and dst are reversed, so reverse sidx/didx */
		sidx = (direction == PF_IN) ? 1 : 0;
		didx = (direction == PF_IN) ? 0 : 1;

		/*
		 * Protocol and both addresses are mandatory; TCP/UDP
		 * lookups additionally require both ports.
		 */
		if (!pnl->proto ||
		    PF_AZERO(&pnl->saddr, pnl->af) ||
		    PF_AZERO(&pnl->daddr, pnl->af) ||
		    ((pnl->proto == IPPROTO_TCP ||
		    pnl->proto == IPPROTO_UDP) &&
		    (!pnl->dport || !pnl->sport)))
			error = EINVAL;
		else {
			bzero(&key, sizeof(key));
			key.af = pnl->af;
			key.proto = pnl->proto;
			PF_ACPY(&key.addr[sidx], &pnl->saddr, pnl->af);
			key.port[sidx] = pnl->sport;
			PF_ACPY(&key.addr[didx], &pnl->daddr, pnl->af);
			key.port[didx] = pnl->dport;

			/* m counts how many states matched the key. */
			state = pf_find_state_all(&key, direction, &m);
			if (state == NULL) {
				error = ENOENT;
			} else {
				if (m > 1) {
					PF_STATE_UNLOCK(state);
					error = E2BIG;	/* more than one state */
				} else {
					/* Report the translated endpoints. */
					sk = state->key[sidx];
					PF_ACPY(&pnl->rsaddr, &sk->addr[sidx], sk->af);
					pnl->rsport = sk->port[sidx];
					PF_ACPY(&pnl->rdaddr, &sk->addr[didx], sk->af);
					pnl->rdport = sk->port[didx];
					PF_STATE_UNLOCK(state);
				}
			}
		}
		break;
	}
3776
	case DIOCSETTIMEOUT: {
		struct pfioc_tm	*pt = (struct pfioc_tm *)addr;
		int		 old;

		if (pt->timeout < 0 || pt->timeout >= PFTM_MAX ||
		    pt->seconds < 0) {
			error = EINVAL;
			break;
		}
		PF_RULES_WLOCK();
		old = V_pf_default_rule.timeout[pt->timeout];
		/* The purge interval must be at least one second. */
		if (pt->timeout == PFTM_INTERVAL && pt->seconds == 0)
			pt->seconds = 1;
		V_pf_default_rule.timeout[pt->timeout] = pt->seconds;
		/*
		 * If the purge interval was shortened, wake the purge
		 * thread so the new period takes effect immediately.
		 */
		if (pt->timeout == PFTM_INTERVAL && pt->seconds < old)
			wakeup(pf_purge_thread);
		/* Hand the previous value back to the caller. */
		pt->seconds = old;
		PF_RULES_WUNLOCK();
		break;
	}
3797
3798         case DIOCGETTIMEOUT: {
3799                 struct pfioc_tm *pt = (struct pfioc_tm *)addr;
3800
3801                 if (pt->timeout < 0 || pt->timeout >= PFTM_MAX) {
3802                         error = EINVAL;
3803                         break;
3804                 }
3805                 PF_RULES_RLOCK();
3806                 pt->seconds = V_pf_default_rule.timeout[pt->timeout];
3807                 PF_RULES_RUNLOCK();
3808                 break;
3809         }
3810
3811         case DIOCGETLIMIT: {
3812                 struct pfioc_limit      *pl = (struct pfioc_limit *)addr;
3813
3814                 if (pl->index < 0 || pl->index >= PF_LIMIT_MAX) {
3815                         error = EINVAL;
3816                         break;
3817                 }
3818                 PF_RULES_RLOCK();
3819                 pl->limit = V_pf_limits[pl->index].limit;
3820                 PF_RULES_RUNLOCK();
3821                 break;
3822         }
3823
	case DIOCSETLIMIT: {
		struct pfioc_limit	*pl = (struct pfioc_limit *)addr;
		int			 old_limit;

		PF_RULES_WLOCK();
		/* The index must be valid and backed by a UMA zone. */
		if (pl->index < 0 || pl->index >= PF_LIMIT_MAX ||
		    V_pf_limits[pl->index].zone == NULL) {
			PF_RULES_WUNLOCK();
			error = EINVAL;
			break;
		}
		/* Apply the new cap to the backing zone, then record it. */
		uma_zone_set_max(V_pf_limits[pl->index].zone, pl->limit);
		old_limit = V_pf_limits[pl->index].limit;
		V_pf_limits[pl->index].limit = pl->limit;
		/* Return the previous limit to the caller. */
		pl->limit = old_limit;
		PF_RULES_WUNLOCK();
		break;
	}
3842
3843         case DIOCSETDEBUG: {
3844                 u_int32_t       *level = (u_int32_t *)addr;
3845
3846                 PF_RULES_WLOCK();
3847                 V_pf_status.debug = *level;
3848                 PF_RULES_WUNLOCK();
3849                 break;
3850         }
3851
	case DIOCCLRRULECTRS: {
		/* obsoleted by DIOCGETRULE with action=PF_GET_CLR_CNTR */
		struct pf_kruleset	*ruleset = &pf_main_ruleset;
		struct pf_krule		*rule;

		/*
		 * Zero the evaluation/packet/byte counters of every active
		 * filter rule in the main ruleset.
		 */
		PF_RULES_WLOCK();
		TAILQ_FOREACH(rule,
		    ruleset->rules[PF_RULESET_FILTER].active.ptr, entries) {
			pf_counter_u64_zero(&rule->evaluations);
			for (int i = 0; i < 2; i++) {
				pf_counter_u64_zero(&rule->packets[i]);
				pf_counter_u64_zero(&rule->bytes[i]);
			}
		}
		PF_RULES_WUNLOCK();
		break;
	}
3869
3870         case DIOCGIFSPEEDV0:
3871         case DIOCGIFSPEEDV1: {
3872                 struct pf_ifspeed_v1    *psp = (struct pf_ifspeed_v1 *)addr;
3873                 struct pf_ifspeed_v1    ps;
3874                 struct ifnet            *ifp;
3875
3876                 if (psp->ifname[0] == '\0') {
3877                         error = EINVAL;
3878                         break;
3879                 }
3880
3881                 error = pf_user_strcpy(ps.ifname, psp->ifname, IFNAMSIZ);
3882                 if (error != 0)
3883                         break;
3884                 ifp = ifunit(ps.ifname);
3885                 if (ifp != NULL) {
3886                         psp->baudrate32 =
3887                             (u_int32_t)uqmin(ifp->if_baudrate, UINT_MAX);
3888                         if (cmd == DIOCGIFSPEEDV1)
3889                                 psp->baudrate = ifp->if_baudrate;
3890                 } else {
3891                         error = EINVAL;
3892                 }
3893                 break;
3894         }
3895
3896 #ifdef ALTQ
	case DIOCSTARTALTQ: {
		struct pf_altq		*altq;

		PF_RULES_WLOCK();
		/* enable all altq interfaces on active list */
		TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) {
			if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
				error = pf_enable_altq(altq);
				if (error != 0)
					break;
			}
		}
		if (error == 0)
			V_pf_altq_running = 1;
		PF_RULES_WUNLOCK();
		/*
		 * NOTE(review): this message is logged even when enabling
		 * failed above — confirm that is intentional.
		 */
		DPFPRINTF(PF_DEBUG_MISC, ("altq: started\n"));
		break;
	}
3915
	case DIOCSTOPALTQ: {
		struct pf_altq		*altq;

		PF_RULES_WLOCK();
		/* disable all altq interfaces on active list */
		TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) {
			if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
				error = pf_disable_altq(altq);
				if (error != 0)
					break;
			}
		}
		if (error == 0)
			V_pf_altq_running = 0;
		PF_RULES_WUNLOCK();
		/*
		 * NOTE(review): this message is logged even when disabling
		 * failed above — confirm that is intentional.
		 */
		DPFPRINTF(PF_DEBUG_MISC, ("altq: stopped\n"));
		break;
	}
3934
	case DIOCADDALTQV0:
	case DIOCADDALTQV1: {
		struct pfioc_altq_v1	*pa = (struct pfioc_altq_v1 *)addr;
		struct pf_altq		*altq, *a;
		struct ifnet		*ifp;

		/*
		 * Import the version-dependent userland altq into a kernel
		 * pf_altq before taking any locks.
		 */
		altq = malloc(sizeof(*altq), M_PFALTQ, M_WAITOK | M_ZERO);
		error = pf_import_kaltq(pa, altq, IOCPARM_LEN(cmd));
		if (error)
			break;
		altq->local_flags = 0;

		PF_RULES_WLOCK();
		/* Additions must quote the current inactive ticket. */
		if (pa->ticket != V_ticket_altqs_inactive) {
			PF_RULES_WUNLOCK();
			free(altq, M_PFALTQ);
			error = EBUSY;
			break;
		}

		/*
		 * if this is for a queue, find the discipline and
		 * copy the necessary fields
		 */
		if (altq->qname[0] != 0) {
			if ((altq->qid = pf_qname2qid(altq->qname)) == 0) {
				PF_RULES_WUNLOCK();
				error = EBUSY;
				free(altq, M_PFALTQ);
				break;
			}
			altq->altq_disc = NULL;
			TAILQ_FOREACH(a, V_pf_altq_ifs_inactive, entries) {
				if (strncmp(a->ifname, altq->ifname,
				    IFNAMSIZ) == 0) {
					altq->altq_disc = a->altq_disc;
					break;
				}
			}
		}

		/*
		 * A missing interface is tolerated and merely flagged as
		 * removed; otherwise the discipline is attached now.
		 */
		if ((ifp = ifunit(altq->ifname)) == NULL)
			altq->local_flags |= PFALTQ_FLAG_IF_REMOVED;
		else
			error = altq_add(ifp, altq);

		if (error) {
			PF_RULES_WUNLOCK();
			free(altq, M_PFALTQ);
			break;
		}

		/* Queues and bare interface entries live on separate lists. */
		if (altq->qname[0] != 0)
			TAILQ_INSERT_TAIL(V_pf_altqs_inactive, altq, entries);
		else
			TAILQ_INSERT_TAIL(V_pf_altq_ifs_inactive, altq, entries);
		/* version error check done on import above */
		pf_export_kaltq(altq, pa, IOCPARM_LEN(cmd));
		PF_RULES_WUNLOCK();
		break;
	}
3996
3997         case DIOCGETALTQSV0:
3998         case DIOCGETALTQSV1: {
3999                 struct pfioc_altq_v1    *pa = (struct pfioc_altq_v1 *)addr;
4000                 struct pf_altq          *altq;
4001
4002                 PF_RULES_RLOCK();
4003                 pa->nr = 0;
4004                 TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries)
4005                         pa->nr++;
4006                 TAILQ_FOREACH(altq, V_pf_altqs_active, entries)
4007                         pa->nr++;
4008                 pa->ticket = V_ticket_altqs_active;
4009                 PF_RULES_RUNLOCK();
4010                 break;
4011         }
4012
	case DIOCGETALTQV0:
	case DIOCGETALTQV1: {
		/* Copy out the pa->nr'th active altq in the caller's
		 * requested structure version. */
		struct pfioc_altq_v1	*pa = (struct pfioc_altq_v1 *)addr;
		struct pf_altq		*altq;

		PF_RULES_RLOCK();
		/* The caller's ticket must match the active generation. */
		if (pa->ticket != V_ticket_altqs_active) {
			PF_RULES_RUNLOCK();
			error = EBUSY;
			break;
		}
		altq = pf_altq_get_nth_active(pa->nr);
		if (altq == NULL) {
			/* Index past the end of the active lists. */
			PF_RULES_RUNLOCK();
			error = EBUSY;
			break;
		}
		/* Translate kernel altq to the versioned user structure. */
		pf_export_kaltq(altq, pa, IOCPARM_LEN(cmd));
		PF_RULES_RUNLOCK();
		break;
	}

	case DIOCCHANGEALTQV0:
	case DIOCCHANGEALTQV1:
		/* CHANGEALTQ not supported yet! */
		error = ENODEV;
		break;
4040
	case DIOCGETQSTATSV0:
	case DIOCGETQSTATSV1: {
		/* Copy out scheduler statistics for the pq->nr'th active
		 * altq into the caller-supplied buffer. */
		struct pfioc_qstats_v1	*pq = (struct pfioc_qstats_v1 *)addr;
		struct pf_altq		*altq;
		int			 nbytes;
		u_int32_t		 version;

		PF_RULES_RLOCK();
		/* Stats are only valid against the active generation. */
		if (pq->ticket != V_ticket_altqs_active) {
			PF_RULES_RUNLOCK();
			error = EBUSY;
			break;
		}
		nbytes = pq->nbytes;
		altq = pf_altq_get_nth_active(pq->nr);
		if (altq == NULL) {
			/* Index past the end of the active lists. */
			PF_RULES_RUNLOCK();
			error = EBUSY;
			break;
		}

		/* No stats for an altq whose interface has disappeared. */
		if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) != 0) {
			PF_RULES_RUNLOCK();
			error = ENXIO;
			break;
		}
		/*
		 * NOTE(review): 'altq' is dereferenced below after the rules
		 * lock is dropped — presumably safe because active altqs are
		 * only torn down under other serialization; verify.
		 */
		PF_RULES_RUNLOCK();
		if (cmd == DIOCGETQSTATSV0)
			version = 0;  /* DIOCGETQSTATSV0 means stats struct v0 */
		else
			version = pq->version;
		error = altq_getqstats(altq, pq->buf, &nbytes, version);
		if (error == 0) {
			/* Report the scheduler type and the bytes written. */
			pq->scheduler = altq->scheduler;
			pq->nbytes = nbytes;
		}
		break;
	}
4079 #endif /* ALTQ */
4080
	case DIOCBEGINADDRS: {
		/* Begin a pool-address transaction: flush the staging buffer
		 * and hand out a fresh ticket for subsequent DIOCADDADDRs. */
		struct pfioc_pooladdr	*pp = (struct pfioc_pooladdr *)addr;

		PF_RULES_WLOCK();
		pf_empty_kpool(&V_pf_pabuf);
		pp->ticket = ++V_ticket_pabuf;
		PF_RULES_WUNLOCK();
		break;
	}
4090
4091         case DIOCADDADDR: {
4092                 struct pfioc_pooladdr   *pp = (struct pfioc_pooladdr *)addr;
4093                 struct pf_kpooladdr     *pa;
4094                 struct pfi_kkif         *kif = NULL;
4095
4096 #ifndef INET
4097                 if (pp->af == AF_INET) {
4098                         error = EAFNOSUPPORT;
4099                         break;
4100                 }
4101 #endif /* INET */
4102 #ifndef INET6
4103                 if (pp->af == AF_INET6) {
4104                         error = EAFNOSUPPORT;
4105                         break;
4106                 }
4107 #endif /* INET6 */
4108                 if (pp->addr.addr.type != PF_ADDR_ADDRMASK &&
4109                     pp->addr.addr.type != PF_ADDR_DYNIFTL &&
4110                     pp->addr.addr.type != PF_ADDR_TABLE) {
4111                         error = EINVAL;
4112                         break;
4113                 }
4114                 if (pp->addr.addr.p.dyn != NULL) {
4115                         error = EINVAL;
4116                         break;
4117                 }
4118                 pa = malloc(sizeof(*pa), M_PFRULE, M_WAITOK);
4119                 error = pf_pooladdr_to_kpooladdr(&pp->addr, pa);
4120                 if (error != 0)
4121                         break;
4122                 if (pa->ifname[0])
4123                         kif = pf_kkif_create(M_WAITOK);
4124                 PF_RULES_WLOCK();
4125                 if (pp->ticket != V_ticket_pabuf) {
4126                         PF_RULES_WUNLOCK();
4127                         if (pa->ifname[0])
4128                                 pf_kkif_free(kif);
4129                         free(pa, M_PFRULE);
4130                         error = EBUSY;
4131                         break;
4132                 }
4133                 if (pa->ifname[0]) {
4134                         pa->kif = pfi_kkif_attach(kif, pa->ifname);
4135                         kif = NULL;
4136                         pfi_kkif_ref(pa->kif);
4137                 } else
4138                         pa->kif = NULL;
4139                 if (pa->addr.type == PF_ADDR_DYNIFTL && ((error =
4140                     pfi_dynaddr_setup(&pa->addr, pp->af)) != 0)) {
4141                         if (pa->ifname[0])
4142                                 pfi_kkif_unref(pa->kif);
4143                         PF_RULES_WUNLOCK();
4144                         free(pa, M_PFRULE);
4145                         break;
4146                 }
4147                 TAILQ_INSERT_TAIL(&V_pf_pabuf, pa, entries);
4148                 PF_RULES_WUNLOCK();
4149                 break;
4150         }
4151
	case DIOCGETADDRS: {
		/* Count the addresses in the pool identified by
		 * anchor/ticket/r_action/r_num and report it in pp->nr. */
		struct pfioc_pooladdr	*pp = (struct pfioc_pooladdr *)addr;
		struct pf_kpool		*pool;
		struct pf_kpooladdr	*pa;

		/* Defensively NUL-terminate the user-supplied anchor path. */
		pp->anchor[sizeof(pp->anchor) - 1] = 0;
		pp->nr = 0;

		PF_RULES_RLOCK();
		pool = pf_get_kpool(pp->anchor, pp->ticket, pp->r_action,
		    pp->r_num, 0, 1, 0);
		if (pool == NULL) {
			PF_RULES_RUNLOCK();
			error = EBUSY;
			break;
		}
		TAILQ_FOREACH(pa, &pool->list, entries)
			pp->nr++;
		PF_RULES_RUNLOCK();
		break;
	}
4173
	case DIOCGETADDR: {
		/* Copy out the pp->nr'th address of the designated pool. */
		struct pfioc_pooladdr	*pp = (struct pfioc_pooladdr *)addr;
		struct pf_kpool		*pool;
		struct pf_kpooladdr	*pa;
		u_int32_t		 nr = 0;

		/* Defensively NUL-terminate the user-supplied anchor path. */
		pp->anchor[sizeof(pp->anchor) - 1] = 0;

		PF_RULES_RLOCK();
		pool = pf_get_kpool(pp->anchor, pp->ticket, pp->r_action,
		    pp->r_num, 0, 1, 1);
		if (pool == NULL) {
			PF_RULES_RUNLOCK();
			error = EBUSY;
			break;
		}
		/* Walk the list until the requested index is reached. */
		pa = TAILQ_FIRST(&pool->list);
		while ((pa != NULL) && (nr < pp->nr)) {
			pa = TAILQ_NEXT(pa, entries);
			nr++;
		}
		if (pa == NULL) {
			/* Index past the end of the pool. */
			PF_RULES_RUNLOCK();
			error = EBUSY;
			break;
		}
		pf_kpooladdr_to_pooladdr(pa, &pp->addr);
		/* Scrub kernel pointers before copying out. */
		pf_addr_copyout(&pp->addr.addr);
		PF_RULES_RUNLOCK();
		break;
	}
4205
	case DIOCCHANGEADDR: {
		/*
		 * Insert, replace or remove a single address in an existing
		 * rule pool, addressed by anchor/ticket/r_action/r_num.
		 * Errors inside the locked region funnel through the
		 * DIOCCHANGEADDR_error label, which releases newpa/kif.
		 */
		struct pfioc_pooladdr	*pca = (struct pfioc_pooladdr *)addr;
		struct pf_kpool		*pool;
		struct pf_kpooladdr	*oldpa = NULL, *newpa = NULL;
		struct pf_kruleset	*ruleset;
		struct pfi_kkif		*kif = NULL;

		/* Defensively NUL-terminate the user-supplied anchor path. */
		pca->anchor[sizeof(pca->anchor) - 1] = 0;

		if (pca->action < PF_CHANGE_ADD_HEAD ||
		    pca->action > PF_CHANGE_REMOVE) {
			error = EINVAL;
			break;
		}
		if (pca->addr.addr.type != PF_ADDR_ADDRMASK &&
		    pca->addr.addr.type != PF_ADDR_DYNIFTL &&
		    pca->addr.addr.type != PF_ADDR_TABLE) {
			error = EINVAL;
			break;
		}
		/* Reject user-supplied kernel pointers. */
		if (pca->addr.addr.p.dyn != NULL) {
			error = EINVAL;
			break;
		}

		/* Allocate the replacement entry before taking the lock. */
		if (pca->action != PF_CHANGE_REMOVE) {
#ifndef INET
			if (pca->af == AF_INET) {
				error = EAFNOSUPPORT;
				break;
			}
#endif /* INET */
#ifndef INET6
			if (pca->af == AF_INET6) {
				error = EAFNOSUPPORT;
				break;
			}
#endif /* INET6 */
			newpa = malloc(sizeof(*newpa), M_PFRULE, M_WAITOK);
			/*
			 * NOTE(review): copies the user pf_pooladdr over the
			 * head of the kernel pf_kpooladdr — assumes the two
			 * structs share a layout-compatible prefix; verify.
			 */
			bcopy(&pca->addr, newpa, sizeof(struct pf_pooladdr));
			if (newpa->ifname[0])
				kif = pf_kkif_create(M_WAITOK);
			newpa->kif = NULL;
		}
#define ERROUT(x)	ERROUT_IOCTL(DIOCCHANGEADDR_error, x)
		PF_RULES_WLOCK();
		ruleset = pf_find_kruleset(pca->anchor);
		if (ruleset == NULL)
			ERROUT(EBUSY);

		pool = pf_get_kpool(pca->anchor, pca->ticket, pca->r_action,
		    pca->r_num, pca->r_last, 1, 1);
		if (pool == NULL)
			ERROUT(EBUSY);

		if (pca->action != PF_CHANGE_REMOVE) {
			if (newpa->ifname[0]) {
				newpa->kif = pfi_kkif_attach(kif, newpa->ifname);
				pfi_kkif_ref(newpa->kif);
				kif = NULL;	/* ownership passed on */
			}

			/* Resolve the address type's backing object. */
			switch (newpa->addr.type) {
			case PF_ADDR_DYNIFTL:
				error = pfi_dynaddr_setup(&newpa->addr,
				    pca->af);
				break;
			case PF_ADDR_TABLE:
				newpa->addr.p.tbl = pfr_attach_table(ruleset,
				    newpa->addr.v.tblname);
				if (newpa->addr.p.tbl == NULL)
					error = ENOMEM;
				break;
			}
			if (error)
				goto DIOCCHANGEADDR_error;
		}

		/* Locate the reference entry the action applies to. */
		switch (pca->action) {
		case PF_CHANGE_ADD_HEAD:
			oldpa = TAILQ_FIRST(&pool->list);
			break;
		case PF_CHANGE_ADD_TAIL:
			oldpa = TAILQ_LAST(&pool->list, pf_kpalist);
			break;
		default:
			/* ADD_BEFORE/ADD_AFTER/REMOVE address by index. */
			oldpa = TAILQ_FIRST(&pool->list);
			for (int i = 0; oldpa && i < pca->nr; i++)
				oldpa = TAILQ_NEXT(oldpa, entries);

			if (oldpa == NULL)
				ERROUT(EINVAL);
		}

		if (pca->action == PF_CHANGE_REMOVE) {
			TAILQ_REMOVE(&pool->list, oldpa, entries);
			/* Release the removed entry's backing object. */
			switch (oldpa->addr.type) {
			case PF_ADDR_DYNIFTL:
				pfi_dynaddr_remove(oldpa->addr.p.dyn);
				break;
			case PF_ADDR_TABLE:
				pfr_detach_table(oldpa->addr.p.tbl);
				break;
			}
			if (oldpa->kif)
				pfi_kkif_unref(oldpa->kif);
			free(oldpa, M_PFRULE);
		} else {
			if (oldpa == NULL)
				TAILQ_INSERT_TAIL(&pool->list, newpa, entries);
			else if (pca->action == PF_CHANGE_ADD_HEAD ||
			    pca->action == PF_CHANGE_ADD_BEFORE)
				TAILQ_INSERT_BEFORE(oldpa, newpa, entries);
			else
				TAILQ_INSERT_AFTER(&pool->list, oldpa,
				    newpa, entries);
		}

		/*
		 * NOTE(review): if PF_CHANGE_REMOVE emptied the pool,
		 * pool->cur is NULL here and the PF_ACPY below dereferences
		 * it — confirm whether an empty pool can be reached via this
		 * ioctl.
		 */
		pool->cur = TAILQ_FIRST(&pool->list);
		PF_ACPY(&pool->counter, &pool->cur->addr.v.a.addr, pca->af);
		PF_RULES_WUNLOCK();
		break;

#undef ERROUT
DIOCCHANGEADDR_error:
		/* Unified error exit: release everything we still own. */
		if (newpa != NULL) {
			if (newpa->kif)
				pfi_kkif_unref(newpa->kif);
			free(newpa, M_PFRULE);
		}
		PF_RULES_WUNLOCK();
		pf_kkif_free(kif);
		break;
	}
4340
	case DIOCGETRULESETS: {
		/* Count the immediate child anchors of the given path. */
		struct pfioc_ruleset	*pr = (struct pfioc_ruleset *)addr;
		struct pf_kruleset	*ruleset;
		struct pf_kanchor	*anchor;

		/* Defensively NUL-terminate the user-supplied path. */
		pr->path[sizeof(pr->path) - 1] = 0;

		PF_RULES_RLOCK();
		if ((ruleset = pf_find_kruleset(pr->path)) == NULL) {
			PF_RULES_RUNLOCK();
			error = ENOENT;
			break;
		}
		pr->nr = 0;
		if (ruleset->anchor == NULL) {
			/* XXX kludge for pf_main_ruleset */
			RB_FOREACH(anchor, pf_kanchor_global, &V_pf_anchors)
				if (anchor->parent == NULL)
					pr->nr++;
		} else {
			RB_FOREACH(anchor, pf_kanchor_node,
			    &ruleset->anchor->children)
				pr->nr++;
		}
		PF_RULES_RUNLOCK();
		break;
	}
4368
	case DIOCGETRULESET: {
		/* Return the name of the pr->nr'th child anchor under the
		 * given path (iteration counterpart of DIOCGETRULESETS). */
		struct pfioc_ruleset	*pr = (struct pfioc_ruleset *)addr;
		struct pf_kruleset	*ruleset;
		struct pf_kanchor	*anchor;
		u_int32_t		 nr = 0;

		/* Defensively NUL-terminate the user-supplied path. */
		pr->path[sizeof(pr->path) - 1] = 0;

		PF_RULES_RLOCK();
		if ((ruleset = pf_find_kruleset(pr->path)) == NULL) {
			PF_RULES_RUNLOCK();
			error = ENOENT;
			break;
		}
		pr->name[0] = 0;
		if (ruleset->anchor == NULL) {
			/* XXX kludge for pf_main_ruleset */
			RB_FOREACH(anchor, pf_kanchor_global, &V_pf_anchors)
				if (anchor->parent == NULL && nr++ == pr->nr) {
					strlcpy(pr->name, anchor->name,
					    sizeof(pr->name));
					break;
				}
		} else {
			RB_FOREACH(anchor, pf_kanchor_node,
			    &ruleset->anchor->children)
				if (nr++ == pr->nr) {
					strlcpy(pr->name, anchor->name,
					    sizeof(pr->name));
					break;
				}
		}
		/* Empty name means the index was out of range. */
		if (!pr->name[0])
			error = EBUSY;
		PF_RULES_RUNLOCK();
		break;
	}
4406
4407         case DIOCRCLRTABLES: {
4408                 struct pfioc_table *io = (struct pfioc_table *)addr;
4409
4410                 if (io->pfrio_esize != 0) {
4411                         error = ENODEV;
4412                         break;
4413                 }
4414                 PF_RULES_WLOCK();
4415                 error = pfr_clr_tables(&io->pfrio_table, &io->pfrio_ndel,
4416                     io->pfrio_flags | PFR_FLAG_USERIOCTL);
4417                 PF_RULES_WUNLOCK();
4418                 break;
4419         }
4420
	case DIOCRADDTABLES: {
		/* Create the tables described by a user array of
		 * pfr_table entries. */
		struct pfioc_table *io = (struct pfioc_table *)addr;
		struct pfr_table *pfrts;
		size_t totlen;

		if (io->pfrio_esize != sizeof(struct pfr_table)) {
			error = ENODEV;
			break;
		}

		/* Bound the request before sizing the copyin buffer. */
		if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount ||
		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_table))) {
			error = ENOMEM;
			break;
		}

		totlen = io->pfrio_size * sizeof(struct pfr_table);
		pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
		    M_TEMP, M_WAITOK);
		error = copyin(io->pfrio_buffer, pfrts, totlen);
		if (error) {
			free(pfrts, M_TEMP);
			break;
		}
		PF_RULES_WLOCK();
		error = pfr_add_tables(pfrts, io->pfrio_size,
		    &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL);
		PF_RULES_WUNLOCK();
		free(pfrts, M_TEMP);
		break;
	}
4452
	case DIOCRDELTABLES: {
		/* Delete the tables named by a user array of pfr_table
		 * entries (mirror of DIOCRADDTABLES). */
		struct pfioc_table *io = (struct pfioc_table *)addr;
		struct pfr_table *pfrts;
		size_t totlen;

		if (io->pfrio_esize != sizeof(struct pfr_table)) {
			error = ENODEV;
			break;
		}

		/* Bound the request before sizing the copyin buffer. */
		if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount ||
		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_table))) {
			error = ENOMEM;
			break;
		}

		totlen = io->pfrio_size * sizeof(struct pfr_table);
		pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
		    M_TEMP, M_WAITOK);
		error = copyin(io->pfrio_buffer, pfrts, totlen);
		if (error) {
			free(pfrts, M_TEMP);
			break;
		}
		PF_RULES_WLOCK();
		error = pfr_del_tables(pfrts, io->pfrio_size,
		    &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL);
		PF_RULES_WUNLOCK();
		free(pfrts, M_TEMP);
		break;
	}
4484
	case DIOCRGETTABLES: {
		/* Copy out up to pfrio_size matching tables. */
		struct pfioc_table *io = (struct pfioc_table *)addr;
		struct pfr_table *pfrts;
		size_t totlen;
		int n;

		if (io->pfrio_esize != sizeof(struct pfr_table)) {
			error = ENODEV;
			break;
		}
		PF_RULES_RLOCK();
		n = pfr_table_count(&io->pfrio_table, io->pfrio_flags);
		if (n < 0) {
			PF_RULES_RUNLOCK();
			error = EINVAL;
			break;
		}
		/* Never hand out more entries than currently exist. */
		io->pfrio_size = min(io->pfrio_size, n);

		totlen = io->pfrio_size * sizeof(struct pfr_table);

		/* M_NOWAIT: we still hold the rules read lock here. */
		pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
		    M_TEMP, M_NOWAIT | M_ZERO);
		if (pfrts == NULL) {
			error = ENOMEM;
			PF_RULES_RUNLOCK();
			break;
		}
		error = pfr_get_tables(&io->pfrio_table, pfrts,
		    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
		PF_RULES_RUNLOCK();
		if (error == 0)
			error = copyout(pfrts, io->pfrio_buffer, totlen);
		free(pfrts, M_TEMP);
		break;
	}
4521
	case DIOCRGETTSTATS: {
		/* Copy out per-table statistics (pfr_tstats records). */
		struct pfioc_table *io = (struct pfioc_table *)addr;
		struct pfr_tstats *pfrtstats;
		size_t totlen;
		int n;

		if (io->pfrio_esize != sizeof(struct pfr_tstats)) {
			error = ENODEV;
			break;
		}
		/* Lock order: table-stats lock before the rules lock. */
		PF_TABLE_STATS_LOCK();
		PF_RULES_RLOCK();
		n = pfr_table_count(&io->pfrio_table, io->pfrio_flags);
		if (n < 0) {
			PF_RULES_RUNLOCK();
			PF_TABLE_STATS_UNLOCK();
			error = EINVAL;
			break;
		}
		/* Never hand out more entries than currently exist. */
		io->pfrio_size = min(io->pfrio_size, n);

		totlen = io->pfrio_size * sizeof(struct pfr_tstats);
		/* M_NOWAIT: allocation happens with both locks held. */
		pfrtstats = mallocarray(io->pfrio_size,
		    sizeof(struct pfr_tstats), M_TEMP, M_NOWAIT | M_ZERO);
		if (pfrtstats == NULL) {
			error = ENOMEM;
			PF_RULES_RUNLOCK();
			PF_TABLE_STATS_UNLOCK();
			break;
		}
		error = pfr_get_tstats(&io->pfrio_table, pfrtstats,
		    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
		PF_RULES_RUNLOCK();
		PF_TABLE_STATS_UNLOCK();
		if (error == 0)
			error = copyout(pfrtstats, io->pfrio_buffer, totlen);
		free(pfrtstats, M_TEMP);
		break;
	}
4561
	case DIOCRCLRTSTATS: {
		/* Zero the statistics of the tables named by the caller. */
		struct pfioc_table *io = (struct pfioc_table *)addr;
		struct pfr_table *pfrts;
		size_t totlen;

		if (io->pfrio_esize != sizeof(struct pfr_table)) {
			error = ENODEV;
			break;
		}

		if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount ||
		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_table))) {
			/* We used to count tables and use the minimum required
			 * size, so we didn't fail on overly large requests.
			 * Keep doing so. */
			/* Note: deliberately returns success without
			 * clearing anything on oversize requests. */
			io->pfrio_size = pf_ioctl_maxcount;
			break;
		}

		totlen = io->pfrio_size * sizeof(struct pfr_table);
		pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
		    M_TEMP, M_WAITOK);
		error = copyin(io->pfrio_buffer, pfrts, totlen);
		if (error) {
			free(pfrts, M_TEMP);
			break;
		}

		/* Lock order: table-stats lock before the rules lock. */
		PF_TABLE_STATS_LOCK();
		PF_RULES_RLOCK();
		error = pfr_clr_tstats(pfrts, io->pfrio_size,
		    &io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL);
		PF_RULES_RUNLOCK();
		PF_TABLE_STATS_UNLOCK();
		free(pfrts, M_TEMP);
		break;
	}
4599
	case DIOCRSETTFLAGS: {
		/* Set/clear flags on the tables named by the caller. */
		struct pfioc_table *io = (struct pfioc_table *)addr;
		struct pfr_table *pfrts;
		size_t totlen;
		int n;

		if (io->pfrio_esize != sizeof(struct pfr_table)) {
			error = ENODEV;
			break;
		}

		PF_RULES_RLOCK();
		n = pfr_table_count(&io->pfrio_table, io->pfrio_flags);
		if (n < 0) {
			PF_RULES_RUNLOCK();
			error = EINVAL;
			break;
		}

		io->pfrio_size = min(io->pfrio_size, n);
		/*
		 * NOTE(review): the lock is dropped here for the copyin, so
		 * the count may be stale by the time pfr_set_tflags() runs
		 * below — presumably pfr_set_tflags() revalidates; verify.
		 */
		PF_RULES_RUNLOCK();

		totlen = io->pfrio_size * sizeof(struct pfr_table);
		pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
		    M_TEMP, M_WAITOK);
		error = copyin(io->pfrio_buffer, pfrts, totlen);
		if (error) {
			free(pfrts, M_TEMP);
			break;
		}
		PF_RULES_WLOCK();
		error = pfr_set_tflags(pfrts, io->pfrio_size,
		    io->pfrio_setflag, io->pfrio_clrflag, &io->pfrio_nchange,
		    &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL);
		PF_RULES_WUNLOCK();
		free(pfrts, M_TEMP);
		break;
	}
4638
4639         case DIOCRCLRADDRS: {
4640                 struct pfioc_table *io = (struct pfioc_table *)addr;
4641
4642                 if (io->pfrio_esize != 0) {
4643                         error = ENODEV;
4644                         break;
4645                 }
4646                 PF_RULES_WLOCK();
4647                 error = pfr_clr_addrs(&io->pfrio_table, &io->pfrio_ndel,
4648                     io->pfrio_flags | PFR_FLAG_USERIOCTL);
4649                 PF_RULES_WUNLOCK();
4650                 break;
4651         }
4652
	case DIOCRADDADDRS: {
		/* Add a user-supplied array of addresses to a table. */
		struct pfioc_table *io = (struct pfioc_table *)addr;
		struct pfr_addr *pfras;
		size_t totlen;

		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
			error = ENODEV;
			break;
		}
		/* Bound the request before sizing the copyin buffer. */
		if (io->pfrio_size < 0 ||
		    io->pfrio_size > pf_ioctl_maxcount ||
		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) {
			error = EINVAL;
			break;
		}
		totlen = io->pfrio_size * sizeof(struct pfr_addr);
		pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
		    M_TEMP, M_WAITOK);
		error = copyin(io->pfrio_buffer, pfras, totlen);
		if (error) {
			free(pfras, M_TEMP);
			break;
		}
		PF_RULES_WLOCK();
		error = pfr_add_addrs(&io->pfrio_table, pfras,
		    io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags |
		    PFR_FLAG_USERIOCTL);
		PF_RULES_WUNLOCK();
		/* With FEEDBACK, return the per-entry result codes. */
		if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
			error = copyout(pfras, io->pfrio_buffer, totlen);
		free(pfras, M_TEMP);
		break;
	}
4686
	case DIOCRDELADDRS: {
		/* Delete a user-supplied array of addresses from a table
		 * (mirror of DIOCRADDADDRS). */
		struct pfioc_table *io = (struct pfioc_table *)addr;
		struct pfr_addr *pfras;
		size_t totlen;

		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
			error = ENODEV;
			break;
		}
		/* Bound the request before sizing the copyin buffer. */
		if (io->pfrio_size < 0 ||
		    io->pfrio_size > pf_ioctl_maxcount ||
		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) {
			error = EINVAL;
			break;
		}
		totlen = io->pfrio_size * sizeof(struct pfr_addr);
		pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
		    M_TEMP, M_WAITOK);
		error = copyin(io->pfrio_buffer, pfras, totlen);
		if (error) {
			free(pfras, M_TEMP);
			break;
		}
		PF_RULES_WLOCK();
		error = pfr_del_addrs(&io->pfrio_table, pfras,
		    io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags |
		    PFR_FLAG_USERIOCTL);
		PF_RULES_WUNLOCK();
		/* With FEEDBACK, return the per-entry result codes. */
		if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
			error = copyout(pfras, io->pfrio_buffer, totlen);
		free(pfras, M_TEMP);
		break;
	}
4720
4721         case DIOCRSETADDRS: {
4722                 struct pfioc_table *io = (struct pfioc_table *)addr;
4723                 struct pfr_addr *pfras;
4724                 size_t totlen, count;
4725
4726                 if (io->pfrio_esize != sizeof(struct pfr_addr)) {
4727                         error = ENODEV;
4728                         break;
4729                 }
4730                 if (io->pfrio_size < 0 || io->pfrio_size2 < 0) {
4731                         error = EINVAL;
4732                         break;
4733                 }
4734                 count = max(io->pfrio_size, io->pfrio_size2);
4735                 if (count > pf_ioctl_maxcount ||
4736                     WOULD_OVERFLOW(count, sizeof(struct pfr_addr))) {
4737                         error = EINVAL;
4738                         break;
4739                 }
4740                 totlen = count * sizeof(struct pfr_addr);
4741                 pfras = mallocarray(count, sizeof(struct pfr_addr), M_TEMP,
4742                     M_WAITOK);
4743                 error = copyin(io->pfrio_buffer, pfras, totlen);
4744                 if (error) {
4745                         free(pfras, M_TEMP);
4746                         break;
4747                 }
4748                 PF_RULES_WLOCK();
4749                 error = pfr_set_addrs(&io->pfrio_table, pfras,
4750                     io->pfrio_size, &io->pfrio_size2, &io->pfrio_nadd,
4751                     &io->pfrio_ndel, &io->pfrio_nchange, io->pfrio_flags |
4752                     PFR_FLAG_USERIOCTL, 0);
4753                 PF_RULES_WUNLOCK();
4754                 if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
4755                         error = copyout(pfras, io->pfrio_buffer, totlen);
4756                 free(pfras, M_TEMP);
4757                 break;
4758         }
4759
4760         case DIOCRGETADDRS: {
4761                 struct pfioc_table *io = (struct pfioc_table *)addr;
4762                 struct pfr_addr *pfras;
4763                 size_t totlen;
4764
4765                 if (io->pfrio_esize != sizeof(struct pfr_addr)) {
4766                         error = ENODEV;
4767                         break;
4768                 }
4769                 if (io->pfrio_size < 0 ||
4770                     io->pfrio_size > pf_ioctl_maxcount ||
4771                     WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) {
4772                         error = EINVAL;
4773                         break;
4774                 }
4775                 totlen = io->pfrio_size * sizeof(struct pfr_addr);
4776                 pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
4777                     M_TEMP, M_WAITOK | M_ZERO);
4778                 PF_RULES_RLOCK();
4779                 error = pfr_get_addrs(&io->pfrio_table, pfras,
4780                     &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
4781                 PF_RULES_RUNLOCK();
4782                 if (error == 0)
4783                         error = copyout(pfras, io->pfrio_buffer, totlen);
4784                 free(pfras, M_TEMP);
4785                 break;
4786         }
4787
4788         case DIOCRGETASTATS: {
4789                 struct pfioc_table *io = (struct pfioc_table *)addr;
4790                 struct pfr_astats *pfrastats;
4791                 size_t totlen;
4792
4793                 if (io->pfrio_esize != sizeof(struct pfr_astats)) {
4794                         error = ENODEV;
4795                         break;
4796                 }
4797                 if (io->pfrio_size < 0 ||
4798                     io->pfrio_size > pf_ioctl_maxcount ||
4799                     WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_astats))) {
4800                         error = EINVAL;
4801                         break;
4802                 }
4803                 totlen = io->pfrio_size * sizeof(struct pfr_astats);
4804                 pfrastats = mallocarray(io->pfrio_size,
4805                     sizeof(struct pfr_astats), M_TEMP, M_WAITOK | M_ZERO);
4806                 PF_RULES_RLOCK();
4807                 error = pfr_get_astats(&io->pfrio_table, pfrastats,
4808                     &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
4809                 PF_RULES_RUNLOCK();
4810                 if (error == 0)
4811                         error = copyout(pfrastats, io->pfrio_buffer, totlen);
4812                 free(pfrastats, M_TEMP);
4813                 break;
4814         }
4815
4816         case DIOCRCLRASTATS: {
4817                 struct pfioc_table *io = (struct pfioc_table *)addr;
4818                 struct pfr_addr *pfras;
4819                 size_t totlen;
4820
4821                 if (io->pfrio_esize != sizeof(struct pfr_addr)) {
4822                         error = ENODEV;
4823                         break;
4824                 }
4825                 if (io->pfrio_size < 0 ||
4826                     io->pfrio_size > pf_ioctl_maxcount ||
4827                     WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) {
4828                         error = EINVAL;
4829                         break;
4830                 }
4831                 totlen = io->pfrio_size * sizeof(struct pfr_addr);
4832                 pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
4833                     M_TEMP, M_WAITOK);
4834                 error = copyin(io->pfrio_buffer, pfras, totlen);
4835                 if (error) {
4836                         free(pfras, M_TEMP);
4837                         break;
4838                 }
4839                 PF_RULES_WLOCK();
4840                 error = pfr_clr_astats(&io->pfrio_table, pfras,
4841                     io->pfrio_size, &io->pfrio_nzero, io->pfrio_flags |
4842                     PFR_FLAG_USERIOCTL);
4843                 PF_RULES_WUNLOCK();
4844                 if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
4845                         error = copyout(pfras, io->pfrio_buffer, totlen);
4846                 free(pfras, M_TEMP);
4847                 break;
4848         }
4849
4850         case DIOCRTSTADDRS: {
4851                 struct pfioc_table *io = (struct pfioc_table *)addr;
4852                 struct pfr_addr *pfras;
4853                 size_t totlen;
4854
4855                 if (io->pfrio_esize != sizeof(struct pfr_addr)) {
4856                         error = ENODEV;
4857                         break;
4858                 }
4859                 if (io->pfrio_size < 0 ||
4860                     io->pfrio_size > pf_ioctl_maxcount ||
4861                     WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) {
4862                         error = EINVAL;
4863                         break;
4864                 }
4865                 totlen = io->pfrio_size * sizeof(struct pfr_addr);
4866                 pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
4867                     M_TEMP, M_WAITOK);
4868                 error = copyin(io->pfrio_buffer, pfras, totlen);
4869                 if (error) {
4870                         free(pfras, M_TEMP);
4871                         break;
4872                 }
4873                 PF_RULES_RLOCK();
4874                 error = pfr_tst_addrs(&io->pfrio_table, pfras,
4875                     io->pfrio_size, &io->pfrio_nmatch, io->pfrio_flags |
4876                     PFR_FLAG_USERIOCTL);
4877                 PF_RULES_RUNLOCK();
4878                 if (error == 0)
4879                         error = copyout(pfras, io->pfrio_buffer, totlen);
4880                 free(pfras, M_TEMP);
4881                 break;
4882         }
4883
4884         case DIOCRINADEFINE: {
4885                 struct pfioc_table *io = (struct pfioc_table *)addr;
4886                 struct pfr_addr *pfras;
4887                 size_t totlen;
4888
4889                 if (io->pfrio_esize != sizeof(struct pfr_addr)) {
4890                         error = ENODEV;
4891                         break;
4892                 }
4893                 if (io->pfrio_size < 0 ||
4894                     io->pfrio_size > pf_ioctl_maxcount ||
4895                     WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) {
4896                         error = EINVAL;
4897                         break;
4898                 }
4899                 totlen = io->pfrio_size * sizeof(struct pfr_addr);
4900                 pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
4901                     M_TEMP, M_WAITOK);
4902                 error = copyin(io->pfrio_buffer, pfras, totlen);
4903                 if (error) {
4904                         free(pfras, M_TEMP);
4905                         break;
4906                 }
4907                 PF_RULES_WLOCK();
4908                 error = pfr_ina_define(&io->pfrio_table, pfras,
4909                     io->pfrio_size, &io->pfrio_nadd, &io->pfrio_naddr,
4910                     io->pfrio_ticket, io->pfrio_flags | PFR_FLAG_USERIOCTL);
4911                 PF_RULES_WUNLOCK();
4912                 free(pfras, M_TEMP);
4913                 break;
4914         }
4915
4916         case DIOCOSFPADD: {
4917                 struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr;
4918                 PF_RULES_WLOCK();
4919                 error = pf_osfp_add(io);
4920                 PF_RULES_WUNLOCK();
4921                 break;
4922         }
4923
4924         case DIOCOSFPGET: {
4925                 struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr;
4926                 PF_RULES_RLOCK();
4927                 error = pf_osfp_get(io);
4928                 PF_RULES_RUNLOCK();
4929                 break;
4930         }
4931
4932         case DIOCXBEGIN: {
4933                 struct pfioc_trans      *io = (struct pfioc_trans *)addr;
4934                 struct pfioc_trans_e    *ioes, *ioe;
4935                 size_t                   totlen;
4936                 int                      i;
4937
4938                 if (io->esize != sizeof(*ioe)) {
4939                         error = ENODEV;
4940                         break;
4941                 }
4942                 if (io->size < 0 ||
4943                     io->size > pf_ioctl_maxcount ||
4944                     WOULD_OVERFLOW(io->size, sizeof(struct pfioc_trans_e))) {
4945                         error = EINVAL;
4946                         break;
4947                 }
4948                 totlen = sizeof(struct pfioc_trans_e) * io->size;
4949                 ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e),
4950                     M_TEMP, M_WAITOK);
4951                 error = copyin(io->array, ioes, totlen);
4952                 if (error) {
4953                         free(ioes, M_TEMP);
4954                         break;
4955                 }
4956                 /* Ensure there's no more ethernet rules to clean up. */
4957                 NET_EPOCH_DRAIN_CALLBACKS();
4958                 PF_RULES_WLOCK();
4959                 for (i = 0, ioe = ioes; i < io->size; i++, ioe++) {
4960                         ioe->anchor[sizeof(ioe->anchor) - 1] = '\0';
4961                         switch (ioe->rs_num) {
4962                         case PF_RULESET_ETH:
4963                                 if ((error = pf_begin_eth(&ioe->ticket, ioe->anchor))) {
4964                                         PF_RULES_WUNLOCK();
4965                                         free(ioes, M_TEMP);
4966                                         goto fail;
4967                                 }
4968                                 break;
4969 #ifdef ALTQ
4970                         case PF_RULESET_ALTQ:
4971                                 if (ioe->anchor[0]) {
4972                                         PF_RULES_WUNLOCK();
4973                                         free(ioes, M_TEMP);
4974                                         error = EINVAL;
4975                                         goto fail;
4976                                 }
4977                                 if ((error = pf_begin_altq(&ioe->ticket))) {
4978                                         PF_RULES_WUNLOCK();
4979                                         free(ioes, M_TEMP);
4980                                         goto fail;
4981                                 }
4982                                 break;
4983 #endif /* ALTQ */
4984                         case PF_RULESET_TABLE:
4985                             {
4986                                 struct pfr_table table;
4987
4988                                 bzero(&table, sizeof(table));
4989                                 strlcpy(table.pfrt_anchor, ioe->anchor,
4990                                     sizeof(table.pfrt_anchor));
4991                                 if ((error = pfr_ina_begin(&table,
4992                                     &ioe->ticket, NULL, 0))) {
4993                                         PF_RULES_WUNLOCK();
4994                                         free(ioes, M_TEMP);
4995                                         goto fail;
4996                                 }
4997                                 break;
4998                             }
4999                         default:
5000                                 if ((error = pf_begin_rules(&ioe->ticket,
5001                                     ioe->rs_num, ioe->anchor))) {
5002                                         PF_RULES_WUNLOCK();
5003                                         free(ioes, M_TEMP);
5004                                         goto fail;
5005                                 }
5006                                 break;
5007                         }
5008                 }
5009                 PF_RULES_WUNLOCK();
5010                 error = copyout(ioes, io->array, totlen);
5011                 free(ioes, M_TEMP);
5012                 break;
5013         }
5014
5015         case DIOCXROLLBACK: {
5016                 struct pfioc_trans      *io = (struct pfioc_trans *)addr;
5017                 struct pfioc_trans_e    *ioe, *ioes;
5018                 size_t                   totlen;
5019                 int                      i;
5020
5021                 if (io->esize != sizeof(*ioe)) {
5022                         error = ENODEV;
5023                         break;
5024                 }
5025                 if (io->size < 0 ||
5026                     io->size > pf_ioctl_maxcount ||
5027                     WOULD_OVERFLOW(io->size, sizeof(struct pfioc_trans_e))) {
5028                         error = EINVAL;
5029                         break;
5030                 }
5031                 totlen = sizeof(struct pfioc_trans_e) * io->size;
5032                 ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e),
5033                     M_TEMP, M_WAITOK);
5034                 error = copyin(io->array, ioes, totlen);
5035                 if (error) {
5036                         free(ioes, M_TEMP);
5037                         break;
5038                 }
5039                 PF_RULES_WLOCK();
5040                 for (i = 0, ioe = ioes; i < io->size; i++, ioe++) {
5041                         ioe->anchor[sizeof(ioe->anchor) - 1] = '\0';
5042                         switch (ioe->rs_num) {
5043                         case PF_RULESET_ETH:
5044                                 if ((error = pf_rollback_eth(ioe->ticket,
5045                                     ioe->anchor))) {
5046                                         PF_RULES_WUNLOCK();
5047                                         free(ioes, M_TEMP);
5048                                         goto fail; /* really bad */
5049                                 }
5050                                 break;
5051 #ifdef ALTQ
5052                         case PF_RULESET_ALTQ:
5053                                 if (ioe->anchor[0]) {
5054                                         PF_RULES_WUNLOCK();
5055                                         free(ioes, M_TEMP);
5056                                         error = EINVAL;
5057                                         goto fail;
5058                                 }
5059                                 if ((error = pf_rollback_altq(ioe->ticket))) {
5060                                         PF_RULES_WUNLOCK();
5061                                         free(ioes, M_TEMP);
5062                                         goto fail; /* really bad */
5063                                 }
5064                                 break;
5065 #endif /* ALTQ */
5066                         case PF_RULESET_TABLE:
5067                             {
5068                                 struct pfr_table table;
5069
5070                                 bzero(&table, sizeof(table));
5071                                 strlcpy(table.pfrt_anchor, ioe->anchor,
5072                                     sizeof(table.pfrt_anchor));
5073                                 if ((error = pfr_ina_rollback(&table,
5074                                     ioe->ticket, NULL, 0))) {
5075                                         PF_RULES_WUNLOCK();
5076                                         free(ioes, M_TEMP);
5077                                         goto fail; /* really bad */
5078                                 }
5079                                 break;
5080                             }
5081                         default:
5082                                 if ((error = pf_rollback_rules(ioe->ticket,
5083                                     ioe->rs_num, ioe->anchor))) {
5084                                         PF_RULES_WUNLOCK();
5085                                         free(ioes, M_TEMP);
5086                                         goto fail; /* really bad */
5087                                 }
5088                                 break;
5089                         }
5090                 }
5091                 PF_RULES_WUNLOCK();
5092                 free(ioes, M_TEMP);
5093                 break;
5094         }
5095
5096         case DIOCXCOMMIT: {
5097                 struct pfioc_trans      *io = (struct pfioc_trans *)addr;
5098                 struct pfioc_trans_e    *ioe, *ioes;
5099                 struct pf_kruleset      *rs;
5100                 struct pf_keth_ruleset  *ers;
5101                 size_t                   totlen;
5102                 int                      i;
5103
5104                 if (io->esize != sizeof(*ioe)) {
5105                         error = ENODEV;
5106                         break;
5107                 }
5108
5109                 if (io->size < 0 ||
5110                     io->size > pf_ioctl_maxcount ||
5111                     WOULD_OVERFLOW(io->size, sizeof(struct pfioc_trans_e))) {
5112                         error = EINVAL;
5113                         break;
5114                 }
5115
5116                 totlen = sizeof(struct pfioc_trans_e) * io->size;
5117                 ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e),
5118                     M_TEMP, M_WAITOK);
5119                 error = copyin(io->array, ioes, totlen);
5120                 if (error) {
5121                         free(ioes, M_TEMP);
5122                         break;
5123                 }
5124                 PF_RULES_WLOCK();
5125                 /* First, make sure everything will succeed. */
5126                 for (i = 0, ioe = ioes; i < io->size; i++, ioe++) {
5127                         ioe->anchor[sizeof(ioe->anchor) - 1] = 0;
5128                         switch (ioe->rs_num) {
5129                         case PF_RULESET_ETH:
5130                                 ers = pf_find_keth_ruleset(ioe->anchor);
5131                                 if (ers == NULL || ioe->ticket == 0 ||
5132                                     ioe->ticket != ers->inactive.ticket) {
5133                                         PF_RULES_WUNLOCK();
5134                                         free(ioes, M_TEMP);
5135                                         error = EINVAL;
5136                                         goto fail;
5137                                 }
5138                                 break;
5139 #ifdef ALTQ
5140                         case PF_RULESET_ALTQ:
5141                                 if (ioe->anchor[0]) {
5142                                         PF_RULES_WUNLOCK();
5143                                         free(ioes, M_TEMP);
5144                                         error = EINVAL;
5145                                         goto fail;
5146                                 }
5147                                 if (!V_altqs_inactive_open || ioe->ticket !=
5148                                     V_ticket_altqs_inactive) {
5149                                         PF_RULES_WUNLOCK();
5150                                         free(ioes, M_TEMP);
5151                                         error = EBUSY;
5152                                         goto fail;
5153                                 }
5154                                 break;
5155 #endif /* ALTQ */
5156                         case PF_RULESET_TABLE:
5157                                 rs = pf_find_kruleset(ioe->anchor);
5158                                 if (rs == NULL || !rs->topen || ioe->ticket !=
5159                                     rs->tticket) {
5160                                         PF_RULES_WUNLOCK();
5161                                         free(ioes, M_TEMP);
5162                                         error = EBUSY;
5163                                         goto fail;
5164                                 }
5165                                 break;
5166                         default:
5167                                 if (ioe->rs_num < 0 || ioe->rs_num >=
5168                                     PF_RULESET_MAX) {
5169                                         PF_RULES_WUNLOCK();
5170                                         free(ioes, M_TEMP);
5171                                         error = EINVAL;
5172                                         goto fail;
5173                                 }
5174                                 rs = pf_find_kruleset(ioe->anchor);
5175                                 if (rs == NULL ||
5176                                     !rs->rules[ioe->rs_num].inactive.open ||
5177                                     rs->rules[ioe->rs_num].inactive.ticket !=
5178                                     ioe->ticket) {
5179                                         PF_RULES_WUNLOCK();
5180                                         free(ioes, M_TEMP);
5181                                         error = EBUSY;
5182                                         goto fail;
5183                                 }
5184                                 break;
5185                         }
5186                 }
5187                 /* Now do the commit - no errors should happen here. */
5188                 for (i = 0, ioe = ioes; i < io->size; i++, ioe++) {
5189                         switch (ioe->rs_num) {
5190                         case PF_RULESET_ETH:
5191                                 if ((error = pf_commit_eth(ioe->ticket, ioe->anchor))) {
5192                                         PF_RULES_WUNLOCK();
5193                                         free(ioes, M_TEMP);
5194                                         goto fail; /* really bad */
5195                                 }
5196                                 break;
5197 #ifdef ALTQ
5198                         case PF_RULESET_ALTQ:
5199                                 if ((error = pf_commit_altq(ioe->ticket))) {
5200                                         PF_RULES_WUNLOCK();
5201                                         free(ioes, M_TEMP);
5202                                         goto fail; /* really bad */
5203                                 }
5204                                 break;
5205 #endif /* ALTQ */
5206                         case PF_RULESET_TABLE:
5207                             {
5208                                 struct pfr_table table;
5209
5210                                 bzero(&table, sizeof(table));
5211                                 (void)strlcpy(table.pfrt_anchor, ioe->anchor,
5212                                     sizeof(table.pfrt_anchor));
5213                                 if ((error = pfr_ina_commit(&table,
5214                                     ioe->ticket, NULL, NULL, 0))) {
5215                                         PF_RULES_WUNLOCK();
5216                                         free(ioes, M_TEMP);
5217                                         goto fail; /* really bad */
5218                                 }
5219                                 break;
5220                             }
5221                         default:
5222                                 if ((error = pf_commit_rules(ioe->ticket,
5223                                     ioe->rs_num, ioe->anchor))) {
5224                                         PF_RULES_WUNLOCK();
5225                                         free(ioes, M_TEMP);
5226                                         goto fail; /* really bad */
5227                                 }
5228                                 break;
5229                         }
5230                 }
5231                 PF_RULES_WUNLOCK();
5232
5233                 /* Only hook into Ethernet traffic if we've got rules for it. */
5234                 if (! TAILQ_EMPTY(V_pf_keth->active.rules))
5235                         hook_pf_eth();
5236                 else
5237                         dehook_pf_eth();
5238
5239                 free(ioes, M_TEMP);
5240                 break;
5241         }
5242
5243         case DIOCGETSRCNODES: {
5244                 struct pfioc_src_nodes  *psn = (struct pfioc_src_nodes *)addr;
5245                 struct pf_srchash       *sh;
5246                 struct pf_ksrc_node     *n;
5247                 struct pf_src_node      *p, *pstore;
5248                 uint32_t                 i, nr = 0;
5249
5250                 for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask;
5251                                 i++, sh++) {
5252                         PF_HASHROW_LOCK(sh);
5253                         LIST_FOREACH(n, &sh->nodes, entry)
5254                                 nr++;
5255                         PF_HASHROW_UNLOCK(sh);
5256                 }
5257
5258                 psn->psn_len = min(psn->psn_len,
5259                     sizeof(struct pf_src_node) * nr);
5260
5261                 if (psn->psn_len == 0) {
5262                         psn->psn_len = sizeof(struct pf_src_node) * nr;
5263                         break;
5264                 }
5265
5266                 nr = 0;
5267
5268                 p = pstore = malloc(psn->psn_len, M_TEMP, M_WAITOK | M_ZERO);
5269                 for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask;
5270                     i++, sh++) {
5271                     PF_HASHROW_LOCK(sh);
5272                     LIST_FOREACH(n, &sh->nodes, entry) {
5273
5274                         if ((nr + 1) * sizeof(*p) > (unsigned)psn->psn_len)
5275                                 break;
5276
5277                         pf_src_node_copy(n, p);
5278
5279                         p++;
5280                         nr++;
5281                     }
5282                     PF_HASHROW_UNLOCK(sh);
5283                 }
5284                 error = copyout(pstore, psn->psn_src_nodes,
5285                     sizeof(struct pf_src_node) * nr);
5286                 if (error) {
5287                         free(pstore, M_TEMP);
5288                         break;
5289                 }
5290                 psn->psn_len = sizeof(struct pf_src_node) * nr;
5291                 free(pstore, M_TEMP);
5292                 break;
5293         }
5294
5295         case DIOCCLRSRCNODES: {
5296                 pf_clear_srcnodes(NULL);
5297                 pf_purge_expired_src_nodes();
5298                 break;
5299         }
5300
5301         case DIOCKILLSRCNODES:
5302                 pf_kill_srcnodes((struct pfioc_src_node_kill *)addr);
5303                 break;
5304
5305 #ifdef COMPAT_FREEBSD13
5306         case DIOCKEEPCOUNTERS_FREEBSD13:
5307 #endif
5308         case DIOCKEEPCOUNTERS:
5309                 error = pf_keepcounters((struct pfioc_nv *)addr);
5310                 break;
5311
5312         case DIOCGETSYNCOOKIES:
5313                 error = pf_get_syncookies((struct pfioc_nv *)addr);
5314                 break;
5315
5316         case DIOCSETSYNCOOKIES:
5317                 error = pf_set_syncookies((struct pfioc_nv *)addr);
5318                 break;
5319
5320         case DIOCSETHOSTID: {
5321                 u_int32_t       *hostid = (u_int32_t *)addr;
5322
5323                 PF_RULES_WLOCK();
5324                 if (*hostid == 0)
5325                         V_pf_status.hostid = arc4random();
5326                 else
5327                         V_pf_status.hostid = *hostid;
5328                 PF_RULES_WUNLOCK();
5329                 break;
5330         }
5331
5332         case DIOCOSFPFLUSH:
5333                 PF_RULES_WLOCK();
5334                 pf_osfp_flush();
5335                 PF_RULES_WUNLOCK();
5336                 break;
5337
5338         case DIOCIGETIFACES: {
5339                 struct pfioc_iface *io = (struct pfioc_iface *)addr;
5340                 struct pfi_kif *ifstore;
5341                 size_t bufsiz;
5342
5343                 if (io->pfiio_esize != sizeof(struct pfi_kif)) {
5344                         error = ENODEV;
5345                         break;
5346                 }
5347
5348                 if (io->pfiio_size < 0 ||
5349                     io->pfiio_size > pf_ioctl_maxcount ||
5350                     WOULD_OVERFLOW(io->pfiio_size, sizeof(struct pfi_kif))) {
5351                         error = EINVAL;
5352                         break;
5353                 }
5354
5355                 io->pfiio_name[sizeof(io->pfiio_name) - 1] = '\0';
5356
5357                 bufsiz = io->pfiio_size * sizeof(struct pfi_kif);
5358                 ifstore = mallocarray(io->pfiio_size, sizeof(struct pfi_kif),
5359                     M_TEMP, M_WAITOK | M_ZERO);
5360
5361                 PF_RULES_RLOCK();
5362                 pfi_get_ifaces(io->pfiio_name, ifstore, &io->pfiio_size);
5363                 PF_RULES_RUNLOCK();
5364                 error = copyout(ifstore, io->pfiio_buffer, bufsiz);
5365                 free(ifstore, M_TEMP);
5366                 break;
5367         }
5368
5369         case DIOCSETIFFLAG: {
5370                 struct pfioc_iface *io = (struct pfioc_iface *)addr;
5371
5372                 io->pfiio_name[sizeof(io->pfiio_name) - 1] = '\0';
5373
5374                 PF_RULES_WLOCK();
5375                 error = pfi_set_flags(io->pfiio_name, io->pfiio_flags);
5376                 PF_RULES_WUNLOCK();
5377                 break;
5378         }
5379
5380         case DIOCCLRIFFLAG: {
5381                 struct pfioc_iface *io = (struct pfioc_iface *)addr;
5382
5383                 io->pfiio_name[sizeof(io->pfiio_name) - 1] = '\0';
5384
5385                 PF_RULES_WLOCK();
5386                 error = pfi_clear_flags(io->pfiio_name, io->pfiio_flags);
5387                 PF_RULES_WUNLOCK();
5388                 break;
5389         }
5390
5391         case DIOCSETREASS: {
5392                 u_int32_t       *reass = (u_int32_t *)addr;
5393
5394                 V_pf_status.reass = *reass & (PF_REASS_ENABLED|PF_REASS_NODF);
5395                 /* Removal of the DF flag without reassembly enabled is not a
5396                  * valid combination. Disable reassembly in that case. */
5397                 if (!(V_pf_status.reass & PF_REASS_ENABLED))
5398                         V_pf_status.reass = 0;
5399                 break;
5400         }
5401
5402         default:
5403                 error = ENODEV;
5404                 break;
5405         }
5406 fail:
5407         if (sx_xlocked(&V_pf_ioctl_lock))
5408                 sx_xunlock(&V_pf_ioctl_lock);
5409         CURVNET_RESTORE();
5410
5411 #undef ERROUT_IOCTL
5412
5413         return (error);
5414 }
5415
/*
 * Export kernel state 'st' into the pfsync(4) wire representation 'sp'.
 * msg_version selects the on-wire layout: PFSYNC_MSG_VERSION_1301
 * (pfs_1301) or PFSYNC_MSG_VERSION_1400 (pfs_1400), which widens
 * state_flags to 16 bits and additionally carries the state's rule
 * actions (queue ids, dummynet pipes, route-to interface, ...).
 * The leading fields are written through pfs_1301; they appear to be
 * common to both layouts — confirm against the union definition.
 */
void
pfsync_state_export(union pfsync_state_union *sp, struct pf_kstate *st, int msg_version)
{
	bzero(sp, sizeof(union pfsync_state_union));

	/* copy from state key */
	sp->pfs_1301.key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
	sp->pfs_1301.key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
	sp->pfs_1301.key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
	sp->pfs_1301.key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
	sp->pfs_1301.key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
	sp->pfs_1301.key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
	sp->pfs_1301.key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
	sp->pfs_1301.key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
	sp->pfs_1301.proto = st->key[PF_SK_WIRE]->proto;
	sp->pfs_1301.af = st->key[PF_SK_WIRE]->af;

	/* copy from state */
	strlcpy(sp->pfs_1301.ifname, st->kif->pfik_name, sizeof(sp->pfs_1301.ifname));
	bcopy(&st->rt_addr, &sp->pfs_1301.rt_addr, sizeof(sp->pfs_1301.rt_addr));
	/* Times go out as ages relative to now, in network byte order. */
	sp->pfs_1301.creation = htonl(time_uptime - st->creation);
	sp->pfs_1301.expire = pf_state_expires(st);
	if (sp->pfs_1301.expire <= time_uptime)
		sp->pfs_1301.expire = htonl(0);
	else
		sp->pfs_1301.expire = htonl(sp->pfs_1301.expire - time_uptime);

	sp->pfs_1301.direction = st->direction;
	sp->pfs_1301.log = st->act.log;
	sp->pfs_1301.timeout = st->timeout;

	/* Version-specific tail: 14.00 adds the rule action fields. */
	switch (msg_version) {
		case PFSYNC_MSG_VERSION_1301:
			sp->pfs_1301.state_flags = st->state_flags;
			break;
		case PFSYNC_MSG_VERSION_1400:
			sp->pfs_1400.state_flags = htons(st->state_flags);
			sp->pfs_1400.qid = htons(st->act.qid);
			sp->pfs_1400.pqid = htons(st->act.pqid);
			sp->pfs_1400.dnpipe = htons(st->act.dnpipe);
			sp->pfs_1400.dnrpipe = htons(st->act.dnrpipe);
			sp->pfs_1400.rtableid = htonl(st->act.rtableid);
			sp->pfs_1400.min_ttl = st->act.min_ttl;
			sp->pfs_1400.set_tos = st->act.set_tos;
			sp->pfs_1400.max_mss = htons(st->act.max_mss);
			sp->pfs_1400.set_prio[0] = st->act.set_prio[0];
			sp->pfs_1400.set_prio[1] = st->act.set_prio[1];
			sp->pfs_1400.rt = st->rt;
			if (st->rt_kif)
				strlcpy(sp->pfs_1400.rt_ifname,
				    st->rt_kif->pfik_name,
				    sizeof(sp->pfs_1400.rt_ifname));
			break;
		default:
			panic("%s: Unsupported pfsync_msg_version %d",
			    __func__, msg_version);
	}

	if (st->src_node)
		sp->pfs_1301.sync_flags |= PFSYNC_FLAG_SRCNODE;
	if (st->nat_src_node)
		sp->pfs_1301.sync_flags |= PFSYNC_FLAG_NATSRCNODE;

	sp->pfs_1301.id = st->id;
	sp->pfs_1301.creatorid = st->creatorid;
	pf_state_peer_hton(&st->src, &sp->pfs_1301.src);
	pf_state_peer_hton(&st->dst, &sp->pfs_1301.dst);

	/* Rule numbers; -1 marks "no rule". */
	if (st->rule.ptr == NULL)
		sp->pfs_1301.rule = htonl(-1);
	else
		sp->pfs_1301.rule = htonl(st->rule.ptr->nr);
	if (st->anchor.ptr == NULL)
		sp->pfs_1301.anchor = htonl(-1);
	else
		sp->pfs_1301.anchor = htonl(st->anchor.ptr->nr);
	if (st->nat_rule.ptr == NULL)
		sp->pfs_1301.nat_rule = htonl(-1);
	else
		sp->pfs_1301.nat_rule = htonl(st->nat_rule.ptr->nr);

	pf_state_counter_hton(st->packets[0], sp->pfs_1301.packets[0]);
	pf_state_counter_hton(st->packets[1], sp->pfs_1301.packets[1]);
	pf_state_counter_hton(st->bytes[0], sp->pfs_1301.bytes[0]);
	pf_state_counter_hton(st->bytes[1], sp->pfs_1301.bytes[1]);
}
5502
/*
 * Export kernel state 'st' into the struct pf_state_export consumed by
 * userland (libpfctl).  All multi-byte fields are converted to network
 * byte order; sp->version is stamped with PF_STATE_VERSION so userland
 * can detect layout mismatches.
 */
void
pf_state_export(struct pf_state_export *sp, struct pf_kstate *st)
{
	bzero(sp, sizeof(*sp));

	sp->version = PF_STATE_VERSION;

	/* copy from state key */
	sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
	sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
	sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
	sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
	sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
	sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
	sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
	sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
	sp->proto = st->key[PF_SK_WIRE]->proto;
	sp->af = st->key[PF_SK_WIRE]->af;

	/* copy from state */
	strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
	strlcpy(sp->orig_ifname, st->orig_kif->pfik_name,
	    sizeof(sp->orig_ifname));
	bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
	/* Times are exported as ages relative to now. */
	sp->creation = htonl(time_uptime - st->creation);
	sp->expire = pf_state_expires(st);
	if (sp->expire <= time_uptime)
		sp->expire = htonl(0);
	else
		sp->expire = htonl(sp->expire - time_uptime);

	sp->direction = st->direction;
	sp->log = st->act.log;
	sp->timeout = st->timeout;
	/* 8 bits for the old libpfctl, 16 bits for the new libpfctl */
	sp->state_flags_compat = st->state_flags;
	sp->state_flags = htons(st->state_flags);
	if (st->src_node)
		sp->sync_flags |= PFSYNC_FLAG_SRCNODE;
	if (st->nat_src_node)
		sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE;

	sp->id = st->id;
	sp->creatorid = st->creatorid;
	pf_state_peer_hton(&st->src, &sp->src);
	pf_state_peer_hton(&st->dst, &sp->dst);

	/* Rule numbers; -1 marks "no rule". */
	if (st->rule.ptr == NULL)
		sp->rule = htonl(-1);
	else
		sp->rule = htonl(st->rule.ptr->nr);
	if (st->anchor.ptr == NULL)
		sp->anchor = htonl(-1);
	else
		sp->anchor = htonl(st->anchor.ptr->nr);
	if (st->nat_rule.ptr == NULL)
		sp->nat_rule = htonl(-1);
	else
		sp->nat_rule = htonl(st->nat_rule.ptr->nr);

	/* Counters are exported as-is (no byte swap here). */
	sp->packets[0] = st->packets[0];
	sp->packets[1] = st->packets[1];
	sp->bytes[0] = st->bytes[0];
	sp->bytes[1] = st->bytes[1];

	/* Rule actions attached to the state. */
	sp->qid = htons(st->act.qid);
	sp->pqid = htons(st->act.pqid);
	sp->dnpipe = htons(st->act.dnpipe);
	sp->dnrpipe = htons(st->act.dnrpipe);
	sp->rtableid = htonl(st->act.rtableid);
	sp->min_ttl = st->act.min_ttl;
	sp->set_tos = st->act.set_tos;
	sp->max_mss = htons(st->act.max_mss);
	sp->rt = st->rt;
	if (st->rt_kif)
		strlcpy(sp->rt_ifname, st->rt_kif->pfik_name,
		    sizeof(sp->rt_ifname));
	sp->set_prio[0] = st->act.set_prio[0];
	sp->set_prio[1] = st->act.set_prio[1];

}
5584
5585 static void
5586 pf_tbladdr_copyout(struct pf_addr_wrap *aw)
5587 {
5588         struct pfr_ktable *kt;
5589
5590         KASSERT(aw->type == PF_ADDR_TABLE, ("%s: type %u", __func__, aw->type));
5591
5592         kt = aw->p.tbl;
5593         if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
5594                 kt = kt->pfrkt_root;
5595         aw->p.tbl = NULL;
5596         aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
5597                 kt->pfrkt_cnt : -1;
5598 }
5599
5600 static int
5601 pf_add_status_counters(nvlist_t *nvl, const char *name, counter_u64_t *counters,
5602     size_t number, char **names)
5603 {
5604         nvlist_t        *nvc;
5605
5606         nvc = nvlist_create(0);
5607         if (nvc == NULL)
5608                 return (ENOMEM);
5609
5610         for (int i = 0; i < number; i++) {
5611                 nvlist_append_number_array(nvc, "counters",
5612                     counter_u64_fetch(counters[i]));
5613                 nvlist_append_string_array(nvc, "names",
5614                     names[i]);
5615                 nvlist_append_number_array(nvc, "ids",
5616                     i);
5617         }
5618         nvlist_add_nvlist(nvl, name, nvc);
5619         nvlist_destroy(nvc);
5620
5621         return (0);
5622 }
5623
/*
 * DIOCGETSTATUSNV: snapshot pf's global status into a packed nvlist and
 * copy it out to userland.
 *
 * Protocol: if nv->size is 0 this is a size probe — nv->len is set to
 * the required buffer length and 0 is returned without a copyout.  If
 * nv->size is smaller than the packed length, ENOSPC is returned.
 */
static int
pf_getstatus(struct pfioc_nv *nv)
{
	nvlist_t	*nvl = NULL, *nvc = NULL;
	void		*nvlpacked = NULL;
	int		 error;
	struct pf_status s;
	char *pf_reasons[PFRES_MAX+1] = PFRES_NAMES;
	char *pf_lcounter[KLCNT_MAX+1] = KLCNT_NAMES;
	char *pf_fcounter[FCNT_MAX+1] = FCNT_NAMES;
	PF_RULES_RLOCK_TRACKER;

#define ERROUT(x)      ERROUT_FUNCTION(errout, x)

	/* Hold the rules read lock so the snapshot is consistent. */
	PF_RULES_RLOCK();

	nvl = nvlist_create(0);
	if (nvl == NULL)
		ERROUT(ENOMEM);

	nvlist_add_bool(nvl, "running", V_pf_status.running);
	nvlist_add_number(nvl, "since", V_pf_status.since);
	nvlist_add_number(nvl, "debug", V_pf_status.debug);
	nvlist_add_number(nvl, "hostid", V_pf_status.hostid);
	nvlist_add_number(nvl, "states", V_pf_status.states);
	nvlist_add_number(nvl, "src_nodes", V_pf_status.src_nodes);
	nvlist_add_number(nvl, "reass", V_pf_status.reass);
	nvlist_add_bool(nvl, "syncookies_active",
	    V_pf_status.syncookies_active);

	/* counters */
	error = pf_add_status_counters(nvl, "counters", V_pf_status.counters,
	    PFRES_MAX, pf_reasons);
	if (error != 0)
		ERROUT(error);

	/* lcounters */
	error = pf_add_status_counters(nvl, "lcounters", V_pf_status.lcounters,
	    KLCNT_MAX, pf_lcounter);
	if (error != 0)
		ERROUT(error);

	/*
	 * fcounters are pf_counter_u64, not counter_u64, so they cannot
	 * go through pf_add_status_counters() and are packed by hand.
	 */
	nvc = nvlist_create(0);
	if (nvc == NULL)
		ERROUT(ENOMEM);

	for (int i = 0; i < FCNT_MAX; i++) {
		nvlist_append_number_array(nvc, "counters",
		    pf_counter_u64_fetch(&V_pf_status.fcounters[i]));
		nvlist_append_string_array(nvc, "names",
		    pf_fcounter[i]);
		nvlist_append_number_array(nvc, "ids",
		    i);
	}
	nvlist_add_nvlist(nvl, "fcounters", nvc);
	nvlist_destroy(nvc);
	nvc = NULL;

	/* scounters (note: reuses the fcounter name table) */
	error = pf_add_status_counters(nvl, "scounters", V_pf_status.scounters,
	    SCNT_MAX, pf_fcounter);
	if (error != 0)
		ERROUT(error);

	nvlist_add_string(nvl, "ifname", V_pf_status.ifname);
	nvlist_add_binary(nvl, "chksum", V_pf_status.pf_chksum,
	    PF_MD5_DIGEST_LENGTH);

	pfi_update_status(V_pf_status.ifname, &s);

	/* pcounters / bcounters */
	for (int i = 0; i < 2; i++) {
		for (int j = 0; j < 2; j++) {
			for (int k = 0; k < 2; k++) {
				nvlist_append_number_array(nvl, "pcounters",
				    s.pcounters[i][j][k]);
			}
			nvlist_append_number_array(nvl, "bcounters",
			    s.bcounters[i][j]);
		}
	}

	nvlpacked = nvlist_pack(nvl, &nv->len);
	if (nvlpacked == NULL)
		ERROUT(ENOMEM);

	/* Size probe / too-small buffer handling (see header comment). */
	if (nv->size == 0)
		ERROUT(0);
	else if (nv->size < nv->len)
		ERROUT(ENOSPC);

	/* Drop the lock before the (potentially faulting) copyout. */
	PF_RULES_RUNLOCK();
	error = copyout(nvlpacked, nv->data, nv->len);
	goto done;

#undef ERROUT
errout:
	PF_RULES_RUNLOCK();
done:
	free(nvlpacked, M_NVLIST);
	nvlist_destroy(nvc);
	nvlist_destroy(nvl);

	return (error);
}
5730
5731 /*
5732  * XXX - Check for version mismatch!!!
5733  */
/*
 * Unlink every state in the system (DIOCCLRSTATES-style full flush).
 * States are flagged PFSTATE_NOSYNC first so pfsync does not emit an
 * individual delete message for each one.
 */
static void
pf_clear_all_states(void)
{
	struct pf_kstate	*s;
	u_int i;

	for (i = 0; i <= pf_hashmask; i++) {
		struct pf_idhash *ih = &V_pf_idhash[i];
relock:
		PF_HASHROW_LOCK(ih);
		LIST_FOREACH(s, &ih->states, entry) {
			s->timeout = PFTM_PURGE;
			/* Don't send out individual delete messages. */
			s->state_flags |= PFSTATE_NOSYNC;
			/*
			 * Unlinking invalidates the row iteration (the
			 * row lock is reacquired at relock), so restart
			 * the scan of this row after each unlink.
			 */
			pf_unlink_state(s);
			goto relock;
		}
		PF_HASHROW_UNLOCK(ih);
	}
}
5754
5755 static int
5756 pf_clear_tables(void)
5757 {
5758         struct pfioc_table io;
5759         int error;
5760
5761         bzero(&io, sizeof(io));
5762
5763         error = pfr_clr_tables(&io.pfrio_table, &io.pfrio_ndel,
5764             io.pfrio_flags);
5765
5766         return (error);
5767 }
5768
/*
 * Detach source node 'n' from all states and mark it for purge.
 * n == NULL means: do this for every source node in the system.
 */
static void
pf_clear_srcnodes(struct pf_ksrc_node *n)
{
	struct pf_kstate *s;
	int i;

	/* Pass 1: drop state references to the node(s). */
	for (i = 0; i <= pf_hashmask; i++) {
		struct pf_idhash *ih = &V_pf_idhash[i];

		PF_HASHROW_LOCK(ih);
		LIST_FOREACH(s, &ih->states, entry) {
			if (n == NULL || n == s->src_node)
				s->src_node = NULL;
			if (n == NULL || n == s->nat_src_node)
				s->nat_src_node = NULL;
		}
		PF_HASHROW_UNLOCK(ih);
	}

	/* Pass 2: mark the node(s) for expiry (expire = 1, no states). */
	if (n == NULL) {
		struct pf_srchash *sh;

		for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask;
		    i++, sh++) {
			PF_HASHROW_LOCK(sh);
			LIST_FOREACH(n, &sh->nodes, entry) {
				n->expire = 1;
				n->states = 0;
			}
			PF_HASHROW_UNLOCK(sh);
		}
	} else {
		/* XXX: hash slot should already be locked here. */
		n->expire = 1;
		n->states = 0;
	}
}
5806
/*
 * DIOCKILLSRCNODES: unlink and free all source nodes whose source and
 * destination addresses match the (possibly negated, masked) criteria
 * in psnk.  The number of nodes killed is returned in psnk_killed.
 */
static void
pf_kill_srcnodes(struct pfioc_src_node_kill *psnk)
{
	struct pf_ksrc_node_list	 kill;

	LIST_INIT(&kill);
	/* Pass 1: collect matching nodes onto the kill list. */
	for (int i = 0; i <= pf_srchashmask; i++) {
		struct pf_srchash *sh = &V_pf_srchash[i];
		struct pf_ksrc_node *sn, *tmp;

		PF_HASHROW_LOCK(sh);
		LIST_FOREACH_SAFE(sn, &sh->nodes, entry, tmp)
			if (PF_MATCHA(psnk->psnk_src.neg,
			      &psnk->psnk_src.addr.v.a.addr,
			      &psnk->psnk_src.addr.v.a.mask,
			      &sn->addr, sn->af) &&
			    PF_MATCHA(psnk->psnk_dst.neg,
			      &psnk->psnk_dst.addr.v.a.addr,
			      &psnk->psnk_dst.addr.v.a.mask,
			      &sn->raddr, sn->af)) {
				pf_unlink_src_node(sn);
				LIST_INSERT_HEAD(&kill, sn, entry);
				/* expire == 1 marks the node as doomed. */
				sn->expire = 1;
			}
		PF_HASHROW_UNLOCK(sh);
	}

	/* Pass 2: drop state references to the doomed nodes. */
	for (int i = 0; i <= pf_hashmask; i++) {
		struct pf_idhash *ih = &V_pf_idhash[i];
		struct pf_kstate *s;

		PF_HASHROW_LOCK(ih);
		LIST_FOREACH(s, &ih->states, entry) {
			if (s->src_node && s->src_node->expire == 1)
				s->src_node = NULL;
			if (s->nat_src_node && s->nat_src_node->expire == 1)
				s->nat_src_node = NULL;
		}
		PF_HASHROW_UNLOCK(ih);
	}

	psnk->psnk_killed = pf_free_src_nodes(&kill);
}
5850
5851 static int
5852 pf_keepcounters(struct pfioc_nv *nv)
5853 {
5854         nvlist_t        *nvl = NULL;
5855         void            *nvlpacked = NULL;
5856         int              error = 0;
5857
5858 #define ERROUT(x)       ERROUT_FUNCTION(on_error, x)
5859
5860         if (nv->len > pf_ioctl_maxcount)
5861                 ERROUT(ENOMEM);
5862
5863         nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
5864         if (nvlpacked == NULL)
5865                 ERROUT(ENOMEM);
5866
5867         error = copyin(nv->data, nvlpacked, nv->len);
5868         if (error)
5869                 ERROUT(error);
5870
5871         nvl = nvlist_unpack(nvlpacked, nv->len, 0);
5872         if (nvl == NULL)
5873                 ERROUT(EBADMSG);
5874
5875         if (! nvlist_exists_bool(nvl, "keep_counters"))
5876                 ERROUT(EBADMSG);
5877
5878         V_pf_status.keep_counters = nvlist_get_bool(nvl, "keep_counters");
5879
5880 on_error:
5881         nvlist_destroy(nvl);
5882         free(nvlpacked, M_NVLIST);
5883         return (error);
5884 }
5885
/*
 * Flush states matching the kill criteria (DIOCCLRSTATES[NV]).  When
 * psk_ifname is set only states on that interface are flushed; when
 * psk_kill_match is set, states matching the reversed key of each
 * flushed state are flushed as well (e.g. the NATed counterpart).
 * Returns the number of states killed.
 */
static unsigned int
pf_clear_states(const struct pf_kstate_kill *kill)
{
	struct pf_state_key_cmp	 match_key;
	struct pf_kstate	*s;
	struct pfi_kkif	*kif;
	int		 idx;
	unsigned int	 killed = 0, dir;

	for (unsigned int i = 0; i <= pf_hashmask; i++) {
		struct pf_idhash *ih = &V_pf_idhash[i];

relock_DIOCCLRSTATES:
		PF_HASHROW_LOCK(ih);
		LIST_FOREACH(s, &ih->states, entry) {
			/* For floating states look at the original kif. */
			kif = s->kif == V_pfi_all ? s->orig_kif : s->kif;

			if (kill->psk_ifname[0] &&
			    strcmp(kill->psk_ifname,
			    kif->pfik_name))
				continue;

			if (kill->psk_kill_match) {
				/*
				 * Build the reversed key (opposite
				 * direction, swapped address/port pairs)
				 * before the state goes away.
				 */
				bzero(&match_key, sizeof(match_key));

				if (s->direction == PF_OUT) {
					dir = PF_IN;
					idx = PF_SK_STACK;
				} else {
					dir = PF_OUT;
					idx = PF_SK_WIRE;
				}

				match_key.af = s->key[idx]->af;
				match_key.proto = s->key[idx]->proto;
				PF_ACPY(&match_key.addr[0],
				    &s->key[idx]->addr[1], match_key.af);
				match_key.port[0] = s->key[idx]->port[1];
				PF_ACPY(&match_key.addr[1],
				    &s->key[idx]->addr[0], match_key.af);
				match_key.port[1] = s->key[idx]->port[0];
			}

			/*
			 * Don't send out individual
			 * delete messages.
			 */
			s->state_flags |= PFSTATE_NOSYNC;
			pf_unlink_state(s);
			killed++;

			if (kill->psk_kill_match)
				killed += pf_kill_matching_state(&match_key,
				    dir);

			/*
			 * Unlinking invalidates the row iteration, so
			 * restart the scan of this row (re-locks above).
			 */
			goto relock_DIOCCLRSTATES;
		}
		PF_HASHROW_UNLOCK(ih);
	}

	/* Let pfsync broadcast one bulk clear instead of per-state dels. */
	if (V_pfsync_clear_states_ptr != NULL)
		V_pfsync_clear_states_ptr(V_pf_status.hostid, kill->psk_ifname);

	return (killed);
}
5952
5953 static void
5954 pf_killstates(struct pf_kstate_kill *kill, unsigned int *killed)
5955 {
5956         struct pf_kstate        *s;
5957
5958         if (kill->psk_pfcmp.id) {
5959                 if (kill->psk_pfcmp.creatorid == 0)
5960                         kill->psk_pfcmp.creatorid = V_pf_status.hostid;
5961                 if ((s = pf_find_state_byid(kill->psk_pfcmp.id,
5962                     kill->psk_pfcmp.creatorid))) {
5963                         pf_unlink_state(s);
5964                         *killed = 1;
5965                 }
5966                 return;
5967         }
5968
5969         for (unsigned int i = 0; i <= pf_hashmask; i++)
5970                 *killed += pf_killstates_row(kill, &V_pf_idhash[i]);
5971
5972         return;
5973 }
5974
5975 static int
5976 pf_killstates_nv(struct pfioc_nv *nv)
5977 {
5978         struct pf_kstate_kill    kill;
5979         nvlist_t                *nvl = NULL;
5980         void                    *nvlpacked = NULL;
5981         int                      error = 0;
5982         unsigned int             killed = 0;
5983
5984 #define ERROUT(x)       ERROUT_FUNCTION(on_error, x)
5985
5986         if (nv->len > pf_ioctl_maxcount)
5987                 ERROUT(ENOMEM);
5988
5989         nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
5990         if (nvlpacked == NULL)
5991                 ERROUT(ENOMEM);
5992
5993         error = copyin(nv->data, nvlpacked, nv->len);
5994         if (error)
5995                 ERROUT(error);
5996
5997         nvl = nvlist_unpack(nvlpacked, nv->len, 0);
5998         if (nvl == NULL)
5999                 ERROUT(EBADMSG);
6000
6001         error = pf_nvstate_kill_to_kstate_kill(nvl, &kill);
6002         if (error)
6003                 ERROUT(error);
6004
6005         pf_killstates(&kill, &killed);
6006
6007         free(nvlpacked, M_NVLIST);
6008         nvlpacked = NULL;
6009         nvlist_destroy(nvl);
6010         nvl = nvlist_create(0);
6011         if (nvl == NULL)
6012                 ERROUT(ENOMEM);
6013
6014         nvlist_add_number(nvl, "killed", killed);
6015
6016         nvlpacked = nvlist_pack(nvl, &nv->len);
6017         if (nvlpacked == NULL)
6018                 ERROUT(ENOMEM);
6019
6020         if (nv->size == 0)
6021                 ERROUT(0);
6022         else if (nv->size < nv->len)
6023                 ERROUT(ENOSPC);
6024
6025         error = copyout(nvlpacked, nv->data, nv->len);
6026
6027 on_error:
6028         nvlist_destroy(nvl);
6029         free(nvlpacked, M_NVLIST);
6030         return (error);
6031 }
6032
/*
 * DIOCCLRSTATESNV: unpack a nvlist kill request from userland, flush
 * the matching states via pf_clear_states() and return the number of
 * flushed states in a packed nvlist ("killed").
 *
 * Reply protocol: nv->size == 0 is a size probe (nv->len is set, 0 is
 * returned); nv->size < nv->len yields ENOSPC.
 */
static int
pf_clearstates_nv(struct pfioc_nv *nv)
{
	struct pf_kstate_kill	 kill;
	nvlist_t		*nvl = NULL;
	void			*nvlpacked = NULL;
	int			 error = 0;
	unsigned int		 killed;

#define ERROUT(x)       ERROUT_FUNCTION(on_error, x)

	/* Cap the request size before allocating kernel memory for it. */
	if (nv->len > pf_ioctl_maxcount)
		ERROUT(ENOMEM);

	nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
	if (nvlpacked == NULL)
		ERROUT(ENOMEM);

	error = copyin(nv->data, nvlpacked, nv->len);
	if (error)
		ERROUT(error);

	nvl = nvlist_unpack(nvlpacked, nv->len, 0);
	if (nvl == NULL)
		ERROUT(EBADMSG);

	error = pf_nvstate_kill_to_kstate_kill(nvl, &kill);
	if (error)
		ERROUT(error);

	killed = pf_clear_states(&kill);

	/* Reuse nvl/nvlpacked for the reply. */
	free(nvlpacked, M_NVLIST);
	nvlpacked = NULL;
	nvlist_destroy(nvl);
	nvl = nvlist_create(0);
	if (nvl == NULL)
		ERROUT(ENOMEM);

	nvlist_add_number(nvl, "killed", killed);

	nvlpacked = nvlist_pack(nvl, &nv->len);
	if (nvlpacked == NULL)
		ERROUT(ENOMEM);

	if (nv->size == 0)
		ERROUT(0);
	else if (nv->size < nv->len)
		ERROUT(ENOSPC);

	error = copyout(nvlpacked, nv->data, nv->len);

#undef ERROUT
on_error:
	nvlist_destroy(nvl);
	free(nvlpacked, M_NVLIST);
	return (error);
}
6091
/*
 * DIOCGETSTATENV: look up one state by (id, creatorid) taken from the
 * request nvlist and return it to userland as a packed nvlist under
 * the "state" key.
 *
 * Reply protocol: nv->size == 0 is a size probe (nv->len is set, 0 is
 * returned); nv->size < nv->len yields ENOSPC.  ENOENT if no state
 * matches.
 */
static int
pf_getstate(struct pfioc_nv *nv)
{
	nvlist_t		*nvl = NULL, *nvls;
	void			*nvlpacked = NULL;
	struct pf_kstate	*s = NULL;
	int			 error = 0;
	uint64_t		 id, creatorid;

#define ERROUT(x)       ERROUT_FUNCTION(errout, x)

	/* Cap the request size before allocating kernel memory for it. */
	if (nv->len > pf_ioctl_maxcount)
		ERROUT(ENOMEM);

	nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
	if (nvlpacked == NULL)
		ERROUT(ENOMEM);

	error = copyin(nv->data, nvlpacked, nv->len);
	if (error)
		ERROUT(error);

	nvl = nvlist_unpack(nvlpacked, nv->len, 0);
	if (nvl == NULL)
		ERROUT(EBADMSG);

	PFNV_CHK(pf_nvuint64(nvl, "id", &id));
	PFNV_CHK(pf_nvuint64(nvl, "creatorid", &creatorid));

	/* The state comes back locked; it is unlocked at errout. */
	s = pf_find_state_byid(id, creatorid);
	if (s == NULL)
		ERROUT(ENOENT);

	/* Reuse nvl/nvlpacked for the reply. */
	free(nvlpacked, M_NVLIST);
	nvlpacked = NULL;
	nvlist_destroy(nvl);
	nvl = nvlist_create(0);
	if (nvl == NULL)
		ERROUT(ENOMEM);

	nvls = pf_state_to_nvstate(s);
	if (nvls == NULL)
		ERROUT(ENOMEM);

	nvlist_add_nvlist(nvl, "state", nvls);
	nvlist_destroy(nvls);

	nvlpacked = nvlist_pack(nvl, &nv->len);
	if (nvlpacked == NULL)
		ERROUT(ENOMEM);

	if (nv->size == 0)
		ERROUT(0);
	else if (nv->size < nv->len)
		ERROUT(ENOSPC);

	error = copyout(nvlpacked, nv->data, nv->len);

#undef ERROUT
errout:
	if (s != NULL)
		PF_STATE_UNLOCK(s);
	free(nvlpacked, M_NVLIST);
	nvlist_destroy(nvl);
	return (error);
}
6158
6159 /*
6160  * XXX - Check for version mismatch!!!
6161  */
6162
6163 /*
6164  * Duplicate pfctl -Fa operation to get rid of as much as we can.
6165  */
static int
shutdown_pf(void)
{
	int error = 0;
	u_int32_t t[5];
	/* Empty anchor name selects the main ruleset throughout. */
	char nn = '\0';

	do {
		/*
		 * Replace each ruleset with an empty transaction:
		 * begin then immediately commit with no rules added.
		 */
		if ((error = pf_begin_rules(&t[0], PF_RULESET_SCRUB, &nn))
		    != 0) {
			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: SCRUB\n"));
			break;
		}
		if ((error = pf_begin_rules(&t[1], PF_RULESET_FILTER, &nn))
		    != 0) {
			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: FILTER\n"));
			break;		/* XXX: rollback? */
		}
		if ((error = pf_begin_rules(&t[2], PF_RULESET_NAT, &nn))
		    != 0) {
			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: NAT\n"));
			break;		/* XXX: rollback? */
		}
		if ((error = pf_begin_rules(&t[3], PF_RULESET_BINAT, &nn))
		    != 0) {
			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: BINAT\n"));
			break;		/* XXX: rollback? */
		}
		if ((error = pf_begin_rules(&t[4], PF_RULESET_RDR, &nn))
		    != 0) {
			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: RDR\n"));
			break;		/* XXX: rollback? */
		}

		/* XXX: these should always succeed here */
		pf_commit_rules(t[0], PF_RULESET_SCRUB, &nn);
		pf_commit_rules(t[1], PF_RULESET_FILTER, &nn);
		pf_commit_rules(t[2], PF_RULESET_NAT, &nn);
		pf_commit_rules(t[3], PF_RULESET_BINAT, &nn);
		pf_commit_rules(t[4], PF_RULESET_RDR, &nn);

		if ((error = pf_clear_tables()) != 0)
			break;

		/* Same empty-transaction trick for the Ethernet ruleset. */
		if ((error = pf_begin_eth(&t[0], &nn)) != 0) {
			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: eth\n"));
			break;
		}
		pf_commit_eth(t[0], &nn);

#ifdef ALTQ
		if ((error = pf_begin_altq(&t[0])) != 0) {
			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: ALTQ\n"));
			break;
		}
		pf_commit_altq(t[0]);
#endif

		pf_clear_all_states();

		pf_clear_srcnodes(NULL);

		/* status does not use malloced mem so no need to cleanup */
		/* fingerprints and interfaces have their own cleanup code */
	} while(0);

	return (error);
}
6234
6235 static pfil_return_t
6236 pf_check_return(int chk, struct mbuf **m)
6237 {
6238
6239         switch (chk) {
6240         case PF_PASS:
6241                 if (*m == NULL)
6242                         return (PFIL_CONSUMED);
6243                 else
6244                         return (PFIL_PASS);
6245                 break;
6246         default:
6247                 if (*m != NULL) {
6248                         m_freem(*m);
6249                         *m = NULL;
6250                 }
6251                 return (PFIL_DROPPED);
6252         }
6253 }
6254
6255 static pfil_return_t
6256 pf_eth_check_in(struct mbuf **m, struct ifnet *ifp, int flags,
6257     void *ruleset __unused, struct inpcb *inp)
6258 {
6259         int chk;
6260
6261         chk = pf_test_eth(PF_IN, flags, ifp, m, inp);
6262
6263         return (pf_check_return(chk, m));
6264 }
6265
6266 static pfil_return_t
6267 pf_eth_check_out(struct mbuf **m, struct ifnet *ifp, int flags,
6268     void *ruleset __unused, struct inpcb *inp)
6269 {
6270         int chk;
6271
6272         chk = pf_test_eth(PF_OUT, flags, ifp, m, inp);
6273
6274         return (pf_check_return(chk, m));
6275 }
6276
6277 #ifdef INET
6278 static pfil_return_t
6279 pf_check_in(struct mbuf **m, struct ifnet *ifp, int flags,
6280     void *ruleset __unused, struct inpcb *inp)
6281 {
6282         int chk;
6283
6284         chk = pf_test(PF_IN, flags, ifp, m, inp, NULL);
6285
6286         return (pf_check_return(chk, m));
6287 }
6288
6289 static pfil_return_t
6290 pf_check_out(struct mbuf **m, struct ifnet *ifp, int flags,
6291     void *ruleset __unused,  struct inpcb *inp)
6292 {
6293         int chk;
6294
6295         chk = pf_test(PF_OUT, flags, ifp, m, inp, NULL);
6296
6297         return (pf_check_return(chk, m));
6298 }
6299 #endif
6300
6301 #ifdef INET6
/*
 * pfil(9) input hook for IPv6.
 */
static pfil_return_t
pf_check6_in(struct mbuf **m, struct ifnet *ifp, int flags,
    void *ruleset __unused,  struct inpcb *inp)
{
	int chk;

	/*
	 * In case of loopback traffic IPv6 uses the real interface in
	 * order to support scoped addresses.  In order to support stateful
	 * filtering we have to change this to lo0, as is the case in IPv4.
	 */
	CURVNET_SET(ifp->if_vnet);
	chk = pf_test6(PF_IN, flags, (*m)->m_flags & M_LOOP ? V_loif : ifp,
	    m, inp, NULL);
	CURVNET_RESTORE();

	return (pf_check_return(chk, m));
}
6320
6321 static pfil_return_t
6322 pf_check6_out(struct mbuf **m, struct ifnet *ifp, int flags,
6323     void *ruleset __unused,  struct inpcb *inp)
6324 {
6325         int chk;
6326
6327         CURVNET_SET(ifp->if_vnet);
6328         chk = pf_test6(PF_OUT, flags, ifp, m, inp, NULL);
6329         CURVNET_RESTORE();
6330
6331         return (pf_check_return(chk, m));
6332 }
6333 #endif /* INET6 */
6334
/*
 * Per-vnet pfil(9) hook handles for the Ethernet, IPv4 and IPv6 hook
 * points.  Registered by hook_pf_eth()/hook_pf() and removed by the
 * matching dehook functions below.
 */
VNET_DEFINE_STATIC(pfil_hook_t, pf_eth_in_hook);
VNET_DEFINE_STATIC(pfil_hook_t, pf_eth_out_hook);
#define V_pf_eth_in_hook	VNET(pf_eth_in_hook)
#define V_pf_eth_out_hook	VNET(pf_eth_out_hook)

#ifdef INET
VNET_DEFINE_STATIC(pfil_hook_t, pf_ip4_in_hook);
VNET_DEFINE_STATIC(pfil_hook_t, pf_ip4_out_hook);
#define V_pf_ip4_in_hook	VNET(pf_ip4_in_hook)
#define V_pf_ip4_out_hook	VNET(pf_ip4_out_hook)
#endif
#ifdef INET6
VNET_DEFINE_STATIC(pfil_hook_t, pf_ip6_in_hook);
VNET_DEFINE_STATIC(pfil_hook_t, pf_ip6_out_hook);
#define V_pf_ip6_in_hook	VNET(pf_ip6_in_hook)
#define V_pf_ip6_out_hook	VNET(pf_ip6_out_hook)
#endif
6352
/*
 * Register pf's Ethernet-layer pfil hooks (in and out) and link them
 * onto the link-layer pfil head of the current vnet.  Idempotent: a
 * second call while already hooked returns immediately.
 */
static void
hook_pf_eth(void)
{
	struct pfil_hook_args pha = {
		.pa_version = PFIL_VERSION,
		.pa_modname = "pf",
		.pa_type = PFIL_TYPE_ETHERNET,
	};
	struct pfil_link_args pla = {
		.pa_version = PFIL_VERSION,
	};
	int ret __diagused;

	if (atomic_load_bool(&V_pf_pfil_eth_hooked))
		return;

	/* Inbound hook. */
	pha.pa_mbuf_chk = pf_eth_check_in;
	pha.pa_flags = PFIL_IN;
	pha.pa_rulname = "eth-in";
	V_pf_eth_in_hook = pfil_add_hook(&pha);
	pla.pa_flags = PFIL_IN | PFIL_HEADPTR | PFIL_HOOKPTR;
	pla.pa_head = V_link_pfil_head;
	pla.pa_hook = V_pf_eth_in_hook;
	ret = pfil_link(&pla);
	MPASS(ret == 0);
	/* Outbound hook; pha/pla are reused, fields overwritten in place. */
	pha.pa_mbuf_chk = pf_eth_check_out;
	pha.pa_flags = PFIL_OUT;
	pha.pa_rulname = "eth-out";
	V_pf_eth_out_hook = pfil_add_hook(&pha);
	pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR;
	pla.pa_head = V_link_pfil_head;
	pla.pa_hook = V_pf_eth_out_hook;
	ret = pfil_link(&pla);
	MPASS(ret == 0);

	atomic_store_bool(&V_pf_pfil_eth_hooked, true);
}
6390
/*
 * Register pf's IPv4/IPv6 pfil hooks and link them onto the inet and
 * inet6 pfil heads of the current vnet.  When V_pf_filter_local is set,
 * the out hooks are additionally linked onto the "local" heads so that
 * locally originated traffic is filtered too.  Idempotent.
 */
static void
hook_pf(void)
{
	struct pfil_hook_args pha = {
		.pa_version = PFIL_VERSION,
		.pa_modname = "pf",
	};
	struct pfil_link_args pla = {
		.pa_version = PFIL_VERSION,
	};
	int ret __diagused;

	if (atomic_load_bool(&V_pf_pfil_hooked))
		return;

#ifdef INET
	/* IPv4 inbound. */
	pha.pa_type = PFIL_TYPE_IP4;
	pha.pa_mbuf_chk = pf_check_in;
	pha.pa_flags = PFIL_IN;
	pha.pa_rulname = "default-in";
	V_pf_ip4_in_hook = pfil_add_hook(&pha);
	pla.pa_flags = PFIL_IN | PFIL_HEADPTR | PFIL_HOOKPTR;
	pla.pa_head = V_inet_pfil_head;
	pla.pa_hook = V_pf_ip4_in_hook;
	ret = pfil_link(&pla);
	MPASS(ret == 0);
	/* IPv4 outbound. */
	pha.pa_mbuf_chk = pf_check_out;
	pha.pa_flags = PFIL_OUT;
	pha.pa_rulname = "default-out";
	V_pf_ip4_out_hook = pfil_add_hook(&pha);
	pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR;
	pla.pa_head = V_inet_pfil_head;
	pla.pa_hook = V_pf_ip4_out_hook;
	ret = pfil_link(&pla);
	MPASS(ret == 0);
	if (V_pf_filter_local) {
		/* Also filter locally originated IPv4 traffic. */
		pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR;
		pla.pa_head = V_inet_local_pfil_head;
		pla.pa_hook = V_pf_ip4_out_hook;
		ret = pfil_link(&pla);
		MPASS(ret == 0);
	}
#endif
#ifdef INET6
	/* IPv6 inbound. */
	pha.pa_type = PFIL_TYPE_IP6;
	pha.pa_mbuf_chk = pf_check6_in;
	pha.pa_flags = PFIL_IN;
	pha.pa_rulname = "default-in6";
	V_pf_ip6_in_hook = pfil_add_hook(&pha);
	pla.pa_flags = PFIL_IN | PFIL_HEADPTR | PFIL_HOOKPTR;
	pla.pa_head = V_inet6_pfil_head;
	pla.pa_hook = V_pf_ip6_in_hook;
	ret = pfil_link(&pla);
	MPASS(ret == 0);
	/* IPv6 outbound. */
	pha.pa_mbuf_chk = pf_check6_out;
	pha.pa_rulname = "default-out6";
	pha.pa_flags = PFIL_OUT;
	V_pf_ip6_out_hook = pfil_add_hook(&pha);
	pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR;
	pla.pa_head = V_inet6_pfil_head;
	pla.pa_hook = V_pf_ip6_out_hook;
	ret = pfil_link(&pla);
	MPASS(ret == 0);
	if (V_pf_filter_local) {
		/* Also filter locally originated IPv6 traffic. */
		pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR;
		pla.pa_head = V_inet6_local_pfil_head;
		pla.pa_hook = V_pf_ip6_out_hook;
		ret = pfil_link(&pla);
		MPASS(ret == 0);
	}
#endif

	atomic_store_bool(&V_pf_pfil_hooked, true);
}
6465
/*
 * Remove pf's Ethernet pfil hooks from the current vnet.  No-op when
 * not currently hooked.
 */
static void
dehook_pf_eth(void)
{

	if (!atomic_load_bool(&V_pf_pfil_eth_hooked))
		return;

	pfil_remove_hook(V_pf_eth_in_hook);
	pfil_remove_hook(V_pf_eth_out_hook);

	atomic_store_bool(&V_pf_pfil_eth_hooked, false);
}
6478
/*
 * Remove pf's IPv4/IPv6 pfil hooks from the current vnet.  No-op when
 * not currently hooked.
 */
static void
dehook_pf(void)
{

	if (!atomic_load_bool(&V_pf_pfil_hooked))
		return;

#ifdef INET
	pfil_remove_hook(V_pf_ip4_in_hook);
	pfil_remove_hook(V_pf_ip4_out_hook);
#endif
#ifdef INET6
	pfil_remove_hook(V_pf_ip6_in_hook);
	pfil_remove_hook(V_pf_ip6_out_hook);
#endif

	atomic_store_bool(&V_pf_pfil_hooked, false);
}
6497
/*
 * Per-vnet initialization: create the tag UMA zone, the rules and
 * ioctl locks, and the rule/queue tag sets, point V_pf_keth at the
 * main Ethernet anchor's ruleset and attach pf to this vnet.
 * Called from vnet_pf_init() for every vnet.
 */
static void
pf_load_vnet(void)
{
	V_pf_tag_z = uma_zcreate("pf tags", sizeof(struct pf_tagname),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);

	rm_init_flags(&V_pf_rules_lock, "pf rulesets", RM_RECURSE);
	sx_init(&V_pf_ioctl_lock, "pf ioctl");

	pf_init_tagset(&V_pf_tags, &pf_rule_tag_hashsize,
	    PF_RULE_TAG_HASH_SIZE_DEFAULT);
#ifdef ALTQ
	pf_init_tagset(&V_pf_qids, &pf_queue_tag_hashsize,
	    PF_QUEUE_TAG_HASH_SIZE_DEFAULT);
#endif

	V_pf_keth = &V_pf_main_keth_anchor.ruleset;

	pfattach_vnet();
	V_pf_vnet_active = 1;
}
6519
6520 static int
6521 pf_load(void)
6522 {
6523         int error;
6524
6525         sx_init(&pf_end_lock, "pf end thread");
6526
6527         pf_mtag_initialize();
6528
6529         pf_dev = make_dev(&pf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, PF_NAME);
6530         if (pf_dev == NULL)
6531                 return (ENOMEM);
6532
6533         pf_end_threads = 0;
6534         error = kproc_create(pf_purge_thread, NULL, &pf_purge_proc, 0, 0, "pf purge");
6535         if (error != 0)
6536                 return (error);
6537
6538         pfi_initialize();
6539
6540         return (0);
6541 }
6542
/*
 * Per-vnet teardown: unhook pf from pfil, shut down rulesets and
 * states, stop the per-vnet software interrupt and free all per-vnet
 * allocations.  Called from vnet_pf_uninit() for every vnet.  The
 * teardown order matters: hooks first (no new packets), then rules and
 * states, then the SWI, and counters last.
 */
static void
pf_unload_vnet(void)
{
	int ret __diagused;

	V_pf_vnet_active = 0;
	V_pf_status.running = 0;
	dehook_pf();
	dehook_pf_eth();

	PF_RULES_WLOCK();
	pf_syncookies_cleanup();
	shutdown_pf();
	PF_RULES_WUNLOCK();

	/* Make sure we've cleaned up ethernet rules before we continue. */
	NET_EPOCH_DRAIN_CALLBACKS();

	ret = swi_remove(V_pf_swi_cookie);
	MPASS(ret == 0);
	ret = intr_event_destroy(V_pf_swi_ie);
	MPASS(ret == 0);

	pf_unload_vnet_purge();

	pf_normalize_cleanup();
	PF_RULES_WLOCK();
	pfi_cleanup_vnet();
	PF_RULES_WUNLOCK();
	pfr_cleanup();
	pf_osfp_flush();
	pf_cleanup();
	/* The mtag state is torn down only once, from the default vnet. */
	if (IS_DEFAULT_VNET(curvnet))
		pf_mtag_cleanup();

	pf_cleanup_tagset(&V_pf_tags);
#ifdef ALTQ
	pf_cleanup_tagset(&V_pf_qids);
#endif
	uma_zdestroy(V_pf_tag_z);

#ifdef PF_WANT_32_TO_64_COUNTER
	PF_RULES_WLOCK();
	LIST_REMOVE(V_pf_kifmarker, pfik_allkiflist);

	MPASS(LIST_EMPTY(&V_pf_allkiflist));
	MPASS(V_pf_allkifcount == 0);

	LIST_REMOVE(&V_pf_default_rule, allrulelist);
	V_pf_allrulecount--;
	LIST_REMOVE(V_pf_rulemarker, allrulelist);

	/*
	 * There are known pf rule leaks when running the test suite.
	 */
#ifdef notyet
	MPASS(LIST_EMPTY(&V_pf_allrulelist));
	MPASS(V_pf_allrulecount == 0);
#endif

	PF_RULES_WUNLOCK();

	free(V_pf_kifmarker, PFI_MTYPE);
	free(V_pf_rulemarker, M_PFRULE);
#endif

	/* Free counters last as we updated them during shutdown. */
	pf_counter_u64_deinit(&V_pf_default_rule.evaluations);
	for (int i = 0; i < 2; i++) {
		pf_counter_u64_deinit(&V_pf_default_rule.packets[i]);
		pf_counter_u64_deinit(&V_pf_default_rule.bytes[i]);
	}
	counter_u64_free(V_pf_default_rule.states_cur);
	counter_u64_free(V_pf_default_rule.states_tot);
	counter_u64_free(V_pf_default_rule.src_nodes);
	uma_zfree_pcpu(pf_timestamp_pcpu_zone, V_pf_default_rule.timestamp);

	for (int i = 0; i < PFRES_MAX; i++)
		counter_u64_free(V_pf_status.counters[i]);
	for (int i = 0; i < KLCNT_MAX; i++)
		counter_u64_free(V_pf_status.lcounters[i]);
	for (int i = 0; i < FCNT_MAX; i++)
		pf_counter_u64_deinit(&V_pf_status.fcounters[i]);
	for (int i = 0; i < SCNT_MAX; i++)
		counter_u64_free(V_pf_status.scounters[i]);

	rm_destroy(&V_pf_rules_lock);
	sx_destroy(&V_pf_ioctl_lock);
}
6632
/*
 * Final (module unload) teardown: stop the purge kthread, destroy
 * /dev/pf and release the remaining global state.  Ordered via
 * SYSUNINIT so it runs after every vnet_pf_uninit().
 */
static void
pf_unload(void)
{

	sx_xlock(&pf_end_lock);
	pf_end_threads = 1;
	/*
	 * Keep waking the purge thread until it acknowledges shutdown;
	 * presumably pf_purge_thread advances pf_end_threads past 1 just
	 * before exiting — confirm against its implementation.
	 */
	while (pf_end_threads < 2) {
		wakeup_one(pf_purge_thread);
		sx_sleep(pf_purge_proc, &pf_end_lock, 0, "pftmo", 0);
	}
	sx_xunlock(&pf_end_lock);

	if (pf_dev != NULL)
		destroy_dev(pf_dev);

	pfi_cleanup();

	sx_destroy(&pf_end_lock);
}
6652
/*
 * Per-vnet startup glue: runs for each vnet at SI_SUB_PROTO_FIREWALL /
 * SI_ORDER_THIRD and initializes that vnet's pf state.
 */
static void
vnet_pf_init(void *unused __unused)
{

	pf_load_vnet();
}
VNET_SYSINIT(vnet_pf_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD,
    vnet_pf_init, NULL);
6661
6662 static void
6663 vnet_pf_uninit(const void *unused __unused)
6664 {
6665
6666         pf_unload_vnet();
6667
6668 SYSUNINIT(pf_unload, SI_SUB_PROTO_FIREWALL, SI_ORDER_SECOND, pf_unload, NULL);
6669 VNET_SYSUNINIT(vnet_pf_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD,
6670     vnet_pf_uninit, NULL);
6671
6672 static int
6673 pf_modevent(module_t mod, int type, void *data)
6674 {
6675         int error = 0;
6676
6677         switch(type) {
6678         case MOD_LOAD:
6679                 error = pf_load();
6680                 break;
6681         case MOD_UNLOAD:
6682                 /* Handled in SYSUNINIT(pf_unload) to ensure it's done after
6683                  * the vnet_pf_uninit()s */
6684                 break;
6685         default:
6686                 error = EINVAL;
6687                 break;
6688         }
6689
6690         return (error);
6691 }
6692
/*
 * Module linkage: declared at SI_SUB_PROTO_FIREWALL / SI_ORDER_SECOND
 * so pf loads alongside the other packet filters.
 */
static moduledata_t pf_mod = {
	"pf",
	pf_modevent,
	0
};

DECLARE_MODULE(pf, pf_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_SECOND);
MODULE_VERSION(pf, PF_MODVER);