/*
 * pf: convert state retrieval to netlink
 * freebsd.git: sys/netpfil/pf/pf_ioctl.c
 */
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2001 Daniel Hartmeier
5  * Copyright (c) 2002,2003 Henning Brauer
6  * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org>
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  *    - Redistributions of source code must retain the above copyright
14  *      notice, this list of conditions and the following disclaimer.
15  *    - Redistributions in binary form must reproduce the above
16  *      copyright notice, this list of conditions and the following
17  *      disclaimer in the documentation and/or other materials provided
18  *      with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
26  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
30  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Effort sponsored in part by the Defense Advanced Research Projects
34  * Agency (DARPA) and Air Force Research Laboratory, Air Force
35  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
36  *
37  *      $OpenBSD: pf_ioctl.c,v 1.213 2009/02/15 21:46:12 mbalmer Exp $
38  */
39
40 #include <sys/cdefs.h>
41 #include "opt_inet.h"
42 #include "opt_inet6.h"
43 #include "opt_bpf.h"
44 #include "opt_pf.h"
45
46 #include <sys/param.h>
47 #include <sys/_bitset.h>
48 #include <sys/bitset.h>
49 #include <sys/bus.h>
50 #include <sys/conf.h>
51 #include <sys/endian.h>
52 #include <sys/fcntl.h>
53 #include <sys/filio.h>
54 #include <sys/hash.h>
55 #include <sys/interrupt.h>
56 #include <sys/jail.h>
57 #include <sys/kernel.h>
58 #include <sys/kthread.h>
59 #include <sys/lock.h>
60 #include <sys/mbuf.h>
61 #include <sys/module.h>
62 #include <sys/nv.h>
63 #include <sys/proc.h>
64 #include <sys/sdt.h>
65 #include <sys/smp.h>
66 #include <sys/socket.h>
67 #include <sys/sysctl.h>
68 #include <sys/md5.h>
69 #include <sys/ucred.h>
70
71 #include <net/if.h>
72 #include <net/if_var.h>
73 #include <net/if_private.h>
74 #include <net/vnet.h>
75 #include <net/route.h>
76 #include <net/pfil.h>
77 #include <net/pfvar.h>
78 #include <net/if_pfsync.h>
79 #include <net/if_pflog.h>
80
81 #include <netinet/in.h>
82 #include <netinet/ip.h>
83 #include <netinet/ip_var.h>
84 #include <netinet6/ip6_var.h>
85 #include <netinet/ip_icmp.h>
86 #include <netpfil/pf/pf_nl.h>
87 #include <netpfil/pf/pf_nv.h>
88
89 #ifdef INET6
90 #include <netinet/ip6.h>
91 #endif /* INET6 */
92
93 #ifdef ALTQ
94 #include <net/altq/altq.h>
95 #endif
96
97 SDT_PROBE_DEFINE3(pf, ioctl, ioctl, error, "int", "int", "int");
98 SDT_PROBE_DEFINE3(pf, ioctl, function, error, "char *", "int", "int");
99 SDT_PROBE_DEFINE2(pf, ioctl, addrule, error, "int", "int");
100 SDT_PROBE_DEFINE2(pf, ioctl, nvchk, error, "int", "int");
101
102 static struct pf_kpool  *pf_get_kpool(const char *, u_int32_t, u_int8_t,
103                             u_int32_t, u_int8_t, u_int8_t, u_int8_t);
104
105 static void              pf_mv_kpool(struct pf_kpalist *, struct pf_kpalist *);
106 static void              pf_empty_kpool(struct pf_kpalist *);
107 static int               pfioctl(struct cdev *, u_long, caddr_t, int,
108                             struct thread *);
109 static int               pf_begin_eth(uint32_t *, const char *);
110 static void              pf_rollback_eth_cb(struct epoch_context *);
111 static int               pf_rollback_eth(uint32_t, const char *);
112 static int               pf_commit_eth(uint32_t, const char *);
113 static void              pf_free_eth_rule(struct pf_keth_rule *);
114 #ifdef ALTQ
115 static int               pf_begin_altq(u_int32_t *);
116 static int               pf_rollback_altq(u_int32_t);
117 static int               pf_commit_altq(u_int32_t);
118 static int               pf_enable_altq(struct pf_altq *);
119 static int               pf_disable_altq(struct pf_altq *);
120 static uint16_t          pf_qname2qid(const char *);
121 static void              pf_qid_unref(uint16_t);
122 #endif /* ALTQ */
123 static int               pf_begin_rules(u_int32_t *, int, const char *);
124 static int               pf_rollback_rules(u_int32_t, int, char *);
125 static int               pf_setup_pfsync_matching(struct pf_kruleset *);
126 static void              pf_hash_rule_rolling(MD5_CTX *, struct pf_krule *);
127 static void              pf_hash_rule(struct pf_krule *);
128 static void              pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *);
129 static int               pf_commit_rules(u_int32_t, int, char *);
130 static int               pf_addr_setup(struct pf_kruleset *,
131                             struct pf_addr_wrap *, sa_family_t);
132 static void              pf_addr_copyout(struct pf_addr_wrap *);
133 static void              pf_src_node_copy(const struct pf_ksrc_node *,
134                             struct pf_src_node *);
135 #ifdef ALTQ
136 static int               pf_export_kaltq(struct pf_altq *,
137                             struct pfioc_altq_v1 *, size_t);
138 static int               pf_import_kaltq(struct pfioc_altq_v1 *,
139                             struct pf_altq *, size_t);
140 #endif /* ALTQ */
141
142 VNET_DEFINE(struct pf_krule,    pf_default_rule);
143
144 static __inline int             pf_krule_compare(struct pf_krule *,
145                                     struct pf_krule *);
146
147 RB_GENERATE(pf_krule_global, pf_krule, entry_global, pf_krule_compare);
148
149 #ifdef ALTQ
150 VNET_DEFINE_STATIC(int,         pf_altq_running);
151 #define V_pf_altq_running       VNET(pf_altq_running)
152 #endif
153
/* Upper bound on allocatable tag ids; valid tags are 1..TAGID_MAX. */
#define TAGID_MAX        50000

/*
 * One tag name/id mapping.  Each entry is linked into both hash tables of
 * the owning pf_tagset (lookup by name and by id) and is reference counted.
 */
struct pf_tagname {
	TAILQ_ENTRY(pf_tagname)	namehash_entries;	/* name hash chain */
	TAILQ_ENTRY(pf_tagname)	taghash_entries;	/* tag-id hash chain */
	char			name[PF_TAG_NAME_SIZE];	/* tag name */
	uint16_t		tag;	/* assigned id, 1..TAGID_MAX */
	int			ref;	/* reference count */
};

/*
 * A set of tag mappings, hashed both by name and by tag id, plus a bitmap
 * tracking which tag ids are still available for allocation.
 */
struct pf_tagset {
	TAILQ_HEAD(, pf_tagname)	*namehash;
	TAILQ_HEAD(, pf_tagname)	*taghash;
	unsigned int			 mask;	/* bucket count - 1 */
	uint32_t			 seed;	/* seed for the name hash */
	BITSET_DEFINE(, TAGID_MAX)	 avail;	/* set bit = free tag id */
};
170
171 VNET_DEFINE(struct pf_tagset, pf_tags);
172 #define V_pf_tags       VNET(pf_tags)
173 static unsigned int     pf_rule_tag_hashsize;
174 #define PF_RULE_TAG_HASH_SIZE_DEFAULT   128
175 SYSCTL_UINT(_net_pf, OID_AUTO, rule_tag_hashsize, CTLFLAG_RDTUN,
176     &pf_rule_tag_hashsize, PF_RULE_TAG_HASH_SIZE_DEFAULT,
177     "Size of pf(4) rule tag hashtable");
178
179 #ifdef ALTQ
180 VNET_DEFINE(struct pf_tagset, pf_qids);
181 #define V_pf_qids       VNET(pf_qids)
182 static unsigned int     pf_queue_tag_hashsize;
183 #define PF_QUEUE_TAG_HASH_SIZE_DEFAULT  128
184 SYSCTL_UINT(_net_pf, OID_AUTO, queue_tag_hashsize, CTLFLAG_RDTUN,
185     &pf_queue_tag_hashsize, PF_QUEUE_TAG_HASH_SIZE_DEFAULT,
186     "Size of pf(4) queue tag hashtable");
187 #endif
188 VNET_DEFINE(uma_zone_t,  pf_tag_z);
189 #define V_pf_tag_z               VNET(pf_tag_z)
190 static MALLOC_DEFINE(M_PFALTQ, "pf_altq", "pf(4) altq configuration db");
191 static MALLOC_DEFINE(M_PFRULE, "pf_rule", "pf(4) rules");
192
193 #if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE)
194 #error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE
195 #endif
196
197 VNET_DEFINE_STATIC(bool, pf_filter_local) = false;
198 #define V_pf_filter_local       VNET(pf_filter_local)
199 SYSCTL_BOOL(_net_pf, OID_AUTO, filter_local, CTLFLAG_VNET | CTLFLAG_RW,
200     &VNET_NAME(pf_filter_local), false,
201     "Enable filtering for packets delivered to local network stack");
202
203 #ifdef PF_DEFAULT_TO_DROP
204 VNET_DEFINE_STATIC(bool, default_to_drop) = true;
205 #else
206 VNET_DEFINE_STATIC(bool, default_to_drop);
207 #endif
208 #define V_default_to_drop VNET(default_to_drop)
209 SYSCTL_BOOL(_net_pf, OID_AUTO, default_to_drop, CTLFLAG_RDTUN | CTLFLAG_VNET,
210     &VNET_NAME(default_to_drop), false,
211     "Make the default rule drop all packets.");
212
213 static void              pf_init_tagset(struct pf_tagset *, unsigned int *,
214                             unsigned int);
215 static void              pf_cleanup_tagset(struct pf_tagset *);
216 static uint16_t          tagname2hashindex(const struct pf_tagset *, const char *);
217 static uint16_t          tag2hashindex(const struct pf_tagset *, uint16_t);
218 static u_int16_t         tagname2tag(struct pf_tagset *, const char *);
219 static u_int16_t         pf_tagname2tag(const char *);
220 static void              tag_unref(struct pf_tagset *, u_int16_t);
221
222 #define DPFPRINTF(n, x) if (V_pf_status.debug >= (n)) printf x
223
224 struct cdev *pf_dev;
225
/*
 * XXX - These are new and need to be checked when moving to a new version
 */
229 static void              pf_clear_all_states(void);
230 static unsigned int      pf_clear_states(const struct pf_kstate_kill *);
231 static void              pf_killstates(struct pf_kstate_kill *,
232                             unsigned int *);
233 static int               pf_killstates_row(struct pf_kstate_kill *,
234                             struct pf_idhash *);
235 static int               pf_killstates_nv(struct pfioc_nv *);
236 static int               pf_clearstates_nv(struct pfioc_nv *);
237 static int               pf_getstate(struct pfioc_nv *);
238 static int               pf_getstatus(struct pfioc_nv *);
239 static int               pf_clear_tables(void);
240 static void              pf_clear_srcnodes(struct pf_ksrc_node *);
241 static void              pf_kill_srcnodes(struct pfioc_src_node_kill *);
242 static int               pf_keepcounters(struct pfioc_nv *);
243 static void              pf_tbladdr_copyout(struct pf_addr_wrap *);
244
245 /*
246  * Wrapper functions for pfil(9) hooks
247  */
248 static pfil_return_t pf_eth_check_in(struct mbuf **m, struct ifnet *ifp,
249     int flags, void *ruleset __unused, struct inpcb *inp);
250 static pfil_return_t pf_eth_check_out(struct mbuf **m, struct ifnet *ifp,
251     int flags, void *ruleset __unused, struct inpcb *inp);
252 #ifdef INET
253 static pfil_return_t pf_check_in(struct mbuf **m, struct ifnet *ifp,
254     int flags, void *ruleset __unused, struct inpcb *inp);
255 static pfil_return_t pf_check_out(struct mbuf **m, struct ifnet *ifp,
256     int flags, void *ruleset __unused, struct inpcb *inp);
257 #endif
258 #ifdef INET6
259 static pfil_return_t pf_check6_in(struct mbuf **m, struct ifnet *ifp,
260     int flags, void *ruleset __unused, struct inpcb *inp);
261 static pfil_return_t pf_check6_out(struct mbuf **m, struct ifnet *ifp,
262     int flags, void *ruleset __unused, struct inpcb *inp);
263 #endif
264
265 static void             hook_pf_eth(void);
266 static void             hook_pf(void);
267 static void             dehook_pf_eth(void);
268 static void             dehook_pf(void);
269 static int              shutdown_pf(void);
270 static int              pf_load(void);
271 static void             pf_unload(void);
272
/* Character device switch for /dev/pf; all control traffic is ioctl-based. */
static struct cdevsw pf_cdevsw = {
	.d_ioctl =	pfioctl,
	.d_name =	PF_NAME,
	.d_version =	D_VERSION,
};
278
279 VNET_DEFINE_STATIC(bool, pf_pfil_hooked);
280 #define V_pf_pfil_hooked        VNET(pf_pfil_hooked)
281 VNET_DEFINE_STATIC(bool, pf_pfil_eth_hooked);
282 #define V_pf_pfil_eth_hooked    VNET(pf_pfil_eth_hooked)
283
284 /*
285  * We need a flag that is neither hooked nor running to know when
286  * the VNET is "valid".  We primarily need this to control (global)
287  * external event, e.g., eventhandlers.
288  */
289 VNET_DEFINE(int, pf_vnet_active);
290 #define V_pf_vnet_active        VNET(pf_vnet_active)
291
292 int pf_end_threads;
293 struct proc *pf_purge_proc;
294
295 VNET_DEFINE(struct rmlock, pf_rules_lock);
296 VNET_DEFINE_STATIC(struct sx, pf_ioctl_lock);
297 #define V_pf_ioctl_lock         VNET(pf_ioctl_lock)
298 struct sx                       pf_end_lock;
299
300 /* pfsync */
301 VNET_DEFINE(pfsync_state_import_t *, pfsync_state_import_ptr);
302 VNET_DEFINE(pfsync_insert_state_t *, pfsync_insert_state_ptr);
303 VNET_DEFINE(pfsync_update_state_t *, pfsync_update_state_ptr);
304 VNET_DEFINE(pfsync_delete_state_t *, pfsync_delete_state_ptr);
305 VNET_DEFINE(pfsync_clear_states_t *, pfsync_clear_states_ptr);
306 VNET_DEFINE(pfsync_defer_t *, pfsync_defer_ptr);
307 pfsync_detach_ifnet_t *pfsync_detach_ifnet_ptr;
308
309 /* pflog */
310 pflog_packet_t                  *pflog_packet_ptr = NULL;
311
312 /*
313  * Copy a user-provided string, returning an error if truncation would occur.
314  * Avoid scanning past "sz" bytes in the source string since there's no
315  * guarantee that it's nul-terminated.
316  */
static int
pf_user_strcpy(char *dst, const char *src, size_t sz)
{
	size_t len;

	/*
	 * Find the terminator without scanning more than sz bytes of the
	 * user-supplied source; it is not guaranteed to be nul-terminated.
	 */
	for (len = 0; len < sz; len++)
		if (src[len] == '\0')
			break;

	/* No terminator within sz bytes means the copy would truncate. */
	if (len == sz)
		return (EINVAL);

	/* Copy the string including its terminator. */
	memcpy(dst, src, len + 1);
	return (0);
}
325
/*
 * Per-vnet pf attach: initialize all pf subsystems, the default rule, the
 * default timeouts and the status counters for the current vnet.
 */
static void
pfattach_vnet(void)
{
	u_int32_t *my_timeout = V_pf_default_rule.timeout;

	bzero(&V_pf_status, sizeof(V_pf_status));

	/* Bring up the pf core, tables, interfaces, scrub and syncookies. */
	pf_initialize();
	pfr_initialize();
	pfi_initialize_vnet();
	pf_normalize_init();
	pf_syncookies_init();

	/* Default hard limits on state and source-node counts. */
	V_pf_limits[PF_LIMIT_STATES].limit = PFSTATE_HIWAT;
	V_pf_limits[PF_LIMIT_SRC_NODES].limit = PFSNODE_HIWAT;

	RB_INIT(&V_pf_anchors);
	pf_init_kruleset(&pf_main_ruleset);

	pf_init_keth(V_pf_keth);

	/* default rule should never be garbage collected */
	V_pf_default_rule.entries.tqe_prev = &V_pf_default_rule.entries.tqe_next;
	V_pf_default_rule.action = V_default_to_drop ? PF_DROP : PF_PASS;
	V_pf_default_rule.nr = -1;
	V_pf_default_rule.rtableid = -1;

	/* Counters attached to the default rule. */
	pf_counter_u64_init(&V_pf_default_rule.evaluations, M_WAITOK);
	for (int i = 0; i < 2; i++) {
		pf_counter_u64_init(&V_pf_default_rule.packets[i], M_WAITOK);
		pf_counter_u64_init(&V_pf_default_rule.bytes[i], M_WAITOK);
	}
	V_pf_default_rule.states_cur = counter_u64_alloc(M_WAITOK);
	V_pf_default_rule.states_tot = counter_u64_alloc(M_WAITOK);
	V_pf_default_rule.src_nodes = counter_u64_alloc(M_WAITOK);

	V_pf_default_rule.timestamp = uma_zalloc_pcpu(pf_timestamp_pcpu_zone,
	    M_WAITOK | M_ZERO);

#ifdef PF_WANT_32_TO_64_COUNTER
	/*
	 * NOTE(review): these marker entries appear to anchor the 32->64 bit
	 * counter maintenance walker on the global kif/rule lists — confirm
	 * against the walker implementation.
	 */
	V_pf_kifmarker = malloc(sizeof(*V_pf_kifmarker), PFI_MTYPE, M_WAITOK | M_ZERO);
	V_pf_rulemarker = malloc(sizeof(*V_pf_rulemarker), M_PFRULE, M_WAITOK | M_ZERO);
	PF_RULES_WLOCK();
	LIST_INSERT_HEAD(&V_pf_allkiflist, V_pf_kifmarker, pfik_allkiflist);
	LIST_INSERT_HEAD(&V_pf_allrulelist, &V_pf_default_rule, allrulelist);
	V_pf_allrulecount++;
	LIST_INSERT_HEAD(&V_pf_allrulelist, V_pf_rulemarker, allrulelist);
	PF_RULES_WUNLOCK();
#endif

	/* initialize default timeouts */
	my_timeout[PFTM_TCP_FIRST_PACKET] = PFTM_TCP_FIRST_PACKET_VAL;
	my_timeout[PFTM_TCP_OPENING] = PFTM_TCP_OPENING_VAL;
	my_timeout[PFTM_TCP_ESTABLISHED] = PFTM_TCP_ESTABLISHED_VAL;
	my_timeout[PFTM_TCP_CLOSING] = PFTM_TCP_CLOSING_VAL;
	my_timeout[PFTM_TCP_FIN_WAIT] = PFTM_TCP_FIN_WAIT_VAL;
	my_timeout[PFTM_TCP_CLOSED] = PFTM_TCP_CLOSED_VAL;
	my_timeout[PFTM_UDP_FIRST_PACKET] = PFTM_UDP_FIRST_PACKET_VAL;
	my_timeout[PFTM_UDP_SINGLE] = PFTM_UDP_SINGLE_VAL;
	my_timeout[PFTM_UDP_MULTIPLE] = PFTM_UDP_MULTIPLE_VAL;
	my_timeout[PFTM_ICMP_FIRST_PACKET] = PFTM_ICMP_FIRST_PACKET_VAL;
	my_timeout[PFTM_ICMP_ERROR_REPLY] = PFTM_ICMP_ERROR_REPLY_VAL;
	my_timeout[PFTM_OTHER_FIRST_PACKET] = PFTM_OTHER_FIRST_PACKET_VAL;
	my_timeout[PFTM_OTHER_SINGLE] = PFTM_OTHER_SINGLE_VAL;
	my_timeout[PFTM_OTHER_MULTIPLE] = PFTM_OTHER_MULTIPLE_VAL;
	my_timeout[PFTM_FRAG] = PFTM_FRAG_VAL;
	my_timeout[PFTM_INTERVAL] = PFTM_INTERVAL_VAL;
	my_timeout[PFTM_SRC_NODE] = PFTM_SRC_NODE_VAL;
	my_timeout[PFTM_TS_DIFF] = PFTM_TS_DIFF_VAL;
	my_timeout[PFTM_ADAPTIVE_START] = PFSTATE_ADAPT_START;
	my_timeout[PFTM_ADAPTIVE_END] = PFSTATE_ADAPT_END;

	V_pf_status.debug = PF_DEBUG_URGENT;
	/*
	 * XXX This is different than in OpenBSD where reassembly is enabled by
	 * default. In FreeBSD we expect people to still use scrub rules and
	 * switch to the new syntax later. Only when they switch they must
	 * explicitly enable reassembly. We could change the default once the
	 * scrub rule functionality is hopefully removed some day in future.
	 */
	V_pf_status.reass = 0;

	V_pf_pfil_hooked = false;
	V_pf_pfil_eth_hooked = false;

	/* XXX do our best to avoid a conflict */
	V_pf_status.hostid = arc4random();

	/* Allocate the per-category status counter arrays. */
	for (int i = 0; i < PFRES_MAX; i++)
		V_pf_status.counters[i] = counter_u64_alloc(M_WAITOK);
	for (int i = 0; i < KLCNT_MAX; i++)
		V_pf_status.lcounters[i] = counter_u64_alloc(M_WAITOK);
	for (int i = 0; i < FCNT_MAX; i++)
		pf_counter_u64_init(&V_pf_status.fcounters[i], M_WAITOK);
	for (int i = 0; i < SCNT_MAX; i++)
		V_pf_status.scounters[i] = counter_u64_alloc(M_WAITOK);

	/* Software interrupt used by pf_intr() for deferred sends. */
	if (swi_add(&V_pf_swi_ie, "pf send", pf_intr, curvnet, SWI_NET,
	    INTR_MPSAFE, &V_pf_swi_cookie) != 0)
		/* XXXGL: leaked all above. */
		return;
}
428
429 static struct pf_kpool *
430 pf_get_kpool(const char *anchor, u_int32_t ticket, u_int8_t rule_action,
431     u_int32_t rule_number, u_int8_t r_last, u_int8_t active,
432     u_int8_t check_ticket)
433 {
434         struct pf_kruleset      *ruleset;
435         struct pf_krule         *rule;
436         int                      rs_num;
437
438         ruleset = pf_find_kruleset(anchor);
439         if (ruleset == NULL)
440                 return (NULL);
441         rs_num = pf_get_ruleset_number(rule_action);
442         if (rs_num >= PF_RULESET_MAX)
443                 return (NULL);
444         if (active) {
445                 if (check_ticket && ticket !=
446                     ruleset->rules[rs_num].active.ticket)
447                         return (NULL);
448                 if (r_last)
449                         rule = TAILQ_LAST(ruleset->rules[rs_num].active.ptr,
450                             pf_krulequeue);
451                 else
452                         rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr);
453         } else {
454                 if (check_ticket && ticket !=
455                     ruleset->rules[rs_num].inactive.ticket)
456                         return (NULL);
457                 if (r_last)
458                         rule = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr,
459                             pf_krulequeue);
460                 else
461                         rule = TAILQ_FIRST(ruleset->rules[rs_num].inactive.ptr);
462         }
463         if (!r_last) {
464                 while ((rule != NULL) && (rule->nr != rule_number))
465                         rule = TAILQ_NEXT(rule, entries);
466         }
467         if (rule == NULL)
468                 return (NULL);
469
470         return (&rule->rpool);
471 }
472
473 static void
474 pf_mv_kpool(struct pf_kpalist *poola, struct pf_kpalist *poolb)
475 {
476         struct pf_kpooladdr     *mv_pool_pa;
477
478         while ((mv_pool_pa = TAILQ_FIRST(poola)) != NULL) {
479                 TAILQ_REMOVE(poola, mv_pool_pa, entries);
480                 TAILQ_INSERT_TAIL(poolb, mv_pool_pa, entries);
481         }
482 }
483
484 static void
485 pf_empty_kpool(struct pf_kpalist *poola)
486 {
487         struct pf_kpooladdr *pa;
488
489         while ((pa = TAILQ_FIRST(poola)) != NULL) {
490                 switch (pa->addr.type) {
491                 case PF_ADDR_DYNIFTL:
492                         pfi_dynaddr_remove(pa->addr.p.dyn);
493                         break;
494                 case PF_ADDR_TABLE:
495                         /* XXX: this could be unfinished pooladdr on pabuf */
496                         if (pa->addr.p.tbl != NULL)
497                                 pfr_detach_table(pa->addr.p.tbl);
498                         break;
499                 }
500                 if (pa->kif)
501                         pfi_kkif_unref(pa->kif);
502                 TAILQ_REMOVE(poola, pa, entries);
503                 free(pa, M_PFRULE);
504         }
505 }
506
/*
 * Detach "rule" from "rulequeue" and append it to the global queue of
 * unlinked rules for deferred destruction.  Caller holds both the rules
 * write lock and the unlinked-rules lock.
 */
static void
pf_unlink_rule_locked(struct pf_krulequeue *rulequeue, struct pf_krule *rule)
{

	PF_RULES_WASSERT();
	PF_UNLNKDRULES_ASSERT();

	TAILQ_REMOVE(rulequeue, rule, entries);

	/*
	 * NOTE(review): PFRULE_REFS appears to tell the purge logic the rule
	 * may still be referenced and must not be freed immediately — verify
	 * against the unlinked-rules purge path.
	 */
	rule->rule_ref |= PFRULE_REFS;
	TAILQ_INSERT_TAIL(&V_pf_unlinked_rules, rule, entries);
}
519
/*
 * Locking wrapper around pf_unlink_rule_locked(): takes the
 * unlinked-rules lock around the move.  Caller holds the rules write lock.
 */
static void
pf_unlink_rule(struct pf_krulequeue *rulequeue, struct pf_krule *rule)
{

	PF_RULES_WASSERT();

	PF_UNLNKDRULES_LOCK();
	pf_unlink_rule_locked(rulequeue, rule);
	PF_UNLNKDRULES_UNLOCK();
}
530
/*
 * Release every resource held by an ethernet rule, then free the rule.
 * A NULL rule is a no-op.  Caller holds the rules write lock.
 */
static void
pf_free_eth_rule(struct pf_keth_rule *rule)
{
	PF_RULES_WASSERT();

	if (rule == NULL)
		return;

	/* Tag references; tag value 0 means no tag was assigned. */
	if (rule->tag)
		tag_unref(&V_pf_tags, rule->tag);
	if (rule->match_tag)
		tag_unref(&V_pf_tags, rule->match_tag);
#ifdef ALTQ
	pf_qid_unref(rule->qid);
#endif

	/* Interface references (bridge target and match interface). */
	if (rule->bridge_to)
		pfi_kkif_unref(rule->bridge_to);
	if (rule->kif)
		pfi_kkif_unref(rule->kif);

	/* Table references held by the IP source/destination matches. */
	if (rule->ipsrc.addr.type == PF_ADDR_TABLE)
		pfr_detach_table(rule->ipsrc.addr.p.tbl);
	if (rule->ipdst.addr.type == PF_ADDR_TABLE)
		pfr_detach_table(rule->ipdst.addr.p.tbl);

	/* Per-rule statistics counters. */
	counter_u64_free(rule->evaluations);
	for (int i = 0; i < 2; i++) {
		counter_u64_free(rule->packets[i]);
		counter_u64_free(rule->bytes[i]);
	}
	uma_zfree_pcpu(pf_timestamp_pcpu_zone, rule->timestamp);
	pf_keth_anchor_remove(rule);

	free(rule, M_PFRULE);
}
567
/*
 * Release all references held by a rule (tags, queues, dynamic addresses,
 * tables, interface, anchor, address pool) and free it.  Caller holds the
 * rules write lock and the pf config lock.
 */
void
pf_free_rule(struct pf_krule *rule)
{

	PF_RULES_WASSERT();
	PF_CONFIG_ASSERT();

	/* Tag references; tag value 0 means no tag was assigned. */
	if (rule->tag)
		tag_unref(&V_pf_tags, rule->tag);
	if (rule->match_tag)
		tag_unref(&V_pf_tags, rule->match_tag);
#ifdef ALTQ
	/* Avoid dropping the same queue id twice when pqid aliases qid. */
	if (rule->pqid != rule->qid)
		pf_qid_unref(rule->pqid);
	pf_qid_unref(rule->qid);
#endif
	/* Dynamic-interface or table references in the address matches. */
	switch (rule->src.addr.type) {
	case PF_ADDR_DYNIFTL:
		pfi_dynaddr_remove(rule->src.addr.p.dyn);
		break;
	case PF_ADDR_TABLE:
		pfr_detach_table(rule->src.addr.p.tbl);
		break;
	}
	switch (rule->dst.addr.type) {
	case PF_ADDR_DYNIFTL:
		pfi_dynaddr_remove(rule->dst.addr.p.dyn);
		break;
	case PF_ADDR_TABLE:
		pfr_detach_table(rule->dst.addr.p.tbl);
		break;
	}
	if (rule->overload_tbl)
		pfr_detach_table(rule->overload_tbl);
	if (rule->kif)
		pfi_kkif_unref(rule->kif);
	pf_kanchor_remove(rule);
	pf_empty_kpool(&rule->rpool.list);

	pf_krule_free(rule);
}
609
610 static void
611 pf_init_tagset(struct pf_tagset *ts, unsigned int *tunable_size,
612     unsigned int default_size)
613 {
614         unsigned int i;
615         unsigned int hashsize;
616
617         if (*tunable_size == 0 || !powerof2(*tunable_size))
618                 *tunable_size = default_size;
619
620         hashsize = *tunable_size;
621         ts->namehash = mallocarray(hashsize, sizeof(*ts->namehash), M_PFHASH,
622             M_WAITOK);
623         ts->taghash = mallocarray(hashsize, sizeof(*ts->taghash), M_PFHASH,
624             M_WAITOK);
625         ts->mask = hashsize - 1;
626         ts->seed = arc4random();
627         for (i = 0; i < hashsize; i++) {
628                 TAILQ_INIT(&ts->namehash[i]);
629                 TAILQ_INIT(&ts->taghash[i]);
630         }
631         BIT_FILL(TAGID_MAX, &ts->avail);
632 }
633
634 static void
635 pf_cleanup_tagset(struct pf_tagset *ts)
636 {
637         unsigned int i;
638         unsigned int hashsize;
639         struct pf_tagname *t, *tmp;
640
641         /*
642          * Only need to clean up one of the hashes as each tag is hashed
643          * into each table.
644          */
645         hashsize = ts->mask + 1;
646         for (i = 0; i < hashsize; i++)
647                 TAILQ_FOREACH_SAFE(t, &ts->namehash[i], namehash_entries, tmp)
648                         uma_zfree(V_pf_tag_z, t);
649
650         free(ts->namehash, M_PFHASH);
651         free(ts->taghash, M_PFHASH);
652 }
653
654 static uint16_t
655 tagname2hashindex(const struct pf_tagset *ts, const char *tagname)
656 {
657         size_t len;
658
659         len = strnlen(tagname, PF_TAG_NAME_SIZE - 1);
660         return (murmur3_32_hash(tagname, len, ts->seed) & ts->mask);
661 }
662
663 static uint16_t
664 tag2hashindex(const struct pf_tagset *ts, uint16_t tag)
665 {
666
667         return (tag & ts->mask);
668 }
669
670 static u_int16_t
671 tagname2tag(struct pf_tagset *ts, const char *tagname)
672 {
673         struct pf_tagname       *tag;
674         u_int32_t                index;
675         u_int16_t                new_tagid;
676
677         PF_RULES_WASSERT();
678
679         index = tagname2hashindex(ts, tagname);
680         TAILQ_FOREACH(tag, &ts->namehash[index], namehash_entries)
681                 if (strcmp(tagname, tag->name) == 0) {
682                         tag->ref++;
683                         return (tag->tag);
684                 }
685
686         /*
687          * new entry
688          *
689          * to avoid fragmentation, we do a linear search from the beginning
690          * and take the first free slot we find.
691          */
692         new_tagid = BIT_FFS(TAGID_MAX, &ts->avail);
693         /*
694          * Tags are 1-based, with valid tags in the range [1..TAGID_MAX].
695          * BIT_FFS() returns a 1-based bit number, with 0 indicating no bits
696          * set.  It may also return a bit number greater than TAGID_MAX due
697          * to rounding of the number of bits in the vector up to a multiple
698          * of the vector word size at declaration/allocation time.
699          */
700         if ((new_tagid == 0) || (new_tagid > TAGID_MAX))
701                 return (0);
702
703         /* Mark the tag as in use.  Bits are 0-based for BIT_CLR() */
704         BIT_CLR(TAGID_MAX, new_tagid - 1, &ts->avail);
705
706         /* allocate and fill new struct pf_tagname */
707         tag = uma_zalloc(V_pf_tag_z, M_NOWAIT);
708         if (tag == NULL)
709                 return (0);
710         strlcpy(tag->name, tagname, sizeof(tag->name));
711         tag->tag = new_tagid;
712         tag->ref = 1;
713
714         /* Insert into namehash */
715         TAILQ_INSERT_TAIL(&ts->namehash[index], tag, namehash_entries);
716
717         /* Insert into taghash */
718         index = tag2hashindex(ts, new_tagid);
719         TAILQ_INSERT_TAIL(&ts->taghash[index], tag, taghash_entries);
720
721         return (tag->tag);
722 }
723
724 static void
725 tag_unref(struct pf_tagset *ts, u_int16_t tag)
726 {
727         struct pf_tagname       *t;
728         uint16_t                 index;
729
730         PF_RULES_WASSERT();
731
732         index = tag2hashindex(ts, tag);
733         TAILQ_FOREACH(t, &ts->taghash[index], taghash_entries)
734                 if (tag == t->tag) {
735                         if (--t->ref == 0) {
736                                 TAILQ_REMOVE(&ts->taghash[index], t,
737                                     taghash_entries);
738                                 index = tagname2hashindex(ts, t->name);
739                                 TAILQ_REMOVE(&ts->namehash[index], t,
740                                     namehash_entries);
741                                 /* Bits are 0-based for BIT_SET() */
742                                 BIT_SET(TAGID_MAX, tag - 1, &ts->avail);
743                                 uma_zfree(V_pf_tag_z, t);
744                         }
745                         break;
746                 }
747 }
748
/* Allocate or reference a tag id for "tagname" in the global rule tag set. */
static uint16_t
pf_tagname2tag(const char *tagname)
{
	return (tagname2tag(&V_pf_tags, tagname));
}
754
755 static int
756 pf_begin_eth(uint32_t *ticket, const char *anchor)
757 {
758         struct pf_keth_rule *rule, *tmp;
759         struct pf_keth_ruleset *rs;
760
761         PF_RULES_WASSERT();
762
763         rs = pf_find_or_create_keth_ruleset(anchor);
764         if (rs == NULL)
765                 return (EINVAL);
766
767         /* Purge old inactive rules. */
768         TAILQ_FOREACH_SAFE(rule, rs->inactive.rules, entries,
769             tmp) {
770                 TAILQ_REMOVE(rs->inactive.rules, rule,
771                     entries);
772                 pf_free_eth_rule(rule);
773         }
774
775         *ticket = ++rs->inactive.ticket;
776         rs->inactive.open = 1;
777
778         return (0);
779 }
780
/*
 * Epoch callback performing a deferred rollback of an ethernet ruleset's
 * inactive rules.  Runs once prior readers have drained; enters the
 * ruleset's vnet before taking the rules lock.
 */
static void
pf_rollback_eth_cb(struct epoch_context *ctx)
{
	struct pf_keth_ruleset *rs;

	rs = __containerof(ctx, struct pf_keth_ruleset, epoch_ctx);

	CURVNET_SET(rs->vnet);

	PF_RULES_WLOCK();
	pf_rollback_eth(rs->inactive.ticket,
	    rs->anchor ? rs->anchor->path : "");
	PF_RULES_WUNLOCK();

	CURVNET_RESTORE();
}
797
/*
 * Abort the ethernet-ruleset transaction identified by "ticket" on
 * "anchor": free all inactive rules and close the transaction.  Returns
 * EINVAL for an unknown anchor; a stale ticket or an already-closed
 * transaction is treated as success (0) since there is nothing to undo.
 */
static int
pf_rollback_eth(uint32_t ticket, const char *anchor)
{
	struct pf_keth_rule *rule, *tmp;
	struct pf_keth_ruleset *rs;

	PF_RULES_WASSERT();

	rs = pf_find_keth_ruleset(anchor);
	if (rs == NULL)
		return (EINVAL);

	/* No open transaction matching this ticket: nothing to roll back. */
	if (!rs->inactive.open ||
	    ticket != rs->inactive.ticket)
		return (0);

	/* Purge old inactive rules. */
	TAILQ_FOREACH_SAFE(rule, rs->inactive.rules, entries,
	    tmp) {
		TAILQ_REMOVE(rs->inactive.rules, rule, entries);
		pf_free_eth_rule(rule);
	}

	rs->inactive.open = 0;

	/* The ruleset may now be empty and eligible for removal. */
	pf_remove_if_empty_keth_ruleset(rs);

	return (0);
}
827
/*
 * Point every rule between head[i] and the current rule at cur for skip
 * field i, then advance head[i] to cur.  Used while sweeping the rule
 * list to build the skip-step shortcuts below.
 */
#define PF_SET_SKIP_STEPS(i)					\
	do {							\
		while (head[i] != cur) {			\
			head[i]->skip[i].ptr = cur;		\
			head[i] = TAILQ_NEXT(head[i], entries);	\
		}						\
	} while (0)

/*
 * Compute skip steps for an Ethernet ruleset.  For each skippable field
 * (interface, direction, protocol, MAC src/dst, IP src/dst) a rule's
 * skip pointer names the next rule that differs in that field, letting
 * the evaluation loop jump over runs of rules that cannot match.
 */
static void
pf_eth_calc_skip_steps(struct pf_keth_ruleq *rules)
{
	struct pf_keth_rule *cur, *prev, *head[PFE_SKIP_COUNT];
	int i;

	cur = TAILQ_FIRST(rules);
	prev = cur;
	/* Every skip chain starts at the first rule. */
	for (i = 0; i < PFE_SKIP_COUNT; ++i)
		head[i] = cur;
	while (cur != NULL) {
		/* Whenever a field changes, close out that field's chain. */
		if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
			PF_SET_SKIP_STEPS(PFE_SKIP_IFP);
		if (cur->direction != prev->direction)
			PF_SET_SKIP_STEPS(PFE_SKIP_DIR);
		if (cur->proto != prev->proto)
			PF_SET_SKIP_STEPS(PFE_SKIP_PROTO);
		if (memcmp(&cur->src, &prev->src, sizeof(cur->src)) != 0)
			PF_SET_SKIP_STEPS(PFE_SKIP_SRC_ADDR);
		if (memcmp(&cur->dst, &prev->dst, sizeof(cur->dst)) != 0)
			PF_SET_SKIP_STEPS(PFE_SKIP_DST_ADDR);
		if (cur->ipsrc.neg != prev->ipsrc.neg ||
		    pf_addr_wrap_neq(&cur->ipsrc.addr, &prev->ipsrc.addr))
			PF_SET_SKIP_STEPS(PFE_SKIP_SRC_IP_ADDR);
		if (cur->ipdst.neg != prev->ipdst.neg ||
		    pf_addr_wrap_neq(&cur->ipdst.addr, &prev->ipdst.addr))
			PF_SET_SKIP_STEPS(PFE_SKIP_DST_IP_ADDR);

		prev = cur;
		cur = TAILQ_NEXT(cur, entries);
	}
	/* Terminate all chains (cur == NULL marks end of list). */
	for (i = 0; i < PFE_SKIP_COUNT; ++i)
		PF_SET_SKIP_STEPS(i);
}
870
/*
 * Commit an open Ethernet-rules transaction: swap the inactive rule
 * list into place as the new active set.  The swap is a single atomic
 * pointer store so lock-free readers always see a consistent list; the
 * replaced rules are freed later via an epoch callback, once no reader
 * can still hold a reference.
 */
static int
pf_commit_eth(uint32_t ticket, const char *anchor)
{
	struct pf_keth_ruleq *rules;
	struct pf_keth_ruleset *rs;

	rs = pf_find_keth_ruleset(anchor);
	if (rs == NULL) {
		return (EINVAL);
	}

	/* Only the holder of the current ticket may commit. */
	if (!rs->inactive.open ||
	    ticket != rs->inactive.ticket)
		return (EBUSY);

	PF_RULES_WASSERT();

	pf_eth_calc_skip_steps(rs->inactive.rules);

	/* Atomically publish the new list; keep the old one aside. */
	rules = rs->active.rules;
	ck_pr_store_ptr(&rs->active.rules, rs->inactive.rules);
	rs->inactive.rules = rules;
	rs->inactive.ticket = rs->active.ticket;

	/* Clean up inactive rules (i.e. previously active rules), only when
	 * we're sure they're no longer used. */
	NET_EPOCH_CALL(pf_rollback_eth_cb, &rs->epoch_ctx);

	return (0);
}
901
902 #ifdef ALTQ
/*
 * Map an ALTQ queue name to its numeric queue id, allocating (or
 * taking a reference on) a tag from the per-vnet qid namespace.
 */
static uint16_t
pf_qname2qid(const char *qname)
{
	return (tagname2tag(&V_pf_qids, qname));
}
908
/* Drop one reference on a queue id obtained from pf_qname2qid(). */
static void
pf_qid_unref(uint16_t qid)
{
	tag_unref(&V_pf_qids, qid);
}
914
915 static int
916 pf_begin_altq(u_int32_t *ticket)
917 {
918         struct pf_altq  *altq, *tmp;
919         int              error = 0;
920
921         PF_RULES_WASSERT();
922
923         /* Purge the old altq lists */
924         TAILQ_FOREACH_SAFE(altq, V_pf_altq_ifs_inactive, entries, tmp) {
925                 if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
926                         /* detach and destroy the discipline */
927                         error = altq_remove(altq);
928                 }
929                 free(altq, M_PFALTQ);
930         }
931         TAILQ_INIT(V_pf_altq_ifs_inactive);
932         TAILQ_FOREACH_SAFE(altq, V_pf_altqs_inactive, entries, tmp) {
933                 pf_qid_unref(altq->qid);
934                 free(altq, M_PFALTQ);
935         }
936         TAILQ_INIT(V_pf_altqs_inactive);
937         if (error)
938                 return (error);
939         *ticket = ++V_ticket_altqs_inactive;
940         V_altqs_inactive_open = 1;
941         return (0);
942 }
943
944 static int
945 pf_rollback_altq(u_int32_t ticket)
946 {
947         struct pf_altq  *altq, *tmp;
948         int              error = 0;
949
950         PF_RULES_WASSERT();
951
952         if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive)
953                 return (0);
954         /* Purge the old altq lists */
955         TAILQ_FOREACH_SAFE(altq, V_pf_altq_ifs_inactive, entries, tmp) {
956                 if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
957                         /* detach and destroy the discipline */
958                         error = altq_remove(altq);
959                 }
960                 free(altq, M_PFALTQ);
961         }
962         TAILQ_INIT(V_pf_altq_ifs_inactive);
963         TAILQ_FOREACH_SAFE(altq, V_pf_altqs_inactive, entries, tmp) {
964                 pf_qid_unref(altq->qid);
965                 free(altq, M_PFALTQ);
966         }
967         TAILQ_INIT(V_pf_altqs_inactive);
968         V_altqs_inactive_open = 0;
969         return (error);
970 }
971
/*
 * Commit an open ALTQ transaction: swap the inactive lists into place
 * as the active configuration, attach/enable the new disciplines, and
 * purge the previously active lists.  Requires the rules write lock.
 */
static int
pf_commit_altq(u_int32_t ticket)
{
	struct pf_altqqueue	*old_altqs, *old_altq_ifs;
	struct pf_altq		*altq, *tmp;
	int			 err, error = 0;

	PF_RULES_WASSERT();

	if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive)
		return (EBUSY);

	/* swap altqs, keep the old. */
	old_altqs = V_pf_altqs_active;
	old_altq_ifs = V_pf_altq_ifs_active;
	V_pf_altqs_active = V_pf_altqs_inactive;
	V_pf_altq_ifs_active = V_pf_altq_ifs_inactive;
	V_pf_altqs_inactive = old_altqs;
	V_pf_altq_ifs_inactive = old_altq_ifs;
	V_ticket_altqs_active = V_ticket_altqs_inactive;

	/* Attach new disciplines */
	TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) {
		if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
			/* attach the discipline */
			error = altq_pfattach(altq);
			if (error == 0 && V_pf_altq_running)
				error = pf_enable_altq(altq);
			/*
			 * NOTE(review): an attach failure returns with the
			 * swap already done and the transaction still open;
			 * the old lists are not purged in that case.
			 */
			if (error != 0)
				return (error);
		}
	}

	/* Purge the old altq lists */
	TAILQ_FOREACH_SAFE(altq, V_pf_altq_ifs_inactive, entries, tmp) {
		if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
			/* detach and destroy the discipline */
			if (V_pf_altq_running)
				error = pf_disable_altq(altq);
			/* Remember the first failure but keep tearing down. */
			err = altq_pfdetach(altq);
			if (err != 0 && error == 0)
				error = err;
			err = altq_remove(altq);
			if (err != 0 && error == 0)
				error = err;
		}
		free(altq, M_PFALTQ);
	}
	TAILQ_INIT(V_pf_altq_ifs_inactive);
	TAILQ_FOREACH_SAFE(altq, V_pf_altqs_inactive, entries, tmp) {
		pf_qid_unref(altq->qid);
		free(altq, M_PFALTQ);
	}
	TAILQ_INIT(V_pf_altqs_inactive);

	V_altqs_inactive_open = 0;
	return (error);
}
1030
1031 static int
1032 pf_enable_altq(struct pf_altq *altq)
1033 {
1034         struct ifnet            *ifp;
1035         struct tb_profile        tb;
1036         int                      error = 0;
1037
1038         if ((ifp = ifunit(altq->ifname)) == NULL)
1039                 return (EINVAL);
1040
1041         if (ifp->if_snd.altq_type != ALTQT_NONE)
1042                 error = altq_enable(&ifp->if_snd);
1043
1044         /* set tokenbucket regulator */
1045         if (error == 0 && ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
1046                 tb.rate = altq->ifbandwidth;
1047                 tb.depth = altq->tbrsize;
1048                 error = tbr_set(&ifp->if_snd, &tb);
1049         }
1050
1051         return (error);
1052 }
1053
1054 static int
1055 pf_disable_altq(struct pf_altq *altq)
1056 {
1057         struct ifnet            *ifp;
1058         struct tb_profile        tb;
1059         int                      error;
1060
1061         if ((ifp = ifunit(altq->ifname)) == NULL)
1062                 return (EINVAL);
1063
1064         /*
1065          * when the discipline is no longer referenced, it was overridden
1066          * by a new one.  if so, just return.
1067          */
1068         if (altq->altq_disc != ifp->if_snd.altq_disc)
1069                 return (0);
1070
1071         error = altq_disable(&ifp->if_snd);
1072
1073         if (error == 0) {
1074                 /* clear tokenbucket regulator */
1075                 tb.rate = 0;
1076                 error = tbr_set(&ifp->if_snd, &tb);
1077         }
1078
1079         return (error);
1080 }
1081
/*
 * Re-instantiate one copied altq while rebuilding the configuration
 * after an interface arrival/departure event.  If the interface is
 * gone (or is the one being removed), the altq is merely marked
 * IF_REMOVED; otherwise the discipline is re-added.  On failure the
 * altq is freed here — the caller must not touch it after a non-zero
 * return.  On success the caller links it into the inactive list.
 */
static int
pf_altq_ifnet_event_add(struct ifnet *ifp, int remove, u_int32_t ticket,
    struct pf_altq *altq)
{
	struct ifnet	*ifp1;
	int		 error = 0;

	/* Deactivate the interface in question */
	altq->local_flags &= ~PFALTQ_FLAG_IF_REMOVED;
	if ((ifp1 = ifunit(altq->ifname)) == NULL ||
	    (remove && ifp1 == ifp)) {
		altq->local_flags |= PFALTQ_FLAG_IF_REMOVED;
	} else {
		error = altq_add(ifp1, altq);

		/* A changed ticket means our transaction was interrupted. */
		if (ticket != V_ticket_altqs_inactive)
			error = EBUSY;

		if (error)
			free(altq, M_PFALTQ);
	}

	return (error);
}
1106
/*
 * Re-evaluate the ALTQ configuration on interface arrival/departure
 * (remove != 0 means ifp is going away).  Rebuilds the whole config by
 * copying the active lists through a fresh transaction and then either
 * committing it or rolling it back on any failure.
 */
void
pf_altq_ifnet_event(struct ifnet *ifp, int remove)
{
	struct pf_altq	*a1, *a2, *a3;
	u_int32_t	 ticket;
	int		 error = 0;

	/*
	 * No need to re-evaluate the configuration for events on interfaces
	 * that do not support ALTQ, as it's not possible for such
	 * interfaces to be part of the configuration.
	 */
	if (!ALTQ_IS_READY(&ifp->if_snd))
		return;

	/* Interrupt userland queue modifications */
	if (V_altqs_inactive_open)
		pf_rollback_altq(V_ticket_altqs_inactive);

	/* Start new altq ruleset */
	if (pf_begin_altq(&ticket))
		return;

	/* Copy the current active set */
	TAILQ_FOREACH(a1, V_pf_altq_ifs_active, entries) {
		a2 = malloc(sizeof(*a2), M_PFALTQ, M_NOWAIT);
		if (a2 == NULL) {
			error = ENOMEM;
			break;
		}
		bcopy(a1, a2, sizeof(struct pf_altq));

		/* On failure a2 has already been freed by the callee. */
		error = pf_altq_ifnet_event_add(ifp, remove, ticket, a2);
		if (error)
			break;

		TAILQ_INSERT_TAIL(V_pf_altq_ifs_inactive, a2, entries);
	}
	if (error)
		goto out;
	/* Copy the per-queue entries, re-resolving qids and disciplines. */
	TAILQ_FOREACH(a1, V_pf_altqs_active, entries) {
		a2 = malloc(sizeof(*a2), M_PFALTQ, M_NOWAIT);
		if (a2 == NULL) {
			error = ENOMEM;
			break;
		}
		bcopy(a1, a2, sizeof(struct pf_altq));

		if ((a2->qid = pf_qname2qid(a2->qname)) == 0) {
			error = EBUSY;
			free(a2, M_PFALTQ);
			break;
		}
		a2->altq_disc = NULL;
		/* Borrow the discipline from the matching interface entry. */
		TAILQ_FOREACH(a3, V_pf_altq_ifs_inactive, entries) {
			if (strncmp(a3->ifname, a2->ifname,
				IFNAMSIZ) == 0) {
				a2->altq_disc = a3->altq_disc;
				break;
			}
		}
		error = pf_altq_ifnet_event_add(ifp, remove, ticket, a2);
		if (error)
			break;

		TAILQ_INSERT_TAIL(V_pf_altqs_inactive, a2, entries);
	}

out:
	/* All-or-nothing: roll the transaction back on any failure. */
	if (error != 0)
		pf_rollback_altq(ticket);
	else
		pf_commit_altq(ticket);
}
1181 #endif /* ALTQ */
1182
1183 static struct pf_krule_global *
1184 pf_rule_tree_alloc(int flags)
1185 {
1186         struct pf_krule_global *tree;
1187
1188         tree = malloc(sizeof(struct pf_krule_global), M_TEMP, flags);
1189         if (tree == NULL)
1190                 return (NULL);
1191         RB_INIT(tree);
1192         return (tree);
1193 }
1194
/* Release a rule tree from pf_rule_tree_alloc(); NULL is a no-op. */
static void
pf_rule_tree_free(struct pf_krule_global *tree)
{

	free(tree, M_TEMP);
}
1201
1202 static int
1203 pf_begin_rules(u_int32_t *ticket, int rs_num, const char *anchor)
1204 {
1205         struct pf_krule_global *tree;
1206         struct pf_kruleset      *rs;
1207         struct pf_krule         *rule;
1208
1209         PF_RULES_WASSERT();
1210
1211         if (rs_num < 0 || rs_num >= PF_RULESET_MAX)
1212                 return (EINVAL);
1213         tree = pf_rule_tree_alloc(M_NOWAIT);
1214         if (tree == NULL)
1215                 return (ENOMEM);
1216         rs = pf_find_or_create_kruleset(anchor);
1217         if (rs == NULL) {
1218                 free(tree, M_TEMP);
1219                 return (EINVAL);
1220         }
1221         pf_rule_tree_free(rs->rules[rs_num].inactive.tree);
1222         rs->rules[rs_num].inactive.tree = tree;
1223
1224         while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) {
1225                 pf_unlink_rule(rs->rules[rs_num].inactive.ptr, rule);
1226                 rs->rules[rs_num].inactive.rcount--;
1227         }
1228         *ticket = ++rs->rules[rs_num].inactive.ticket;
1229         rs->rules[rs_num].inactive.open = 1;
1230         return (0);
1231 }
1232
1233 static int
1234 pf_rollback_rules(u_int32_t ticket, int rs_num, char *anchor)
1235 {
1236         struct pf_kruleset      *rs;
1237         struct pf_krule         *rule;
1238
1239         PF_RULES_WASSERT();
1240
1241         if (rs_num < 0 || rs_num >= PF_RULESET_MAX)
1242                 return (EINVAL);
1243         rs = pf_find_kruleset(anchor);
1244         if (rs == NULL || !rs->rules[rs_num].inactive.open ||
1245             rs->rules[rs_num].inactive.ticket != ticket)
1246                 return (0);
1247         while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) {
1248                 pf_unlink_rule(rs->rules[rs_num].inactive.ptr, rule);
1249                 rs->rules[rs_num].inactive.rcount--;
1250         }
1251         rs->rules[rs_num].inactive.open = 0;
1252         return (0);
1253 }
1254
/*
 * Helpers for folding rule fields into the ruleset MD5 digest.  All of
 * them expect an MD5_CTX * named "ctx" in the caller's scope.  The
 * HTONL/HTONS variants byte-swap into caller-provided storage first so
 * the digest is byte-order independent across pfsync peers.
 */
#define PF_MD5_UPD(st, elm)						\
		MD5Update(ctx, (u_int8_t *) &(st)->elm, sizeof((st)->elm))

/* Hash only the string contents (no trailing padding). */
#define PF_MD5_UPD_STR(st, elm)						\
		MD5Update(ctx, (u_int8_t *) (st)->elm, strlen((st)->elm))

#define PF_MD5_UPD_HTONL(st, elm, stor) do {				\
		(stor) = htonl((st)->elm);				\
		MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int32_t));\
} while (0)

#define PF_MD5_UPD_HTONS(st, elm, stor) do {				\
		(stor) = htons((st)->elm);				\
		MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int16_t));\
} while (0)
1270
/*
 * Fold a rule address specification into the digest.  The exact field
 * order is part of the digest and must not be changed: the checksum is
 * compared across pfsync peers.
 */
static void
pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr)
{
	PF_MD5_UPD(pfr, addr.type);
	switch (pfr->addr.type) {
		case PF_ADDR_DYNIFTL:
			PF_MD5_UPD(pfr, addr.v.ifname);
			PF_MD5_UPD(pfr, addr.iflags);
			break;
		case PF_ADDR_TABLE:
			PF_MD5_UPD(pfr, addr.v.tblname);
			break;
		case PF_ADDR_ADDRMASK:
			/* XXX ignore af? */
			PF_MD5_UPD(pfr, addr.v.a.addr.addr32);
			PF_MD5_UPD(pfr, addr.v.a.mask.addr32);
			break;
	}

	PF_MD5_UPD(pfr, port[0]);
	PF_MD5_UPD(pfr, port[1]);
	PF_MD5_UPD(pfr, neg);
	PF_MD5_UPD(pfr, port_op);
}
1295
/*
 * Fold all digest-relevant fields of a rule into an ongoing MD5
 * computation.  Field order and byte order (network order for the
 * multi-byte scalars) are part of the digest format shared with pfsync
 * peers — do not reorder or add/remove fields casually.
 */
static void
pf_hash_rule_rolling(MD5_CTX *ctx, struct pf_krule *rule)
{
	u_int16_t x;	/* scratch for PF_MD5_UPD_HTONS */
	u_int32_t y;	/* scratch for PF_MD5_UPD_HTONL */

	pf_hash_rule_addr(ctx, &rule->src);
	pf_hash_rule_addr(ctx, &rule->dst);
	for (int i = 0; i < PF_RULE_MAX_LABEL_COUNT; i++)
		PF_MD5_UPD_STR(rule, label[i]);
	PF_MD5_UPD_STR(rule, ifname);
	PF_MD5_UPD_STR(rule, match_tagname);
	PF_MD5_UPD_HTONS(rule, match_tag, x); /* dup? */
	PF_MD5_UPD_HTONL(rule, os_fingerprint, y);
	PF_MD5_UPD_HTONL(rule, prob, y);
	PF_MD5_UPD_HTONL(rule, uid.uid[0], y);
	PF_MD5_UPD_HTONL(rule, uid.uid[1], y);
	PF_MD5_UPD(rule, uid.op);
	PF_MD5_UPD_HTONL(rule, gid.gid[0], y);
	PF_MD5_UPD_HTONL(rule, gid.gid[1], y);
	PF_MD5_UPD(rule, gid.op);
	PF_MD5_UPD_HTONL(rule, rule_flag, y);
	PF_MD5_UPD(rule, action);
	PF_MD5_UPD(rule, direction);
	PF_MD5_UPD(rule, af);
	PF_MD5_UPD(rule, quick);
	PF_MD5_UPD(rule, ifnot);
	PF_MD5_UPD(rule, match_tag_not);
	PF_MD5_UPD(rule, natpass);
	PF_MD5_UPD(rule, keep_state);
	PF_MD5_UPD(rule, proto);
	PF_MD5_UPD(rule, type);
	PF_MD5_UPD(rule, code);
	PF_MD5_UPD(rule, flags);
	PF_MD5_UPD(rule, flagset);
	PF_MD5_UPD(rule, allow_opts);
	PF_MD5_UPD(rule, rt);
	PF_MD5_UPD(rule, tos);
	PF_MD5_UPD(rule, scrub_flags);
	PF_MD5_UPD(rule, min_ttl);
	PF_MD5_UPD(rule, set_tos);
	/* Anchored rules also hash the anchor path. */
	if (rule->anchor != NULL)
		PF_MD5_UPD_STR(rule, anchor->path);
}
1340
1341 static void
1342 pf_hash_rule(struct pf_krule *rule)
1343 {
1344         MD5_CTX         ctx;
1345
1346         MD5Init(&ctx);
1347         pf_hash_rule_rolling(&ctx, rule);
1348         MD5Final(rule->md5sum, &ctx);
1349 }
1350
1351 static int
1352 pf_krule_compare(struct pf_krule *a, struct pf_krule *b)
1353 {
1354
1355         return (memcmp(a->md5sum, b->md5sum, PF_MD5_DIGEST_LENGTH));
1356 }
1357
/*
 * Commit a rules transaction: swap the inactive rule queue/array/tree
 * into place as the active set, optionally migrate counters from the
 * replaced rules, and purge the old set.  For the main ruleset the
 * pfsync matching checksum is recomputed first.  Requires the rules
 * write lock.
 */
static int
pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor)
{
	struct pf_kruleset	*rs;
	struct pf_krule		*rule, **old_array, *old_rule;
	struct pf_krulequeue	*old_rules;
	struct pf_krule_global	*old_tree;
	int			 error;
	u_int32_t		 old_rcount;

	PF_RULES_WASSERT();

	if (rs_num < 0 || rs_num >= PF_RULESET_MAX)
		return (EINVAL);
	rs = pf_find_kruleset(anchor);
	if (rs == NULL || !rs->rules[rs_num].inactive.open ||
	    ticket != rs->rules[rs_num].inactive.ticket)
		return (EBUSY);

	/* Calculate checksum for the main ruleset */
	if (rs == &pf_main_ruleset) {
		error = pf_setup_pfsync_matching(rs);
		if (error != 0)
			return (error);
	}

	/* Swap rules, keep the old. */
	old_rules = rs->rules[rs_num].active.ptr;
	old_rcount = rs->rules[rs_num].active.rcount;
	old_array = rs->rules[rs_num].active.ptr_array;
	old_tree = rs->rules[rs_num].active.tree;

	rs->rules[rs_num].active.ptr =
	    rs->rules[rs_num].inactive.ptr;
	rs->rules[rs_num].active.ptr_array =
	    rs->rules[rs_num].inactive.ptr_array;
	rs->rules[rs_num].active.tree =
	    rs->rules[rs_num].inactive.tree;
	rs->rules[rs_num].active.rcount =
	    rs->rules[rs_num].inactive.rcount;

	/*
	 * Attempt to preserve counter information: for each new rule,
	 * look up its digest-identical predecessor in the old tree and
	 * carry its evaluation/packet/byte counters forward.
	 */
	if (V_pf_status.keep_counters && old_tree != NULL) {
		TAILQ_FOREACH(rule, rs->rules[rs_num].active.ptr,
		    entries) {
			old_rule = RB_FIND(pf_krule_global, old_tree, rule);
			if (old_rule == NULL) {
				continue;
			}
			pf_counter_u64_critical_enter();
			pf_counter_u64_add_protected(&rule->evaluations,
			    pf_counter_u64_fetch(&old_rule->evaluations));
			pf_counter_u64_add_protected(&rule->packets[0],
			    pf_counter_u64_fetch(&old_rule->packets[0]));
			pf_counter_u64_add_protected(&rule->packets[1],
			    pf_counter_u64_fetch(&old_rule->packets[1]));
			pf_counter_u64_add_protected(&rule->bytes[0],
			    pf_counter_u64_fetch(&old_rule->bytes[0]));
			pf_counter_u64_add_protected(&rule->bytes[1],
			    pf_counter_u64_fetch(&old_rule->bytes[1]));
			pf_counter_u64_critical_exit();
		}
	}

	/* Move the replaced set onto the inactive side for cleanup. */
	rs->rules[rs_num].inactive.ptr = old_rules;
	rs->rules[rs_num].inactive.ptr_array = old_array;
	rs->rules[rs_num].inactive.tree = NULL; /* important for pf_ioctl_addrule */
	rs->rules[rs_num].inactive.rcount = old_rcount;

	rs->rules[rs_num].active.ticket =
	    rs->rules[rs_num].inactive.ticket;
	pf_calc_skip_steps(rs->rules[rs_num].active.ptr);

	/* Purge the old rule list. */
	PF_UNLNKDRULES_LOCK();
	while ((rule = TAILQ_FIRST(old_rules)) != NULL)
		pf_unlink_rule_locked(old_rules, rule);
	PF_UNLNKDRULES_UNLOCK();
	if (rs->rules[rs_num].inactive.ptr_array)
		free(rs->rules[rs_num].inactive.ptr_array, M_TEMP);
	rs->rules[rs_num].inactive.ptr_array = NULL;
	rs->rules[rs_num].inactive.rcount = 0;
	rs->rules[rs_num].inactive.open = 0;
	pf_remove_if_empty_kruleset(rs);
	free(old_tree, M_TEMP);

	return (0);
}
1446
/*
 * Rebuild the per-queue rule pointer arrays for the inactive sets and
 * compute the overall ruleset checksum stored in V_pf_status.pf_chksum,
 * which pfsync peers use to verify matching configurations.  The scrub
 * queue is excluded from the digest.
 */
static int
pf_setup_pfsync_matching(struct pf_kruleset *rs)
{
	MD5_CTX			 ctx;
	struct pf_krule		*rule;
	int			 rs_cnt;
	u_int8_t		 digest[PF_MD5_DIGEST_LENGTH];

	MD5Init(&ctx);
	for (rs_cnt = 0; rs_cnt < PF_RULESET_MAX; rs_cnt++) {
		/* XXX PF_RULESET_SCRUB as well? */
		if (rs_cnt == PF_RULESET_SCRUB)
			continue;

		/* Throw away any stale pointer array before rebuilding. */
		if (rs->rules[rs_cnt].inactive.ptr_array)
			free(rs->rules[rs_cnt].inactive.ptr_array, M_TEMP);
		rs->rules[rs_cnt].inactive.ptr_array = NULL;

		if (rs->rules[rs_cnt].inactive.rcount) {
			rs->rules[rs_cnt].inactive.ptr_array =
			    mallocarray(rs->rules[rs_cnt].inactive.rcount,
			    sizeof(struct pf_rule **),
			    M_TEMP, M_NOWAIT);

			if (!rs->rules[rs_cnt].inactive.ptr_array)
				return (ENOMEM);
		}

		/*
		 * Index rules by rule->nr; assumes nr values are dense in
		 * [0, rcount) for the inactive queue — TODO confirm against
		 * the rule-insertion path.
		 */
		TAILQ_FOREACH(rule, rs->rules[rs_cnt].inactive.ptr,
		    entries) {
			pf_hash_rule_rolling(&ctx, rule);
			(rs->rules[rs_cnt].inactive.ptr_array)[rule->nr] = rule;
		}
	}

	MD5Final(digest, &ctx);
	memcpy(V_pf_status.pf_chksum, digest, sizeof(V_pf_status.pf_chksum));
	return (0);
}
1486
1487 static int
1488 pf_eth_addr_setup(struct pf_keth_ruleset *ruleset, struct pf_addr_wrap *addr)
1489 {
1490         int error = 0;
1491
1492         switch (addr->type) {
1493         case PF_ADDR_TABLE:
1494                 addr->p.tbl = pfr_eth_attach_table(ruleset, addr->v.tblname);
1495                 if (addr->p.tbl == NULL)
1496                         error = ENOMEM;
1497                 break;
1498         default:
1499                 error = EINVAL;
1500         }
1501
1502         return (error);
1503 }
1504
1505 static int
1506 pf_addr_setup(struct pf_kruleset *ruleset, struct pf_addr_wrap *addr,
1507     sa_family_t af)
1508 {
1509         int error = 0;
1510
1511         switch (addr->type) {
1512         case PF_ADDR_TABLE:
1513                 addr->p.tbl = pfr_attach_table(ruleset, addr->v.tblname);
1514                 if (addr->p.tbl == NULL)
1515                         error = ENOMEM;
1516                 break;
1517         case PF_ADDR_DYNIFTL:
1518                 error = pfi_dynaddr_setup(addr, af);
1519                 break;
1520         }
1521
1522         return (error);
1523 }
1524
1525 static void
1526 pf_addr_copyout(struct pf_addr_wrap *addr)
1527 {
1528
1529         switch (addr->type) {
1530         case PF_ADDR_DYNIFTL:
1531                 pfi_dynaddr_copyout(addr);
1532                 break;
1533         case PF_ADDR_TABLE:
1534                 pf_tbladdr_copyout(addr);
1535                 break;
1536         }
1537 }
1538
1539 static void
1540 pf_src_node_copy(const struct pf_ksrc_node *in, struct pf_src_node *out)
1541 {
1542         int     secs = time_uptime, diff;
1543
1544         bzero(out, sizeof(struct pf_src_node));
1545
1546         bcopy(&in->addr, &out->addr, sizeof(struct pf_addr));
1547         bcopy(&in->raddr, &out->raddr, sizeof(struct pf_addr));
1548
1549         if (in->rule.ptr != NULL)
1550                 out->rule.nr = in->rule.ptr->nr;
1551
1552         for (int i = 0; i < 2; i++) {
1553                 out->bytes[i] = counter_u64_fetch(in->bytes[i]);
1554                 out->packets[i] = counter_u64_fetch(in->packets[i]);
1555         }
1556
1557         out->states = in->states;
1558         out->conn = in->conn;
1559         out->af = in->af;
1560         out->ruletype = in->ruletype;
1561
1562         out->creation = secs - in->creation;
1563         if (out->expire > secs)
1564                 out->expire -= secs;
1565         else
1566                 out->expire = 0;
1567
1568         /* Adjust the connection rate estimate. */
1569         diff = secs - in->conn_rate.last;
1570         if (diff >= in->conn_rate.seconds)
1571                 out->conn_rate.count = 0;
1572         else
1573                 out->conn_rate.count -=
1574                     in->conn_rate.count * diff /
1575                     in->conn_rate.seconds;
1576 }
1577
1578 #ifdef ALTQ
1579 /*
1580  * Handle export of struct pf_kaltq to user binaries that may be using any
1581  * version of struct pf_altq.
1582  */
1583 static int
1584 pf_export_kaltq(struct pf_altq *q, struct pfioc_altq_v1 *pa, size_t ioc_size)
1585 {
1586         u_int32_t version;
1587
1588         if (ioc_size == sizeof(struct pfioc_altq_v0))
1589                 version = 0;
1590         else
1591                 version = pa->version;
1592
1593         if (version > PFIOC_ALTQ_VERSION)
1594                 return (EINVAL);
1595
1596 #define ASSIGN(x) exported_q->x = q->x
1597 #define COPY(x) \
1598         bcopy(&q->x, &exported_q->x, min(sizeof(q->x), sizeof(exported_q->x)))
1599 #define SATU16(x) (u_int32_t)uqmin((x), USHRT_MAX)
1600 #define SATU32(x) (u_int32_t)uqmin((x), UINT_MAX)
1601
1602         switch (version) {
1603         case 0: {
1604                 struct pf_altq_v0 *exported_q =
1605                     &((struct pfioc_altq_v0 *)pa)->altq;
1606
1607                 COPY(ifname);
1608
1609                 ASSIGN(scheduler);
1610                 ASSIGN(tbrsize);
1611                 exported_q->tbrsize = SATU16(q->tbrsize);
1612                 exported_q->ifbandwidth = SATU32(q->ifbandwidth);
1613
1614                 COPY(qname);
1615                 COPY(parent);
1616                 ASSIGN(parent_qid);
1617                 exported_q->bandwidth = SATU32(q->bandwidth);
1618                 ASSIGN(priority);
1619                 ASSIGN(local_flags);
1620
1621                 ASSIGN(qlimit);
1622                 ASSIGN(flags);
1623
1624                 if (q->scheduler == ALTQT_HFSC) {
1625 #define ASSIGN_OPT(x) exported_q->pq_u.hfsc_opts.x = q->pq_u.hfsc_opts.x
1626 #define ASSIGN_OPT_SATU32(x) exported_q->pq_u.hfsc_opts.x = \
1627                             SATU32(q->pq_u.hfsc_opts.x)
1628                         
1629                         ASSIGN_OPT_SATU32(rtsc_m1);
1630                         ASSIGN_OPT(rtsc_d);
1631                         ASSIGN_OPT_SATU32(rtsc_m2);
1632
1633                         ASSIGN_OPT_SATU32(lssc_m1);
1634                         ASSIGN_OPT(lssc_d);
1635                         ASSIGN_OPT_SATU32(lssc_m2);
1636
1637                         ASSIGN_OPT_SATU32(ulsc_m1);
1638                         ASSIGN_OPT(ulsc_d);
1639                         ASSIGN_OPT_SATU32(ulsc_m2);
1640
1641                         ASSIGN_OPT(flags);
1642                         
1643 #undef ASSIGN_OPT
1644 #undef ASSIGN_OPT_SATU32
1645                 } else
1646                         COPY(pq_u);
1647
1648                 ASSIGN(qid);
1649                 break;
1650         }
1651         case 1: {
1652                 struct pf_altq_v1 *exported_q =
1653                     &((struct pfioc_altq_v1 *)pa)->altq;
1654
1655                 COPY(ifname);
1656
1657                 ASSIGN(scheduler);
1658                 ASSIGN(tbrsize);
1659                 ASSIGN(ifbandwidth);
1660
1661                 COPY(qname);
1662                 COPY(parent);
1663                 ASSIGN(parent_qid);
1664                 ASSIGN(bandwidth);
1665                 ASSIGN(priority);
1666                 ASSIGN(local_flags);
1667
1668                 ASSIGN(qlimit);
1669                 ASSIGN(flags);
1670                 COPY(pq_u);
1671
1672                 ASSIGN(qid);
1673                 break;
1674         }
1675         default:
1676                 panic("%s: unhandled struct pfioc_altq version", __func__);
1677                 break;
1678         }
1679
1680 #undef ASSIGN
1681 #undef COPY
1682 #undef SATU16
1683 #undef SATU32
1684
1685         return (0);
1686 }
1687
1688 /*
1689  * Handle import to struct pf_kaltq of struct pf_altq from user binaries
1690  * that may be using any version of it.
1691  */
static int
pf_import_kaltq(struct pfioc_altq_v1 *pa, struct pf_altq *q, size_t ioc_size)
{
	u_int32_t version;

	/*
	 * The v0 ioctl structure carried no version field, so a v0 request
	 * is recognized purely by its size; later versions embed an
	 * explicit version number in the structure.
	 */
	if (ioc_size == sizeof(struct pfioc_altq_v0))
		version = 0;
	else
		version = pa->version;

	/* Reject requests from userland binaries newer than this kernel. */
	if (version > PFIOC_ALTQ_VERSION)
		return (EINVAL);

/*
 * ASSIGN copies a scalar member (with implicit integer widening where the
 * kernel field is larger than the imported one); COPY moves an array or
 * struct member, bounded by the smaller of the two sizes.
 */
#define ASSIGN(x) q->x = imported_q->x
#define COPY(x) \
	bcopy(&imported_q->x, &q->x, min(sizeof(imported_q->x), sizeof(q->x)))

	switch (version) {
	case 0: {
		struct pf_altq_v0 *imported_q =
		    &((struct pfioc_altq_v0 *)pa)->altq;

		COPY(ifname);

		ASSIGN(scheduler);
		ASSIGN(tbrsize); /* 16-bit -> 32-bit */
		ASSIGN(ifbandwidth); /* 32-bit -> 64-bit */

		COPY(qname);
		COPY(parent);
		ASSIGN(parent_qid);
		ASSIGN(bandwidth); /* 32-bit -> 64-bit */
		ASSIGN(priority);
		ASSIGN(local_flags);

		ASSIGN(qlimit);
		ASSIGN(flags);

		/*
		 * The HFSC options need member-by-member assignment:
		 * several members were widened after v0 (see the
		 * 32-bit -> 64-bit notes below), so a raw byte copy of
		 * the scheduler union would not line the fields up.
		 */
		if (imported_q->scheduler == ALTQT_HFSC) {
#define ASSIGN_OPT(x) q->pq_u.hfsc_opts.x = imported_q->pq_u.hfsc_opts.x

			/*
			 * The m1 and m2 parameters are being copied from
			 * 32-bit to 64-bit.
			 */
			ASSIGN_OPT(rtsc_m1);
			ASSIGN_OPT(rtsc_d);
			ASSIGN_OPT(rtsc_m2);

			ASSIGN_OPT(lssc_m1);
			ASSIGN_OPT(lssc_d);
			ASSIGN_OPT(lssc_m2);

			ASSIGN_OPT(ulsc_m1);
			ASSIGN_OPT(ulsc_d);
			ASSIGN_OPT(ulsc_m2);

			ASSIGN_OPT(flags);

#undef ASSIGN_OPT
		} else
			COPY(pq_u);

		ASSIGN(qid);
		break;
	}
	case 1: {
		/* v1 matches the kernel struct pf_altq field-for-field. */
		struct pf_altq_v1 *imported_q =
		    &((struct pfioc_altq_v1 *)pa)->altq;

		COPY(ifname);

		ASSIGN(scheduler);
		ASSIGN(tbrsize);
		ASSIGN(ifbandwidth);

		COPY(qname);
		COPY(parent);
		ASSIGN(parent_qid);
		ASSIGN(bandwidth);
		ASSIGN(priority);
		ASSIGN(local_flags);

		ASSIGN(qlimit);
		ASSIGN(flags);
		COPY(pq_u);

		ASSIGN(qid);
		break;
	}
	default:
		/* Unreachable: version was range-checked above. */
		panic("%s: unhandled struct pfioc_altq version", __func__);
		break;
	}

#undef ASSIGN
#undef COPY

	return (0);
}
1792
1793 static struct pf_altq *
1794 pf_altq_get_nth_active(u_int32_t n)
1795 {
1796         struct pf_altq          *altq;
1797         u_int32_t                nr;
1798
1799         nr = 0;
1800         TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) {
1801                 if (nr == n)
1802                         return (altq);
1803                 nr++;
1804         }
1805
1806         TAILQ_FOREACH(altq, V_pf_altqs_active, entries) {
1807                 if (nr == n)
1808                         return (altq);
1809                 nr++;
1810         }
1811
1812         return (NULL);
1813 }
1814 #endif /* ALTQ */
1815
/*
 * Allocate a zeroed struct pf_krule with its pool mutex and per-CPU
 * timestamp storage initialized.  Counterpart of pf_krule_free().
 * M_WAITOK: the allocations may sleep and do not fail.
 */
struct pf_krule *
pf_krule_alloc(void)
{
	struct pf_krule *rule;

	rule = malloc(sizeof(struct pf_krule), M_PFRULE, M_WAITOK | M_ZERO);
	mtx_init(&rule->rpool.mtx, "pf_krule_pool", NULL, MTX_DEF);
	rule->timestamp = uma_zalloc_pcpu(pf_timestamp_pcpu_zone,
	    M_WAITOK | M_ZERO);
	return (rule);
}
1827
/*
 * Release a rule obtained from pf_krule_alloc(), tearing down its
 * counters, per-CPU timestamp storage and pool mutex.  NULL is accepted
 * and ignored, mirroring free(9).
 */
void
pf_krule_free(struct pf_krule *rule)
{
#ifdef PF_WANT_32_TO_64_COUNTER
	bool wowned;
#endif

	if (rule == NULL)
		return;

#ifdef PF_WANT_32_TO_64_COUNTER
	/*
	 * Unlink the rule from the global rule list.  The rules write
	 * lock is taken (and dropped) only if the caller does not
	 * already own it.
	 */
	if (rule->allrulelinked) {
		wowned = PF_RULES_WOWNED();
		if (!wowned)
			PF_RULES_WLOCK();
		LIST_REMOVE(rule, allrulelist);
		V_pf_allrulecount--;
		if (!wowned)
			PF_RULES_WUNLOCK();
	}
#endif

	pf_counter_u64_deinit(&rule->evaluations);
	for (int i = 0; i < 2; i++) {
		pf_counter_u64_deinit(&rule->packets[i]);
		pf_counter_u64_deinit(&rule->bytes[i]);
	}
	counter_u64_free(rule->states_cur);
	counter_u64_free(rule->states_tot);
	counter_u64_free(rule->src_nodes);
	uma_zfree_pcpu(pf_timestamp_pcpu_zone, rule->timestamp);

	mtx_destroy(&rule->rpool.mtx);
	free(rule, M_PFRULE);
}
1863
1864 static void
1865 pf_kpooladdr_to_pooladdr(const struct pf_kpooladdr *kpool,
1866     struct pf_pooladdr *pool)
1867 {
1868
1869         bzero(pool, sizeof(*pool));
1870         bcopy(&kpool->addr, &pool->addr, sizeof(pool->addr));
1871         strlcpy(pool->ifname, kpool->ifname, sizeof(pool->ifname));
1872 }
1873
1874 static int
1875 pf_pooladdr_to_kpooladdr(const struct pf_pooladdr *pool,
1876     struct pf_kpooladdr *kpool)
1877 {
1878         int ret;
1879
1880         bzero(kpool, sizeof(*kpool));
1881         bcopy(&pool->addr, &kpool->addr, sizeof(kpool->addr));
1882         ret = pf_user_strcpy(kpool->ifname, pool->ifname,
1883             sizeof(kpool->ifname));
1884         return (ret);
1885 }
1886
1887 static void
1888 pf_pool_to_kpool(const struct pf_pool *pool, struct pf_kpool *kpool)
1889 {
1890         _Static_assert(sizeof(pool->key) == sizeof(kpool->key), "");
1891         _Static_assert(sizeof(pool->counter) == sizeof(kpool->counter), "");
1892
1893         bcopy(&pool->key, &kpool->key, sizeof(kpool->key));
1894         bcopy(&pool->counter, &kpool->counter, sizeof(kpool->counter));
1895
1896         kpool->tblidx = pool->tblidx;
1897         kpool->proxy_port[0] = pool->proxy_port[0];
1898         kpool->proxy_port[1] = pool->proxy_port[1];
1899         kpool->opts = pool->opts;
1900 }
1901
/*
 * Convert a userland struct pf_rule into a kernel struct pf_krule.
 * String members originate in userland and are copied with
 * pf_user_strcpy(), whose error status is propagated to the caller.
 * Returns 0 on success or an errno value.
 */
static int
pf_rule_to_krule(const struct pf_rule *rule, struct pf_krule *krule)
{
	int ret;

	/* Refuse address families the kernel was not built with. */
#ifndef INET
	if (rule->af == AF_INET) {
		return (EAFNOSUPPORT);
	}
#endif /* INET */
#ifndef INET6
	if (rule->af == AF_INET6) {
		return (EAFNOSUPPORT);
	}
#endif /* INET6 */

	/* Validate the user-supplied address blocks before copying them. */
	ret = pf_check_rule_addr(&rule->src);
	if (ret != 0)
		return (ret);
	ret = pf_check_rule_addr(&rule->dst);
	if (ret != 0)
		return (ret);

	bcopy(&rule->src, &krule->src, sizeof(rule->src));
	bcopy(&rule->dst, &krule->dst, sizeof(rule->dst));

	/* The single old-style label becomes the first kernel label. */
	ret = pf_user_strcpy(krule->label[0], rule->label, sizeof(rule->label));
	if (ret != 0)
		return (ret);
	ret = pf_user_strcpy(krule->ifname, rule->ifname, sizeof(rule->ifname));
	if (ret != 0)
		return (ret);
	ret = pf_user_strcpy(krule->qname, rule->qname, sizeof(rule->qname));
	if (ret != 0)
		return (ret);
	ret = pf_user_strcpy(krule->pqname, rule->pqname, sizeof(rule->pqname));
	if (ret != 0)
		return (ret);
	ret = pf_user_strcpy(krule->tagname, rule->tagname,
	    sizeof(rule->tagname));
	if (ret != 0)
		return (ret);
	ret = pf_user_strcpy(krule->match_tagname, rule->match_tagname,
	    sizeof(rule->match_tagname));
	if (ret != 0)
		return (ret);
	ret = pf_user_strcpy(krule->overload_tblname, rule->overload_tblname,
	    sizeof(rule->overload_tblname));
	if (ret != 0)
		return (ret);

	pf_pool_to_kpool(&rule->rpool, &krule->rpool);

	/* Don't allow userspace to set evaluations, packets or bytes. */
	/* kif, anchor, overload_tbl are not copied over. */

	krule->os_fingerprint = rule->os_fingerprint;

	krule->rtableid = rule->rtableid;
	bcopy(rule->timeout, krule->timeout, sizeof(krule->timeout));
	krule->max_states = rule->max_states;
	krule->max_src_nodes = rule->max_src_nodes;
	krule->max_src_states = rule->max_src_states;
	krule->max_src_conn = rule->max_src_conn;
	krule->max_src_conn_rate.limit = rule->max_src_conn_rate.limit;
	krule->max_src_conn_rate.seconds = rule->max_src_conn_rate.seconds;
	krule->qid = rule->qid;
	krule->pqid = rule->pqid;
	krule->nr = rule->nr;
	krule->prob = rule->prob;
	krule->cuid = rule->cuid;
	krule->cpid = rule->cpid;

	krule->return_icmp = rule->return_icmp;
	krule->return_icmp6 = rule->return_icmp6;
	krule->max_mss = rule->max_mss;
	krule->tag = rule->tag;
	krule->match_tag = rule->match_tag;
	krule->scrub_flags = rule->scrub_flags;

	bcopy(&rule->uid, &krule->uid, sizeof(krule->uid));
	bcopy(&rule->gid, &krule->gid, sizeof(krule->gid));

	krule->rule_flag = rule->rule_flag;
	krule->action = rule->action;
	krule->direction = rule->direction;
	krule->log = rule->log;
	krule->logif = rule->logif;
	krule->quick = rule->quick;
	krule->ifnot = rule->ifnot;
	krule->match_tag_not = rule->match_tag_not;
	krule->natpass = rule->natpass;

	krule->keep_state = rule->keep_state;
	krule->af = rule->af;
	krule->proto = rule->proto;
	krule->type = rule->type;
	krule->code = rule->code;
	krule->flags = rule->flags;
	krule->flagset = rule->flagset;
	krule->min_ttl = rule->min_ttl;
	krule->allow_opts = rule->allow_opts;
	krule->rt = rule->rt;
	krule->return_ttl = rule->return_ttl;
	krule->tos = rule->tos;
	krule->set_tos = rule->set_tos;

	krule->flush = rule->flush;
	krule->prio = rule->prio;
	krule->set_prio[0] = rule->set_prio[0];
	krule->set_prio[1] = rule->set_prio[1];

	bcopy(&rule->divert, &krule->divert, sizeof(krule->divert));

	return (0);
}
2018
/*
 * Validate a rule and append it to the inactive ruleset identified by
 * 'anchor'/'ticket' (the rule becomes live on DIOCXCOMMIT).  On success
 * the ruleset takes ownership of 'rule'; on failure the rule (and any
 * interface reference created here) is freed.  Returns 0 or an errno.
 */
static int
pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket,
    uint32_t pool_ticket, const char *anchor, const char *anchor_call,
    struct thread *td)
{
	struct pf_kruleset	*ruleset;
	struct pf_krule		*tail;
	struct pf_kpooladdr	*pa;
	struct pfi_kkif		*kif = NULL;
	int			 rs_num;
	int			 error = 0;

	/* return_icmp packs type in the high byte, code in the low byte. */
	if ((rule->return_icmp >> 8) > ICMP_MAXTYPE) {
		error = EINVAL;
		goto errout_unlocked;
	}

#define ERROUT(x)	ERROUT_FUNCTION(errout, x)

	/*
	 * Perform all sleeping (M_WAITOK) allocations up front, before
	 * any locks are taken.
	 */
	if (rule->ifname[0])
		kif = pf_kkif_create(M_WAITOK);
	pf_counter_u64_init(&rule->evaluations, M_WAITOK);
	for (int i = 0; i < 2; i++) {
		pf_counter_u64_init(&rule->packets[i], M_WAITOK);
		pf_counter_u64_init(&rule->bytes[i], M_WAITOK);
	}
	rule->states_cur = counter_u64_alloc(M_WAITOK);
	rule->states_tot = counter_u64_alloc(M_WAITOK);
	rule->src_nodes = counter_u64_alloc(M_WAITOK);
	rule->cuid = td->td_ucred->cr_ruid;
	rule->cpid = td->td_proc ? td->td_proc->p_pid : 0;
	TAILQ_INIT(&rule->rpool.list);

	PF_CONFIG_LOCK();
	PF_RULES_WLOCK();
#ifdef PF_WANT_32_TO_64_COUNTER
	LIST_INSERT_HEAD(&V_pf_allrulelist, rule, allrulelist);
	MPASS(!rule->allrulelinked);
	rule->allrulelinked = true;
	V_pf_allrulecount++;
#endif
	ruleset = pf_find_kruleset(anchor);
	if (ruleset == NULL)
		ERROUT(EINVAL);
	rs_num = pf_get_ruleset_number(rule->action);
	if (rs_num >= PF_RULESET_MAX)
		ERROUT(EINVAL);
	/* Both tickets must match the transaction opened by DIOCXBEGIN. */
	if (ticket != ruleset->rules[rs_num].inactive.ticket) {
		DPFPRINTF(PF_DEBUG_MISC,
		    ("ticket: %d != [%d]%d\n", ticket, rs_num,
		    ruleset->rules[rs_num].inactive.ticket));
		ERROUT(EBUSY);
	}
	if (pool_ticket != V_ticket_pabuf) {
		DPFPRINTF(PF_DEBUG_MISC,
		    ("pool_ticket: %d != %d\n", pool_ticket,
		    V_ticket_pabuf));
		ERROUT(EBUSY);
	}
	/*
	 * XXXMJG hack: there is no mechanism to ensure they started the
	 * transaction. Ticket checked above may happen to match by accident,
	 * even if nobody called DIOCXBEGIN, let alone this process.
	 * Partially work around it by checking if the RB tree got allocated,
	 * see pf_begin_rules.
	 */
	if (ruleset->rules[rs_num].inactive.tree == NULL) {
		ERROUT(EINVAL);
	}

	/* Number the new rule after the current tail of the inactive list. */
	tail = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr,
	    pf_krulequeue);
	if (tail)
		rule->nr = tail->nr + 1;
	else
		rule->nr = 0;
	if (rule->ifname[0]) {
		/* Attach consumes kif; clear it so errout won't free it. */
		rule->kif = pfi_kkif_attach(kif, rule->ifname);
		kif = NULL;
		pfi_kkif_ref(rule->kif);
	} else
		rule->kif = NULL;

	/*
	 * The checks below accumulate failures in 'error' instead of
	 * bailing out immediately; 'error' is inspected once, after the
	 * pool buffer has been examined.
	 */
	if (rule->rtableid > 0 && rule->rtableid >= rt_numfibs)
		error = EBUSY;

#ifdef ALTQ
	/* set queue IDs */
	if (rule->qname[0] != 0) {
		if ((rule->qid = pf_qname2qid(rule->qname)) == 0)
			error = EBUSY;
		else if (rule->pqname[0] != 0) {
			if ((rule->pqid =
			    pf_qname2qid(rule->pqname)) == 0)
				error = EBUSY;
		} else
			rule->pqid = rule->qid;
	}
#endif
	if (rule->tagname[0])
		if ((rule->tag = pf_tagname2tag(rule->tagname)) == 0)
			error = EBUSY;
	if (rule->match_tagname[0])
		if ((rule->match_tag =
		    pf_tagname2tag(rule->match_tagname)) == 0)
			error = EBUSY;
	if (rule->rt && !rule->direction)
		error = EINVAL;
	if (!rule->log)
		rule->logif = 0;
	if (rule->logif >= PFLOGIFS_MAX)
		error = EINVAL;
	if (pf_addr_setup(ruleset, &rule->src.addr, rule->af))
		error = ENOMEM;
	if (pf_addr_setup(ruleset, &rule->dst.addr, rule->af))
		error = ENOMEM;
	if (pf_kanchor_setup(rule, ruleset, anchor_call))
		error = EINVAL;
	if (rule->scrub_flags & PFSTATE_SETPRIO &&
	    (rule->set_prio[0] > PF_PRIO_MAX ||
	    rule->set_prio[1] > PF_PRIO_MAX))
		error = EINVAL;
	/* Resolve table references in the staged pool address buffer. */
	TAILQ_FOREACH(pa, &V_pf_pabuf, entries)
		if (pa->addr.type == PF_ADDR_TABLE) {
			pa->addr.p.tbl = pfr_attach_table(ruleset,
			    pa->addr.v.tblname);
			if (pa->addr.p.tbl == NULL)
				error = ENOMEM;
		}

	rule->overload_tbl = NULL;
	if (rule->overload_tblname[0]) {
		if ((rule->overload_tbl = pfr_attach_table(ruleset,
		    rule->overload_tblname)) == NULL)
			error = EINVAL;
		else
			rule->overload_tbl->pfrkt_flags |=
			    PFR_TFLAG_ACTIVE;
	}

	/* Move the staged pool addresses onto the rule itself. */
	pf_mv_kpool(&V_pf_pabuf, &rule->rpool.list);
	/* Translation and route-to rules require at least one pool entry. */
	if (((((rule->action == PF_NAT) || (rule->action == PF_RDR) ||
	    (rule->action == PF_BINAT)) && rule->anchor == NULL) ||
	    (rule->rt > PF_NOPFROUTE)) &&
	    (TAILQ_FIRST(&rule->rpool.list) == NULL))
		error = EINVAL;

	if (error) {
		pf_free_rule(rule);
		rule = NULL;
		ERROUT(error);
	}

	rule->rpool.cur = TAILQ_FIRST(&rule->rpool.list);
	TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr,
	    rule, entries);
	ruleset->rules[rs_num].inactive.rcount++;

	/*
	 * Hash outside the rules lock; the config lock is still held, so
	 * the inactive tree cannot change underneath us.  A duplicate in
	 * the tree means an identical rule was already staged.
	 */
	PF_RULES_WUNLOCK();
	pf_hash_rule(rule);
	if (RB_INSERT(pf_krule_global, ruleset->rules[rs_num].inactive.tree, rule) != NULL) {
		PF_RULES_WLOCK();
		TAILQ_REMOVE(ruleset->rules[rs_num].inactive.ptr, rule, entries);
		ruleset->rules[rs_num].inactive.rcount--;
		pf_free_rule(rule);
		rule = NULL;
		ERROUT(EEXIST);
	}
	PF_CONFIG_UNLOCK();

	return (0);

#undef ERROUT
errout:
	PF_RULES_WUNLOCK();
	PF_CONFIG_UNLOCK();
errout_unlocked:
	pf_kkif_free(kif);
	pf_krule_free(rule);
	return (error);
}
2200
2201 static bool
2202 pf_label_match(const struct pf_krule *rule, const char *label)
2203 {
2204         int i = 0;
2205
2206         while (*rule->label[i]) {
2207                 if (strcmp(rule->label[i], label) == 0)
2208                         return (true);
2209                 i++;
2210         }
2211
2212         return (false);
2213 }
2214
/*
 * Kill the state matching the given key in the given direction, but
 * only if the match is unambiguous.  Returns the number of states
 * killed (0 or 1).
 */
static unsigned int
pf_kill_matching_state(struct pf_state_key_cmp *key, int dir)
{
	struct pf_kstate *s;
	int more = 0;

	s = pf_find_state_all(key, dir, &more);
	if (s == NULL)
		return (0);

	/*
	 * NOTE(review): 'more' appears to signal additional states
	 * matching the same key; in that case nothing is killed, so an
	 * unrelated state is not torn down by accident — confirm
	 * against pf_find_state_all().
	 */
	if (more) {
		PF_STATE_UNLOCK(s);
		return (0);
	}

	/* pf_unlink_state() consumes the state lock taken by the lookup. */
	pf_unlink_state(s);
	return (1);
}
2233
/*
 * Scan one state-hash row and kill every state matching the filter
 * criteria in 'psk' (address family, protocol, source/destination
 * address and port, route address, label, interface).  With
 * psk_kill_match set, the counterpart state created by a NAT rule is
 * killed as well.  Returns the number of states killed.
 */
static int
pf_killstates_row(struct pf_kstate_kill *psk, struct pf_idhash *ih)
{
	struct pf_kstate	*s;
	struct pf_state_key	*sk;
	struct pf_addr		*srcaddr, *dstaddr;
	struct pf_state_key_cmp	 match_key;
	int			 idx, killed = 0;
	unsigned int		 dir;
	u_int16_t		 srcport, dstport;
	struct pfi_kkif		*kif;

relock_DIOCKILLSTATES:
	PF_HASHROW_LOCK(ih);
	LIST_FOREACH(s, &ih->states, entry) {
		/* For floating states look at the original kif. */
		kif = s->kif == V_pfi_all ? s->orig_kif : s->kif;

		/*
		 * Map the wire key's address/port pairs onto src/dst as
		 * the user sees them, which depends on the state's
		 * direction.
		 */
		sk = s->key[PF_SK_WIRE];
		if (s->direction == PF_OUT) {
			srcaddr = &sk->addr[1];
			dstaddr = &sk->addr[0];
			srcport = sk->port[1];
			dstport = sk->port[0];
		} else {
			srcaddr = &sk->addr[0];
			dstaddr = &sk->addr[1];
			srcport = sk->port[0];
			dstport = sk->port[1];
		}

		/* Unset filter fields (zero/empty) match everything. */
		if (psk->psk_af && sk->af != psk->psk_af)
			continue;

		if (psk->psk_proto && psk->psk_proto != sk->proto)
			continue;

		if (! PF_MATCHA(psk->psk_src.neg, &psk->psk_src.addr.v.a.addr,
		    &psk->psk_src.addr.v.a.mask, srcaddr, sk->af))
			continue;

		if (! PF_MATCHA(psk->psk_dst.neg, &psk->psk_dst.addr.v.a.addr,
		    &psk->psk_dst.addr.v.a.mask, dstaddr, sk->af))
			continue;

		if (!  PF_MATCHA(psk->psk_rt_addr.neg,
		    &psk->psk_rt_addr.addr.v.a.addr,
		    &psk->psk_rt_addr.addr.v.a.mask,
		    &s->rt_addr, sk->af))
			continue;

		if (psk->psk_src.port_op != 0 &&
		    ! pf_match_port(psk->psk_src.port_op,
		    psk->psk_src.port[0], psk->psk_src.port[1], srcport))
			continue;

		if (psk->psk_dst.port_op != 0 &&
		    ! pf_match_port(psk->psk_dst.port_op,
		    psk->psk_dst.port[0], psk->psk_dst.port[1], dstport))
			continue;

		if (psk->psk_label[0] &&
		    ! pf_label_match(s->rule.ptr, psk->psk_label))
			continue;

		if (psk->psk_ifname[0] && strcmp(psk->psk_ifname,
		    kif->pfik_name))
			continue;

		if (psk->psk_kill_match) {
			/* Create the key to find matching states, with lock
			 * held. */

			bzero(&match_key, sizeof(match_key));

			/*
			 * Build the reversed key: the counterpart state
			 * faces the opposite direction and uses the
			 * opposite key index.
			 */
			if (s->direction == PF_OUT) {
				dir = PF_IN;
				idx = PF_SK_STACK;
			} else {
				dir = PF_OUT;
				idx = PF_SK_WIRE;
			}

			match_key.af = s->key[idx]->af;
			match_key.proto = s->key[idx]->proto;
			PF_ACPY(&match_key.addr[0],
			    &s->key[idx]->addr[1], match_key.af);
			match_key.port[0] = s->key[idx]->port[1];
			PF_ACPY(&match_key.addr[1],
			    &s->key[idx]->addr[0], match_key.af);
			match_key.port[1] = s->key[idx]->port[0];
		}

		/*
		 * Unlinking invalidates the list iterator (and the row
		 * lock is re-taken at the relock label, implying
		 * pf_unlink_state() releases it), so restart the row
		 * scan from the top after every kill.
		 */
		pf_unlink_state(s);
		killed++;

		if (psk->psk_kill_match)
			killed += pf_kill_matching_state(&match_key, dir);

		goto relock_DIOCKILLSTATES;
	}
	PF_HASHROW_UNLOCK(ih);

	return (killed);
}
2339
2340 static int
2341 pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td)
2342 {
2343         int                      error = 0;
2344         PF_RULES_RLOCK_TRACKER;
2345
2346 #define ERROUT_IOCTL(target, x)                                 \
2347     do {                                                                \
2348             error = (x);                                                \
2349             SDT_PROBE3(pf, ioctl, ioctl, error, cmd, error, __LINE__);  \
2350             goto target;                                                \
2351     } while (0)
2352
2353
2354         /* XXX keep in sync with switch() below */
2355         if (securelevel_gt(td->td_ucred, 2))
2356                 switch (cmd) {
2357                 case DIOCGETRULES:
2358                 case DIOCGETRULENV:
2359                 case DIOCGETADDRS:
2360                 case DIOCGETADDR:
2361                 case DIOCGETSTATE:
2362                 case DIOCGETSTATENV:
2363                 case DIOCSETSTATUSIF:
2364                 case DIOCGETSTATUSNV:
2365                 case DIOCCLRSTATUS:
2366                 case DIOCNATLOOK:
2367                 case DIOCSETDEBUG:
2368                 case DIOCGETSTATES:
2369                 case DIOCGETSTATESV2:
2370                 case DIOCGETTIMEOUT:
2371                 case DIOCCLRRULECTRS:
2372                 case DIOCGETLIMIT:
2373                 case DIOCGETALTQSV0:
2374                 case DIOCGETALTQSV1:
2375                 case DIOCGETALTQV0:
2376                 case DIOCGETALTQV1:
2377                 case DIOCGETQSTATSV0:
2378                 case DIOCGETQSTATSV1:
2379                 case DIOCGETRULESETS:
2380                 case DIOCGETRULESET:
2381                 case DIOCRGETTABLES:
2382                 case DIOCRGETTSTATS:
2383                 case DIOCRCLRTSTATS:
2384                 case DIOCRCLRADDRS:
2385                 case DIOCRADDADDRS:
2386                 case DIOCRDELADDRS:
2387                 case DIOCRSETADDRS:
2388                 case DIOCRGETADDRS:
2389                 case DIOCRGETASTATS:
2390                 case DIOCRCLRASTATS:
2391                 case DIOCRTSTADDRS:
2392                 case DIOCOSFPGET:
2393                 case DIOCGETSRCNODES:
2394                 case DIOCCLRSRCNODES:
2395                 case DIOCGETSYNCOOKIES:
2396                 case DIOCIGETIFACES:
2397                 case DIOCGIFSPEEDV0:
2398                 case DIOCGIFSPEEDV1:
2399                 case DIOCSETIFFLAG:
2400                 case DIOCCLRIFFLAG:
2401                 case DIOCGETETHRULES:
2402                 case DIOCGETETHRULE:
2403                 case DIOCGETETHRULESETS:
2404                 case DIOCGETETHRULESET:
2405                         break;
2406                 case DIOCRCLRTABLES:
2407                 case DIOCRADDTABLES:
2408                 case DIOCRDELTABLES:
2409                 case DIOCRSETTFLAGS:
2410                         if (((struct pfioc_table *)addr)->pfrio_flags &
2411                             PFR_FLAG_DUMMY)
2412                                 break; /* dummy operation ok */
2413                         return (EPERM);
2414                 default:
2415                         return (EPERM);
2416                 }
2417
2418         if (!(flags & FWRITE))
2419                 switch (cmd) {
2420                 case DIOCGETRULES:
2421                 case DIOCGETADDRS:
2422                 case DIOCGETADDR:
2423                 case DIOCGETSTATE:
2424                 case DIOCGETSTATENV:
2425                 case DIOCGETSTATUSNV:
2426                 case DIOCGETSTATES:
2427                 case DIOCGETSTATESV2:
2428                 case DIOCGETTIMEOUT:
2429                 case DIOCGETLIMIT:
2430                 case DIOCGETALTQSV0:
2431                 case DIOCGETALTQSV1:
2432                 case DIOCGETALTQV0:
2433                 case DIOCGETALTQV1:
2434                 case DIOCGETQSTATSV0:
2435                 case DIOCGETQSTATSV1:
2436                 case DIOCGETRULESETS:
2437                 case DIOCGETRULESET:
2438                 case DIOCNATLOOK:
2439                 case DIOCRGETTABLES:
2440                 case DIOCRGETTSTATS:
2441                 case DIOCRGETADDRS:
2442                 case DIOCRGETASTATS:
2443                 case DIOCRTSTADDRS:
2444                 case DIOCOSFPGET:
2445                 case DIOCGETSRCNODES:
2446                 case DIOCGETSYNCOOKIES:
2447                 case DIOCIGETIFACES:
2448                 case DIOCGIFSPEEDV1:
2449                 case DIOCGIFSPEEDV0:
2450                 case DIOCGETRULENV:
2451                 case DIOCGETETHRULES:
2452                 case DIOCGETETHRULE:
2453                 case DIOCGETETHRULESETS:
2454                 case DIOCGETETHRULESET:
2455                         break;
2456                 case DIOCRCLRTABLES:
2457                 case DIOCRADDTABLES:
2458                 case DIOCRDELTABLES:
2459                 case DIOCRCLRTSTATS:
2460                 case DIOCRCLRADDRS:
2461                 case DIOCRADDADDRS:
2462                 case DIOCRDELADDRS:
2463                 case DIOCRSETADDRS:
2464                 case DIOCRSETTFLAGS:
2465                         if (((struct pfioc_table *)addr)->pfrio_flags &
2466                             PFR_FLAG_DUMMY) {
2467                                 flags |= FWRITE; /* need write lock for dummy */
2468                                 break; /* dummy operation ok */
2469                         }
2470                         return (EACCES);
2471                 default:
2472                         return (EACCES);
2473                 }
2474
2475         CURVNET_SET(TD_TO_VNET(td));
2476
2477         switch (cmd) {
2478         case DIOCSTART:
2479                 sx_xlock(&V_pf_ioctl_lock);
2480                 if (V_pf_status.running)
2481                         error = EEXIST;
2482                 else {
2483                         hook_pf();
2484                         if (! TAILQ_EMPTY(V_pf_keth->active.rules))
2485                                 hook_pf_eth();
2486                         V_pf_status.running = 1;
2487                         V_pf_status.since = time_second;
2488                         new_unrhdr64(&V_pf_stateid, time_second);
2489
2490                         DPFPRINTF(PF_DEBUG_MISC, ("pf: started\n"));
2491                 }
2492                 break;
2493
2494         case DIOCSTOP:
2495                 sx_xlock(&V_pf_ioctl_lock);
2496                 if (!V_pf_status.running)
2497                         error = ENOENT;
2498                 else {
2499                         V_pf_status.running = 0;
2500                         dehook_pf();
2501                         dehook_pf_eth();
2502                         V_pf_status.since = time_second;
2503                         DPFPRINTF(PF_DEBUG_MISC, ("pf: stopped\n"));
2504                 }
2505                 break;
2506
	case DIOCGETETHRULES: {
		/*
		 * Report the active ticket and rule count for the Ethernet
		 * ruleset of the requested anchor; the caller then fetches
		 * individual rules with DIOCGETETHRULE.
		 */
		struct pfioc_nv		*nv = (struct pfioc_nv *)addr;
		nvlist_t		*nvl;
		void			*packed;
		struct pf_keth_rule	*tail;
		struct pf_keth_ruleset	*rs;
		u_int32_t		 ticket, nr;
		const char		*anchor = "";

		nvl = NULL;
		packed = NULL;

#define ERROUT(x)	ERROUT_IOCTL(DIOCGETETHRULES_error, x)

		/* Bound the user-controlled request size before allocating. */
		if (nv->len > pf_ioctl_maxcount)
			ERROUT(ENOMEM);

		/* Copy the request in */
		packed = malloc(nv->len, M_NVLIST, M_WAITOK);
		if (packed == NULL)
			ERROUT(ENOMEM);

		error = copyin(nv->data, packed, nv->len);
		if (error)
			ERROUT(error);

		nvl = nvlist_unpack(packed, nv->len, 0);
		if (nvl == NULL)
			ERROUT(EBADMSG);

		if (! nvlist_exists_string(nvl, "anchor"))
			ERROUT(EBADMSG);

		anchor = nvlist_get_string(nvl, "anchor");

		rs = pf_find_keth_ruleset(anchor);

		/* Request nvlist is done with; both pointers are reused below. */
		nvlist_destroy(nvl);
		nvl = NULL;
		free(packed, M_NVLIST);
		packed = NULL;

		if (rs == NULL)
			ERROUT(ENOENT);

		/* Reply */
		nvl = nvlist_create(0);
		if (nvl == NULL)
			ERROUT(ENOMEM);

		PF_RULES_RLOCK();

		/* Snapshot ticket and one-past-last rule number under the lock. */
		ticket = rs->active.ticket;
		tail = TAILQ_LAST(rs->active.rules, pf_keth_ruleq);
		if (tail)
			nr = tail->nr + 1;
		else
			nr = 0;

		PF_RULES_RUNLOCK();

		nvlist_add_number(nvl, "ticket", ticket);
		nvlist_add_number(nvl, "nr", nr);

		packed = nvlist_pack(nvl, &nv->len);
		if (packed == NULL)
			ERROUT(ENOMEM);

		/* nv->size == 0 means the caller only probes the needed length. */
		if (nv->size == 0)
			ERROUT(0);
		else if (nv->size < nv->len)
			ERROUT(ENOSPC);

		error = copyout(packed, nv->data, nv->len);

#undef ERROUT
DIOCGETETHRULES_error:
		free(packed, M_NVLIST);
		nvlist_destroy(nvl);
		break;
	}
2588
2589         case DIOCGETETHRULE: {
2590                 struct epoch_tracker     et;
2591                 struct pfioc_nv         *nv = (struct pfioc_nv *)addr;
2592                 nvlist_t                *nvl = NULL;
2593                 void                    *nvlpacked = NULL;
2594                 struct pf_keth_rule     *rule = NULL;
2595                 struct pf_keth_ruleset  *rs;
2596                 u_int32_t                ticket, nr;
2597                 bool                     clear = false;
2598                 const char              *anchor;
2599
2600 #define ERROUT(x)       ERROUT_IOCTL(DIOCGETETHRULE_error, x)
2601
2602                 if (nv->len > pf_ioctl_maxcount)
2603                         ERROUT(ENOMEM);
2604
2605                 nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
2606                 if (nvlpacked == NULL)
2607                         ERROUT(ENOMEM);
2608
2609                 error = copyin(nv->data, nvlpacked, nv->len);
2610                 if (error)
2611                         ERROUT(error);
2612
2613                 nvl = nvlist_unpack(nvlpacked, nv->len, 0);
2614                 if (nvl == NULL)
2615                         ERROUT(EBADMSG);
2616                 if (! nvlist_exists_number(nvl, "ticket"))
2617                         ERROUT(EBADMSG);
2618                 ticket = nvlist_get_number(nvl, "ticket");
2619                 if (! nvlist_exists_string(nvl, "anchor"))
2620                         ERROUT(EBADMSG);
2621                 anchor = nvlist_get_string(nvl, "anchor");
2622
2623                 if (nvlist_exists_bool(nvl, "clear"))
2624                         clear = nvlist_get_bool(nvl, "clear");
2625
2626                 if (clear && !(flags & FWRITE))
2627                         ERROUT(EACCES);
2628
2629                 if (! nvlist_exists_number(nvl, "nr"))
2630                         ERROUT(EBADMSG);
2631                 nr = nvlist_get_number(nvl, "nr");
2632
2633                 PF_RULES_RLOCK();
2634                 rs = pf_find_keth_ruleset(anchor);
2635                 if (rs == NULL) {
2636                         PF_RULES_RUNLOCK();
2637                         ERROUT(ENOENT);
2638                 }
2639                 if (ticket != rs->active.ticket) {
2640                         PF_RULES_RUNLOCK();
2641                         ERROUT(EBUSY);
2642                 }
2643
2644                 nvlist_destroy(nvl);
2645                 nvl = NULL;
2646                 free(nvlpacked, M_NVLIST);
2647                 nvlpacked = NULL;
2648
2649                 rule = TAILQ_FIRST(rs->active.rules);
2650                 while ((rule != NULL) && (rule->nr != nr))
2651                         rule = TAILQ_NEXT(rule, entries);
2652                 if (rule == NULL) {
2653                         PF_RULES_RUNLOCK();
2654                         ERROUT(ENOENT);
2655                 }
2656                 /* Make sure rule can't go away. */
2657                 NET_EPOCH_ENTER(et);
2658                 PF_RULES_RUNLOCK();
2659                 nvl = pf_keth_rule_to_nveth_rule(rule);
2660                 if (pf_keth_anchor_nvcopyout(rs, rule, nvl))
2661                         ERROUT(EBUSY);
2662                 NET_EPOCH_EXIT(et);
2663                 if (nvl == NULL)
2664                         ERROUT(ENOMEM);
2665
2666                 nvlpacked = nvlist_pack(nvl, &nv->len);
2667                 if (nvlpacked == NULL)
2668                         ERROUT(ENOMEM);
2669
2670                 if (nv->size == 0)
2671                         ERROUT(0);
2672                 else if (nv->size < nv->len)
2673                         ERROUT(ENOSPC);
2674
2675                 error = copyout(nvlpacked, nv->data, nv->len);
2676                 if (error == 0 && clear) {
2677                         counter_u64_zero(rule->evaluations);
2678                         for (int i = 0; i < 2; i++) {
2679                                 counter_u64_zero(rule->packets[i]);
2680                                 counter_u64_zero(rule->bytes[i]);
2681                         }
2682                 }
2683
2684 #undef ERROUT
2685 DIOCGETETHRULE_error:
2686                 free(nvlpacked, M_NVLIST);
2687                 nvlist_destroy(nvl);
2688                 break;
2689         }
2690
2691         case DIOCADDETHRULE: {
2692                 struct pfioc_nv         *nv = (struct pfioc_nv *)addr;
2693                 nvlist_t                *nvl = NULL;
2694                 void                    *nvlpacked = NULL;
2695                 struct pf_keth_rule     *rule = NULL, *tail = NULL;
2696                 struct pf_keth_ruleset  *ruleset = NULL;
2697                 struct pfi_kkif         *kif = NULL, *bridge_to_kif = NULL;
2698                 const char              *anchor = "", *anchor_call = "";
2699
2700 #define ERROUT(x)       ERROUT_IOCTL(DIOCADDETHRULE_error, x)
2701
2702                 if (nv->len > pf_ioctl_maxcount)
2703                         ERROUT(ENOMEM);
2704
2705                 nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
2706                 if (nvlpacked == NULL)
2707                         ERROUT(ENOMEM);
2708
2709                 error = copyin(nv->data, nvlpacked, nv->len);
2710                 if (error)
2711                         ERROUT(error);
2712
2713                 nvl = nvlist_unpack(nvlpacked, nv->len, 0);
2714                 if (nvl == NULL)
2715                         ERROUT(EBADMSG);
2716
2717                 if (! nvlist_exists_number(nvl, "ticket"))
2718                         ERROUT(EBADMSG);
2719
2720                 if (nvlist_exists_string(nvl, "anchor"))
2721                         anchor = nvlist_get_string(nvl, "anchor");
2722                 if (nvlist_exists_string(nvl, "anchor_call"))
2723                         anchor_call = nvlist_get_string(nvl, "anchor_call");
2724
2725                 ruleset = pf_find_keth_ruleset(anchor);
2726                 if (ruleset == NULL)
2727                         ERROUT(EINVAL);
2728
2729                 if (nvlist_get_number(nvl, "ticket") !=
2730                     ruleset->inactive.ticket) {
2731                         DPFPRINTF(PF_DEBUG_MISC,
2732                             ("ticket: %d != %d\n",
2733                             (u_int32_t)nvlist_get_number(nvl, "ticket"),
2734                             ruleset->inactive.ticket));
2735                         ERROUT(EBUSY);
2736                 }
2737
2738                 rule = malloc(sizeof(*rule), M_PFRULE, M_WAITOK);
2739                 if (rule == NULL)
2740                         ERROUT(ENOMEM);
2741                 rule->timestamp = NULL;
2742
2743                 error = pf_nveth_rule_to_keth_rule(nvl, rule);
2744                 if (error != 0)
2745                         ERROUT(error);
2746
2747                 if (rule->ifname[0])
2748                         kif = pf_kkif_create(M_WAITOK);
2749                 if (rule->bridge_to_name[0])
2750                         bridge_to_kif = pf_kkif_create(M_WAITOK);
2751                 rule->evaluations = counter_u64_alloc(M_WAITOK);
2752                 for (int i = 0; i < 2; i++) {
2753                         rule->packets[i] = counter_u64_alloc(M_WAITOK);
2754                         rule->bytes[i] = counter_u64_alloc(M_WAITOK);
2755                 }
2756                 rule->timestamp = uma_zalloc_pcpu(pf_timestamp_pcpu_zone,
2757                     M_WAITOK | M_ZERO);
2758
2759                 PF_RULES_WLOCK();
2760
2761                 if (rule->ifname[0]) {
2762                         rule->kif = pfi_kkif_attach(kif, rule->ifname);
2763                         pfi_kkif_ref(rule->kif);
2764                 } else
2765                         rule->kif = NULL;
2766                 if (rule->bridge_to_name[0]) {
2767                         rule->bridge_to = pfi_kkif_attach(bridge_to_kif,
2768                             rule->bridge_to_name);
2769                         pfi_kkif_ref(rule->bridge_to);
2770                 } else
2771                         rule->bridge_to = NULL;
2772
2773 #ifdef ALTQ
2774                 /* set queue IDs */
2775                 if (rule->qname[0] != 0) {
2776                         if ((rule->qid = pf_qname2qid(rule->qname)) == 0)
2777                                 error = EBUSY;
2778                         else
2779                                 rule->qid = rule->qid;
2780                 }
2781 #endif
2782                 if (rule->tagname[0])
2783                         if ((rule->tag = pf_tagname2tag(rule->tagname)) == 0)
2784                                 error = EBUSY;
2785                 if (rule->match_tagname[0])
2786                         if ((rule->match_tag = pf_tagname2tag(
2787                             rule->match_tagname)) == 0)
2788                                 error = EBUSY;
2789
2790                 if (error == 0 && rule->ipdst.addr.type == PF_ADDR_TABLE)
2791                         error = pf_eth_addr_setup(ruleset, &rule->ipdst.addr);
2792                 if (error == 0 && rule->ipsrc.addr.type == PF_ADDR_TABLE)
2793                         error = pf_eth_addr_setup(ruleset, &rule->ipsrc.addr);
2794
2795                 if (error) {
2796                         pf_free_eth_rule(rule);
2797                         PF_RULES_WUNLOCK();
2798                         ERROUT(error);
2799                 }
2800
2801                 if (pf_keth_anchor_setup(rule, ruleset, anchor_call)) {
2802                         pf_free_eth_rule(rule);
2803                         PF_RULES_WUNLOCK();
2804                         ERROUT(EINVAL);
2805                 }
2806
2807                 tail = TAILQ_LAST(ruleset->inactive.rules, pf_keth_ruleq);
2808                 if (tail)
2809                         rule->nr = tail->nr + 1;
2810                 else
2811                         rule->nr = 0;
2812
2813                 TAILQ_INSERT_TAIL(ruleset->inactive.rules, rule, entries);
2814
2815                 PF_RULES_WUNLOCK();
2816
2817 #undef ERROUT
2818 DIOCADDETHRULE_error:
2819                 nvlist_destroy(nvl);
2820                 free(nvlpacked, M_NVLIST);
2821                 break;
2822         }
2823
	case DIOCGETETHRULESETS: {
		/*
		 * Count the child anchors under the given Ethernet ruleset
		 * path; entries are then fetched one by one with
		 * DIOCGETETHRULESET.
		 */
		struct epoch_tracker	 et;
		struct pfioc_nv		*nv = (struct pfioc_nv *)addr;
		nvlist_t		*nvl = NULL;
		void			*nvlpacked = NULL;
		struct pf_keth_ruleset	*ruleset;
		struct pf_keth_anchor	*anchor;
		int			 nr = 0;

#define ERROUT(x)	ERROUT_IOCTL(DIOCGETETHRULESETS_error, x)

		if (nv->len > pf_ioctl_maxcount)
			ERROUT(ENOMEM);

		nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
		if (nvlpacked == NULL)
			ERROUT(ENOMEM);

		error = copyin(nv->data, nvlpacked, nv->len);
		if (error)
			ERROUT(error);

		nvl = nvlist_unpack(nvlpacked, nv->len, 0);
		if (nvl == NULL)
			ERROUT(EBADMSG);
		if (! nvlist_exists_string(nvl, "path"))
			ERROUT(EBADMSG);

		/* Epoch keeps the anchor trees stable while we walk them. */
		NET_EPOCH_ENTER(et);

		if ((ruleset = pf_find_keth_ruleset(
		    nvlist_get_string(nvl, "path"))) == NULL) {
			NET_EPOCH_EXIT(et);
			ERROUT(ENOENT);
		}

		if (ruleset->anchor == NULL) {
			/* Root ruleset: count only top-level anchors. */
			RB_FOREACH(anchor, pf_keth_anchor_global, &V_pf_keth_anchors)
				if (anchor->parent == NULL)
					nr++;
		} else {
			RB_FOREACH(anchor, pf_keth_anchor_node,
			    &ruleset->anchor->children)
				nr++;
		}

		NET_EPOCH_EXIT(et);

		/* Request nvlist is done with; both pointers are reused below. */
		nvlist_destroy(nvl);
		nvl = NULL;
		free(nvlpacked, M_NVLIST);
		nvlpacked = NULL;

		nvl = nvlist_create(0);
		if (nvl == NULL)
			ERROUT(ENOMEM);

		nvlist_add_number(nvl, "nr", nr);

		nvlpacked = nvlist_pack(nvl, &nv->len);
		if (nvlpacked == NULL)
			ERROUT(ENOMEM);

		/* nv->size == 0 means the caller only probes the needed length. */
		if (nv->size == 0)
			ERROUT(0);
		else if (nv->size < nv->len)
			ERROUT(ENOSPC);

		error = copyout(nvlpacked, nv->data, nv->len);

#undef ERROUT
DIOCGETETHRULESETS_error:
		free(nvlpacked, M_NVLIST);
		nvlist_destroy(nvl);
		break;
	}
2900
2901         case DIOCGETETHRULESET: {
2902                 struct epoch_tracker     et;
2903                 struct pfioc_nv         *nv = (struct pfioc_nv *)addr;
2904                 nvlist_t                *nvl = NULL;
2905                 void                    *nvlpacked = NULL;
2906                 struct pf_keth_ruleset  *ruleset;
2907                 struct pf_keth_anchor   *anchor;
2908                 int                      nr = 0, req_nr = 0;
2909                 bool                     found = false;
2910
2911 #define ERROUT(x)       ERROUT_IOCTL(DIOCGETETHRULESET_error, x)
2912
2913                 if (nv->len > pf_ioctl_maxcount)
2914                         ERROUT(ENOMEM);
2915
2916                 nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
2917                 if (nvlpacked == NULL)
2918                         ERROUT(ENOMEM);
2919
2920                 error = copyin(nv->data, nvlpacked, nv->len);
2921                 if (error)
2922                         ERROUT(error);
2923
2924                 nvl = nvlist_unpack(nvlpacked, nv->len, 0);
2925                 if (nvl == NULL)
2926                         ERROUT(EBADMSG);
2927                 if (! nvlist_exists_string(nvl, "path"))
2928                         ERROUT(EBADMSG);
2929                 if (! nvlist_exists_number(nvl, "nr"))
2930                         ERROUT(EBADMSG);
2931
2932                 req_nr = nvlist_get_number(nvl, "nr");
2933
2934                 NET_EPOCH_ENTER(et);
2935
2936                 if ((ruleset = pf_find_keth_ruleset(
2937                     nvlist_get_string(nvl, "path"))) == NULL) {
2938                         NET_EPOCH_EXIT(et);
2939                         ERROUT(ENOENT);
2940                 }
2941
2942                 nvlist_destroy(nvl);
2943                 nvl = NULL;
2944                 free(nvlpacked, M_NVLIST);
2945                 nvlpacked = NULL;
2946
2947                 nvl = nvlist_create(0);
2948                 if (nvl == NULL) {
2949                         NET_EPOCH_EXIT(et);
2950                         ERROUT(ENOMEM);
2951                 }
2952
2953                 if (ruleset->anchor == NULL) {
2954                         RB_FOREACH(anchor, pf_keth_anchor_global,
2955                             &V_pf_keth_anchors) {
2956                                 if (anchor->parent == NULL && nr++ == req_nr) {
2957                                         found = true;
2958                                         break;
2959                                 }
2960                         }
2961                 } else {
2962                         RB_FOREACH(anchor, pf_keth_anchor_node,
2963                              &ruleset->anchor->children) {
2964                                 if (nr++ == req_nr) {
2965                                         found = true;
2966                                         break;
2967                                 }
2968                         }
2969                 }
2970
2971                 NET_EPOCH_EXIT(et);
2972                 if (found) {
2973                         nvlist_add_number(nvl, "nr", nr);
2974                         nvlist_add_string(nvl, "name", anchor->name);
2975                         if (ruleset->anchor)
2976                                 nvlist_add_string(nvl, "path",
2977                                     ruleset->anchor->path);
2978                         else
2979                                 nvlist_add_string(nvl, "path", "");
2980                 } else {
2981                         ERROUT(EBUSY);
2982                 }
2983
2984                 nvlpacked = nvlist_pack(nvl, &nv->len);
2985                 if (nvlpacked == NULL)
2986                         ERROUT(ENOMEM);
2987
2988                 if (nv->size == 0)
2989                         ERROUT(0);
2990                 else if (nv->size < nv->len)
2991                         ERROUT(ENOSPC);
2992
2993                 error = copyout(nvlpacked, nv->data, nv->len);
2994
2995 #undef ERROUT
2996 DIOCGETETHRULESET_error:
2997                 free(nvlpacked, M_NVLIST);
2998                 nvlist_destroy(nvl);
2999                 break;
3000         }
3001
	case DIOCADDRULENV: {
		/*
		 * nvlist variant of DIOCADDRULE: unpack the rule, tickets and
		 * anchor names from the request and insert the rule into the
		 * inactive ruleset.
		 */
		struct pfioc_nv *nv = (struct pfioc_nv *)addr;
		nvlist_t	*nvl = NULL;
		void		*nvlpacked = NULL;
		struct pf_krule	*rule = NULL;
		const char	*anchor = "", *anchor_call = "";
		uint32_t	 ticket = 0, pool_ticket = 0;

#define ERROUT(x)	ERROUT_IOCTL(DIOCADDRULENV_error, x)

		if (nv->len > pf_ioctl_maxcount)
			ERROUT(ENOMEM);

		/* M_WAITOK allocations do not fail, hence no NULL check. */
		nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
		error = copyin(nv->data, nvlpacked, nv->len);
		if (error)
			ERROUT(error);

		nvl = nvlist_unpack(nvlpacked, nv->len, 0);
		if (nvl == NULL)
			ERROUT(EBADMSG);

		if (! nvlist_exists_number(nvl, "ticket"))
			ERROUT(EINVAL);
		ticket = nvlist_get_number(nvl, "ticket");

		if (! nvlist_exists_number(nvl, "pool_ticket"))
			ERROUT(EINVAL);
		pool_ticket = nvlist_get_number(nvl, "pool_ticket");

		if (! nvlist_exists_nvlist(nvl, "rule"))
			ERROUT(EINVAL);

		rule = pf_krule_alloc();
		error = pf_nvrule_to_krule(nvlist_get_nvlist(nvl, "rule"),
		    rule);
		if (error)
			ERROUT(error);

		if (nvlist_exists_string(nvl, "anchor"))
			anchor = nvlist_get_string(nvl, "anchor");
		if (nvlist_exists_string(nvl, "anchor_call"))
			anchor_call = nvlist_get_string(nvl, "anchor_call");

		/* Surface any accumulated nvlist error before committing. */
		if ((error = nvlist_error(nvl)))
			ERROUT(error);

		/* Frees rule on error */
		error = pf_ioctl_addrule(rule, ticket, pool_ticket, anchor,
		    anchor_call, td);

		nvlist_destroy(nvl);
		free(nvlpacked, M_NVLIST);
		break;
#undef ERROUT
DIOCADDRULENV_error:
		pf_krule_free(rule);
		nvlist_destroy(nvl);
		free(nvlpacked, M_NVLIST);

		break;
	}
3064         case DIOCADDRULE: {
3065                 struct pfioc_rule       *pr = (struct pfioc_rule *)addr;
3066                 struct pf_krule         *rule;
3067
3068                 rule = pf_krule_alloc();
3069                 error = pf_rule_to_krule(&pr->rule, rule);
3070                 if (error != 0) {
3071                         pf_krule_free(rule);
3072                         break;
3073                 }
3074
3075                 pr->anchor[sizeof(pr->anchor) - 1] = 0;
3076
3077                 /* Frees rule on error */
3078                 error = pf_ioctl_addrule(rule, pr->ticket, pr->pool_ticket,
3079                     pr->anchor, pr->anchor_call, td);
3080                 break;
3081         }
3082
	case DIOCGETRULES: {
		/*
		 * Return the active ticket and rule count for the requested
		 * ruleset; the caller then iterates with DIOCGETRULE /
		 * DIOCGETRULENV.
		 */
		struct pfioc_rule	*pr = (struct pfioc_rule *)addr;
		struct pf_kruleset	*ruleset;
		struct pf_krule		*tail;
		int			 rs_num;

		/* Guarantee NUL termination of the user-supplied anchor path. */
		pr->anchor[sizeof(pr->anchor) - 1] = 0;

		/*
		 * NOTE(review): a write lock for a read-only snapshot --
		 * presumably required by the locking protocol elsewhere;
		 * confirm before relaxing to PF_RULES_RLOCK().
		 */
		PF_RULES_WLOCK();
		ruleset = pf_find_kruleset(pr->anchor);
		if (ruleset == NULL) {
			PF_RULES_WUNLOCK();
			error = EINVAL;
			break;
		}
		rs_num = pf_get_ruleset_number(pr->rule.action);
		if (rs_num >= PF_RULESET_MAX) {
			PF_RULES_WUNLOCK();
			error = EINVAL;
			break;
		}
		/* pr->nr is one past the last rule number, i.e. the count. */
		tail = TAILQ_LAST(ruleset->rules[rs_num].active.ptr,
		    pf_krulequeue);
		if (tail)
			pr->nr = tail->nr + 1;
		else
			pr->nr = 0;
		pr->ticket = ruleset->rules[rs_num].active.ticket;
		PF_RULES_WUNLOCK();
		break;
	}
3114
	case DIOCGETRULENV: {
		/*
		 * nvlist variant of DIOCGETRULE: return one rule identified
		 * by anchor, ruleset number, ticket and rule number,
		 * optionally clearing its counters.
		 */
		struct pfioc_nv		*nv = (struct pfioc_nv *)addr;
		nvlist_t		*nvrule = NULL;
		nvlist_t		*nvl = NULL;
		struct pf_kruleset	*ruleset;
		struct pf_krule		*rule;
		void			*nvlpacked = NULL;
		int			 rs_num, nr;
		bool			 clear_counter = false;

#define ERROUT(x)	ERROUT_IOCTL(DIOCGETRULENV_error, x)

		if (nv->len > pf_ioctl_maxcount)
			ERROUT(ENOMEM);

		/* Copy the request in */
		nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
		if (nvlpacked == NULL)
			ERROUT(ENOMEM);

		error = copyin(nv->data, nvlpacked, nv->len);
		if (error)
			ERROUT(error);

		nvl = nvlist_unpack(nvlpacked, nv->len, 0);
		if (nvl == NULL)
			ERROUT(EBADMSG);

		if (! nvlist_exists_string(nvl, "anchor"))
			ERROUT(EBADMSG);
		if (! nvlist_exists_number(nvl, "ruleset"))
			ERROUT(EBADMSG);
		if (! nvlist_exists_number(nvl, "ticket"))
			ERROUT(EBADMSG);
		if (! nvlist_exists_number(nvl, "nr"))
			ERROUT(EBADMSG);

		if (nvlist_exists_bool(nvl, "clear_counter"))
			clear_counter = nvlist_get_bool(nvl, "clear_counter");

		/* Clearing counters modifies state, so require FWRITE. */
		if (clear_counter && !(flags & FWRITE))
			ERROUT(EACCES);

		nr = nvlist_get_number(nvl, "nr");

		PF_RULES_WLOCK();
		ruleset = pf_find_kruleset(nvlist_get_string(nvl, "anchor"));
		if (ruleset == NULL) {
			PF_RULES_WUNLOCK();
			ERROUT(ENOENT);
		}

		rs_num = pf_get_ruleset_number(nvlist_get_number(nvl, "ruleset"));
		if (rs_num >= PF_RULESET_MAX) {
			PF_RULES_WUNLOCK();
			ERROUT(EINVAL);
		}

		/* The ruleset must not have changed since the caller's GETRULES. */
		if (nvlist_get_number(nvl, "ticket") !=
		    ruleset->rules[rs_num].active.ticket) {
			PF_RULES_WUNLOCK();
			ERROUT(EBUSY);
		}

		if ((error = nvlist_error(nvl))) {
			PF_RULES_WUNLOCK();
			ERROUT(error);
		}

		/* Linear search for the requested rule number. */
		rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr);
		while ((rule != NULL) && (rule->nr != nr))
			rule = TAILQ_NEXT(rule, entries);
		if (rule == NULL) {
			PF_RULES_WUNLOCK();
			ERROUT(EBUSY);
		}

		nvrule = pf_krule_to_nvrule(rule);

		/* Request nvlist is done with; nvl is reused for the reply. */
		nvlist_destroy(nvl);
		nvl = nvlist_create(0);
		if (nvl == NULL) {
			PF_RULES_WUNLOCK();
			ERROUT(ENOMEM);
		}
		nvlist_add_number(nvl, "nr", nr);
		nvlist_add_nvlist(nvl, "rule", nvrule);
		nvlist_destroy(nvrule);
		nvrule = NULL;
		if (pf_kanchor_nvcopyout(ruleset, rule, nvl)) {
			PF_RULES_WUNLOCK();
			ERROUT(EBUSY);
		}

		free(nvlpacked, M_NVLIST);
		nvlpacked = nvlist_pack(nvl, &nv->len);
		if (nvlpacked == NULL) {
			PF_RULES_WUNLOCK();
			ERROUT(ENOMEM);
		}

		/* nv->size == 0 means the caller only probes the needed length. */
		if (nv->size == 0) {
			PF_RULES_WUNLOCK();
			ERROUT(0);
		}
		else if (nv->size < nv->len) {
			PF_RULES_WUNLOCK();
			ERROUT(ENOSPC);
		}

		/* Clear while still holding the lock; rule stays valid here. */
		if (clear_counter) {
			pf_counter_u64_zero(&rule->evaluations);
			for (int i = 0; i < 2; i++) {
				pf_counter_u64_zero(&rule->packets[i]);
				pf_counter_u64_zero(&rule->bytes[i]);
			}
			counter_u64_zero(rule->states_tot);
		}
		PF_RULES_WUNLOCK();

		error = copyout(nvlpacked, nv->data, nv->len);

#undef ERROUT
DIOCGETRULENV_error:
		free(nvlpacked, M_NVLIST);
		nvlist_destroy(nvrule);
		nvlist_destroy(nvl);

		break;
	}
3245
3246         case DIOCCHANGERULE: {
3247                 struct pfioc_rule       *pcr = (struct pfioc_rule *)addr;
3248                 struct pf_kruleset      *ruleset;
3249                 struct pf_krule         *oldrule = NULL, *newrule = NULL;
3250                 struct pfi_kkif         *kif = NULL;
3251                 struct pf_kpooladdr     *pa;
3252                 u_int32_t                nr = 0;
3253                 int                      rs_num;
3254
3255                 pcr->anchor[sizeof(pcr->anchor) - 1] = 0;
3256
3257                 if (pcr->action < PF_CHANGE_ADD_HEAD ||
3258                     pcr->action > PF_CHANGE_GET_TICKET) {
3259                         error = EINVAL;
3260                         break;
3261                 }
3262                 if (pcr->rule.return_icmp >> 8 > ICMP_MAXTYPE) {
3263                         error = EINVAL;
3264                         break;
3265                 }
3266
3267                 if (pcr->action != PF_CHANGE_REMOVE) {
3268                         newrule = pf_krule_alloc();
3269                         error = pf_rule_to_krule(&pcr->rule, newrule);
3270                         if (error != 0) {
3271                                 pf_krule_free(newrule);
3272                                 break;
3273                         }
3274
3275                         if (newrule->ifname[0])
3276                                 kif = pf_kkif_create(M_WAITOK);
3277                         pf_counter_u64_init(&newrule->evaluations, M_WAITOK);
3278                         for (int i = 0; i < 2; i++) {
3279                                 pf_counter_u64_init(&newrule->packets[i], M_WAITOK);
3280                                 pf_counter_u64_init(&newrule->bytes[i], M_WAITOK);
3281                         }
3282                         newrule->states_cur = counter_u64_alloc(M_WAITOK);
3283                         newrule->states_tot = counter_u64_alloc(M_WAITOK);
3284                         newrule->src_nodes = counter_u64_alloc(M_WAITOK);
3285                         newrule->cuid = td->td_ucred->cr_ruid;
3286                         newrule->cpid = td->td_proc ? td->td_proc->p_pid : 0;
3287                         TAILQ_INIT(&newrule->rpool.list);
3288                 }
3289 #define ERROUT(x)       ERROUT_IOCTL(DIOCCHANGERULE_error, x)
3290
3291                 PF_CONFIG_LOCK();
3292                 PF_RULES_WLOCK();
3293 #ifdef PF_WANT_32_TO_64_COUNTER
3294                 if (newrule != NULL) {
3295                         LIST_INSERT_HEAD(&V_pf_allrulelist, newrule, allrulelist);
3296                         newrule->allrulelinked = true;
3297                         V_pf_allrulecount++;
3298                 }
3299 #endif
3300
3301                 if (!(pcr->action == PF_CHANGE_REMOVE ||
3302                     pcr->action == PF_CHANGE_GET_TICKET) &&
3303                     pcr->pool_ticket != V_ticket_pabuf)
3304                         ERROUT(EBUSY);
3305
3306                 ruleset = pf_find_kruleset(pcr->anchor);
3307                 if (ruleset == NULL)
3308                         ERROUT(EINVAL);
3309
3310                 rs_num = pf_get_ruleset_number(pcr->rule.action);
3311                 if (rs_num >= PF_RULESET_MAX)
3312                         ERROUT(EINVAL);
3313
3314                 /*
3315                  * XXXMJG: there is no guarantee that the ruleset was
3316                  * created by the usual route of calling DIOCXBEGIN.
3317                  * As a result it is possible the rule tree will not
3318                  * be allocated yet. Hack around it by doing it here.
3319                  * Note it is fine to let the tree persist in case of
3320                  * error as it will be freed down the road on future
3321                  * updates (if need be).
3322                  */
3323                 if (ruleset->rules[rs_num].active.tree == NULL) {
3324                         ruleset->rules[rs_num].active.tree = pf_rule_tree_alloc(M_NOWAIT);
3325                         if (ruleset->rules[rs_num].active.tree == NULL) {
3326                                 ERROUT(ENOMEM);
3327                         }
3328                 }
3329
3330                 if (pcr->action == PF_CHANGE_GET_TICKET) {
3331                         pcr->ticket = ++ruleset->rules[rs_num].active.ticket;
3332                         ERROUT(0);
3333                 } else if (pcr->ticket !=
3334                             ruleset->rules[rs_num].active.ticket)
3335                                 ERROUT(EINVAL);
3336
3337                 if (pcr->action != PF_CHANGE_REMOVE) {
3338                         if (newrule->ifname[0]) {
3339                                 newrule->kif = pfi_kkif_attach(kif,
3340                                     newrule->ifname);
3341                                 kif = NULL;
3342                                 pfi_kkif_ref(newrule->kif);
3343                         } else
3344                                 newrule->kif = NULL;
3345
3346                         if (newrule->rtableid > 0 &&
3347                             newrule->rtableid >= rt_numfibs)
3348                                 error = EBUSY;
3349
3350 #ifdef ALTQ
3351                         /* set queue IDs */
3352                         if (newrule->qname[0] != 0) {
3353                                 if ((newrule->qid =
3354                                     pf_qname2qid(newrule->qname)) == 0)
3355                                         error = EBUSY;
3356                                 else if (newrule->pqname[0] != 0) {
3357                                         if ((newrule->pqid =
3358                                             pf_qname2qid(newrule->pqname)) == 0)
3359                                                 error = EBUSY;
3360                                 } else
3361                                         newrule->pqid = newrule->qid;
3362                         }
3363 #endif /* ALTQ */
3364                         if (newrule->tagname[0])
3365                                 if ((newrule->tag =
3366                                     pf_tagname2tag(newrule->tagname)) == 0)
3367                                         error = EBUSY;
3368                         if (newrule->match_tagname[0])
3369                                 if ((newrule->match_tag = pf_tagname2tag(
3370                                     newrule->match_tagname)) == 0)
3371                                         error = EBUSY;
3372                         if (newrule->rt && !newrule->direction)
3373                                 error = EINVAL;
3374                         if (!newrule->log)
3375                                 newrule->logif = 0;
3376                         if (newrule->logif >= PFLOGIFS_MAX)
3377                                 error = EINVAL;
3378                         if (pf_addr_setup(ruleset, &newrule->src.addr, newrule->af))
3379                                 error = ENOMEM;
3380                         if (pf_addr_setup(ruleset, &newrule->dst.addr, newrule->af))
3381                                 error = ENOMEM;
3382                         if (pf_kanchor_setup(newrule, ruleset, pcr->anchor_call))
3383                                 error = EINVAL;
3384                         TAILQ_FOREACH(pa, &V_pf_pabuf, entries)
3385                                 if (pa->addr.type == PF_ADDR_TABLE) {
3386                                         pa->addr.p.tbl =
3387                                             pfr_attach_table(ruleset,
3388                                             pa->addr.v.tblname);
3389                                         if (pa->addr.p.tbl == NULL)
3390                                                 error = ENOMEM;
3391                                 }
3392
3393                         newrule->overload_tbl = NULL;
3394                         if (newrule->overload_tblname[0]) {
3395                                 if ((newrule->overload_tbl = pfr_attach_table(
3396                                     ruleset, newrule->overload_tblname)) ==
3397                                     NULL)
3398                                         error = EINVAL;
3399                                 else
3400                                         newrule->overload_tbl->pfrkt_flags |=
3401                                             PFR_TFLAG_ACTIVE;
3402                         }
3403
3404                         pf_mv_kpool(&V_pf_pabuf, &newrule->rpool.list);
3405                         if (((((newrule->action == PF_NAT) ||
3406                             (newrule->action == PF_RDR) ||
3407                             (newrule->action == PF_BINAT) ||
3408                             (newrule->rt > PF_NOPFROUTE)) &&
3409                             !newrule->anchor)) &&
3410                             (TAILQ_FIRST(&newrule->rpool.list) == NULL))
3411                                 error = EINVAL;
3412
3413                         if (error) {
3414                                 pf_free_rule(newrule);
3415                                 PF_RULES_WUNLOCK();
3416                                 PF_CONFIG_UNLOCK();
3417                                 break;
3418                         }
3419
3420                         newrule->rpool.cur = TAILQ_FIRST(&newrule->rpool.list);
3421                 }
3422                 pf_empty_kpool(&V_pf_pabuf);
3423
3424                 if (pcr->action == PF_CHANGE_ADD_HEAD)
3425                         oldrule = TAILQ_FIRST(
3426                             ruleset->rules[rs_num].active.ptr);
3427                 else if (pcr->action == PF_CHANGE_ADD_TAIL)
3428                         oldrule = TAILQ_LAST(
3429                             ruleset->rules[rs_num].active.ptr, pf_krulequeue);
3430                 else {
3431                         oldrule = TAILQ_FIRST(
3432                             ruleset->rules[rs_num].active.ptr);
3433                         while ((oldrule != NULL) && (oldrule->nr != pcr->nr))
3434                                 oldrule = TAILQ_NEXT(oldrule, entries);
3435                         if (oldrule == NULL) {
3436                                 if (newrule != NULL)
3437                                         pf_free_rule(newrule);
3438                                 PF_RULES_WUNLOCK();
3439                                 PF_CONFIG_UNLOCK();
3440                                 error = EINVAL;
3441                                 break;
3442                         }
3443                 }
3444
3445                 if (pcr->action == PF_CHANGE_REMOVE) {
3446                         pf_unlink_rule(ruleset->rules[rs_num].active.ptr,
3447                             oldrule);
3448                         RB_REMOVE(pf_krule_global,
3449                             ruleset->rules[rs_num].active.tree, oldrule);
3450                         ruleset->rules[rs_num].active.rcount--;
3451                 } else {
3452                         pf_hash_rule(newrule);
3453                         if (RB_INSERT(pf_krule_global,
3454                             ruleset->rules[rs_num].active.tree, newrule) != NULL) {
3455                                 pf_free_rule(newrule);
3456                                 PF_RULES_WUNLOCK();
3457                                 PF_CONFIG_UNLOCK();
3458                                 error = EEXIST;
3459                                 break;
3460                         }
3461
3462                         if (oldrule == NULL)
3463                                 TAILQ_INSERT_TAIL(
3464                                     ruleset->rules[rs_num].active.ptr,
3465                                     newrule, entries);
3466                         else if (pcr->action == PF_CHANGE_ADD_HEAD ||
3467                             pcr->action == PF_CHANGE_ADD_BEFORE)
3468                                 TAILQ_INSERT_BEFORE(oldrule, newrule, entries);
3469                         else
3470                                 TAILQ_INSERT_AFTER(
3471                                     ruleset->rules[rs_num].active.ptr,
3472                                     oldrule, newrule, entries);
3473                         ruleset->rules[rs_num].active.rcount++;
3474                 }
3475
3476                 nr = 0;
3477                 TAILQ_FOREACH(oldrule,
3478                     ruleset->rules[rs_num].active.ptr, entries)
3479                         oldrule->nr = nr++;
3480
3481                 ruleset->rules[rs_num].active.ticket++;
3482
3483                 pf_calc_skip_steps(ruleset->rules[rs_num].active.ptr);
3484                 pf_remove_if_empty_kruleset(ruleset);
3485
3486                 PF_RULES_WUNLOCK();
3487                 PF_CONFIG_UNLOCK();
3488                 break;
3489
3490 #undef ERROUT
3491 DIOCCHANGERULE_error:
3492                 PF_RULES_WUNLOCK();
3493                 PF_CONFIG_UNLOCK();
3494                 pf_krule_free(newrule);
3495                 pf_kkif_free(kif);
3496                 break;
3497         }
3498
case DIOCCLRSTATESNV: {
	/* Clear states; filter criteria arrive as an nvlist blob. */
	error = pf_clearstates_nv((struct pfioc_nv *)addr);
	break;
}
3503
case DIOCKILLSTATESNV: {
	/* Kill matching states; filter criteria arrive as an nvlist blob. */
	error = pf_killstates_nv((struct pfioc_nv *)addr);
	break;
}
3508
case DIOCADDSTATE: {
	struct pfioc_state		*ps = (struct pfioc_state *)addr;
	struct pfsync_state_1301	*sp = &ps->state;

	/* Reject a state whose timeout index is out of range. */
	if (sp->timeout >= PFTM_MAX) {
		error = EINVAL;
		break;
	}
	/*
	 * State import is implemented by pfsync; if the import hook is
	 * not set (module not loaded) the operation is unsupported.
	 * The state is handed over in the legacy 13.01 wire format.
	 */
	if (V_pfsync_state_import_ptr != NULL) {
		PF_RULES_RLOCK();
		error = V_pfsync_state_import_ptr(
		    (union pfsync_state_union *)sp, PFSYNC_SI_IOCTL,
		    PFSYNC_MSG_VERSION_1301);
		PF_RULES_RUNLOCK();
	} else
		error = EOPNOTSUPP;
	break;
}
3527
case DIOCGETSTATE: {
	struct pfioc_state	*ps = (struct pfioc_state *)addr;
	struct pf_kstate	*s;

	/* Look the state up by (id, creatorid) pair. */
	s = pf_find_state_byid(ps->state.id, ps->state.creatorid);
	if (s == NULL) {
		error = ENOENT;
		break;
	}

	/*
	 * Export in the legacy 13.01 format, then drop the state lock
	 * taken by the lookup (note the PF_STATE_UNLOCK below).
	 */
	pfsync_state_export((union pfsync_state_union*)&ps->state,
	    s, PFSYNC_MSG_VERSION_1301);
	PF_STATE_UNLOCK(s);
	break;
}
3543
case DIOCGETSTATENV: {
	/* nvlist variant of DIOCGETSTATE. */
	error = pf_getstate((struct pfioc_nv *)addr);
	break;
}
3548
case DIOCGETSTATES: {
	struct pfioc_states	*ps = (struct pfioc_states *)addr;
	struct pf_kstate	*s;
	struct pfsync_state_1301	*pstore, *p;
	int			 i, nr;
	size_t			 slice_count = 16, count;
	void			*out;

	/*
	 * A non-positive buffer length is a size probe: report the
	 * space required for all currently allocated states.
	 */
	if (ps->ps_len <= 0) {
		nr = uma_zone_get_cur(V_pf_state_z);
		ps->ps_len = sizeof(struct pfsync_state_1301) * nr;
		break;
	}

	out = ps->ps_states;
	/* Per-hash-row staging buffer, grown on demand below. */
	pstore = mallocarray(slice_count,
	    sizeof(struct pfsync_state_1301), M_TEMP, M_WAITOK | M_ZERO);
	nr = 0;

	for (i = 0; i <= pf_hashmask; i++) {
		struct pf_idhash *ih = &V_pf_idhash[i];

DIOCGETSTATES_retry:
		p = pstore;

		if (LIST_EMPTY(&ih->states))
			continue;

		PF_HASHROW_LOCK(ih);
		/* First pass: count live (linked) states in this row. */
		count = 0;
		LIST_FOREACH(s, &ih->states, entry) {
			if (s->timeout == PFTM_UNLINKED)
				continue;
			count++;
		}

		/*
		 * Staging buffer too small for this row: drop the row
		 * lock (M_WAITOK allocation may sleep), grow it with
		 * headroom, and rescan the row from scratch.
		 */
		if (count > slice_count) {
			PF_HASHROW_UNLOCK(ih);
			free(pstore, M_TEMP);
			slice_count = count * 2;
			pstore = mallocarray(slice_count,
			    sizeof(struct pfsync_state_1301), M_TEMP,
			    M_WAITOK | M_ZERO);
			goto DIOCGETSTATES_retry;
		}

		/* Stop once the user-supplied buffer would overflow. */
		if ((nr+count) * sizeof(*p) > ps->ps_len) {
			PF_HASHROW_UNLOCK(ih);
			goto DIOCGETSTATES_full;
		}

		/* Second pass: export the row into the staging buffer. */
		LIST_FOREACH(s, &ih->states, entry) {
			if (s->timeout == PFTM_UNLINKED)
				continue;

			pfsync_state_export((union pfsync_state_union*)p,
			    s, PFSYNC_MSG_VERSION_1301);
			p++;
			nr++;
		}
		PF_HASHROW_UNLOCK(ih);
		/* copyout() may fault/sleep, so it runs unlocked. */
		error = copyout(pstore, out,
		    sizeof(struct pfsync_state_1301) * count);
		if (error)
			break;
		out = ps->ps_states + nr;
	}
DIOCGETSTATES_full:
	/* Report how many states were actually written. */
	ps->ps_len = sizeof(struct pfsync_state_1301) * nr;
	free(pstore, M_TEMP);

	break;
}
3622
case DIOCGETSTATESV2: {
	struct pfioc_states_v2	*ps = (struct pfioc_states_v2 *)addr;
	struct pf_kstate	*s;
	struct pf_state_export	*pstore, *p;
	int i, nr;
	size_t slice_count = 16, count;
	void *out;

	/* Refuse requests for a newer export format than we speak. */
	if (ps->ps_req_version > PF_STATE_VERSION) {
		error = ENOTSUP;
		break;
	}

	/*
	 * A non-positive buffer length is a size probe: report the
	 * space required for all currently allocated states.
	 */
	if (ps->ps_len <= 0) {
		nr = uma_zone_get_cur(V_pf_state_z);
		ps->ps_len = sizeof(struct pf_state_export) * nr;
		break;
	}

	out = ps->ps_states;
	/* Per-hash-row staging buffer, grown on demand below. */
	pstore = mallocarray(slice_count,
	    sizeof(struct pf_state_export), M_TEMP, M_WAITOK | M_ZERO);
	nr = 0;

	for (i = 0; i <= pf_hashmask; i++) {
		struct pf_idhash *ih = &V_pf_idhash[i];

DIOCGETSTATESV2_retry:
		p = pstore;

		if (LIST_EMPTY(&ih->states))
			continue;

		PF_HASHROW_LOCK(ih);
		/* First pass: count live (linked) states in this row. */
		count = 0;
		LIST_FOREACH(s, &ih->states, entry) {
			if (s->timeout == PFTM_UNLINKED)
				continue;
			count++;
		}

		/*
		 * Staging buffer too small for this row: drop the row
		 * lock (M_WAITOK allocation may sleep), grow it with
		 * headroom, and rescan the row from scratch.
		 */
		if (count > slice_count) {
			PF_HASHROW_UNLOCK(ih);
			free(pstore, M_TEMP);
			slice_count = count * 2;
			pstore = mallocarray(slice_count,
			    sizeof(struct pf_state_export), M_TEMP,
			    M_WAITOK | M_ZERO);
			goto DIOCGETSTATESV2_retry;
		}

		/* Stop once the user-supplied buffer would overflow. */
		if ((nr+count) * sizeof(*p) > ps->ps_len) {
			PF_HASHROW_UNLOCK(ih);
			goto DIOCGETSTATESV2_full;
		}

		/* Second pass: export the row into the staging buffer. */
		LIST_FOREACH(s, &ih->states, entry) {
			if (s->timeout == PFTM_UNLINKED)
				continue;

			pf_state_export(p, s);
			p++;
			nr++;
		}
		PF_HASHROW_UNLOCK(ih);
		/* copyout() may fault/sleep, so it runs unlocked. */
		error = copyout(pstore, out,
		    sizeof(struct pf_state_export) * count);
		if (error)
			break;
		out = ps->ps_states + nr;
	}
DIOCGETSTATESV2_full:
	/* Report how many states were actually written. */
	ps->ps_len = nr * sizeof(struct pf_state_export);
	free(pstore, M_TEMP);

	break;
}
3700
case DIOCGETSTATUSNV: {
	/* Return pf status counters/flags as an nvlist. */
	error = pf_getstatus((struct pfioc_nv *)addr);
	break;
}
3705
case DIOCSETSTATUSIF: {
	struct pfioc_if	*pi = (struct pfioc_if *)addr;

	/* Empty name clears the status interface binding. */
	if (pi->ifname[0] == 0) {
		bzero(V_pf_status.ifname, IFNAMSIZ);
		break;
	}
	PF_RULES_WLOCK();
	/* pf_user_strcpy() validates/bounds the user-supplied name. */
	error = pf_user_strcpy(V_pf_status.ifname, pi->ifname, IFNAMSIZ);
	PF_RULES_WUNLOCK();
	break;
}
3718
case DIOCCLRSTATUS: {
	/*
	 * Zero all global status counters (reasons, filter, state and
	 * limit counters), restart the uptime clock, and reset the
	 * per-interface stats of the bound status interface, if any.
	 */
	PF_RULES_WLOCK();
	for (int i = 0; i < PFRES_MAX; i++)
		counter_u64_zero(V_pf_status.counters[i]);
	for (int i = 0; i < FCNT_MAX; i++)
		pf_counter_u64_zero(&V_pf_status.fcounters[i]);
	for (int i = 0; i < SCNT_MAX; i++)
		counter_u64_zero(V_pf_status.scounters[i]);
	for (int i = 0; i < KLCNT_MAX; i++)
		counter_u64_zero(V_pf_status.lcounters[i]);
	V_pf_status.since = time_second;
	if (*V_pf_status.ifname)
		pfi_update_status(V_pf_status.ifname, NULL);
	PF_RULES_WUNLOCK();
	break;
}
3735
case DIOCNATLOOK: {
	struct pfioc_natlook	*pnl = (struct pfioc_natlook *)addr;
	struct pf_state_key	*sk;
	struct pf_kstate	*state;
	struct pf_state_key_cmp	 key;
	int			 m = 0, direction = pnl->direction;
	int			 sidx, didx;

	/* NATLOOK src and dst are reversed, so reverse sidx/didx */
	sidx = (direction == PF_IN) ? 1 : 0;
	didx = (direction == PF_IN) ? 0 : 1;

	/*
	 * Require protocol and both addresses; TCP/UDP lookups
	 * additionally require both ports.
	 */
	if (!pnl->proto ||
	    PF_AZERO(&pnl->saddr, pnl->af) ||
	    PF_AZERO(&pnl->daddr, pnl->af) ||
	    ((pnl->proto == IPPROTO_TCP ||
	    pnl->proto == IPPROTO_UDP) &&
	    (!pnl->dport || !pnl->sport)))
		error = EINVAL;
	else {
		/* Build a state-key from the caller's flow tuple. */
		bzero(&key, sizeof(key));
		key.af = pnl->af;
		key.proto = pnl->proto;
		PF_ACPY(&key.addr[sidx], &pnl->saddr, pnl->af);
		key.port[sidx] = pnl->sport;
		PF_ACPY(&key.addr[didx], &pnl->daddr, pnl->af);
		key.port[didx] = pnl->dport;

		state = pf_find_state_all(&key, direction, &m);
		if (state == NULL) {
			error = ENOENT;
		} else {
			if (m > 1) {
				PF_STATE_UNLOCK(state);
				error = E2BIG;	/* more than one state */
			} else {
				/*
				 * Unique match: return the translated
				 * addresses/ports from the state key.
				 */
				sk = state->key[sidx];
				PF_ACPY(&pnl->rsaddr, &sk->addr[sidx], sk->af);
				pnl->rsport = sk->port[sidx];
				PF_ACPY(&pnl->rdaddr, &sk->addr[didx], sk->af);
				pnl->rdport = sk->port[didx];
				PF_STATE_UNLOCK(state);
			}
		}
	}
	break;
}
3783
case DIOCSETTIMEOUT: {
	struct pfioc_tm	*pt = (struct pfioc_tm *)addr;
	int		 old;

	/* Validate the timeout index and the new value. */
	if (pt->timeout < 0 || pt->timeout >= PFTM_MAX ||
	    pt->seconds < 0) {
		error = EINVAL;
		break;
	}
	PF_RULES_WLOCK();
	old = V_pf_default_rule.timeout[pt->timeout];
	/* The purge interval must be at least one second. */
	if (pt->timeout == PFTM_INTERVAL && pt->seconds == 0)
		pt->seconds = 1;
	V_pf_default_rule.timeout[pt->timeout] = pt->seconds;
	/*
	 * A shortened purge interval takes effect immediately by
	 * kicking the purge thread out of its sleep.
	 */
	if (pt->timeout == PFTM_INTERVAL && pt->seconds < old)
		wakeup(pf_purge_thread);
	/* Return the previous value to the caller. */
	pt->seconds = old;
	PF_RULES_WUNLOCK();
	break;
}
3804
case DIOCGETTIMEOUT: {
	struct pfioc_tm	*pt = (struct pfioc_tm *)addr;

	/* Read back one default-rule timeout by index. */
	if (pt->timeout < 0 || pt->timeout >= PFTM_MAX) {
		error = EINVAL;
		break;
	}
	PF_RULES_RLOCK();
	pt->seconds = V_pf_default_rule.timeout[pt->timeout];
	PF_RULES_RUNLOCK();
	break;
}
3817
case DIOCGETLIMIT: {
	struct pfioc_limit	*pl = (struct pfioc_limit *)addr;

	/* Read back one memory-pool limit by index. */
	if (pl->index < 0 || pl->index >= PF_LIMIT_MAX) {
		error = EINVAL;
		break;
	}
	PF_RULES_RLOCK();
	pl->limit = V_pf_limits[pl->index].limit;
	PF_RULES_RUNLOCK();
	break;
}
3830
case DIOCSETLIMIT: {
	struct pfioc_limit	*pl = (struct pfioc_limit *)addr;
	int			 old_limit;

	PF_RULES_WLOCK();
	/* Index must be valid and the pool must have a backing zone. */
	if (pl->index < 0 || pl->index >= PF_LIMIT_MAX ||
	    V_pf_limits[pl->index].zone == NULL) {
		PF_RULES_WUNLOCK();
		error = EINVAL;
		break;
	}
	/* Apply the cap to the UMA zone and record the new limit. */
	uma_zone_set_max(V_pf_limits[pl->index].zone, pl->limit);
	old_limit = V_pf_limits[pl->index].limit;
	V_pf_limits[pl->index].limit = pl->limit;
	/* Return the previous value to the caller. */
	pl->limit = old_limit;
	PF_RULES_WUNLOCK();
	break;
}
3849
case DIOCSETDEBUG: {
	u_int32_t	*level = (u_int32_t *)addr;

	/* Set the global pf debug verbosity level. */
	PF_RULES_WLOCK();
	V_pf_status.debug = *level;
	PF_RULES_WUNLOCK();
	break;
}
3858
case DIOCCLRRULECTRS: {
	/* obsoleted by DIOCGETRULE with action=PF_GET_CLR_CNTR */
	struct pf_kruleset	*ruleset = &pf_main_ruleset;
	struct pf_krule		*rule;

	/*
	 * Zero evaluation/packet/byte counters on every active filter
	 * rule of the main ruleset (index 0 = in, 1 = out).
	 */
	PF_RULES_WLOCK();
	TAILQ_FOREACH(rule,
	    ruleset->rules[PF_RULESET_FILTER].active.ptr, entries) {
		pf_counter_u64_zero(&rule->evaluations);
		for (int i = 0; i < 2; i++) {
			pf_counter_u64_zero(&rule->packets[i]);
			pf_counter_u64_zero(&rule->bytes[i]);
		}
	}
	PF_RULES_WUNLOCK();
	break;
}
3876
case DIOCGIFSPEEDV0:
case DIOCGIFSPEEDV1: {
	struct pf_ifspeed_v1	*psp = (struct pf_ifspeed_v1 *)addr;
	struct pf_ifspeed_v1	ps;
	struct ifnet		*ifp;

	if (psp->ifname[0] == '\0') {
		error = EINVAL;
		break;
	}

	/* Validate/copy the name before handing it to ifunit(). */
	error = pf_user_strcpy(ps.ifname, psp->ifname, IFNAMSIZ);
	if (error != 0)
		break;
	ifp = ifunit(ps.ifname);
	if (ifp != NULL) {
		/* 32-bit field is clamped for the legacy v0 layout... */
		psp->baudrate32 =
		    (u_int32_t)uqmin(ifp->if_baudrate, UINT_MAX);
		/* ...v1 callers additionally get the full 64-bit rate. */
		if (cmd == DIOCGIFSPEEDV1)
			psp->baudrate = ifp->if_baudrate;
	} else {
		error = EINVAL;
	}
	break;
}
3902
3903 #ifdef ALTQ
case DIOCSTARTALTQ: {
	struct pf_altq		*altq;

	PF_RULES_WLOCK();
	/* enable all altq interfaces on active list */
	TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) {
		/* Skip entries whose interface has gone away. */
		if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
			error = pf_enable_altq(altq);
			if (error != 0)
				break;
		}
	}
	/* Only mark ALTQ running if every enable succeeded. */
	if (error == 0)
		V_pf_altq_running = 1;
	PF_RULES_WUNLOCK();
	DPFPRINTF(PF_DEBUG_MISC, ("altq: started\n"));
	break;
}
3922
case DIOCSTOPALTQ: {
	struct pf_altq		*altq;

	PF_RULES_WLOCK();
	/* disable all altq interfaces on active list */
	TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) {
		/* Skip entries whose interface has gone away. */
		if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) {
			error = pf_disable_altq(altq);
			if (error != 0)
				break;
		}
	}
	/* Only clear the running flag if every disable succeeded. */
	if (error == 0)
		V_pf_altq_running = 0;
	PF_RULES_WUNLOCK();
	DPFPRINTF(PF_DEBUG_MISC, ("altq: stopped\n"));
	break;
}
3941
case DIOCADDALTQV0:
case DIOCADDALTQV1: {
	struct pfioc_altq_v1	*pa = (struct pfioc_altq_v1 *)addr;
	struct pf_altq		*altq, *a;
	struct ifnet		*ifp;

	/*
	 * Convert the user's (v0 or v1, selected by IOCPARM_LEN)
	 * pfioc_altq into the kernel's pf_altq representation.
	 */
	altq = malloc(sizeof(*altq), M_PFALTQ, M_WAITOK | M_ZERO);
	error = pf_import_kaltq(pa, altq, IOCPARM_LEN(cmd));
	if (error)
		break;
	altq->local_flags = 0;

	PF_RULES_WLOCK();
	/* Additions must carry the current inactive-set ticket. */
	if (pa->ticket != V_ticket_altqs_inactive) {
		PF_RULES_WUNLOCK();
		free(altq, M_PFALTQ);
		error = EBUSY;
		break;
	}

	/*
	 * if this is for a queue, find the discipline and
	 * copy the necessary fields
	 */
	if (altq->qname[0] != 0) {
		if ((altq->qid = pf_qname2qid(altq->qname)) == 0) {
			PF_RULES_WUNLOCK();
			error = EBUSY;
			free(altq, M_PFALTQ);
			break;
		}
		altq->altq_disc = NULL;
		/* Inherit the discipline from the queue's interface. */
		TAILQ_FOREACH(a, V_pf_altq_ifs_inactive, entries) {
			if (strncmp(a->ifname, altq->ifname,
			    IFNAMSIZ) == 0) {
				altq->altq_disc = a->altq_disc;
				break;
			}
		}
	}

	/*
	 * A missing interface is not fatal: the entry is kept and
	 * flagged so it can be activated if the interface appears.
	 */
	if ((ifp = ifunit(altq->ifname)) == NULL)
		altq->local_flags |= PFALTQ_FLAG_IF_REMOVED;
	else
		error = altq_add(ifp, altq);

	if (error) {
		PF_RULES_WUNLOCK();
		free(altq, M_PFALTQ);
		break;
	}

	/* Queues and interface entries live on separate lists. */
	if (altq->qname[0] != 0)
		TAILQ_INSERT_TAIL(V_pf_altqs_inactive, altq, entries);
	else
		TAILQ_INSERT_TAIL(V_pf_altq_ifs_inactive, altq, entries);
	/* version error check done on import above */
	pf_export_kaltq(altq, pa, IOCPARM_LEN(cmd));
	PF_RULES_WUNLOCK();
	break;
}
4003
4004         case DIOCGETALTQSV0:
4005         case DIOCGETALTQSV1: {
4006                 struct pfioc_altq_v1    *pa = (struct pfioc_altq_v1 *)addr;
4007                 struct pf_altq          *altq;
4008
4009                 PF_RULES_RLOCK();
4010                 pa->nr = 0;
4011                 TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries)
4012                         pa->nr++;
4013                 TAILQ_FOREACH(altq, V_pf_altqs_active, entries)
4014                         pa->nr++;
4015                 pa->ticket = V_ticket_altqs_active;
4016                 PF_RULES_RUNLOCK();
4017                 break;
4018         }
4019
	case DIOCGETALTQV0:
	case DIOCGETALTQV1: {
		/*
		 * Export the pa->nr'th active ALTQ entry to userland.  The
		 * caller must present the ticket obtained via DIOCGETALTQS;
		 * a mismatch means the active set changed underneath it.
		 */
		struct pfioc_altq_v1	*pa = (struct pfioc_altq_v1 *)addr;
		struct pf_altq		*altq;

		PF_RULES_RLOCK();
		if (pa->ticket != V_ticket_altqs_active) {
			PF_RULES_RUNLOCK();
			error = EBUSY;
			break;
		}
		/* NULL here means nr is past the end of the active set. */
		altq = pf_altq_get_nth_active(pa->nr);
		if (altq == NULL) {
			PF_RULES_RUNLOCK();
			error = EBUSY;
			break;
		}
		/* Version-aware copy into the v0/v1 ioctl structure. */
		pf_export_kaltq(altq, pa, IOCPARM_LEN(cmd));
		PF_RULES_RUNLOCK();
		break;
	}
4041
	case DIOCCHANGEALTQV0:
	case DIOCCHANGEALTQV1:
		/* CHANGEALTQ not supported yet! */
		/* In-place modification of ALTQ entries is unimplemented;
		 * callers must rebuild the set via a new transaction. */
		error = ENODEV;
		break;
4047
	case DIOCGETQSTATSV0:
	case DIOCGETQSTATSV1: {
		/*
		 * Fetch scheduler statistics for the pq->nr'th active ALTQ
		 * entry.  The stats-struct version is pinned to 0 for the
		 * V0 ioctl and taken from userland for V1.
		 */
		struct pfioc_qstats_v1	*pq = (struct pfioc_qstats_v1 *)addr;
		struct pf_altq		*altq;
		int			 nbytes;
		u_int32_t		 version;

		PF_RULES_RLOCK();
		if (pq->ticket != V_ticket_altqs_active) {
			PF_RULES_RUNLOCK();
			error = EBUSY;
			break;
		}
		nbytes = pq->nbytes;
		altq = pf_altq_get_nth_active(pq->nr);
		if (altq == NULL) {
			PF_RULES_RUNLOCK();
			error = EBUSY;
			break;
		}

		/* No stats for an entry whose interface has gone away. */
		if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) != 0) {
			PF_RULES_RUNLOCK();
			error = ENXIO;
			break;
		}
		/* The lock is dropped because altq_getqstats() copies out to
		 * userland and may sleep.
		 * NOTE(review): altq is still dereferenced below after the
		 * unlock; this assumes active entries cannot be freed
		 * concurrently — confirm against the transaction-commit path. */
		PF_RULES_RUNLOCK();
		if (cmd == DIOCGETQSTATSV0)
			version = 0;  /* DIOCGETQSTATSV0 means stats struct v0 */
		else
			version = pq->version;
		error = altq_getqstats(altq, pq->buf, &nbytes, version);
		if (error == 0) {
			pq->scheduler = altq->scheduler;
			pq->nbytes = nbytes;
		}
		break;
	}
4086 #endif /* ALTQ */
4087
4088         case DIOCBEGINADDRS: {
4089                 struct pfioc_pooladdr   *pp = (struct pfioc_pooladdr *)addr;
4090
4091                 PF_RULES_WLOCK();
4092                 pf_empty_kpool(&V_pf_pabuf);
4093                 pp->ticket = ++V_ticket_pabuf;
4094                 PF_RULES_WUNLOCK();
4095                 break;
4096         }
4097
4098         case DIOCADDADDR: {
4099                 struct pfioc_pooladdr   *pp = (struct pfioc_pooladdr *)addr;
4100                 struct pf_kpooladdr     *pa;
4101                 struct pfi_kkif         *kif = NULL;
4102
4103 #ifndef INET
4104                 if (pp->af == AF_INET) {
4105                         error = EAFNOSUPPORT;
4106                         break;
4107                 }
4108 #endif /* INET */
4109 #ifndef INET6
4110                 if (pp->af == AF_INET6) {
4111                         error = EAFNOSUPPORT;
4112                         break;
4113                 }
4114 #endif /* INET6 */
4115                 if (pp->addr.addr.type != PF_ADDR_ADDRMASK &&
4116                     pp->addr.addr.type != PF_ADDR_DYNIFTL &&
4117                     pp->addr.addr.type != PF_ADDR_TABLE) {
4118                         error = EINVAL;
4119                         break;
4120                 }
4121                 if (pp->addr.addr.p.dyn != NULL) {
4122                         error = EINVAL;
4123                         break;
4124                 }
4125                 pa = malloc(sizeof(*pa), M_PFRULE, M_WAITOK);
4126                 error = pf_pooladdr_to_kpooladdr(&pp->addr, pa);
4127                 if (error != 0)
4128                         break;
4129                 if (pa->ifname[0])
4130                         kif = pf_kkif_create(M_WAITOK);
4131                 PF_RULES_WLOCK();
4132                 if (pp->ticket != V_ticket_pabuf) {
4133                         PF_RULES_WUNLOCK();
4134                         if (pa->ifname[0])
4135                                 pf_kkif_free(kif);
4136                         free(pa, M_PFRULE);
4137                         error = EBUSY;
4138                         break;
4139                 }
4140                 if (pa->ifname[0]) {
4141                         pa->kif = pfi_kkif_attach(kif, pa->ifname);
4142                         kif = NULL;
4143                         pfi_kkif_ref(pa->kif);
4144                 } else
4145                         pa->kif = NULL;
4146                 if (pa->addr.type == PF_ADDR_DYNIFTL && ((error =
4147                     pfi_dynaddr_setup(&pa->addr, pp->af)) != 0)) {
4148                         if (pa->ifname[0])
4149                                 pfi_kkif_unref(pa->kif);
4150                         PF_RULES_WUNLOCK();
4151                         free(pa, M_PFRULE);
4152                         break;
4153                 }
4154                 TAILQ_INSERT_TAIL(&V_pf_pabuf, pa, entries);
4155                 PF_RULES_WUNLOCK();
4156                 break;
4157         }
4158
4159         case DIOCGETADDRS: {
4160                 struct pfioc_pooladdr   *pp = (struct pfioc_pooladdr *)addr;
4161                 struct pf_kpool         *pool;
4162                 struct pf_kpooladdr     *pa;
4163
4164                 pp->anchor[sizeof(pp->anchor) - 1] = 0;
4165                 pp->nr = 0;
4166
4167                 PF_RULES_RLOCK();
4168                 pool = pf_get_kpool(pp->anchor, pp->ticket, pp->r_action,
4169                     pp->r_num, 0, 1, 0);
4170                 if (pool == NULL) {
4171                         PF_RULES_RUNLOCK();
4172                         error = EBUSY;
4173                         break;
4174                 }
4175                 TAILQ_FOREACH(pa, &pool->list, entries)
4176                         pp->nr++;
4177                 PF_RULES_RUNLOCK();
4178                 break;
4179         }
4180
4181         case DIOCGETADDR: {
4182                 struct pfioc_pooladdr   *pp = (struct pfioc_pooladdr *)addr;
4183                 struct pf_kpool         *pool;
4184                 struct pf_kpooladdr     *pa;
4185                 u_int32_t                nr = 0;
4186
4187                 pp->anchor[sizeof(pp->anchor) - 1] = 0;
4188
4189                 PF_RULES_RLOCK();
4190                 pool = pf_get_kpool(pp->anchor, pp->ticket, pp->r_action,
4191                     pp->r_num, 0, 1, 1);
4192                 if (pool == NULL) {
4193                         PF_RULES_RUNLOCK();
4194                         error = EBUSY;
4195                         break;
4196                 }
4197                 pa = TAILQ_FIRST(&pool->list);
4198                 while ((pa != NULL) && (nr < pp->nr)) {
4199                         pa = TAILQ_NEXT(pa, entries);
4200                         nr++;
4201                 }
4202                 if (pa == NULL) {
4203                         PF_RULES_RUNLOCK();
4204                         error = EBUSY;
4205                         break;
4206                 }
4207                 pf_kpooladdr_to_pooladdr(pa, &pp->addr);
4208                 pf_addr_copyout(&pp->addr.addr);
4209                 PF_RULES_RUNLOCK();
4210                 break;
4211         }
4212
	case DIOCCHANGEADDR: {
		/*
		 * Add, replace-position, or remove a single address in a
		 * live pool.  Errors taken after the write lock funnel
		 * through DIOCCHANGEADDR_error, which releases newpa/kif.
		 */
		struct pfioc_pooladdr	*pca = (struct pfioc_pooladdr *)addr;
		struct pf_kpool		*pool;
		struct pf_kpooladdr	*oldpa = NULL, *newpa = NULL;
		struct pf_kruleset	*ruleset;
		struct pfi_kkif		*kif = NULL;

		/* Guarantee NUL-termination of the user-supplied path. */
		pca->anchor[sizeof(pca->anchor) - 1] = 0;

		if (pca->action < PF_CHANGE_ADD_HEAD ||
		    pca->action > PF_CHANGE_REMOVE) {
			error = EINVAL;
			break;
		}
		if (pca->addr.addr.type != PF_ADDR_ADDRMASK &&
		    pca->addr.addr.type != PF_ADDR_DYNIFTL &&
		    pca->addr.addr.type != PF_ADDR_TABLE) {
			error = EINVAL;
			break;
		}
		/* Kernel pointers inside the request are never valid. */
		if (pca->addr.addr.p.dyn != NULL) {
			error = EINVAL;
			break;
		}

		/* For any action that inserts, build the new entry (and its
		 * kif, if interface-bound) before taking the lock. */
		if (pca->action != PF_CHANGE_REMOVE) {
#ifndef INET
			if (pca->af == AF_INET) {
				error = EAFNOSUPPORT;
				break;
			}
#endif /* INET */
#ifndef INET6
			if (pca->af == AF_INET6) {
				error = EAFNOSUPPORT;
				break;
			}
#endif /* INET6 */
			newpa = malloc(sizeof(*newpa), M_PFRULE, M_WAITOK);
			bcopy(&pca->addr, newpa, sizeof(struct pf_pooladdr));
			if (newpa->ifname[0])
				kif = pf_kkif_create(M_WAITOK);
			newpa->kif = NULL;
		}
#define	ERROUT(x)	ERROUT_IOCTL(DIOCCHANGEADDR_error, x)
		PF_RULES_WLOCK();
		ruleset = pf_find_kruleset(pca->anchor);
		if (ruleset == NULL)
			ERROUT(EBUSY);

		pool = pf_get_kpool(pca->anchor, pca->ticket, pca->r_action,
		    pca->r_num, pca->r_last, 1, 1);
		if (pool == NULL)
			ERROUT(EBUSY);

		if (pca->action != PF_CHANGE_REMOVE) {
			if (newpa->ifname[0]) {
				newpa->kif = pfi_kkif_attach(kif, newpa->ifname);
				pfi_kkif_ref(newpa->kif);
				/* Ownership transferred; error path must not
				 * free it again. */
				kif = NULL;
			}

			switch (newpa->addr.type) {
			case PF_ADDR_DYNIFTL:
				error = pfi_dynaddr_setup(&newpa->addr,
				    pca->af);
				break;
			case PF_ADDR_TABLE:
				newpa->addr.p.tbl = pfr_attach_table(ruleset,
				    newpa->addr.v.tblname);
				if (newpa->addr.p.tbl == NULL)
					error = ENOMEM;
				break;
			}
			if (error)
				goto DIOCCHANGEADDR_error;
		}

		/* Locate the reference entry the action is relative to. */
		switch (pca->action) {
		case PF_CHANGE_ADD_HEAD:
			oldpa = TAILQ_FIRST(&pool->list);
			break;
		case PF_CHANGE_ADD_TAIL:
			oldpa = TAILQ_LAST(&pool->list, pf_kpalist);
			break;
		default:
			/* ADD_BEFORE/ADD_AFTER/REMOVE address by index. */
			oldpa = TAILQ_FIRST(&pool->list);
			for (int i = 0; oldpa && i < pca->nr; i++)
				oldpa = TAILQ_NEXT(oldpa, entries);

			if (oldpa == NULL)
				ERROUT(EINVAL);
		}

		if (pca->action == PF_CHANGE_REMOVE) {
			TAILQ_REMOVE(&pool->list, oldpa, entries);
			switch (oldpa->addr.type) {
			case PF_ADDR_DYNIFTL:
				pfi_dynaddr_remove(oldpa->addr.p.dyn);
				break;
			case PF_ADDR_TABLE:
				pfr_detach_table(oldpa->addr.p.tbl);
				break;
			}
			if (oldpa->kif)
				pfi_kkif_unref(oldpa->kif);
			free(oldpa, M_PFRULE);
		} else {
			if (oldpa == NULL)
				TAILQ_INSERT_TAIL(&pool->list, newpa, entries);
			else if (pca->action == PF_CHANGE_ADD_HEAD ||
			    pca->action == PF_CHANGE_ADD_BEFORE)
				TAILQ_INSERT_BEFORE(oldpa, newpa, entries);
			else
				TAILQ_INSERT_AFTER(&pool->list, oldpa,
				    newpa, entries);
		}

		/* Reset round-robin state to the (new) head of the pool.
		 * NOTE(review): if REMOVE just deleted the last entry,
		 * pool->cur is NULL here and PF_ACPY dereferences it —
		 * confirm whether an empty pool is reachable on this path. */
		pool->cur = TAILQ_FIRST(&pool->list);
		PF_ACPY(&pool->counter, &pool->cur->addr.v.a.addr, pca->af);
		PF_RULES_WUNLOCK();
		break;

#undef ERROUT
DIOCCHANGEADDR_error:
		/* Unified cleanup for post-lock failures. */
		if (newpa != NULL) {
			if (newpa->kif)
				pfi_kkif_unref(newpa->kif);
			free(newpa, M_PFRULE);
		}
		PF_RULES_WUNLOCK();
		pf_kkif_free(kif);
		break;
	}
4347
4348         case DIOCGETRULESETS: {
4349                 struct pfioc_ruleset    *pr = (struct pfioc_ruleset *)addr;
4350                 struct pf_kruleset      *ruleset;
4351                 struct pf_kanchor       *anchor;
4352
4353                 pr->path[sizeof(pr->path) - 1] = 0;
4354
4355                 PF_RULES_RLOCK();
4356                 if ((ruleset = pf_find_kruleset(pr->path)) == NULL) {
4357                         PF_RULES_RUNLOCK();
4358                         error = ENOENT;
4359                         break;
4360                 }
4361                 pr->nr = 0;
4362                 if (ruleset->anchor == NULL) {
4363                         /* XXX kludge for pf_main_ruleset */
4364                         RB_FOREACH(anchor, pf_kanchor_global, &V_pf_anchors)
4365                                 if (anchor->parent == NULL)
4366                                         pr->nr++;
4367                 } else {
4368                         RB_FOREACH(anchor, pf_kanchor_node,
4369                             &ruleset->anchor->children)
4370                                 pr->nr++;
4371                 }
4372                 PF_RULES_RUNLOCK();
4373                 break;
4374         }
4375
	case DIOCGETRULESET: {
		/*
		 * Return the name of the pr->nr'th child anchor of the
		 * ruleset named by pr->path.  An out-of-range index leaves
		 * pr->name empty and yields EBUSY.
		 */
		struct pfioc_ruleset	*pr = (struct pfioc_ruleset *)addr;
		struct pf_kruleset	*ruleset;
		struct pf_kanchor	*anchor;
		u_int32_t		 nr = 0;

		/* Guarantee NUL-termination of the user-supplied path. */
		pr->path[sizeof(pr->path) - 1] = 0;

		PF_RULES_RLOCK();
		if ((ruleset = pf_find_kruleset(pr->path)) == NULL) {
			PF_RULES_RUNLOCK();
			error = ENOENT;
			break;
		}
		pr->name[0] = 0;
		if (ruleset->anchor == NULL) {
			/* XXX kludge for pf_main_ruleset */
			/* Top-level anchors are those with no parent. */
			RB_FOREACH(anchor, pf_kanchor_global, &V_pf_anchors)
				if (anchor->parent == NULL && nr++ == pr->nr) {
					strlcpy(pr->name, anchor->name,
					    sizeof(pr->name));
					break;
				}
		} else {
			RB_FOREACH(anchor, pf_kanchor_node,
			    &ruleset->anchor->children)
				if (nr++ == pr->nr) {
					strlcpy(pr->name, anchor->name,
					    sizeof(pr->name));
					break;
				}
		}
		/* Empty name means pr->nr was past the last child. */
		if (!pr->name[0])
			error = EBUSY;
		PF_RULES_RUNLOCK();
		break;
	}
4413
4414         case DIOCRCLRTABLES: {
4415                 struct pfioc_table *io = (struct pfioc_table *)addr;
4416
4417                 if (io->pfrio_esize != 0) {
4418                         error = ENODEV;
4419                         break;
4420                 }
4421                 PF_RULES_WLOCK();
4422                 error = pfr_clr_tables(&io->pfrio_table, &io->pfrio_ndel,
4423                     io->pfrio_flags | PFR_FLAG_USERIOCTL);
4424                 PF_RULES_WUNLOCK();
4425                 break;
4426         }
4427
	case DIOCRADDTABLES: {
		/*
		 * Create the tables described by the user-supplied array of
		 * struct pfr_table.  The array is bounded by
		 * pf_ioctl_maxcount and copied in before taking the lock.
		 */
		struct pfioc_table *io = (struct pfioc_table *)addr;
		struct pfr_table *pfrts;
		size_t totlen;

		if (io->pfrio_esize != sizeof(struct pfr_table)) {
			error = ENODEV;
			break;
		}

		/* Reject absurd sizes before attempting the allocation. */
		if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount ||
		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_table))) {
			error = ENOMEM;
			break;
		}

		totlen = io->pfrio_size * sizeof(struct pfr_table);
		pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
		    M_TEMP, M_WAITOK);
		error = copyin(io->pfrio_buffer, pfrts, totlen);
		if (error) {
			free(pfrts, M_TEMP);
			break;
		}
		PF_RULES_WLOCK();
		error = pfr_add_tables(pfrts, io->pfrio_size,
		    &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL);
		PF_RULES_WUNLOCK();
		free(pfrts, M_TEMP);
		break;
	}
4459
	case DIOCRDELTABLES: {
		/*
		 * Delete the tables named in the user-supplied array of
		 * struct pfr_table.  Mirrors DIOCRADDTABLES: validate size,
		 * copy in unlocked, apply under the write lock.
		 */
		struct pfioc_table *io = (struct pfioc_table *)addr;
		struct pfr_table *pfrts;
		size_t totlen;

		if (io->pfrio_esize != sizeof(struct pfr_table)) {
			error = ENODEV;
			break;
		}

		/* Reject absurd sizes before attempting the allocation. */
		if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount ||
		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_table))) {
			error = ENOMEM;
			break;
		}

		totlen = io->pfrio_size * sizeof(struct pfr_table);
		pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
		    M_TEMP, M_WAITOK);
		error = copyin(io->pfrio_buffer, pfrts, totlen);
		if (error) {
			free(pfrts, M_TEMP);
			break;
		}
		PF_RULES_WLOCK();
		error = pfr_del_tables(pfrts, io->pfrio_size,
		    &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL);
		PF_RULES_WUNLOCK();
		free(pfrts, M_TEMP);
		break;
	}
4491
	case DIOCRGETTABLES: {
		/*
		 * Copy out up to io->pfrio_size table descriptors matching
		 * the filter; pfrio_size is clamped to the actual count.
		 */
		struct pfioc_table *io = (struct pfioc_table *)addr;
		struct pfr_table *pfrts;
		size_t totlen;
		int n;

		if (io->pfrio_esize != sizeof(struct pfr_table)) {
			error = ENODEV;
			break;
		}
		PF_RULES_RLOCK();
		n = pfr_table_count(&io->pfrio_table, io->pfrio_flags);
		if (n < 0) {
			PF_RULES_RUNLOCK();
			error = EINVAL;
			break;
		}
		io->pfrio_size = min(io->pfrio_size, n);

		totlen = io->pfrio_size * sizeof(struct pfr_table);

		/* M_NOWAIT: we must not sleep while holding the rules lock. */
		pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
		    M_TEMP, M_NOWAIT | M_ZERO);
		if (pfrts == NULL) {
			error = ENOMEM;
			PF_RULES_RUNLOCK();
			break;
		}
		error = pfr_get_tables(&io->pfrio_table, pfrts,
		    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
		PF_RULES_RUNLOCK();
		/* copyout may sleep, so it happens after the unlock. */
		if (error == 0)
			error = copyout(pfrts, io->pfrio_buffer, totlen);
		free(pfrts, M_TEMP);
		break;
	}
4528
	case DIOCRGETTSTATS: {
		/*
		 * Copy out per-table statistics.  Both the table-stats lock
		 * and the rules read lock are held, always acquired in that
		 * order and released in reverse.
		 */
		struct pfioc_table *io = (struct pfioc_table *)addr;
		struct pfr_tstats *pfrtstats;
		size_t totlen;
		int n;

		if (io->pfrio_esize != sizeof(struct pfr_tstats)) {
			error = ENODEV;
			break;
		}
		PF_TABLE_STATS_LOCK();
		PF_RULES_RLOCK();
		n = pfr_table_count(&io->pfrio_table, io->pfrio_flags);
		if (n < 0) {
			PF_RULES_RUNLOCK();
			PF_TABLE_STATS_UNLOCK();
			error = EINVAL;
			break;
		}
		io->pfrio_size = min(io->pfrio_size, n);

		totlen = io->pfrio_size * sizeof(struct pfr_tstats);
		/* M_NOWAIT: we must not sleep while holding the locks. */
		pfrtstats = mallocarray(io->pfrio_size,
		    sizeof(struct pfr_tstats), M_TEMP, M_NOWAIT | M_ZERO);
		if (pfrtstats == NULL) {
			error = ENOMEM;
			PF_RULES_RUNLOCK();
			PF_TABLE_STATS_UNLOCK();
			break;
		}
		error = pfr_get_tstats(&io->pfrio_table, pfrtstats,
		    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
		PF_RULES_RUNLOCK();
		PF_TABLE_STATS_UNLOCK();
		/* copyout may sleep, so it happens after the unlocks. */
		if (error == 0)
			error = copyout(pfrtstats, io->pfrio_buffer, totlen);
		free(pfrtstats, M_TEMP);
		break;
	}
4568
	case DIOCRCLRTSTATS: {
		/*
		 * Zero the statistics of the tables named in the
		 * user-supplied array.
		 */
		struct pfioc_table *io = (struct pfioc_table *)addr;
		struct pfr_table *pfrts;
		size_t totlen;

		if (io->pfrio_esize != sizeof(struct pfr_table)) {
			error = ENODEV;
			break;
		}

		if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount ||
		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_table))) {
			/* We used to count tables and use the minimum required
			 * size, so we didn't fail on overly large requests.
			 * Keep doing so. */
			/* Deliberately not an error: clamp the reported size
			 * and return success without clearing anything. */
			io->pfrio_size = pf_ioctl_maxcount;
			break;
		}

		totlen = io->pfrio_size * sizeof(struct pfr_table);
		pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
		    M_TEMP, M_WAITOK);
		error = copyin(io->pfrio_buffer, pfrts, totlen);
		if (error) {
			free(pfrts, M_TEMP);
			break;
		}

		/* Stats lock before rules lock, matching DIOCRGETTSTATS. */
		PF_TABLE_STATS_LOCK();
		PF_RULES_RLOCK();
		error = pfr_clr_tstats(pfrts, io->pfrio_size,
		    &io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL);
		PF_RULES_RUNLOCK();
		PF_TABLE_STATS_UNLOCK();
		free(pfrts, M_TEMP);
		break;
	}
4606
	case DIOCRSETTFLAGS: {
		/*
		 * Set/clear flags on the tables named in the user-supplied
		 * array.
		 */
		struct pfioc_table *io = (struct pfioc_table *)addr;
		struct pfr_table *pfrts;
		size_t totlen;
		int n;

		if (io->pfrio_esize != sizeof(struct pfr_table)) {
			error = ENODEV;
			break;
		}

		PF_RULES_RLOCK();
		n = pfr_table_count(&io->pfrio_table, io->pfrio_flags);
		if (n < 0) {
			PF_RULES_RUNLOCK();
			error = EINVAL;
			break;
		}

		io->pfrio_size = min(io->pfrio_size, n);
		/* NOTE(review): the lock is dropped here so copyin can
		 * sleep; the table count may change before the write lock
		 * below is taken.  pfr_set_tflags is presumably tolerant of
		 * the stale clamp — confirm. */
		PF_RULES_RUNLOCK();

		totlen = io->pfrio_size * sizeof(struct pfr_table);
		pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table),
		    M_TEMP, M_WAITOK);
		error = copyin(io->pfrio_buffer, pfrts, totlen);
		if (error) {
			free(pfrts, M_TEMP);
			break;
		}
		PF_RULES_WLOCK();
		error = pfr_set_tflags(pfrts, io->pfrio_size,
		    io->pfrio_setflag, io->pfrio_clrflag, &io->pfrio_nchange,
		    &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL);
		PF_RULES_WUNLOCK();
		free(pfrts, M_TEMP);
		break;
	}
4645
4646         case DIOCRCLRADDRS: {
4647                 struct pfioc_table *io = (struct pfioc_table *)addr;
4648
4649                 if (io->pfrio_esize != 0) {
4650                         error = ENODEV;
4651                         break;
4652                 }
4653                 PF_RULES_WLOCK();
4654                 error = pfr_clr_addrs(&io->pfrio_table, &io->pfrio_ndel,
4655                     io->pfrio_flags | PFR_FLAG_USERIOCTL);
4656                 PF_RULES_WUNLOCK();
4657                 break;
4658         }
4659
	case DIOCRADDADDRS: {
		/*
		 * Add the user-supplied array of struct pfr_addr to a table.
		 * With PFR_FLAG_FEEDBACK, per-entry results are copied back
		 * into the same buffer.
		 */
		struct pfioc_table *io = (struct pfioc_table *)addr;
		struct pfr_addr *pfras;
		size_t totlen;

		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
			error = ENODEV;
			break;
		}
		/* Reject absurd sizes before attempting the allocation. */
		if (io->pfrio_size < 0 ||
		    io->pfrio_size > pf_ioctl_maxcount ||
		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) {
			error = EINVAL;
			break;
		}
		totlen = io->pfrio_size * sizeof(struct pfr_addr);
		pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
		    M_TEMP, M_WAITOK);
		error = copyin(io->pfrio_buffer, pfras, totlen);
		if (error) {
			free(pfras, M_TEMP);
			break;
		}
		PF_RULES_WLOCK();
		error = pfr_add_addrs(&io->pfrio_table, pfras,
		    io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags |
		    PFR_FLAG_USERIOCTL);
		PF_RULES_WUNLOCK();
		/* Feedback mode: return per-address status to userland. */
		if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
			error = copyout(pfras, io->pfrio_buffer, totlen);
		free(pfras, M_TEMP);
		break;
	}
4693
	case DIOCRDELADDRS: {
		/*
		 * Delete the user-supplied array of struct pfr_addr from a
		 * table.  Mirrors DIOCRADDADDRS, including FEEDBACK copyout.
		 */
		struct pfioc_table *io = (struct pfioc_table *)addr;
		struct pfr_addr *pfras;
		size_t totlen;

		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
			error = ENODEV;
			break;
		}
		/* Reject absurd sizes before attempting the allocation. */
		if (io->pfrio_size < 0 ||
		    io->pfrio_size > pf_ioctl_maxcount ||
		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) {
			error = EINVAL;
			break;
		}
		totlen = io->pfrio_size * sizeof(struct pfr_addr);
		pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
		    M_TEMP, M_WAITOK);
		error = copyin(io->pfrio_buffer, pfras, totlen);
		if (error) {
			free(pfras, M_TEMP);
			break;
		}
		PF_RULES_WLOCK();
		error = pfr_del_addrs(&io->pfrio_table, pfras,
		    io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags |
		    PFR_FLAG_USERIOCTL);
		PF_RULES_WUNLOCK();
		/* Feedback mode: return per-address status to userland. */
		if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
			error = copyout(pfras, io->pfrio_buffer, totlen);
		free(pfras, M_TEMP);
		break;
	}
4727
	case DIOCRSETADDRS: {
		struct pfioc_table *io = (struct pfioc_table *)addr;
		struct pfr_addr *pfras;
		size_t totlen, count;

		/*
		 * Replace a table's address list wholesale.  pfrio_size is
		 * the number of input entries; pfrio_size2 is the room the
		 * caller reserved for feedback entries, so the kernel buffer
		 * must be sized for the larger of the two.
		 */
		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
			error = ENODEV;
			break;
		}
		if (io->pfrio_size < 0 || io->pfrio_size2 < 0) {
			error = EINVAL;
			break;
		}
		count = max(io->pfrio_size, io->pfrio_size2);
		if (count > pf_ioctl_maxcount ||
		    WOULD_OVERFLOW(count, sizeof(struct pfr_addr))) {
			error = EINVAL;
			break;
		}
		totlen = count * sizeof(struct pfr_addr);
		pfras = mallocarray(count, sizeof(struct pfr_addr), M_TEMP,
		    M_WAITOK);
		error = copyin(io->pfrio_buffer, pfras, totlen);
		if (error) {
			free(pfras, M_TEMP);
			break;
		}
		PF_RULES_WLOCK();
		error = pfr_set_addrs(&io->pfrio_table, pfras,
		    io->pfrio_size, &io->pfrio_size2, &io->pfrio_nadd,
		    &io->pfrio_ndel, &io->pfrio_nchange, io->pfrio_flags |
		    PFR_FLAG_USERIOCTL, 0);
		PF_RULES_WUNLOCK();
		/* With FEEDBACK, per-address result codes go back to userland. */
		if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
			error = copyout(pfras, io->pfrio_buffer, totlen);
		free(pfras, M_TEMP);
		break;
	}
4766
	case DIOCRGETADDRS: {
		struct pfioc_table *io = (struct pfioc_table *)addr;
		struct pfr_addr *pfras;
		size_t totlen;

		/* Read out a table's addresses; read lock suffices. */
		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
			error = ENODEV;
			break;
		}
		if (io->pfrio_size < 0 ||
		    io->pfrio_size > pf_ioctl_maxcount ||
		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) {
			error = EINVAL;
			break;
		}
		totlen = io->pfrio_size * sizeof(struct pfr_addr);
		/*
		 * M_ZERO: the table may hold fewer entries than requested,
		 * so avoid leaking uninitialized kernel memory on copyout.
		 */
		pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
		    M_TEMP, M_WAITOK | M_ZERO);
		PF_RULES_RLOCK();
		error = pfr_get_addrs(&io->pfrio_table, pfras,
		    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
		PF_RULES_RUNLOCK();
		if (error == 0)
			error = copyout(pfras, io->pfrio_buffer, totlen);
		free(pfras, M_TEMP);
		break;
	}
4794
	case DIOCRGETASTATS: {
		struct pfioc_table *io = (struct pfioc_table *)addr;
		struct pfr_astats *pfrastats;
		size_t totlen;

		/* Read out per-address statistics for a table. */
		if (io->pfrio_esize != sizeof(struct pfr_astats)) {
			error = ENODEV;
			break;
		}
		if (io->pfrio_size < 0 ||
		    io->pfrio_size > pf_ioctl_maxcount ||
		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_astats))) {
			error = EINVAL;
			break;
		}
		totlen = io->pfrio_size * sizeof(struct pfr_astats);
		/* M_ZERO: don't leak kernel memory if fewer entries exist. */
		pfrastats = mallocarray(io->pfrio_size,
		    sizeof(struct pfr_astats), M_TEMP, M_WAITOK | M_ZERO);
		PF_RULES_RLOCK();
		error = pfr_get_astats(&io->pfrio_table, pfrastats,
		    &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL);
		PF_RULES_RUNLOCK();
		if (error == 0)
			error = copyout(pfrastats, io->pfrio_buffer, totlen);
		free(pfrastats, M_TEMP);
		break;
	}
4822
	case DIOCRCLRASTATS: {
		struct pfioc_table *io = (struct pfioc_table *)addr;
		struct pfr_addr *pfras;
		size_t totlen;

		/* Zero the statistics of the listed table addresses. */
		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
			error = ENODEV;
			break;
		}
		if (io->pfrio_size < 0 ||
		    io->pfrio_size > pf_ioctl_maxcount ||
		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) {
			error = EINVAL;
			break;
		}
		totlen = io->pfrio_size * sizeof(struct pfr_addr);
		pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
		    M_TEMP, M_WAITOK);
		error = copyin(io->pfrio_buffer, pfras, totlen);
		if (error) {
			free(pfras, M_TEMP);
			break;
		}
		PF_RULES_WLOCK();
		error = pfr_clr_astats(&io->pfrio_table, pfras,
		    io->pfrio_size, &io->pfrio_nzero, io->pfrio_flags |
		    PFR_FLAG_USERIOCTL);
		PF_RULES_WUNLOCK();
		/* With FEEDBACK, per-address result codes go back to userland. */
		if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK)
			error = copyout(pfras, io->pfrio_buffer, totlen);
		free(pfras, M_TEMP);
		break;
	}
4856
	case DIOCRTSTADDRS: {
		struct pfioc_table *io = (struct pfioc_table *)addr;
		struct pfr_addr *pfras;
		size_t totlen;

		/*
		 * Test which of the supplied addresses match the table.
		 * Read-only with respect to the table, hence the read lock;
		 * match results are written into the caller's array.
		 */
		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
			error = ENODEV;
			break;
		}
		if (io->pfrio_size < 0 ||
		    io->pfrio_size > pf_ioctl_maxcount ||
		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) {
			error = EINVAL;
			break;
		}
		totlen = io->pfrio_size * sizeof(struct pfr_addr);
		pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
		    M_TEMP, M_WAITOK);
		error = copyin(io->pfrio_buffer, pfras, totlen);
		if (error) {
			free(pfras, M_TEMP);
			break;
		}
		PF_RULES_RLOCK();
		error = pfr_tst_addrs(&io->pfrio_table, pfras,
		    io->pfrio_size, &io->pfrio_nmatch, io->pfrio_flags |
		    PFR_FLAG_USERIOCTL);
		PF_RULES_RUNLOCK();
		if (error == 0)
			error = copyout(pfras, io->pfrio_buffer, totlen);
		free(pfras, M_TEMP);
		break;
	}
4890
	case DIOCRINADEFINE: {
		struct pfioc_table *io = (struct pfioc_table *)addr;
		struct pfr_addr *pfras;
		size_t totlen;

		/*
		 * Define the inactive copy of a table as part of a
		 * transaction; pfrio_ticket ties this to the transaction
		 * opened via DIOCXBEGIN.  No feedback copyout here.
		 */
		if (io->pfrio_esize != sizeof(struct pfr_addr)) {
			error = ENODEV;
			break;
		}
		if (io->pfrio_size < 0 ||
		    io->pfrio_size > pf_ioctl_maxcount ||
		    WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) {
			error = EINVAL;
			break;
		}
		totlen = io->pfrio_size * sizeof(struct pfr_addr);
		pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr),
		    M_TEMP, M_WAITOK);
		error = copyin(io->pfrio_buffer, pfras, totlen);
		if (error) {
			free(pfras, M_TEMP);
			break;
		}
		PF_RULES_WLOCK();
		error = pfr_ina_define(&io->pfrio_table, pfras,
		    io->pfrio_size, &io->pfrio_nadd, &io->pfrio_naddr,
		    io->pfrio_ticket, io->pfrio_flags | PFR_FLAG_USERIOCTL);
		PF_RULES_WUNLOCK();
		free(pfras, M_TEMP);
		break;
	}
4922
	case DIOCOSFPADD: {
		/* Add an OS-fingerprint entry; validation is in pf_osfp_add(). */
		struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr;
		PF_RULES_WLOCK();
		error = pf_osfp_add(io);
		PF_RULES_WUNLOCK();
		break;
	}
4930
	case DIOCOSFPGET: {
		/* Look up an OS-fingerprint entry; read-only, read lock. */
		struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr;
		PF_RULES_RLOCK();
		error = pf_osfp_get(io);
		PF_RULES_RUNLOCK();
		break;
	}
4938
	case DIOCXBEGIN: {
		struct pfioc_trans	*io = (struct pfioc_trans *)addr;
		struct pfioc_trans_e	*ioes, *ioe;
		size_t			 totlen;
		int			 i;

		/*
		 * Begin a multi-ruleset transaction: one ticket is issued
		 * per element and copied back to userland for the later
		 * DIOCXCOMMIT/DIOCXROLLBACK.  Every error path must unlock
		 * and free before jumping to fail.
		 */
		if (io->esize != sizeof(*ioe)) {
			error = ENODEV;
			break;
		}
		if (io->size < 0 ||
		    io->size > pf_ioctl_maxcount ||
		    WOULD_OVERFLOW(io->size, sizeof(struct pfioc_trans_e))) {
			error = EINVAL;
			break;
		}
		totlen = sizeof(struct pfioc_trans_e) * io->size;
		ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e),
		    M_TEMP, M_WAITOK);
		error = copyin(io->array, ioes, totlen);
		if (error) {
			free(ioes, M_TEMP);
			break;
		}
		/* Ensure there's no more ethernet rules to clean up. */
		NET_EPOCH_DRAIN_CALLBACKS();
		PF_RULES_WLOCK();
		for (i = 0, ioe = ioes; i < io->size; i++, ioe++) {
			/* Anchor names from userland may be unterminated. */
			ioe->anchor[sizeof(ioe->anchor) - 1] = '\0';
			switch (ioe->rs_num) {
			case PF_RULESET_ETH:
				if ((error = pf_begin_eth(&ioe->ticket, ioe->anchor))) {
					PF_RULES_WUNLOCK();
					free(ioes, M_TEMP);
					goto fail;
				}
				break;
#ifdef ALTQ
			case PF_RULESET_ALTQ:
				/* ALTQ is global; anchors are not allowed. */
				if (ioe->anchor[0]) {
					PF_RULES_WUNLOCK();
					free(ioes, M_TEMP);
					error = EINVAL;
					goto fail;
				}
				if ((error = pf_begin_altq(&ioe->ticket))) {
					PF_RULES_WUNLOCK();
					free(ioes, M_TEMP);
					goto fail;
				}
				break;
#endif /* ALTQ */
			case PF_RULESET_TABLE:
			    {
				struct pfr_table table;

				bzero(&table, sizeof(table));
				strlcpy(table.pfrt_anchor, ioe->anchor,
				    sizeof(table.pfrt_anchor));
				if ((error = pfr_ina_begin(&table,
				    &ioe->ticket, NULL, 0))) {
					PF_RULES_WUNLOCK();
					free(ioes, M_TEMP);
					goto fail;
				}
				break;
			    }
			default:
				if ((error = pf_begin_rules(&ioe->ticket,
				    ioe->rs_num, ioe->anchor))) {
					PF_RULES_WUNLOCK();
					free(ioes, M_TEMP);
					goto fail;
				}
				break;
			}
		}
		PF_RULES_WUNLOCK();
		/* Return the issued tickets to the caller. */
		error = copyout(ioes, io->array, totlen);
		free(ioes, M_TEMP);
		break;
	}
5021
	case DIOCXROLLBACK: {
		struct pfioc_trans	*io = (struct pfioc_trans *)addr;
		struct pfioc_trans_e	*ioe, *ioes;
		size_t			 totlen;
		int			 i;

		/*
		 * Abort a transaction started with DIOCXBEGIN: discard the
		 * inactive rulesets identified by the supplied tickets.
		 */
		if (io->esize != sizeof(*ioe)) {
			error = ENODEV;
			break;
		}
		if (io->size < 0 ||
		    io->size > pf_ioctl_maxcount ||
		    WOULD_OVERFLOW(io->size, sizeof(struct pfioc_trans_e))) {
			error = EINVAL;
			break;
		}
		totlen = sizeof(struct pfioc_trans_e) * io->size;
		ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e),
		    M_TEMP, M_WAITOK);
		error = copyin(io->array, ioes, totlen);
		if (error) {
			free(ioes, M_TEMP);
			break;
		}
		PF_RULES_WLOCK();
		for (i = 0, ioe = ioes; i < io->size; i++, ioe++) {
			/* Anchor names from userland may be unterminated. */
			ioe->anchor[sizeof(ioe->anchor) - 1] = '\0';
			switch (ioe->rs_num) {
			case PF_RULESET_ETH:
				if ((error = pf_rollback_eth(ioe->ticket,
				    ioe->anchor))) {
					PF_RULES_WUNLOCK();
					free(ioes, M_TEMP);
					goto fail; /* really bad */
				}
				break;
#ifdef ALTQ
			case PF_RULESET_ALTQ:
				/* ALTQ is global; anchors are not allowed. */
				if (ioe->anchor[0]) {
					PF_RULES_WUNLOCK();
					free(ioes, M_TEMP);
					error = EINVAL;
					goto fail;
				}
				if ((error = pf_rollback_altq(ioe->ticket))) {
					PF_RULES_WUNLOCK();
					free(ioes, M_TEMP);
					goto fail; /* really bad */
				}
				break;
#endif /* ALTQ */
			case PF_RULESET_TABLE:
			    {
				struct pfr_table table;

				bzero(&table, sizeof(table));
				strlcpy(table.pfrt_anchor, ioe->anchor,
				    sizeof(table.pfrt_anchor));
				if ((error = pfr_ina_rollback(&table,
				    ioe->ticket, NULL, 0))) {
					PF_RULES_WUNLOCK();
					free(ioes, M_TEMP);
					goto fail; /* really bad */
				}
				break;
			    }
			default:
				if ((error = pf_rollback_rules(ioe->ticket,
				    ioe->rs_num, ioe->anchor))) {
					PF_RULES_WUNLOCK();
					free(ioes, M_TEMP);
					goto fail; /* really bad */
				}
				break;
			}
		}
		PF_RULES_WUNLOCK();
		free(ioes, M_TEMP);
		break;
	}
5102
	case DIOCXCOMMIT: {
		struct pfioc_trans	*io = (struct pfioc_trans *)addr;
		struct pfioc_trans_e	*ioe, *ioes;
		struct pf_kruleset	*rs;
		struct pf_keth_ruleset	*ers;
		size_t			 totlen;
		int			 i;

		/*
		 * Commit a transaction started with DIOCXBEGIN.  Two passes
		 * under one write lock: first validate every ticket so the
		 * commit pass should not fail, then swap the inactive
		 * rulesets in.  A failure in the second pass leaves the
		 * transaction partially applied ("really bad").
		 */
		if (io->esize != sizeof(*ioe)) {
			error = ENODEV;
			break;
		}

		if (io->size < 0 ||
		    io->size > pf_ioctl_maxcount ||
		    WOULD_OVERFLOW(io->size, sizeof(struct pfioc_trans_e))) {
			error = EINVAL;
			break;
		}

		totlen = sizeof(struct pfioc_trans_e) * io->size;
		ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e),
		    M_TEMP, M_WAITOK);
		error = copyin(io->array, ioes, totlen);
		if (error) {
			free(ioes, M_TEMP);
			break;
		}
		PF_RULES_WLOCK();
		/* First makes sure everything will succeed. */
		for (i = 0, ioe = ioes; i < io->size; i++, ioe++) {
			/* Anchor names from userland may be unterminated. */
			ioe->anchor[sizeof(ioe->anchor) - 1] = 0;
			switch (ioe->rs_num) {
			case PF_RULESET_ETH:
				ers = pf_find_keth_ruleset(ioe->anchor);
				if (ers == NULL || ioe->ticket == 0 ||
				    ioe->ticket != ers->inactive.ticket) {
					PF_RULES_WUNLOCK();
					free(ioes, M_TEMP);
					error = EINVAL;
					goto fail;
				}
				break;
#ifdef ALTQ
			case PF_RULESET_ALTQ:
				/* ALTQ is global; anchors are not allowed. */
				if (ioe->anchor[0]) {
					PF_RULES_WUNLOCK();
					free(ioes, M_TEMP);
					error = EINVAL;
					goto fail;
				}
				if (!V_altqs_inactive_open || ioe->ticket !=
				    V_ticket_altqs_inactive) {
					PF_RULES_WUNLOCK();
					free(ioes, M_TEMP);
					error = EBUSY;
					goto fail;
				}
				break;
#endif /* ALTQ */
			case PF_RULESET_TABLE:
				rs = pf_find_kruleset(ioe->anchor);
				if (rs == NULL || !rs->topen || ioe->ticket !=
				    rs->tticket) {
					PF_RULES_WUNLOCK();
					free(ioes, M_TEMP);
					error = EBUSY;
					goto fail;
				}
				break;
			default:
				if (ioe->rs_num < 0 || ioe->rs_num >=
				    PF_RULESET_MAX) {
					PF_RULES_WUNLOCK();
					free(ioes, M_TEMP);
					error = EINVAL;
					goto fail;
				}
				rs = pf_find_kruleset(ioe->anchor);
				if (rs == NULL ||
				    !rs->rules[ioe->rs_num].inactive.open ||
				    rs->rules[ioe->rs_num].inactive.ticket !=
				    ioe->ticket) {
					PF_RULES_WUNLOCK();
					free(ioes, M_TEMP);
					error = EBUSY;
					goto fail;
				}
				break;
			}
		}
		/* Now do the commit - no errors should happen here. */
		for (i = 0, ioe = ioes; i < io->size; i++, ioe++) {
			switch (ioe->rs_num) {
			case PF_RULESET_ETH:
				if ((error = pf_commit_eth(ioe->ticket, ioe->anchor))) {
					PF_RULES_WUNLOCK();
					free(ioes, M_TEMP);
					goto fail; /* really bad */
				}
				break;
#ifdef ALTQ
			case PF_RULESET_ALTQ:
				if ((error = pf_commit_altq(ioe->ticket))) {
					PF_RULES_WUNLOCK();
					free(ioes, M_TEMP);
					goto fail; /* really bad */
				}
				break;
#endif /* ALTQ */
			case PF_RULESET_TABLE:
			    {
				struct pfr_table table;

				bzero(&table, sizeof(table));
				(void)strlcpy(table.pfrt_anchor, ioe->anchor,
				    sizeof(table.pfrt_anchor));
				if ((error = pfr_ina_commit(&table,
				    ioe->ticket, NULL, NULL, 0))) {
					PF_RULES_WUNLOCK();
					free(ioes, M_TEMP);
					goto fail; /* really bad */
				}
				break;
			    }
			default:
				if ((error = pf_commit_rules(ioe->ticket,
				    ioe->rs_num, ioe->anchor))) {
					PF_RULES_WUNLOCK();
					free(ioes, M_TEMP);
					goto fail; /* really bad */
				}
				break;
			}
		}
		PF_RULES_WUNLOCK();

		/* Only hook into Ethernet traffic if we've got rules for it. */
		if (! TAILQ_EMPTY(V_pf_keth->active.rules))
			hook_pf_eth();
		else
			dehook_pf_eth();

		free(ioes, M_TEMP);
		break;
	}
5249
	case DIOCGETSRCNODES: {
		struct pfioc_src_nodes	*psn = (struct pfioc_src_nodes *)addr;
		struct pf_srchash	*sh;
		struct pf_ksrc_node	*n;
		struct pf_src_node	*p, *pstore;
		uint32_t		 i, nr = 0;

		/*
		 * Two-pass export of the source-node table.  Pass 1 counts
		 * nodes under the per-row hash locks; pass 2 copies them
		 * out.  The table can change between the passes, so pass 2
		 * never writes beyond the psn_len-sized buffer.
		 */
		for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask;
				i++, sh++) {
			PF_HASHROW_LOCK(sh);
			LIST_FOREACH(n, &sh->nodes, entry)
				nr++;
			PF_HASHROW_UNLOCK(sh);
		}

		psn->psn_len = min(psn->psn_len,
		    sizeof(struct pf_src_node) * nr);

		/* psn_len == 0 is a size probe: report the required length. */
		if (psn->psn_len == 0) {
			psn->psn_len = sizeof(struct pf_src_node) * nr;
			break;
		}

		nr = 0;

		p = pstore = malloc(psn->psn_len, M_TEMP, M_WAITOK | M_ZERO);
		for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask;
		    i++, sh++) {
		    PF_HASHROW_LOCK(sh);
		    LIST_FOREACH(n, &sh->nodes, entry) {

			/* Stop filling once the caller's buffer is full. */
			if ((nr + 1) * sizeof(*p) > (unsigned)psn->psn_len)
				break;

			pf_src_node_copy(n, p);

			p++;
			nr++;
		    }
		    PF_HASHROW_UNLOCK(sh);
		}
		error = copyout(pstore, psn->psn_src_nodes,
		    sizeof(struct pf_src_node) * nr);
		if (error) {
			free(pstore, M_TEMP);
			break;
		}
		/* Report how many entries were actually copied out. */
		psn->psn_len = sizeof(struct pf_src_node) * nr;
		free(pstore, M_TEMP);
		break;
	}
5301
	case DIOCCLRSRCNODES: {
		/* Clear all source nodes (NULL = no filter), then purge. */
		pf_clear_srcnodes(NULL);
		pf_purge_expired_src_nodes();
		break;
	}
5307
	case DIOCKILLSRCNODES:
		/* Kill source nodes matching the caller-supplied filter. */
		pf_kill_srcnodes((struct pfioc_src_node_kill *)addr);
		break;
5311
#ifdef COMPAT_FREEBSD13
	case DIOCKEEPCOUNTERS_FREEBSD13:
#endif
	case DIOCKEEPCOUNTERS:
		/*
		 * nvlist-based commands: parameters travel in a struct
		 * pfioc_nv and are unpacked by the helpers themselves.
		 */
		error = pf_keepcounters((struct pfioc_nv *)addr);
		break;

	case DIOCGETSYNCOOKIES:
		error = pf_get_syncookies((struct pfioc_nv *)addr);
		break;

	case DIOCSETSYNCOOKIES:
		error = pf_set_syncookies((struct pfioc_nv *)addr);
		break;
5326
5327         case DIOCSETHOSTID: {
5328                 u_int32_t       *hostid = (u_int32_t *)addr;
5329
5330                 PF_RULES_WLOCK();
5331                 if (*hostid == 0)
5332                         V_pf_status.hostid = arc4random();
5333                 else
5334                         V_pf_status.hostid = *hostid;
5335                 PF_RULES_WUNLOCK();
5336                 break;
5337         }
5338
	case DIOCOSFPFLUSH:
		/* Drop the entire OS-fingerprint database. */
		PF_RULES_WLOCK();
		pf_osfp_flush();
		PF_RULES_WUNLOCK();
		break;
5344
	case DIOCIGETIFACES: {
		struct pfioc_iface *io = (struct pfioc_iface *)addr;
		struct pfi_kif *ifstore;
		size_t bufsiz;

		/* Export the pf interface list, filtered by pfiio_name. */
		if (io->pfiio_esize != sizeof(struct pfi_kif)) {
			error = ENODEV;
			break;
		}

		if (io->pfiio_size < 0 ||
		    io->pfiio_size > pf_ioctl_maxcount ||
		    WOULD_OVERFLOW(io->pfiio_size, sizeof(struct pfi_kif))) {
			error = EINVAL;
			break;
		}

		/* Names from userland may be unterminated. */
		io->pfiio_name[sizeof(io->pfiio_name) - 1] = '\0';

		bufsiz = io->pfiio_size * sizeof(struct pfi_kif);
		/* M_ZERO: fewer interfaces than requested may exist. */
		ifstore = mallocarray(io->pfiio_size, sizeof(struct pfi_kif),
		    M_TEMP, M_WAITOK | M_ZERO);

		PF_RULES_RLOCK();
		pfi_get_ifaces(io->pfiio_name, ifstore, &io->pfiio_size);
		PF_RULES_RUNLOCK();
		error = copyout(ifstore, io->pfiio_buffer, bufsiz);
		free(ifstore, M_TEMP);
		break;
	}
5375
	case DIOCSETIFFLAG: {
		struct pfioc_iface *io = (struct pfioc_iface *)addr;

		/* Set interface flags (e.g. skip) on the named interface. */
		io->pfiio_name[sizeof(io->pfiio_name) - 1] = '\0';

		PF_RULES_WLOCK();
		error = pfi_set_flags(io->pfiio_name, io->pfiio_flags);
		PF_RULES_WUNLOCK();
		break;
	}
5386
	case DIOCCLRIFFLAG: {
		struct pfioc_iface *io = (struct pfioc_iface *)addr;

		/* Clear interface flags on the named interface. */
		io->pfiio_name[sizeof(io->pfiio_name) - 1] = '\0';

		PF_RULES_WLOCK();
		error = pfi_clear_flags(io->pfiio_name, io->pfiio_flags);
		PF_RULES_WUNLOCK();
		break;
	}
5397
5398         case DIOCSETREASS: {
5399                 u_int32_t       *reass = (u_int32_t *)addr;
5400
5401                 V_pf_status.reass = *reass & (PF_REASS_ENABLED|PF_REASS_NODF);
5402                 /* Removal of DF flag without reassembly enabled is not a
5403                  * valid combination. Disable reassembly in such case. */
5404                 if (!(V_pf_status.reass & PF_REASS_ENABLED))
5405                         V_pf_status.reass = 0;
5406                 break;
5407         }
5408
5409         default:
5410                 error = ENODEV;
5411                 break;
5412         }
5413 fail:
5414         if (sx_xlocked(&V_pf_ioctl_lock))
5415                 sx_xunlock(&V_pf_ioctl_lock);
5416         CURVNET_RESTORE();
5417
5418 #undef ERROUT_IOCTL
5419
5420         return (error);
5421 }
5422
/*
 * Export a kernel state (struct pf_kstate) into the pfsync wire-format
 * union, laid out according to msg_version (PFSYNC_MSG_VERSION_1301 or
 * PFSYNC_MSG_VERSION_1400).  Multi-byte fields are converted to network
 * byte order.  Panics on an unknown msg_version.
 */
void
pfsync_state_export(union pfsync_state_union *sp, struct pf_kstate *st, int msg_version)
{
	bzero(sp, sizeof(union pfsync_state_union));

	/*
	 * copy from state key
	 *
	 * Fields shared by both message versions are written through the
	 * pfs_1301 member; the union members presumably share this common
	 * prefix -- confirm against the pfsync_state_union definition.
	 */
	sp->pfs_1301.key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
	sp->pfs_1301.key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
	sp->pfs_1301.key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
	sp->pfs_1301.key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
	sp->pfs_1301.key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
	sp->pfs_1301.key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
	sp->pfs_1301.key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
	sp->pfs_1301.key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
	sp->pfs_1301.proto = st->key[PF_SK_WIRE]->proto;
	sp->pfs_1301.af = st->key[PF_SK_WIRE]->af;

	/* copy from state */
	strlcpy(sp->pfs_1301.ifname, st->kif->pfik_name, sizeof(sp->pfs_1301.ifname));
	bcopy(&st->rt_addr, &sp->pfs_1301.rt_addr, sizeof(sp->pfs_1301.rt_addr));
	/* Exported as an age in seconds, not an absolute timestamp. */
	sp->pfs_1301.creation = htonl(time_uptime - st->creation);
	/* Convert the absolute (uptime-based) expiry to seconds remaining. */
	sp->pfs_1301.expire = pf_state_expires(st);
	if (sp->pfs_1301.expire <= time_uptime)
		sp->pfs_1301.expire = htonl(0);
	else
		sp->pfs_1301.expire = htonl(sp->pfs_1301.expire - time_uptime);

	sp->pfs_1301.direction = st->direction;
	sp->pfs_1301.log = st->act.log;
	sp->pfs_1301.timeout = st->timeout;

	/* Version-specific fields. */
	switch (msg_version) {
		case PFSYNC_MSG_VERSION_1301:
			sp->pfs_1301.state_flags = st->state_flags;
			break;
		case PFSYNC_MSG_VERSION_1400:
			/* 1400 widened state_flags to 16 bits and added
			 * the traffic-shaping/route-to action fields. */
			sp->pfs_1400.state_flags = htons(st->state_flags);
			sp->pfs_1400.qid = htons(st->act.qid);
			sp->pfs_1400.pqid = htons(st->act.pqid);
			sp->pfs_1400.dnpipe = htons(st->act.dnpipe);
			sp->pfs_1400.dnrpipe = htons(st->act.dnrpipe);
			sp->pfs_1400.rtableid = htonl(st->act.rtableid);
			sp->pfs_1400.min_ttl = st->act.min_ttl;
			sp->pfs_1400.set_tos = st->act.set_tos;
			sp->pfs_1400.max_mss = htons(st->act.max_mss);
			sp->pfs_1400.set_prio[0] = st->act.set_prio[0];
			sp->pfs_1400.set_prio[1] = st->act.set_prio[1];
			sp->pfs_1400.rt = st->rt;
			if (st->rt_kif)
				strlcpy(sp->pfs_1400.rt_ifname,
				    st->rt_kif->pfik_name,
				    sizeof(sp->pfs_1400.rt_ifname));
			break;
		default:
			panic("%s: Unsupported pfsync_msg_version %d",
			    __func__, msg_version);
	}

	if (st->src_node)
		sp->pfs_1301.sync_flags |= PFSYNC_FLAG_SRCNODE;
	if (st->nat_src_node)
		sp->pfs_1301.sync_flags |= PFSYNC_FLAG_NATSRCNODE;

	sp->pfs_1301.id = st->id;
	sp->pfs_1301.creatorid = st->creatorid;
	pf_state_peer_hton(&st->src, &sp->pfs_1301.src);
	pf_state_peer_hton(&st->dst, &sp->pfs_1301.dst);

	/* Rule numbers; -1 marks "no such rule" for the peer. */
	if (st->rule.ptr == NULL)
		sp->pfs_1301.rule = htonl(-1);
	else
		sp->pfs_1301.rule = htonl(st->rule.ptr->nr);
	if (st->anchor.ptr == NULL)
		sp->pfs_1301.anchor = htonl(-1);
	else
		sp->pfs_1301.anchor = htonl(st->anchor.ptr->nr);
	if (st->nat_rule.ptr == NULL)
		sp->pfs_1301.nat_rule = htonl(-1);
	else
		sp->pfs_1301.nat_rule = htonl(st->nat_rule.ptr->nr);

	pf_state_counter_hton(st->packets[0], sp->pfs_1301.packets[0]);
	pf_state_counter_hton(st->packets[1], sp->pfs_1301.packets[1]);
	pf_state_counter_hton(st->bytes[0], sp->pfs_1301.bytes[0]);
	pf_state_counter_hton(st->bytes[1], sp->pfs_1301.bytes[1]);
}
5509
/*
 * Export a kernel state into the userland export format
 * (struct pf_state_export, stamped with PF_STATE_VERSION), used by the
 * state-retrieval ioctls/libpfctl.  Multi-byte fields are converted to
 * network byte order.
 */
void
pf_state_export(struct pf_state_export *sp, struct pf_kstate *st)
{
	bzero(sp, sizeof(*sp));

	sp->version = PF_STATE_VERSION;

	/* copy from state key */
	sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
	sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
	sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
	sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
	sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
	sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
	sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
	sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
	sp->proto = st->key[PF_SK_WIRE]->proto;
	sp->af = st->key[PF_SK_WIRE]->af;

	/* copy from state */
	strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
	strlcpy(sp->orig_ifname, st->orig_kif->pfik_name,
	    sizeof(sp->orig_ifname));
	bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
	/* Exported as an age in seconds, not an absolute timestamp. */
	sp->creation = htonl(time_uptime - st->creation);
	/* Convert the absolute (uptime-based) expiry to seconds remaining. */
	sp->expire = pf_state_expires(st);
	if (sp->expire <= time_uptime)
		sp->expire = htonl(0);
	else
		sp->expire = htonl(sp->expire - time_uptime);

	sp->direction = st->direction;
	sp->log = st->act.log;
	sp->timeout = st->timeout;
	/* 8 bits for the old libpfctl, 16 bits for the new libpfctl */
	sp->state_flags_compat = st->state_flags;
	sp->state_flags = htons(st->state_flags);
	if (st->src_node)
		sp->sync_flags |= PFSYNC_FLAG_SRCNODE;
	if (st->nat_src_node)
		sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE;

	sp->id = st->id;
	sp->creatorid = st->creatorid;
	pf_state_peer_hton(&st->src, &sp->src);
	pf_state_peer_hton(&st->dst, &sp->dst);

	/* Rule numbers; -1 marks "no such rule". */
	if (st->rule.ptr == NULL)
		sp->rule = htonl(-1);
	else
		sp->rule = htonl(st->rule.ptr->nr);
	if (st->anchor.ptr == NULL)
		sp->anchor = htonl(-1);
	else
		sp->anchor = htonl(st->anchor.ptr->nr);
	if (st->nat_rule.ptr == NULL)
		sp->nat_rule = htonl(-1);
	else
		sp->nat_rule = htonl(st->nat_rule.ptr->nr);

	/*
	 * Counters are copied verbatim (no byte-order conversion here,
	 * unlike the scalar fields above).
	 */
	sp->packets[0] = st->packets[0];
	sp->packets[1] = st->packets[1];
	sp->bytes[0] = st->bytes[0];
	sp->bytes[1] = st->bytes[1];

	sp->qid = htons(st->act.qid);
	sp->pqid = htons(st->act.pqid);
	sp->dnpipe = htons(st->act.dnpipe);
	sp->dnrpipe = htons(st->act.dnrpipe);
	sp->rtableid = htonl(st->act.rtableid);
	sp->min_ttl = st->act.min_ttl;
	sp->set_tos = st->act.set_tos;
	sp->max_mss = htons(st->act.max_mss);
	sp->rt = st->rt;
	if (st->rt_kif)
		strlcpy(sp->rt_ifname, st->rt_kif->pfik_name,
		    sizeof(sp->rt_ifname));
	sp->set_prio[0] = st->act.set_prio[0];
	sp->set_prio[1] = st->act.set_prio[1];

}
5591
5592 static void
5593 pf_tbladdr_copyout(struct pf_addr_wrap *aw)
5594 {
5595         struct pfr_ktable *kt;
5596
5597         KASSERT(aw->type == PF_ADDR_TABLE, ("%s: type %u", __func__, aw->type));
5598
5599         kt = aw->p.tbl;
5600         if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
5601                 kt = kt->pfrkt_root;
5602         aw->p.tbl = NULL;
5603         aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
5604                 kt->pfrkt_cnt : -1;
5605 }
5606
5607 static int
5608 pf_add_status_counters(nvlist_t *nvl, const char *name, counter_u64_t *counters,
5609     size_t number, char **names)
5610 {
5611         nvlist_t        *nvc;
5612
5613         nvc = nvlist_create(0);
5614         if (nvc == NULL)
5615                 return (ENOMEM);
5616
5617         for (int i = 0; i < number; i++) {
5618                 nvlist_append_number_array(nvc, "counters",
5619                     counter_u64_fetch(counters[i]));
5620                 nvlist_append_string_array(nvc, "names",
5621                     names[i]);
5622                 nvlist_append_number_array(nvc, "ids",
5623                     i);
5624         }
5625         nvlist_add_nvlist(nvl, name, nvc);
5626         nvlist_destroy(nvc);
5627
5628         return (0);
5629 }
5630
/*
 * DIOCGETSTATUS handler: pack the pf status (flags, counters, interface
 * statistics) into an nvlist and copy it out to userland.
 *
 * Two-phase sizing protocol: if nv->size == 0 we return success with
 * nv->len set to the required buffer length; if nv->size is too small
 * we return ENOSPC.  Runs under the rules read lock until just before
 * the copyout.
 */
static int
pf_getstatus(struct pfioc_nv *nv)
{
	nvlist_t	*nvl = NULL, *nvc = NULL;
	void		*nvlpacked = NULL;
	int		 error;
	struct pf_status s;
	char *pf_reasons[PFRES_MAX+1] = PFRES_NAMES;
	char *pf_lcounter[KLCNT_MAX+1] = KLCNT_NAMES;
	char *pf_fcounter[FCNT_MAX+1] = FCNT_NAMES;
	PF_RULES_RLOCK_TRACKER;

#define ERROUT(x)      ERROUT_FUNCTION(errout, x)

	PF_RULES_RLOCK();

	nvl = nvlist_create(0);
	if (nvl == NULL)
		ERROUT(ENOMEM);

	nvlist_add_bool(nvl, "running", V_pf_status.running);
	nvlist_add_number(nvl, "since", V_pf_status.since);
	nvlist_add_number(nvl, "debug", V_pf_status.debug);
	nvlist_add_number(nvl, "hostid", V_pf_status.hostid);
	nvlist_add_number(nvl, "states", V_pf_status.states);
	nvlist_add_number(nvl, "src_nodes", V_pf_status.src_nodes);
	nvlist_add_number(nvl, "reass", V_pf_status.reass);
	nvlist_add_bool(nvl, "syncookies_active",
	    V_pf_status.syncookies_active);

	/* counters */
	error = pf_add_status_counters(nvl, "counters", V_pf_status.counters,
	    PFRES_MAX, pf_reasons);
	if (error != 0)
		ERROUT(error);

	/* lcounters */
	error = pf_add_status_counters(nvl, "lcounters", V_pf_status.lcounters,
	    KLCNT_MAX, pf_lcounter);
	if (error != 0)
		ERROUT(error);

	/*
	 * fcounters: these are pf_counter_u64, not counter_u64, so they
	 * cannot go through pf_add_status_counters() and are packed inline.
	 */
	nvc = nvlist_create(0);
	if (nvc == NULL)
		ERROUT(ENOMEM);

	for (int i = 0; i < FCNT_MAX; i++) {
		nvlist_append_number_array(nvc, "counters",
		    pf_counter_u64_fetch(&V_pf_status.fcounters[i]));
		nvlist_append_string_array(nvc, "names",
		    pf_fcounter[i]);
		nvlist_append_number_array(nvc, "ids",
		    i);
	}
	nvlist_add_nvlist(nvl, "fcounters", nvc);
	nvlist_destroy(nvc);
	nvc = NULL;

	/* scounters (note: these reuse the fcounter name table) */
	error = pf_add_status_counters(nvl, "scounters", V_pf_status.scounters,
	    SCNT_MAX, pf_fcounter);
	if (error != 0)
		ERROUT(error);

	nvlist_add_string(nvl, "ifname", V_pf_status.ifname);
	nvlist_add_binary(nvl, "chksum", V_pf_status.pf_chksum,
	    PF_MD5_DIGEST_LENGTH);

	/* Refresh per-interface statistics for the status interface. */
	pfi_update_status(V_pf_status.ifname, &s);

	/* pcounters / bcounters */
	for (int i = 0; i < 2; i++) {
		for (int j = 0; j < 2; j++) {
			for (int k = 0; k < 2; k++) {
				nvlist_append_number_array(nvl, "pcounters",
				    s.pcounters[i][j][k]);
			}
			nvlist_append_number_array(nvl, "bcounters",
			    s.bcounters[i][j]);
		}
	}

	nvlpacked = nvlist_pack(nvl, &nv->len);
	if (nvlpacked == NULL)
		ERROUT(ENOMEM);

	/* size == 0 is a length probe: report required len, no copyout. */
	if (nv->size == 0)
		ERROUT(0);
	else if (nv->size < nv->len)
		ERROUT(ENOSPC);

	/* Drop the lock before touching userland memory. */
	PF_RULES_RUNLOCK();
	error = copyout(nvlpacked, nv->data, nv->len);
	goto done;

#undef ERROUT
errout:
	PF_RULES_RUNLOCK();
done:
	free(nvlpacked, M_NVLIST);
	nvlist_destroy(nvc);
	nvlist_destroy(nvl);

	return (error);
}
5737
5738 /*
5739  * XXX - Check for version mismatch!!!
5740  */
/*
 * Unlink every state in every hash row, marking each for immediate purge.
 * States are flagged PFSTATE_NOSYNC so pfsync does not emit a delete
 * message per state.
 */
static void
pf_clear_all_states(void)
{
	struct pf_kstate	*s;
	u_int i;

	for (i = 0; i <= pf_hashmask; i++) {
		struct pf_idhash *ih = &V_pf_idhash[i];
relock:
		PF_HASHROW_LOCK(ih);
		LIST_FOREACH(s, &ih->states, entry) {
			s->timeout = PFTM_PURGE;
			/* Don't send out individual delete messages. */
			s->state_flags |= PFSTATE_NOSYNC;
			pf_unlink_state(s);
			/*
			 * pf_unlink_state() releases the row lock (we
			 * re-lock at relock without unlocking here), so
			 * the list walk must restart from scratch.
			 */
			goto relock;
		}
		PF_HASHROW_UNLOCK(ih);
	}
}
5761
5762 static int
5763 pf_clear_tables(void)
5764 {
5765         struct pfioc_table io;
5766         int error;
5767
5768         bzero(&io, sizeof(io));
5769
5770         error = pfr_clr_tables(&io.pfrio_table, &io.pfrio_ndel,
5771             io.pfrio_flags);
5772
5773         return (error);
5774 }
5775
/*
 * Clear source-node references.  With n == NULL every source node is
 * cleared; otherwise only the given node.  First detach all state
 * back-pointers, then mark the node(s) expired (expire = 1, states = 0)
 * -- presumably so the purge scan frees them; confirm against the purge
 * thread.
 */
static void
pf_clear_srcnodes(struct pf_ksrc_node *n)
{
	struct pf_kstate *s;
	int i;

	/* Detach matching src-node pointers from every state. */
	for (i = 0; i <= pf_hashmask; i++) {
		struct pf_idhash *ih = &V_pf_idhash[i];

		PF_HASHROW_LOCK(ih);
		LIST_FOREACH(s, &ih->states, entry) {
			if (n == NULL || n == s->src_node)
				s->src_node = NULL;
			if (n == NULL || n == s->nat_src_node)
				s->nat_src_node = NULL;
		}
		PF_HASHROW_UNLOCK(ih);
	}

	if (n == NULL) {
		struct pf_srchash *sh;

		/* Expire every node in the source-node hash. */
		for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask;
		    i++, sh++) {
			PF_HASHROW_LOCK(sh);
			LIST_FOREACH(n, &sh->nodes, entry) {
				n->expire = 1;
				n->states = 0;
			}
			PF_HASHROW_UNLOCK(sh);
		}
	} else {
		/* XXX: hash slot should already be locked here. */
		n->expire = 1;
		n->states = 0;
	}
}
5813
/*
 * DIOCKILLSRCNODES helper: kill all source nodes whose source and
 * destination addresses match the (possibly negated, masked) criteria
 * in psnk.  Three passes:
 *   1. unlink matching nodes onto a private kill list, marking them
 *      with expire = 1;
 *   2. detach state back-pointers that reference a marked node;
 *   3. free the kill list and report the count in psnk->psnk_killed.
 */
static void
pf_kill_srcnodes(struct pfioc_src_node_kill *psnk)
{
	struct pf_ksrc_node_list	 kill;

	LIST_INIT(&kill);
	for (int i = 0; i <= pf_srchashmask; i++) {
		struct pf_srchash *sh = &V_pf_srchash[i];
		struct pf_ksrc_node *sn, *tmp;

		PF_HASHROW_LOCK(sh);
		LIST_FOREACH_SAFE(sn, &sh->nodes, entry, tmp)
			if (PF_MATCHA(psnk->psnk_src.neg,
			      &psnk->psnk_src.addr.v.a.addr,
			      &psnk->psnk_src.addr.v.a.mask,
			      &sn->addr, sn->af) &&
			    PF_MATCHA(psnk->psnk_dst.neg,
			      &psnk->psnk_dst.addr.v.a.addr,
			      &psnk->psnk_dst.addr.v.a.mask,
			      &sn->raddr, sn->af)) {
				pf_unlink_src_node(sn);
				LIST_INSERT_HEAD(&kill, sn, entry);
				/* Marker checked by the state sweep below. */
				sn->expire = 1;
			}
		PF_HASHROW_UNLOCK(sh);
	}

	for (int i = 0; i <= pf_hashmask; i++) {
		struct pf_idhash *ih = &V_pf_idhash[i];
		struct pf_kstate *s;

		PF_HASHROW_LOCK(ih);
		LIST_FOREACH(s, &ih->states, entry) {
			if (s->src_node && s->src_node->expire == 1)
				s->src_node = NULL;
			if (s->nat_src_node && s->nat_src_node->expire == 1)
				s->nat_src_node = NULL;
		}
		PF_HASHROW_UNLOCK(ih);
	}

	psnk->psnk_killed = pf_free_src_nodes(&kill);
}
5857
5858 static int
5859 pf_keepcounters(struct pfioc_nv *nv)
5860 {
5861         nvlist_t        *nvl = NULL;
5862         void            *nvlpacked = NULL;
5863         int              error = 0;
5864
5865 #define ERROUT(x)       ERROUT_FUNCTION(on_error, x)
5866
5867         if (nv->len > pf_ioctl_maxcount)
5868                 ERROUT(ENOMEM);
5869
5870         nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
5871         if (nvlpacked == NULL)
5872                 ERROUT(ENOMEM);
5873
5874         error = copyin(nv->data, nvlpacked, nv->len);
5875         if (error)
5876                 ERROUT(error);
5877
5878         nvl = nvlist_unpack(nvlpacked, nv->len, 0);
5879         if (nvl == NULL)
5880                 ERROUT(EBADMSG);
5881
5882         if (! nvlist_exists_bool(nvl, "keep_counters"))
5883                 ERROUT(EBADMSG);
5884
5885         V_pf_status.keep_counters = nvlist_get_bool(nvl, "keep_counters");
5886
5887 on_error:
5888         nvlist_destroy(nvl);
5889         free(nvlpacked, M_NVLIST);
5890         return (error);
5891 }
5892
/*
 * Unlink states, optionally restricted to an interface name and
 * optionally also killing the "matching" (reversed-key) counterpart of
 * each state.  States are flagged PFSTATE_NOSYNC so pfsync does not
 * send a delete per state; a single bulk clear notification goes out at
 * the end.  Returns the number of states killed.
 */
static unsigned int
pf_clear_states(const struct pf_kstate_kill *kill)
{
	struct pf_state_key_cmp	 match_key;
	struct pf_kstate	*s;
	struct pfi_kkif	*kif;
	int		 idx;
	unsigned int	 killed = 0, dir;

	for (unsigned int i = 0; i <= pf_hashmask; i++) {
		struct pf_idhash *ih = &V_pf_idhash[i];

relock_DIOCCLRSTATES:
		PF_HASHROW_LOCK(ih);
		LIST_FOREACH(s, &ih->states, entry) {
			/* For floating states look at the original kif. */
			kif = s->kif == V_pfi_all ? s->orig_kif : s->kif;

			if (kill->psk_ifname[0] &&
			    strcmp(kill->psk_ifname,
			    kif->pfik_name))
				continue;

			if (kill->psk_kill_match) {
				/*
				 * Build the key of the opposite-direction
				 * state: swap address/port pairs and flip
				 * the direction/key index.
				 */
				bzero(&match_key, sizeof(match_key));

				if (s->direction == PF_OUT) {
					dir = PF_IN;
					idx = PF_SK_STACK;
				} else {
					dir = PF_OUT;
					idx = PF_SK_WIRE;
				}

				match_key.af = s->key[idx]->af;
				match_key.proto = s->key[idx]->proto;
				PF_ACPY(&match_key.addr[0],
				    &s->key[idx]->addr[1], match_key.af);
				match_key.port[0] = s->key[idx]->port[1];
				PF_ACPY(&match_key.addr[1],
				    &s->key[idx]->addr[0], match_key.af);
				match_key.port[1] = s->key[idx]->port[0];
			}

			/*
			 * Don't send out individual
			 * delete messages.
			 */
			s->state_flags |= PFSTATE_NOSYNC;
			pf_unlink_state(s);
			killed++;

			if (kill->psk_kill_match)
				killed += pf_kill_matching_state(&match_key,
				    dir);

			/*
			 * pf_unlink_state() releases the row lock; restart
			 * the row scan with a fresh lock acquisition.
			 */
			goto relock_DIOCCLRSTATES;
		}
		PF_HASHROW_UNLOCK(ih);
	}

	/* Tell pfsync peers about the bulk clear, once. */
	if (V_pfsync_clear_states_ptr != NULL)
		V_pfsync_clear_states_ptr(V_pf_status.hostid, kill->psk_ifname);

	return (killed);
}
5959
5960 static void
5961 pf_killstates(struct pf_kstate_kill *kill, unsigned int *killed)
5962 {
5963         struct pf_kstate        *s;
5964
5965         if (kill->psk_pfcmp.id) {
5966                 if (kill->psk_pfcmp.creatorid == 0)
5967                         kill->psk_pfcmp.creatorid = V_pf_status.hostid;
5968                 if ((s = pf_find_state_byid(kill->psk_pfcmp.id,
5969                     kill->psk_pfcmp.creatorid))) {
5970                         pf_unlink_state(s);
5971                         *killed = 1;
5972                 }
5973                 return;
5974         }
5975
5976         for (unsigned int i = 0; i <= pf_hashmask; i++)
5977                 *killed += pf_killstates_row(kill, &V_pf_idhash[i]);
5978
5979         return;
5980 }
5981
5982 static int
5983 pf_killstates_nv(struct pfioc_nv *nv)
5984 {
5985         struct pf_kstate_kill    kill;
5986         nvlist_t                *nvl = NULL;
5987         void                    *nvlpacked = NULL;
5988         int                      error = 0;
5989         unsigned int             killed = 0;
5990
5991 #define ERROUT(x)       ERROUT_FUNCTION(on_error, x)
5992
5993         if (nv->len > pf_ioctl_maxcount)
5994                 ERROUT(ENOMEM);
5995
5996         nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
5997         if (nvlpacked == NULL)
5998                 ERROUT(ENOMEM);
5999
6000         error = copyin(nv->data, nvlpacked, nv->len);
6001         if (error)
6002                 ERROUT(error);
6003
6004         nvl = nvlist_unpack(nvlpacked, nv->len, 0);
6005         if (nvl == NULL)
6006                 ERROUT(EBADMSG);
6007
6008         error = pf_nvstate_kill_to_kstate_kill(nvl, &kill);
6009         if (error)
6010                 ERROUT(error);
6011
6012         pf_killstates(&kill, &killed);
6013
6014         free(nvlpacked, M_NVLIST);
6015         nvlpacked = NULL;
6016         nvlist_destroy(nvl);
6017         nvl = nvlist_create(0);
6018         if (nvl == NULL)
6019                 ERROUT(ENOMEM);
6020
6021         nvlist_add_number(nvl, "killed", killed);
6022
6023         nvlpacked = nvlist_pack(nvl, &nv->len);
6024         if (nvlpacked == NULL)
6025                 ERROUT(ENOMEM);
6026
6027         if (nv->size == 0)
6028                 ERROUT(0);
6029         else if (nv->size < nv->len)
6030                 ERROUT(ENOSPC);
6031
6032         error = copyout(nvlpacked, nv->data, nv->len);
6033
6034 on_error:
6035         nvlist_destroy(nvl);
6036         free(nvlpacked, M_NVLIST);
6037         return (error);
6038 }
6039
/*
 * DIOCCLRSTATESNV handler: unpack clear criteria from a userland nvlist,
 * bulk-clear the matching states via pf_clear_states() and return
 * { "killed": count } packed back into nv.  nv->size == 0 is a length
 * probe (success, nv->len set); ENOSPC if the buffer is too small.
 */
static int
pf_clearstates_nv(struct pfioc_nv *nv)
{
	struct pf_kstate_kill	 kill;
	nvlist_t		*nvl = NULL;
	void			*nvlpacked = NULL;
	int			 error = 0;
	unsigned int		 killed;

#define ERROUT(x)	ERROUT_FUNCTION(on_error, x)

	if (nv->len > pf_ioctl_maxcount)
		ERROUT(ENOMEM);

	nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
	if (nvlpacked == NULL)
		ERROUT(ENOMEM);

	error = copyin(nv->data, nvlpacked, nv->len);
	if (error)
		ERROUT(error);

	nvl = nvlist_unpack(nvlpacked, nv->len, 0);
	if (nvl == NULL)
		ERROUT(EBADMSG);

	error = pf_nvstate_kill_to_kstate_kill(nvl, &kill);
	if (error)
		ERROUT(error);

	killed = pf_clear_states(&kill);

	/* Reuse nvl/nvlpacked for the reply. */
	free(nvlpacked, M_NVLIST);
	nvlpacked = NULL;
	nvlist_destroy(nvl);
	nvl = nvlist_create(0);
	if (nvl == NULL)
		ERROUT(ENOMEM);

	nvlist_add_number(nvl, "killed", killed);

	nvlpacked = nvlist_pack(nvl, &nv->len);
	if (nvlpacked == NULL)
		ERROUT(ENOMEM);

	if (nv->size == 0)
		ERROUT(0);
	else if (nv->size < nv->len)
		ERROUT(ENOSPC);

	error = copyout(nvlpacked, nv->data, nv->len);

#undef ERROUT
on_error:
	nvlist_destroy(nvl);
	free(nvlpacked, M_NVLIST);
	return (error);
}
6098
/*
 * DIOCGETSTATENV handler: look up a single state by the "id" and
 * "creatorid" numbers in the request nvlist and return it packed as a
 * { "state": ... } nvlist.  nv->size == 0 is a length probe (success,
 * nv->len set); ENOSPC if the buffer is too small; ENOENT if the state
 * does not exist.
 */
static int
pf_getstate(struct pfioc_nv *nv)
{
	nvlist_t		*nvl = NULL, *nvls;
	void			*nvlpacked = NULL;
	struct pf_kstate	*s = NULL;
	int			 error = 0;
	uint64_t		 id, creatorid;

#define ERROUT(x)	ERROUT_FUNCTION(errout, x)

	if (nv->len > pf_ioctl_maxcount)
		ERROUT(ENOMEM);

	nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK);
	if (nvlpacked == NULL)
		ERROUT(ENOMEM);

	error = copyin(nv->data, nvlpacked, nv->len);
	if (error)
		ERROUT(error);

	nvl = nvlist_unpack(nvlpacked, nv->len, 0);
	if (nvl == NULL)
		ERROUT(EBADMSG);

	PFNV_CHK(pf_nvuint64(nvl, "id", &id));
	PFNV_CHK(pf_nvuint64(nvl, "creatorid", &creatorid));

	/*
	 * The state comes back locked (the errout path unlocks it); it
	 * stays locked while we serialize it below.
	 */
	s = pf_find_state_byid(id, creatorid);
	if (s == NULL)
		ERROUT(ENOENT);

	/* Reuse nvl/nvlpacked for the reply. */
	free(nvlpacked, M_NVLIST);
	nvlpacked = NULL;
	nvlist_destroy(nvl);
	nvl = nvlist_create(0);
	if (nvl == NULL)
		ERROUT(ENOMEM);

	nvls = pf_state_to_nvstate(s);
	if (nvls == NULL)
		ERROUT(ENOMEM);

	nvlist_add_nvlist(nvl, "state", nvls);
	nvlist_destroy(nvls);

	nvlpacked = nvlist_pack(nvl, &nv->len);
	if (nvlpacked == NULL)
		ERROUT(ENOMEM);

	if (nv->size == 0)
		ERROUT(0);
	else if (nv->size < nv->len)
		ERROUT(ENOSPC);

	error = copyout(nvlpacked, nv->data, nv->len);

#undef ERROUT
errout:
	if (s != NULL)
		PF_STATE_UNLOCK(s);
	free(nvlpacked, M_NVLIST);
	nvlist_destroy(nvl);
	return (error);
}
6165
6166 /*
6167  * XXX - Check for version mismatch!!!
6168  */
6169
6170 /*
6171  * Duplicate pfctl -Fa operation to get rid of as much as we can.
6172  */
static int
shutdown_pf(void)
{
	int error = 0;
	u_int32_t t[5];
	/* Empty anchor name: operate on the main ruleset. */
	char nn = '\0';

	/*
	 * Open a transaction on each main ruleset, then commit them all
	 * without adding any rules -- this swaps in empty rulesets.
	 * Afterwards flush tables, ethernet rules, ALTQ, states and
	 * source nodes.  do/while(0) gives the error paths a single
	 * break-out point.
	 */
	do {
		if ((error = pf_begin_rules(&t[0], PF_RULESET_SCRUB, &nn))
		    != 0) {
			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: SCRUB\n"));
			break;
		}
		if ((error = pf_begin_rules(&t[1], PF_RULESET_FILTER, &nn))
		    != 0) {
			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: FILTER\n"));
			break;		/* XXX: rollback? */
		}
		if ((error = pf_begin_rules(&t[2], PF_RULESET_NAT, &nn))
		    != 0) {
			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: NAT\n"));
			break;		/* XXX: rollback? */
		}
		if ((error = pf_begin_rules(&t[3], PF_RULESET_BINAT, &nn))
		    != 0) {
			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: BINAT\n"));
			break;		/* XXX: rollback? */
		}
		if ((error = pf_begin_rules(&t[4], PF_RULESET_RDR, &nn))
		    != 0) {
			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: RDR\n"));
			break;		/* XXX: rollback? */
		}

		/* XXX: these should always succeed here */
		pf_commit_rules(t[0], PF_RULESET_SCRUB, &nn);
		pf_commit_rules(t[1], PF_RULESET_FILTER, &nn);
		pf_commit_rules(t[2], PF_RULESET_NAT, &nn);
		pf_commit_rules(t[3], PF_RULESET_BINAT, &nn);
		pf_commit_rules(t[4], PF_RULESET_RDR, &nn);

		if ((error = pf_clear_tables()) != 0)
			break;

		if ((error = pf_begin_eth(&t[0], &nn)) != 0) {
			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: eth\n"));
			break;
		}
		pf_commit_eth(t[0], &nn);

#ifdef ALTQ
		if ((error = pf_begin_altq(&t[0])) != 0) {
			DPFPRINTF(PF_DEBUG_MISC, ("shutdown_pf: ALTQ\n"));
			break;
		}
		pf_commit_altq(t[0]);
#endif

		pf_clear_all_states();

		pf_clear_srcnodes(NULL);

		/* status does not use malloced mem so no need to cleanup */
		/* fingerprints and interfaces have their own cleanup code */
	} while(0);

	return (error);
}
6241
6242 static pfil_return_t
6243 pf_check_return(int chk, struct mbuf **m)
6244 {
6245
6246         switch (chk) {
6247         case PF_PASS:
6248                 if (*m == NULL)
6249                         return (PFIL_CONSUMED);
6250                 else
6251                         return (PFIL_PASS);
6252                 break;
6253         default:
6254                 if (*m != NULL) {
6255                         m_freem(*m);
6256                         *m = NULL;
6257                 }
6258                 return (PFIL_DROPPED);
6259         }
6260 }
6261
6262 static pfil_return_t
6263 pf_eth_check_in(struct mbuf **m, struct ifnet *ifp, int flags,
6264     void *ruleset __unused, struct inpcb *inp)
6265 {
6266         int chk;
6267
6268         chk = pf_test_eth(PF_IN, flags, ifp, m, inp);
6269
6270         return (pf_check_return(chk, m));
6271 }
6272
6273 static pfil_return_t
6274 pf_eth_check_out(struct mbuf **m, struct ifnet *ifp, int flags,
6275     void *ruleset __unused, struct inpcb *inp)
6276 {
6277         int chk;
6278
6279         chk = pf_test_eth(PF_OUT, flags, ifp, m, inp);
6280
6281         return (pf_check_return(chk, m));
6282 }
6283
6284 #ifdef INET
6285 static pfil_return_t
6286 pf_check_in(struct mbuf **m, struct ifnet *ifp, int flags,
6287     void *ruleset __unused, struct inpcb *inp)
6288 {
6289         int chk;
6290
6291         chk = pf_test(PF_IN, flags, ifp, m, inp, NULL);
6292
6293         return (pf_check_return(chk, m));
6294 }
6295
6296 static pfil_return_t
6297 pf_check_out(struct mbuf **m, struct ifnet *ifp, int flags,
6298     void *ruleset __unused,  struct inpcb *inp)
6299 {
6300         int chk;
6301
6302         chk = pf_test(PF_OUT, flags, ifp, m, inp, NULL);
6303
6304         return (pf_check_return(chk, m));
6305 }
6306 #endif
6307
6308 #ifdef INET6
/*
 * pfil(9) hook: run inbound IPv6 traffic through pf.
 */
static pfil_return_t
pf_check6_in(struct mbuf **m, struct ifnet *ifp, int flags,
    void *ruleset __unused,  struct inpcb *inp)
{
	int chk;

	/*
	 * In case of loopback traffic IPv6 uses the real interface in
	 * order to support scoped addresses. In order to support stateful
	 * filtering we have to change this to lo0, as is the case in IPv4.
	 */
	CURVNET_SET(ifp->if_vnet);
	chk = pf_test6(PF_IN, flags, (*m)->m_flags & M_LOOP ? V_loif : ifp,
	    m, inp, NULL);
	CURVNET_RESTORE();

	return (pf_check_return(chk, m));
}
6327
6328 static pfil_return_t
6329 pf_check6_out(struct mbuf **m, struct ifnet *ifp, int flags,
6330     void *ruleset __unused,  struct inpcb *inp)
6331 {
6332         int chk;
6333
6334         CURVNET_SET(ifp->if_vnet);
6335         chk = pf_test6(PF_OUT, flags, ifp, m, inp, NULL);
6336         CURVNET_RESTORE();
6337
6338         return (pf_check_return(chk, m));
6339 }
6340 #endif /* INET6 */
6341
/*
 * Per-vnet pfil(9) hook handles, needed so dehook_pf()/dehook_pf_eth()
 * can unregister exactly the hooks that hook_pf()/hook_pf_eth() added.
 */
VNET_DEFINE_STATIC(pfil_hook_t, pf_eth_in_hook);
VNET_DEFINE_STATIC(pfil_hook_t, pf_eth_out_hook);
#define	V_pf_eth_in_hook	VNET(pf_eth_in_hook)
#define	V_pf_eth_out_hook	VNET(pf_eth_out_hook)

#ifdef INET
VNET_DEFINE_STATIC(pfil_hook_t, pf_ip4_in_hook);
VNET_DEFINE_STATIC(pfil_hook_t, pf_ip4_out_hook);
#define	V_pf_ip4_in_hook	VNET(pf_ip4_in_hook)
#define	V_pf_ip4_out_hook	VNET(pf_ip4_out_hook)
#endif
#ifdef INET6
VNET_DEFINE_STATIC(pfil_hook_t, pf_ip6_in_hook);
VNET_DEFINE_STATIC(pfil_hook_t, pf_ip6_out_hook);
#define	V_pf_ip6_in_hook	VNET(pf_ip6_in_hook)
#define	V_pf_ip6_out_hook	VNET(pf_ip6_out_hook)
#endif
6359
/*
 * Register pf's Ethernet-layer pfil(9) hooks (in and out) and link
 * them to the link-layer pfil head.  Idempotent: returns immediately
 * if the hooks are already installed.
 */
static void
hook_pf_eth(void)
{
	struct pfil_hook_args pha = {
		.pa_version = PFIL_VERSION,
		.pa_modname = "pf",
		.pa_type = PFIL_TYPE_ETHERNET,
	};
	struct pfil_link_args pla = {
		.pa_version = PFIL_VERSION,
	};
	int ret __diagused;

	if (atomic_load_bool(&V_pf_pfil_eth_hooked))
		return;

	/* Inbound hook: register, then link to the link-layer head. */
	pha.pa_mbuf_chk = pf_eth_check_in;
	pha.pa_flags = PFIL_IN;
	pha.pa_rulname = "eth-in";
	V_pf_eth_in_hook = pfil_add_hook(&pha);
	pla.pa_flags = PFIL_IN | PFIL_HEADPTR | PFIL_HOOKPTR;
	pla.pa_head = V_link_pfil_head;
	pla.pa_hook = V_pf_eth_in_hook;
	ret = pfil_link(&pla);
	MPASS(ret == 0);
	/* Outbound hook: pha/pla are reused with updated fields. */
	pha.pa_mbuf_chk = pf_eth_check_out;
	pha.pa_flags = PFIL_OUT;
	pha.pa_rulname = "eth-out";
	V_pf_eth_out_hook = pfil_add_hook(&pha);
	pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR;
	pla.pa_head = V_link_pfil_head;
	pla.pa_hook = V_pf_eth_out_hook;
	ret = pfil_link(&pla);
	MPASS(ret == 0);

	atomic_store_bool(&V_pf_pfil_eth_hooked, true);
}
6397
/*
 * Register pf's IPv4/IPv6 pfil(9) hooks and link them to the inet and
 * inet6 pfil heads (plus the "local" output heads when V_pf_filter_local
 * is set, so locally generated traffic is filtered too).  Idempotent:
 * returns immediately if the hooks are already installed.
 */
static void
hook_pf(void)
{
	struct pfil_hook_args pha = {
		.pa_version = PFIL_VERSION,
		.pa_modname = "pf",
	};
	struct pfil_link_args pla = {
		.pa_version = PFIL_VERSION,
	};
	int ret __diagused;

	if (atomic_load_bool(&V_pf_pfil_hooked))
		return;

#ifdef INET
	/* IPv4 inbound hook. */
	pha.pa_type = PFIL_TYPE_IP4;
	pha.pa_mbuf_chk = pf_check_in;
	pha.pa_flags = PFIL_IN;
	pha.pa_rulname = "default-in";
	V_pf_ip4_in_hook = pfil_add_hook(&pha);
	pla.pa_flags = PFIL_IN | PFIL_HEADPTR | PFIL_HOOKPTR;
	pla.pa_head = V_inet_pfil_head;
	pla.pa_hook = V_pf_ip4_in_hook;
	ret = pfil_link(&pla);
	MPASS(ret == 0);
	/* IPv4 outbound hook; pha/pla are reused with updated fields. */
	pha.pa_mbuf_chk = pf_check_out;
	pha.pa_flags = PFIL_OUT;
	pha.pa_rulname = "default-out";
	V_pf_ip4_out_hook = pfil_add_hook(&pha);
	pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR;
	pla.pa_head = V_inet_pfil_head;
	pla.pa_hook = V_pf_ip4_out_hook;
	ret = pfil_link(&pla);
	MPASS(ret == 0);
	if (V_pf_filter_local) {
		/* Also filter output of locally originated IPv4 packets. */
		pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR;
		pla.pa_head = V_inet_local_pfil_head;
		pla.pa_hook = V_pf_ip4_out_hook;
		ret = pfil_link(&pla);
		MPASS(ret == 0);
	}
#endif
#ifdef INET6
	/* IPv6 inbound hook. */
	pha.pa_type = PFIL_TYPE_IP6;
	pha.pa_mbuf_chk = pf_check6_in;
	pha.pa_flags = PFIL_IN;
	pha.pa_rulname = "default-in6";
	V_pf_ip6_in_hook = pfil_add_hook(&pha);
	pla.pa_flags = PFIL_IN | PFIL_HEADPTR | PFIL_HOOKPTR;
	pla.pa_head = V_inet6_pfil_head;
	pla.pa_hook = V_pf_ip6_in_hook;
	ret = pfil_link(&pla);
	MPASS(ret == 0);
	/* IPv6 outbound hook. */
	pha.pa_mbuf_chk = pf_check6_out;
	pha.pa_rulname = "default-out6";
	pha.pa_flags = PFIL_OUT;
	V_pf_ip6_out_hook = pfil_add_hook(&pha);
	pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR;
	pla.pa_head = V_inet6_pfil_head;
	pla.pa_hook = V_pf_ip6_out_hook;
	ret = pfil_link(&pla);
	MPASS(ret == 0);
	if (V_pf_filter_local) {
		/* Also filter output of locally originated IPv6 packets. */
		pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR;
		pla.pa_head = V_inet6_local_pfil_head;
		pla.pa_hook = V_pf_ip6_out_hook;
		ret = pfil_link(&pla);
		MPASS(ret == 0);
	}
#endif

	atomic_store_bool(&V_pf_pfil_hooked, true);
}
6472
6473 static void
6474 dehook_pf_eth(void)
6475 {
6476
6477         if (!atomic_load_bool(&V_pf_pfil_eth_hooked))
6478                 return;
6479
6480         pfil_remove_hook(V_pf_eth_in_hook);
6481         pfil_remove_hook(V_pf_eth_out_hook);
6482
6483         atomic_store_bool(&V_pf_pfil_eth_hooked, false);
6484 }
6485
/*
 * Unregister pf's IPv4/IPv6 pfil(9) hooks, if installed.  Removing a
 * hook also unlinks it from every head it was linked to, so the
 * "local" head links added by hook_pf() are torn down here as well.
 */
static void
dehook_pf(void)
{

	if (!atomic_load_bool(&V_pf_pfil_hooked))
		return;

#ifdef INET
	pfil_remove_hook(V_pf_ip4_in_hook);
	pfil_remove_hook(V_pf_ip4_out_hook);
#endif
#ifdef INET6
	pfil_remove_hook(V_pf_ip6_in_hook);
	pfil_remove_hook(V_pf_ip6_out_hook);
#endif

	atomic_store_bool(&V_pf_pfil_hooked, false);
}
6504
/*
 * Per-vnet pf initialization, run from the VNET_SYSINIT below: set up
 * the per-vnet locks, tag zones and tagsets, then attach pf to the
 * vnet and mark it active.
 */
static void
pf_load_vnet(void)
{
	V_pf_tag_z = uma_zcreate("pf tags", sizeof(struct pf_tagname),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);

	rm_init_flags(&V_pf_rules_lock, "pf rulesets", RM_RECURSE);
	sx_init(&V_pf_ioctl_lock, "pf ioctl");

	pf_init_tagset(&V_pf_tags, &pf_rule_tag_hashsize,
	    PF_RULE_TAG_HASH_SIZE_DEFAULT);
#ifdef ALTQ
	pf_init_tagset(&V_pf_qids, &pf_queue_tag_hashsize,
	    PF_QUEUE_TAG_HASH_SIZE_DEFAULT);
#endif

	V_pf_keth = &V_pf_main_keth_anchor.ruleset;

	pfattach_vnet();
	V_pf_vnet_active = 1;
}
6526
6527 static int
6528 pf_load(void)
6529 {
6530         int error;
6531
6532         sx_init(&pf_end_lock, "pf end thread");
6533
6534         pf_mtag_initialize();
6535
6536         pf_dev = make_dev(&pf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, PF_NAME);
6537         if (pf_dev == NULL)
6538                 return (ENOMEM);
6539
6540         pf_end_threads = 0;
6541         error = kproc_create(pf_purge_thread, NULL, &pf_purge_proc, 0, 0, "pf purge");
6542         if (error != 0)
6543                 return (error);
6544
6545         pfi_initialize();
6546
6547         return (0);
6548 }
6549
/*
 * Per-vnet pf teardown, run from the VNET_SYSUNINIT below.  The order
 * is significant: stop pf and unhook it from pfil first, shut down the
 * rulesets, drain epoch callbacks so no packet processing remains in
 * flight, then free the remaining per-vnet resources; counters are
 * freed last because shutdown itself still updates them.
 */
static void
pf_unload_vnet(void)
{
	int ret __diagused;

	/* Stop processing and detach from pfil before tearing down state. */
	V_pf_vnet_active = 0;
	V_pf_status.running = 0;
	dehook_pf();
	dehook_pf_eth();

	PF_RULES_WLOCK();
	pf_syncookies_cleanup();
	shutdown_pf();
	PF_RULES_WUNLOCK();

	/* Make sure we've cleaned up ethernet rules before we continue. */
	NET_EPOCH_DRAIN_CALLBACKS();

	ret = swi_remove(V_pf_swi_cookie);
	MPASS(ret == 0);
	ret = intr_event_destroy(V_pf_swi_ie);
	MPASS(ret == 0);

	pf_unload_vnet_purge();

	pf_normalize_cleanup();
	PF_RULES_WLOCK();
	pfi_cleanup_vnet();
	PF_RULES_WUNLOCK();
	pfr_cleanup();
	pf_osfp_flush();
	pf_cleanup();
	/* The mtag type is global; only tear it down once. */
	if (IS_DEFAULT_VNET(curvnet))
		pf_mtag_cleanup();

	pf_cleanup_tagset(&V_pf_tags);
#ifdef ALTQ
	pf_cleanup_tagset(&V_pf_qids);
#endif
	uma_zdestroy(V_pf_tag_z);

#ifdef PF_WANT_32_TO_64_COUNTER
	PF_RULES_WLOCK();
	LIST_REMOVE(V_pf_kifmarker, pfik_allkiflist);

	MPASS(LIST_EMPTY(&V_pf_allkiflist));
	MPASS(V_pf_allkifcount == 0);

	LIST_REMOVE(&V_pf_default_rule, allrulelist);
	V_pf_allrulecount--;
	LIST_REMOVE(V_pf_rulemarker, allrulelist);

	/*
	 * There are known pf rule leaks when running the test suite.
	 */
#ifdef notyet
	MPASS(LIST_EMPTY(&V_pf_allrulelist));
	MPASS(V_pf_allrulecount == 0);
#endif

	PF_RULES_WUNLOCK();

	free(V_pf_kifmarker, PFI_MTYPE);
	free(V_pf_rulemarker, M_PFRULE);
#endif

	/* Free counters last as we updated them during shutdown. */
	pf_counter_u64_deinit(&V_pf_default_rule.evaluations);
	for (int i = 0; i < 2; i++) {
		pf_counter_u64_deinit(&V_pf_default_rule.packets[i]);
		pf_counter_u64_deinit(&V_pf_default_rule.bytes[i]);
	}
	counter_u64_free(V_pf_default_rule.states_cur);
	counter_u64_free(V_pf_default_rule.states_tot);
	counter_u64_free(V_pf_default_rule.src_nodes);
	uma_zfree_pcpu(pf_timestamp_pcpu_zone, V_pf_default_rule.timestamp);

	for (int i = 0; i < PFRES_MAX; i++)
		counter_u64_free(V_pf_status.counters[i]);
	for (int i = 0; i < KLCNT_MAX; i++)
		counter_u64_free(V_pf_status.lcounters[i]);
	for (int i = 0; i < FCNT_MAX; i++)
		pf_counter_u64_deinit(&V_pf_status.fcounters[i]);
	for (int i = 0; i < SCNT_MAX; i++)
		counter_u64_free(V_pf_status.scounters[i]);

	rm_destroy(&V_pf_rules_lock);
	sx_destroy(&V_pf_ioctl_lock);
}
6639
/*
 * Global pf teardown at module unload, run from SYSUNINIT(pf_unload)
 * after all per-vnet vnet_pf_uninit()s have completed: stop the purge
 * thread (waiting for it to acknowledge via pf_end_threads), then
 * unregister netlink handlers and destroy the control device.
 */
static void
pf_unload(void)
{

	sx_xlock(&pf_end_lock);
	pf_end_threads = 1;
	/* The purge thread bumps pf_end_threads to 2 when it exits. */
	while (pf_end_threads < 2) {
		wakeup_one(pf_purge_thread);
		sx_sleep(pf_purge_proc, &pf_end_lock, 0, "pftmo", 0);
	}
	sx_xunlock(&pf_end_lock);

	pf_nl_unregister();

	if (pf_dev != NULL)
		destroy_dev(pf_dev);

	pfi_cleanup();

	sx_destroy(&pf_end_lock);
}
6661
/*
 * VNET_SYSINIT glue: initialize pf in each vnet as it is created.
 */
static void
vnet_pf_init(void *unused __unused)
{

	pf_load_vnet();
}
VNET_SYSINIT(vnet_pf_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD,
    vnet_pf_init, NULL);
6670
6671 static void
6672 vnet_pf_uninit(const void *unused __unused)
6673 {
6674
6675         pf_unload_vnet();
6676
6677 SYSUNINIT(pf_unload, SI_SUB_PROTO_FIREWALL, SI_ORDER_SECOND, pf_unload, NULL);
6678 VNET_SYSUNINIT(vnet_pf_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD,
6679     vnet_pf_uninit, NULL);
6680
6681 static int
6682 pf_modevent(module_t mod, int type, void *data)
6683 {
6684         int error = 0;
6685
6686         switch(type) {
6687         case MOD_LOAD:
6688                 error = pf_load();
6689                 pf_nl_register();
6690                 break;
6691         case MOD_UNLOAD:
6692                 /* Handled in SYSUNINIT(pf_unload) to ensure it's done after
6693                  * the vnet_pf_uninit()s */
6694                 break;
6695         default:
6696                 error = EINVAL;
6697                 break;
6698         }
6699
6700         return (error);
6701 }
6702
/* Module descriptor for pf; no private data needed. */
static moduledata_t pf_mod = {
	"pf",
	pf_modevent,
	0
};

/* Load alongside the other firewalls; pf depends on netlink for its
 * control interface (state retrieval etc.). */
DECLARE_MODULE(pf, pf_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_SECOND);
MODULE_DEPEND(pf, netlink, 1, 1, 1);
MODULE_VERSION(pf, PF_MODVER);