2 * Copyright (C) 1993-2001 by Darren Reed.
4 * See the IPFILTER.LICENCE file for details on licencing.
6 * I hate legaleese, don't you ?
9 static const char sccsid[] = "%W% %G% (C) 1993-2000 Darren Reed";
10 static const char rcsid[] = "@(#)$Id: ip_sfil.c,v 2.23.2.27 2003/06/12 16:03:14 darrenr Exp $";
13 #include <sys/types.h>
14 #include <sys/errno.h>
15 #include <sys/param.h>
16 #include <sys/cpuvar.h>
18 #include <sys/ioctl.h>
19 #include <sys/filio.h>
20 #include <sys/systm.h>
23 #include <sys/sunddi.h>
24 #include <sys/ksynch.h>
26 #include <sys/mkdev.h>
27 #include <sys/protosw.h>
28 #include <sys/socket.h>
29 #include <sys/dditypes.h>
30 #include <sys/cmn_err.h>
33 #include <net/route.h>
34 #include <netinet/in.h>
35 #include <netinet/in_systm.h>
36 #include <netinet/ip.h>
37 #include <netinet/ip_var.h>
38 #include <netinet/tcp.h>
39 #include <netinet/udp.h>
40 #include <netinet/tcpip.h>
41 #include <netinet/ip_icmp.h>
42 #include "ip_compat.h"
44 # include <netinet/icmp6.h>
52 #include <inet/ip_ire.h>
54 #define MIN(a,b) (((a)<(b))?(a):(b))
58 extern fr_flags, fr_active;
61 int ipl_unreach = ICMP_UNREACH_HOST;
62 u_long ipl_frouteok[2] = {0, 0};
63 static int frzerostats __P((caddr_t));
65 static u_int *ip_ttl_ptr;
66 static u_int *ip_mtudisc;
68 static u_long *ip_ttl_ptr;
69 static u_long *ip_mtudisc;
72 static int frrequest __P((minor_t, int, caddr_t, int));
73 static int send_ip __P((fr_info_t *fin, mblk_t *m));
74 kmutex_t ipl_mutex, ipf_authmx, ipf_rw;
75 KRWLOCK_T ipf_mutex, ipfs_mutex, ipf_solaris;
76 KRWLOCK_T ipf_frag, ipf_state, ipf_nat, ipf_natfrag, ipf_auth;
77 kcondvar_t iplwait, ipfauthwait;
85 cmn_err(CE_CONT, "ipldetach()\n");
88 for (i = IPL_LOGMAX; i >= 0; i--)
91 i = frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE);
92 i += frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE);
97 cv_destroy(&ipfauthwait);
98 mutex_destroy(&ipf_authmx);
99 mutex_destroy(&ipl_mutex);
100 mutex_destroy(&ipf_rw);
101 RW_DESTROY(&ipf_mutex);
102 RW_DESTROY(&ipf_frag);
103 RW_DESTROY(&ipf_state);
104 RW_DESTROY(&ipf_natfrag);
105 RW_DESTROY(&ipf_nat);
106 RW_DESTROY(&ipf_auth);
107 RW_DESTROY(&ipfs_mutex);
108 /* NOTE: This lock is acquired in ipf_detach */
109 RWLOCK_EXIT(&ipf_solaris);
110 RW_DESTROY(&ipf_solaris);
115 int iplattach __P((void))
120 cmn_err(CE_CONT, "iplattach()\n");
122 bzero((char *)frcache, sizeof(frcache));
123 mutex_init(&ipf_rw, "ipf rw mutex", MUTEX_DRIVER, NULL);
124 mutex_init(&ipl_mutex, "ipf log mutex", MUTEX_DRIVER, NULL);
125 mutex_init(&ipf_authmx, "ipf auth log mutex", MUTEX_DRIVER, NULL);
126 RWLOCK_INIT(&ipf_solaris, "ipf filter load/unload mutex", NULL);
127 RWLOCK_INIT(&ipf_mutex, "ipf filter rwlock", NULL);
128 RWLOCK_INIT(&ipfs_mutex, "ipf solaris mutex", NULL);
129 RWLOCK_INIT(&ipf_frag, "ipf fragment rwlock", NULL);
130 RWLOCK_INIT(&ipf_state, "ipf IP state rwlock", NULL);
131 RWLOCK_INIT(&ipf_nat, "ipf IP NAT rwlock", NULL);
132 RWLOCK_INIT(&ipf_natfrag, "ipf IP NAT-Frag rwlock", NULL);
133 RWLOCK_INIT(&ipf_auth, "ipf IP User-Auth rwlock", NULL);
134 cv_init(&iplwait, "ipl condvar", CV_DRIVER, NULL);
135 cv_init(&ipfauthwait, "ipf auth condvar", CV_DRIVER, NULL);
139 if (nat_init() == -1)
141 if (fr_stateinit() == -1)
143 if (appr_init() == -1)
149 * XXX - There is no terminator for this array, so it is not possible
150 * to tell if what we are looking for is missing and go off the end
154 if (strcmp(ip_param_arr[i].ip_param_name, "ip_def_ttl") == 0) {
155 ip_ttl_ptr = &ip_param_arr[i].ip_param_value;
156 } else if (strcmp(ip_param_arr[i].ip_param_name,
157 "ip_path_mtu_discovery") == 0) {
158 ip_mtudisc = &ip_param_arr[i].ip_param_value;
161 if (ip_mtudisc != NULL && ip_ttl_ptr != NULL)
168 static int frzerostats(data)
175 error = IWCOPYPTR((caddr_t)&fio, data, sizeof(fio));
179 bzero((char *)frstats, sizeof(*frstats) * 2);
186 * Filter ioctl interface.
188 int iplioctl(dev, cmd, data, mode, cp, rp)
204 cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n",
205 dev, cmd, data, mode, cp, rp);
207 unit = getminor(dev);
208 if (IPL_LOGMAX < unit)
211 if (fr_running == 0 && (cmd != SIOCFRENB || unit != IPL_LOGIPF))
217 READ_ENTER(&ipf_solaris);
218 if (unit == IPL_LOGNAT) {
219 error = nat_ioctl((caddr_t)data, cmd, mode);
220 RWLOCK_EXIT(&ipf_solaris);
223 if (unit == IPL_LOGSTATE) {
224 error = fr_state_ioctl((caddr_t)data, cmd, mode);
225 RWLOCK_EXIT(&ipf_solaris);
228 if (unit == IPL_LOGAUTH) {
229 if ((cmd == SIOCADAFR) || (cmd == SIOCRMAFR)) {
230 if (!(mode & FWRITE)) {
233 error = frrequest(unit, cmd, (caddr_t)data,
237 error = fr_auth_ioctl((caddr_t)data, mode, cmd);
239 RWLOCK_EXIT(&ipf_solaris);
248 if (!(mode & FWRITE))
251 error = IRCOPY((caddr_t)data, (caddr_t)&enable,
256 if (!(mode & FWRITE))
259 WRITE_ENTER(&ipf_mutex);
260 error = IRCOPY((caddr_t)data, (caddr_t)&fr_flags,
262 RWLOCK_EXIT(&ipf_mutex);
266 error = IWCOPY((caddr_t)&fr_flags, (caddr_t)data,
275 if (!(mode & FWRITE))
278 error = frrequest(unit, cmd, (caddr_t)data, fr_active);
283 if (!(mode & FWRITE))
286 error = frrequest(unit, cmd, (caddr_t)data,
290 if (!(mode & FWRITE))
293 WRITE_ENTER(&ipf_mutex);
294 bzero((char *)frcache, sizeof(frcache[0]) * 2);
295 error = IWCOPY((caddr_t)&fr_active, (caddr_t)data,
299 fr_active = 1 - fr_active;
300 RWLOCK_EXIT(&ipf_mutex);
307 READ_ENTER(&ipf_mutex);
309 RWLOCK_EXIT(&ipf_mutex);
310 error = IWCOPYPTR((caddr_t)&fio, (caddr_t)data, sizeof(fio));
316 if (!(mode & FWRITE))
319 error = frzerostats((caddr_t)data);
322 if (!(mode & FWRITE))
325 error = IRCOPY((caddr_t)data, (caddr_t)&tmp,
328 tmp = frflush(unit, 4, tmp);
329 error = IWCOPY((caddr_t)&tmp, (caddr_t)data,
338 if (!(mode & FWRITE))
341 error = IRCOPY((caddr_t)data, (caddr_t)&tmp,
344 tmp = frflush(unit, 6, tmp);
345 error = IWCOPY((caddr_t)&tmp, (caddr_t)data,
354 error = IRCOPY((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
365 if (!(mode & FWRITE))
368 tmp = ipflog_clear(unit);
369 error = IWCOPY((caddr_t)&tmp, (caddr_t)data,
375 #endif /* IPFILTER_LOG */
377 if (!(mode & FWRITE))
383 error = IWCOPYPTR((caddr_t)ipfr_fragstats(), (caddr_t)data,
389 int copy = (int)iplused[IPL_LOGIPF];
391 error = IWCOPY((caddr_t)&copy, (caddr_t)data, sizeof(copy));
401 RWLOCK_EXIT(&ipf_solaris);
406 ill_t *get_unit(name, v)
410 size_t len = strlen(name) + 1; /* includes \0 */
413 ill_walk_context_t ctx;
424 for (il = ILL_START_WALK_ALL(&ctx); il; il = ill_next(&ctx, il))
426 for (il = ill_g_head; il; il = il->ill_next)
428 if ((len == il->ill_name_length) && (il->ill_sap == sap) &&
429 !strncmp(il->ill_name, name, len))
435 static int frrequest(unit, req, data, set)
440 register frentry_t *fp, *f, **fprev;
441 register frentry_t **ftail;
442 frgroup_t *fg = NULL;
443 int error = 0, in, i;
453 error = IRCOPYPTR(data, (caddr_t)fp, sizeof(*fp));
459 fp->fr_sap = IP_DL_SAP;
460 else if (fp->fr_v == 6)
461 fp->fr_sap = IP6_DL_SAP;
468 WRITE_ENTER(&ipf_mutex);
470 * Check that the group number does exist and that, if a head group
471 * has been specified, it does not already exist.
473 if ((req != SIOCZRLST) && ((req == SIOCINAFR) || (req == SIOCINIFR) ||
474 (req == SIOCADAFR) || (req == SIOCADIFR)) && fp->fr_grhead &&
475 fr_findgroup(fp->fr_grhead, fp->fr_flags, unit, set, NULL)) {
479 if ((req != SIOCZRLST) && fp->fr_group &&
480 !fr_findgroup(fp->fr_group, fp->fr_flags, unit, set, NULL)) {
485 in = (fp->fr_flags & FR_INQUE) ? 0 : 1;
487 if (unit == IPL_LOGAUTH)
488 ftail = fprev = &ipauth;
489 else if ((fp->fr_flags & FR_ACCOUNT) && (fp->fr_v == 4))
490 ftail = fprev = &ipacct[in][set];
491 else if ((fp->fr_flags & (FR_OUTQUE|FR_INQUE)) && (fp->fr_v == 4))
492 ftail = fprev = &ipfilter[in][set];
494 else if ((fp->fr_flags & FR_ACCOUNT) && (fp->fr_v == 6))
495 ftail = fprev = &ipacct6[in][set];
496 else if ((fp->fr_flags & (FR_OUTQUE|FR_INQUE)) && (fp->fr_v == 6))
497 ftail = fprev = &ipfilter6[in][set];
504 group = fp->fr_group;
506 fg = fr_findgroup(group, fp->fr_flags, unit, set, NULL);
511 ftail = fprev = fg->fg_start;
514 bzero((char *)frcache, sizeof(frcache[0]) * 2);
516 for (i = 0; i < 4; i++) {
517 if ((fp->fr_ifnames[i][1] == '\0') &&
518 ((fp->fr_ifnames[i][0] == '-') ||
519 (fp->fr_ifnames[i][0] == '*'))) {
520 fp->fr_ifas[i] = NULL;
521 } else if (*fp->fr_ifnames[i]) {
522 fp->fr_ifas[i] = GETUNIT(fp->fr_ifnames[i], fp->fr_v);
524 fp->fr_ifas[i] = (void *)-1;
530 fp->fr_flags &= ~FR_DUP;
531 if (*fdp->fd_ifname) {
532 ill = get_unit(fdp->fd_ifname, (int)fp->fr_v);
535 else if ((ipif = ill->ill_ipif) && (fp->fr_v == 4)) {
537 ire = ire_ctable_lookup(ipif->ipif_local_addr, 0,
538 IRE_LOCAL, NULL, NULL,
541 ire = ire_lookup_myaddr(ipif->ipif_local_addr);
546 fp->fr_flags |= FR_DUP;
549 else if ((ipif = ill->ill_ipif) && (fp->fr_v == 6)) {
550 ire = ire_ctable_lookup_v6(&ipif->ipif_v6lcl_addr, 0,
551 IRE_LOCAL, NULL, NULL,
556 fp->fr_flags |= FR_DUP;
559 fdp->fd_ifp = (struct ifnet *)ire;
564 if (*fdp->fd_ifname) {
565 ill = get_unit(fdp->fd_ifname, (int)fp->fr_v);
568 else if ((ipif = ill->ill_ipif) && (fp->fr_v == 4)) {
570 ire = ire_ctable_lookup(ipif->ipif_local_addr, 0,
571 IRE_LOCAL, NULL, NULL,
574 ire = ire_lookup_myaddr(ipif->ipif_local_addr);
580 else if ((ipif = ill->ill_ipif) && (fp->fr_v == 6)) {
581 ire = ire_ctable_lookup_v6(&ipif->ipif_v6lcl_addr, 0,
582 IRE_LOCAL, NULL, NULL,
588 fdp->fd_ifp = (struct ifnet *)ire;
592 * Look for a matching filter rule, but don't include the next or
593 * interface pointer in the comparison (fr_next, fr_ifa).
595 for (fp->fr_cksum = 0, p = (u_int *)&fp->fr_ip, pp = &fp->fr_cksum;
599 for (; (f = *ftail); ftail = &f->fr_next)
600 if ((fp->fr_cksum == f->fr_cksum) &&
601 !bcmp((char *)&f->fr_ip, (char *)&fp->fr_ip, FR_CMPSIZ))
605 * If zeroing statistics, copy current to caller and zero.
607 if (req == SIOCZRLST) {
612 MUTEX_DOWNGRADE(&ipf_mutex);
613 error = IWCOPYPTR((caddr_t)f, data, sizeof(*f));
622 if (req != SIOCINAFR && req != SIOCINIFR)
628 while (--fp->fr_hits && (f = *ftail))
635 if (req == SIOCRMAFR || req == SIOCRMIFR) {
640 * Only return EBUSY if there is a group list, else
641 * it's probably just state information referencing
644 if ((f->fr_ref > 1) && f->fr_grp) {
648 if (fg && fg->fg_head)
649 fg->fg_head->fr_ref--;
650 if (unit == IPL_LOGAUTH) {
651 return fr_preauthcmd(req, f, ftail);
654 fr_delgroup(f->fr_grhead, fp->fr_flags,
656 fixskip(fprev, f, -1);
667 if (unit == IPL_LOGAUTH) {
668 return fr_preauthcmd(req, fp, ftail);
670 KMALLOC(f, frentry_t *);
672 if (fg && fg->fg_head)
673 fg->fg_head->fr_ref++;
674 bcopy((char *)fp, (char *)f, sizeof(*f));
679 if (req == SIOCINIFR || req == SIOCINAFR)
680 fixskip(fprev, f, 1);
682 group = f->fr_grhead;
684 fg = fr_addgroup(group, f, unit, set);
690 RWLOCK_EXIT(&ipf_mutex);
696 * routines below for saving IP headers to buffer
698 int iplopen(devp, flags, otype, cred)
703 minor_t min = getminor(*devp);
706 cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred);
708 if ((fr_running <= 0) || !(otype & OTYP_CHR))
710 min = (IPL_LOGMAX < min) ? ENXIO : 0;
715 int iplclose(dev, flags, otype, cred)
720 minor_t min = getminor(dev);
723 cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred);
725 min = (IPL_LOGMAX < min) ? ENXIO : 0;
732 * both of these must operate with at least splnet() lest they be
733 * called during packet processing and cause an inconsistency to appear in
736 int iplread(dev, uio, cp)
738 register struct uio *uio;
742 cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp);
744 return ipflog_read(getminor(dev), uio);
746 #endif /* IPFILTER_LOG */
750 * send_reset - this could conceivably be a call to tcp_respond(), but that
751 * requires a large amount of setting up and isn't any more efficient.
753 int send_reset(oip, fin)
757 tcphdr_t *tcp, *tcp2;
761 ip6_t *ip6, *oip6 = (ip6_t *)oip;
765 tcp = (struct tcphdr *)fin->fin_dp;
766 if (tcp->th_flags & TH_RST)
768 tlen = (tcp->th_flags & (TH_SYN|TH_FIN)) ? 1 : 0;
771 hlen = sizeof(ip6_t);
775 hlen += sizeof(*tcp2);
776 if ((m = (mblk_t *)allocb(hlen + 16, BPRI_HI)) == NULL)
781 m->b_wptr = m->b_rptr + hlen;
782 bzero((char *)m->b_rptr, hlen);
783 tcp2 = (struct tcphdr *)(m->b_rptr + hlen - sizeof(*tcp2));
784 tcp2->th_dport = tcp->th_sport;
785 tcp2->th_sport = tcp->th_dport;
786 if (tcp->th_flags & TH_ACK) {
787 tcp2->th_seq = tcp->th_ack;
788 tcp2->th_flags = TH_RST;
790 tcp2->th_ack = ntohl(tcp->th_seq);
791 tcp2->th_ack += tlen;
792 tcp2->th_ack = htonl(tcp2->th_ack);
793 tcp2->th_flags = TH_RST|TH_ACK;
795 tcp2->th_off = sizeof(struct tcphdr) >> 2;
798 * This is to get around a bug in the Solaris 2.4/2.5 TCP checksum
799 * computation that is done by their put routine.
801 tcp2->th_sum = htons(0x14);
803 if (fin->fin_v == 6) {
804 ip6 = (ip6_t *)m->b_rptr;
805 ip6->ip6_src = oip6->ip6_dst;
806 ip6->ip6_dst = oip6->ip6_src;
807 ip6->ip6_plen = htons(sizeof(*tcp));
808 ip6->ip6_nxt = IPPROTO_TCP;
812 ip = (ip_t *)m->b_rptr;
813 ip->ip_src.s_addr = oip->ip_dst.s_addr;
814 ip->ip_dst.s_addr = oip->ip_src.s_addr;
815 ip->ip_hl = sizeof(*ip) >> 2;
816 ip->ip_p = IPPROTO_TCP;
817 ip->ip_len = htons(sizeof(*ip) + sizeof(*tcp));
818 ip->ip_tos = oip->ip_tos;
820 return send_ip(fin, m);
824 int static send_ip(fin, m)
828 RWLOCK_EXIT(&ipfs_mutex);
829 RWLOCK_EXIT(&ipf_solaris);
831 if (fin->fin_v == 6) {
832 extern void ip_wput_v6 __P((queue_t *, mblk_t *));
835 ip6 = (ip6_t *)m->b_rptr;
839 ip_wput_v6(((qif_t *)fin->fin_qif)->qf_ill->ill_wq, m);
845 ip = (ip_t *)m->b_rptr;
846 ip->ip_v = IPVERSION;
847 ip->ip_ttl = (u_char)(*ip_ttl_ptr);
848 ip->ip_off = htons(*ip_mtudisc ? IP_DF : 0);
849 ip_wput(((qif_t *)fin->fin_qif)->qf_ill->ill_wq, m);
851 READ_ENTER(&ipf_solaris);
852 READ_ENTER(&ipfs_mutex);
857 int send_icmp_err(oip, type, fin, dst)
875 if ((type < 0) || (type > ICMP_MAXTYPE))
878 code = fin->fin_icode;
880 if ((code < 0) || (code > sizeof(icmptoicmp6unreach)/sizeof(int)))
888 if (oip->ip_v == 6) {
891 sz += MIN(m->b_wptr - m->b_rptr, 512);
892 hlen = sizeof(ip6_t);
893 type = icmptoicmp6types[type];
894 if (type == ICMP6_DST_UNREACH)
895 code = icmptoicmp6unreach[code];
899 if ((oip->ip_p == IPPROTO_ICMP) &&
900 !(fin->fin_fi.fi_fl & FI_SHORT))
901 switch (ntohs(fin->fin_data[0]) >> 8)
912 sz = sizeof(ip_t) * 2;
913 sz += 8; /* 64 bits of data */
917 sz += offsetof(struct icmp, icmp_ip);
918 if ((mb = (mblk_t *)allocb((size_t)sz + 16, BPRI_HI)) == NULL)
922 mb->b_wptr = mb->b_rptr + sz;
923 bzero((char *)mb->b_rptr, (size_t)sz);
924 icmp = (struct icmp *)(mb->b_rptr + sizeof(*ip));
925 icmp->icmp_type = type;
926 icmp->icmp_code = code;
927 icmp->icmp_cksum = 0;
929 if (type == ICMP_UNREACH && (il = qif->qf_ill) &&
930 fin->fin_icode == ICMP_UNREACH_NEEDFRAG)
931 icmp->icmp_nextmtu = htons(il->ill_max_frag);
935 if (oip->ip_v == 6) {
936 struct in6_addr dst6;
940 if (fr_ifpaddr(6, ((qif_t *)fin->fin_qif)->qf_ill,
941 (struct in_addr *)&dst6) == -1)
944 dst6 = oip6->ip6_dst;
948 ip6 = (ip6_t *)mb->b_rptr;
952 ip6->ip6_plen = htons(sz);
953 ip6->ip6_nxt = IPPROTO_ICMPV6;
955 ip6->ip6_dst = oip6->ip6_src;
956 sz -= offsetof(struct icmp, icmp_ip);
957 bcopy((char *)m->b_rptr, (char *)&icmp->icmp_ip, sz);
958 icmp->icmp_cksum = csz - sizeof(ip6_t);
962 ip = (ip_t *)mb->b_rptr;
963 ip->ip_v = IPVERSION;
964 ip->ip_hl = (sizeof(*ip) >> 2);
965 ip->ip_p = IPPROTO_ICMP;
966 ip->ip_id = oip->ip_id;
968 ip->ip_ttl = (u_char)(*ip_ttl_ptr);
969 ip->ip_tos = oip->ip_tos;
970 ip->ip_len = (u_short)htons(sz);
972 if (fr_ifpaddr(4, ((qif_t *)fin->fin_qif)->qf_ill,
978 ip->ip_dst = oip->ip_src;
979 bcopy((char *)oip, (char *)&icmp->icmp_ip, sizeof(*oip));
980 bcopy((char *)oip + (oip->ip_hl << 2),
981 (char *)&icmp->icmp_ip + sizeof(*oip), 8);
982 icmp->icmp_cksum = ipf_cksum((u_short *)icmp,
987 * Need to exit out of these so we don't recursively call rw_enter
990 return send_ip(fin, mb);