1 /* $KAME: altq_subr.c,v 1.23 2004/04/20 16:10:06 itojun Exp $ */
4 * Copyright (C) 1997-2003
5 * Sony Computer Science Laboratories Inc. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 #include "opt_inet6.h"
33 #include <sys/param.h>
34 #include <sys/malloc.h>
36 #include <sys/systm.h>
38 #include <sys/socket.h>
39 #include <sys/socketvar.h>
40 #include <sys/kernel.h>
41 #include <sys/callout.h>
42 #include <sys/errno.h>
43 #include <sys/syslog.h>
44 #include <sys/sysctl.h>
45 #include <sys/queue.h>
46 #include <sys/thread2.h>
49 #include <net/if_dl.h>
50 #include <net/if_types.h>
51 #include <net/ifq_var.h>
52 #include <net/netmsg2.h>
53 #include <net/netisr2.h>
55 #include <netinet/in.h>
56 #include <netinet/in_systm.h>
57 #include <netinet/ip.h>
59 #include <netinet/ip6.h>
61 #include <netinet/tcp.h>
62 #include <netinet/udp.h>
64 #include <net/pf/pfvar.h>
65 #include <net/altq/altq.h>
67 /* machine dependent clock related includes */
68 #include <machine/clock.h> /* for tsc_frequency */
69 #include <machine/md_var.h> /* for cpu_feature */
70 #include <machine/specialreg.h> /* for CPUID_TSC */
73 * internal function prototypes
/* Forward declarations of the file-local helpers defined further down. */
75 static void tbr_timeout(void *);
76 static void tbr_timeout_dispatch(netmsg_t);
77 static int altq_enable_locked(struct ifaltq *);
78 static int altq_disable_locked(struct ifaltq *);
79 static int altq_detach_locked(struct ifaltq *);
80 static int tbr_set_locked(struct ifaltq *, struct tb_profile *);
/* Function-pointer hook taking an mbuf and an int; initialised to NULL here. */
82 int (*altq_input)(struct mbuf *, int) = NULL;
83 static int tbr_timer = 0; /* token bucket regulator timer */
/*
 * tbr_callout fires tbr_timeout(); tbr_timeout() in turn forwards
 * tbr_timeout_netmsg to the netisr port of cpu 0, where
 * tbr_timeout_dispatch() handles it.
 */
84 static struct callout tbr_callout;
85 static struct netmsg_base tbr_timeout_netmsg;
87 int pfaltq_running; /* keep track of running state */
/* Malloc type handed to kmalloc() for the structures allocated in this file. */
89 MALLOC_DEFINE(M_ALTQ, "altq", "ALTQ structures");
92 * alternate queueing support routines
95 /* look up the queue state by the interface name and the queueing type. */
/*
 * altq_lookup: resolve the interface called name through ifunit() and
 * return the discipline pointer stored on its send queue
 * (if_snd.altq_disc).  The pointer is returned only when type is not
 * ALTQT_NONE and equals the altq_type recorded on that send queue.
 * NOTE(review): the result for the no-match case is on a line that is
 * not visible in this view -- presumably NULL; confirm.
 */
97 altq_lookup(const char *name, int type)
101 if ((ifp = ifunit(name)) != NULL) {
102 if (type != ALTQT_NONE && ifp->if_snd.altq_type == type)
103 return (ifp->if_snd.altq_disc);
/*
 * altq_attach: bind a queueing discipline to the interface queue ifq.
 *
 * Stores the discipline type, the discipline state pointer, the
 * classifier handle and the classify callback in ifq, then installs the
 * subqueue mapping / enqueue / dequeue / request methods through
 * ifq_set_methods().
 *
 * The queue readiness test comes first.
 * NOTE(review): the value returned for a not-ready queue, and the final
 * return value, are on lines that are not visible in this view.
 */
110 altq_attach(struct ifaltq *ifq, int type, void *discipline,
111 altq_mapsubq_t mapsubq,
112 ifsq_enqueue_t enqueue, ifsq_dequeue_t dequeue, ifsq_request_t request,
114 void *(*classify)(struct ifaltq *, struct mbuf *, struct altq_pktattr *))
116 if (!ifq_is_ready(ifq))
119 ifq->altq_type = type;
120 ifq->altq_disc = discipline;
121 ifq->altq_clfier = clfier;
122 ifq->altq_classify = classify;
/* keep only the CANTCHANGE and ENABLED bits; every other flag is cleared */
123 ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED);
124 ifq_set_methods(ifq, mapsubq, enqueue, dequeue, request);
/*
 * altq_detach_locked: undo altq_attach() on ifq.
 *
 * Three state tests come first: queue not ready, discipline still
 * enabled, and no discipline attached.
 * NOTE(review): the result of each test is on a line that is not
 * visible in this view; presumably each one returns early -- confirm.
 *
 * The remaining statements switch the queue back to the classic methods
 * via ifq_set_classic() and clear the discipline bookkeeping.
 * NOTE(review): the _locked suffix suggests the caller holds the queue
 * lock; no locking is visible here -- confirm against callers.
 */
129 altq_detach_locked(struct ifaltq *ifq)
131 if (!ifq_is_ready(ifq))
133 if (ifq_is_enabled(ifq))
135 if (!ifq_is_attached(ifq))
138 ifq_set_classic(ifq);
139 ifq->altq_type = ALTQT_NONE;
140 ifq->altq_disc = NULL;
141 ifq->altq_clfier = NULL;
142 ifq->altq_classify = NULL;
/* only the CANTCHANGE bit survives a detach */
143 ifq->altq_flags &= ALTQF_CANTCHANGE;
/*
 * altq_detach: public entry point that delegates to altq_detach_locked()
 * and keeps its result in error.
 * NOTE(review): the statements around the call are not visible in this
 * view; presumably they take and release the queue lock -- confirm.
 */
148 altq_detach(struct ifaltq *ifq)
153 error = altq_detach_locked(ifq);
/*
 * altq_enable_locked: turn the attached discipline on for ifq.
 *
 * The not-ready and already-enabled cases are tested first.
 * NOTE(review): their results are on lines not visible in this view.
 *
 * The remaining statements purge everything queued on ifq and set
 * ALTQF_ENABLED.  ALTQF_CLASSIFY is set in addition when a classifier
 * handle (altq_clfier) has been registered.
 */
159 altq_enable_locked(struct ifaltq *ifq)
161 if (!ifq_is_ready(ifq))
163 if (ifq_is_enabled(ifq))
/* drop packets queued so far before the enabled flag is set */
166 ifq_purge_all_locked(ifq);
168 ifq->altq_flags |= ALTQF_ENABLED;
169 if (ifq->altq_clfier != NULL)
170 ifq->altq_flags |= ALTQF_CLASSIFY;
/*
 * altq_enable: public entry point that delegates to altq_enable_locked()
 * and keeps its result in error.
 * NOTE(review): the statements around the call are not visible in this
 * view; presumably they take and release the queue lock -- confirm.
 */
175 altq_enable(struct ifaltq *ifq)
180 error = altq_enable_locked(ifq);
/*
 * altq_disable_locked: turn the discipline off for ifq.
 *
 * The not-enabled case is tested first.
 * NOTE(review): its result is on a line not visible in this view.
 *
 * The remaining statements purge everything queued on ifq and clear both
 * ALTQF_ENABLED and ALTQF_CLASSIFY.
 */
186 altq_disable_locked(struct ifaltq *ifq)
188 if (!ifq_is_enabled(ifq))
191 ifq_purge_all_locked(ifq);
192 ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
/*
 * altq_disable: public entry point that delegates to
 * altq_disable_locked() and keeps its result in error.
 * NOTE(review): the statements around the call are not visible in this
 * view; presumably they take and release the queue lock -- confirm.
 */
197 altq_disable(struct ifaltq *ifq)
202 error = altq_disable_locked(ifq);
208 * internal representation of token bucket parameters
209 * rate: byte_per_unittime << 32
210 * (((bits_per_sec) / 8) << 32) / machclk_freq
/*
 * Fixed-point helpers for the token bucket.  TBR_SCALE widens x to
 * int64_t and shifts it left by TBR_SHIFT; TBR_UNSCALE shifts a scaled
 * value back right by the same amount (x is not cast there, so the
 * result keeps the type of x).
 */
215 #define TBR_SCALE(x) ((int64_t)(x) << TBR_SHIFT)
216 #define TBR_UNSCALE(x) ((x) >> TBR_SHIFT)
/*
 * tbr_dequeue: dequeue entry point used while a token bucket regulator
 * is set on the interface queue.
 *
 * ifsq is the subqueue being serviced; op is ALTDQ_POLL or ALTDQ_REMOVE.
 * A packet is handed out only while the token count is positive.  A
 * real removal charges the packet length (scaled with TBR_SCALE)
 * against the tokens.
 *
 * NOTE(review): several short lines of this function are not visible in
 * this view: the local declarations of m, now and interval, the
 * assignment of tbr, the else keywords and the return statements.
 * Comments below describe only the visible statements.
 */
219 tbr_dequeue(struct ifaltq_subque *ifsq, int op)
221 struct ifaltq *ifq = ifsq->ifsq_altq;
222 struct tb_regulator *tbr;
/* only the default subqueue is regulated; any other subqueue is purged */
227 if (ifsq_get_index(ifsq) != ALTQ_SUBQ_INDEX_DEFAULT) {
229 * Race happened, the unrelated subqueue was
230 * picked during the packet scheduler transition.
232 ifsq_classic_request(ifsq, ALTRQ_PURGE, NULL);
238 if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
239 /* if this is a remove after poll, bypass tbr check */
241 /* update token only when it is not positive (<= 0) */
242 if (tbr->tbr_token <= 0) {
/* time elapsed since tbr_last, in machine clock units */
243 now = read_machclk();
244 interval = now - tbr->tbr_last;
/* a gap of at least tbr_filluptime refills the bucket completely */
245 if (interval >= tbr->tbr_filluptime)
246 tbr->tbr_token = tbr->tbr_depth;
/* tokens accrue at tbr_rate per clock unit and are capped at tbr_depth */
248 tbr->tbr_token += interval * tbr->tbr_rate;
249 if (tbr->tbr_token > tbr->tbr_depth)
250 tbr->tbr_token = tbr->tbr_depth;
254 /* if token is still not positive, don't allow dequeue */
255 if (tbr->tbr_token <= 0) {
/*
 * An enabled discipline dequeues through the method installed on the
 * subqueue; the other visible call is the classic dequeue.
 */
261 if (ifq_is_enabled(ifq))
262 m = (*ifsq->ifsq_dequeue)(ifsq, op);
264 m = ifsq_classic_dequeue(ifsq, op);
/* a poll costs nothing; only an actual removal consumes tokens */
266 if (m != NULL && op == ALTDQ_REMOVE)
267 tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
/* remembered so that the next call can detect a remove-after-poll */
268 tbr->tbr_lastop = op;
274 * set a token bucket regulator.
275 * if the specified rate is zero, the token bucket regulator is deleted.
/*
 * tbr_set_locked: install, replace or delete the token bucket regulator
 * of ifq according to profile.
 *
 * profile->rate is divided by 8 before scaling, matching the
 * bits_per_sec convention described in the comment above TBR_SCALE;
 * profile->depth is scaled as is.  A rate of zero deletes the current
 * regulator.
 *
 * NOTE(review): the return statements, the release of the old
 * regulator and the statement guarded by the first machclk_freq test
 * are on lines that are not visible in this view.
 */
278 tbr_set_locked(struct ifaltq *ifq, struct tb_profile *profile)
280 struct tb_regulator *tbr, *otbr;
/* a clock frequency is required for the rate conversion below */
282 if (machclk_freq == 0)
284 if (machclk_freq == 0) {
285 kprintf("%s: no cpu clock available!\n", __func__);
289 if (profile->rate == 0) {
290 /* delete this tbr */
291 if ((tbr = ifq->altq_tbr) == NULL)
293 ifq->altq_tbr = NULL;
/* M_WAITOK | M_ZERO: the allocation may sleep and comes back zero-filled */
298 tbr = kmalloc(sizeof(*tbr), M_ALTQ, M_WAITOK | M_ZERO);
/* scaled bytes per machine clock unit */
299 tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
300 tbr->tbr_depth = TBR_SCALE(profile->depth);
/* clock units needed to refill an empty bucket up to tbr_depth */
301 if (tbr->tbr_rate > 0)
302 tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
/*
 * NOTE(review): if tbr_filluptime is a signed 64-bit field, this
 * constant converts to -1, and the interval >= tbr_filluptime test in
 * tbr_dequeue() would then always succeed.  Confirm the field type; a
 * maximum signed value may be what is intended.
 */
304 tbr->tbr_filluptime = 0xffffffffffffffffLL;
/* start with a full bucket */
305 tbr->tbr_token = tbr->tbr_depth;
306 tbr->tbr_last = read_machclk();
307 tbr->tbr_lastop = ALTDQ_REMOVE;
309 otbr = ifq->altq_tbr;
310 ifq->altq_tbr = tbr; /* set the new tbr */
/*
 * Arm the one-tick timeout when it is not already running.  The final
 * argument 0 matches the cpu0 assertion made in tbr_timeout().
 */
314 else if (tbr_timer == 0) {
315 callout_reset_bycpu(&tbr_callout, 1, tbr_timeout, NULL, 0);
/*
 * tbr_set: public entry point that delegates to tbr_set_locked() and
 * keeps its result in error.
 * NOTE(review): the statements around the call are not visible in this
 * view; presumably they take and release the queue lock -- confirm.
 */
322 tbr_set(struct ifaltq *ifq, struct tb_profile *profile)
327 error = tbr_set_locked(ifq, profile);
/*
 * tbr_timeout: callout handler for tbr_callout.  It does no work itself;
 * it forwards tbr_timeout_netmsg to the netisr port of cpu 0, where
 * tbr_timeout_dispatch() runs.  The argument is unused.
 */
333 tbr_timeout(void *arg __unused)
335 struct lwkt_msg *lmsg = &tbr_timeout_netmsg.lmsg;
337 KASSERT(mycpuid == 0, ("not on cpu0"));
/* presumably MSGF_DONE means the previous send has completed -- confirm */
339 if (lmsg->ms_flags & MSGF_DONE)
340 lwkt_sendmsg_oncpu(netisr_cpuport(0), lmsg);
345 * tbr_timeout goes through the interface list, and kicks the drivers
/*
 * tbr_timeout_dispatch: netmsg handler behind tbr_timeout().
 *
 * Replies to the message at once, then walks the interface array.  For
 * each interface that has a token bucket regulator set, it calls the
 * driver start routine on the default subqueue when that subqueue is
 * not empty.
 *
 * NOTE(review): the condition that selects between re-arming the
 * callout and clearing tbr_timer is on lines not visible in this view.
 * The same holds for the statement guarded by the altq_tbr == NULL test.
 */
349 tbr_timeout_dispatch(netmsg_t nmsg)
351 const struct ifnet_array *arr;
357 lwkt_replymsg(&nmsg->lmsg, 0); /* reply ASAP */
361 arr = ifnet_array_get();
362 for (i = 0; i < arr->ifnet_count; ++i) {
363 struct ifnet *ifp = arr->ifnet_arr[i];
364 struct ifaltq_subque *ifsq;
/* interfaces without a regulator are of no interest here */
366 if (ifp->if_snd.altq_tbr == NULL)
369 ifsq = &ifp->if_snd.altq_subq[ALTQ_SUBQ_INDEX_DEFAULT];
/* if_start is invoked between the hardware serialize/deserialize calls */
371 if (!ifsq_is_empty(ifsq) && ifp->if_start != NULL) {
372 ifsq_serialize_hw(ifsq);
373 (*ifp->if_start)(ifp, ifsq);
374 ifsq_deserialize_hw(ifsq);
/* schedule tbr_timeout() again one tick from now */
378 callout_reset(&tbr_callout, 1, tbr_timeout, NULL);
380 tbr_timer = 0; /* don't need tbr_timer anymore */
384 * get token bucket regulator profile
/*
 * tbr_get: report the token bucket parameters of ifq through profile.
 *
 * The stored values are converted back with TBR_UNSCALE and truncated to
 * u_int: the rate as tbr_rate * 8 * machclk_freq, which reverses the
 * conversion done in tbr_set_locked(), and the depth as tbr_depth.
 *
 * NOTE(review): the handling of a queue without a regulator, and the
 * left-hand side of the rate expression, are on lines not visible in
 * this view.
 */
387 tbr_get(struct ifaltq *ifq, struct tb_profile *profile)
389 struct tb_regulator *tbr;
391 if ((tbr = ifq->altq_tbr) == NULL) {
396 (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq);
397 profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth);
403 * attach a discipline to the interface. if one already exists, it is
/*
 * altq_pfattach: attach the discipline described by the pf_altq record a
 * to the interface named a->ifname.
 *
 * The work is handed to the scheduler-specific attach routine
 * (cbq, priq, hfsc or fairq) selected by a->scheduler.  If that
 * succeeds while pfaltq_running is set, the discipline is enabled.
 * Once the queue is enabled, the token bucket regulator is set from
 * a->ifbandwidth and a->tbrsize.
 *
 * NOTE(review): the early exits after the first two tests, the lookup
 * failure path, the case labels and the locking are on lines not
 * visible in this view.
 */
407 altq_pfattach(struct pf_altq *a)
413 if (a->scheduler == ALTQT_NONE)
416 if (a->altq_disc == NULL)
421 ifp = ifunit(a->ifname);
/* one attach routine per supported scheduler type */
430 switch (a->scheduler) {
433 error = cbq_pfattach(a, ifq);
438 error = priq_pfattach(a, ifq);
443 error = hfsc_pfattach(a, ifq);
448 error = fairq_pfattach(a, ifq);
456 /* if the state is running, enable altq */
457 if (error == 0 && pfaltq_running && ifq->altq_type != ALTQT_NONE &&
458 !ifq_is_enabled(ifq))
459 error = altq_enable_locked(ifq);
461 /* if altq is already enabled, (re)set the token bucket regulator */
462 if (error == 0 && ifq_is_enabled(ifq)) {
463 struct tb_profile tb;
465 tb.rate = a->ifbandwidth;
466 tb.depth = a->tbrsize;
467 error = tbr_set_locked(ifq, &tb);
476 * detach a discipline from the interface.
477 * it is possible that the discipline was already overridden by another
/*
 * altq_pfdetach: detach the discipline described by a from the interface
 * named a->ifname.
 *
 * The discipline is disabled first when the queue is enabled, and then
 * detached.  Two cases are tested before that: a record whose altq_disc
 * is NULL, and a record whose altq_disc differs from the one on the
 * queue.
 *
 * NOTE(review): the results of those tests, the lookup failure path and
 * the locking are on lines not visible in this view.  It is also not
 * visible whether the detach call is conditional on the disable result.
 */
481 altq_pfdetach(struct pf_altq *a)
489 ifp = ifunit(a->ifname);
496 /* if this discipline is no longer referenced, just return */
497 if (a->altq_disc == NULL) {
/* the queue may already carry a different discipline than this record */
504 if (a->altq_disc != ifq->altq_disc)
507 if (ifq_is_enabled(ifq))
508 error = altq_disable_locked(ifq);
510 error = altq_detach_locked(ifq);
519 * add a discipline or a queue
/*
 * altq_add: add a discipline, or a queue when a->qname is not empty.
 *
 * A record with a non-empty qname is passed straight to
 * altq_add_queue().  Otherwise the scheduler-specific add routine
 * (cbq, priq, hfsc or fairq) is called.  A machclk_freq of zero at the
 * second test is fatal: the function panics.
 *
 * NOTE(review): the statement guarded by the first machclk_freq test,
 * the case labels and the return statement are on lines not visible in
 * this view.
 */
522 altq_add(struct pf_altq *a)
/* a queue name means this record describes a queue, not a discipline */
526 if (a->qname[0] != 0)
527 return (altq_add_queue(a));
529 if (machclk_freq == 0)
531 if (machclk_freq == 0)
532 panic("altq_add: no cpu clock");
534 switch (a->scheduler) {
537 error = cbq_add_altq(a);
542 error = priq_add_altq(a);
547 error = hfsc_add_altq(a);
552 error = fairq_add_altq(a);
563 * remove a discipline or a queue
/*
 * altq_remove: remove a discipline, or a queue when a->qname is not
 * empty.
 *
 * A record with a non-empty qname is passed straight to
 * altq_remove_queue().  Otherwise the scheduler-specific remove routine
 * (cbq, priq, hfsc or fairq) is called.
 *
 * NOTE(review): the case labels and the return statement are on lines
 * not visible in this view.
 */
566 altq_remove(struct pf_altq *a)
570 if (a->qname[0] != 0)
571 return (altq_remove_queue(a));
573 switch (a->scheduler) {
576 error = cbq_remove_altq(a);
581 error = priq_remove_altq(a);
586 error = hfsc_remove_altq(a);
591 error = fairq_remove_altq(a);
602 * add a queue to the discipline
/*
 * altq_add_queue: add the queue described by a to its discipline by
 * calling the add_queue routine of the scheduler named in a->scheduler
 * (cbq, priq, hfsc or fairq).
 * NOTE(review): the case labels and the return statement are on lines
 * not visible in this view.
 */
605 altq_add_queue(struct pf_altq *a)
609 switch (a->scheduler) {
612 error = cbq_add_queue(a);
617 error = priq_add_queue(a);
622 error = hfsc_add_queue(a);
627 error = fairq_add_queue(a);
638 * remove a queue from the discipline
/*
 * altq_remove_queue: remove the queue described by a from its discipline
 * by calling the remove_queue routine of the scheduler named in
 * a->scheduler (cbq, priq, hfsc or fairq).
 * NOTE(review): the case labels and the return statement are on lines
 * not visible in this view.
 */
641 altq_remove_queue(struct pf_altq *a)
645 switch (a->scheduler) {
648 error = cbq_remove_queue(a);
653 error = priq_remove_queue(a);
658 error = hfsc_remove_queue(a);
663 error = fairq_remove_queue(a);
674 * get queue statistics
/*
 * altq_getqstats: fetch queue statistics by calling the getqstats
 * routine of the scheduler named in a->scheduler (cbq, priq, hfsc or
 * fairq).  ubuf and nbytes are passed through unchanged.
 * NOTE(review): presumably ubuf is the destination buffer and nbytes its
 * size in and out -- confirm against the scheduler routines.  The case
 * labels and the return statement are on lines not visible in this view.
 */
677 altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
681 switch (a->scheduler) {
684 error = cbq_getqstats(a, ubuf, nbytes);
689 error = priq_getqstats(a, ubuf, nbytes);
694 error = hfsc_getqstats(a, ubuf, nbytes);
699 error = fairq_getqstats(a, ubuf, nbytes);
710 * read and write diffserv field in IPv4 or IPv6 header
/*
 * read_dsfield: return the DS field of the packet m, using the header
 * pointer and address family cached in pktattr.
 *
 * IPv4: the value is ip_tos.  IPv6: the value is bits 20..27 of the
 * host-order flow word.  A version mismatch yields 0.
 *
 * Before the header is touched, pattr_hdr is checked to lie inside one
 * of the mbufs of the chain.  The visible failure handling sets
 * pattr_af to AF_UNSPEC and logs a message.
 *
 * NOTE(review): the early returns, the IPv4 version test and the final
 * return are on lines not visible in this view.
 */
713 read_dsfield(struct mbuf *m, struct altq_pktattr *pktattr)
716 uint8_t ds_field = 0;
/* only IPv4 and IPv6 packets carry a DS field */
718 if (pktattr == NULL ||
719 (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
722 /* verify that pattr_hdr is within the mbuf data */
723 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
724 if ((pktattr->pattr_hdr >= m0->m_data) &&
725 (pktattr->pattr_hdr < m0->m_data + m0->m_len))
729 /* ick, pattr_hdr is stale */
730 pktattr->pattr_af = AF_UNSPEC;
732 kprintf("read_dsfield: can't locate header!\n");
737 if (pktattr->pattr_af == AF_INET) {
738 struct ip *ip = (struct ip *)pktattr->pattr_hdr;
741 return ((uint8_t)0); /* version mismatch! */
742 ds_field = ip->ip_tos;
745 else if (pktattr->pattr_af == AF_INET6) {
746 struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
/* host order: bits 28..31 hold the version, bits 20..27 the traffic class */
749 flowlabel = ntohl(ip6->ip6_flow);
750 if ((flowlabel >> 28) != 6)
751 return ((uint8_t)0); /* version mismatch! */
752 ds_field = (flowlabel >> 20) & 0xff;
/*
 * write_dsfield: store dsfield into the DS field of the packet m, using
 * the header pointer and address family cached in pktattr.
 *
 * IPv4: the two low bits of the old TOS are carried over, ip_tos is
 * rewritten and the header checksum is patched incrementally.
 * IPv6: the traffic-class bits of the flow word are rewritten.
 *
 * Before the header is touched, pattr_hdr is checked to lie inside one
 * of the mbufs of the chain.  The visible failure handling sets
 * pattr_af to AF_UNSPEC and logs a message.
 *
 * NOTE(review): the early returns, the IPv4 version test, the load of
 * old and any statements between lines 790 and 793 are on lines not
 * visible in this view.
 */
759 write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, uint8_t dsfield)
763 if (pktattr == NULL ||
764 (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
767 /* verify that pattr_hdr is within the mbuf data */
768 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
769 if ((pktattr->pattr_hdr >= m0->m_data) &&
770 (pktattr->pattr_hdr < m0->m_data + m0->m_len))
774 /* ick, pattr_hdr is stale */
775 pktattr->pattr_af = AF_UNSPEC;
777 kprintf("write_dsfield: can't locate header!\n");
782 if (pktattr->pattr_af == AF_INET) {
783 struct ip *ip = (struct ip *)pktattr->pattr_hdr;
788 return; /* version mismatch! */
790 dsfield |= old & 3; /* leave CU bits */
793 ip->ip_tos = dsfield;
795 * update checksum (from RFC1624)
796 * HC' = ~(~HC + ~m + m')
798 sum = ~ntohs(ip->ip_sum) & 0xffff;
/*
 * Only the TOS byte of the 16-bit word changes.  The constant 0xff00
 * stands for the unchanged high byte, whose complement and value add
 * up to 0xff.  NOTE(review): assumes TOS is the low byte of that word.
 */
799 sum += 0xff00 + (~old & 0xff) + dsfield;
/* fold the carries back into the low 16 bits */
800 sum = (sum >> 16) + (sum & 0xffff);
801 sum += (sum >> 16); /* add carry */
803 ip->ip_sum = htons(~sum & 0xffff);
806 else if (pktattr->pattr_af == AF_INET6) {
807 struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
810 flowlabel = ntohl(ip6->ip6_flow);
811 if ((flowlabel >> 28) != 6)
812 return; /* version mismatch! */
/*
 * The mask 0xf03fffff clears bits 22..27 and keeps bits 20..21, the two
 * low traffic-class bits; dsfield is then ORed in starting at bit 20.
 */
813 flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
814 ip6->ip6_flow = htonl(flowlabel);
820 * high resolution clock support taking advantage of a machine dependent
821 * high resolution time counter (e.g., timestamp counter of intel pentium).
823 * - 64-bit-long monotonically-increasing counter
824 * - frequency range is 100M-4GHz (CPU speed)
826 /* if pcc is not available or disabled, emulate 256MHz using microtime() */
/* Shift used to derive the emulated clock frequency (1000000 << MACHCLK_SHIFT). */
827 #define MACHCLK_SHIFT 8
/* Non-zero selects the code paths that rely on the real CPU clock. */
829 static int machclk_usepcc;
/* Clock frequency in Hz; callers treat zero as not yet available. */
830 uint64_t machclk_freq = 0;
/* machclk_freq divided by hz once the clock has been set up. */
831 uint32_t machclk_per_tick = 0;
/*
 * Clock and timer setup.
 * NOTE(review): the function header and several short lines are not
 * visible in this view.
 *
 * The visible statements initialise tbr_callout and the netmsg handled
 * by tbr_timeout_dispatch(), pick machclk_freq and derive
 * machclk_per_tick from it.  The frequency is tsc_frequency on x86_64
 * when the TSC is present and MP-synchronised, or an emulated
 * 1000000 << MACHCLK_SHIFT.
 */
836 callout_init_mp(&tbr_callout);
837 netmsg_init(&tbr_timeout_netmsg, NULL, &netisr_adone_rport,
838 MSGF_PRIORITY, tbr_timeout_dispatch);
846 #if defined(__x86_64__)
847 if (tsc_mpsync && tsc_present)
848 machclk_freq = tsc_frequency;
855 if (machclk_usepcc) {
857 kprintf("altq: CPU clock: %juHz\n", (uintmax_t)machclk_freq);
860 /* emulate 256MHz using microuptime() */
861 machclk_freq = 1000000LLU << MACHCLK_SHIFT;
863 kprintf("altq: emulate %juHz cpu clock\n",
864 (uintmax_t)machclk_freq);
/* clock units per kernel tick */
867 machclk_per_tick = machclk_freq / hz;
875 if (machclk_usepcc) {
876 #ifdef _RDTSC_SUPPORTED_
879 panic("read_machclk");
885 val = (((uint64_t)tv.tv_sec * 1000000 + tv.tv_usec) <<