1 /* $KAME: altq_subr.c,v 1.23 2004/04/20 16:10:06 itojun Exp $ */
2 /* $DragonFly: src/sys/net/altq/altq_subr.c,v 1.1 2005/02/11 22:25:57 joerg Exp $ */
5 * Copyright (C) 1997-2003
6 * Sony Computer Science Laboratories Inc. All rights reserved.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 #include "opt_inet6.h"
34 #include <sys/param.h>
35 #include <sys/malloc.h>
37 #include <sys/systm.h>
39 #include <sys/socket.h>
40 #include <sys/socketvar.h>
41 #include <sys/kernel.h>
42 #include <sys/callout.h>
43 #include <sys/errno.h>
44 #include <sys/syslog.h>
45 #include <sys/sysctl.h>
46 #include <sys/queue.h>
49 #include <net/if_dl.h>
50 #include <net/if_types.h>
51 #include <net/ifq_var.h>
53 #include <netinet/in.h>
54 #include <netinet/in_systm.h>
55 #include <netinet/ip.h>
57 #include <netinet/ip6.h>
59 #include <netinet/tcp.h>
60 #include <netinet/udp.h>
62 #include <net/pf/pfvar.h>
63 #include <net/altq/altq.h>
65 /* machine dependent clock related includes */
67 #include <machine/clock.h> /* for tsc_freq */
68 #include <machine/md_var.h> /* for cpu_feature */
69 #include <machine/specialreg.h> /* for CPUID_TSC */
73 * internal function prototypes
/* callout handler for the token bucket regulator; armed via callout_reset() on tbr_callout */
75 static void tbr_timeout(void *);
/* optional input hook taking (mbuf, int); starts out unset (NULL) */
76 int (*altq_input)(struct mbuf *, int) = NULL;
77 static int tbr_timer = 0; /* token bucket regulator timer */
/* callout used to schedule tbr_timeout() */
78 static struct callout tbr_callout;
80 int pfaltq_running; /* keep track of running state */
/* malloc type M_ALTQ, used for allocations made in this file */
82 MALLOC_DEFINE(M_ALTQ, "altq", "ALTQ structures");
85 * alternate queueing support routines
88 /* look up the queue state by the interface name and the queueing type. */
/*
 * Find the discipline state attached to the send queue of an interface.
 *
 * name: interface name, resolved with ifunit().
 * type: queueing type to match; ALTQT_NONE never matches.
 *
 * Returns ifp->if_snd.altq_disc when the interface exists and its
 * if_snd.altq_type equals the requested type.
 * NOTE(review): the result for the no-match path is not visible here,
 * presumably NULL -- confirm.
 */
90 altq_lookup(const char *name, int type)
94 if ((ifp = ifunit(name)) != NULL) {
/* only hand back the discipline when a real type was asked for and it matches */
95 if (type != ALTQT_NONE && ifp->if_snd.altq_type == type)
96 return (ifp->if_snd.altq_disc);
/*
 * Attach a queueing discipline to an interface queue by recording the
 * discipline type, its private state and its callbacks in ifq.
 *
 * ifq:        the queue being configured.
 * type:       stored in ifq->altq_type.
 * discipline: opaque discipline state, stored in ifq->altq_disc.
 * enqueue, dequeue, request, classify: callbacks stored in the
 *             corresponding ifq->altq_* members.
 *
 * A further argument named clfier is stored in ifq->altq_clfier.
 * NOTE(review): its declaration is not visible in this listing -- confirm
 * its type and position in the parameter list.
 */
103 altq_attach(struct ifaltq *ifq, int type, void *discipline,
104 int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *),
105 struct mbuf *(*dequeue)(struct ifaltq *, int),
106 int (*request)(struct ifaltq *, int, void *),
108 void *(*classify)(struct ifaltq *, struct mbuf *,
109 struct altq_pktattr *))
/* precondition check; NOTE(review): the action taken on failure is not shown here */
111 if (!ifq_is_ready(ifq))
114 ifq->altq_type = type;
115 ifq->altq_disc = discipline;
116 ifq->altq_enqueue = enqueue;
117 ifq->altq_dequeue = dequeue;
118 ifq->altq_request = request;
119 ifq->altq_clfier = clfier;
120 ifq->altq_classify = classify;
/* drop every flag except ALTQF_CANTCHANGE and ALTQF_ENABLED */
121 ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED);
/*
 * Detach the discipline from an interface queue: the type is reset to
 * ALTQT_NONE and the discipline pointer and all callbacks are cleared.
 *
 * Three state checks run first (ready, enabled, attached).
 * NOTE(review): the statements executed when a check fires are not
 * visible in this listing -- confirm the returned error codes.
 */
126 altq_detach(struct ifaltq *ifq)
128 if (!ifq_is_ready(ifq))
130 if (ifq_is_enabled(ifq))
132 if (!ifq_is_attached(ifq))
135 ifq->altq_type = ALTQT_NONE;
136 ifq->altq_disc = NULL;
137 ifq->altq_enqueue = NULL;
138 ifq->altq_dequeue = NULL;
139 ifq->altq_request = NULL;
140 ifq->altq_clfier = NULL;
141 ifq->altq_classify = NULL;
/* only ALTQF_CANTCHANGE survives a detach */
142 ifq->altq_flags &= ALTQF_CANTCHANGE;
/*
 * Turn on the attached discipline for ifq by setting ALTQF_ENABLED, and
 * ALTQF_CLASSIFY as well when a classifier (altq_clfier) is installed.
 *
 * NOTE(review): the outcome of the ready / already-enabled checks is not
 * visible in this listing -- confirm.
 */
147 altq_enable(struct ifaltq *ifq)
151 if (!ifq_is_ready(ifq))
153 if (ifq_is_enabled(ifq))
/* the queue is expected to hold no packets at this point */
158 KKASSERT(ifq->ifq_len == 0);
159 ifq->altq_flags |= ALTQF_ENABLED;
160 if (ifq->altq_clfier != NULL)
161 ifq->altq_flags |= ALTQF_CLASSIFY;
/*
 * Turn off the discipline for ifq by clearing ALTQF_ENABLED and
 * ALTQF_CLASSIFY.
 *
 * NOTE(review): what happens when the queue is not enabled is not visible
 * in this listing -- confirm.
 */
168 altq_disable(struct ifaltq *ifq)
172 if (!ifq_is_enabled(ifq))
/* the queue is expected to hold no packets at this point */
177 KKASSERT(ifq->ifq_len == 0);
178 ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
184 * internal representation of token bucket parameters
185 * rate: byte_per_unittime << 32
186 * (((bits_per_sec) / 8) << 32) / machclk_freq
/*
 * Fixed-point helpers for token bucket values.
 * TBR_SCALE widens its argument to int64_t before shifting left by
 * TBR_SHIFT; TBR_UNSCALE shifts right by TBR_SHIFT without any cast, so
 * the result has the (promoted) type of its argument.
 * NOTE(review): TBR_SHIFT is defined outside this listing, presumably 32
 * to match the "<< 32" description of the rate -- confirm.
 */
191 #define TBR_SCALE(x) ((int64_t)(x) << TBR_SHIFT)
192 #define TBR_UNSCALE(x) ((x) >> TBR_SHIFT)
/*
 * Dequeue through the token bucket regulator of ifq.
 *
 * op is compared against ALTDQ_REMOVE and ALTDQ_POLL.  Unless this is a
 * remove that directly follows a poll, the token count is checked (and
 * refilled from elapsed machine-clock time when it is not positive); the
 * packet is then taken from the discipline when ALTQ is enabled.  A
 * successful remove is charged against the bucket.
 *
 * NOTE(review): the initialisation of tbr, the value returned when the
 * bucket is empty and the non-ALTQ dequeue paths are not visible in this
 * listing -- confirm.
 */
195 tbr_dequeue(struct ifaltq *ifq, int op)
197 struct tb_regulator *tbr;
203 if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
204 /* if this is a remove after poll, bypass tbr check */
206 /* update token only when it is negative */
/* the test below also triggers the update when the token count is exactly zero */
207 if (tbr->tbr_token <= 0) {
208 now = read_machclk();
209 interval = now - tbr->tbr_last;
/* enough time has passed to fill the whole bucket */
210 if (interval >= tbr->tbr_filluptime)
211 tbr->tbr_token = tbr->tbr_depth;
/* tokens earned over the interval, capped at the bucket depth */
213 tbr->tbr_token += interval * tbr->tbr_rate;
214 if (tbr->tbr_token > tbr->tbr_depth)
215 tbr->tbr_token = tbr->tbr_depth;
219 /* if token is still negative, don't allow dequeue */
220 if (tbr->tbr_token <= 0)
/* with ALTQ enabled the discipline's own dequeue callback supplies the packet */
224 if (ifq_is_enabled(ifq))
225 m = (*ifq->altq_dequeue)(ifq, op);
226 else if (op == ALTDQ_POLL)
/* only an actual removal consumes tokens: the packet length, scaled */
231 if (m != NULL && op == ALTDQ_REMOVE)
232 tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
/* remember the operation so a following remove-after-poll can be recognised */
233 tbr->tbr_lastop = op;
238 * set a token bucket regulator.
239 * if the specified rate is zero, the token bucket regulator is deleted.
/*
 * Install, replace or delete the token bucket regulator of ifq.
 *
 * profile->rate  is divided by 8 and by machclk_freq, i.e. converted to a
 *                scaled per-clock-unit rate; a rate of zero requests
 *                deletion of the current regulator.
 * profile->depth becomes the scaled bucket depth.
 *
 * A new regulator starts with a full bucket, the current machine clock as
 * its last-update time and ALTDQ_REMOVE as its last operation.
 *
 * NOTE(review): return values, the release of the old regulator and the
 * statement between the two machclk_freq checks are not visible in this
 * listing -- confirm.
 */
242 tbr_set(struct ifaltq *ifq, struct tb_profile *profile)
244 struct tb_regulator *tbr, *otbr;
/* NOTE(review): presumably the clock is initialised between these two checks -- confirm */
246 if (machclk_freq == 0)
248 if (machclk_freq == 0) {
249 printf("tbr_set: no cpu clock available!\n");
253 if (profile->rate == 0) {
254 /* delete this tbr */
255 if ((tbr = ifq->altq_tbr) == NULL)
257 ifq->altq_tbr = NULL;
/* M_ZERO: every field not assigned below starts out as zero */
262 tbr = malloc(sizeof(*tbr), M_ALTQ, M_WAITOK | M_ZERO);
/* integer division: a rate below 8 yields 0 here; small quotients may also scale down to 0 */
263 tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
264 tbr->tbr_depth = TBR_SCALE(profile->depth);
/* clock units needed to refill an empty bucket; guarded against a zero rate */
265 if (tbr->tbr_rate > 0)
266 tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
/*
 * NOTE(review): if tbr_filluptime is a signed 64-bit field this constant
 * converts to -1, and the "interval >= tbr_filluptime" test in
 * tbr_dequeue() would then hold for every non-negative interval --
 * confirm the field type and whether a maximum signed value was intended.
 */
268 tbr->tbr_filluptime = 0xffffffffffffffffLL;
269 tbr->tbr_token = tbr->tbr_depth;
270 tbr->tbr_last = read_machclk();
271 tbr->tbr_lastop = ALTDQ_REMOVE;
/* keep the previous regulator in otbr while the new one is published */
273 otbr = ifq->altq_tbr;
274 ifq->altq_tbr = tbr; /* set the new tbr */
/* arm the periodic timeout, one tick from now, when the timer is not yet marked active */
278 else if (tbr_timer == 0) {
279 callout_reset(&tbr_callout, 1, tbr_timeout, NULL);
286 * tbr_timeout goes through the interface list, and kicks the drivers
/*
 * Callout handler for the token bucket regulator.
 *
 * Walks the global interface list; for an interface that has a regulator
 * and a non-empty send queue, its if_start routine is invoked (when one is
 * set).  The handler then either re-arms itself one tick ahead or clears
 * tbr_timer.
 * NOTE(review): the condition choosing between re-arming and stopping is
 * not visible in this listing -- confirm.
 *
 * arg: callout argument; always passed as NULL by the callers shown in
 *      this file.
 */
290 tbr_timeout(void *arg)
297 for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list)) {
/* interfaces without a regulator are of no interest */
298 if (ifp->if_snd.altq_tbr == NULL)
/* kick the driver only when there is something queued and a start routine exists */
301 if (!ifq_is_empty(&ifp->if_snd) && ifp->if_start != NULL)
302 (*ifp->if_start)(ifp);
306 callout_reset(&tbr_callout, 1, tbr_timeout, NULL);
308 tbr_timer = 0; /* don't need tbr_timer anymore */
312 * get token bucket regulator profile
/*
 * Report the current token bucket profile of ifq.
 *
 * The stored values are converted back from their scaled form: the rate is
 * multiplied by 8 and machclk_freq before unscaling (the inverse of the
 * conversion done in tbr_set), the depth is simply unscaled.
 *
 * NOTE(review): the values reported when no regulator is installed and the
 * left-hand side of the rate assignment are not visible in this listing,
 * presumably profile->rate -- confirm.
 */
315 tbr_get(struct ifaltq *ifq, struct tb_profile *profile)
317 struct tb_regulator *tbr;
319 if ((tbr = ifq->altq_tbr) == NULL) {
324 (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq);
325 profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth);
331 * attach a discipline to the interface. if one already exists, it is
/*
 * Attach the discipline described by a pf_altq to its interface.
 *
 * The attach itself is delegated to the scheduler-specific routine
 * (cbq_pfattach, priq_pfattach or hfsc_pfattach) selected by a->scheduler.
 * Afterwards, if pf ALTQ is running and the interface has a discipline
 * that is not yet enabled, the queue is enabled; if the queue is enabled,
 * the token bucket regulator is (re)programmed from a->ifbandwidth and
 * a->tbrsize.
 *
 * NOTE(review): the case labels, the default arm and the final return are
 * not visible in this listing -- confirm.
 */
335 altq_pfattach(struct pf_altq *a)
338 struct tb_profile tb;
341 switch (a->scheduler) {
346 error = cbq_pfattach(a);
351 error = priq_pfattach(a);
356 error = hfsc_pfattach(a);
/* may be NULL; every use below checks for that */
363 ifp = ifunit(a->ifname);
365 /* if the state is running, enable altq */
366 if (error == 0 && pfaltq_running &&
367 ifp != NULL && ifp->if_snd.altq_type != ALTQT_NONE &&
368 !ifq_is_enabled(&ifp->if_snd))
369 error = altq_enable(&ifp->if_snd);
371 /* if altq is already enabled, reset the token bucket regulator */
372 if (error == 0 && ifp != NULL && ifq_is_enabled(&ifp->if_snd)) {
373 tb.rate = a->ifbandwidth;
374 tb.depth = a->tbrsize;
376 error = tbr_set(&ifp->if_snd, &tb);
384 * detach a discipline from the interface.
385 * it is possible that the discipline was already overridden by another
/*
 * Detach the discipline described by a pf_altq from its interface.
 *
 * Nothing is torn down unless the interface exists and the discipline
 * recorded in a->altq_disc is still the one installed on the send queue.
 * An enabled queue is disabled, and altq_detach() is called.
 *
 * NOTE(review): the early-return values and whether the detach is
 * conditional on the disable succeeding are not visible in this listing
 * -- confirm.
 */
389 altq_pfdetach(struct pf_altq *a)
394 if ((ifp = ifunit(a->ifname)) == NULL)
397 /* if this discipline is no longer referenced, just return */
398 if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc)
402 if (ifq_is_enabled(&ifp->if_snd))
403 error = altq_disable(&ifp->if_snd);
405 error = altq_detach(&ifp->if_snd);
412 * add a discipline or a queue
/*
 * Add a discipline or, when a queue name is given, a queue.
 *
 * A non-empty a->qname forwards the request to altq_add_queue().
 * Otherwise the machine clock must be available (the function panics when
 * machclk_freq is still zero after the second check) and the request is
 * dispatched on a->scheduler to cbq_add_altq, priq_add_altq or
 * hfsc_add_altq.
 *
 * NOTE(review): the statement between the two machclk_freq checks
 * (presumably the clock initialisation), the case labels and the default
 * arm are not visible in this listing -- confirm.
 */
415 altq_add(struct pf_altq *a)
419 if (a->qname[0] != 0)
420 return (altq_add_queue(a));
422 if (machclk_freq == 0)
424 if (machclk_freq == 0)
425 panic("altq_add: no cpu clock");
427 switch (a->scheduler) {
430 error = cbq_add_altq(a);
435 error = priq_add_altq(a);
440 error = hfsc_add_altq(a);
451 * remove a discipline or a queue
/*
 * Remove a discipline or, when a queue name is given, a queue.
 *
 * A non-empty a->qname forwards the request to altq_remove_queue();
 * otherwise it is dispatched on a->scheduler to cbq_remove_altq,
 * priq_remove_altq or hfsc_remove_altq.
 *
 * NOTE(review): case labels, default arm and final return are not visible
 * in this listing -- confirm.
 */
454 altq_remove(struct pf_altq *a)
458 if (a->qname[0] != 0)
459 return (altq_remove_queue(a));
461 switch (a->scheduler) {
464 error = cbq_remove_altq(a);
469 error = priq_remove_altq(a);
474 error = hfsc_remove_altq(a);
485 * add a queue to the discipline
/*
 * Add a queue to a discipline by dispatching on a->scheduler to
 * cbq_add_queue, priq_add_queue or hfsc_add_queue; the scheduler's result
 * is kept in error.
 *
 * NOTE(review): case labels, default arm and final return are not visible
 * in this listing -- confirm.
 */
488 altq_add_queue(struct pf_altq *a)
492 switch (a->scheduler) {
495 error = cbq_add_queue(a);
500 error = priq_add_queue(a);
505 error = hfsc_add_queue(a);
516 * remove a queue from the discipline
/*
 * Remove a queue from a discipline by dispatching on a->scheduler to
 * cbq_remove_queue, priq_remove_queue or hfsc_remove_queue; the
 * scheduler's result is kept in error.
 *
 * NOTE(review): case labels, default arm and final return are not visible
 * in this listing -- confirm.
 */
519 altq_remove_queue(struct pf_altq *a)
523 switch (a->scheduler) {
526 error = cbq_remove_queue(a);
531 error = priq_remove_queue(a);
536 error = hfsc_remove_queue(a);
547 * get queue statistics
/*
 * Fetch queue statistics by dispatching on a->scheduler to cbq_getqstats,
 * priq_getqstats or hfsc_getqstats.
 *
 * a:      selects the scheduler and is passed through.
 * ubuf:   passed through unchanged to the scheduler routine.
 * nbytes: passed through unchanged to the scheduler routine.
 *
 * NOTE(review): ubuf looks like a user-space destination and nbytes an
 * in/out size, but neither is established by this function -- confirm
 * against the scheduler implementations.
 */
550 altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
554 switch (a->scheduler) {
557 error = cbq_getqstats(a, ubuf, nbytes);
562 error = priq_getqstats(a, ubuf, nbytes);
567 error = hfsc_getqstats(a, ubuf, nbytes);
578 * read and write diffserv field in IPv4 or IPv6 header
/*
 * Read the diffserv field of the IPv4 or IPv6 header referenced by pktattr.
 *
 * m:       mbuf chain that is searched for pktattr->pattr_hdr.
 * pktattr: supplies the address family (pattr_af) and the header pointer
 *          (pattr_hdr); anything other than AF_INET / AF_INET6 is not
 *          handled.
 *
 * For IPv4 the field is ip_tos; for IPv6 it is the 8 bits starting at
 * bit 20 of the host-order ip6_flow word.  A version mismatch yields 0.
 * If the header pointer does not fall inside any mbuf of the chain,
 * pattr_af is reset to AF_UNSPEC and a diagnostic is printed.
 *
 * NOTE(review): the early-return values and the IPv4 version test are not
 * visible in this listing -- confirm.
 */
581 read_dsfield(struct mbuf *m, struct altq_pktattr *pktattr)
584 uint8_t ds_field = 0;
586 if (pktattr == NULL ||
587 (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
590 /* verify that pattr_hdr is within the mbuf data */
591 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
592 if ((pktattr->pattr_hdr >= m0->m_data) &&
593 (pktattr->pattr_hdr < m0->m_data + m0->m_len))
597 /* ick, pattr_hdr is stale */
598 pktattr->pattr_af = AF_UNSPEC;
600 printf("read_dsfield: can't locate header!\n");
605 if (pktattr->pattr_af == AF_INET) {
606 struct ip *ip = (struct ip *)pktattr->pattr_hdr;
609 return ((uint8_t)0); /* version mismatch! */
610 ds_field = ip->ip_tos;
613 else if (pktattr->pattr_af == AF_INET6) {
614 struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
/* work on the host-order word: version in the top 4 bits, traffic class below it */
617 flowlabel = ntohl(ip6->ip6_flow);
618 if ((flowlabel >> 28) != 6)
619 return ((uint8_t)0); /* version mismatch! */
620 ds_field = (flowlabel >> 20) & 0xff;
/*
 * Write the diffserv field of the IPv4 or IPv6 header referenced by pktattr.
 *
 * m:       mbuf chain that is searched for pktattr->pattr_hdr.
 * pktattr: supplies the address family (pattr_af) and the header pointer
 *          (pattr_hdr); anything other than AF_INET / AF_INET6 is not
 *          handled.
 * dsfield: new value for the field.
 *
 * IPv4: the low two bits of the old ip_tos are merged into dsfield, ip_tos
 * is rewritten and ip_sum is adjusted incrementally rather than recomputed.
 * IPv6: bits 22-27 of the host-order ip6_flow word are cleared and
 * dsfield << 20 is ORed in, so bits 20-21 keep whatever was set before in
 * addition to the low two bits of dsfield.
 * If the header pointer does not fall inside any mbuf of the chain,
 * pattr_af is reset to AF_UNSPEC and a diagnostic is printed.
 *
 * NOTE(review): the IPv4 version test, the load of the old ip_tos value
 * and any unchanged-value shortcut are not visible in this listing --
 * confirm.
 */
627 write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, uint8_t dsfield)
631 if (pktattr == NULL ||
632 (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
635 /* verify that pattr_hdr is within the mbuf data */
636 for (m0 = m; m0 != NULL; m0 = m0->m_next) {
637 if ((pktattr->pattr_hdr >= m0->m_data) &&
638 (pktattr->pattr_hdr < m0->m_data + m0->m_len))
642 /* ick, pattr_hdr is stale */
643 pktattr->pattr_af = AF_UNSPEC;
645 printf("write_dsfield: can't locate header!\n");
650 if (pktattr->pattr_af == AF_INET) {
651 struct ip *ip = (struct ip *)pktattr->pattr_hdr;
656 return; /* version mismatch! */
658 dsfield |= old & 3; /* leave CU bits */
661 ip->ip_tos = dsfield;
663 * update checksum (from RFC1624)
664 * HC' = ~(~HC + ~m + m')
/* start from the complement of the stored checksum, in host order */
666 sum = ~ntohs(ip->ip_sum) & 0xffff;
/* 0xff00 + (~old & 0xff) is the 16-bit complement of the old byte; dsfield is the new one */
667 sum += 0xff00 + (~old & 0xff) + dsfield;
/* fold the carries back into the low 16 bits */
668 sum = (sum >> 16) + (sum & 0xffff);
669 sum += (sum >> 16); /* add carry */
671 ip->ip_sum = htons(~sum & 0xffff);
674 else if (pktattr->pattr_af == AF_INET6) {
675 struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
678 flowlabel = ntohl(ip6->ip6_flow);
679 if ((flowlabel >> 28) != 6)
680 return; /* version mismatch! */
/* mask 0xf03fffff keeps the version, bits 20-21 and the low 20 bits */
681 flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
682 ip6->ip6_flow = htonl(flowlabel);
688 * high resolution clock support taking advantage of a machine dependent
689 * high resolution time counter (e.g., timestamp counter of intel pentium).
691 * - 64-bit-long monotonically-increasing counter
692 * - frequency range is 100M-4GHz (CPU speed)
694 /* if pcc is not available or disabled, emulate 256MHz using microtime() */
/* left shift applied to microsecond values when the clock is emulated (2^8 = 256 units per usec) */
695 #define MACHCLK_SHIFT 8
/* machine clock frequency in Hz; zero until it has been determined */
698 uint32_t machclk_freq = 0;
/* machine clock units per kernel tick (machclk_freq / hz) */
699 uint32_t machclk_per_tick = 0;
/*
 * Body of the machine clock initialisation.
 * NOTE(review): the function header is not visible in this listing; the
 * name init_machclk is taken from the tsleep() wait message -- confirm.
 *
 * Visible steps:
 *  - the token bucket callout is initialised;
 *  - preprocessor conditions on __i386__, ALTQ_NOPCC, __DragonFly__ and SMP
 *    select how machclk_usepcc is set up (the selected statements are not
 *    visible); on i386 a missing CPUID_TSC feature bit is checked while
 *    machclk_usepcc is 1;
 *  - with machclk_usepcc == 0 the clock is emulated: machclk_freq becomes
 *    1000000 << MACHCLK_SHIFT, machclk_per_tick is derived from hz and a
 *    message is printed;
 *  - otherwise tsc_freq is used when available, and a build without such
 *    an interface fails with #error;
 *  - if machclk_freq is still zero it is measured: wall-clock time and the
 *    machine counter are sampled around a tsleep() of hz ticks (one
 *    second), and the frequency is counter delta * 1000000 / elapsed
 *    microseconds; machclk_per_tick is then derived and the result is
 *    printed.
 *
 * NOTE(review): the second microtime() sample into tv_end and any guard
 * against diff == 0 before the division are not visible in this listing
 * -- confirm both exist.
 */
704 callout_init(&tbr_callout);
708 #if !defined(__i386__) || defined(ALTQ_NOPCC)
710 #elif defined(__DragonFly__) && defined(SMP)
712 #elif defined(__i386__)
713 /* check if TSC is available */
714 if (machclk_usepcc == 1 && (cpu_feature & CPUID_TSC) == 0)
718 if (machclk_usepcc == 0) {
719 /* emulate 256MHz using microtime() */
720 machclk_freq = 1000000 << MACHCLK_SHIFT;
721 machclk_per_tick = machclk_freq / hz;
723 printf("altq: emulate %uHz cpu clock\n", machclk_freq);
729 * if the clock frequency (of Pentium TSC or Alpha PCC) is
730 * accessible, just use it.
733 machclk_freq = tsc_freq;
735 #error "machclk_freq interface not implemented"
739 * if we don't know the clock frequency, measure it.
741 if (machclk_freq == 0) {
743 struct timeval tv_start, tv_end;
744 uint64_t start, end, diff;
747 microtime(&tv_start);
748 start = read_machclk();
749 timo = hz; /* 1 sec */
750 tsleep(&wait, PCATCH, "init_machclk", timo);
752 end = read_machclk();
753 diff = (uint64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000
754 + tv_end.tv_usec - tv_start.tv_usec;
756 machclk_freq = (u_int)((end - start) * 1000000 / diff);
759 machclk_per_tick = machclk_freq / hz;
762 printf("altq: CPU clock: %uHz\n", machclk_freq);
771 if (machclk_usepcc) {
772 #if defined(__i386__)
775 panic("read_machclk");
781 val = (((uint64_t)(tv.tv_sec - boottime.tv_sec) * 1000000
782 + tv.tv_usec) << MACHCLK_SHIFT);