/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/net/altq/altq_fairq.c,v 1.2 2008/05/14 11:59:23 sephe Exp $
 */
/*
 * Matt: I gutted altq_priq.c and used it as a skeleton on which to build
 * fairq.  The fairq algorithm is completely different than priq, of course,
 * but because I used priq's skeleton I believe I should include priq's
 * copyright.
 *
 * Copyright (C) 2000-2003
 *	Sony Computer Science Laboratories Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * FAIRQ - take traffic classified by keep state (hashed into
 *	   pf->state_hash) and bucketize it.  Fairly extract
 *	   the first packet from each bucket in a round-robin fashion.
 *
 * TODO - better overall qlimit support (right now it is per-bucket).
 *	- NOTE: red etc is per bucket, not overall.
 *	- better service curve support.
 *
 * EXAMPLE:
 *
 *  altq on em0 fairq bandwidth 650Kb queue { std, bulk }
 *  queue std  priority 3 bandwidth 200Kb \
 *	fairq (buckets 64, default, hogs 1Kb) qlimit 50
 *  queue bulk priority 2 bandwidth 100Kb \
 *	fairq (buckets 64, hogs 1Kb) qlimit 50
 *
 * NOTE: When the aggregate bandwidth is less than the link bandwidth
 *	 any remaining bandwidth is dynamically assigned using the
 *	 existing bandwidth specs as weightings.
 *
 *  pass out on em0 from any to any keep state queue std
 *  pass out on em0 inet proto tcp ..... port ... keep state queue bulk
 */
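
/*
 * Illustrative sketch of the bucketization (kept under #if 0, never
 * compiled; the field and function names mirror the real code below):
 */
#if 0
	/* enqueue side: pf's keep-state hash picks the bucket */
	hindex = m->m_pkthdr.pf.state_hash & cl->cl_nbucket_mask;
	b = &cl->cl_buckets[hindex];
	_addq(&b->queue, m);

	/* dequeue side: rotate through the active buckets, one packet each */
	b = fairq_selectq(cl, 0);
	m = _getq(&b->queue);
#endif
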
#include "opt_altq.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef ALTQ_FAIRQ  /* fairq is enabled in the kernel conf */

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/queue.h>
#include <sys/thread.h>

#include <net/if.h>
#include <net/ifq_var.h>
#include <netinet/in.h>

#include <net/pf/pfvar.h>
#include <net/altq/altq.h>
#include <net/altq/altq_fairq.h>

#include <sys/thread2.h>

#define FAIRQ_SUBQ_INDEX	ALTQ_SUBQ_INDEX_DEFAULT
#define FAIRQ_LOCK(ifq) \
	ALTQ_SQ_LOCK(&(ifq)->altq_subq[FAIRQ_SUBQ_INDEX])
#define FAIRQ_UNLOCK(ifq) \
	ALTQ_SQ_UNLOCK(&(ifq)->altq_subq[FAIRQ_SUBQ_INDEX])
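
/*
 * FAIRQ operates on the default subqueue only.  With multiple TX queue
 * support a request, enqueue or dequeue can transiently be directed at
 * another subqueue while the packet scheduler is being switched; those
 * cases fall back to the classic ifsq handlers (see the
 * ifsq_get_index() checks below).
 */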

/*
 * function prototypes
 */
static int	fairq_clear_interface(struct fairq_if *);
static int	fairq_request(struct ifaltq_subque *, int, void *);
static void	fairq_purge(struct fairq_if *);
static struct fairq_class *fairq_class_create(struct fairq_if *, int,
				int, u_int, struct fairq_opts *, int);
static int	fairq_class_destroy(struct fairq_class *);
static int	fairq_enqueue(struct ifaltq_subque *, struct mbuf *,
				struct altq_pktattr *);
static struct mbuf *fairq_dequeue(struct ifaltq_subque *, struct mbuf *, int);

static int	fairq_addq(struct fairq_class *, struct mbuf *, int hash);
static struct mbuf *fairq_getq(struct fairq_class *, uint64_t);
static struct mbuf *fairq_pollq(struct fairq_class *, uint64_t, int *);
static fairq_bucket_t *fairq_selectq(struct fairq_class *, int);
static void	fairq_purgeq(struct fairq_class *);

static void	get_class_stats(struct fairq_classstats *,
				struct fairq_class *);
static struct fairq_class *clh_to_clp(struct fairq_if *, uint32_t);

int
fairq_pfattach(struct pf_altq *a, struct ifaltq *ifq)
{
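	/*
	 * ifq_mapsubq_default presumably maps every CPU to the default
	 * subqueue, matching the single-subqueue operation noted above.
	 */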
	return altq_attach(ifq, ALTQT_FAIRQ, a->altq_disc, ifq_mapsubq_default,
	    fairq_enqueue, fairq_dequeue, fairq_request, NULL, NULL);
}

int
fairq_add_altq(struct pf_altq *a)
{
	struct fairq_if *pif;
	struct ifnet *ifp;

	if ((ifp = ifunit(a->ifname)) == NULL)
		return (EINVAL);
	if (!ifq_is_ready(&ifp->if_snd))
		return (ENODEV);

	pif = kmalloc(sizeof(*pif), M_ALTQ, M_WAITOK | M_ZERO);
	pif->pif_bandwidth = a->ifbandwidth;
	pif->pif_maxpri = -1;
	pif->pif_ifq = &ifp->if_snd;
	ifq_purge_all(&ifp->if_snd);

	/* keep the state in pf_altq */
	a->altq_disc = pif;

	return (0);
}

int
fairq_remove_altq(struct pf_altq *a)
{
	struct fairq_if *pif;

	if ((pif = a->altq_disc) == NULL)
		return (EINVAL);
	a->altq_disc = NULL;

	fairq_clear_interface(pif);

	kfree(pif, M_ALTQ);
	return (0);
}

static int
fairq_add_queue_locked(struct pf_altq *a, struct fairq_if *pif)
{
	struct fairq_class *cl;

	KKASSERT(a->priority < FAIRQ_MAXPRI);
	KKASSERT(a->qid != 0);

	if (pif->pif_classes[a->priority] != NULL)
		return (EBUSY);
	if (clh_to_clp(pif, a->qid) != NULL)
		return (EBUSY);

	cl = fairq_class_create(pif, a->priority, a->qlimit, a->bandwidth,
	    &a->pq_u.fairq_opts, a->qid);
	if (cl == NULL)
		return (ENOMEM);

	return (0);
}

int
fairq_add_queue(struct pf_altq *a)
{
	struct fairq_if *pif;
	struct ifaltq *ifq;
	int error;

	/* check parameters */
	if (a->priority >= FAIRQ_MAXPRI)
		return (EINVAL);
	if (a->qid == 0)
		return (EINVAL);

	/* XXX not MP safe */
	if ((pif = a->altq_disc) == NULL)
		return (EINVAL);
	ifq = pif->pif_ifq;

	FAIRQ_LOCK(ifq);
	error = fairq_add_queue_locked(a, pif);
	FAIRQ_UNLOCK(ifq);

	return error;
}

static int
fairq_remove_queue_locked(struct pf_altq *a, struct fairq_if *pif)
{
	struct fairq_class *cl;

	if ((cl = clh_to_clp(pif, a->qid)) == NULL)
		return (EINVAL);

	return (fairq_class_destroy(cl));
}

int
fairq_remove_queue(struct pf_altq *a)
{
	struct fairq_if *pif;
	struct ifaltq *ifq;
	int error;

	/* XXX not MP safe */
	if ((pif = a->altq_disc) == NULL)
		return (EINVAL);
	ifq = pif->pif_ifq;

	FAIRQ_LOCK(ifq);
	error = fairq_remove_queue_locked(a, pif);
	FAIRQ_UNLOCK(ifq);

	return error;
}

int
fairq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
{
	struct fairq_if *pif;
	struct fairq_class *cl;
	struct fairq_classstats stats;
	struct ifaltq *ifq;
	int error = 0;

	if (*nbytes < sizeof(stats))
		return (EINVAL);

	/* XXX not MP safe */
	if ((pif = altq_lookup(a->ifname, ALTQT_FAIRQ)) == NULL)
		return (EBADF);
	ifq = pif->pif_ifq;

	FAIRQ_LOCK(ifq);

	if ((cl = clh_to_clp(pif, a->qid)) == NULL) {
		FAIRQ_UNLOCK(ifq);
		return (EINVAL);
	}

	get_class_stats(&stats, cl);

	FAIRQ_UNLOCK(ifq);

	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
		return (error);
	*nbytes = sizeof(stats);
	return (0);
}

/*
 * bring the interface back to the initial state by discarding
 * all the filters and classes.
 */
static int
fairq_clear_interface(struct fairq_if *pif)
{
	struct fairq_class *cl;
	int pri;

	/* clear out the classes */
	for (pri = 0; pri <= pif->pif_maxpri; pri++) {
		if ((cl = pif->pif_classes[pri]) != NULL)
			fairq_class_destroy(cl);
	}

	return (0);
}

static int
fairq_request(struct ifaltq_subque *ifsq, int req, void *arg)
{
	struct ifaltq *ifq = ifsq->ifsq_altq;
	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;

	crit_enter();
	switch (req) {
	case ALTRQ_PURGE:
		if (ifsq_get_index(ifsq) == FAIRQ_SUBQ_INDEX) {
			fairq_purge(pif);
		} else {
			/*
			 * A race happened: an unrelated subqueue was
			 * picked during the packet scheduler transition.
			 */
			ifsq_classic_request(ifsq, ALTRQ_PURGE, NULL);
		}
		break;
	}
	crit_exit();
	return (0);
}

/* discard all the queued packets on the interface */
static void
fairq_purge(struct fairq_if *pif)
{
	struct fairq_class *cl;
	int pri;

	for (pri = 0; pri <= pif->pif_maxpri; pri++) {
		if ((cl = pif->pif_classes[pri]) != NULL && cl->cl_head)
			fairq_purgeq(cl);
	}
	if (ifq_is_enabled(pif->pif_ifq))
		pif->pif_ifq->altq_subq[FAIRQ_SUBQ_INDEX].ifq_len = 0;
}

static struct fairq_class *
fairq_class_create(struct fairq_if *pif, int pri, int qlimit,
		   u_int bandwidth, struct fairq_opts *opts, int qid)
{
	struct fairq_class *cl;
	int flags = opts->flags;
	u_int nbuckets = opts->nbuckets;
	int i;

#ifndef ALTQ_RED
	if (flags & FARF_RED) {
#ifdef ALTQ_DEBUG
		kprintf("fairq_class_create: RED not configured for FAIRQ!\n");
#endif
		return (NULL);
	}
#endif
	if (nbuckets == 0)
		nbuckets = 256;
	if (nbuckets > FAIRQ_MAX_BUCKETS)
		nbuckets = FAIRQ_MAX_BUCKETS;
	/* enforce power-of-2 size */
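	/*
	 * ((n ^ (n - 1)) == (n << 1) - 1) holds only when n is a power
	 * of 2: a single set bit makes n ^ (n - 1) a mask of every bit
	 * up to and including it.  Bump nbuckets until that holds.
	 */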
	while ((nbuckets ^ (nbuckets - 1)) != ((nbuckets << 1) - 1))
		++nbuckets;

	if ((cl = pif->pif_classes[pri]) != NULL) {
		/* modify the class instead of creating a new one */
		crit_enter();
		if (cl->cl_head)
			fairq_purgeq(cl);
		crit_exit();
#ifdef ALTQ_RIO
		if (cl->cl_qtype == Q_RIO)
			rio_destroy((rio_t *)cl->cl_red);
#endif
#ifdef ALTQ_RED
		if (cl->cl_qtype == Q_RED)
			red_destroy(cl->cl_red);
#endif
	} else {
		cl = kmalloc(sizeof(*cl), M_ALTQ, M_WAITOK | M_ZERO);
		cl->cl_nbuckets = nbuckets;
		cl->cl_nbucket_mask = nbuckets - 1;

		cl->cl_buckets = kmalloc(sizeof(*cl->cl_buckets) *
					 cl->cl_nbuckets,
					 M_ALTQ, M_WAITOK | M_ZERO);
		cl->cl_head = NULL;
	}

	pif->pif_classes[pri] = cl;
	if (flags & FARF_DEFAULTCLASS)
		pif->pif_default = cl;
	if (qlimit == 0)
		qlimit = 50;	/* use default */
	cl->cl_qlimit = qlimit;
	for (i = 0; i < cl->cl_nbuckets; ++i) {
		qlimit(&cl->cl_buckets[i].queue) = qlimit;
	}
	cl->cl_bandwidth = bandwidth / 8;	/* cvt to bytes per second */
	cl->cl_qtype = Q_DROPTAIL;
	cl->cl_flags = flags & FARF_USERFLAGS;
	cl->cl_pri = pri;
	if (pri > pif->pif_maxpri)
		pif->pif_maxpri = pri;
	cl->cl_pif = pif;
	cl->cl_handle = qid;
	cl->cl_hogs_m1 = opts->hogs_m1 / 8;
	cl->cl_lssc_m1 = opts->lssc_m1 / 8;	/* NOT YET USED */
	cl->cl_bw_current = 0;

#ifdef ALTQ_RED
	if (flags & (FARF_RED|FARF_RIO)) {
		int red_flags, red_pkttime;

		red_flags = 0;
		if (flags & FARF_ECN)
			red_flags |= REDF_ECN;
#ifdef ALTQ_RIO
		if (flags & FARF_CLEARDSCP)
			red_flags |= RIOF_CLEARDSCP;
#endif
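		/*
		 * red_pkttime approximates the time, in nanoseconds, to
		 * transmit one MTU-sized packet at the configured link
		 * bandwidth (given in bits per second, hence the
		 * division by 8).
		 */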
		if (pif->pif_bandwidth < 8)
			red_pkttime = 1000 * 1000 * 1000; /* 1 sec */
		else
			red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu
				      * 1000 * 1000 * 1000 / (pif->pif_bandwidth / 8);
#ifdef ALTQ_RIO
		if (flags & FARF_RIO) {
			cl->cl_red = (red_t *)rio_alloc(0, NULL,
					red_flags, red_pkttime);
			if (cl->cl_red != NULL)
				cl->cl_qtype = Q_RIO;
		} else
#endif
		if (flags & FARF_RED) {
			cl->cl_red = red_alloc(0, 0,
					cl->cl_qlimit * 10/100,
					cl->cl_qlimit * 30/100,
					red_flags, red_pkttime);
			if (cl->cl_red != NULL)
				cl->cl_qtype = Q_RED;
		}
	}
#endif /* ALTQ_RED */

	return (cl);
}

static int
fairq_class_destroy(struct fairq_class *cl)
{
	struct fairq_if *pif;
	int pri;

	crit_enter();

	if (cl->cl_head)
		fairq_purgeq(cl);

	pif = cl->cl_pif;
	pif->pif_classes[cl->cl_pri] = NULL;
	if (pif->pif_poll_cache == cl)
		pif->pif_poll_cache = NULL;
	if (pif->pif_maxpri == cl->cl_pri) {
		for (pri = cl->cl_pri; pri >= 0; pri--)
			if (pif->pif_classes[pri] != NULL) {
				pif->pif_maxpri = pri;
				break;
			}
		if (pri < 0)
			pif->pif_maxpri = -1;
	}
	crit_exit();

	if (cl->cl_red != NULL) {
#ifdef ALTQ_RIO
		if (cl->cl_qtype == Q_RIO)
			rio_destroy((rio_t *)cl->cl_red);
#endif
#ifdef ALTQ_RED
		if (cl->cl_qtype == Q_RED)
			red_destroy(cl->cl_red);
#endif
	}
	kfree(cl->cl_buckets, M_ALTQ);
	cl->cl_head = NULL;	/* sanity */
	cl->cl_polled = NULL;	/* sanity */
	cl->cl_buckets = NULL;	/* sanity */
	kfree(cl, M_ALTQ);

	return (0);
}

/*
 * fairq_enqueue is an enqueue function to be registered to
 * (*altq_enqueue) in struct ifaltq.
 */
static int
fairq_enqueue(struct ifaltq_subque *ifsq, struct mbuf *m,
	      struct altq_pktattr *pktattr)
{
	struct ifaltq *ifq = ifsq->ifsq_altq;
	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
	struct fairq_class *cl;
	int error;
	int len;
	int hash;

	if (ifsq_get_index(ifsq) != FAIRQ_SUBQ_INDEX) {
		/*
		 * A race happened: an unrelated subqueue was
		 * picked during the packet scheduler transition.
		 */
		ifsq_classic_request(ifsq, ALTRQ_PURGE, NULL);
		m_freem(m);
		return ENOBUFS;
	}

	crit_enter();

	/* grab class set by classifier */
	if ((m->m_flags & M_PKTHDR) == 0) {
		/* should not happen */
		if_printf(ifq->altq_ifp, "altq: packet does not have pkthdr\n");
		m_freem(m);
		error = ENOBUFS;
		goto done;
	}

	if (m->m_pkthdr.fw_flags & PF_MBUF_STRUCTURE) {
		cl = clh_to_clp(pif, m->m_pkthdr.pf.qid);
		if (m->m_pkthdr.pf.flags & PF_TAG_STATE_HASHED)
			hash = (int)m->m_pkthdr.pf.state_hash;
		else
			hash = 0;
	} else {
		cl = NULL;
		hash = 0;
	}
	if (cl == NULL) {
		cl = pif->pif_default;
		if (cl == NULL) {
			m_freem(m);
			error = ENOBUFS;
			goto done;
		}
	}
	cl->cl_flags |= FARF_HAS_PACKETS;
	cl->cl_pktattr = NULL;
	len = m_pktlen(m);
	if (fairq_addq(cl, m, hash) != 0) {
		/* drop occurred.  mbuf was freed in fairq_addq. */
		PKTCNTR_ADD(&cl->cl_dropcnt, len);
		error = ENOBUFS;
		goto done;
	}
	ifsq->ifq_len++;
	error = 0;
done:
	crit_exit();
	return (error);
}

/*
 * fairq_dequeue is a dequeue function to be registered to
 * (*altq_dequeue) in struct ifaltq.
 *
 * note: ALTDQ_POLL returns the next packet without removing the packet
 *	from the queue.  ALTDQ_REMOVE is a normal dequeue operation.
 *	ALTDQ_REMOVE must return the same packet if called immediately
 *	after ALTDQ_POLL.
 */
static struct mbuf *
fairq_dequeue(struct ifaltq_subque *ifsq, struct mbuf *mpolled, int op)
{
	struct ifaltq *ifq = ifsq->ifsq_altq;
	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
	struct fairq_class *cl;
	struct fairq_class *best_cl;
	struct mbuf *best_m;
	struct mbuf *m;
	uint64_t cur_time = read_machclk();
	u_int best_scale;
	u_int scale;
	int pri;
	int hit_limit;

	if (ifsq_get_index(ifsq) != FAIRQ_SUBQ_INDEX) {
		/*
		 * A race happened: an unrelated subqueue was
		 * picked during the packet scheduler transition.
		 */
		ifsq_classic_request(ifsq, ALTRQ_PURGE, NULL);
		return NULL;
	}

	if (ifsq_is_empty(ifsq)) {
		/* no packet in the queue */
		KKASSERT(mpolled == NULL);
		return (NULL);
	}

	crit_enter();
	if (pif->pif_poll_cache && op == ALTDQ_REMOVE) {
		best_cl = pif->pif_poll_cache;
		m = fairq_getq(best_cl, cur_time);
		pif->pif_poll_cache = NULL;
		if (m) {
			ifsq->ifq_len--;
			PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m));
		}
	} else {
		best_cl = NULL;
		best_m = NULL;
		best_scale = 0xFFFFFFFFU;

		for (pri = pif->pif_maxpri; pri >= 0; pri--) {
			if ((cl = pif->pif_classes[pri]) == NULL)
				continue;
			if ((cl->cl_flags & FARF_HAS_PACKETS) == 0)
				continue;
			m = fairq_pollq(cl, cur_time, &hit_limit);
			if (m == NULL) {
				cl->cl_flags &= ~FARF_HAS_PACKETS;
				continue;
			}

			/*
			 * We can halt the search immediately if the queue
			 * did not hit its bandwidth limit.
			 */
			if (hit_limit == 0) {
				best_cl = cl;
				best_m = m;
				break;
			}

			/*
			 * Otherwise calculate the scale factor and select
			 * the queue with the lowest scale factor.  This
			 * apportions any unused bandwidth weighted by
			 * the relative bandwidth specification.
			 */
			scale = cl->cl_bw_current * 100 / cl->cl_bandwidth;
			if (scale < best_scale) {
				best_cl = cl;
				best_m = m;
				best_scale = scale;
			}
		}

		if (op == ALTDQ_POLL) {
			pif->pif_poll_cache = best_cl;
			m = best_m;
		} else if (best_cl) {
			m = fairq_getq(best_cl, cur_time);
			KKASSERT(best_m == m);
			ifsq->ifq_len--;
			PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m));
		} else {
			m = NULL;
		}
	}
	crit_exit();
	KKASSERT(mpolled == NULL || mpolled == m);
	return (m);
}

static int
fairq_addq(struct fairq_class *cl, struct mbuf *m, int hash)
{
	fairq_bucket_t *b;
	u_int hindex;
	uint64_t bw;

	/*
	 * If the packet doesn't have any keep state, put it on the end of
	 * our queue.  XXX this can result in out-of-order delivery.
	 */
	if (hash == 0) {
		if (cl->cl_head)
			b = cl->cl_head->prev;
		else
			b = &cl->cl_buckets[0];
	} else {
		hindex = hash & cl->cl_nbucket_mask;
		b = &cl->cl_buckets[hindex];
	}

	/*
	 * Add the bucket to the end of the circular list of active buckets.
	 *
	 * As a special case we add the bucket to the beginning of the list
	 * instead of the end if it was not previously on the list and if
	 * its traffic is less than the hog level.
	 */
	if (b->in_use == 0) {
		b->in_use = 1;
		if (cl->cl_head == NULL) {
			cl->cl_head = b;
			b->next = b;
			b->prev = b;
		} else {
			b->next = cl->cl_head;
			b->prev = cl->cl_head->prev;
			b->prev->next = b;
			b->next->prev = b;

			if (b->bw_delta && cl->cl_hogs_m1) {
				bw = b->bw_bytes * machclk_freq / b->bw_delta;
				if (bw < cl->cl_hogs_m1)
					cl->cl_head = b;
			}
		}
	}

#ifdef ALTQ_RIO
	if (cl->cl_qtype == Q_RIO)
		return rio_addq((rio_t *)cl->cl_red, &b->queue, m, cl->cl_pktattr);
#endif
#ifdef ALTQ_RED
	if (cl->cl_qtype == Q_RED)
		return red_addq(cl->cl_red, &b->queue, m, cl->cl_pktattr);
#endif
	if (qlen(&b->queue) >= qlimit(&b->queue)) {
		m_freem(m);
		return (-1);
	}

	if (cl->cl_flags & FARF_CLEARDSCP)
		write_dsfield(m, cl->cl_pktattr, 0);

	_addq(&b->queue, m);

	return (0);
}

static struct mbuf *
fairq_getq(struct fairq_class *cl, uint64_t cur_time)
{
	fairq_bucket_t *b;
	struct mbuf *m;

	b = fairq_selectq(cl, 0);
	if (b == NULL)
		m = NULL;
#ifdef ALTQ_RIO
	else if (cl->cl_qtype == Q_RIO)
		m = rio_getq((rio_t *)cl->cl_red, &b->queue);
#endif
#ifdef ALTQ_RED
	else if (cl->cl_qtype == Q_RED)
		m = red_getq(cl->cl_red, &b->queue);
#endif
	else
		m = _getq(&b->queue);

	/*
	 * Calculate the BW change
	 */
	if (m != NULL) {
		uint64_t delta;

		/*
		 * Per-class bandwidth calculation
		 */
		delta = (cur_time - cl->cl_last_time);
		if (delta > machclk_freq * 8)
			delta = machclk_freq * 8;
		cl->cl_bw_delta += delta;
		cl->cl_bw_bytes += m->m_pkthdr.len;
		cl->cl_last_time = cur_time;
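		/*
		 * Once more than one second's worth of machclk ticks
		 * accumulates, decay both accumulators by 25% so that
		 * bw_bytes / bw_delta tracks a sliding average of recent
		 * bandwidth rather than an all-time average.  The same
		 * decay is applied to the per-bucket state below.
		 */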
		if (cl->cl_bw_delta > machclk_freq) {
			cl->cl_bw_delta -= cl->cl_bw_delta >> 2;
			cl->cl_bw_bytes -= cl->cl_bw_bytes >> 2;
		}

		/*
		 * Per-bucket bandwidth calculation
		 */
		delta = (cur_time - b->last_time);
		if (delta > machclk_freq * 8)
			delta = machclk_freq * 8;
		b->bw_delta += delta;
		b->bw_bytes += m->m_pkthdr.len;
		b->last_time = cur_time;
		if (b->bw_delta > machclk_freq) {
			b->bw_delta -= b->bw_delta >> 2;
			b->bw_bytes -= b->bw_bytes >> 2;
		}
	}
	return(m);
}

/*
 * Figure out what the next packet would be if there were no limits.  If
 * this class hits its bandwidth limit *hit_limit is set to non-zero,
 * otherwise it is set to 0.  A non-NULL mbuf is returned either way, as
 * long as the class has a packet queued.
 */
static struct mbuf *
fairq_pollq(struct fairq_class *cl, uint64_t cur_time, int *hit_limit)
{
	fairq_bucket_t *b;
	struct mbuf *m;
	uint64_t delta;
	uint64_t bw;

	*hit_limit = 0;
	b = fairq_selectq(cl, 1);
	if (b == NULL)
		return(NULL);
	m = qhead(&b->queue);

	/*
	 * Did this packet exceed the class bandwidth?  Calculate the
	 * bandwidth component of the packet.
	 *
	 * - Calculate bytes per second
	 */
	delta = cur_time - cl->cl_last_time;
	if (delta > machclk_freq * 8)
		delta = machclk_freq * 8;
	cl->cl_bw_delta += delta;
	cl->cl_last_time = cur_time;
	if (cl->cl_bw_delta) {
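		/*
		 * bw_bytes over (bw_delta / machclk_freq) is the average
		 * bytes per second seen in the decayed measurement
		 * window; compare it against the provisioned rate.
		 */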
		bw = cl->cl_bw_bytes * machclk_freq / cl->cl_bw_delta;

		if (bw > cl->cl_bandwidth)
			*hit_limit = 1;
		cl->cl_bw_current = bw;
#if 0
		kprintf("BW %6lld relative to %6u %d queue %p\n",
			bw, cl->cl_bandwidth, *hit_limit, b);
#endif
	}
	return(m);
}

/*
 * Locate the next queue we want to pull a packet out of.  This code
 * is also responsible for removing empty buckets from the circular list.
 */
static
fairq_bucket_t *
fairq_selectq(struct fairq_class *cl, int ispoll)
{
	fairq_bucket_t *b;
	uint64_t bw;

	if (ispoll == 0 && cl->cl_polled) {
		b = cl->cl_polled;
		cl->cl_polled = NULL;
		return(b);
	}

	while ((b = cl->cl_head) != NULL) {
		/*
		 * Remove empty queues from consideration
		 */
		if (qempty(&b->queue)) {
			b->in_use = 0;
			cl->cl_head = b->next;
			if (cl->cl_head == b) {
				cl->cl_head = NULL;
			} else {
				b->next->prev = b->prev;
				b->prev->next = b->next;
			}
			continue;
		}

		/*
		 * Advance the round robin.  Queues with bandwidths less
		 * than the hog bandwidth are allowed to burst.
		 */
		if (cl->cl_hogs_m1 == 0) {
			cl->cl_head = b->next;
		} else if (b->bw_delta) {
			bw = b->bw_bytes * machclk_freq / b->bw_delta;
			if (bw >= cl->cl_hogs_m1) {
				cl->cl_head = b->next;
			}
			/*
			 * XXX TODO -
			 */
		}

		/*
		 * Return bucket b.
		 */
		break;
	}
	if (ispoll)
		cl->cl_polled = b;
	return(b);
}

static void
fairq_purgeq(struct fairq_class *cl)
{
	fairq_bucket_t *b;
	struct mbuf *m;

	while ((b = fairq_selectq(cl, 0)) != NULL) {
		while ((m = _getq(&b->queue)) != NULL) {
			PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m));
			m_freem(m);
		}
		KKASSERT(qlen(&b->queue) == 0);
	}
}

static void
get_class_stats(struct fairq_classstats *sp, struct fairq_class *cl)
{
	fairq_bucket_t *b;

	sp->class_handle = cl->cl_handle;
	sp->qlimit = cl->cl_qlimit;
	sp->xmit_cnt = cl->cl_xmitcnt;
	sp->drop_cnt = cl->cl_dropcnt;
	sp->qtype = cl->cl_qtype;
	sp->qlength = 0;

	if (cl->cl_head) {
		b = cl->cl_head;
		do {
			sp->qlength += qlen(&b->queue);
			b = b->next;
		} while (b != cl->cl_head);
	}

#ifdef ALTQ_RED
	if (cl->cl_qtype == Q_RED)
		red_getstats(cl->cl_red, &sp->red[0]);
#endif
#ifdef ALTQ_RIO
	if (cl->cl_qtype == Q_RIO)
		rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
#endif
}

/* convert a class handle to the corresponding class pointer */
static struct fairq_class *
clh_to_clp(struct fairq_if *pif, uint32_t chandle)
{
	struct fairq_class *cl;
	int idx;

	if (chandle == 0)
		return (NULL);

	for (idx = pif->pif_maxpri; idx >= 0; idx--)
		if ((cl = pif->pif_classes[idx]) != NULL &&
		    cl->cl_handle == chandle)
			return (cl);

	return (NULL);
}

#endif /* ALTQ_FAIRQ */