2 * Copyright (c) 1999 Cameron Grant <cg@FreeBSD.org>
3 * Copyright (c) 2003 Orion Hodson <orion@FreeBSD.org>
4 * Copyright (c) 2005 Ariff Abdullah <ariff@FreeBSD.org>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * *New* and rewritten soft sample rate converter supporting arbitrary sample
32 * rates, fine grained scaling/coefficients and a unified up/down stereo
33 * converter. Most of the disclaimers from orion's notes also applies
34 * here, regarding linear interpolation deficiencies and pre/post
35 * anti-aliasing filtering issues. This version comes with a much simpler and
36 * tighter interface, although it works almost exactly like the older one.
38 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
40 * This new implementation is fully dedicated in memory of Cameron Grant, *
41 * the creator of the magnificent, highly addictive feeder infrastructure. *
43 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
48 * This rate conversion code uses linear interpolation without any
49 * pre- or post- interpolation filtering to combat aliasing. This
50 * greatly limits the sound quality and should be addressed at some
51 * stage in the future.
53 * Since this accuracy of interpolation is sensitive and examination
54 * of the algorithm output is harder from the kernel, the code is
55 * designed to be compiled in the kernel and in a userland test
56 * harness. This is done by selectively including and excluding code
57 * with several portions based on whether _KERNEL is defined. It's a
58 * little ugly, but exceedingly useful. The testsuite and its
59 * revisions can be found at:
60 * http://people.freebsd.org/~orion/files/feedrate/
62 * Special thanks to Ken Marx for exposing flaws in the code and for
65 * $FreeBSD: src/sys/dev/sound/pcm/feeder_rate.c,v 1.11.2.2 2006/01/29 02:27:28 ariff Exp $
66 * $DragonFly: src/sys/dev/sound/pcm/feeder_rate.c,v 1.5 2007/01/04 21:47:03 corecode Exp $
69 #include <dev/sound/pcm/sound.h>
70 #include "feeder_if.h"
72 SND_DECLARE_FILE("$DragonFly: src/sys/dev/sound/pcm/feeder_rate.c,v 1.5 2007/01/04 21:47:03 corecode Exp $");
74 #define RATE_ASSERT(x, y) /* KASSERT(x,y) */
75 #define RATE_TEST(x, y) /* if (!(x)) printf y */
76 #define RATE_TRACE(x...) /* printf(x) */
78 MALLOC_DEFINE(M_RATEFEEDER, "ratefeed", "pcm rate feeder");
80 #define FEEDBUFSZ 8192
83 /* 8000 * 138 or 11025 * 100 . This is insane, indeed! */
84 #define RATEMAX 1102500
88 #define FEEDRATE_CONVERT_64 0
89 #define FEEDRATE_CONVERT_SCALE64 1
90 #define FEEDRATE_CONVERT_SCALE32 2
91 #define FEEDRATE_CONVERT_PLAIN 3
92 #define FEEDRATE_CONVERT_FIXED 4
93 #define FEEDRATE_CONVERT_OPTIMAL 5
94 #define FEEDRATE_CONVERT_WORST 6
96 #define FEEDRATE_64_MAXROLL 32
97 #define FEEDRATE_32_MAXROLL 16
99 struct feed_rate_info {
100 uint32_t src, dst; /* rounded source / destination rates */
101 uint32_t rsrc, rdst; /* original source / destination rates */
102 uint32_t gx, gy; /* interpolation / decimation ratio */
103 uint32_t alpha; /* interpolation distance */
104 uint32_t pos, bpos; /* current sample / buffer positions */
105 uint32_t bufsz; /* total buffer size */
106 uint32_t stray; /* stray bytes */
107 int32_t scale, roll; /* scale / roll factor */
109 uint32_t (*convert)(struct feed_rate_info *, int16_t *, uint32_t);
113 feed_convert_64(struct feed_rate_info *, int16_t *, uint32_t);
115 feed_convert_scale64(struct feed_rate_info *, int16_t *, uint32_t);
117 feed_convert_scale32(struct feed_rate_info *, int16_t *, uint32_t);
119 feed_convert_plain(struct feed_rate_info *, int16_t *, uint32_t);
121 int feeder_rate_ratemin = RATEMIN;
122 int feeder_rate_ratemax = RATEMAX;
124 * See 'Feeder Scaling Type' below..
126 static int feeder_rate_scaling = FEEDRATE_CONVERT_OPTIMAL;
127 static int feeder_rate_buffersize = FEEDBUFSZ & ~1;
131 * sysctls.. I love sysctls..
133 TUNABLE_INT("hw.snd.feeder_rate_ratemin", &feeder_rate_ratemin);
134 TUNABLE_INT("hw.snd.feeder_rate_ratemax", &feeder_rate_ratemin);
135 TUNABLE_INT("hw.snd.feeder_rate_scaling", &feeder_rate_scaling);
136 TUNABLE_INT("hw.snd.feeder_rate_buffersize", &feeder_rate_buffersize);
139 sysctl_hw_snd_feeder_rate_ratemin(SYSCTL_HANDLER_ARGS)
143 val = feeder_rate_ratemin;
144 err = sysctl_handle_int(oidp, &val, sizeof(val), req);
145 if (val < 1 || val >= feeder_rate_ratemax)
148 feeder_rate_ratemin = val;
151 SYSCTL_PROC(_hw_snd, OID_AUTO, feeder_rate_ratemin, CTLTYPE_INT | CTLFLAG_RW,
152 0, sizeof(int), sysctl_hw_snd_feeder_rate_ratemin, "I", "");
155 sysctl_hw_snd_feeder_rate_ratemax(SYSCTL_HANDLER_ARGS)
159 val = feeder_rate_ratemax;
160 err = sysctl_handle_int(oidp, &val, sizeof(val), req);
161 if (val <= feeder_rate_ratemin || val > 0x7fffff)
164 feeder_rate_ratemax = val;
167 SYSCTL_PROC(_hw_snd, OID_AUTO, feeder_rate_ratemax, CTLTYPE_INT | CTLFLAG_RW,
168 0, sizeof(int), sysctl_hw_snd_feeder_rate_ratemax, "I", "");
171 sysctl_hw_snd_feeder_rate_scaling(SYSCTL_HANDLER_ARGS)
175 val = feeder_rate_scaling;
176 err = sysctl_handle_int(oidp, &val, sizeof(val), req);
178 * Feeder Scaling Type
179 * ===================
181 * 1. Plain 64bit (high precision)
182 * 2. 64bit scaling (high precision, CPU friendly, but can
183 * cause gain up/down).
184 * 3. 32bit scaling (somehow can cause hz roundup, gain
186 * 4. Plain copy (default if src == dst. Except if src == dst,
187 * this is the worst / silly conversion method!).
191 * 0 - Plain 64bit - no fallback.
192 * 1 - 64bit scaling - no fallback.
193 * 2 - 32bit scaling - no fallback.
194 * 3 - Plain copy - no fallback.
195 * 4 - Fixed rate. Means that, choose optimal conversion method
196 * without causing hz roundup.
197 * 32bit scaling (as long as hz roundup does not occur),
198 * 64bit scaling, Plain 64bit.
199 * 5 - Optimal / CPU friendly (DEFAULT).
200 * 32bit scaling, 64bit scaling, Plain 64bit
201 * 6 - Optimal to worst, no 64bit arithmetic involved.
202 * 32bit scaling, Plain copy.
204 if (val < FEEDRATE_CONVERT_64 || val > FEEDRATE_CONVERT_WORST)
207 feeder_rate_scaling = val;
210 SYSCTL_PROC(_hw_snd, OID_AUTO, feeder_rate_scaling, CTLTYPE_INT | CTLFLAG_RW,
211 0, sizeof(int), sysctl_hw_snd_feeder_rate_scaling, "I", "");
214 sysctl_hw_snd_feeder_rate_buffersize(SYSCTL_HANDLER_ARGS)
218 val = feeder_rate_buffersize;
219 err = sysctl_handle_int(oidp, &val, sizeof(val), req);
221 * Don't waste too much kernel space
223 if (val < 2 || val > 65536)
226 feeder_rate_buffersize = val & ~1;
229 SYSCTL_PROC(_hw_snd, OID_AUTO, feeder_rate_buffersize, CTLTYPE_INT | CTLFLAG_RW,
230 0, sizeof(int), sysctl_hw_snd_feeder_rate_buffersize, "I", "");
234 feed_speed_ratio(uint32_t x, uint32_t y, uint32_t *gx, uint32_t *gy)
236 uint32_t w, src = x, dst = y;
248 feed_scale_roll(uint32_t dst, int32_t *scale, int32_t *roll, int32_t max)
254 for (j = MAXGAIN; j >= MINGAIN; j -= 3) {
255 for (troll = 0; troll < max; troll++) {
256 tscale = (1 << troll) / dst;
257 k = (tscale * dst * 100) >> troll;
258 if (k > j && k <= 100) {
268 feed_get_best_coef(uint32_t *src, uint32_t *dst, uint32_t *gx, uint32_t *gy,
269 int32_t *scale, int32_t *roll)
271 uint32_t tsrc, tdst, sscale, dscale;
272 int32_t tscale, troll;
273 int i, j, hzmin, hzmax;
276 for (i = 0; i < 2; i++) {
277 hzmin = (ROUNDHZ * i) + 1;
278 hzmax = hzmin + ROUNDHZ;
279 for (j = hzmin; j < hzmax; j++) {
280 tsrc = *src - (*src % j);
282 if (tsrc < 1 || tdst < 1)
284 feed_speed_ratio(tsrc, tdst, &sscale, &dscale);
285 feed_scale_roll(dscale, &tscale, &troll,
286 FEEDRATE_32_MAXROLL);
287 if (tscale != -1 && troll != -1) {
296 for (j = hzmin; j < hzmax; j++) {
297 tsrc = *src - (*src % j);
298 tdst = *dst - (*dst % j);
299 if (tsrc < 1 || tdst < 1)
301 feed_speed_ratio(tsrc, tdst, &sscale, &dscale);
302 feed_scale_roll(dscale, &tscale, &troll,
303 FEEDRATE_32_MAXROLL);
304 if (tscale != -1 && troll != -1) {
314 for (j = hzmin; j < hzmax; j++) {
316 tdst = *dst - (*dst % j);
317 if (tsrc < 1 || tdst < 1)
319 feed_speed_ratio(tsrc, tdst, &sscale, &dscale);
320 feed_scale_roll(dscale, &tscale, &troll,
321 FEEDRATE_32_MAXROLL);
322 if (tscale != -1 && troll != -1) {
334 feed_speed_ratio(*src, *dst, gx, gy);
335 feed_scale_roll(*gy, scale, roll, FEEDRATE_32_MAXROLL);
340 feed_rate_reset(struct feed_rate_info *info)
344 info->src = info->rsrc;
345 info->dst = info->rdst;
351 feed_rate_setup(struct pcm_feeder *f)
353 struct feed_rate_info *info = f->data;
360 feed_rate_reset(info);
361 if (info->src == info->dst) {
363 * No conversion ever needed. Just do plain copy.
365 info->convert = feed_convert_plain;
369 switch (feeder_rate_scaling) {
370 case FEEDRATE_CONVERT_64:
371 feed_speed_ratio(info->src, info->dst,
372 &info->gx, &info->gy);
373 info->convert = feed_convert_64;
375 case FEEDRATE_CONVERT_SCALE64:
376 feed_speed_ratio(info->src, info->dst,
377 &info->gx, &info->gy);
378 feed_scale_roll(info->gy, &info->scale,
379 &info->roll, FEEDRATE_64_MAXROLL);
380 if (info->scale == -1 || info->roll == -1)
382 info->convert = feed_convert_scale64;
384 case FEEDRATE_CONVERT_SCALE32:
385 r = feed_get_best_coef(&info->src, &info->dst,
386 &info->gx, &info->gy, &info->scale,
390 info->convert = feed_convert_scale32;
392 case FEEDRATE_CONVERT_PLAIN:
393 feed_speed_ratio(info->src, info->dst,
394 &info->gx, &info->gy);
395 info->convert = feed_convert_plain;
397 case FEEDRATE_CONVERT_FIXED:
398 r = feed_get_best_coef(&info->src, &info->dst,
399 &info->gx, &info->gy, &info->scale,
401 if (r != 0 && info->src == info->rsrc &&
402 info->dst == info->rdst)
403 info->convert = feed_convert_scale32;
406 feed_rate_reset(info);
407 feed_speed_ratio(info->src, info->dst,
408 &info->gx, &info->gy);
409 feed_scale_roll(info->gy, &info->scale,
410 &info->roll, FEEDRATE_64_MAXROLL);
411 if (info->scale != -1 && info->roll != -1)
412 info->convert = feed_convert_scale64;
414 info->convert = feed_convert_64;
417 case FEEDRATE_CONVERT_OPTIMAL:
418 r = feed_get_best_coef(&info->src, &info->dst,
419 &info->gx, &info->gy, &info->scale,
422 info->convert = feed_convert_scale32;
425 feed_rate_reset(info);
426 feed_speed_ratio(info->src, info->dst,
427 &info->gx, &info->gy);
428 feed_scale_roll(info->gy, &info->scale,
429 &info->roll, FEEDRATE_64_MAXROLL);
430 if (info->scale != -1 && info->roll != -1)
431 info->convert = feed_convert_scale64;
433 info->convert = feed_convert_64;
436 case FEEDRATE_CONVERT_WORST:
437 r = feed_get_best_coef(&info->src, &info->dst,
438 &info->gx, &info->gy, &info->scale,
441 info->convert = feed_convert_scale32;
444 feed_rate_reset(info);
445 feed_speed_ratio(info->src, info->dst,
446 &info->gx, &info->gy);
447 info->convert = feed_convert_plain;
455 if (info->gx == 0 || info->gy == 0)
458 * No need to interpolate/decimate, just do plain copy.
459 * This probably caused by Hz roundup.
461 if (info->gx == info->gy)
462 info->convert = feed_convert_plain;
468 feed_rate_set(struct pcm_feeder *f, int what, int value)
470 struct feed_rate_info *info = f->data;
472 if (value < feeder_rate_ratemin || value > feeder_rate_ratemax)
485 return feed_rate_setup(f);
489 feed_rate_get(struct pcm_feeder *f, int what)
491 struct feed_rate_info *info = f->data;
494 * Return *real* src/dst rate.
508 feed_rate_init(struct pcm_feeder *f)
510 struct feed_rate_info *info;
512 info = kmalloc(sizeof(*info), M_RATEFEEDER, M_NOWAIT | M_ZERO);
516 * bufsz = sample from last cycle + conversion space
518 info->bufsz = 2 + feeder_rate_buffersize;
519 info->buffer = kmalloc(sizeof(*info->buffer) * info->bufsz,
520 M_RATEFEEDER, M_NOWAIT | M_ZERO);
521 if (info->buffer == NULL) {
522 kfree(info, M_RATEFEEDER);
525 info->rsrc = DSP_DEFAULT_SPEED;
526 info->rdst = DSP_DEFAULT_SPEED;
528 return feed_rate_setup(f);
532 feed_rate_free(struct pcm_feeder *f)
534 struct feed_rate_info *info = f->data;
538 kfree(info->buffer, M_RATEFEEDER);
539 kfree(info, M_RATEFEEDER);
546 feed_convert_64(struct feed_rate_info *info, int16_t *dst, uint32_t max)
548 int64_t x, alpha, distance;
550 int32_t pos, bpos, gx, gy;
553 * Plain, straight forward 64bit arith. No bit-magic applied here.
570 distance = gy - alpha;
571 x = (alpha * src[pos - 2]) + (distance * src[pos]);
573 x = (alpha * src[pos - 1]) + (distance * src[pos + 1]);
585 feed_convert_scale64(struct feed_rate_info *info, int16_t *dst, uint32_t max)
587 int64_t x, alpha, distance;
589 int32_t pos, bpos, gx, gy, roll;
596 alpha = info->alpha * info->scale;
597 gx = info->gx * info->scale;
598 gy = info->gy * info->scale;
610 distance = gy - alpha;
611 x = (alpha * src[pos - 2]) + (distance * src[pos]);
612 dst[ret++] = x >> roll;
613 x = (alpha * src[pos - 1]) + (distance * src[pos + 1]);
614 dst[ret++] = x >> roll;
619 info->alpha = alpha / info->scale;
625 feed_convert_scale32(struct feed_rate_info *info, int16_t *dst, uint32_t max)
628 int32_t x, pos, bpos, gx, gy, alpha, roll, distance;
635 alpha = info->alpha * info->scale;
636 gx = info->gx * info->scale;
637 gy = info->gy * info->scale;
649 distance = gy - alpha;
650 x = (alpha * src[pos - 2]) + (distance * src[pos]);
651 dst[ret++] = x >> roll;
652 x = (alpha * src[pos - 1]) + (distance * src[pos + 1]);
653 dst[ret++] = x >> roll;
658 info->alpha = alpha / info->scale;
664 feed_convert_plain(struct feed_rate_info *info, int16_t *dst, uint32_t max)
667 int32_t pos, bpos, gx, gy, alpha;
687 dst[ret++] = src[pos];
688 dst[ret++] = src[pos + 1];
699 feed_rate(struct pcm_feeder *f, struct pcm_channel *c, uint8_t *b,
700 uint32_t count, void *source)
702 struct feed_rate_info *info = f->data;
705 int16_t *dst = (int16_t *)b;
707 * This loop has been optimized to generalize both up / down
708 * sampling without causing missing samples or excessive buffer
711 RATE_TEST(count >= 4 && (count & 3) == 0,
712 ("%s: Count size not byte integral (%d)\n", __func__, count));
717 slot = (((info->gx * (count >> 1)) + info->gy - info->alpha - 1) / info->gy) << 1;
718 RATE_TEST((slot & 1) == 0, ("%s: Slot count not sample integral (%d)\n",
721 * Optimize buffer feeding aggressively to ensure calculated slot
722 * can be fitted nicely into available buffer free space, hence
723 * avoiding multiple feeding.
725 RATE_TEST(info->stray == 0, ("%s: [1] Stray bytes: %u\n",
726 __func__,info->stray));
727 if (info->pos != 2 && info->bpos - info->pos == 2 &&
728 info->bpos + slot > info->bufsz) {
730 * Copy last unit sample and its previous to
731 * beginning of buffer.
733 info->buffer[0] = info->buffer[info->pos - 2];
734 info->buffer[1] = info->buffer[info->pos - 1];
735 info->buffer[2] = info->buffer[info->pos];
736 info->buffer[3] = info->buffer[info->pos + 1];
740 RATE_ASSERT(slot >= 0, ("%s: Negative Slot: %d\n",
745 fetch = (info->bufsz - info->bpos) << 1;
746 fetch -= info->stray;
747 RATE_ASSERT(fetch >= 0,
748 ("%s: [1] Buffer overrun: %d > %d\n",
749 __func__, info->bpos, info->bufsz));
750 if ((slot << 1) < fetch)
753 RATE_ASSERT(((info->bpos << 1) - info->stray) >= 0 &&
754 ((info->bpos << 1) - info->stray) < (info->bufsz << 1),
755 ("%s: DANGER - BUFFER OVERRUN! bufsz=%d, pos=%d\n", __func__,
756 info->bufsz << 1, (info->bpos << 1) - info->stray));
757 fetch = FEEDER_FEED(f->source, c,
758 (uint8_t *)(info->buffer) + (info->bpos << 1) - info->stray,
763 RATE_TEST((fetch & 3) == 0,
764 ("%s: Fetch size not byte integral (%d)\n",
766 info->stray += fetch & 3;
767 RATE_TEST(info->stray == 0,
768 ("%s: Stray bytes detected (%d)\n",
769 __func__, info->stray));
774 RATE_ASSERT(slot >= 0,
775 ("%s: Negative Slot: %d\n", __func__,
779 if (info->bpos == info->bufsz)
784 if (info->pos == info->bpos) {
785 RATE_TEST(info->pos == 2,
786 ("%s: EOF while in progress\n", __func__));
789 RATE_ASSERT(info->pos <= info->bpos,
790 ("%s: [2] Buffer overrun: %d > %d\n", __func__,
791 info->pos, info->bpos));
792 RATE_ASSERT(info->pos < info->bpos,
793 ("%s: Zero buffer!\n", __func__));
794 RATE_ASSERT(((info->bpos - info->pos) & 1) == 0,
795 ("%s: Buffer not sample integral (%d)\n",
796 __func__, info->bpos - info->pos));
797 i += info->convert(info, dst + i, count - i);
798 RATE_ASSERT(info->pos <= info->bpos,
799 ("%s: [3] Buffer overrun: %d > %d\n",
800 __func__, info->pos, info->bpos));
801 if (info->pos == info->bpos) {
803 * End of buffer cycle. Copy last unit sample
804 * to beginning of buffer so next cycle can
805 * interpolate using it.
807 RATE_TEST(info->stray == 0, ("%s: [2] Stray bytes: %u\n", __func__, info->stray));
808 info->buffer[0] = info->buffer[info->pos - 2];
809 info->buffer[1] = info->buffer[info->pos - 1];
817 RATE_TEST(count == i, ("Expect: %u , Got: %u\n", count << 1, i << 1));
819 RATE_TEST(info->stray == 0, ("%s: [3] Stray bytes: %u\n", __func__, info->stray));
823 static struct pcm_feederdesc feeder_rate_desc[] = {
824 {FEEDER_RATE, AFMT_S16_LE | AFMT_STEREO, AFMT_S16_LE | AFMT_STEREO, 0},
827 static kobj_method_t feeder_rate_methods[] = {
828 KOBJMETHOD(feeder_init, feed_rate_init),
829 KOBJMETHOD(feeder_free, feed_rate_free),
830 KOBJMETHOD(feeder_set, feed_rate_set),
831 KOBJMETHOD(feeder_get, feed_rate_get),
832 KOBJMETHOD(feeder_feed, feed_rate),
835 FEEDER_DECLARE(feeder_rate, 2, NULL);