1 /* ******************************************************************
3 part of Finite State Entropy library
4 Copyright (C) 2013-present, Yann Collet.
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are
12 * Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 * Redistributions in binary form must reproduce the above
15 copyright notice, this list of conditions and the following disclaimer
16 in the documentation and/or other materials provided with the
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 You can contact the author at :
32 - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
33 ****************************************************************** */
35 /* **************************************************************
37 ****************************************************************/
38 #include <string.h> /* memcpy, memset */
40 #include "bitstream.h" /* BIT_* */
41 #include "fse.h" /* to compress headers */
42 #define HUF_STATIC_LINKING_ONLY
44 #include "error_private.h"
46 /* **************************************************************
48 ****************************************************************/
50 /* These two optional macros force the use one way or another of the two
51 * Huffman decompression implementations. You can't force in both directions
#if defined(HUF_FORCE_DECOMPRESS_X1) && \
    defined(HUF_FORCE_DECOMPRESS_X2)
#error "Cannot force the use of the X1 and X2 decoders at the same time!"
#endif  /* terminates the conditional; without it the rest of the file is swallowed */
60 /* **************************************************************
62 ****************************************************************/
/* map HUF error checks onto the shared ERR_* helpers */
#define HUF_isError ERR_isError
/* evaluate f exactly once; if the result is an error code, return it from the enclosing function */
#define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
69 /* **************************************************************
70 * Byte alignment for workSpace management
71 ****************************************************************/
/* round x up to the next multiple of a (a must be a power of 2) */
#define HUF_ALIGN(x, a)         HUF_ALIGN_MASK((x), (a) - 1)
#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
76 /* **************************************************************
77 * BMI2 Variant Wrappers
78 ****************************************************************/
/* HUF_DGEN(fn) :
 * generates wrappers around fn##_body. When DYNAMIC_BMI2 is enabled it emits
 * a default build, a BMI2-targeted build, and a dispatcher `fn` selecting one
 * at runtime from the `bmi2` argument; otherwise `fn` just forwards to
 * fn##_body and ignores `bmi2`.
 * NOTE(review): the #if/#else/#endif guards and macro-body braces were
 * missing from this view (two unconditional redefinitions of HUF_DGEN);
 * reconstructed here so the file preprocesses. */
#if DYNAMIC_BMI2

#define HUF_DGEN(fn)                                                        \
                                                                            \
    static size_t fn##_default(                                             \
                  void* dst,  size_t dstSize,                               \
            const void* cSrc, size_t cSrcSize,                              \
            const HUF_DTable* DTable)                                       \
    {                                                                       \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }                                                                       \
                                                                            \
    static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2(                       \
                  void* dst,  size_t dstSize,                               \
            const void* cSrc, size_t cSrcSize,                              \
            const HUF_DTable* DTable)                                       \
    {                                                                       \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }                                                                       \
                                                                            \
    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
    {                                                                       \
        if (bmi2) {                                                         \
            return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);         \
        }                                                                   \
        return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable);          \
    }

#else

#define HUF_DGEN(fn)                                                        \
    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
    {                                                                       \
        (void)bmi2;                                                         \
        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
    }

#endif
121 /*-***************************/
122 /* generic DTableDesc */
123 /*-***************************/
124 typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
126 static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
129 memcpy(&dtd, table, sizeof(dtd));
134 #ifndef HUF_FORCE_DECOMPRESS_X2
136 /*-***************************/
137 /* single-symbol decoding */
138 /*-***************************/
typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1;   /* single-symbol decoding */

/* HUF_readDTableX1_wksp() :
 * builds the single-symbol decoding table from the Huffman weight header at
 * `src`, carving all temporaries out of `workSpace`.
 * Returns the number of header bytes consumed from `src`, or an error code.
 * NOTE(review): several interior lines (declarations of huffWeight, rankVal,
 * tableLog, nbSymbols, iSize, the HUF_DEltX1 D, loop braces and the final
 * return) appear to be missing from this view; the visible code is left
 * untouched. */
size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
    void* const dtPtr = DTable + 1;
    HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;

    size_t spaceUsed32 = 0;

    /* carve rankVal[] then huffWeight[] out of workSpace, keeping U32 alignment */
    rankVal = (U32 *)workSpace + spaceUsed32;
    spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
    huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;

    /* reject workspaces too small for the temporaries */
    if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);

    DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
    /* memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */

    /* parse the weight header into huffWeight[] / rankVal[] */
    iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
    if (HUF_isError(iSize)) return iSize;

    /* update table header with the actual tableLog */
    { DTableDesc dtd = HUF_getDTableDesc(DTable);
        if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, Huffman tree cannot fit in */
        dtd.tableLog = (BYTE)tableLog;
        memcpy(DTable, &dtd, sizeof(dtd));

    /* Calculate starting value for each rank */
    { U32 n, nextRankStart = 0;
        for (n=1; n<tableLog+1; n++) {
            U32 const current = nextRankStart;
            nextRankStart += (rankVal[n] << (n-1));
            rankVal[n] = current;

    /* fill DTable : each symbol's cell is replicated `length` times */
        for (n=0; n<nbSymbols; n++) {
            U32 const w = huffWeight[n];
            U32 const length = (1 << w) >> 1;
            D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
            for (u = rankVal[w]; u < rankVal[w] + length; u++)
            rankVal[w] += length;
198 size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
200 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
201 return HUF_readDTableX1_wksp(DTable, src, srcSize,
202 workSpace, sizeof(workSpace));
205 FORCE_INLINE_TEMPLATE BYTE
206 HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
208 size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
209 BYTE const c = dt[val].byte;
210 BIT_skipBits(Dstream, dt[val].nbBits);
/* emit one decoded byte from DStreamPtr into *ptr and advance */
#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
    *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)

/* same, guarded : only when the 64-bit accumulator (or a small table) guarantees enough bits remain */
#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)  \
    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)

/* NOTE(review): the `if (MEM_64bits())` guard line of _2 appears missing from this view */
#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
/* HUF_decodeStreamX1() :
 * decodes symbols from *bitDPtr into [p, pEnd) using single-symbol table dt,
 * unrolled 4 at a time while the bitstream can refill, then one at a time.
 * NOTE(review): the return-type line, braces, and final return appear
 * missing from this view; visible code left untouched. */
HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
    BYTE* const pStart = p;

    /* up to 4 symbols at a time */
    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);

    /* [0-3] symbols remaining */
    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);

    /* no more data to retrieve from bitstream, no need to reload */
        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
250 FORCE_INLINE_TEMPLATE size_t
251 HUF_decompress1X1_usingDTable_internal_body(
252 void* dst, size_t dstSize,
253 const void* cSrc, size_t cSrcSize,
254 const HUF_DTable* DTable)
256 BYTE* op = (BYTE*)dst;
257 BYTE* const oend = op + dstSize;
258 const void* dtPtr = DTable + 1;
259 const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
261 DTableDesc const dtd = HUF_getDTableDesc(DTable);
262 U32 const dtLog = dtd.tableLog;
264 CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
266 HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
268 if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
/* HUF_decompress4X1_usingDTable_internal_body() :
 * 4-streams variant: input starts with a 6-byte jump table (three
 * little-endian 16-bit lengths); 4 independent bitstreams each decode one
 * quarter of the output (segmentSize = (dstSize+3)/4).
 * NOTE(review): several lines (the four BIT_DStream_t declarations, `op1`,
 * closing braces and the final return) appear missing from this view;
 * visible code left untouched. */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X1_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
    if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */

    { const BYTE* const istart = (const BYTE*) cSrc;
        BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        const void* const dtPtr = DTable + 1;
        const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;

        /* read the three stream lengths; stream 4 gets the remainder */
        size_t const length1 = MEM_readLE16(istart);
        size_t const length2 = MEM_readLE16(istart+2);
        size_t const length3 = MEM_readLE16(istart+4);
        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
        const BYTE* const istart1 = istart + 6;  /* jumpTable */
        const BYTE* const istart2 = istart1 + length1;
        const BYTE* const istart3 = istart2 + length2;
        const BYTE* const istart4 = istart3 + length3;
        const size_t segmentSize = (dstSize+3) / 4;
        BYTE* const opStart2 = ostart + segmentSize;
        BYTE* const opStart3 = opStart2 + segmentSize;
        BYTE* const opStart4 = opStart3 + segmentSize;
        BYTE* op2 = opStart2;
        BYTE* op3 = opStart3;
        BYTE* op4 = opStart4;
        U32 endSignal = BIT_DStream_unfinished;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        U32 const dtLog = dtd.tableLog;

        /* length4 is computed by subtraction; detect wrap-around */
        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );

        /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
        while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
            HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
            HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
            HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
            HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
            HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
            BIT_reloadDStream(&bitD1);
            BIT_reloadDStream(&bitD2);
            BIT_reloadDStream(&bitD3);
            BIT_reloadDStream(&bitD4);

        /* check corruption */
        /* note : should not be necessary : op# advance in lock step, and we control op4.
         *        but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
        if (op1 > opStart2) return ERROR(corruption_detected);
        if (op2 > opStart3) return ERROR(corruption_detected);
        if (op3 > opStart4) return ERROR(corruption_detected);
        /* note : op4 supposed already verified within main loop */

        /* finish bitStreams one by one */
        HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
        HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
        HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
        HUF_decodeStreamX1(op4, &bitD4, oend,     dt, dtLog);

        /* all 4 streams must be exactly consumed */
        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
          if (!endCheck) return ERROR(corruption_detected); }
/* common signature of the generated decompression wrappers.
 * NOTE(review): the middle parameter lines (`const void *cSrc, size_t cSrcSize,`)
 * appear missing from this view. */
typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
                                               const HUF_DTable *DTable);

/* instantiate default/bmi2 wrappers + dispatcher for the X1 decoder bodies */
HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
378 size_t HUF_decompress1X1_usingDTable(
379 void* dst, size_t dstSize,
380 const void* cSrc, size_t cSrcSize,
381 const HUF_DTable* DTable)
383 DTableDesc dtd = HUF_getDTableDesc(DTable);
384 if (dtd.tableType != 0) return ERROR(GENERIC);
385 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
388 size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
389 const void* cSrc, size_t cSrcSize,
390 void* workSpace, size_t wkspSize)
392 const BYTE* ip = (const BYTE*) cSrc;
394 size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
395 if (HUF_isError(hSize)) return hSize;
396 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
397 ip += hSize; cSrcSize -= hSize;
399 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
403 size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
404 const void* cSrc, size_t cSrcSize)
406 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
407 return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
408 workSpace, sizeof(workSpace));
411 size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
413 HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
414 return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
417 size_t HUF_decompress4X1_usingDTable(
418 void* dst, size_t dstSize,
419 const void* cSrc, size_t cSrcSize,
420 const HUF_DTable* DTable)
422 DTableDesc dtd = HUF_getDTableDesc(DTable);
423 if (dtd.tableType != 0) return ERROR(GENERIC);
424 return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
427 static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
428 const void* cSrc, size_t cSrcSize,
429 void* workSpace, size_t wkspSize, int bmi2)
431 const BYTE* ip = (const BYTE*) cSrc;
433 size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize,
434 workSpace, wkspSize);
435 if (HUF_isError(hSize)) return hSize;
436 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
437 ip += hSize; cSrcSize -= hSize;
439 return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
442 size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
443 const void* cSrc, size_t cSrcSize,
444 void* workSpace, size_t wkspSize)
446 return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
450 size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
452 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
453 return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
454 workSpace, sizeof(workSpace));
456 size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
458 HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
459 return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
462 #endif /* HUF_FORCE_DECOMPRESS_X2 */
465 #ifndef HUF_FORCE_DECOMPRESS_X1
467 /* *************************/
468 /* double-symbols decoding */
469 /* *************************/
/* one decoding cell : up to 2 symbols packed little-endian in `sequence`;
 * `nbBits` consumed from the stream, `length` = symbols emitted (1 or 2) */
typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2;  /* double-symbols decoding */
/* (symbol, weight) pair as produced by the weight sort */
typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
/* one column of rank start values, indexed by weight */
typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
/* one column per possible number of already-consumed bits */
typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
/* HUF_fillDTableX2Level2() :
 * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32.
 * Fills the second-level sub-table of cells whose first symbol is `baseSeq`
 * and which already consumed `consumed` bits.
 * NOTE(review): the opening brace, the `HUF_DEltX2 DElt;` declaration,
 * some body lines and closing braces appear missing from this view;
 * visible code left untouched. */
static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
                           const U32* rankValOrigin, const int minWeight,
                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
                           U32 nbBitsBaseline, U16 baseSeq)
    U32 rankVal[HUF_TABLELOG_MAX + 1];

    /* get pre-calculated rankVal */
    memcpy(rankVal, rankValOrigin, sizeof(rankVal));

    /* fill skipped values : cells unreachable by a second symbol repeat the first symbol alone */
        U32 i, skipSize = rankVal[minWeight];
        MEM_writeLE16(&(DElt.sequence), baseSeq);
        DElt.nbBits   = (BYTE)(consumed);
        for (i = 0; i < skipSize; i++)

    /* fill DTable with two-symbol cells */
    { U32 s; for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
            const U32 symbol = sortedSymbols[s].symbol;
            const U32 weight = sortedSymbols[s].weight;
            const U32 nbBits = nbBitsBaseline - weight;
            const U32 length = 1 << (sizeLog-nbBits);
            const U32 start = rankVal[weight];
            const U32 end = start + length;

            /* second symbol goes in the high byte of the little-endian pair */
            MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
            DElt.nbBits = (BYTE)(nbBits + consumed);
            do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */

            rankVal[weight] += length;
/* HUF_fillDTableX2() :
 * fills the double-symbol decoding table. Cells with enough leftover bits
 * to hold a second symbol are delegated to HUF_fillDTableX2Level2().
 * NOTE(review): the opening brace, `s`/`sortedRank`/`DElt`/`u` declarations,
 * the `else` line and closing braces appear missing from this view;
 * visible code left untouched. */
static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
                           const U32 nbBitsBaseline)
    U32 rankVal[HUF_TABLELOG_MAX + 1];
    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
    const U32 minBits  = nbBitsBaseline - maxWeight;

    memcpy(rankVal, rankValOrigin, sizeof(rankVal));

    /* fill DTable, one sorted symbol at a time */
    for (s=0; s<sortedListSize; s++) {
        const U16 symbol = sortedList[s].symbol;
        const U32 weight = sortedList[s].weight;
        const U32 nbBits = nbBitsBaseline - weight;
        const U32 start = rankVal[weight];
        const U32 length = 1 << (targetLog-nbBits);

        if (targetLog-nbBits >= minBits) {   /* enough room for a second symbol */
            int minWeight = nbBits + scaleLog;
            if (minWeight < 1) minWeight = 1;
            sortedRank = rankStart[minWeight];
            HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
                           rankValOrigin[nbBits], minWeight,
                           sortedList+sortedRank, sortedListSize-sortedRank,
                           nbBitsBaseline, symbol);
            /* single-symbol cell, replicated `length` times */
            MEM_writeLE16(&(DElt.sequence), symbol);
            DElt.nbBits = (BYTE)(nbBits);
            { U32 const end = start + length;
                for (u = start; u < end; u++) DTable[u] = DElt;
        rankVal[weight] += length;
/* HUF_readDTableX2_wksp() :
 * builds the double-symbol decoding table from the Huffman weight header at
 * `src`, carving all temporaries out of `workSpace`.
 * Returns the number of header bytes consumed from `src`, or an error code.
 * NOTE(review): several declarations (iSize, weightList, rankStats,
 * rankStart0, rankStart, nextRankVal, nbBitsBaseline, loop variables),
 * the final argument of HUF_fillDTableX2, closing braces and the final
 * return appear missing from this view; visible code left untouched. */
size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
                       const void* src, size_t srcSize,
                             void* workSpace, size_t wkspSize)
    U32 tableLog, maxW, sizeOfSort, nbSymbols;
    DTableDesc dtd = HUF_getDTableDesc(DTable);
    U32 const maxTableLog = dtd.maxTableLog;
    void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
    HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
    rankValCol_t* rankVal;
    sortedSymbol_t* sortedSymbol;
    size_t spaceUsed32 = 0;

    /* partition workSpace : rankVal | rankStats | rankStart0 | sortedSymbol | weightList */
    rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
    spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
    rankStats = (U32 *)workSpace + spaceUsed32;
    spaceUsed32 += HUF_TABLELOG_MAX + 1;
    rankStart0 = (U32 *)workSpace + spaceUsed32;
    spaceUsed32 += HUF_TABLELOG_MAX + 2;
    sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
    spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
    weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
    spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;

    /* reject workspaces too small for the temporaries */
    if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);

    rankStart = rankStart0 + 1;
    memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));

    DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
    if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    /* memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */

    /* parse the weight header */
    iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
    if (HUF_isError(iSize)) return iSize;

    /* check result */
    if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */

    /* find largest used weight */
    for (maxW = tableLog; rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */

    /* Get start index of each weight */
    { U32 w, nextRankStart = 0;
        for (w=1; w<maxW+1; w++) {
            U32 current = nextRankStart;
            nextRankStart += rankStats[w];
            rankStart[w] = current;
        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
        sizeOfSort = nextRankStart;

    /* sort symbols by weight (counting sort via rankStart cursors) */
        for (s=0; s<nbSymbols; s++) {
            U32 const w = weightList[s];
            U32 const r = rankStart[w]++;
            sortedSymbol[r].symbol = (BYTE)s;
            sortedSymbol[r].weight = (BYTE)w;
        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */

    /* Build rankVal : start positions per weight, per consumed-bits column */
    { U32* const rankVal0 = rankVal[0];
        { int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
            for (w=1; w<maxW+1; w++) {
                U32 current = nextRankVal;
                nextRankVal += rankStats[w] << (w+rescale);
                rankVal0[w] = current;
        { U32 const minBits = tableLog+1 - maxW;
            for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
                U32* const rankValPtr = rankVal[consumed];
                for (w = 1; w < maxW+1; w++) {
                    rankValPtr[w] = rankVal0[w] >> consumed;

    HUF_fillDTableX2(dt, maxTableLog,
                   sortedSymbol, sizeOfSort,
                   rankStart0, rankVal, maxW,

    dtd.tableLog = (BYTE)maxTableLog;
    memcpy(DTable, &dtd, sizeof(dtd));
662 size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
664 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
665 return HUF_readDTableX2_wksp(DTable, src, srcSize,
666 workSpace, sizeof(workSpace));
670 FORCE_INLINE_TEMPLATE U32
671 HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
673 size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
674 memcpy(op, dt+val, 2);
675 BIT_skipBits(DStream, dt[val].nbBits);
676 return dt[val].length;
679 FORCE_INLINE_TEMPLATE U32
680 HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
682 size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
683 memcpy(op, dt+val, 1);
684 if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
686 if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
687 BIT_skipBits(DStream, dt[val].nbBits);
688 if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
689 /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
690 DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
/* emit one cell (1-2 symbols) from DStreamPtr and advance ptr by its length */
#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
    ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)

/* same, guarded : only when the 64-bit accumulator (or a small table) guarantees enough bits remain */
#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr)      \
    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)

/* NOTE(review): the `if (MEM_64bits())` guard line of _2 appears missing from this view */
#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
/* HUF_decodeStreamX2() :
 * decodes from *bitDPtr into [p, pEnd) with the double-symbol table dt;
 * the last position is handled by HUF_decodeLastSymbolX2() so a 2-symbol
 * cell never writes past pEnd.
 * NOTE(review): the return-type line, braces, intermediate guards and the
 * final return appear missing from this view; visible code left untouched. */
HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
                const HUF_DEltX2* const dt, const U32 dtLog)
    BYTE* const pStart = p;

    /* up to 8 symbols at a time */
    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);

    /* closer to end : up to 2 symbols at a time */
    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);

        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */

        p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
733 FORCE_INLINE_TEMPLATE size_t
734 HUF_decompress1X2_usingDTable_internal_body(
735 void* dst, size_t dstSize,
736 const void* cSrc, size_t cSrcSize,
737 const HUF_DTable* DTable)
742 CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
745 { BYTE* const ostart = (BYTE*) dst;
746 BYTE* const oend = ostart + dstSize;
747 const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
748 const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
749 DTableDesc const dtd = HUF_getDTableDesc(DTable);
750 HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
754 if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
/* HUF_decompress4X2_usingDTable_internal_body() :
 * 4-streams variant of the double-symbol decoder; input layout is identical
 * to the X1 4-streams variant (6-byte jump table + 4 bitstreams).
 * NOTE(review): several lines (the four BIT_DStream_t declarations, `op1`,
 * the `endSignal` declaration, closing braces and the final return) appear
 * missing from this view; visible code left untouched. */
FORCE_INLINE_TEMPLATE size_t
HUF_decompress4X2_usingDTable_internal_body(
          void* dst,  size_t dstSize,
    const void* cSrc, size_t cSrcSize,
    const HUF_DTable* DTable)
    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */

    { const BYTE* const istart = (const BYTE*) cSrc;
        BYTE* const ostart = (BYTE*) dst;
        BYTE* const oend = ostart + dstSize;
        const void* const dtPtr = DTable+1;
        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;

        /* read the three stream lengths; stream 4 gets the remainder */
        size_t const length1 = MEM_readLE16(istart);
        size_t const length2 = MEM_readLE16(istart+2);
        size_t const length3 = MEM_readLE16(istart+4);
        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
        const BYTE* const istart1 = istart + 6;  /* jumpTable */
        const BYTE* const istart2 = istart1 + length1;
        const BYTE* const istart3 = istart2 + length2;
        const BYTE* const istart4 = istart3 + length3;
        size_t const segmentSize = (dstSize+3) / 4;
        BYTE* const opStart2 = ostart + segmentSize;
        BYTE* const opStart3 = opStart2 + segmentSize;
        BYTE* const opStart4 = opStart3 + segmentSize;
        BYTE* op2 = opStart2;
        BYTE* op3 = opStart3;
        BYTE* op4 = opStart4;
        DTableDesc const dtd = HUF_getDTableDesc(DTable);
        U32 const dtLog = dtd.tableLog;

        /* length4 is computed by subtraction; detect wrap-around */
        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );

        /* 16-32 symbols per loop (4-8 symbols per stream) */
        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
        for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);

            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);

        /* check corruption */
        if (op1 > opStart2) return ERROR(corruption_detected);
        if (op2 > opStart3) return ERROR(corruption_detected);
        if (op3 > opStart4) return ERROR(corruption_detected);
        /* note : op4 already verified within main loop */

        /* finish bitStreams one by one */
        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);

        /* all 4 streams must be exactly consumed */
        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
          if (!endCheck) return ERROR(corruption_detected); }
/* instantiate default/bmi2 wrappers + dispatcher for the X2 decoder bodies */
HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
853 size_t HUF_decompress1X2_usingDTable(
854 void* dst, size_t dstSize,
855 const void* cSrc, size_t cSrcSize,
856 const HUF_DTable* DTable)
858 DTableDesc dtd = HUF_getDTableDesc(DTable);
859 if (dtd.tableType != 1) return ERROR(GENERIC);
860 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
863 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
864 const void* cSrc, size_t cSrcSize,
865 void* workSpace, size_t wkspSize)
867 const BYTE* ip = (const BYTE*) cSrc;
869 size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
870 workSpace, wkspSize);
871 if (HUF_isError(hSize)) return hSize;
872 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
873 ip += hSize; cSrcSize -= hSize;
875 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
879 size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
880 const void* cSrc, size_t cSrcSize)
882 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
883 return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
884 workSpace, sizeof(workSpace));
887 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
889 HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
890 return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
893 size_t HUF_decompress4X2_usingDTable(
894 void* dst, size_t dstSize,
895 const void* cSrc, size_t cSrcSize,
896 const HUF_DTable* DTable)
898 DTableDesc dtd = HUF_getDTableDesc(DTable);
899 if (dtd.tableType != 1) return ERROR(GENERIC);
900 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
903 static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
904 const void* cSrc, size_t cSrcSize,
905 void* workSpace, size_t wkspSize, int bmi2)
907 const BYTE* ip = (const BYTE*) cSrc;
909 size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
910 workSpace, wkspSize);
911 if (HUF_isError(hSize)) return hSize;
912 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
913 ip += hSize; cSrcSize -= hSize;
915 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
918 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
919 const void* cSrc, size_t cSrcSize,
920 void* workSpace, size_t wkspSize)
922 return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
926 size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
927 const void* cSrc, size_t cSrcSize)
929 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
930 return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
931 workSpace, sizeof(workSpace));
934 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
936 HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
937 return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
940 #endif /* HUF_FORCE_DECOMPRESS_X1 */
943 /* ***********************************/
944 /* Universal decompression selectors */
945 /* ***********************************/
947 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
948 const void* cSrc, size_t cSrcSize,
949 const HUF_DTable* DTable)
951 DTableDesc const dtd = HUF_getDTableDesc(DTable);
952 #if defined(HUF_FORCE_DECOMPRESS_X1)
954 assert(dtd.tableType == 0);
955 return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
956 #elif defined(HUF_FORCE_DECOMPRESS_X2)
958 assert(dtd.tableType == 1);
959 return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
961 return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
962 HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
966 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
967 const void* cSrc, size_t cSrcSize,
968 const HUF_DTable* DTable)
970 DTableDesc const dtd = HUF_getDTableDesc(DTable);
971 #if defined(HUF_FORCE_DECOMPRESS_X1)
973 assert(dtd.tableType == 0);
974 return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
975 #elif defined(HUF_FORCE_DECOMPRESS_X2)
977 assert(dtd.tableType == 1);
978 return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
980 return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
981 HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
986 #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
987 typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
988 static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
990 /* single, double, quad */
991 {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */
992 {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */
993 {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */
994 {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */
995 {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */
996 {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */
997 {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */
998 {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */
999 {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */
1000 {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */
1001 {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */
1002 {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */
1003 {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */
1004 {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */
1005 {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */
1006 {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */
1010 /** HUF_selectDecoder() :
1011 * Tells which decoder is likely to decode faster,
1012 * based on a set of pre-computed metrics.
1013 * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
1014 * Assumption : 0 < dstSize <= 128 KB */
1015 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
1017 assert(dstSize > 0);
1018 assert(dstSize <= 128*1024);
1019 #if defined(HUF_FORCE_DECOMPRESS_X1)
1023 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1028 /* decoder timing evaluation */
1029 { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
1030 U32 const D256 = (U32)(dstSize >> 8);
1031 U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
1032 U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
1033 DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */
1034 return DTime1 < DTime0;
1040 typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
1042 size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1044 #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
1045 static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
1048 /* validation checks */
1049 if (dstSize == 0) return ERROR(dstSize_tooSmall);
1050 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1051 if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1052 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1054 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1055 #if defined(HUF_FORCE_DECOMPRESS_X1)
1057 assert(algoNb == 0);
1058 return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
1059 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1061 assert(algoNb == 1);
1062 return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
1064 return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
1069 size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1071 /* validation checks */
1072 if (dstSize == 0) return ERROR(dstSize_tooSmall);
1073 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1074 if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1075 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1077 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1078 #if defined(HUF_FORCE_DECOMPRESS_X1)
1080 assert(algoNb == 0);
1081 return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
1082 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1084 assert(algoNb == 1);
1085 return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
1087 return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
1088 HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
1093 size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1095 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1096 return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1097 workSpace, sizeof(workSpace));
1101 size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
1102 size_t dstSize, const void* cSrc,
1103 size_t cSrcSize, void* workSpace,
1106 /* validation checks */
1107 if (dstSize == 0) return ERROR(dstSize_tooSmall);
1108 if (cSrcSize == 0) return ERROR(corruption_detected);
1110 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1111 #if defined(HUF_FORCE_DECOMPRESS_X1)
1113 assert(algoNb == 0);
1114 return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1115 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1117 assert(algoNb == 1);
1118 return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1120 return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1121 cSrcSize, workSpace, wkspSize):
1122 HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1127 size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
1128 const void* cSrc, size_t cSrcSize,
1129 void* workSpace, size_t wkspSize)
1131 /* validation checks */
1132 if (dstSize == 0) return ERROR(dstSize_tooSmall);
1133 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1134 if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1135 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1137 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1138 #if defined(HUF_FORCE_DECOMPRESS_X1)
1140 assert(algoNb == 0);
1141 return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1142 cSrcSize, workSpace, wkspSize);
1143 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1145 assert(algoNb == 1);
1146 return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1147 cSrcSize, workSpace, wkspSize);
1149 return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1150 cSrcSize, workSpace, wkspSize):
1151 HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1152 cSrcSize, workSpace, wkspSize);
1157 size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
1158 const void* cSrc, size_t cSrcSize)
1160 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1161 return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1162 workSpace, sizeof(workSpace));
1166 size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1168 DTableDesc const dtd = HUF_getDTableDesc(DTable);
1169 #if defined(HUF_FORCE_DECOMPRESS_X1)
1171 assert(dtd.tableType == 0);
1172 return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1173 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1175 assert(dtd.tableType == 1);
1176 return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1178 return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1179 HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1183 #ifndef HUF_FORCE_DECOMPRESS_X2
1184 size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1186 const BYTE* ip = (const BYTE*) cSrc;
1188 size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
1189 if (HUF_isError(hSize)) return hSize;
1190 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1191 ip += hSize; cSrcSize -= hSize;
1193 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
1197 size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1199 DTableDesc const dtd = HUF_getDTableDesc(DTable);
1200 #if defined(HUF_FORCE_DECOMPRESS_X1)
1202 assert(dtd.tableType == 0);
1203 return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1204 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1206 assert(dtd.tableType == 1);
1207 return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1209 return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1210 HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1214 size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1216 /* validation checks */
1217 if (dstSize == 0) return ERROR(dstSize_tooSmall);
1218 if (cSrcSize == 0) return ERROR(corruption_detected);
1220 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1221 #if defined(HUF_FORCE_DECOMPRESS_X1)
1223 assert(algoNb == 0);
1224 return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1225 #elif defined(HUF_FORCE_DECOMPRESS_X2)
1227 assert(algoNb == 1);
1228 return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1230 return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
1231 HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);