2 LZ4 - Fast LZ compression algorithm
3 Copyright (C) 2011-2013, Yann Collet.
4 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are
10 * Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12 * Redistributions in binary form must reproduce the above
13 copyright notice, this list of conditions and the following disclaimer
14 in the documentation and/or other materials provided with the
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 You can contact the author at :
30 - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
31 - LZ4 source repository : http://code.google.com/p/lz4/
35 Note : this source file requires "hammer2_lz4_encoder.h"
38 //**************************************
40 //**************************************
42 // Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB;
43 // 16 -> 64KB; 20 -> 1MB; etc.)
44 // Increasing memory usage improves compression ratio
45 // Reduced memory usage can improve speed, due to cache effect
46 // Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
47 #define MEMORY_USAGE 14
50 // Select how default compression function will allocate memory for its
52 // in memory stack (0:default, fastest), or in memory heap (1:requires
53 // memory allocation (malloc)).
54 // Default allocation strategy is to use stack (HEAPMODE 0)
55 // Note : explicit functions *_stack* and *_heap* are unaffected by this setting
58 // BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE :
59 // This will provide a small boost to performance for big endian cpu,
60 // but the resulting compressed stream will be incompatible with little-endian CPU.
61 // You can set this option to 1 in situations where data will remain within
63 // This option is useless on Little_Endian CPU (such as x86)
64 //#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1
67 //**************************************
68 // CPU Feature Detection
69 //**************************************
71 #if (defined(__x86_64__) || defined(_M_X64)) // Detects 64 bits mode
77 //This reduced library code is only Little Endian compatible,
78 //if the need arises, please look for the appropriate defines in the
79 //original complete LZ4 library.
80 //Same is true for unaligned memory access which is enabled by default,
81 //hardware bit count, also enabled by default, and Microsoft/Visual
84 //**************************************
86 //**************************************
87 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99
88 /* "restrict" is a known keyword */
90 # define restrict // Disable restrict
93 #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
95 #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
96 # define expect(expr,value) (__builtin_expect ((expr),(value)) )
98 # define expect(expr,value) (expr)
101 #define likely(expr) expect((expr) != 0, 1)
102 #define unlikely(expr) expect((expr) != 0, 0)
105 //**************************************
107 //**************************************
109 #include "hammer2_lz4.h"
110 #include <sys/malloc.h> //for malloc macros, hammer2.h includes sys/param.h
113 //Declaration for kmalloc functions
114 static MALLOC_DEFINE(C_HASHTABLE, "comphashtable",
115 "A hash table used by LZ4 compression function.");
118 //**************************************
120 //**************************************
121 #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99
122 # include <sys/stdint.h>
123 typedef uint8_t BYTE;
124 typedef uint16_t U16;
125 typedef uint32_t U32;
127 typedef uint64_t U64;
129 typedef unsigned char BYTE;
130 typedef unsigned short U16;
131 typedef unsigned int U32;
132 typedef signed int S32;
133 typedef unsigned long long U64;
136 #if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS)
137 # define _PACKED __attribute__ ((packed))
142 #if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
143 # pragma pack(push, 1)
146 typedef struct _U16_S { U16 v; } _PACKED U16_S;
147 typedef struct _U32_S { U32 v; } _PACKED U32_S;
148 typedef struct _U64_S { U64 v; } _PACKED U64_S;
150 #if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__)
154 #define A64(x) (((U64_S *)(x))->v)
155 #define A32(x) (((U32_S *)(x))->v)
156 #define A16(x) (((U16_S *)(x))->v)
159 //**************************************
161 //**************************************
162 #define HASHTABLESIZE (1 << MEMORY_USAGE)
167 #define LASTLITERALS 5
168 #define MFLIMIT (COPYLENGTH+MINMATCH)
169 #define MINLENGTH (MFLIMIT+1)
171 #define LZ4_64KLIMIT ((1<<16) + (MFLIMIT-1))
172 #define SKIPSTRENGTH 6
173 // Increasing this value will make the compression run slower on
174 // incompressible data
177 #define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
180 #define ML_MASK ((1U<<ML_BITS)-1)
181 #define RUN_BITS (8-ML_BITS)
182 #define RUN_MASK ((1U<<RUN_BITS)-1)
185 //**************************************
186 // Architecture-specific macros
187 //**************************************
188 #if LZ4_ARCH64 // 64-bit
192 # define LZ4_COPYSTEP(s,d) A64(d) = A64(s); d+=8; s+=8;
193 # define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d)
194 # define LZ4_SECURECOPY(s,d,e) if (d<e) LZ4_WILDCOPY(s,d,e)
196 # define INITBASE(base) BYTE* base = ip
201 # define LZ4_COPYSTEP(s,d) A32(d) = A32(s); d+=4; s+=4;
202 # define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d);
203 # define LZ4_SECURECOPY LZ4_WILDCOPY
205 # define INITBASE(base) int base = 0
208 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
209 # define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p);
212 # define LZ4_WRITE_LITTLEENDIAN_16(p,i) { U16 v = (U16)(i);
216 #else // Little Endian
217 # define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); }
218 # define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; }
222 //**************************************
224 //**************************************
225 #define LZ4_WILDCOPY(s,d,e) do { LZ4_COPYPACKET(s,d) } while (d<e);
226 #define LZ4_BLINDCOPY(s,d,l) { BYTE* e=(d)+(l); LZ4_WILDCOPY(s,d,e); d=e; }
229 //****************************
231 //****************************
237 LZ4_NbCommonBytes (register U64 val)
239 #if defined(LZ4_BIG_ENDIAN)
240 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
242 _BitScanReverse64( &r, val );
244 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
245 return (__builtin_clzll(val) >> 3);
248 if (!(val>>32)) { r=4; } else { r=0; val>>=32; }
249 if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
254 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
256 _BitScanForward64( &r, val );
258 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
259 return (__builtin_ctzll(val) >> 3);
261 static int DeBruijnBytePos[64] = {
262 0, 0, 0, 0, 0, 1, 1, 2, 0, 3,
263 1, 3, 1, 4, 2, 7, 0, 2, 3, 6,
264 1, 5, 3, 5, 1, 3, 4, 4, 2, 5,
265 6, 7, 7, 0, 1, 2, 3, 3, 4, 6,
266 2, 6, 5, 5, 3, 4, 5, 6, 7, 1,
267 2, 4, 6, 4, 4, 5, 7, 2, 6, 5,
269 return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58];
279 LZ4_NbCommonBytes (register U32 val)
281 #if defined(LZ4_BIG_ENDIAN)
282 # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
284 _BitScanReverse( &r, val );
286 # elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
287 return (__builtin_clz(val) >> 3);
290 if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
295 # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
297 _BitScanForward( &r, val );
299 # elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
300 return (__builtin_ctz(val) >> 3);
302 static int DeBruijnBytePos[32] = {
303 0, 0, 3, 0, 3, 1, 3, 0, 3, 2,
304 2, 1, 3, 2, 0, 1, 3, 3, 1, 2,
305 2, 2, 2, 0, 3, 1, 2, 0, 1, 0,
307 return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
316 //******************************
317 // Compression functions
318 //******************************
320 #include "hammer2_lz4_encoder.h"
323 void* LZ4_createHeapMemory();
324 int LZ4_freeHeapMemory(void* ctx);
326 Used to allocate and free hashTable memory
327 to be used by the LZ4_compress_heap* family of functions.
328 LZ4_createHeapMemory() returns NULL is memory allocation fails.
333 return kmalloc(HASHTABLESIZE, C_HASHTABLE, M_INTWAIT);
339 kfree(ctx, C_HASHTABLE);
344 LZ4_compress_limitedOutput(char* source, char* dest, int inputSize, int maxOutputSize)
346 void* ctx = LZ4_create();
348 if (ctx == NULL) return 0; // Failed allocation => compression not done
349 if (inputSize < LZ4_64KLIMIT)
350 result = LZ4_compress64k_heap_limitedOutput(ctx, source, dest,
351 inputSize, maxOutputSize);
352 else result = LZ4_compress_heap_limitedOutput(ctx, source, dest,
353 inputSize, maxOutputSize);
359 //****************************
360 // Decompression functions
361 //****************************
363 typedef enum { noPrefix = 0, withPrefix = 1 } prefix64k_directive;
364 typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } end_directive;
365 typedef enum { full = 0, partial = 1 } exit_directive;
368 // This generic decompression function cover all use cases.
369 // It shall be instanciated several times, using different sets of directives
370 // Note that it is essential this generic function is really inlined,
371 // in order to remove useless branches during compilation optimisation.
374 int LZ4_decompress_generic(
379 // OutputSize must be != 0; if endOnInput==endOnInputSize,
380 // this value is the max size of Output Buffer.
382 int endOnInput, // endOnOutputSize, endOnInputSize
383 int prefix64k, // noPrefix, withPrefix
384 int partialDecoding, // full, partial
385 int targetOutputSize // only used if partialDecoding==partial
389 BYTE* restrict ip = (BYTE*) source;
391 BYTE* iend = ip + inputSize;
393 BYTE* op = (BYTE*) dest;
394 BYTE* oend = op + outputSize;
396 BYTE* oexit = op + targetOutputSize;
398 size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
400 size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
405 if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT;
406 // targetOutputSize too large, better decode everything
407 if unlikely(outputSize==0) goto _output_error;
408 // Empty output buffer
419 if ((length=(token>>ML_BITS)) == RUN_MASK)
422 while (((endOnInput)?ip<iend:1) && (s==255))
431 if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT))
432 || (ip+length>iend-(2+1+LASTLITERALS))) )
433 || ((!endOnInput) && (cpy>oend-COPYLENGTH)))
437 if (cpy > oend) goto _output_error;
438 // Error : write attempt beyond end of output buffer
439 if ((endOnInput) && (ip+length > iend)) goto _output_error;
440 // Error : read attempt beyond end of input buffer
444 if ((!endOnInput) && (cpy != oend)) goto _output_error;
445 // Error : block decoding must stop exactly there,
446 // due to parsing restrictions
447 if ((endOnInput) && ((ip+length != iend) || (cpy > oend)))
449 // Error : not enough place for another match (min 4) + 5 literals
451 memcpy(op, ip, length);
455 // Necessarily EOF, due to parsing restrictions
457 LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy;
460 LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2;
461 if ((prefix64k==noPrefix) && unlikely(ref < (BYTE*)dest))
462 goto _output_error; // Error : offset outside destination buffer
465 if ((length=(token&ML_MASK)) == ML_MASK)
467 while (endOnInput ? ip<iend-(LASTLITERALS+1) : 1)
468 // A minimum nb of input bytes must remain for LASTLITERALS + token
472 if (s==255) continue;
477 // copy repeated sequence
478 if unlikely((op-ref)<STEPSIZE)
481 size_t dec64 = dec64table[op-ref];
483 const size_t dec64 = 0;
489 op += 4, ref += 4; ref -= dec32table[op-ref];
491 op += STEPSIZE-4; ref -= dec64;
492 } else { LZ4_COPYSTEP(ref,op); }
493 cpy = op + length - (STEPSIZE-4);
495 if unlikely(cpy>oend-(COPYLENGTH)-(STEPSIZE-4))
497 if (cpy > oend-LASTLITERALS) goto _output_error;
498 // Error : last 5 bytes must be literals
499 LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH));
500 while(op<cpy) *op++=*ref++;
504 LZ4_WILDCOPY(ref, op, cpy);
505 op=cpy; // correction
510 return (int) (((char*)op)-dest); // Nb of output bytes decoded
512 return (int) (((char*)ip)-source); // Nb of input bytes read
514 // Overflow error detected
516 return (int) (-(((char*)ip)-source))-1;
521 LZ4_decompress_safe(char* source, char* dest, int inputSize, int maxOutputSize)
523 return LZ4_decompress_generic(source, dest, inputSize, maxOutputSize,
524 endOnInputSize, noPrefix, full, 0);