contrib/xz/src/common/tuklib_integer.h

   1 ///////////////////////////////////////////////////////////////////////////////
   2 //
   3 /// \file       tuklib_integer.h
   4 /// \brief      Various integer and bit operations
   5 ///
   6 /// This file provides macros or functions to do some basic integer and bit
   7 /// operations.
   8 ///
   9 /// Native endian inline functions (XX = 16, 32, or 64):
  10 ///   - Unaligned native endian reads: readXXne(ptr)
  11 ///   - Unaligned native endian writes: writeXXne(ptr, num)
  12 ///   - Aligned native endian reads: aligned_readXXne(ptr)
  13 ///   - Aligned native endian writes: aligned_writeXXne(ptr, num)
  14 ///
  15 /// Endianness-converting integer operations (these can be macros!)
  16 /// (XX = 16, 32, or 64; Y = b or l):
  17 ///   - Byte swapping: bswapXX(num)
  18 ///   - Byte order conversions to/from native (byteswaps if Y isn't
  19 ///     the native endianness): convXXYe(num)
  20 ///   - Unaligned reads (16/32-bit only): readXXYe(ptr)
  21 ///   - Unaligned writes (16/32-bit only): writeXXYe(ptr, num)
  22 ///   - Aligned reads: aligned_readXXYe(ptr)
  23 ///   - Aligned writes: aligned_writeXXYe(ptr, num)
  24 ///
  25 /// Since the above can macros, the arguments should have no side effects
  26 /// because they may be evaluated more than once.
  27 ///
  28 /// Bit scan operations for non-zero 32-bit integers (inline functions):
  29 ///   - Bit scan reverse (find highest non-zero bit): bsr32(num)
  30 ///   - Count leading zeros: clz32(num)
  31 ///   - Count trailing zeros: ctz32(num)
  32 ///   - Bit scan forward (simply an alias for ctz32()): bsf32(num)
  33 ///
  34 /// The above bit scan operations return 0-31. If num is zero,
  35 /// the result is undefined.
  36 //
  37 //  Authors:    Lasse Collin
  38 //              Joachim Henke
  39 //
  40 //  This file has been put into the public domain.
  41 //  You can do whatever you want with this file.
  42 //
  43 ///////////////////////////////////////////////////////////////////////////////
  44
  45 #ifndef TUKLIB_INTEGER_H
  46 #define TUKLIB_INTEGER_H
  47
  48 #include "tuklib_common.h"
  49 #include <string.h>
  50
  51 // Newer Intel C compilers require immintrin.h for _bit_scan_reverse()
  52 // and such functions.
  53 #if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1500)
  54 #       include <immintrin.h>
  55 #endif
  56
  57
  58 ///////////////////
  59 // Byte swapping //
  60 ///////////////////
  61
  62 #if defined(HAVE___BUILTIN_BSWAPXX)
  63         // GCC >= 4.8 and Clang
  64 #       define bswap16(n) __builtin_bswap16(n)
  65 #       define bswap32(n) __builtin_bswap32(n)
  66 #       define bswap64(n) __builtin_bswap64(n)
  67
  68 #elif defined(HAVE_BYTESWAP_H)
  69         // glibc, uClibc, dietlibc
  70 #       include <byteswap.h>
  71 #       ifdef HAVE_BSWAP_16
  72 #               define bswap16(num) bswap_16(num)
  73 #       endif
  74 #       ifdef HAVE_BSWAP_32
  75 #               define bswap32(num) bswap_32(num)
  76 #       endif
  77 #       ifdef HAVE_BSWAP_64
  78 #               define bswap64(num) bswap_64(num)
  79 #       endif
  80
  81 #elif defined(HAVE_SYS_ENDIAN_H)
  82         // *BSDs and Darwin
  83 #       include <sys/endian.h>
  84
  85 #elif defined(HAVE_SYS_BYTEORDER_H)
  86         // Solaris
  87 #       include <sys/byteorder.h>
  88 #       ifdef BSWAP_16
  89 #               define bswap16(num) BSWAP_16(num)
  90 #       endif
  91 #       ifdef BSWAP_32
  92 #               define bswap32(num) BSWAP_32(num)
  93 #       endif
  94 #       ifdef BSWAP_64
  95 #               define bswap64(num) BSWAP_64(num)
  96 #       endif
  97 #       ifdef BE_16
  98 #               define conv16be(num) BE_16(num)
  99 #       endif
 100 #       ifdef BE_32
 101 #               define conv32be(num) BE_32(num)
 102 #       endif
 103 #       ifdef BE_64
 104 #               define conv64be(num) BE_64(num)
 105 #       endif
 106 #       ifdef LE_16
 107 #               define conv16le(num) LE_16(num)
 108 #       endif
 109 #       ifdef LE_32
 110 #               define conv32le(num) LE_32(num)
 111 #       endif
 112 #       ifdef LE_64
 113 #               define conv64le(num) LE_64(num)
 114 #       endif
 115 #endif
 116
 117 #ifndef bswap16
 118 #       define bswap16(n) (uint16_t)( \
 119                   (((n) & 0x00FFU) << 8) \
 120                 | (((n) & 0xFF00U) >> 8) \
 121         )
 122 #endif
 123
 124 #ifndef bswap32
 125 #       define bswap32(n) (uint32_t)( \
 126                   (((n) & UINT32_C(0x000000FF)) << 24) \
 127                 | (((n) & UINT32_C(0x0000FF00)) << 8) \
 128                 | (((n) & UINT32_C(0x00FF0000)) >> 8) \
 129                 | (((n) & UINT32_C(0xFF000000)) >> 24) \
 130         )
 131 #endif
 132
 133 #ifndef bswap64
 134 #       define bswap64(n) (uint64_t)( \
 135                   (((n) & UINT64_C(0x00000000000000FF)) << 56) \
 136                 | (((n) & UINT64_C(0x000000000000FF00)) << 40) \
 137                 | (((n) & UINT64_C(0x0000000000FF0000)) << 24) \
 138                 | (((n) & UINT64_C(0x00000000FF000000)) << 8) \
 139                 | (((n) & UINT64_C(0x000000FF00000000)) >> 8) \
 140                 | (((n) & UINT64_C(0x0000FF0000000000)) >> 24) \
 141                 | (((n) & UINT64_C(0x00FF000000000000)) >> 40) \
 142                 | (((n) & UINT64_C(0xFF00000000000000)) >> 56) \
 143         )
 144 #endif
 145
 146 // Define conversion macros using the basic byte swapping macros.
 147 #ifdef WORDS_BIGENDIAN
 148 #       ifndef conv16be
 149 #               define conv16be(num) ((uint16_t)(num))
 150 #       endif
 151 #       ifndef conv32be
 152 #               define conv32be(num) ((uint32_t)(num))
 153 #       endif
 154 #       ifndef conv64be
 155 #               define conv64be(num) ((uint64_t)(num))
 156 #       endif
 157 #       ifndef conv16le
 158 #               define conv16le(num) bswap16(num)
 159 #       endif
 160 #       ifndef conv32le
 161 #               define conv32le(num) bswap32(num)
 162 #       endif
 163 #       ifndef conv64le
 164 #               define conv64le(num) bswap64(num)
 165 #       endif
 166 #else
 167 #       ifndef conv16be
 168 #               define conv16be(num) bswap16(num)
 169 #       endif
 170 #       ifndef conv32be
 171 #               define conv32be(num) bswap32(num)
 172 #       endif
 173 #       ifndef conv64be
 174 #               define conv64be(num) bswap64(num)
 175 #       endif
 176 #       ifndef conv16le
 177 #               define conv16le(num) ((uint16_t)(num))
 178 #       endif
 179 #       ifndef conv32le
 180 #               define conv32le(num) ((uint32_t)(num))
 181 #       endif
 182 #       ifndef conv64le
 183 #               define conv64le(num) ((uint64_t)(num))
 184 #       endif
 185 #endif
 186
 187
 188 ////////////////////////////////
 189 // Unaligned reads and writes //
 190 ////////////////////////////////
 191
 192 // The traditional way of casting e.g. *(const uint16_t *)uint8_pointer
 193 // is bad even if the uint8_pointer is properly aligned because this kind
 194 // of casts break strict aliasing rules and result in undefined behavior.
 195 // With unaligned pointers it's even worse: compilers may emit vector
 196 // instructions that require aligned pointers even if non-vector
 197 // instructions work with unaligned pointers.
 198 //
 199 // Using memcpy() is the standard compliant way to do unaligned access.
 200 // Many modern compilers inline it so there is no function call overhead.
 201 // For those compilers that don't handle the memcpy() method well, the
 202 // old casting method (that violates strict aliasing) can be requested at
 203 // build time. A third method, casting to a packed struct, would also be
 204 // an option but isn't provided to keep things simpler (it's already a mess).
 205 // Hopefully this is flexible enough in practice.
 206
 207 static inline uint16_t
 208 read16ne(const uint8_t *buf)
 209 {
 210 #if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
 211                 && defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
 212         return *(const uint16_t *)buf;
 213 #else
 214         uint16_t num;
 215         memcpy(&num, buf, sizeof(num));
 216         return num;
 217 #endif
 218 }
 219
 220
 221 static inline uint32_t
 222 read32ne(const uint8_t *buf)
 223 {
 224 #if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
 225                 && defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
 226         return *(const uint32_t *)buf;
 227 #else
 228         uint32_t num;
 229         memcpy(&num, buf, sizeof(num));
 230         return num;
 231 #endif
 232 }
 233
 234
 235 static inline uint64_t
 236 read64ne(const uint8_t *buf)
 237 {
 238 #if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
 239                 && defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
 240         return *(const uint64_t *)buf;
 241 #else
 242         uint64_t num;
 243         memcpy(&num, buf, sizeof(num));
 244         return num;
 245 #endif
 246 }
 247
 248
 249 static inline void
 250 write16ne(uint8_t *buf, uint16_t num)
 251 {
 252 #if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
 253                 && defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
 254         *(uint16_t *)buf = num;
 255 #else
 256         memcpy(buf, &num, sizeof(num));
 257 #endif
 258         return;
 259 }
 260
 261
 262 static inline void
 263 write32ne(uint8_t *buf, uint32_t num)
 264 {
 265 #if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
 266                 && defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
 267         *(uint32_t *)buf = num;
 268 #else
 269         memcpy(buf, &num, sizeof(num));
 270 #endif
 271         return;
 272 }
 273
 274
 275 static inline void
 276 write64ne(uint8_t *buf, uint64_t num)
 277 {
 278 #if defined(TUKLIB_FAST_UNALIGNED_ACCESS) \
 279                 && defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING)
 280         *(uint64_t *)buf = num;
 281 #else
 282         memcpy(buf, &num, sizeof(num));
 283 #endif
 284         return;
 285 }
 286
 287
 288 static inline uint16_t
 289 read16be(const uint8_t *buf)
 290 {
 291 #if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
 292         uint16_t num = read16ne(buf);
 293         return conv16be(num);
 294 #else
 295         uint16_t num = ((uint16_t)buf[0] << 8) | (uint16_t)buf[1];
 296         return num;
 297 #endif
 298 }
 299
 300
 301 static inline uint16_t
 302 read16le(const uint8_t *buf)
 303 {
 304 #if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
 305         uint16_t num = read16ne(buf);
 306         return conv16le(num);
 307 #else
 308         uint16_t num = ((uint16_t)buf[0]) | ((uint16_t)buf[1] << 8);
 309         return num;
 310 #endif
 311 }
 312
 313
 314 static inline uint32_t
 315 read32be(const uint8_t *buf)
 316 {
 317 #if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
 318         uint32_t num = read32ne(buf);
 319         return conv32be(num);
 320 #else
 321         uint32_t num = (uint32_t)buf[0] << 24;
 322         num |= (uint32_t)buf[1] << 16;
 323         num |= (uint32_t)buf[2] << 8;
 324         num |= (uint32_t)buf[3];
 325         return num;
 326 #endif
 327 }
 328
 329
 330 static inline uint32_t
 331 read32le(const uint8_t *buf)
 332 {
 333 #if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
 334         uint32_t num = read32ne(buf);
 335         return conv32le(num);
 336 #else
 337         uint32_t num = (uint32_t)buf[0];
 338         num |= (uint32_t)buf[1] << 8;
 339         num |= (uint32_t)buf[2] << 16;
 340         num |= (uint32_t)buf[3] << 24;
 341         return num;
 342 #endif
 343 }
 344
 345
 346 // NOTE: Possible byte swapping must be done in a macro to allow the compiler
 347 // to optimize byte swapping of constants when using glibc's or *BSD's
 348 // byte swapping macros. The actual write is done in an inline function
 349 // to make type checking of the buf pointer possible.
 350 #if defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
 351 #       define write16be(buf, num) write16ne(buf, conv16be(num))
 352 #       define write32be(buf, num) write32ne(buf, conv32be(num))
 353 #endif
 354
 355 #if !defined(WORDS_BIGENDIAN) || defined(TUKLIB_FAST_UNALIGNED_ACCESS)
 356 #       define write16le(buf, num) write16ne(buf, conv16le(num))
 357 #       define write32le(buf, num) write32ne(buf, conv32le(num))
 358 #endif
 359
 360
 361 #ifndef write16be
 362 static inline void
 363 write16be(uint8_t *buf, uint16_t num)
 364 {
 365         buf[0] = (uint8_t)(num >> 8);
 366         buf[1] = (uint8_t)num;
 367         return;
 368 }
 369 #endif
 370
 371
 372 #ifndef write16le
 373 static inline void
 374 write16le(uint8_t *buf, uint16_t num)
 375 {
 376         buf[0] = (uint8_t)num;
 377         buf[1] = (uint8_t)(num >> 8);
 378         return;
 379 }
 380 #endif
 381
 382
 383 #ifndef write32be
 384 static inline void
 385 write32be(uint8_t *buf, uint32_t num)
 386 {
 387         buf[0] = (uint8_t)(num >> 24);
 388         buf[1] = (uint8_t)(num >> 16);
 389         buf[2] = (uint8_t)(num >> 8);
 390         buf[3] = (uint8_t)num;
 391         return;
 392 }
 393 #endif
 394
 395
 396 #ifndef write32le
 397 static inline void
 398 write32le(uint8_t *buf, uint32_t num)
 399 {
 400         buf[0] = (uint8_t)num;
 401         buf[1] = (uint8_t)(num >> 8);
 402         buf[2] = (uint8_t)(num >> 16);
 403         buf[3] = (uint8_t)(num >> 24);
 404         return;
 405 }
 406 #endif
 407
 408
 409 //////////////////////////////
 410 // Aligned reads and writes //
 411 //////////////////////////////
 412
 413 // Separate functions for aligned reads and writes are provided since on
 414 // strict-align archs aligned access is much faster than unaligned access.
 415 //
 416 // Just like in the unaligned case, memcpy() is needed to avoid
 417 // strict aliasing violations. However, on archs that don't support
 418 // unaligned access the compiler cannot know that the pointers given
 419 // to memcpy() are aligned which results in slow code. As of C11 there is
 420 // no standard way to tell the compiler that we know that the address is
 421 // aligned but some compilers have language extensions to do that. With
 422 // such language extensions the memcpy() method gives excellent results.
 423 //
 424 // What to do on a strict-align system when no known language extentensions
 425 // are available? Falling back to byte-by-byte access would be safe but ruin
 426 // optimizations that have been made specifically with aligned access in mind.
 427 // As a compromise, aligned reads will fall back to non-compliant type punning
 428 // but aligned writes will be byte-by-byte, that is, fast reads are preferred
 429 // over fast writes. This obviously isn't great but hopefully it's a working
 430 // compromise for now.
 431 //
 432 // __builtin_assume_aligned is support by GCC >= 4.7 and clang >= 3.6.
 433 #ifdef HAVE___BUILTIN_ASSUME_ALIGNED
 434 #       define tuklib_memcpy_aligned(dest, src, size) \
 435                 memcpy(dest, __builtin_assume_aligned(src, size), size)
 436 #else
 437 #       define tuklib_memcpy_aligned(dest, src, size) \
 438                 memcpy(dest, src, size)
 439 #       ifndef TUKLIB_FAST_UNALIGNED_ACCESS
 440 #               define TUKLIB_USE_UNSAFE_ALIGNED_READS 1
 441 #       endif
 442 #endif
 443
 444
 445 static inline uint16_t
 446 aligned_read16ne(const uint8_t *buf)
 447 {
 448 #if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \
 449                 || defined(TUKLIB_USE_UNSAFE_ALIGNED_READS)
 450         return *(const uint16_t *)buf;
 451 #else
 452         uint16_t num;
 453         tuklib_memcpy_aligned(&num, buf, sizeof(num));
 454         return num;
 455 #endif
 456 }
 457
 458
 459 static inline uint32_t
 460 aligned_read32ne(const uint8_t *buf)
 461 {
 462 #if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \
 463                 || defined(TUKLIB_USE_UNSAFE_ALIGNED_READS)
 464         return *(const uint32_t *)buf;
 465 #else
 466         uint32_t num;
 467         tuklib_memcpy_aligned(&num, buf, sizeof(num));
 468         return num;
 469 #endif
 470 }
 471
 472
 473 static inline uint64_t
 474 aligned_read64ne(const uint8_t *buf)
 475 {
 476 #if defined(TUKLIB_USE_UNSAFE_TYPE_PUNNING) \
 477                 || defined(TUKLIB_USE_UNSAFE_ALIGNED_READS)
 478         return *(const uint64_t *)buf;
 479 #else
 480         uint64_t num;
 481         tuklib_memcpy_aligned(&num, buf, sizeof(num));
 482         return num;
 483 #endif
 484 }
 485
 486
 487 static inline void
 488 aligned_write16ne(uint8_t *buf, uint16_t num)
 489 {
 490 #ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
 491         *(uint16_t *)buf = num;
 492 #else
 493         tuklib_memcpy_aligned(buf, &num, sizeof(num));
 494 #endif
 495         return;
 496 }
 497
 498
 499 static inline void
 500 aligned_write32ne(uint8_t *buf, uint32_t num)
 501 {
 502 #ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
 503         *(uint32_t *)buf = num;
 504 #else
 505         tuklib_memcpy_aligned(buf, &num, sizeof(num));
 506 #endif
 507         return;
 508 }
 509
 510
 511 static inline void
 512 aligned_write64ne(uint8_t *buf, uint64_t num)
 513 {
 514 #ifdef TUKLIB_USE_UNSAFE_TYPE_PUNNING
 515         *(uint64_t *)buf = num;
 516 #else
 517         tuklib_memcpy_aligned(buf, &num, sizeof(num));
 518 #endif
 519         return;
 520 }
 521
 522
 523 static inline uint16_t
 524 aligned_read16be(const uint8_t *buf)
 525 {
 526         uint16_t num = aligned_read16ne(buf);
 527         return conv16be(num);
 528 }
 529
 530
 531 static inline uint16_t
 532 aligned_read16le(const uint8_t *buf)
 533 {
 534         uint16_t num = aligned_read16ne(buf);
 535         return conv16le(num);
 536 }
 537
 538
 539 static inline uint32_t
 540 aligned_read32be(const uint8_t *buf)
 541 {
 542         uint32_t num = aligned_read32ne(buf);
 543         return conv32be(num);
 544 }
 545
 546
 547 static inline uint32_t
 548 aligned_read32le(const uint8_t *buf)
 549 {
 550         uint32_t num = aligned_read32ne(buf);
 551         return conv32le(num);
 552 }
 553
 554
 555 static inline uint64_t
 556 aligned_read64be(const uint8_t *buf)
 557 {
 558         uint64_t num = aligned_read64ne(buf);
 559         return conv64be(num);
 560 }
 561
 562
 563 static inline uint64_t
 564 aligned_read64le(const uint8_t *buf)
 565 {
 566         uint64_t num = aligned_read64ne(buf);
 567         return conv64le(num);
 568 }
 569
 570
 571 // These need to be macros like in the unaligned case.
 572 #define aligned_write16be(buf, num) aligned_write16ne((buf), conv16be(num))
 573 #define aligned_write16le(buf, num) aligned_write16ne((buf), conv16le(num))
 574 #define aligned_write32be(buf, num) aligned_write32ne((buf), conv32be(num))
 575 #define aligned_write32le(buf, num) aligned_write32ne((buf), conv32le(num))
 576 #define aligned_write64be(buf, num) aligned_write64ne((buf), conv64be(num))
 577 #define aligned_write64le(buf, num) aligned_write64ne((buf), conv64le(num))
 578
 579
 580 ////////////////////
 581 // Bit operations //
 582 ////////////////////
 583
 584 static inline uint32_t
 585 bsr32(uint32_t n)
 586 {
 587         // Check for ICC first, since it tends to define __GNUC__ too.
 588 #if defined(__INTEL_COMPILER)
 589         return _bit_scan_reverse(n);
 590
 591 #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
 592         // GCC >= 3.4 has __builtin_clz(), which gives good results on
 593         // multiple architectures. On x86, __builtin_clz() ^ 31U becomes
 594         // either plain BSR (so the XOR gets optimized away) or LZCNT and
 595         // XOR (if -march indicates that SSE4a instructions are supported).
 596         return (uint32_t)__builtin_clz(n) ^ 31U;
 597
 598 #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
 599         uint32_t i;
 600         __asm__("bsrl %1, %0" : "=r" (i) : "rm" (n));
 601         return i;
 602
 603 #elif defined(_MSC_VER)
 604         unsigned long i;
 605         _BitScanReverse(&i, n);
 606         return i;
 607
 608 #else
 609         uint32_t i = 31;
 610
 611         if ((n & 0xFFFF0000) == 0) {
 612                 n <<= 16;
 613                 i = 15;
 614         }
 615
 616         if ((n & 0xFF000000) == 0) {
 617                 n <<= 8;
 618                 i -= 8;
 619         }
 620
 621         if ((n & 0xF0000000) == 0) {
 622                 n <<= 4;
 623                 i -= 4;
 624         }
 625
 626         if ((n & 0xC0000000) == 0) {
 627                 n <<= 2;
 628                 i -= 2;
 629         }
 630
 631         if ((n & 0x80000000) == 0)
 632                 --i;
 633
 634         return i;
 635 #endif
 636 }
 637
 638
 639 static inline uint32_t
 640 clz32(uint32_t n)
 641 {
 642 #if defined(__INTEL_COMPILER)
 643         return _bit_scan_reverse(n) ^ 31U;
 644
 645 #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
 646         return (uint32_t)__builtin_clz(n);
 647
 648 #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
 649         uint32_t i;
 650         __asm__("bsrl %1, %0\n\t"
 651                 "xorl $31, %0"
 652                 : "=r" (i) : "rm" (n));
 653         return i;
 654
 655 #elif defined(_MSC_VER)
 656         unsigned long i;
 657         _BitScanReverse(&i, n);
 658         return i ^ 31U;
 659
 660 #else
 661         uint32_t i = 0;
 662
 663         if ((n & 0xFFFF0000) == 0) {
 664                 n <<= 16;
 665                 i = 16;
 666         }
 667
 668         if ((n & 0xFF000000) == 0) {
 669                 n <<= 8;
 670                 i += 8;
 671         }
 672
 673         if ((n & 0xF0000000) == 0) {
 674                 n <<= 4;
 675                 i += 4;
 676         }
 677
 678         if ((n & 0xC0000000) == 0) {
 679                 n <<= 2;
 680                 i += 2;
 681         }
 682
 683         if ((n & 0x80000000) == 0)
 684                 ++i;
 685
 686         return i;
 687 #endif
 688 }
 689
 690
 691 static inline uint32_t
 692 ctz32(uint32_t n)
 693 {
 694 #if defined(__INTEL_COMPILER)
 695         return _bit_scan_forward(n);
 696
 697 #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX >= UINT32_MAX
 698         return (uint32_t)__builtin_ctz(n);
 699
 700 #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
 701         uint32_t i;
 702         __asm__("bsfl %1, %0" : "=r" (i) : "rm" (n));
 703         return i;
 704
 705 #elif defined(_MSC_VER)
 706         unsigned long i;
 707         _BitScanForward(&i, n);
 708         return i;
 709
 710 #else
 711         uint32_t i = 0;
 712
 713         if ((n & 0x0000FFFF) == 0) {
 714                 n >>= 16;
 715                 i = 16;
 716         }
 717
 718         if ((n & 0x000000FF) == 0) {
 719                 n >>= 8;
 720                 i += 8;
 721         }
 722
 723         if ((n & 0x0000000F) == 0) {
 724                 n >>= 4;
 725                 i += 4;
 726         }
 727
 728         if ((n & 0x00000003) == 0) {
 729                 n >>= 2;
 730                 i += 2;
 731         }
 732
 733         if ((n & 0x00000001) == 0)
 734                 ++i;
 735
 736         return i;
 737 #endif
 738 }
 739
 740 #define bsf32 ctz32
 741
 742 #endif