contrib/xz/src/xz/list.c

   1 ///////////////////////////////////////////////////////////////////////////////
   2 //
   3 /// \file       list.c
   4 /// \brief      Listing information about .xz files
   5 //
   6 //  Author:     Lasse Collin
   7 //
   8 //  This file has been put into the public domain.
   9 //  You can do whatever you want with this file.
  10 //
  11 ///////////////////////////////////////////////////////////////////////////////
  12
  13 #include "private.h"
  14 #include "tuklib_integer.h"
  15
  16
  17 /// Information about a .xz file
  18 typedef struct {
  19         /// Combined Index of all Streams in the file
  20         lzma_index *idx;
  21
  22         /// Total amount of Stream Padding
  23         uint64_t stream_padding;
  24
  25         /// Highest memory usage so far
  26         uint64_t memusage_max;
  27
  28         /// True if all Blocks so far have Compressed Size and
  29         /// Uncompressed Size fields
  30         bool all_have_sizes;
  31
  32         /// Oldest XZ Utils version that will decompress the file
  33         uint32_t min_version;
  34
  35 } xz_file_info;
  36
  37 #define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 }
  38
  39
  40 /// Information about a .xz Block
  41 typedef struct {
  42         /// Size of the Block Header
  43         uint32_t header_size;
  44
  45         /// A few of the Block Flags as a string
  46         char flags[3];
  47
  48         /// Size of the Compressed Data field in the Block
  49         lzma_vli compressed_size;
  50
  51         /// Decoder memory usage for this Block
  52         uint64_t memusage;
  53
  54         /// The filter chain of this Block in human-readable form
  55         char filter_chain[FILTERS_STR_SIZE];
  56
  57 } block_header_info;
  58
  59
  60 /// Check ID to string mapping
  61 static const char check_names[LZMA_CHECK_ID_MAX + 1][12] = {
  62         // TRANSLATORS: Indicates that there is no integrity check.
  63         // This string is used in tables, so the width must not
  64         // exceed ten columns with a fixed-width font.
  65         N_("None"),
  66         "CRC32",
  67         // TRANSLATORS: Indicates that integrity check name is not known,
  68         // but the Check ID is known (here 2). This and other "Unknown-N"
  69         // strings are used in tables, so the width must not exceed ten
  70         // columns with a fixed-width font. It's OK to omit the dash if
  71         // you need space for one extra letter, but don't use spaces.
  72         N_("Unknown-2"),
  73         N_("Unknown-3"),
  74         "CRC64",
  75         N_("Unknown-5"),
  76         N_("Unknown-6"),
  77         N_("Unknown-7"),
  78         N_("Unknown-8"),
  79         N_("Unknown-9"),
  80         "SHA-256",
  81         N_("Unknown-11"),
  82         N_("Unknown-12"),
  83         N_("Unknown-13"),
  84         N_("Unknown-14"),
  85         N_("Unknown-15"),
  86 };
  87
  88 /// Buffer size for get_check_names(). This may be a bit ridiculous,
  89 /// but at least it's enough if some language needs many multibyte chars.
  90 #define CHECKS_STR_SIZE 1024
  91
  92
  93 /// Value of the Check field as hexadecimal string.
  94 /// This is set by parse_check_value().
  95 static char check_value[2 * LZMA_CHECK_SIZE_MAX + 1];
  96
  97
  98 /// Totals that are displayed if there was more than one file.
  99 /// The "files" counter is also used in print_info_adv() to show
 100 /// the file number.
 101 static struct {
 102         uint64_t files;
 103         uint64_t streams;
 104         uint64_t blocks;
 105         uint64_t compressed_size;
 106         uint64_t uncompressed_size;
 107         uint64_t stream_padding;
 108         uint64_t memusage_max;
 109         uint32_t checks;
 110         uint32_t min_version;
 111         bool all_have_sizes;
 112 } totals = { 0, 0, 0, 0, 0, 0, 0, 0, 50000002, true };
 113
 114
 115 /// Convert XZ Utils version number to a string.
 116 static const char *
 117 xz_ver_to_str(uint32_t ver)
 118 {
 119         static char buf[32];
 120
 121         unsigned int major = ver / 10000000U;
 122         ver -= major * 10000000U;
 123
 124         unsigned int minor = ver / 10000U;
 125         ver -= minor * 10000U;
 126
 127         unsigned int patch = ver / 10U;
 128         ver -= patch * 10U;
 129
 130         const char *stability = ver == 0 ? "alpha" : ver == 1 ? "beta" : "";
 131
 132         snprintf(buf, sizeof(buf), "%u.%u.%u%s",
 133                         major, minor, patch, stability);
 134         return buf;
 135 }
 136
 137
 138 /// \brief      Parse the Index(es) from the given .xz file
 139 ///
 140 /// \param      xfi     Pointer to structure where the decoded information
 141 ///                     is stored.
 142 /// \param      pair    Input file
 143 ///
 144 /// \return     On success, false is returned. On error, true is returned.
 145 ///
 146 // TODO: This function is pretty big. liblzma should have a function that
 147 // takes a callback function to parse the Index(es) from a .xz file to make
 148 // it easy for applications.
 149 static bool
 150 parse_indexes(xz_file_info *xfi, file_pair *pair)
 151 {
 152         if (pair->src_st.st_size <= 0) {
 153                 message_error(_("%s: File is empty"), pair->src_name);
 154                 return true;
 155         }
 156
 157         if (pair->src_st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) {
 158                 message_error(_("%s: Too small to be a valid .xz file"),
 159                                 pair->src_name);
 160                 return true;
 161         }
 162
 163         io_buf buf;
 164         lzma_stream_flags header_flags;
 165         lzma_stream_flags footer_flags;
 166         lzma_ret ret;
 167
 168         // lzma_stream for the Index decoder
 169         lzma_stream strm = LZMA_STREAM_INIT;
 170
 171         // All Indexes decoded so far
 172         lzma_index *combined_index = NULL;
 173
 174         // The Index currently being decoded
 175         lzma_index *this_index = NULL;
 176
 177         // Current position in the file. We parse the file backwards so
 178         // initialize it to point to the end of the file.
 179         off_t pos = pair->src_st.st_size;
 180
 181         // Each loop iteration decodes one Index.
 182         do {
 183                 // Check that there is enough data left to contain at least
 184                 // the Stream Header and Stream Footer. This check cannot
 185                 // fail in the first pass of this loop.
 186                 if (pos < 2 * LZMA_STREAM_HEADER_SIZE) {
 187                         message_error("%s: %s", pair->src_name,
 188                                         message_strm(LZMA_DATA_ERROR));
 189                         goto error;
 190                 }
 191
 192                 pos -= LZMA_STREAM_HEADER_SIZE;
 193                 lzma_vli stream_padding = 0;
 194
 195                 // Locate the Stream Footer. There may be Stream Padding which
 196                 // we must skip when reading backwards.
 197                 while (true) {
 198                         if (pos < LZMA_STREAM_HEADER_SIZE) {
 199                                 message_error("%s: %s", pair->src_name,
 200                                                 message_strm(
 201                                                         LZMA_DATA_ERROR));
 202                                 goto error;
 203                         }
 204
 205                         if (io_pread(pair, &buf,
 206                                         LZMA_STREAM_HEADER_SIZE, pos))
 207                                 goto error;
 208
 209                         // Stream Padding is always a multiple of four bytes.
 210                         int i = 2;
 211                         if (buf.u32[i] != 0)
 212                                 break;
 213
 214                         // To avoid calling io_pread() for every four bytes
 215                         // of Stream Padding, take advantage that we read
 216                         // 12 bytes (LZMA_STREAM_HEADER_SIZE) already and
 217                         // check them too before calling io_pread() again.
 218                         do {
 219                                 stream_padding += 4;
 220                                 pos -= 4;
 221                                 --i;
 222                         } while (i >= 0 && buf.u32[i] == 0);
 223                 }
 224
 225                 // Decode the Stream Footer.
 226                 ret = lzma_stream_footer_decode(&footer_flags, buf.u8);
 227                 if (ret != LZMA_OK) {
 228                         message_error("%s: %s", pair->src_name,
 229                                         message_strm(ret));
 230                         goto error;
 231                 }
 232
 233                 // Check that the Stream Footer doesn't specify something
 234                 // that we don't support. This can only happen if the xz
 235                 // version is older than liblzma and liblzma supports
 236                 // something new.
 237                 //
 238                 // It is enough to check Stream Footer. Stream Header must
 239                 // match when it is compared against Stream Footer with
 240                 // lzma_stream_flags_compare().
 241                 if (footer_flags.version != 0) {
 242                         message_error("%s: %s", pair->src_name,
 243                                         message_strm(LZMA_OPTIONS_ERROR));
 244                         goto error;
 245                 }
 246
 247                 // Check that the size of the Index field looks sane.
 248                 lzma_vli index_size = footer_flags.backward_size;
 249                 if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) {
 250                         message_error("%s: %s", pair->src_name,
 251                                         message_strm(LZMA_DATA_ERROR));
 252                         goto error;
 253                 }
 254
 255                 // Set pos to the beginning of the Index.
 256                 pos -= index_size;
 257
 258                 // See how much memory we can use for decoding this Index.
 259                 uint64_t memlimit = hardware_memlimit_get(MODE_LIST);
 260                 uint64_t memused = 0;
 261                 if (combined_index != NULL) {
 262                         memused = lzma_index_memused(combined_index);
 263                         if (memused > memlimit)
 264                                 message_bug();
 265
 266                         memlimit -= memused;
 267                 }
 268
 269                 // Decode the Index.
 270                 ret = lzma_index_decoder(&strm, &this_index, memlimit);
 271                 if (ret != LZMA_OK) {
 272                         message_error("%s: %s", pair->src_name,
 273                                         message_strm(ret));
 274                         goto error;
 275                 }
 276
 277                 do {
 278                         // Don't give the decoder more input than the
 279                         // Index size.
 280                         strm.avail_in = my_min(IO_BUFFER_SIZE, index_size);
 281                         if (io_pread(pair, &buf, strm.avail_in, pos))
 282                                 goto error;
 283
 284                         pos += strm.avail_in;
 285                         index_size -= strm.avail_in;
 286
 287                         strm.next_in = buf.u8;
 288                         ret = lzma_code(&strm, LZMA_RUN);
 289
 290                 } while (ret == LZMA_OK);
 291
 292                 // If the decoding seems to be successful, check also that
 293                 // the Index decoder consumed as much input as indicated
 294                 // by the Backward Size field.
 295                 if (ret == LZMA_STREAM_END)
 296                         if (index_size != 0 || strm.avail_in != 0)
 297                                 ret = LZMA_DATA_ERROR;
 298
 299                 if (ret != LZMA_STREAM_END) {
 300                         // LZMA_BUFFER_ERROR means that the Index decoder
 301                         // would have liked more input than what the Index
 302                         // size should be according to Stream Footer.
 303                         // The message for LZMA_DATA_ERROR makes more
 304                         // sense in that case.
 305                         if (ret == LZMA_BUF_ERROR)
 306                                 ret = LZMA_DATA_ERROR;
 307
 308                         message_error("%s: %s", pair->src_name,
 309                                         message_strm(ret));
 310
 311                         // If the error was too low memory usage limit,
 312                         // show also how much memory would have been needed.
 313                         if (ret == LZMA_MEMLIMIT_ERROR) {
 314                                 uint64_t needed = lzma_memusage(&strm);
 315                                 if (UINT64_MAX - needed < memused)
 316                                         needed = UINT64_MAX;
 317                                 else
 318                                         needed += memused;
 319
 320                                 message_mem_needed(V_ERROR, needed);
 321                         }
 322
 323                         goto error;
 324                 }
 325
 326                 // Decode the Stream Header and check that its Stream Flags
 327                 // match the Stream Footer.
 328                 pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE;
 329                 if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) {
 330                         message_error("%s: %s", pair->src_name,
 331                                         message_strm(LZMA_DATA_ERROR));
 332                         goto error;
 333                 }
 334
 335                 pos -= lzma_index_total_size(this_index);
 336                 if (io_pread(pair, &buf, LZMA_STREAM_HEADER_SIZE, pos))
 337                         goto error;
 338
 339                 ret = lzma_stream_header_decode(&header_flags, buf.u8);
 340                 if (ret != LZMA_OK) {
 341                         message_error("%s: %s", pair->src_name,
 342                                         message_strm(ret));
 343                         goto error;
 344                 }
 345
 346                 ret = lzma_stream_flags_compare(&header_flags, &footer_flags);
 347                 if (ret != LZMA_OK) {
 348                         message_error("%s: %s", pair->src_name,
 349                                         message_strm(ret));
 350                         goto error;
 351                 }
 352
 353                 // Store the decoded Stream Flags into this_index. This is
 354                 // needed so that we can print which Check is used in each
 355                 // Stream.
 356                 ret = lzma_index_stream_flags(this_index, &footer_flags);
 357                 if (ret != LZMA_OK)
 358                         message_bug();
 359
 360                 // Store also the size of the Stream Padding field. It is
 361                 // needed to show the offsets of the Streams correctly.
 362                 ret = lzma_index_stream_padding(this_index, stream_padding);
 363                 if (ret != LZMA_OK)
 364                         message_bug();
 365
 366                 if (combined_index != NULL) {
 367                         // Append the earlier decoded Indexes
 368                         // after this_index.
 369                         ret = lzma_index_cat(
 370                                         this_index, combined_index, NULL);
 371                         if (ret != LZMA_OK) {
 372                                 message_error("%s: %s", pair->src_name,
 373                                                 message_strm(ret));
 374                                 goto error;
 375                         }
 376                 }
 377
 378                 combined_index = this_index;
 379                 this_index = NULL;
 380
 381                 xfi->stream_padding += stream_padding;
 382
 383         } while (pos > 0);
 384
 385         lzma_end(&strm);
 386
 387         // All OK. Make combined_index available to the caller.
 388         xfi->idx = combined_index;
 389         return false;
 390
 391 error:
 392         // Something went wrong, free the allocated memory.
 393         lzma_end(&strm);
 394         lzma_index_end(combined_index, NULL);
 395         lzma_index_end(this_index, NULL);
 396         return true;
 397 }
 398
 399
 400 /// \brief      Parse the Block Header
 401 ///
 402 /// The result is stored into *bhi. The caller takes care of initializing it.
 403 ///
 404 /// \return     False on success, true on error.
 405 static bool
 406 parse_block_header(file_pair *pair, const lzma_index_iter *iter,
 407                 block_header_info *bhi, xz_file_info *xfi)
 408 {
 409 #if IO_BUFFER_SIZE < LZMA_BLOCK_HEADER_SIZE_MAX
 410 #       error IO_BUFFER_SIZE < LZMA_BLOCK_HEADER_SIZE_MAX
 411 #endif
 412
 413         // Get the whole Block Header with one read, but don't read past
 414         // the end of the Block (or even its Check field).
 415         const uint32_t size = my_min(iter->block.total_size
 416                                 - lzma_check_size(iter->stream.flags->check),
 417                         LZMA_BLOCK_HEADER_SIZE_MAX);
 418         io_buf buf;
 419         if (io_pread(pair, &buf, size, iter->block.compressed_file_offset))
 420                 return true;
 421
 422         // Zero would mean Index Indicator and thus not a valid Block.
 423         if (buf.u8[0] == 0)
 424                 goto data_error;
 425
 426         // Initialize the block structure and decode Block Header Size.
 427         lzma_filter filters[LZMA_FILTERS_MAX + 1];
 428         lzma_block block;
 429         block.version = 0;
 430         block.check = iter->stream.flags->check;
 431         block.filters = filters;
 432
 433         block.header_size = lzma_block_header_size_decode(buf.u8[0]);
 434         if (block.header_size > size)
 435                 goto data_error;
 436
 437         // Decode the Block Header.
 438         switch (lzma_block_header_decode(&block, NULL, buf.u8)) {
 439         case LZMA_OK:
 440                 break;
 441
 442         case LZMA_OPTIONS_ERROR:
 443                 message_error("%s: %s", pair->src_name,
 444                                 message_strm(LZMA_OPTIONS_ERROR));
 445                 return true;
 446
 447         case LZMA_DATA_ERROR:
 448                 goto data_error;
 449
 450         default:
 451                 message_bug();
 452         }
 453
 454         // Check the Block Flags. These must be done before calling
 455         // lzma_block_compressed_size(), because it overwrites
 456         // block.compressed_size.
 457         bhi->flags[0] = block.compressed_size != LZMA_VLI_UNKNOWN
 458                         ? 'c' : '-';
 459         bhi->flags[1] = block.uncompressed_size != LZMA_VLI_UNKNOWN
 460                         ? 'u' : '-';
 461         bhi->flags[2] = '\0';
 462
 463         // Collect information if all Blocks have both Compressed Size
 464         // and Uncompressed Size fields. They can be useful e.g. for
 465         // multi-threaded decompression so it can be useful to know it.
 466         xfi->all_have_sizes &= block.compressed_size != LZMA_VLI_UNKNOWN
 467                         && block.uncompressed_size != LZMA_VLI_UNKNOWN;
 468
 469         // Validate or set block.compressed_size.
 470         switch (lzma_block_compressed_size(&block,
 471                         iter->block.unpadded_size)) {
 472         case LZMA_OK:
 473                 // Validate also block.uncompressed_size if it is present.
 474                 // If it isn't present, there's no need to set it since
 475                 // we aren't going to actually decompress the Block; if
 476                 // we were decompressing, then we should set it so that
 477                 // the Block decoder could validate the Uncompressed Size
 478                 // that was stored in the Index.
 479                 if (block.uncompressed_size == LZMA_VLI_UNKNOWN
 480                                 || block.uncompressed_size
 481                                         == iter->block.uncompressed_size)
 482                         break;
 483
 484                 // If the above fails, the file is corrupt so
 485                 // LZMA_DATA_ERROR is a good error code.
 486
 487         // Fall through
 488
 489         case LZMA_DATA_ERROR:
 490                 // Free the memory allocated by lzma_block_header_decode().
 491                 for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i)
 492                         free(filters[i].options);
 493
 494                 goto data_error;
 495
 496         default:
 497                 message_bug();
 498         }
 499
 500         // Copy the known sizes.
 501         bhi->header_size = block.header_size;
 502         bhi->compressed_size = block.compressed_size;
 503
 504         // Calculate the decoder memory usage and update the maximum
 505         // memory usage of this Block.
 506         bhi->memusage = lzma_raw_decoder_memusage(filters);
 507         if (xfi->memusage_max < bhi->memusage)
 508                 xfi->memusage_max = bhi->memusage;
 509
 510         // Determine the minimum XZ Utils version that supports this Block.
 511         //
 512         // Currently the only thing that 5.0.0 doesn't support is empty
 513         // LZMA2 Block. This decoder bug was fixed in 5.0.2.
 514         {
 515                 size_t i = 0;
 516                 while (filters[i + 1].id != LZMA_VLI_UNKNOWN)
 517                         ++i;
 518
 519                 if (filters[i].id == LZMA_FILTER_LZMA2
 520                                 && iter->block.uncompressed_size == 0
 521                                 && xfi->min_version < 50000022U)
 522                         xfi->min_version = 50000022U;
 523         }
 524
 525         // Convert the filter chain to human readable form.
 526         message_filters_to_str(bhi->filter_chain, filters, false);
 527
 528         // Free the memory allocated by lzma_block_header_decode().
 529         for (size_t i = 0; filters[i].id != LZMA_VLI_UNKNOWN; ++i)
 530                 free(filters[i].options);
 531
 532         return false;
 533
 534 data_error:
 535         // Show the error message.
 536         message_error("%s: %s", pair->src_name,
 537                         message_strm(LZMA_DATA_ERROR));
 538         return true;
 539 }
 540
 541
 542 /// \brief      Parse the Check field and put it into check_value[]
 543 ///
 544 /// \return     False on success, true on error.
 545 static bool
 546 parse_check_value(file_pair *pair, const lzma_index_iter *iter)
 547 {
 548         // Don't read anything from the file if there is no integrity Check.
 549         if (iter->stream.flags->check == LZMA_CHECK_NONE) {
 550                 snprintf(check_value, sizeof(check_value), "---");
 551                 return false;
 552         }
 553
 554         // Locate and read the Check field.
 555         const uint32_t size = lzma_check_size(iter->stream.flags->check);
 556         const off_t offset = iter->block.compressed_file_offset
 557                         + iter->block.total_size - size;
 558         io_buf buf;
 559         if (io_pread(pair, &buf, size, offset))
 560                 return true;
 561
 562         // CRC32 and CRC64 are in little endian. Guess that all the future
 563         // 32-bit and 64-bit Check values are little endian too. It shouldn't
 564         // be a too big problem if this guess is wrong.
 565         if (size == 4)
 566                 snprintf(check_value, sizeof(check_value),
 567                                 "%08" PRIx32, conv32le(buf.u32[0]));
 568         else if (size == 8)
 569                 snprintf(check_value, sizeof(check_value),
 570                                 "%016" PRIx64, conv64le(buf.u64[0]));
 571         else
 572                 for (size_t i = 0; i < size; ++i)
 573                         snprintf(check_value + i * 2, 3, "%02x", buf.u8[i]);
 574
 575         return false;
 576 }
 577
 578
 579 /// \brief      Parse detailed information about a Block
 580 ///
 581 /// Since this requires seek(s), listing information about all Blocks can
 582 /// be slow.
 583 ///
 584 /// \param      pair    Input file
 585 /// \param      iter    Location of the Block whose Check value should
 586 ///                     be printed.
 587 /// \param      bhi     Pointer to structure where to store the information
 588 ///                     about the Block Header field.
 589 ///
 590 /// \return     False on success, true on error. If an error occurs,
 591 ///             the error message is printed too so the caller doesn't
 592 ///             need to worry about that.
 593 static bool
 594 parse_details(file_pair *pair, const lzma_index_iter *iter,
 595                 block_header_info *bhi, xz_file_info *xfi)
 596 {
 597         if (parse_block_header(pair, iter, bhi, xfi))
 598                 return true;
 599
 600         if (parse_check_value(pair, iter))
 601                 return true;
 602
 603         return false;
 604 }
 605
 606
 607 /// \brief      Get the compression ratio
 608 ///
 609 /// This has slightly different format than that is used in message.c.
 610 static const char *
 611 get_ratio(uint64_t compressed_size, uint64_t uncompressed_size)
 612 {
 613         if (uncompressed_size == 0)
 614                 return "---";
 615
 616         const double ratio = (double)(compressed_size)
 617                         / (double)(uncompressed_size);
 618         if (ratio > 9.999)
 619                 return "---";
 620
 621         static char buf[16];
 622         snprintf(buf, sizeof(buf), "%.3f", ratio);
 623         return buf;
 624 }
 625
 626
 627 /// \brief      Get a comma-separated list of Check names
 628 ///
 629 /// The check names are translated with gettext except when in robot mode.
 630 ///
 631 /// \param      buf     Buffer to hold the resulting string
 632 /// \param      checks  Bit mask of Checks to print
 633 /// \param      space_after_comma
 634 ///                     It's better to not use spaces in table-like listings,
 635 ///                     but in more verbose formats a space after a comma
 636 ///                     is good for readability.
 637 static void
 638 get_check_names(char buf[CHECKS_STR_SIZE],
 639                 uint32_t checks, bool space_after_comma)
 640 {
 641         // If we get called when there are no Checks to print, set checks
 642         // to 1 so that we print "None". This can happen in the robot mode
 643         // when printing the totals line if there are no valid input files.
 644         if (checks == 0)
 645                 checks = 1;
 646
 647         char *pos = buf;
 648         size_t left = CHECKS_STR_SIZE;
 649
 650         const char *sep = space_after_comma ? ", " : ",";
 651         bool comma = false;
 652
 653         for (size_t i = 0; i <= LZMA_CHECK_ID_MAX; ++i) {
 654                 if (checks & (UINT32_C(1) << i)) {
 655                         my_snprintf(&pos, &left, "%s%s",
 656                                         comma ? sep : "",
 657                                         opt_robot ? check_names[i]
 658                                                 : _(check_names[i]));
 659                         comma = true;
 660                 }
 661         }
 662
 663         return;
 664 }
 665
 666
 667 static bool
 668 print_info_basic(const xz_file_info *xfi, file_pair *pair)
 669 {
 670         static bool headings_displayed = false;
 671         if (!headings_displayed) {
 672                 headings_displayed = true;
 673                 // TRANSLATORS: These are column headings. From Strms (Streams)
 674                 // to Ratio, the columns are right aligned. Check and Filename
 675                 // are left aligned. If you need longer words, it's OK to
 676                 // use two lines here. Test with "xz -l foo.xz".
 677                 puts(_("Strms  Blocks   Compressed Uncompressed  Ratio  "
 678                                 "Check   Filename"));
 679         }
 680
 681         char checks[CHECKS_STR_SIZE];
 682         get_check_names(checks, lzma_index_checks(xfi->idx), false);
 683
 684         const char *cols[7] = {
 685                 uint64_to_str(lzma_index_stream_count(xfi->idx), 0),
 686                 uint64_to_str(lzma_index_block_count(xfi->idx), 1),
 687                 uint64_to_nicestr(lzma_index_file_size(xfi->idx),
 688                         NICESTR_B, NICESTR_TIB, false, 2),
 689                 uint64_to_nicestr(lzma_index_uncompressed_size(xfi->idx),
 690                         NICESTR_B, NICESTR_TIB, false, 3),
 691                 get_ratio(lzma_index_file_size(xfi->idx),
 692                         lzma_index_uncompressed_size(xfi->idx)),
 693                 checks,
 694                 pair->src_name,
 695         };
 696         printf("%*s %*s  %*s  %*s  %*s  %-*s %s\n",
 697                         tuklib_mbstr_fw(cols[0], 5), cols[0],
 698                         tuklib_mbstr_fw(cols[1], 7), cols[1],
 699                         tuklib_mbstr_fw(cols[2], 11), cols[2],
 700                         tuklib_mbstr_fw(cols[3], 11), cols[3],
 701                         tuklib_mbstr_fw(cols[4], 5), cols[4],
 702                         tuklib_mbstr_fw(cols[5], 7), cols[5],
 703                         cols[6]);
 704
 705         return false;
 706 }
 707
 708
 709 static void
 710 print_adv_helper(uint64_t stream_count, uint64_t block_count,
 711                 uint64_t compressed_size, uint64_t uncompressed_size,
 712                 uint32_t checks, uint64_t stream_padding)
 713 {
 714         char checks_str[CHECKS_STR_SIZE];
 715         get_check_names(checks_str, checks, true);
 716
 717         printf(_("  Streams:            %s\n"),
 718                         uint64_to_str(stream_count, 0));
 719         printf(_("  Blocks:             %s\n"),
 720                         uint64_to_str(block_count, 0));
 721         printf(_("  Compressed size:    %s\n"),
 722                         uint64_to_nicestr(compressed_size,
 723                                 NICESTR_B, NICESTR_TIB, true, 0));
 724         printf(_("  Uncompressed size:  %s\n"),
 725                         uint64_to_nicestr(uncompressed_size,
 726                                 NICESTR_B, NICESTR_TIB, true, 0));
 727         printf(_("  Ratio:              %s\n"),
 728                         get_ratio(compressed_size, uncompressed_size));
 729         printf(_("  Check:              %s\n"), checks_str);
 730         printf(_("  Stream padding:     %s\n"),
 731                         uint64_to_nicestr(stream_padding,
 732                                 NICESTR_B, NICESTR_TIB, true, 0));
 733         return;
 734 }
 735
 736
 737 static bool
 738 print_info_adv(xz_file_info *xfi, file_pair *pair)
 739 {
 740         // Print the overall information.
 741         print_adv_helper(lzma_index_stream_count(xfi->idx),
 742                         lzma_index_block_count(xfi->idx),
 743                         lzma_index_file_size(xfi->idx),
 744                         lzma_index_uncompressed_size(xfi->idx),
 745                         lzma_index_checks(xfi->idx),
 746                         xfi->stream_padding);
 747
 748         // Size of the biggest Check. This is used to calculate the width
 749         // of the CheckVal field. The table would get insanely wide if
 750         // we always reserved space for 64-byte Check (128 chars as hex).
 751         uint32_t check_max = 0;
 752
 753         // Print information about the Streams.
 754         //
 755         // TRANSLATORS: The second line is column headings. All except
 756         // Check are right aligned; Check is left aligned. Test with
 757         // "xz -lv foo.xz".
 758         puts(_("  Streams:\n    Stream    Blocks"
 759                         "      CompOffset    UncompOffset"
 760                         "        CompSize      UncompSize  Ratio"
 761                         "  Check      Padding"));
 762
 763         lzma_index_iter iter;
 764         lzma_index_iter_init(&iter, xfi->idx);
 765
 766         while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_STREAM)) {
 767                 const char *cols1[4] = {
 768                         uint64_to_str(iter.stream.number, 0),
 769                         uint64_to_str(iter.stream.block_count, 1),
 770                         uint64_to_str(iter.stream.compressed_offset, 2),
 771                         uint64_to_str(iter.stream.uncompressed_offset, 3),
 772                 };
 773                 printf("    %*s %*s %*s %*s ",
 774                                 tuklib_mbstr_fw(cols1[0], 6), cols1[0],
 775                                 tuklib_mbstr_fw(cols1[1], 9), cols1[1],
 776                                 tuklib_mbstr_fw(cols1[2], 15), cols1[2],
 777                                 tuklib_mbstr_fw(cols1[3], 15), cols1[3]);
 778
 779                 const char *cols2[5] = {
 780                         uint64_to_str(iter.stream.compressed_size, 0),
 781                         uint64_to_str(iter.stream.uncompressed_size, 1),
 782                         get_ratio(iter.stream.compressed_size,
 783                                 iter.stream.uncompressed_size),
 784                         _(check_names[iter.stream.flags->check]),
 785                         uint64_to_str(iter.stream.padding, 2),
 786                 };
 787                 printf("%*s %*s  %*s  %-*s %*s\n",
 788                                 tuklib_mbstr_fw(cols2[0], 15), cols2[0],
 789                                 tuklib_mbstr_fw(cols2[1], 15), cols2[1],
 790                                 tuklib_mbstr_fw(cols2[2], 5), cols2[2],
 791                                 tuklib_mbstr_fw(cols2[3], 10), cols2[3],
 792                                 tuklib_mbstr_fw(cols2[4], 7), cols2[4]);
 793
 794                 // Update the maximum Check size.
 795                 if (lzma_check_size(iter.stream.flags->check) > check_max)
 796                         check_max = lzma_check_size(iter.stream.flags->check);
 797         }
 798
 799         // Cache the verbosity level to a local variable.
 800         const bool detailed = message_verbosity_get() >= V_DEBUG;
 801
 802         // Information collected from Block Headers
 803         block_header_info bhi;
 804
 805         // Print information about the Blocks but only if there is
 806         // at least one Block.
 807         if (lzma_index_block_count(xfi->idx) > 0) {
 808                 // Calculate the width of the CheckVal field.
 809                 const int checkval_width = my_max(8, 2 * check_max);
 810
 811                 // TRANSLATORS: The second line is column headings. All
 812                 // except Check are right aligned; Check is left aligned.
 813                 printf(_("  Blocks:\n    Stream     Block"
 814                         "      CompOffset    UncompOffset"
 815                         "       TotalSize      UncompSize  Ratio  Check"));
 816
 817                 if (detailed) {
 818                         // TRANSLATORS: These are additional column headings
 819                         // for the most verbose listing mode. CheckVal
 820                         // (Check value), Flags, and Filters are left aligned.
 821                         // Header (Block Header Size), CompSize, and MemUsage
 822                         // are right aligned. %*s is replaced with 0-120
 823                         // spaces to make the CheckVal column wide enough.
 824                         // Test with "xz -lvv foo.xz".
 825                         printf(_("      CheckVal %*s Header  Flags        "
 826                                         "CompSize    MemUsage  Filters"),
 827                                         checkval_width - 8, "");
 828                 }
 829
 830                 putchar('\n');
 831
 832                 lzma_index_iter_init(&iter, xfi->idx);
 833
 834                 // Iterate over the Blocks.
 835                 while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) {
 836                         if (detailed && parse_details(pair, &iter, &bhi, xfi))
 837                                         return true;
 838
 839                         const char *cols1[4] = {
 840                                 uint64_to_str(iter.stream.number, 0),
 841                                 uint64_to_str(
 842                                         iter.block.number_in_stream, 1),
 843                                 uint64_to_str(
 844                                         iter.block.compressed_file_offset, 2),
 845                                 uint64_to_str(
 846                                         iter.block.uncompressed_file_offset, 3)
 847                         };
 848                         printf("    %*s %*s %*s %*s ",
 849                                 tuklib_mbstr_fw(cols1[0], 6), cols1[0],
 850                                 tuklib_mbstr_fw(cols1[1], 9), cols1[1],
 851                                 tuklib_mbstr_fw(cols1[2], 15), cols1[2],
 852                                 tuklib_mbstr_fw(cols1[3], 15), cols1[3]);
 853
 854                         const char *cols2[4] = {
 855                                 uint64_to_str(iter.block.total_size, 0),
 856                                 uint64_to_str(iter.block.uncompressed_size,
 857                                                 1),
 858                                 get_ratio(iter.block.total_size,
 859                                         iter.block.uncompressed_size),
 860                                 _(check_names[iter.stream.flags->check])
 861                         };
 862                         printf("%*s %*s  %*s  %-*s",
 863                                 tuklib_mbstr_fw(cols2[0], 15), cols2[0],
 864                                 tuklib_mbstr_fw(cols2[1], 15), cols2[1],
 865                                 tuklib_mbstr_fw(cols2[2], 5), cols2[2],
 866                                 tuklib_mbstr_fw(cols2[3], detailed ? 11 : 1),
 867                                         cols2[3]);
 868
 869                         if (detailed) {
 870                                 const lzma_vli compressed_size
 871                                                 = iter.block.unpadded_size
 872                                                 - bhi.header_size
 873                                                 - lzma_check_size(
 874                                                 iter.stream.flags->check);
 875
 876                                 const char *cols3[6] = {
 877                                         check_value,
 878                                         uint64_to_str(bhi.header_size, 0),
 879                                         bhi.flags,
 880                                         uint64_to_str(compressed_size, 1),
 881                                         uint64_to_str(
 882                                                 round_up_to_mib(bhi.memusage),
 883                                                 2),
 884                                         bhi.filter_chain
 885                                 };
 886                                 // Show MiB for memory usage, because it
 887                                 // is the only size which is not in bytes.
 888                                 printf("%-*s  %*s  %-5s %*s %*s MiB  %s",
 889                                         checkval_width, cols3[0],
 890                                         tuklib_mbstr_fw(cols3[1], 6), cols3[1],
 891                                         cols3[2],
 892                                         tuklib_mbstr_fw(cols3[3], 15),
 893                                                 cols3[3],
 894                                         tuklib_mbstr_fw(cols3[4], 7), cols3[4],
 895                                         cols3[5]);
 896                         }
 897
 898                         putchar('\n');
 899                 }
 900         }
 901
 902         if (detailed) {
 903                 printf(_("  Memory needed:      %s MiB\n"), uint64_to_str(
 904                                 round_up_to_mib(xfi->memusage_max), 0));
 905                 printf(_("  Sizes in headers:   %s\n"),
 906                                 xfi->all_have_sizes ? _("Yes") : _("No"));
 907                 printf(_("  Minimum XZ Utils version: %s\n"),
 908                                 xz_ver_to_str(xfi->min_version));
 909         }
 910
 911         return false;
 912 }
 913
 914
 915 static bool
 916 print_info_robot(xz_file_info *xfi, file_pair *pair)
 917 {
 918         char checks[CHECKS_STR_SIZE];
 919         get_check_names(checks, lzma_index_checks(xfi->idx), false);
 920
 921         printf("name\t%s\n", pair->src_name);
 922
 923         printf("file\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64
 924                         "\t%s\t%s\t%" PRIu64 "\n",
 925                         lzma_index_stream_count(xfi->idx),
 926                         lzma_index_block_count(xfi->idx),
 927                         lzma_index_file_size(xfi->idx),
 928                         lzma_index_uncompressed_size(xfi->idx),
 929                         get_ratio(lzma_index_file_size(xfi->idx),
 930                                 lzma_index_uncompressed_size(xfi->idx)),
 931                         checks,
 932                         xfi->stream_padding);
 933
 934         if (message_verbosity_get() >= V_VERBOSE) {
 935                 lzma_index_iter iter;
 936                 lzma_index_iter_init(&iter, xfi->idx);
 937
 938                 while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_STREAM))
 939                         printf("stream\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64
 940                                 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64
 941                                 "\t%s\t%s\t%" PRIu64 "\n",
 942                                 iter.stream.number,
 943                                 iter.stream.block_count,
 944                                 iter.stream.compressed_offset,
 945                                 iter.stream.uncompressed_offset,
 946                                 iter.stream.compressed_size,
 947                                 iter.stream.uncompressed_size,
 948                                 get_ratio(iter.stream.compressed_size,
 949                                         iter.stream.uncompressed_size),
 950                                 check_names[iter.stream.flags->check],
 951                                 iter.stream.padding);
 952
 953                 lzma_index_iter_rewind(&iter);
 954                 block_header_info bhi;
 955
 956                 while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) {
 957                         if (message_verbosity_get() >= V_DEBUG
 958                                         && parse_details(
 959                                                 pair, &iter, &bhi, xfi))
 960                                 return true;
 961
 962                         printf("block\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64
 963                                         "\t%" PRIu64 "\t%" PRIu64
 964                                         "\t%" PRIu64 "\t%" PRIu64 "\t%s\t%s",
 965                                         iter.stream.number,
 966                                         iter.block.number_in_stream,
 967                                         iter.block.number_in_file,
 968                                         iter.block.compressed_file_offset,
 969                                         iter.block.uncompressed_file_offset,
 970                                         iter.block.total_size,
 971                                         iter.block.uncompressed_size,
 972                                         get_ratio(iter.block.total_size,
 973                                                 iter.block.uncompressed_size),
 974                                         check_names[iter.stream.flags->check]);
 975
 976                         if (message_verbosity_get() >= V_DEBUG)
 977                                 printf("\t%s\t%" PRIu32 "\t%s\t%" PRIu64
 978                                                 "\t%" PRIu64 "\t%s",
 979                                                 check_value,
 980                                                 bhi.header_size,
 981                                                 bhi.flags,
 982                                                 bhi.compressed_size,
 983                                                 bhi.memusage,
 984                                                 bhi.filter_chain);
 985
 986                         putchar('\n');
 987                 }
 988         }
 989
 990         if (message_verbosity_get() >= V_DEBUG)
 991                 printf("summary\t%" PRIu64 "\t%s\t%" PRIu32 "\n",
 992                                 xfi->memusage_max,
 993                                 xfi->all_have_sizes ? "yes" : "no",
 994                                 xfi->min_version);
 995
 996         return false;
 997 }
 998
 999
1000 static void
1001 update_totals(const xz_file_info *xfi)
1002 {
1003         // TODO: Integer overflow checks
1004         ++totals.files;
1005         totals.streams += lzma_index_stream_count(xfi->idx);
1006         totals.blocks += lzma_index_block_count(xfi->idx);
1007         totals.compressed_size += lzma_index_file_size(xfi->idx);
1008         totals.uncompressed_size += lzma_index_uncompressed_size(xfi->idx);
1009         totals.stream_padding += xfi->stream_padding;
1010         totals.checks |= lzma_index_checks(xfi->idx);
1011
1012         if (totals.memusage_max < xfi->memusage_max)
1013                 totals.memusage_max = xfi->memusage_max;
1014
1015         if (totals.min_version < xfi->min_version)
1016                 totals.min_version = xfi->min_version;
1017
1018         totals.all_have_sizes &= xfi->all_have_sizes;
1019
1020         return;
1021 }
1022
1023
1024 static void
1025 print_totals_basic(void)
1026 {
1027         // Print a separator line.
1028         char line[80];
1029         memset(line, '-', sizeof(line));
1030         line[sizeof(line) - 1] = '\0';
1031         puts(line);
1032
1033         // Get the check names.
1034         char checks[CHECKS_STR_SIZE];
1035         get_check_names(checks, totals.checks, false);
1036
1037         // Print the totals except the file count, which needs
1038         // special handling.
1039         printf("%5s %7s  %11s  %11s  %5s  %-7s ",
1040                         uint64_to_str(totals.streams, 0),
1041                         uint64_to_str(totals.blocks, 1),
1042                         uint64_to_nicestr(totals.compressed_size,
1043                                 NICESTR_B, NICESTR_TIB, false, 2),
1044                         uint64_to_nicestr(totals.uncompressed_size,
1045                                 NICESTR_B, NICESTR_TIB, false, 3),
1046                         get_ratio(totals.compressed_size,
1047                                 totals.uncompressed_size),
1048                         checks);
1049
1050         // Since we print totals only when there are at least two files,
1051         // the English message will always use "%s files". But some other
1052         // languages need different forms for different plurals so we
1053         // have to translate this with ngettext().
1054         //
1055         // TRANSLATORS: %s is an integer. Only the plural form of this
1056         // message is used (e.g. "2 files"). Test with "xz -l foo.xz bar.xz".
1057         printf(ngettext("%s file\n", "%s files\n",
1058                         totals.files <= ULONG_MAX ? totals.files
1059                                 : (totals.files % 1000000) + 1000000),
1060                         uint64_to_str(totals.files, 0));
1061
1062         return;
1063 }
1064
1065
1066 static void
1067 print_totals_adv(void)
1068 {
1069         putchar('\n');
1070         puts(_("Totals:"));
1071         printf(_("  Number of files:    %s\n"),
1072                         uint64_to_str(totals.files, 0));
1073         print_adv_helper(totals.streams, totals.blocks,
1074                         totals.compressed_size, totals.uncompressed_size,
1075                         totals.checks, totals.stream_padding);
1076
1077         if (message_verbosity_get() >= V_DEBUG) {
1078                 printf(_("  Memory needed:      %s MiB\n"), uint64_to_str(
1079                                 round_up_to_mib(totals.memusage_max), 0));
1080                 printf(_("  Sizes in headers:   %s\n"),
1081                                 totals.all_have_sizes ? _("Yes") : _("No"));
1082                 printf(_("  Minimum XZ Utils version: %s\n"),
1083                                 xz_ver_to_str(totals.min_version));
1084         }
1085
1086         return;
1087 }
1088
1089
1090 static void
1091 print_totals_robot(void)
1092 {
1093         char checks[CHECKS_STR_SIZE];
1094         get_check_names(checks, totals.checks, false);
1095
1096         printf("totals\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64
1097                         "\t%s\t%s\t%" PRIu64 "\t%" PRIu64,
1098                         totals.streams,
1099                         totals.blocks,
1100                         totals.compressed_size,
1101                         totals.uncompressed_size,
1102                         get_ratio(totals.compressed_size,
1103                                 totals.uncompressed_size),
1104                         checks,
1105                         totals.stream_padding,
1106                         totals.files);
1107
1108         if (message_verbosity_get() >= V_DEBUG)
1109                 printf("\t%" PRIu64 "\t%s\t%" PRIu32,
1110                                 totals.memusage_max,
1111                                 totals.all_have_sizes ? "yes" : "no",
1112                                 totals.min_version);
1113
1114         putchar('\n');
1115
1116         return;
1117 }
1118
1119
1120 extern void
1121 list_totals(void)
1122 {
1123         if (opt_robot) {
1124                 // Always print totals in --robot mode. It can be convenient
1125                 // in some cases and doesn't complicate usage of the
1126                 // single-file case much.
1127                 print_totals_robot();
1128
1129         } else if (totals.files > 1) {
1130                 // For non-robot mode, totals are printed only if there
1131                 // is more than one file.
1132                 if (message_verbosity_get() <= V_WARNING)
1133                         print_totals_basic();
1134                 else
1135                         print_totals_adv();
1136         }
1137
1138         return;
1139 }
1140
1141
1142 extern void
1143 list_file(const char *filename)
1144 {
1145         if (opt_format != FORMAT_XZ && opt_format != FORMAT_AUTO)
1146                 message_fatal(_("--list works only on .xz files "
1147                                 "(--format=xz or --format=auto)"));
1148
1149         message_filename(filename);
1150
1151         if (filename == stdin_filename) {
1152                 message_error(_("--list does not support reading from "
1153                                 "standard input"));
1154                 return;
1155         }
1156
1157         // Unset opt_stdout so that io_open_src() won't accept special files.
1158         // Set opt_force so that io_open_src() will follow symlinks.
1159         opt_stdout = false;
1160         opt_force = true;
1161         file_pair *pair = io_open_src(filename);
1162         if (pair == NULL)
1163                 return;
1164
1165         xz_file_info xfi = XZ_FILE_INFO_INIT;
1166         if (!parse_indexes(&xfi, pair)) {
1167                 bool fail;
1168
1169                 // We have three main modes:
1170                 //  - --robot, which has submodes if --verbose is specified
1171                 //    once or twice
1172                 //  - Normal --list without --verbose
1173                 //  - --list with one or two --verbose
1174                 if (opt_robot)
1175                         fail = print_info_robot(&xfi, pair);
1176                 else if (message_verbosity_get() <= V_WARNING)
1177                         fail = print_info_basic(&xfi, pair);
1178                 else
1179                         fail = print_info_adv(&xfi, pair);
1180
1181                 // Update the totals that are displayed after all
1182                 // the individual files have been listed. Don't count
1183                 // broken files.
1184                 if (!fail)
1185                         update_totals(&xfi);
1186
1187                 lzma_index_end(xfi.idx, NULL);
1188         }
1189
1190         io_close(pair, false);
1191         return;
1192 }