Upgrade xz from 5.2.2 to 5.2.4 on the vendor branch.
[dragonfly.git] / contrib / xz / src / xz / args.c
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       args.c
4 /// \brief      Argument parsing
5 ///
6 /// \note       Filter-specific options parsing is in options.c.
7 //
8 //  Author:     Lasse Collin
9 //
10 //  This file has been put into the public domain.
11 //  You can do whatever you want with this file.
12 //
13 ///////////////////////////////////////////////////////////////////////////////
14
15 #include "private.h"
16
17 #include "getopt.h"
18 #include <ctype.h>
19
20
// Name stored in args->files_name when --files or --files0 is given
// without a filename. It is never modified or free()d, but it has to be
// assigned to some non-const pointers.
const char stdin_filename[] = "(stdin)";

// Option flags. All of them start out disabled.

/// Set by -c/--stdout; args_parse() also forces it on in test mode.
bool opt_stdout = false;

/// Set by -f/--force.
bool opt_force = false;

/// Set by -k/--keep; args_parse() also forces it on together with
/// opt_stdout.
bool opt_keep_original = false;

/// Set by --robot.
bool opt_robot = false;

/// Set by --ignore-check.
bool opt_ignore_check = false;
30
31
/// \brief      Parse a memory usage limit and pass it on
///
/// A value ending in '%' is parsed as a percentage in the range 1-100.
/// Anything else is parsed as a plain number in the range 0-UINT64_MAX.
/// The result goes to hardware_memlimit_set().
///
/// \param      name              Option name given to str_to_uint64()
///                               for a plain value
/// \param      name_percentage   Option name given to str_to_uint64()
///                               for a percentage
/// \param      str               Option argument; a trailing '%' is
///                               overwritten with '\0'
/// \param      set_compress      Forwarded to hardware_memlimit_set()
/// \param      set_decompress    Forwarded to hardware_memlimit_set()
static void
parse_memlimit(const char *name, const char *name_percentage, char *str,
                bool set_compress, bool set_decompress)
{
        const size_t length = strlen(str);
        const bool is_percentage = length > 0 && str[length - 1] == '%';
        uint64_t limit;

        if (is_percentage) {
                // Cut off the percent sign so that only the number is left.
                str[length - 1] = '\0';
                limit = str_to_uint64(name_percentage, str, 1, 100);
        } else {
                // SIZE_MAX would be the natural upper bound on 32-bit
                // systems, but UINT64_MAX is used so that scripts passing
                // values above 4 GiB keep working.
                limit = str_to_uint64(name, str, 0, UINT64_MAX);
        }

        hardware_memlimit_set(limit, set_compress, set_decompress,
                        is_percentage);
}
56
57
58 static void
59 parse_block_list(char *str)
60 {
61         // It must be non-empty and not begin with a comma.
62         if (str[0] == '\0' || str[0] == ',')
63                 message_fatal(_("%s: Invalid argument to --block-list"), str);
64
65         // Count the number of comma-separated strings.
66         size_t count = 1;
67         for (size_t i = 0; str[i] != '\0'; ++i)
68                 if (str[i] == ',')
69                         ++count;
70
71         // Prevent an unlikely integer overflow.
72         if (count > SIZE_MAX / sizeof(uint64_t) - 1)
73                 message_fatal(_("%s: Too many arguments to --block-list"),
74                                 str);
75
76         // Allocate memory to hold all the sizes specified.
77         // If --block-list was specified already, its value is forgotten.
78         free(opt_block_list);
79         opt_block_list = xmalloc((count + 1) * sizeof(uint64_t));
80
81         for (size_t i = 0; i < count; ++i) {
82                 // Locate the next comma and replace it with \0.
83                 char *p = strchr(str, ',');
84                 if (p != NULL)
85                         *p = '\0';
86
87                 if (str[0] == '\0') {
88                         // There is no string, that is, a comma follows
89                         // another comma. Use the previous value.
90                         //
91                         // NOTE: We checked earler that the first char
92                         // of the whole list cannot be a comma.
93                         assert(i > 0);
94                         opt_block_list[i] = opt_block_list[i - 1];
95                 } else {
96                         opt_block_list[i] = str_to_uint64("block-list", str,
97                                         0, UINT64_MAX);
98
99                         // Zero indicates no more new Blocks.
100                         if (opt_block_list[i] == 0) {
101                                 if (i + 1 != count)
102                                         message_fatal(_("0 can only be used "
103                                                         "as the last element "
104                                                         "in --block-list"));
105
106                                 opt_block_list[i] = UINT64_MAX;
107                         }
108                 }
109
110                 str = p + 1;
111         }
112
113         // Terminate the array.
114         opt_block_list[count] = 0;
115         return;
116 }
117
118
119 static void
120 parse_real(args_info *args, int argc, char **argv)
121 {
122         enum {
123                 OPT_X86 = INT_MIN,
124                 OPT_POWERPC,
125                 OPT_IA64,
126                 OPT_ARM,
127                 OPT_ARMTHUMB,
128                 OPT_SPARC,
129                 OPT_DELTA,
130                 OPT_LZMA1,
131                 OPT_LZMA2,
132
133                 OPT_SINGLE_STREAM,
134                 OPT_NO_SPARSE,
135                 OPT_FILES,
136                 OPT_FILES0,
137                 OPT_BLOCK_SIZE,
138                 OPT_BLOCK_LIST,
139                 OPT_MEM_COMPRESS,
140                 OPT_MEM_DECOMPRESS,
141                 OPT_NO_ADJUST,
142                 OPT_INFO_MEMORY,
143                 OPT_ROBOT,
144                 OPT_FLUSH_TIMEOUT,
145                 OPT_IGNORE_CHECK,
146         };
147
148         static const char short_opts[]
149                         = "cC:defF:hHlkM:qQrS:tT:vVz0123456789";
150
151         static const struct option long_opts[] = {
152                 // Operation mode
153                 { "compress",     no_argument,       NULL,  'z' },
154                 { "decompress",   no_argument,       NULL,  'd' },
155                 { "uncompress",   no_argument,       NULL,  'd' },
156                 { "test",         no_argument,       NULL,  't' },
157                 { "list",         no_argument,       NULL,  'l' },
158
159                 // Operation modifiers
160                 { "keep",         no_argument,       NULL,  'k' },
161                 { "force",        no_argument,       NULL,  'f' },
162                 { "stdout",       no_argument,       NULL,  'c' },
163                 { "to-stdout",    no_argument,       NULL,  'c' },
164                 { "single-stream", no_argument,      NULL,  OPT_SINGLE_STREAM },
165                 { "no-sparse",    no_argument,       NULL,  OPT_NO_SPARSE },
166                 { "suffix",       required_argument, NULL,  'S' },
167                 // { "recursive",      no_argument,       NULL,  'r' }, // TODO
168                 { "files",        optional_argument, NULL,  OPT_FILES },
169                 { "files0",       optional_argument, NULL,  OPT_FILES0 },
170
171                 // Basic compression settings
172                 { "format",       required_argument, NULL,  'F' },
173                 { "check",        required_argument, NULL,  'C' },
174                 { "ignore-check", no_argument,       NULL,  OPT_IGNORE_CHECK },
175                 { "block-size",   required_argument, NULL,  OPT_BLOCK_SIZE },
176                 { "block-list",  required_argument, NULL,  OPT_BLOCK_LIST },
177                 { "memlimit-compress",   required_argument, NULL, OPT_MEM_COMPRESS },
178                 { "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS },
179                 { "memlimit",     required_argument, NULL,  'M' },
180                 { "memory",       required_argument, NULL,  'M' }, // Old alias
181                 { "no-adjust",    no_argument,       NULL,  OPT_NO_ADJUST },
182                 { "threads",      required_argument, NULL,  'T' },
183                 { "flush-timeout", required_argument, NULL, OPT_FLUSH_TIMEOUT },
184
185                 { "extreme",      no_argument,       NULL,  'e' },
186                 { "fast",         no_argument,       NULL,  '0' },
187                 { "best",         no_argument,       NULL,  '9' },
188
189                 // Filters
190                 { "lzma1",        optional_argument, NULL,  OPT_LZMA1 },
191                 { "lzma2",        optional_argument, NULL,  OPT_LZMA2 },
192                 { "x86",          optional_argument, NULL,  OPT_X86 },
193                 { "powerpc",      optional_argument, NULL,  OPT_POWERPC },
194                 { "ia64",         optional_argument, NULL,  OPT_IA64 },
195                 { "arm",          optional_argument, NULL,  OPT_ARM },
196                 { "armthumb",     optional_argument, NULL,  OPT_ARMTHUMB },
197                 { "sparc",        optional_argument, NULL,  OPT_SPARC },
198                 { "delta",        optional_argument, NULL,  OPT_DELTA },
199
200                 // Other options
201                 { "quiet",        no_argument,       NULL,  'q' },
202                 { "verbose",      no_argument,       NULL,  'v' },
203                 { "no-warn",      no_argument,       NULL,  'Q' },
204                 { "robot",        no_argument,       NULL,  OPT_ROBOT },
205                 { "info-memory",  no_argument,       NULL,  OPT_INFO_MEMORY },
206                 { "help",         no_argument,       NULL,  'h' },
207                 { "long-help",    no_argument,       NULL,  'H' },
208                 { "version",      no_argument,       NULL,  'V' },
209
210                 { NULL,           0,                 NULL,   0 }
211         };
212
213         int c;
214
215         while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
216                         != -1) {
217                 switch (c) {
218                 // Compression preset (also for decompression if --format=raw)
219                 case '0': case '1': case '2': case '3': case '4':
220                 case '5': case '6': case '7': case '8': case '9':
221                         coder_set_preset(c - '0');
222                         break;
223
224                 // --memlimit-compress
225                 case OPT_MEM_COMPRESS:
226                         parse_memlimit("memlimit-compress",
227                                         "memlimit-compress%", optarg,
228                                         true, false);
229                         break;
230
231                 // --memlimit-decompress
232                 case OPT_MEM_DECOMPRESS:
233                         parse_memlimit("memlimit-decompress",
234                                         "memlimit-decompress%", optarg,
235                                         false, true);
236                         break;
237
238                 // --memlimit
239                 case 'M':
240                         parse_memlimit("memlimit", "memlimit%", optarg,
241                                         true, true);
242                         break;
243
244                 // --suffix
245                 case 'S':
246                         suffix_set(optarg);
247                         break;
248
249                 case 'T':
250                         // The max is from src/liblzma/common/common.h.
251                         hardware_threads_set(str_to_uint64("threads",
252                                         optarg, 0, 16384));
253                         break;
254
255                 // --version
256                 case 'V':
257                         // This doesn't return.
258                         message_version();
259
260                 // --stdout
261                 case 'c':
262                         opt_stdout = true;
263                         break;
264
265                 // --decompress
266                 case 'd':
267                         opt_mode = MODE_DECOMPRESS;
268                         break;
269
270                 // --extreme
271                 case 'e':
272                         coder_set_extreme();
273                         break;
274
275                 // --force
276                 case 'f':
277                         opt_force = true;
278                         break;
279
280                 // --info-memory
281                 case OPT_INFO_MEMORY:
282                         // This doesn't return.
283                         hardware_memlimit_show();
284
285                 // --help
286                 case 'h':
287                         // This doesn't return.
288                         message_help(false);
289
290                 // --long-help
291                 case 'H':
292                         // This doesn't return.
293                         message_help(true);
294
295                 // --list
296                 case 'l':
297                         opt_mode = MODE_LIST;
298                         break;
299
300                 // --keep
301                 case 'k':
302                         opt_keep_original = true;
303                         break;
304
305                 // --quiet
306                 case 'q':
307                         message_verbosity_decrease();
308                         break;
309
310                 case 'Q':
311                         set_exit_no_warn();
312                         break;
313
314                 case 't':
315                         opt_mode = MODE_TEST;
316                         break;
317
318                 // --verbose
319                 case 'v':
320                         message_verbosity_increase();
321                         break;
322
323                 // --robot
324                 case OPT_ROBOT:
325                         opt_robot = true;
326
327                         // This is to make sure that floating point numbers
328                         // always have a dot as decimal separator.
329                         setlocale(LC_NUMERIC, "C");
330                         break;
331
332                 case 'z':
333                         opt_mode = MODE_COMPRESS;
334                         break;
335
336                 // Filter setup
337
338                 case OPT_X86:
339                         coder_add_filter(LZMA_FILTER_X86,
340                                         options_bcj(optarg));
341                         break;
342
343                 case OPT_POWERPC:
344                         coder_add_filter(LZMA_FILTER_POWERPC,
345                                         options_bcj(optarg));
346                         break;
347
348                 case OPT_IA64:
349                         coder_add_filter(LZMA_FILTER_IA64,
350                                         options_bcj(optarg));
351                         break;
352
353                 case OPT_ARM:
354                         coder_add_filter(LZMA_FILTER_ARM,
355                                         options_bcj(optarg));
356                         break;
357
358                 case OPT_ARMTHUMB:
359                         coder_add_filter(LZMA_FILTER_ARMTHUMB,
360                                         options_bcj(optarg));
361                         break;
362
363                 case OPT_SPARC:
364                         coder_add_filter(LZMA_FILTER_SPARC,
365                                         options_bcj(optarg));
366                         break;
367
368                 case OPT_DELTA:
369                         coder_add_filter(LZMA_FILTER_DELTA,
370                                         options_delta(optarg));
371                         break;
372
373                 case OPT_LZMA1:
374                         coder_add_filter(LZMA_FILTER_LZMA1,
375                                         options_lzma(optarg));
376                         break;
377
378                 case OPT_LZMA2:
379                         coder_add_filter(LZMA_FILTER_LZMA2,
380                                         options_lzma(optarg));
381                         break;
382
383                 // Other
384
385                 // --format
386                 case 'F': {
387                         // Just in case, support both "lzma" and "alone" since
388                         // the latter was used for forward compatibility in
389                         // LZMA Utils 4.32.x.
390                         static const struct {
391                                 char str[8];
392                                 enum format_type format;
393                         } types[] = {
394                                 { "auto",   FORMAT_AUTO },
395                                 { "xz",     FORMAT_XZ },
396                                 { "lzma",   FORMAT_LZMA },
397                                 { "alone",  FORMAT_LZMA },
398                                 // { "gzip",   FORMAT_GZIP },
399                                 // { "gz",     FORMAT_GZIP },
400                                 { "raw",    FORMAT_RAW },
401                         };
402
403                         size_t i = 0;
404                         while (strcmp(types[i].str, optarg) != 0)
405                                 if (++i == ARRAY_SIZE(types))
406                                         message_fatal(_("%s: Unknown file "
407                                                         "format type"),
408                                                         optarg);
409
410                         opt_format = types[i].format;
411                         break;
412                 }
413
414                 // --check
415                 case 'C': {
416                         static const struct {
417                                 char str[8];
418                                 lzma_check check;
419                         } types[] = {
420                                 { "none",   LZMA_CHECK_NONE },
421                                 { "crc32",  LZMA_CHECK_CRC32 },
422                                 { "crc64",  LZMA_CHECK_CRC64 },
423                                 { "sha256", LZMA_CHECK_SHA256 },
424                         };
425
426                         size_t i = 0;
427                         while (strcmp(types[i].str, optarg) != 0) {
428                                 if (++i == ARRAY_SIZE(types))
429                                         message_fatal(_("%s: Unsupported "
430                                                         "integrity "
431                                                         "check type"), optarg);
432                         }
433
434                         // Use a separate check in case we are using different
435                         // liblzma than what was used to compile us.
436                         if (!lzma_check_is_supported(types[i].check))
437                                 message_fatal(_("%s: Unsupported integrity "
438                                                 "check type"), optarg);
439
440                         coder_set_check(types[i].check);
441                         break;
442                 }
443
444                 case OPT_IGNORE_CHECK:
445                         opt_ignore_check = true;
446                         break;
447
448                 case OPT_BLOCK_SIZE:
449                         opt_block_size = str_to_uint64("block-size", optarg,
450                                         0, LZMA_VLI_MAX);
451                         break;
452
453                 case OPT_BLOCK_LIST: {
454                         parse_block_list(optarg);
455                         break;
456                 }
457
458                 case OPT_SINGLE_STREAM:
459                         opt_single_stream = true;
460                         break;
461
462                 case OPT_NO_SPARSE:
463                         io_no_sparse();
464                         break;
465
466                 case OPT_FILES:
467                         args->files_delim = '\n';
468
469                 // Fall through
470
471                 case OPT_FILES0:
472                         if (args->files_name != NULL)
473                                 message_fatal(_("Only one file can be "
474                                                 "specified with `--files' "
475                                                 "or `--files0'."));
476
477                         if (optarg == NULL) {
478                                 args->files_name = (char *)stdin_filename;
479                                 args->files_file = stdin;
480                         } else {
481                                 args->files_name = optarg;
482                                 args->files_file = fopen(optarg,
483                                                 c == OPT_FILES ? "r" : "rb");
484                                 if (args->files_file == NULL)
485                                         message_fatal("%s: %s", optarg,
486                                                         strerror(errno));
487                         }
488
489                         break;
490
491                 case OPT_NO_ADJUST:
492                         opt_auto_adjust = false;
493                         break;
494
495                 case OPT_FLUSH_TIMEOUT:
496                         opt_flush_timeout = str_to_uint64("flush-timeout",
497                                         optarg, 0, UINT64_MAX);
498                         break;
499
500                 default:
501                         message_try_help();
502                         tuklib_exit(E_ERROR, E_ERROR, false);
503                 }
504         }
505
506         return;
507 }
508
509
510 static void
511 parse_environment(args_info *args, char *argv0, const char *varname)
512 {
513         char *env = getenv(varname);
514         if (env == NULL)
515                 return;
516
517         // We modify the string, so make a copy of it.
518         env = xstrdup(env);
519
520         // Calculate the number of arguments in env. argc stats at one
521         // to include space for the program name.
522         int argc = 1;
523         bool prev_was_space = true;
524         for (size_t i = 0; env[i] != '\0'; ++i) {
525                 // NOTE: Cast to unsigned char is needed so that correct
526                 // value gets passed to isspace(), which expects
527                 // unsigned char cast to int. Casting to int is done
528                 // automatically due to integer promotion, but we need to
529                 // force char to unsigned char manually. Otherwise 8-bit
530                 // characters would get promoted to wrong value if
531                 // char is signed.
532                 if (isspace((unsigned char)env[i])) {
533                         prev_was_space = true;
534                 } else if (prev_was_space) {
535                         prev_was_space = false;
536
537                         // Keep argc small enough to fit into a signed int
538                         // and to keep it usable for memory allocation.
539                         if (++argc == my_min(
540                                         INT_MAX, SIZE_MAX / sizeof(char *)))
541                                 message_fatal(_("The environment variable "
542                                                 "%s contains too many "
543                                                 "arguments"), varname);
544                 }
545         }
546
547         // Allocate memory to hold pointers to the arguments. Add one to get
548         // space for the terminating NULL (if some systems happen to need it).
549         char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *));
550         argv[0] = argv0;
551         argv[argc] = NULL;
552
553         // Go through the string again. Split the arguments using '\0'
554         // characters and add pointers to the resulting strings to argv.
555         argc = 1;
556         prev_was_space = true;
557         for (size_t i = 0; env[i] != '\0'; ++i) {
558                 if (isspace((unsigned char)env[i])) {
559                         prev_was_space = true;
560                         env[i] = '\0';
561                 } else if (prev_was_space) {
562                         prev_was_space = false;
563                         argv[argc++] = env + i;
564                 }
565         }
566
567         // Parse the argument list we got from the environment. All non-option
568         // arguments i.e. filenames are ignored.
569         parse_real(args, argc, argv);
570
571         // Reset the state of the getopt_long() so that we can parse the
572         // command line options too. There are two incompatible ways to
573         // do it.
574 #ifdef HAVE_OPTRESET
575         // BSD
576         optind = 1;
577         optreset = 1;
578 #else
579         // GNU, Solaris
580         optind = 0;
581 #endif
582
583         // We don't need the argument list from environment anymore.
584         free(argv);
585         free(env);
586
587         return;
588 }
589
590
591 extern void
592 args_parse(args_info *args, int argc, char **argv)
593 {
594         // Initialize those parts of *args that we need later.
595         args->files_name = NULL;
596         args->files_file = NULL;
597         args->files_delim = '\0';
598
599         // Check how we were called.
600         {
601                 // Remove the leading path name, if any.
602                 const char *name = strrchr(argv[0], '/');
603                 if (name == NULL)
604                         name = argv[0];
605                 else
606                         ++name;
607
608                 // NOTE: It's possible that name[0] is now '\0' if argv[0]
609                 // is weird, but it doesn't matter here.
610
611                 // Look for full command names instead of substrings like
612                 // "un", "cat", and "lz" to reduce possibility of false
613                 // positives when the programs have been renamed.
614                 if (strstr(name, "xzcat") != NULL) {
615                         opt_mode = MODE_DECOMPRESS;
616                         opt_stdout = true;
617                 } else if (strstr(name, "unxz") != NULL) {
618                         opt_mode = MODE_DECOMPRESS;
619                 } else if (strstr(name, "lzcat") != NULL) {
620                         opt_format = FORMAT_LZMA;
621                         opt_mode = MODE_DECOMPRESS;
622                         opt_stdout = true;
623                 } else if (strstr(name, "unlzma") != NULL) {
624                         opt_format = FORMAT_LZMA;
625                         opt_mode = MODE_DECOMPRESS;
626                 } else if (strstr(name, "lzma") != NULL) {
627                         opt_format = FORMAT_LZMA;
628                 }
629         }
630
631         // First the flags from the environment
632         parse_environment(args, argv[0], "XZ_DEFAULTS");
633         parse_environment(args, argv[0], "XZ_OPT");
634
635         // Then from the command line
636         parse_real(args, argc, argv);
637
638         // If encoder or decoder support was omitted at build time,
639         // show an error now so that the rest of the code can rely on
640         // that whatever is in opt_mode is also supported.
641 #ifndef HAVE_ENCODERS
642         if (opt_mode == MODE_COMPRESS)
643                 message_fatal(_("Compression support was disabled "
644                                 "at build time"));
645 #endif
646 #ifndef HAVE_DECODERS
647         // Even MODE_LIST cannot work without decoder support so MODE_COMPRESS
648         // is the only valid choice.
649         if (opt_mode != MODE_COMPRESS)
650                 message_fatal(_("Decompression support was disabled "
651                                 "at build time"));
652 #endif
653
654         // Never remove the source file when the destination is not on disk.
655         // In test mode the data is written nowhere, but setting opt_stdout
656         // will make the rest of the code behave well.
657         if (opt_stdout || opt_mode == MODE_TEST) {
658                 opt_keep_original = true;
659                 opt_stdout = true;
660         }
661
662         // When compressing, if no --format flag was used, or it
663         // was --format=auto, we compress to the .xz format.
664         if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO)
665                 opt_format = FORMAT_XZ;
666
667         // Compression settings need to be validated (options themselves and
668         // their memory usage) when compressing to any file format. It has to
669         // be done also when uncompressing raw data, since for raw decoding
670         // the options given on the command line are used to know what kind
671         // of raw data we are supposed to decode.
672         if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW)
673                 coder_set_compression_settings();
674
675         // If no filenames are given, use stdin.
676         if (argv[optind] == NULL && args->files_name == NULL) {
677                 // We don't modify or free() the "-" constant. The caller
678                 // modifies this so don't make the struct itself const.
679                 static char *names_stdin[2] = { (char *)"-", NULL };
680                 args->arg_names = names_stdin;
681                 args->arg_count = 1;
682         } else {
683                 // We got at least one filename from the command line, or
684                 // --files or --files0 was specified.
685                 args->arg_names = argv + optind;
686                 args->arg_count = argc - optind;
687         }
688
689         return;
690 }
691
692
693 #ifndef NDEBUG
694 extern void
695 args_free(void)
696 {
697         free(opt_block_list);
698         return;
699 }
700 #endif