stat(1): Staticize.
[dragonfly.git] / usr.bin / sort / sort.c
1 /*-
2  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
3  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  *
27  * $FreeBSD: head/usr.bin/sort/sort.c 281182 2015-04-07 01:17:49Z pfg $
28  */
29
30
31 #include <sys/stat.h>
32 #include <sys/sysctl.h>
33 #include <sys/types.h>
34
35 #include <err.h>
36 #include <errno.h>
37 #include <getopt.h>
38 #include <limits.h>
39 #include <locale.h>
40 #if defined(SORT_RANDOM)
41 #include <md5.h>
42 #endif
43 #include <regex.h>
44 #include <signal.h>
45 #include <stdbool.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <unistd.h>
50 #include <wchar.h>
51 #include <wctype.h>
52
53 #include "coll.h"
54 #include "file.h"
55 #include "sort.h"
56
57 #ifndef WITHOUT_NLS
58 #include <nl_types.h>
59 nl_catd catalog;
60 #endif
61
62 #if defined(SORT_RANDOM)
63 #define OPTIONS "bcCdfghik:Mmno:RrsS:t:T:uVz"
64 #else
65 #define OPTIONS "bcCdfghik:Mmno:rsS:t:T:uVz"
66 #endif
67
68 #if defined(SORT_RANDOM)
69 #define DEFAULT_RANDOM_SORT_SEED_FILE ("/dev/random")
70 #define MAX_DEFAULT_RANDOM_SEED_DATA_SIZE (1024)
71
72 static bool need_random;
73 static const char *random_source = DEFAULT_RANDOM_SORT_SEED_FILE;
74 static const void *random_seed;
75 static size_t random_seed_size;
76
77 MD5_CTX md5_ctx;
78 #endif
79
80 /*
81  * Default messages to use when NLS is disabled or no catalogue
82  * is found.
83  */
84 const char *nlsstr[] = { "",
85 /* 1*/"mutually exclusive flags",
86 /* 2*/"extra argument not allowed with -c",
87 /* 3*/"Unknown feature",
88 /* 4*/"Wrong memory buffer specification",
89 /* 5*/"0 field in key specs",
90 /* 6*/"0 column in key specs",
91 /* 7*/"Wrong file mode",
92 /* 8*/"Cannot open file for reading",
93 /* 9*/"Radix sort cannot be used with these sort options",
94 /*10*/"The chosen sort method cannot be used with stable and/or unique sort",
95 /*11*/"Invalid key position",
96 /*12*/"Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] "
97       "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
98       "[-o outfile] [--batch-size size] [--files0-from file] "
99       "[--heapsort] [--mergesort] [--radixsort] [--qsort] "
100       "[--mmap] "
101 #if defined(SORT_THREADS)
102       "[--parallel thread_no] "
103 #endif
104       "[--human-numeric-sort] "
105 #if defined(SORT_RANDOM)
106       "[--version-sort] [--random-sort [--random-source file]] "
107 #else
108       "[--version-sort] "
109 #endif
110       "[--compress-program program] [file ...]\n" };
111
112 struct sort_opts sort_opts_vals;
113
114 bool debug_sort;
115 bool need_hint;
116
117 #if defined(SORT_THREADS)
118 unsigned int ncpu = 1;
119 size_t nthreads = 1;
120 #endif
121
122 static bool gnusort_numeric_compatibility;
123
124 static struct sort_mods default_sort_mods_object;
125 struct sort_mods * const default_sort_mods = &default_sort_mods_object;
126
127 static bool print_symbols_on_debug;
128
129 /*
130  * Arguments from file (when the files0-from option is used):
131  */
132 static size_t argc_from_file0 = (size_t)-1;
133 static char **argv_from_file0;
134
135 /*
136  * Placeholder symbols for options which have no single-character equivalent
137  */
138 enum
139 {
140         SORT_OPT = CHAR_MAX + 1,
141         HELP_OPT,
142         FF_OPT,
143         BS_OPT,
144         VERSION_OPT,
145         DEBUG_OPT,
146 #if defined(SORT_THREADS)
147         PARALLEL_OPT,
148 #endif
149 #if defined(SORT_RANDOM)
150         RANDOMSOURCE_OPT,
151 #endif
152         COMPRESSPROGRAM_OPT,
153         QSORT_OPT,
154         MERGESORT_OPT,
155         HEAPSORT_OPT,
156         RADIXSORT_OPT,
157         MMAP_OPT
158 };
159
160 #define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
161 static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' };
162
163 static struct option long_options[] = {
164                                 { "batch-size", required_argument, NULL, BS_OPT },
165                                 { "buffer-size", required_argument, NULL, 'S' },
166                                 { "check", optional_argument, NULL, 'c' },
167                                 { "check=silent|quiet", optional_argument, NULL, 'C' },
168                                 { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
169                                 { "debug", no_argument, NULL, DEBUG_OPT },
170                                 { "dictionary-order", no_argument, NULL, 'd' },
171                                 { "field-separator", required_argument, NULL, 't' },
172                                 { "files0-from", required_argument, NULL, FF_OPT },
173                                 { "general-numeric-sort", no_argument, NULL, 'g' },
174                                 { "heapsort", no_argument, NULL, HEAPSORT_OPT },
175                                 { "help",no_argument, NULL, HELP_OPT },
176                                 { "human-numeric-sort", no_argument, NULL, 'h' },
177                                 { "ignore-leading-blanks", no_argument, NULL, 'b' },
178                                 { "ignore-case", no_argument, NULL, 'f' },
179                                 { "ignore-nonprinting", no_argument, NULL, 'i' },
180                                 { "key", required_argument, NULL, 'k' },
181                                 { "merge", no_argument, NULL, 'm' },
182                                 { "mergesort", no_argument, NULL, MERGESORT_OPT },
183                                 { "mmap", no_argument, NULL, MMAP_OPT },
184                                 { "month-sort", no_argument, NULL, 'M' },
185                                 { "numeric-sort", no_argument, NULL, 'n' },
186                                 { "output", required_argument, NULL, 'o' },
187 #if defined(SORT_THREADS)
188                                 { "parallel", required_argument, NULL, PARALLEL_OPT },
189 #endif
190                                 { "qsort", no_argument, NULL, QSORT_OPT },
191                                 { "radixsort", no_argument, NULL, RADIXSORT_OPT },
192 #if defined(SORT_RANDOM)
193                                 { "random-sort", no_argument, NULL, 'R' },
194                                 { "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
195 #endif
196                                 { "reverse", no_argument, NULL, 'r' },
197                                 { "sort", required_argument, NULL, SORT_OPT },
198                                 { "stable", no_argument, NULL, 's' },
199                                 { "temporary-directory",required_argument, NULL, 'T' },
200                                 { "unique", no_argument, NULL, 'u' },
201                                 { "version", no_argument, NULL, VERSION_OPT },
202                                 { "version-sort",no_argument, NULL, 'V' },
203                                 { "zero-terminated", no_argument, NULL, 'z' },
204                                 { NULL, no_argument, NULL, 0 }
205 };
206
207 void fix_obsolete_keys(int *argc, char **argv);
208
209 /*
210  * Check where sort modifier is present
211  */
212 static bool
213 sort_modifier_empty(struct sort_mods *sm)
214 {
215
216         if (sm == NULL)
217                 return (true);
218         return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
219 #ifdef SORT_RANDOM
220             sm->Rflag ||
221 #endif
222             sm->rflag || sm->hflag || sm->dflag || sm->fflag));
223 }
224
/*
 * Print the usage message and terminate the program.
 *
 * When opt_err is true the text is written to stderr and the exit status
 * is 2; otherwise it is written to stdout and the exit status is 0.
 */
static void
usage(bool opt_err)
{
	FILE *stream = opt_err ? stderr : stdout;

	fprintf(stream, getstr(12), getprogname());
	exit(opt_err ? 2 : 0);
}
244
245 /*
246  * Read input file names from a file (file0-from option).
247  */
248 static void
249 read_fns_from_file0(const char *fn)
250 {
251         FILE *f;
252         char *line = NULL;
253         size_t linesize = 0;
254         ssize_t linelen;
255
256         if (fn == NULL)
257                 return;
258
259         f = fopen(fn, "r");
260         if (f == NULL)
261                 err(2, "%s", fn);
262
263         while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
264                 if (*line != '\0') {
265                         if (argc_from_file0 == (size_t) - 1)
266                                 argc_from_file0 = 0;
267                         ++argc_from_file0;
268                         argv_from_file0 = sort_realloc(argv_from_file0,
269                             argc_from_file0 * sizeof(char *));
270                         if (argv_from_file0 == NULL)
271                                 err(2, NULL);
272                         argv_from_file0[argc_from_file0 - 1] = line;
273                 } else {
274                         free(line);
275                 }
276                 line = NULL;
277                 linesize = 0;
278         }
279         if (ferror(f))
280                 err(2, "%s: getdelim", fn);
281
282         closefile(f, fn);
283 }
284
285 /*
286  * Check how much RAM is available for the sort.
287  */
288 static void
289 set_hw_params(void)
290 {
291         long pages, psize;
292
293         pages = psize = 0;
294
295 #if defined(SORT_THREADS)
296         ncpu = 1;
297 #endif
298
299         pages = sysconf(_SC_PHYS_PAGES);
300         if (pages < 1) {
301                 perror("sysconf pages");
302                 pages = 1;
303         }
304         psize = sysconf(_SC_PAGESIZE);
305         if (psize < 1) {
306                 perror("sysconf psize");
307                 psize = 4096;
308         }
309 #if defined(SORT_THREADS)
310         ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN);
311         if (ncpu < 1)
312                 ncpu = 1;
313         else if(ncpu > 32)
314                 ncpu = 32;
315
316         nthreads = ncpu;
317 #endif
318
319         free_memory = (unsigned long long) pages * (unsigned long long) psize;
320         available_free_memory = free_memory / 2;
321
322         if (available_free_memory < 1024)
323                 available_free_memory = 1024;
324 }
325
/*
 * Convert the first multibyte character of c into a wide character
 * stored in *wc.  If mbtowc() reports an error or an empty string
 * (result below 1), *wc is set to def instead.  Nothing happens when
 * either pointer is NULL.
 */
static void
conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
{

	if (wc == NULL || c == NULL)
		return;

	if (mbtowc(wc, c, MB_CUR_MAX) < 1)
		*wc = def;
}
341
342 /*
343  * Set current locale symbols.
344  */
345 static void
346 set_locale(void)
347 {
348         struct lconv *lc;
349         const char *locale;
350
351         setlocale(LC_ALL, "");
352
353         lc = localeconv();
354
355         if (lc) {
356                 /* obtain LC_NUMERIC info */
357                 /* Convert to wide char form */
358                 conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
359                     symbol_decimal_point);
360                 conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
361                     symbol_thousands_sep);
362                 conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
363                     symbol_positive_sign);
364                 conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
365                     symbol_negative_sign);
366         }
367
368         if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
369                 gnusort_numeric_compatibility = true;
370
371         locale = setlocale(LC_COLLATE, NULL);
372
373         if (locale) {
374                 char *tmpl;
375                 const char *cclocale;
376
377                 tmpl = sort_strdup(locale);
378                 cclocale = setlocale(LC_COLLATE, "C");
379                 if (cclocale && !strcmp(cclocale, tmpl))
380                         byte_sort = true;
381                 else {
382                         const char *pclocale;
383
384                         pclocale = setlocale(LC_COLLATE, "POSIX");
385                         if (pclocale && !strcmp(pclocale, tmpl))
386                                 byte_sort = true;
387                 }
388                 setlocale(LC_COLLATE, tmpl);
389                 sort_free(tmpl);
390         }
391 }
392
393 /*
394  * Set directory temporary files.
395  */
396 static void
397 set_tmpdir(void)
398 {
399         char *td;
400
401         td = getenv("TMPDIR");
402         if (td != NULL)
403                 tmpdir = sort_strdup(td);
404 }
405
406 /*
407  * Parse -S option.
408  */
409 static unsigned long long
410 parse_memory_buffer_value(const char *value)
411 {
412
413         if (value == NULL)
414                 return (available_free_memory);
415         else {
416                 char *endptr;
417                 unsigned long long membuf;
418
419                 endptr = NULL;
420                 errno = 0;
421                 membuf = strtoll(value, &endptr, 10);
422
423                 if (errno != 0) {
424                         warn("%s",getstr(4));
425                         membuf = available_free_memory;
426                 } else {
427                         switch (*endptr){
428                         case 'Y':
429                                 membuf *= 1024;
430                                 /* FALLTHROUGH */
431                         case 'Z':
432                                 membuf *= 1024;
433                                 /* FALLTHROUGH */
434                         case 'E':
435                                 membuf *= 1024;
436                                 /* FALLTHROUGH */
437                         case 'P':
438                                 membuf *= 1024;
439                                 /* FALLTHROUGH */
440                         case 'T':
441                                 membuf *= 1024;
442                                 /* FALLTHROUGH */
443                         case 'G':
444                                 membuf *= 1024;
445                                 /* FALLTHROUGH */
446                         case 'M':
447                                 membuf *= 1024;
448                                 /* FALLTHROUGH */
449                         case '\0':
450                         case 'K':
451                                 membuf *= 1024;
452                                 /* FALLTHROUGH */
453                         case 'b':
454                                 break;
455                         case '%':
456                                 membuf = (available_free_memory * membuf) /
457                                     100;
458                                 break;
459                         default:
460                                 warnc(EINVAL, "%s", optarg);
461                                 membuf = available_free_memory;
462                         }
463                 }
464                 return (membuf);
465         }
466 }
467
468 /*
469  * Signal handler that clears the temporary files.
470  */
471 static void
472 sig_handler(int sig __unused, siginfo_t *siginfo __unused,
473     void *context __unused)
474 {
475
476         clear_tmp_files();
477         exit(-1);
478 }
479
480 /*
481  * Set signal handler on panic signals.
482  */
483 static void
484 set_signal_handler(void)
485 {
486         struct sigaction sa;
487
488         memset(&sa, 0, sizeof(sa));
489         sa.sa_sigaction = &sig_handler;
490         sa.sa_flags = SA_SIGINFO;
491
492         if (sigaction(SIGTERM, &sa, NULL) < 0) {
493                 perror("sigaction");
494                 return;
495         }
496         if (sigaction(SIGHUP, &sa, NULL) < 0) {
497                 perror("sigaction");
498                 return;
499         }
500         if (sigaction(SIGINT, &sa, NULL) < 0) {
501                 perror("sigaction");
502                 return;
503         }
504         if (sigaction(SIGQUIT, &sa, NULL) < 0) {
505                 perror("sigaction");
506                 return;
507         }
508         if (sigaction(SIGABRT, &sa, NULL) < 0) {
509                 perror("sigaction");
510                 return;
511         }
512         if (sigaction(SIGBUS, &sa, NULL) < 0) {
513                 perror("sigaction");
514                 return;
515         }
516         if (sigaction(SIGSEGV, &sa, NULL) < 0) {
517                 perror("sigaction");
518                 return;
519         }
520         if (sigaction(SIGUSR1, &sa, NULL) < 0) {
521                 perror("sigaction");
522                 return;
523         }
524         if (sigaction(SIGUSR2, &sa, NULL) < 0) {
525                 perror("sigaction");
526                 return;
527         }
528 }
529
/*
 * Report an unknown feature: print message 3 followed by the offending
 * text and exit with status 2.
 */
static void
unknown(const char *what)
{
	const char *prefix = getstr(3);

	errx(2, "%s: %s", prefix, what);
}
539
540 /*
541  * Check whether contradictory input options are used.
542  */
543 static void
544 check_mutually_exclusive_flags(char c, bool *mef_flags)
545 {
546         int fo_index, mec;
547         bool found_others, found_this;
548
549         found_others = found_this = false;
550         fo_index = 0;
551
552         for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
553                 mec = mutually_exclusive_flags[i];
554
555                 if (mec != c) {
556                         if (mef_flags[i]) {
557                                 if (found_this)
558                                         errx(1, "%c:%c: %s", c, mec, getstr(1));
559                                 found_others = true;
560                                 fo_index = i;
561                         }
562                 } else {
563                         if (found_others)
564                                 errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1));
565                         mef_flags[i] = true;
566                         found_this = true;
567                 }
568         }
569 }
570
571 /*
572  * Initialise sort opts data.
573  */
574 static void
575 set_sort_opts(void)
576 {
577
578         memset(&default_sort_mods_object, 0,
579             sizeof(default_sort_mods_object));
580         memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
581         default_sort_mods_object.func =
582             get_sort_func(&default_sort_mods_object);
583 }
584
585 /*
586  * Set a sort modifier on a sort modifiers object.
587  */
588 static bool
589 set_sort_modifier(struct sort_mods *sm, int c)
590 {
591
592         if (sm) {
593                 switch (c){
594                 case 'b':
595                         sm->bflag = true;
596                         break;
597                 case 'd':
598                         sm->dflag = true;
599                         break;
600                 case 'f':
601                         sm->fflag = true;
602                         break;
603                 case 'g':
604                         sm->gflag = true;
605                         need_hint = true;
606                         break;
607                 case 'i':
608                         sm->iflag = true;
609                         break;
610 #ifdef SORT_RANDOM
611                 case 'R':
612                         sm->Rflag = true;
613                         need_random = true;
614                         break;
615 #endif
616                 case 'M':
617                         initialise_months();
618                         sm->Mflag = true;
619                         need_hint = true;
620                         break;
621                 case 'n':
622                         sm->nflag = true;
623                         need_hint = true;
624                         print_symbols_on_debug = true;
625                         break;
626                 case 'r':
627                         sm->rflag = true;
628                         break;
629                 case 'V':
630                         sm->Vflag = true;
631                         break;
632                 case 'h':
633                         sm->hflag = true;
634                         need_hint = true;
635                         print_symbols_on_debug = true;
636                         break;
637                 default:
638                         return false;
639                 }
640                 sort_opts_vals.complex_sort = true;
641                 sm->func = get_sort_func(sm);
642         }
643         return (true);
644 }
645
646 /*
647  * Parse POS in -k option.
648  */
649 static int
650 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
651 {
652         regmatch_t pmatch[4];
653         regex_t re;
654         char *c, *f;
655         const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
656         size_t len, nmatch;
657         int ret;
658
659         ret = -1;
660         nmatch = 4;
661         c = f = NULL;
662
663         if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
664                 return (-1);
665
666         if (regexec(&re, s, nmatch, pmatch, 0) != 0)
667                 goto end;
668
669         if (pmatch[0].rm_eo <= pmatch[0].rm_so)
670                 goto end;
671
672         if (pmatch[1].rm_eo <= pmatch[1].rm_so)
673                 goto end;
674
675         len = pmatch[1].rm_eo - pmatch[1].rm_so;
676         f = sort_malloc((len + 1) * sizeof(char));
677
678         strncpy(f, s + pmatch[1].rm_so, len);
679         f[len] = '\0';
680
681         if (second) {
682                 errno = 0;
683                 ks->f2 = (size_t) strtoul(f, NULL, 10);
684                 if (errno != 0)
685                         err(2, "-k");
686                 if (ks->f2 == 0) {
687                         warn("%s",getstr(5));
688                         goto end;
689                 }
690         } else {
691                 errno = 0;
692                 ks->f1 = (size_t) strtoul(f, NULL, 10);
693                 if (errno != 0)
694                         err(2, "-k");
695                 if (ks->f1 == 0) {
696                         warn("%s",getstr(5));
697                         goto end;
698                 }
699         }
700
701         if (pmatch[2].rm_eo > pmatch[2].rm_so) {
702                 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
703                 c = sort_malloc((len + 1) * sizeof(char));
704
705                 strncpy(c, s + pmatch[2].rm_so + 1, len);
706                 c[len] = '\0';
707
708                 if (second) {
709                         errno = 0;
710                         ks->c2 = (size_t) strtoul(c, NULL, 10);
711                         if (errno != 0)
712                                 err(2, "-k");
713                 } else {
714                         errno = 0;
715                         ks->c1 = (size_t) strtoul(c, NULL, 10);
716                         if (errno != 0)
717                                 err(2, "-k");
718                         if (ks->c1 == 0) {
719                                 warn("%s",getstr(6));
720                                 goto end;
721                         }
722                 }
723         } else {
724                 if (second)
725                         ks->c2 = 0;
726                 else
727                         ks->c1 = 1;
728         }
729
730         if (pmatch[3].rm_eo > pmatch[3].rm_so) {
731                 regoff_t i = 0;
732
733                 for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
734                         check_mutually_exclusive_flags(s[i], mef_flags);
735                         if (s[i] == 'b') {
736                                 if (second)
737                                         ks->pos2b = true;
738                                 else
739                                         ks->pos1b = true;
740                         } else if (!set_sort_modifier(&(ks->sm), s[i]))
741                                 goto end;
742                 }
743         }
744
745         ret = 0;
746
747 end:
748
749         if (c)
750                 sort_free(c);
751         if (f)
752                 sort_free(f);
753         regfree(&re);
754
755         return (ret);
756 }
757
758 /*
759  * Parse -k option value.
760  */
761 static int
762 parse_k(const char *s, struct key_specs *ks)
763 {
764         int ret = -1;
765         bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
766             { false, false, false, false, false, false };
767
768         if (s && *s) {
769                 char *sptr;
770
771                 sptr = strchr(s, ',');
772                 if (sptr) {
773                         size_t size1;
774                         char *pos1, *pos2;
775
776                         size1 = sptr - s;
777
778                         if (size1 < 1)
779                                 return (-1);
780                         pos1 = sort_malloc((size1 + 1) * sizeof(char));
781
782                         strncpy(pos1, s, size1);
783                         pos1[size1] = '\0';
784
785                         ret = parse_pos(pos1, ks, mef_flags, false);
786
787                         sort_free(pos1);
788                         if (ret < 0)
789                                 return (ret);
790
791                         pos2 = sort_strdup(sptr + 1);
792                         ret = parse_pos(pos2, ks, mef_flags, true);
793                         sort_free(pos2);
794                 } else
795                         ret = parse_pos(s, ks, mef_flags, false);
796         }
797
798         return (ret);
799 }
800
/*
 * Parse POS in +POS -POS option.
 *
 * s has the form FIELD[.COLUMN][LETTERS].  The field number is stored in
 * *nf, the column (0 when absent) in *nc, and the trailing letters, if
 * any, are copied NUL-terminated into sopts; sopts is left untouched when
 * there are no letters.
 *
 * sopts must point to a buffer of at least 128 bytes, which is what the
 * caller in this file provides; a longer letter run is rejected rather
 * than written past the end of that buffer.
 *
 * Returns 0 on success and -1 if s does not match the expected form.
 * A number that cannot be converted, or that is too large for the
 * caller to increment as an int, terminates the program with status 2.
 */
static int
parse_pos_obs(const char *s, int *nf, int *nc, char* sopts)
{
	enum { SOPTS_BUFSZ = 128 };
	regex_t re;
	regmatch_t pmatch[4];
	char *c, *f;
	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	unsigned long val;
	int ret;
	size_t len, nmatch;

	ret = -1;
	nmatch = 4;
	c = f = NULL;
	*nc = *nf = 0;

	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
		return (-1);

	if (regexec(&re, s, nmatch, pmatch, 0) != 0)
		goto end;

	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
		goto end;

	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
		goto end;

	/* Group 1: the field number. */
	len = pmatch[1].rm_eo - pmatch[1].rm_so;
	f = sort_malloc((len + 1) * sizeof(char));

	memcpy(f, s + pmatch[1].rm_so, len);
	f[len] = '\0';

	/*
	 * The results are ints which the caller increments by one, so
	 * anything from INT_MAX upwards is rejected instead of being
	 * silently truncated.
	 */
	errno = 0;
	val = strtoul(f, NULL, 10);
	if (errno != 0 || val >= (unsigned long)INT_MAX)
		errx(2, "%s", getstr(11));
	*nf = (int)val;

	/* Group 2: optional ".COLUMN"; skip the leading dot. */
	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
		len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
		c = sort_malloc((len + 1) * sizeof(char));

		memcpy(c, s + pmatch[2].rm_so + 1, len);
		c[len] = '\0';

		errno = 0;
		val = strtoul(c, NULL, 10);
		if (errno != 0 || val >= (unsigned long)INT_MAX)
			errx(2, "%s", getstr(11));
		*nc = (int)val;
	}

	/* Group 3: optional modifier letters. */
	if (pmatch[3].rm_eo > pmatch[3].rm_so) {

		len = pmatch[3].rm_eo - pmatch[3].rm_so;

		/* Never write beyond the caller's buffer. */
		if (len >= SOPTS_BUFSZ)
			goto end;

		memcpy(sopts, s + pmatch[3].rm_so, len);
		sopts[len] = '\0';
	}

	ret = 0;

end:
	if (c)
		sort_free(c);
	if (f)
		sort_free(f);
	regfree(&re);

	return (ret);
}
874
/*
 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax
 *
 * Every argument of the form +POS1 is rewritten in place as a -k option
 * with field and column converted from zero-based to one-based.  If the
 * following argument is a matching -POS2 it is folded into the same -k
 * option, the remaining arguments are shifted down by one and *argc is
 * decremented.  Arguments that do not parse as an obsolete key are left
 * untouched.  The rewritten strings are allocated with sort_strdup().
 */
void
fix_obsolete_keys(int *argc, char **argv)
{
	/*
	 * Large enough for "-k", four decimal ints, the punctuation and
	 * two modifier strings of up to 127 characters each.
	 */
	char sopt[512];

	for (int i = 1; i < *argc; i++) {
		char *arg1 = argv[i];
		char sopts1[128];
		int c1, f1;

		if (strlen(arg1) <= 1 || arg1[0] != '+')
			continue;

		sopts1[0] = 0;
		c1 = f1 = 0;

		if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0)
			continue;

		/* Obsolete positions are zero-based, -k positions one-based. */
		f1 += 1;
		c1 += 1;

		if (i + 1 < *argc) {
			char *arg2 = argv[i + 1];

			if (strlen(arg2) > 1 && arg2[0] == '-') {
				char sopts2[128];
				int c2, f2;

				sopts2[0] = 0;
				c2 = f2 = 0;

				if (parse_pos_obs(arg2 + 1,
				    &f2, &c2, sopts2) >= 0) {
					if (c2 > 0)
						f2 += 1;
					snprintf(sopt, sizeof(sopt),
					    "-k%d.%d%s,%d.%d%s",
					    f1, c1, sopts1, f2, c2, sopts2);
					argv[i] = sort_strdup(sopt);

					/* Drop the consumed -POS2 argument. */
					for (int j = i + 1; j + 1 < *argc; j++)
						argv[j] = argv[j + 1];
					*argc -= 1;
					/* Keep the vector NULL-terminated. */
					argv[*argc] = NULL;
					continue;
				}
			}
		}

		snprintf(sopt, sizeof(sopt), "-k%d.%d%s", f1, c1, sopts1);
		argv[i] = sort_strdup(sopt);
	}
}
931
/*
 * Set random seed
 *
 * Does nothing unless need_random is set.  When random_source is the
 * default seed file, up to MAX_DEFAULT_RANDOM_SEED_DATA_SIZE bytes are
 * read from it and hashed; otherwise the whole file named by
 * random_source is hashed.  The hexadecimal MD5 digest string becomes
 * random_seed (with its length in random_seed_size) and is fed into the
 * freshly initialised global md5_ctx.  Failure to obtain the digest
 * terminates the program with status 2.
 */
#if defined(SORT_RANDOM)
static void
set_random_seed(void)
{
	char *digest;

	if (!need_random)
		return;

	if (strcmp(random_source, DEFAULT_RANDOM_SORT_SEED_FILE) == 0) {
		FILE* fseed;
		MD5_CTX ctx;
		char rsd[MAX_DEFAULT_RANDOM_SEED_DATA_SIZE];
		size_t sz;

		/* The default source is read only up to the buffer size. */
		fseed = openfile(random_source, "r");
		sz = fread(rsd, 1, sizeof(rsd), fseed);
		closefile(fseed, random_source);

		MD5Init(&ctx);
		MD5Update(&ctx, rsd, sz);

		digest = MD5End(&ctx, NULL);
	} else {
		digest = MD5File(random_source, NULL);
	}

	/* Both calls return NULL when the digest cannot be produced. */
	if (digest == NULL)
		err(2, NULL);

	random_seed = digest;
	random_seed_size = strlen(digest);

	MD5Init(&md5_ctx);
	if(random_seed_size>0) {
		MD5Update(&md5_ctx, random_seed, random_seed_size);
	}
}
#endif
989
990 /*
991  * Main function.
992  */
993 int
994 main(int argc, char **argv)
995 {
996         char *outfile, *real_outfile;
997         int c, result;
998         bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
999             { false, false, false, false, false, false };
1000
1001         result = 0;
1002         outfile = sort_strdup("-");
1003         real_outfile = NULL;
1004
1005         struct sort_mods *sm = &default_sort_mods_object;
1006
1007         init_tmp_files();
1008
1009         set_signal_handler();
1010
1011         set_hw_params();
1012         set_locale();
1013         set_tmpdir();
1014         set_sort_opts();
1015
1016         fix_obsolete_keys(&argc, argv);
1017
1018         while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
1019             != -1)) {
1020
1021                 check_mutually_exclusive_flags(c, mef_flags);
1022
1023                 if (!set_sort_modifier(sm, c)) {
1024
1025                         switch (c) {
1026                         case 'c':
1027                                 sort_opts_vals.cflag = true;
1028                                 if (optarg) {
1029                                         if (!strcmp(optarg, "diagnose-first"))
1030                                                 ;
1031                                         else if (!strcmp(optarg, "silent") ||
1032                                             !strcmp(optarg, "quiet"))
1033                                                 sort_opts_vals.csilentflag = true;
1034                                         else if (*optarg)
1035                                                 unknown(optarg);
1036                                 }
1037                                 break;
1038                         case 'C':
1039                                 sort_opts_vals.cflag = true;
1040                                 sort_opts_vals.csilentflag = true;
1041                                 break;
1042                         case 'k':
1043                         {
1044                                 sort_opts_vals.complex_sort = true;
1045                                 sort_opts_vals.kflag = true;
1046
1047                                 keys_num++;
1048                                 keys = sort_realloc(keys, keys_num *
1049                                     sizeof(struct key_specs));
1050                                 memset(&(keys[keys_num - 1]), 0,
1051                                     sizeof(struct key_specs));
1052
1053                                 if (parse_k(optarg, &(keys[keys_num - 1]))
1054                                     < 0) {
1055                                         errc(2, EINVAL, "-k %s", optarg);
1056                                 }
1057
1058                                 break;
1059                         }
1060                         case 'm':
1061                                 sort_opts_vals.mflag = true;
1062                                 break;
1063                         case 'o':
1064                                 outfile = sort_realloc(outfile, (strlen(optarg) + 1));
1065                                 strcpy(outfile, optarg);
1066                                 break;
1067                         case 's':
1068                                 sort_opts_vals.sflag = true;
1069                                 break;
1070                         case 'S':
1071                                 available_free_memory =
1072                                     parse_memory_buffer_value(optarg);
1073                                 break;
1074                         case 'T':
1075                                 tmpdir = sort_strdup(optarg);
1076                                 break;
1077                         case 't':
1078                                 while (strlen(optarg) > 1) {
1079                                         if (optarg[0] != '\\') {
1080                                                 errc(2, EINVAL, "%s", optarg);
1081                                         }
1082                                         optarg += 1;
1083                                         if (*optarg == '0') {
1084                                                 *optarg = 0;
1085                                                 break;
1086                                         }
1087                                 }
1088                                 sort_opts_vals.tflag = true;
1089                                 sort_opts_vals.field_sep = btowc(optarg[0]);
1090                                 if (sort_opts_vals.field_sep == WEOF) {
1091                                         errno = EINVAL;
1092                                         err(2, NULL);
1093                                 }
1094                                 if (!gnusort_numeric_compatibility) {
1095                                         if (symbol_decimal_point == sort_opts_vals.field_sep)
1096                                                 symbol_decimal_point = WEOF;
1097                                         if (symbol_thousands_sep == sort_opts_vals.field_sep)
1098                                                 symbol_thousands_sep = WEOF;
1099                                         if (symbol_negative_sign == sort_opts_vals.field_sep)
1100                                                 symbol_negative_sign = WEOF;
1101                                         if (symbol_positive_sign == sort_opts_vals.field_sep)
1102                                                 symbol_positive_sign = WEOF;
1103                                 }
1104                                 break;
1105                         case 'u':
1106                                 sort_opts_vals.uflag = true;
1107                                 /* stable sort for the correct unique val */
1108                                 sort_opts_vals.sflag = true;
1109                                 break;
1110                         case 'z':
1111                                 sort_opts_vals.zflag = true;
1112                                 break;
1113                         case SORT_OPT:
1114                                 if (optarg) {
1115                                         if (!strcmp(optarg, "general-numeric"))
1116                                                 set_sort_modifier(sm, 'g');
1117                                         else if (!strcmp(optarg, "human-numeric"))
1118                                                 set_sort_modifier(sm, 'h');
1119                                         else if (!strcmp(optarg, "numeric"))
1120                                                 set_sort_modifier(sm, 'n');
1121                                         else if (!strcmp(optarg, "month"))
1122                                                 set_sort_modifier(sm, 'M');
1123 #if defined(SORT_RANDOM)
1124                                         else if (!strcmp(optarg, "random"))
1125                                                 set_sort_modifier(sm, 'R');
1126 #endif
1127                                         else
1128                                                 unknown(optarg);
1129                                 }
1130                                 break;
1131 #if defined(SORT_THREADS)
1132                         case PARALLEL_OPT:
1133                                 nthreads = (size_t)(atoi(optarg));
1134                                 if (nthreads < 1)
1135                                         nthreads = 1;
1136                                 if (nthreads > 1024)
1137                                         nthreads = 1024;
1138                                 break;
1139 #endif
1140                         case QSORT_OPT:
1141                                 sort_opts_vals.sort_method = SORT_QSORT;
1142                                 break;
1143                         case MERGESORT_OPT:
1144                                 sort_opts_vals.sort_method = SORT_MERGESORT;
1145                                 break;
1146                         case MMAP_OPT:
1147                                 use_mmap = true;
1148                                 break;
1149                         case HEAPSORT_OPT:
1150                                 sort_opts_vals.sort_method = SORT_HEAPSORT;
1151                                 break;
1152                         case RADIXSORT_OPT:
1153                                 sort_opts_vals.sort_method = SORT_RADIXSORT;
1154                                 break;
1155 #if defined(SORT_RANDOM)
1156                         case RANDOMSOURCE_OPT:
1157                                 random_source = strdup(optarg);
1158                                 break;
1159 #endif
1160                         case COMPRESSPROGRAM_OPT:
1161                                 compress_program = strdup(optarg);
1162                                 break;
1163                         case FF_OPT:
1164                                 read_fns_from_file0(optarg);
1165                                 break;
1166                         case BS_OPT:
1167                         {
1168                                 errno = 0;
1169                                 long mof = strtol(optarg, NULL, 10);
1170                                 if (errno != 0)
1171                                         err(2, "--batch-size");
1172                                 if (mof >= 2)
1173                                         max_open_files = (size_t) mof + 1;
1174                         }
1175                                 break;
1176                         case VERSION_OPT:
1177                                 printf("%s\n", VERSION);
1178                                 exit(EXIT_SUCCESS);
1179                                 /* NOTREACHED */
1180                                 break;
1181                         case DEBUG_OPT:
1182                                 debug_sort = true;
1183                                 break;
1184                         case HELP_OPT:
1185                                 usage(false);
1186                                 /* NOTREACHED */
1187                                 break;
1188                         default:
1189                                 usage(true);
1190                                 /* NOTREACHED */
1191                         }
1192                 }
1193         }
1194
1195         argc -= optind;
1196         argv += optind;
1197
1198 #ifndef WITHOUT_NLS
1199         catalog = catopen("sort", NL_CAT_LOCALE);
1200 #endif
1201
1202         if (sort_opts_vals.cflag && sort_opts_vals.mflag)
1203                 errx(1, "%c:%c: %s", 'm', 'c', getstr(1));
1204
1205 #ifndef WITHOUT_NLS
1206         catclose(catalog);
1207 #endif
1208
1209         if (keys_num == 0) {
1210                 keys_num = 1;
1211                 keys = sort_realloc(keys, sizeof(struct key_specs));
1212                 memset(&(keys[0]), 0, sizeof(struct key_specs));
1213                 keys[0].c1 = 1;
1214                 keys[0].pos1b = default_sort_mods->bflag;
1215                 keys[0].pos2b = default_sort_mods->bflag;
1216                 memcpy(&(keys[0].sm), default_sort_mods,
1217                     sizeof(struct sort_mods));
1218         }
1219
1220         for (size_t i = 0; i < keys_num; i++) {
1221                 struct key_specs *ks;
1222
1223                 ks = &(keys[i]);
1224
1225                 if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
1226                     !(ks->pos2b)) {
1227                         ks->pos1b = sm->bflag;
1228                         ks->pos2b = sm->bflag;
1229                         memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1230                 }
1231
1232                 ks->sm.func = get_sort_func(&(ks->sm));
1233         }
1234
1235         if (argv_from_file0) {
1236                 argc = argc_from_file0;
1237                 argv = argv_from_file0;
1238         }
1239
1240         if (debug_sort) {
1241                 printf("Memory to be used for sorting: %llu\n",available_free_memory);
1242 #if defined(SORT_THREADS)
1243                 printf("Number of CPUs: %d\n",(int)ncpu);
1244                 nthreads = 1;
1245 #endif
1246                 printf("Using collate rules of %s locale\n",
1247                     setlocale(LC_COLLATE, NULL));
1248                 if (byte_sort)
1249                         printf("Byte sort is used\n");
1250                 if (print_symbols_on_debug) {
1251                         printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1252                         if (symbol_thousands_sep)
1253                                 printf("Thousands separator: <%lc>\n",
1254                                     symbol_thousands_sep);
1255                         printf("Positive sign: <%lc>\n", symbol_positive_sign);
1256                         printf("Negative sign: <%lc>\n", symbol_negative_sign);
1257                 }
1258         }
1259
1260 #if defined(SORT_RANDOM)
1261         set_random_seed();
1262 #endif
1263
1264         /* Case when the outfile equals one of the input files: */
1265         if (strcmp(outfile, "-")) {
1266
1267                 for(int i = 0; i < argc; ++i) {
1268                         if (strcmp(argv[i], outfile) == 0) {
1269                                 real_outfile = sort_strdup(outfile);
1270                                 for(;;) {
1271                                         char* tmp = sort_malloc(strlen(outfile) +
1272                                             strlen(".tmp") + 1);
1273
1274                                         strcpy(tmp, outfile);
1275                                         strcpy(tmp + strlen(tmp), ".tmp");
1276                                         sort_free(outfile);
1277                                         outfile = tmp;
1278                                         if (access(outfile, F_OK) < 0)
1279                                                 break;
1280                                 }
1281                                 tmp_file_atexit(outfile);
1282                         }
1283                 }
1284         }
1285
1286 #if defined(SORT_THREADS)
1287         if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0))
1288                 nthreads = 1;
1289 #endif
1290
1291         if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
1292                 struct file_list fl;
1293                 struct sort_list list;
1294
1295                 sort_list_init(&list);
1296                 file_list_init(&fl, true);
1297
1298                 if (argc < 1)
1299                         procfile("-", &list, &fl);
1300                 else {
1301                         while (argc > 0) {
1302                                 procfile(*argv, &list, &fl);
1303                                 --argc;
1304                                 ++argv;
1305                         }
1306                 }
1307
1308                 if (fl.count < 1)
1309                         sort_list_to_file(&list, outfile);
1310                 else {
1311                         if (list.count > 0) {
1312                                 char *flast = new_tmp_file_name();
1313
1314                                 sort_list_to_file(&list, flast);
1315                                 file_list_add(&fl, flast, false);
1316                         }
1317                         merge_files(&fl, outfile);
1318                 }
1319
1320                 file_list_clean(&fl);
1321
1322                 /*
1323                  * We are about to exit the program, so we can ignore
1324                  * the clean-up for speed
1325                  *
1326                  * sort_list_clean(&list);
1327                  */
1328
1329         } else if (sort_opts_vals.cflag) {
1330                 result = (argc == 0) ? (check("-")) : (check(*argv));
1331         } else if (sort_opts_vals.mflag) {
1332                 struct file_list fl;
1333
1334                 file_list_init(&fl, false);
1335                 file_list_populate(&fl, argc, argv, true);
1336                 merge_files(&fl, outfile);
1337                 file_list_clean(&fl);
1338         }
1339
1340         if (real_outfile) {
1341                 unlink(real_outfile);
1342                 if (rename(outfile, real_outfile) < 0)
1343                         err(2, NULL);
1344                 sort_free(real_outfile);
1345         }
1346
1347         sort_free(outfile);
1348
1349         return (result);
1350 }