1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 1988, 1991, 1992, 1993, 1994, 1995 Free Software Foundation
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 Written December 1988 by Mike Haertel.
19 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
20 or (US mail) as Mike Haertel c/o Free Software Foundation. */
22 /* $FreeBSD: src/gnu/usr.bin/sort/sort.c,v 1.15.2.4 2002/04/17 11:41:42 ache Exp $ */
26 /* Get isblank from GNU libc. */
29 #include <sys/types.h>
37 #include "long-options.h"
54 /* Undefine, to avoid warning about redefinition on some systems. */
56 #define min(a, b) ((a) < (b) ? (a) : (b))
58 #define UCHAR_LIM (UCHAR_MAX + 1)
59 #define UCHAR(c) ((unsigned char) (c))
61 #ifndef DEFAULT_TMPDIR
62 #define DEFAULT_TMPDIR "/tmp"
65 /* The kind of blanks for '-b' to skip in various options. */
66 enum blanktype { bl_start, bl_end, bl_both };
68 /* Lines are held in core as counted strings. */
71 char *text; /* Text of the line. */
72 int length; /* Length not including final newline. */
73 char *keybeg; /* Start of first key. */
74 char *keylim; /* Limit of first key. */
77 /* Arrays of lines. */
80 struct line *lines; /* Dynamically allocated array of lines. */
81 int used; /* Number of slots used. */
82 int alloc; /* Number of slots allocated. */
83 int limit; /* Max number of slots to allocate. */
89 char *buf; /* Dynamically allocated buffer. */
90 int used; /* Number of bytes used. */
91 int alloc; /* Number of bytes allocated. */
92 int left; /* Number of bytes left after line parsing. */
97 int sword; /* Zero-origin 'word' to start at. */
98 int schar; /* Additional characters to skip. */
99 int skipsblanks; /* Skip leading white space at start. */
100 int eword; /* Zero-origin first word after field. */
101 int echar; /* Additional characters in field. */
102 int skipeblanks; /* Skip trailing white space at finish. */
103 int *ignore; /* Boolean array of characters to ignore. */
104 char *translate; /* Translation applied to characters. */
105 int numeric; /* Flag for numeric comparison. Handle
106 strings of digits with optional decimal
107 point, but no exponential notation. */
108 int general_numeric; /* Flag for general, numeric comparison.
109 Handle numbers in exponential notation. */
110 int month; /* Flag for comparison by month name. */
111 int reverse; /* Reverse the sense of comparison. */
112 struct keyfield *next; /* Next keyfield to try. */
121 /* The name this program was run with. */
124 /* Table of digits. */
125 static int digits[UCHAR_LIM];
127 /* Table of white space. */
128 static int blanks[UCHAR_LIM];
130 /* Table of non-printing characters. */
131 static int nonprinting[UCHAR_LIM];
133 /* Table of non-dictionary characters (not letters, digits, or blanks). */
134 static int nondictionary[UCHAR_LIM];
136 /* Translation table folding lower case to upper. */
137 static char fold_toupper[UCHAR_LIM];
139 /* Table mapping 3-letter month names to integers.
140 Alphabetic order allows binary search. */
141 static struct month const monthtab[] =
157 /* During the merge phase, the number of files to merge at once. */
160 /* Initial buffer size for in core sorting. Will not grow unless a
161 line longer than this is seen. */
162 static int sortalloc = 512 * 1024;
164 /* Initial buffer size for in core merge buffers. Bear in mind that
165 up to NMERGE * mergealloc bytes may be allocated for merge buffers. */
166 static int mergealloc = 16 * 1024;
168 /* Guess of average line length. */
169 static int linelength = 30;
171 /* Maximum number of elements for the array(s) of struct line's, in bytes. */
172 #define LINEALLOC (256 * 1024)
174 /* Prefix for temporary file names. */
175 static char *temp_file_prefix;
177 /* Flag to reverse the order of all comparisons. */
180 /* Flag for stable sort. This turns off the last ditch bytewise
181 comparison of lines, and instead leaves lines in the same order
182 they were read if all keys compare equal. */
185 /* Tab character separating fields. If NUL, then fields are separated
186 by the empty string between a non-whitespace character and a whitespace
190 /* Flag to remove consecutive duplicate lines from the output.
191 Only the last of a sequence of equal lines will be output. */
194 /* Nonzero if any of the input files are the standard input. */
195 static int have_read_stdin;
197 /* Lists of key field comparisons to be tried. */
198 static struct keyfield keyhead;
201 static unsigned char decimal_point;
204 COLLDIFF (int a, int b)
208 if ((unsigned char)a == (unsigned char)b)
212 return strcoll(s[0], s[1]);
216 collcmp(char *a, char *b, int mini)
230 #endif /* __FreeBSD__ */
236 fprintf (stderr, _("Try `%s --help' for more information.\n"),
241 Usage: %s [OPTION]... [FILE]...\n\
245 Write sorted concatenation of all FILE(s) to standard output.\n\
247 +POS1 [-POS2] start a key at POS1, end it before POS2\n\
248 -M compare (unknown) < `JAN' < ... < `DEC', imply -b\n\
249 -T DIRECT use DIRECT for temporary files, not $TMPDIR or %s\n\
250 -b ignore leading blanks in sort fields or keys\n\
251 -c check if given files already sorted, do not sort\n\
252 -d consider only [a-zA-Z0-9 ] characters in keys\n\
253 -f fold lower case to upper case characters in keys\n\
254 -g compare according to general numerical value, imply -b\n\
255 -i consider only [\\040-\\0176] characters in keys\n\
256 -k POS1[,POS2] same as +POS1 [-POS2], but all positions counted from 1\n\
257 -m merge already sorted files, do not sort\n\
258 -n compare according to string numerical value, imply -b\n\
259 -o FILE write result on FILE instead of standard output\n\
260 -r reverse the result of comparisons\n\
261 -s stabilize sort by disabling last resort comparison\n\
262 -t SEP use SEParator instead of non- to whitespace transition\n\
263 -u with -c, check for strict ordering\n\
264 -u with -m, only output the first of an equal sequence\n\
265 --help display this help and exit\n\
266 --version output version information and exit\n\
268 POS is F[.C][OPTS], where F is the field number and C the character\n\
269 position in the field, both counted from zero. OPTS is made up of one\n\
270 or more of Mbdfinr, this effectively disable global -Mbdfinr settings\n\
271 for that key. If no key given, use the entire line as key. With no\n\
272 FILE, or when FILE is -, read standard input.\n\
279 /* The list of temporary files. */
280 static struct tempnode
283 struct tempnode *next;
286 /* Clean up any remaining temporary files. */
291 struct tempnode *node;
293 for (node = temphead.next; node; node = node->next)
297 /* Allocate N bytes of memory dynamically, with error checking. */
300 xmalloc (unsigned int n)
307 error (0, 0, _("virtual memory exhausted"));
314 /* Change the size of an allocated block of memory P to N bytes,
316 If P is NULL, run xmalloc.
317 If N is 0, run free and return NULL. */
320 xrealloc (char *p, unsigned int n)
332 error (0, 0, _("virtual memory exhausted"));
340 xtmpfopen (const char *file)
345 fd = open (file, O_WRONLY | O_CREAT | O_TRUNC, 0600);
346 if (fd < 0 || (fp = fdopen (fd, "w")) == NULL)
348 error (0, errno, "%s", file);
357 xfopen (const char *file, const char *how)
361 if (strcmp (file, "-") == 0)
367 if ((fp = fopen (file, how)) == NULL)
369 error (0, errno, "%s", file);
385 /* Allow reading stdin from tty more than once. */
389 else if (fp == stdout)
391 if (fflush (fp) != 0)
393 error (0, errno, _("flushing file"));
400 if (fclose (fp) != 0)
402 error (0, errno, _("error closing file"));
410 xfwrite (const char *buf, int size, int nelem, FILE *fp)
412 if (fwrite (buf, size, nelem, fp) != nelem)
414 error (0, errno, _("write error"));
420 /* Return a name for a temporary file. */
426 int len = strlen (temp_file_prefix);
427 char *name = xmalloc (len + 1 + sizeof ("sort") - 1 + 5 + 5 + 1);
428 struct tempnode *node;
430 node = (struct tempnode *) xmalloc (sizeof (struct tempnode));
434 (len && temp_file_prefix[len - 1] != '/') ? "/" : "");
436 if ((fd = mkstemp(name)) == -1)
438 error (0, errno, _("mkstemp error"));
445 node->next = temphead.next;
446 temphead.next = node;
450 /* Search through the list of temporary files for NAME;
451 remove it if it is found on the list. */
456 struct tempnode *node, *temp;
458 for (node = &temphead; node->next; node = node->next)
459 if (!strcmp (name, node->next->name))
466 node->next = temp->next;
467 free ((char *) temp);
471 /* Initialize the character class tables. */
478 for (i = 0; i < UCHAR_LIM; ++i)
486 if (!ISALNUM (i) && !ISBLANK (i))
487 nondictionary[i] = 1;
489 fold_toupper[i] = toupper (i);
495 /* Initialize BUF, allocating ALLOC bytes initially. */
498 initbuf (struct buffer *buf, int alloc)
501 buf->buf = xmalloc (buf->alloc);
502 buf->used = buf->left = 0;
505 /* Fill BUF reading from FP, moving buf->left bytes from the end
506 of buf->buf to the beginning first. If EOF is reached and the
507 file wasn't terminated by a newline, supply one. Return a count
508 of bytes buffered. */
511 fillbuf (struct buffer *buf, FILE *fp)
515 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
516 buf->used = buf->left;
518 while (!feof (fp) && (buf->used == 0 || !memchr (buf->buf, '\n', buf->used)))
520 if (buf->used == buf->alloc)
523 buf->buf = xrealloc (buf->buf, buf->alloc);
525 cc = fread (buf->buf + buf->used, 1, buf->alloc - buf->used, fp);
528 error (0, errno, _("read error"));
535 if (feof (fp) && buf->used && buf->buf[buf->used - 1] != '\n')
537 if (buf->used == buf->alloc)
540 buf->buf = xrealloc (buf->buf, buf->alloc);
542 buf->buf[buf->used++] = '\n';
548 /* Initialize LINES, allocating space for ALLOC lines initially.
549 LIMIT is the maximum possible number of lines to allocate space
553 initlines (struct lines *lines, int alloc, int limit)
555 lines->alloc = alloc;
556 lines->lines = (struct line *) xmalloc (lines->alloc * sizeof (struct line));
558 lines->limit = limit;
561 /* Return a pointer to the first character of the field specified
565 begfield (const struct line *line, const struct keyfield *key)
567 register char *ptr = line->text, *lim = ptr + line->length;
568 register int sword = key->sword, schar = key->schar;
571 while (ptr < lim && sword--)
573 while (ptr < lim && *ptr != tab)
579 while (ptr < lim && sword--)
581 while (ptr < lim && blanks[UCHAR (*ptr)])
583 while (ptr < lim && !blanks[UCHAR (*ptr)])
587 if (key->skipsblanks)
588 while (ptr < lim && blanks[UCHAR (*ptr)])
591 if (ptr + schar <= lim)
599 /* Return the limit of (a pointer to the first character after) the field
600 in LINE specified by KEY. */
603 limfield (const struct line *line, const struct keyfield *key)
605 register char *ptr = line->text, *lim = ptr + line->length;
606 register int eword = key->eword, echar = key->echar;
608 /* Note: from the POSIX spec:
609 The leading field separator itself is included in
610 a field when -t is not used. FIXME: move this comment up... */
612 /* Move PTR past EWORD fields or to one past the last byte on LINE,
613 whichever comes first. If there are more than EWORD fields, leave
614 PTR pointing at the beginning of the field having zero-based index,
615 EWORD. If a delimiter character was specified (via -t), then that
616 `beginning' is the first character following the delimiting TAB.
617 Otherwise, leave PTR pointing at the first `blank' character after
618 the preceding field. */
620 while (ptr < lim && eword--)
622 while (ptr < lim && *ptr != tab)
624 if (ptr < lim && (eword || echar > 0))
628 while (ptr < lim && eword--)
630 while (ptr < lim && blanks[UCHAR (*ptr)])
632 while (ptr < lim && !blanks[UCHAR (*ptr)])
636 /* Make LIM point to the end of (one byte past) the current field. */
640 newlim = memchr (ptr, tab, lim - ptr);
648 while (newlim < lim && blanks[UCHAR (*newlim)])
650 while (newlim < lim && !blanks[UCHAR (*newlim)])
655 /* If we're skipping leading blanks, don't start counting characters
656 until after skipping past any leading blanks. */
657 if (key->skipsblanks)
658 while (ptr < lim && blanks[UCHAR (*ptr)])
661 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
662 if (ptr + echar <= lim)
673 trim_trailing_blanks (const char *a_start, char **a_end)
675 while (*a_end > a_start && blanks[UCHAR (*(*a_end - 1))])
679 /* Find the lines in BUF, storing pointers and lengths in LINES.
680 Also replace newlines in BUF with NULs. */
683 findlines (struct buffer *buf, struct lines *lines)
685 register char *beg = buf->buf, *lim = buf->buf + buf->used, *ptr;
686 struct keyfield *key = keyhead.next;
690 while (beg < lim && (ptr = memchr (beg, '\n', lim - beg))
691 && lines->used < lines->limit)
693 /* There are various places in the code that rely on a NUL
694 being at the end of in-core lines; NULs inside the lines
695 will not cause trouble, though. */
698 if (lines->used == lines->alloc)
701 lines->lines = (struct line *)
702 xrealloc ((char *) lines->lines,
703 lines->alloc * sizeof (struct line));
706 lines->lines[lines->used].text = beg;
707 lines->lines[lines->used].length = ptr - beg;
709 /* Precompute the position of the first key for efficiency. */
713 lines->lines[lines->used].keylim =
714 limfield (&lines->lines[lines->used], key);
716 lines->lines[lines->used].keylim = ptr;
719 lines->lines[lines->used].keybeg =
720 begfield (&lines->lines[lines->used], key);
723 if (key->skipsblanks)
724 while (blanks[UCHAR (*beg)])
726 lines->lines[lines->used].keybeg = beg;
728 if (key->skipeblanks)
730 trim_trailing_blanks (lines->lines[lines->used].keybeg,
731 &lines->lines[lines->used].keylim);
736 lines->lines[lines->used].keybeg = 0;
737 lines->lines[lines->used].keylim = 0;
744 buf->left = lim - beg;
747 /* Compare strings A and B containing decimal fractions < 1. Each string
748 should begin with a decimal point followed immediately by the digits
749 of the fraction. Strings not of this form are considered to be zero. */
752 fraccompare (register const char *a, register const char *b)
754 register tmpa = UCHAR (*a), tmpb = UCHAR (*b);
757 if (tmpa == decimal_point && tmpb == decimal_point)
759 if (tmpa == '.' && tmpb == '.')
763 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
764 while (tmpa == tmpb && digits[tmpa]);
765 if (digits[tmpa] && digits[tmpb])
786 else if (tmpa == decimal_point)
788 else if (tmpa == '.')
799 else if (tmpb == decimal_point)
801 else if (tmpb == '.')
814 /* Compare strings A and B as numbers without explicitly converting them to
815 machine numbers. Comparatively slow for short strings, but asymptotically
819 numcompare (register const char *a, register const char *b)
821 register int tmpa, tmpb, loga, logb, tmp;
839 if (tmpa == decimal_point)
851 if (tmpb == decimal_point)
866 while (tmpa == tmpb && digits[tmpa])
867 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
870 if ((tmpa == decimal_point && !digits[tmpb]) ||
871 (tmpb == decimal_point && !digits[tmpa]))
873 if ((tmpa == '.' && !digits[tmpb]) || (tmpb == '.' && !digits[tmpa]))
875 return -fraccompare (a, b);
878 for (loga = 1; digits[UCHAR (*++a)]; ++loga)
884 for (logb = 1; digits[UCHAR (*++b)]; ++logb)
889 if ((tmp = logb - loga) != 0)
896 return COLLDIFF (tmpb, tmpa);
901 else if (tmpb == '-')
907 if (tmpb == decimal_point)
919 if (tmpa == decimal_point)
937 while (tmpa == tmpb && digits[tmpa])
938 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
941 if ((tmpa == decimal_point && !digits[tmpb]) ||
942 (tmpb == decimal_point && !digits[tmpa]))
944 if ((tmpa == '.' && !digits[tmpb]) || (tmpb == '.' && !digits[tmpa]))
946 return fraccompare (a, b);
949 for (loga = 1; digits[UCHAR (*++a)]; ++loga)
955 for (logb = 1; digits[UCHAR (*++b)]; ++logb)
960 if ((tmp = loga - logb) != 0)
967 return COLLDIFF (tmpa, tmpb);
975 general_numcompare (const char *sa, const char *sb)
978 /* FIXME: add option to warn about failed conversions. */
979 /* FIXME: maybe add option to try expensive FP conversion
980 only if A and B can't be compared more cheaply/accurately. */
981 if (xstrtod (sa, NULL, &a))
985 if (xstrtod (sb, NULL, &b))
989 return a == b ? 0 : a < b ? -1 : 1;
992 /* Return an integer <= 12 associated with month name S with length LEN,
993 0 if the name in S is not recognized. */
996 getmonth (const char *s, int len)
999 register int i, lo = 0, hi = 12;
1001 while (len > 0 && blanks[UCHAR(*s)])
1007 for (i = 0; i < 3; ++i)
1008 month[i] = fold_toupper[UCHAR (s[i])];
1012 if (strcmp (month, monthtab[(lo + hi) / 2].name) < 0)
1016 if (!strcmp (month, monthtab[lo].name))
1017 return monthtab[lo].val;
1021 /* Compare two lines A and B trying every key in sequence until there
1022 are no more keys or a difference is found. */
1025 keycompare (const struct line *a, const struct line *b)
1027 register char *texta, *textb, *lima, *limb, *translate;
1028 register int *ignore;
1029 struct keyfield *key;
1030 int diff = 0, iter = 0, lena, lenb;
1032 for (key = keyhead.next; key; key = key->next, ++iter)
1034 ignore = key->ignore;
1035 translate = key->translate;
1037 /* Find the beginning and limit of each field. */
1038 if (iter || a->keybeg == NULL || b->keybeg == NULL)
1040 if (key->eword >= 0)
1041 lima = limfield (a, key), limb = limfield (b, key);
1043 lima = a->text + a->length, limb = b->text + b->length;
1045 if (key->sword >= 0)
1046 texta = begfield (a, key), textb = begfield (b, key);
1049 texta = a->text, textb = b->text;
1050 if (key->skipsblanks)
1052 while (texta < lima && blanks[UCHAR (*texta)])
1054 while (textb < limb && blanks[UCHAR (*textb)])
1061 /* For the first iteration only, the key positions have
1062 been precomputed for us. */
1063 texta = a->keybeg, lima = a->keylim;
1064 textb = b->keybeg, limb = b->keylim;
1067 /* Find the lengths. */
1068 lena = lima - texta, lenb = limb - textb;
1074 if (key->skipeblanks)
1076 char *a_end = texta + lena;
1077 char *b_end = textb + lenb;
1078 trim_trailing_blanks (texta, &a_end);
1079 trim_trailing_blanks (textb, &b_end);
1080 lena = a_end - texta;
1081 lenb = b_end - textb;
1084 /* Actually compare the fields. */
1089 char savea = *lima, saveb = *limb;
1091 *lima = *limb = '\0';
1092 diff = numcompare (texta, textb);
1093 *lima = savea, *limb = saveb;
1096 diff = numcompare (texta, textb);
1099 return key->reverse ? -diff : diff;
1102 else if (key->general_numeric)
1106 char savea = *lima, saveb = *limb;
1108 *lima = *limb = '\0';
1109 diff = general_numcompare (texta, textb);
1110 *lima = savea, *limb = saveb;
1113 diff = general_numcompare (texta, textb);
1116 return key->reverse ? -diff : diff;
1119 else if (key->month)
1121 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1123 return key->reverse ? -diff : diff;
1126 else if (ignore && translate)
1129 #define CMP_FUNC(A, B) COLLDIFF ((A), (B))
1131 #define CMP_FUNC(A, B) (A) - (B)
1133 #define CMP_WITH_IGNORE(A, B) \
1136 while (texta < lima && textb < limb) \
1138 while (texta < lima && ignore[UCHAR (*texta)]) \
1140 while (textb < limb && ignore[UCHAR (*textb)]) \
1142 if (texta < lima && textb < limb) \
1146 diff = CMP_FUNC((A), (B)); \
1153 if (texta == lima && textb < limb && !ignore[UCHAR (*textb)]) \
1155 else if (texta < lima && textb == limb \
1156 && !ignore[UCHAR (*texta)]) \
1162 while (texta < lima && ignore[UCHAR (*texta)]) \
1164 while (textb < limb && ignore[UCHAR (*textb)]) \
1167 if (texta == lima && textb < limb) \
1169 else if (texta < lima && textb == limb) \
1172 /* Relative lengths are meaningless if characters were ignored. \
1173 Handling this case here avoids what might be an invalid length \
1174 comparison below. */ \
1175 if (diff == 0 && texta == lima && textb == limb) \
1180 CMP_WITH_IGNORE (translate[UCHAR (*texta)], translate[UCHAR (*textb)]);
1182 CMP_WITH_IGNORE (*texta, *textb);
1184 while (texta < lima && textb < limb)
1186 if (translate[UCHAR (*texta++)] != translate[UCHAR (*textb++)])
1189 diff = COLLDIFF (translate[UCHAR (*--texta)],
1190 translate[UCHAR (*--textb)]);
1192 diff = (translate[UCHAR (*--texta)]
1193 - translate[UCHAR (*--textb)]);
1200 diff = collcmp (texta, textb, min (lena, lenb));
1202 diff = memcmp (texta, textb, min (lena, lenb));
1206 return key->reverse ? -diff : diff;
1207 if ((diff = lena - lenb) != 0)
1208 return key->reverse ? -diff : diff;
1214 /* Compare two lines A and B, returning negative, zero, or positive
1215 depending on whether A compares less than, equal to, or greater than B. */
1218 compare (register const struct line *a, register const struct line *b)
1220 int diff, tmpa, tmpb, mini;
1222 /* First try to compare on the specified keys (if any).
1223 The only two cases with no key at all are unadorned sort,
1224 and unadorned sort -r. */
1227 diff = keycompare (a, b);
1230 if (unique || stable)
1234 /* If the keys all compare equal (or no keys were specified)
1235 fall through to the default byte-by-byte comparison. */
1236 tmpa = a->length, tmpb = b->length;
1237 mini = min (tmpa, tmpb);
1242 char *ap = a->text, *bp = b->text;
1245 diff = UCHAR (*ap) - UCHAR (*bp);
1250 diff = collcmp (ap, bp, mini);
1252 diff = memcmp (ap, bp, mini);
1261 return reverse ? -diff : diff;
1264 /* Check that the lines read from the given FP come in order. Return
1265 1 if they do and 0 if there is a disorder.
1266 FIXME: return number of first out-of-order line if not sorted. */
1271 struct buffer buf; /* Input buffer. */
1272 struct lines lines; /* Lines scanned from the buffer. */
1273 struct line temp; /* Copy of previous line. */
1274 int cc; /* Character count. */
1275 int alloc, sorted = 1;
1277 initbuf (&buf, mergealloc);
1278 initlines (&lines, mergealloc / linelength + 1,
1279 LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1281 temp.text = xmalloc (alloc);
1283 cc = fillbuf (&buf, fp);
1287 findlines (&buf, &lines);
1291 struct line *prev_line; /* Pointer to previous line. */
1292 int cmp; /* Result of calling compare. */
1295 /* Compare each line in the buffer with its successor. */
1296 for (i = 0; i < lines.used - 1; ++i)
1298 cmp = compare (&lines.lines[i], &lines.lines[i + 1]);
1299 if ((unique && cmp >= 0) || (cmp > 0))
1306 /* Save the last line of the buffer and refill the buffer. */
1307 prev_line = lines.lines + (lines.used - 1);
1308 if (prev_line->length > alloc)
1310 while (prev_line->length + 1 > alloc)
1312 temp.text = xrealloc (temp.text, alloc);
1314 memcpy (temp.text, prev_line->text, prev_line->length + 1);
1315 temp.length = prev_line->length;
1316 temp.keybeg = temp.text + (prev_line->keybeg - prev_line->text);
1317 temp.keylim = temp.text + (prev_line->keylim - prev_line->text);
1319 cc = fillbuf (&buf, fp);
1323 findlines (&buf, &lines);
1324 /* Make sure the line saved from the old buffer contents is
1325 less than or equal to the first line of the new buffer. */
1326 cmp = compare (&temp, &lines.lines[0]);
1327 if ((unique && cmp >= 0) || (cmp > 0))
1337 free ((char *) lines.lines);
1342 /* Merge lines from FPS onto OFP. NFPS cannot be greater than NMERGE.
1343 Close FPS before returning. */
1346 mergefps (FILE **fps, register int nfps, FILE *ofp)
1348 struct buffer buffer[NMERGE]; /* Input buffers for each file. */
1349 struct lines lines[NMERGE]; /* Line tables for each buffer. */
1350 struct line saved; /* Saved line for unique check. */
1351 int savedflag = 0; /* True if there is a saved line. */
1352 int savealloc; /* Size allocated for the saved line. */
1353 int cur[NMERGE]; /* Current line in each line table. */
1354 int ord[NMERGE]; /* Table representing a permutation of fps,
1355 such that lines[ord[0]].lines[cur[ord[0]]]
1356 is the smallest line and will be next
1358 register int i, j, t;
1360 #ifdef lint /* Suppress `used before initialized' warning. */
1364 /* Allocate space for a saved line if necessary. */
1367 savealloc = linelength;
1368 saved.text = xmalloc (savealloc);
1371 /* Read initial lines from each input file. */
1372 for (i = 0; i < nfps; ++i)
1374 initbuf (&buffer[i], mergealloc);
1375 /* If a file is empty, eliminate it from future consideration. */
1376 while (i < nfps && !fillbuf (&buffer[i], fps[i]))
1380 for (j = i; j < nfps; ++j)
1381 fps[j] = fps[j + 1];
1384 free (buffer[i].buf);
1387 initlines (&lines[i], mergealloc / linelength + 1,
1388 LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1389 findlines (&buffer[i], &lines[i]);
1394 /* Set up the ord table according to comparisons among input lines.
1395 Since this only reorders two items if one is strictly greater than
1396 the other, it is stable. */
1397 for (i = 0; i < nfps; ++i)
1399 for (i = 1; i < nfps; ++i)
1400 if (compare (&lines[ord[i - 1]].lines[cur[ord[i - 1]]],
1401 &lines[ord[i]].lines[cur[ord[i]]]) > 0)
1402 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
1404 /* Repeatedly output the smallest line until no input remains. */
1407 /* If uniqified output is turned on, output only the first of
1408 an identical series of lines. */
1411 if (savedflag && compare (&saved, &lines[ord[0]].lines[cur[ord[0]]]))
1413 xfwrite (saved.text, 1, saved.length, ofp);
1419 if (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1421 while (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1423 saved.text = xrealloc (saved.text, savealloc);
1425 saved.length = lines[ord[0]].lines[cur[ord[0]]].length;
1426 memcpy (saved.text, lines[ord[0]].lines[cur[ord[0]]].text,
1428 if (lines[ord[0]].lines[cur[ord[0]]].keybeg != NULL)
1430 saved.keybeg = saved.text +
1431 (lines[ord[0]].lines[cur[ord[0]]].keybeg
1432 - lines[ord[0]].lines[cur[ord[0]]].text);
1434 if (lines[ord[0]].lines[cur[ord[0]]].keylim != NULL)
1436 saved.keylim = saved.text +
1437 (lines[ord[0]].lines[cur[ord[0]]].keylim
1438 - lines[ord[0]].lines[cur[ord[0]]].text);
1445 xfwrite (lines[ord[0]].lines[cur[ord[0]]].text, 1,
1446 lines[ord[0]].lines[cur[ord[0]]].length, ofp);
1450 /* Check if we need to read more lines into core. */
1451 if (++cur[ord[0]] == lines[ord[0]].used)
1452 if (fillbuf (&buffer[ord[0]], fps[ord[0]]))
1454 findlines (&buffer[ord[0]], &lines[ord[0]]);
1459 /* We reached EOF on fps[ord[0]]. */
1460 for (i = 1; i < nfps; ++i)
1461 if (ord[i] > ord[0])
1464 xfclose (fps[ord[0]]);
1465 free (buffer[ord[0]].buf);
1466 free ((char *) lines[ord[0]].lines);
1467 for (i = ord[0]; i < nfps; ++i)
1469 fps[i] = fps[i + 1];
1470 buffer[i] = buffer[i + 1];
1471 lines[i] = lines[i + 1];
1472 cur[i] = cur[i + 1];
1474 for (i = 0; i < nfps; ++i)
1475 ord[i] = ord[i + 1];
1479 /* The new line just read in may be larger than other lines
1480 already in core; push it back in the queue until we encounter
1481 a line larger than it. */
1482 for (i = 1; i < nfps; ++i)
1484 t = compare (&lines[ord[0]].lines[cur[ord[0]]],
1485 &lines[ord[i]].lines[cur[ord[i]]]);
1487 t = ord[0] - ord[i];
1492 for (j = 1; j < i; ++j)
1493 ord[j - 1] = ord[j];
1497 if (unique && savedflag)
1499 xfwrite (saved.text, 1, saved.length, ofp);
1505 /* Sort the array LINES with NLINES members, using TEMP for temporary space. */
1508 sortlines (struct line *lines, int nlines, struct line *temp)
1510 register struct line *lo, *hi, *t;
1511 register int nlo, nhi;
1515 if (compare (&lines[0], &lines[1]) > 0)
1516 *temp = lines[0], lines[0] = lines[1], lines[1] = *temp;
1526 sortlines (lo, nlo, temp);
1529 sortlines (hi, nhi, temp);
1534 if (compare (lo, hi) <= 0)
1535 *t++ = *lo++, --nlo;
1537 *t++ = *hi++, --nhi;
1541 for (lo = lines, nlo = nlines - nhi, t = temp; nlo; --nlo)
1545 /* Check that each of the NFILES FILES is ordered.
1546 Return a count of disordered files. */
1549 check (char **files, int nfiles)
1551 int i, disorders = 0;
1554 for (i = 0; i < nfiles; ++i)
1556 fp = xfopen (files[i], "r");
1559 fprintf (stderr, _("%s: disorder on %s\n"), program_name, files[i]);
1566 /* Merge NFILES FILES onto OFP. */
1569 merge (char **files, int nfiles, FILE *ofp)
1573 FILE *fps[NMERGE], *tfp;
1575 while (nfiles > NMERGE)
1578 for (i = 0; i < nfiles / NMERGE; ++i)
1580 for (j = 0; j < NMERGE; ++j)
1581 fps[j] = xfopen (files[i * NMERGE + j], "r");
1582 tfp = xtmpfopen (temp = tempname ());
1583 mergefps (fps, NMERGE, tfp);
1585 for (j = 0; j < NMERGE; ++j)
1586 zaptemp (files[i * NMERGE + j]);
1589 for (j = 0; j < nfiles % NMERGE; ++j)
1590 fps[j] = xfopen (files[i * NMERGE + j], "r");
1591 tfp = xtmpfopen (temp = tempname ());
1592 mergefps (fps, nfiles % NMERGE, tfp);
1594 for (j = 0; j < nfiles % NMERGE; ++j)
1595 zaptemp (files[i * NMERGE + j]);
1600 for (i = 0; i < nfiles; ++i)
1601 fps[i] = xfopen (files[i], "r");
1602 mergefps (fps, i, ofp);
1603 for (i = 0; i < nfiles; ++i)
1607 /* Sort NFILES FILES onto OFP. */
1610 sort (char **files, int nfiles, FILE *ofp)
1617 struct tempnode *node;
1618 int n_temp_files = 0;
1621 initbuf (&buf, sortalloc);
1622 initlines (&lines, sortalloc / linelength + 1,
1623 LINEALLOC / sizeof (struct line));
1625 tmp = (struct line *) xmalloc (ntmp * sizeof (struct line));
1629 fp = xfopen (*files++, "r");
1630 while (fillbuf (&buf, fp))
1632 findlines (&buf, &lines);
1633 if (lines.used > ntmp)
1635 while (lines.used > ntmp)
1637 tmp = (struct line *)
1638 xrealloc ((char *) tmp, ntmp * sizeof (struct line));
1640 sortlines (lines.lines, lines.used, tmp);
1641 if (feof (fp) && !nfiles && !n_temp_files && !buf.left)
1646 tfp = xtmpfopen (tempname ());
1648 for (i = 0; i < lines.used; ++i)
1649 if (!unique || i == 0
1650 || compare (&lines.lines[i], &lines.lines[i - 1]))
1652 xfwrite (lines.lines[i].text, 1, lines.lines[i].length, tfp);
1662 free ((char *) lines.lines);
1663 free ((char *) tmp);
1667 tempfiles = (char **) xmalloc (n_temp_files * sizeof (char *));
1669 for (node = temphead.next; i > 0; node = node->next)
1670 tempfiles[--i] = node->name;
1671 merge (tempfiles, n_temp_files, ofp);
1672 free ((char *) tempfiles);
1676 /* Insert key KEY at the end of the list (`keyhead'). */
1679 insertkey (struct keyfield *key)
1681 struct keyfield *k = &keyhead;
1690 badfieldspec (const char *s)
1692 error (2, 0, _("invalid field specification `%s'"), s);
1695 /* Handle interrupts and hangups. */
1698 sighandler (int sig)
1701 struct sigaction sigact;
1703 sigact.sa_handler = SIG_DFL;
1704 sigemptyset (&sigact.sa_mask);
1705 sigact.sa_flags = 0;
1706 sigaction (sig, &sigact, NULL);
1707 #else /* !SA_INTERRUPT */
1708 signal (sig, SIG_DFL);
1709 #endif /* SA_INTERRUPT */
1711 kill (getpid (), sig);
1714 /* Set the ordering options for KEY specified in S.
1715 Return the address of the first character in S that
1716 is not a valid ordering option.
1717 BLANKTYPE is the kind of blanks that 'b' should skip. */
1720 set_ordering (register const char *s, struct keyfield *key,
1721 enum blanktype blanktype)
1728 if (blanktype == bl_start || blanktype == bl_both)
1729 key->skipsblanks = 1;
1730 if (blanktype == bl_end || blanktype == bl_both)
1731 key->skipeblanks = 1;
1734 key->ignore = nondictionary;
1737 key->translate = fold_toupper;
1740 key->general_numeric = 1;
1743 key->ignore = nonprinting;
1750 if (blanktype == bl_start || blanktype == bl_both)
1751 key->skipsblanks = 1;
1752 if (blanktype == bl_end || blanktype == bl_both)
1753 key->skipeblanks = 1;
1767 main (int argc, char **argv)
1769 struct keyfield *key = NULL, gkey;
1772 int checkonly = 0, mergeonly = 0, nfiles = 0;
1773 char *minus = "-", *outfile = minus, **files, *tmp;
1776 struct sigaction oldact, newact;
1777 #endif /* SA_INTERRUPT */
1780 (void) setlocale(LC_ALL, "");
1781 decimal_point = localeconv()->decimal_point[0];
1783 program_name = argv[0];
1785 parse_long_options (argc, argv, "sort", version_string, usage);
1787 have_read_stdin = 0;
1790 temp_file_prefix = getenv ("TMPDIR");
1791 if (temp_file_prefix == NULL)
1792 temp_file_prefix = DEFAULT_TMPDIR;
1795 newact.sa_handler = sighandler;
1796 sigemptyset (&newact.sa_mask);
1797 newact.sa_flags = 0;
1799 sigaction (SIGINT, NULL, &oldact);
1800 if (oldact.sa_handler != SIG_IGN)
1801 sigaction (SIGINT, &newact, NULL);
1802 sigaction (SIGHUP, NULL, &oldact);
1803 if (oldact.sa_handler != SIG_IGN)
1804 sigaction (SIGHUP, &newact, NULL);
1805 sigaction (SIGPIPE, NULL, &oldact);
1806 if (oldact.sa_handler != SIG_IGN)
1807 sigaction (SIGPIPE, &newact, NULL);
1808 sigaction (SIGTERM, NULL, &oldact);
1809 if (oldact.sa_handler != SIG_IGN)
1810 sigaction (SIGTERM, &newact, NULL);
1811 #else /* !SA_INTERRUPT */
1812 if (signal (SIGINT, SIG_IGN) != SIG_IGN)
1813 signal (SIGINT, sighandler);
1814 if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
1815 signal (SIGHUP, sighandler);
1816 if (signal (SIGPIPE, SIG_IGN) != SIG_IGN)
1817 signal (SIGPIPE, sighandler);
1818 if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
1819 signal (SIGTERM, sighandler);
1820 #endif /* !SA_INTERRUPT */
1822 gkey.sword = gkey.eword = -1;
1824 gkey.translate = NULL;
1825 gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = 0;
1826 gkey.skipsblanks = gkey.skipeblanks = 0;
1828 files = (char **) xmalloc (sizeof (char *) * argc);
1830 for (i = 1; i < argc; ++i)
1832 if (argv[i][0] == '+')
1836 key = (struct keyfield *) xmalloc (sizeof (struct keyfield));
1839 key->translate = NULL;
1840 key->skipsblanks = key->skipeblanks = 0;
1841 key->numeric = key->general_numeric = key->month = key->reverse = 0;
1843 if (! (digits[UCHAR (*s)] || (*s == '.' && digits[UCHAR (s[1])])))
1844 badfieldspec (argv[i]);
1845 for (t = 0; digits[UCHAR (*s)]; ++s)
1846 t = 10 * t + *s - '0';
1849 for (++s; digits[UCHAR (*s)]; ++s)
1850 t2 = 10 * t2 + *s - '0';
1858 s = set_ordering (s, key, bl_start);
1860 badfieldspec (argv[i]);
1862 else if (argv[i][0] == '-' && argv[i][1])
1865 if (digits[UCHAR (*s)] || (*s == '.' && digits[UCHAR (s[1])]))
1869 for (t = 0; digits[UCHAR (*s)]; ++s)
1870 t = t * 10 + *s - '0';
1873 for (++s; digits[UCHAR (*s)]; ++s)
1874 t2 = t2 * 10 + *s - '0';
1877 s = set_ordering (s, key, bl_end);
1879 badfieldspec (argv[i]);
1886 s = set_ordering (s, &gkey, bl_both);
1900 error (2, 0, _("option `-k' requires an argument"));
1906 key = (struct keyfield *)
1907 xmalloc (sizeof (struct keyfield));
1908 memset (key, 0, sizeof (struct keyfield));
1911 key->translate = NULL;
1912 key->skipsblanks = key->skipeblanks = 0;
1913 key->numeric = key->month = key->reverse = 0;
1915 if (!digits[UCHAR (*s)])
1916 badfieldspec (argv[i]);
1917 for (t = 0; digits[UCHAR (*s)]; ++s)
1918 t = 10 * t + *s - '0';
1921 /* Provoke with `sort -k0' */
1922 error (0, 0, _("the starting field number argument \
1923 to the `-k' option must be positive"));
1924 badfieldspec (argv[i]);
1930 if (!digits[UCHAR (s[1])])
1932 /* Provoke with `sort -k1.' */
1933 error (0, 0, _("starting field spec has `.' but \
1934 lacks following character offset"));
1935 badfieldspec (argv[i]);
1937 for (++s; digits[UCHAR (*s)]; ++s)
1938 t2 = 10 * t2 + *s - '0';
1941 /* Provoke with `sort -k1.0' */
1942 error (0, 0, _("starting field character offset \
1943 argument to the `-k' option\nmust be positive"));
1944 badfieldspec (argv[i]);
1955 s = set_ordering (s, key, bl_start);
1962 badfieldspec (argv[i]);
1965 /* Skip over comma. */
1969 /* Provoke with `sort -k1,' */
1970 error (0, 0, _("field specification has `,' but \
1971 lacks following field spec"));
1972 badfieldspec (argv[i]);
1975 for (t = 0; digits[UCHAR (*s)]; ++s)
1976 t = t * 10 + *s - '0';
1979 /* Provoke with `sort -k1,0' */
1980 error (0, 0, _("ending field number argument \
1981 to the `-k' option must be positive"));
1982 badfieldspec (argv[i]);
1988 if (!digits[UCHAR (s[1])])
1990 /* Provoke with `sort -k1,1.' */
1991 error (0, 0, _("ending field spec has `.' \
1992 but lacks following character offset"));
1993 badfieldspec (argv[i]);
1995 for (++s; digits[UCHAR (*s)]; ++s)
1996 t2 = t2 * 10 + *s - '0';
2000 /* `-k 2,3' is equivalent to `+1 -3'. */
2005 s = set_ordering (s, key, bl_end);
2007 badfieldspec (argv[i]);
2021 error (2, 0, _("option `-o' requires an argument"));
2023 outfile = argv[++i];
2032 else if (i < argc - 1)
2038 error (2, 0, _("option `-t' requires an argument"));
2042 temp_file_prefix = ++s;
2046 temp_file_prefix = argv[++i];
2048 error (2, 0, _("option `-T' requires an argument"));
2056 /* Accept and ignore e.g. -y0 for compatibility with
2060 fprintf (stderr, _("%s: unrecognized option `-%c'\n"),
2068 else /* Not an option. */
2070 files[nfiles++] = argv[i];
2078 /* Inheritance of global options to individual keys. */
2079 for (key = keyhead.next; key; key = key->next)
2080 if (!key->ignore && !key->translate && !key->skipsblanks && !key->reverse
2081 && !key->skipeblanks && !key->month && !key->numeric
2082 && !key->general_numeric)
2084 key->ignore = gkey.ignore;
2085 key->translate = gkey.translate;
2086 key->skipsblanks = gkey.skipsblanks;
2087 key->skipeblanks = gkey.skipeblanks;
2088 key->month = gkey.month;
2089 key->numeric = gkey.numeric;
2090 key->general_numeric = gkey.general_numeric;
2091 key->reverse = gkey.reverse;
2094 if (!keyhead.next && (gkey.ignore || gkey.translate || gkey.skipsblanks
2095 || gkey.skipeblanks || gkey.month || gkey.numeric
2096 || gkey.general_numeric))
2098 reverse = gkey.reverse;
2107 exit (check (files, nfiles) != 0);
2109 if (strcmp (outfile, "-"))
2111 struct stat outstat;
2112 if (stat (outfile, &outstat) == 0)
2114 /* The following code prevents a race condition when
2115 people use the brain dead shell programming idiom:
2116 cat file | sort -o file
2117 This feature is provided for historical compatibility,
2118 but we strongly discourage ever relying on this in
2119 new shell programs. */
2121 /* Temporarily copy each input file that might be another name
2122 for the output file. When in doubt (e.g. a pipe), copy. */
2123 for (i = 0; i < nfiles; ++i)
2129 if (S_ISREG (outstat.st_mode) && strcmp (outfile, files[i]))
2132 if ((strcmp (files[i], "-")
2133 ? stat (files[i], &instat)
2134 : fstat (fileno (stdin), &instat)) != 0)
2136 error (0, errno, "%s", files[i]);
2140 if (S_ISREG (instat.st_mode)
2141 && (instat.st_ino != outstat.st_ino
2142 || instat.st_dev != outstat.st_dev))
2144 /* We know the files are distinct. */
2149 fp = xfopen (files[i], "r");
2151 ofp = xtmpfopen (tmp);
2152 while ((cc = fread (buf, 1, sizeof buf, fp)) > 0)
2153 xfwrite (buf, 1, cc, ofp);
2156 error (0, errno, "%s", files[i]);
2165 ofp = xfopen (outfile, "w");
2171 merge (files, nfiles, ofp);
2173 sort (files, nfiles, ofp);
2176 /* If we wait for the implicit flush on exit, and the parent process
2177 has closed stdout (e.g., exec >&- in a shell), then the output file
2178 winds up empty. I don't understand why. This is under SunOS,
2179 Solaris, Ultrix, and Irix. This premature fflush makes the output
2180 reappear. --karl@cs.umb.edu */
2181 if (fflush (ofp) < 0)
2182 error (1, errno, _("%s: write error"), outfile);
2184 if (have_read_stdin && fclose (stdin) == EOF)
2185 error (1, errno, outfile);
2186 if (ferror (stdout) || fclose (stdout) == EOF)
2187 error (1, errno, _("%s: write error"), outfile);