1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 1988, 1991, 1992, 1993, 1994, 1995 Free Software Foundation
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 Written December 1988 by Mike Haertel.
19 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
20 or (US mail) as Mike Haertel c/o Free Software Foundation. */
22 /* $FreeBSD: src/gnu/usr.bin/sort/sort.c,v 1.15.2.4 2002/04/17 11:41:42 ache Exp $ */
23 /* $DragonFly: src/gnu/usr.bin/sort/sort.c,v 1.2 2003/06/17 04:25:49 dillon Exp $ */
27 /* Get isblank from GNU libc. */
30 #include <sys/types.h>
38 #include "long-options.h"
55 /* Undefine, to avoid warning about redefinition on some systems. */
57 #define min(a, b) ((a) < (b) ? (a) : (b))
59 #define UCHAR_LIM (UCHAR_MAX + 1)
60 #define UCHAR(c) ((unsigned char) (c))
62 #ifndef DEFAULT_TMPDIR
63 #define DEFAULT_TMPDIR "/tmp"
66 /* The kind of blanks for '-b' to skip in various options. */
67 enum blanktype { bl_start, bl_end, bl_both };
69 /* Lines are held in core as counted strings. */
72 char *text; /* Text of the line. */
73 int length; /* Length not including final newline. */
74 char *keybeg; /* Start of first key. */
75 char *keylim; /* Limit of first key. */
78 /* Arrays of lines. */
81 struct line *lines; /* Dynamically allocated array of lines. */
82 int used; /* Number of slots used. */
83 int alloc; /* Number of slots allocated. */
84 int limit; /* Max number of slots to allocate. */
90 char *buf; /* Dynamically allocated buffer. */
91 int used; /* Number of bytes used. */
92 int alloc; /* Number of bytes allocated. */
93 int left; /* Number of bytes left after line parsing. */
98 int sword; /* Zero-origin 'word' to start at. */
99 int schar; /* Additional characters to skip. */
100 int skipsblanks; /* Skip leading white space at start. */
101 int eword; /* Zero-origin first word after field. */
102 int echar; /* Additional characters in field. */
103 int skipeblanks; /* Skip trailing white space at finish. */
104 int *ignore; /* Boolean array of characters to ignore. */
105 char *translate; /* Translation applied to characters. */
106 int numeric; /* Flag for numeric comparison. Handle
107 strings of digits with optional decimal
108 point, but no exponential notation. */
109 int general_numeric; /* Flag for general, numeric comparison.
110 Handle numbers in exponential notation. */
111 int month; /* Flag for comparison by month name. */
112 int reverse; /* Reverse the sense of comparison. */
113 struct keyfield *next; /* Next keyfield to try. */
122 /* The name this program was run with. */
125 /* Table of digits. */
126 static int digits[UCHAR_LIM];
128 /* Table of white space. */
129 static int blanks[UCHAR_LIM];
131 /* Table of non-printing characters. */
132 static int nonprinting[UCHAR_LIM];
134 /* Table of non-dictionary characters (not letters, digits, or blanks). */
135 static int nondictionary[UCHAR_LIM];
137 /* Translation table folding lower case to upper. */
138 static char fold_toupper[UCHAR_LIM];
140 /* Table mapping 3-letter month names to integers.
141 Alphabetic order allows binary search. */
142 static struct month const monthtab[] =
158 /* During the merge phase, the number of files to merge at once. */
161 /* Initial buffer size for in core sorting. Will not grow unless a
162 line longer than this is seen. */
163 static int sortalloc = 512 * 1024;
165 /* Initial buffer size for in core merge buffers. Bear in mind that
166 up to NMERGE * mergealloc bytes may be allocated for merge buffers. */
167 static int mergealloc = 16 * 1024;
169 /* Guess of average line length. */
170 static int linelength = 30;
172 /* Maximum number of elements for the array(s) of struct line's, in bytes. */
173 #define LINEALLOC (256 * 1024)
175 /* Prefix for temporary file names. */
176 static char *temp_file_prefix;
178 /* Flag to reverse the order of all comparisons. */
181 /* Flag for stable sort. This turns off the last ditch bytewise
182 comparison of lines, and instead leaves lines in the same order
183 they were read if all keys compare equal. */
186 /* Tab character separating fields. If NUL, then fields are separated
187 by the empty string between a non-whitespace character and a whitespace
191 /* Flag to remove consecutive duplicate lines from the output.
192 Only the last of a sequence of equal lines will be output. */
195 /* Nonzero if any of the input files are the standard input. */
196 static int have_read_stdin;
198 /* Lists of key field comparisons to be tried. */
199 static struct keyfield keyhead;
202 static unsigned char decimal_point;
205 COLLDIFF (int a, int b)
209 if ((unsigned char)a == (unsigned char)b)
213 return strcoll(s[0], s[1]);
217 collcmp(char *a, char *b, int mini)
231 #endif /* __FreeBSD__ */
237 fprintf (stderr, _("Try `%s --help' for more information.\n"),
242 Usage: %s [OPTION]... [FILE]...\n\
246 Write sorted concatenation of all FILE(s) to standard output.\n\
248 +POS1 [-POS2] start a key at POS1, end it before POS2\n\
249 -M compare (unknown) < `JAN' < ... < `DEC', imply -b\n\
250 -T DIRECT use DIRECT for temporary files, not $TMPDIR or %s\n\
251 -b ignore leading blanks in sort fields or keys\n\
252 -c check if given files already sorted, do not sort\n\
253 -d consider only [a-zA-Z0-9 ] characters in keys\n\
254 -f fold lower case to upper case characters in keys\n\
255 -g compare according to general numerical value, imply -b\n\
256 -i consider only [\\040-\\0176] characters in keys\n\
257 -k POS1[,POS2] same as +POS1 [-POS2], but all positions counted from 1\n\
258 -m merge already sorted files, do not sort\n\
259 -n compare according to string numerical value, imply -b\n\
260 -o FILE write result on FILE instead of standard output\n\
261 -r reverse the result of comparisons\n\
262 -s stabilize sort by disabling last resort comparison\n\
263 -t SEP use SEParator instead of non- to whitespace transition\n\
264 -u with -c, check for strict ordering\n\
265 -u with -m, only output the first of an equal sequence\n\
266 --help display this help and exit\n\
267 --version output version information and exit\n\
269 POS is F[.C][OPTS], where F is the field number and C the character\n\
270 position in the field, both counted from zero. OPTS is made up of one\n\
271 or more of Mbdfinr, this effectively disable global -Mbdfinr settings\n\
272 for that key. If no key given, use the entire line as key. With no\n\
273 FILE, or when FILE is -, read standard input.\n\
280 /* The list of temporary files. */
281 static struct tempnode
284 struct tempnode *next;
287 /* Clean up any remaining temporary files. */
292 struct tempnode *node;
294 for (node = temphead.next; node; node = node->next)
298 /* Allocate N bytes of memory dynamically, with error checking. */
301 xmalloc (unsigned int n)
308 error (0, 0, _("virtual memory exhausted"));
315 /* Change the size of an allocated block of memory P to N bytes,
317 If P is NULL, run xmalloc.
318 If N is 0, run free and return NULL. */
321 xrealloc (char *p, unsigned int n)
333 error (0, 0, _("virtual memory exhausted"));
341 xtmpfopen (const char *file)
346 fd = open (file, O_WRONLY | O_CREAT | O_TRUNC, 0600);
347 if (fd < 0 || (fp = fdopen (fd, "w")) == NULL)
349 error (0, errno, "%s", file);
358 xfopen (const char *file, const char *how)
362 if (strcmp (file, "-") == 0)
368 if ((fp = fopen (file, how)) == NULL)
370 error (0, errno, "%s", file);
386 /* Allow reading stdin from tty more than once. */
390 else if (fp == stdout)
392 if (fflush (fp) != 0)
394 error (0, errno, _("flushing file"));
401 if (fclose (fp) != 0)
403 error (0, errno, _("error closing file"));
411 xfwrite (const char *buf, int size, int nelem, FILE *fp)
413 if (fwrite (buf, size, nelem, fp) != nelem)
415 error (0, errno, _("write error"));
421 /* Return a name for a temporary file. */
427 int len = strlen (temp_file_prefix);
428 char *name = xmalloc (len + 1 + sizeof ("sort") - 1 + 5 + 5 + 1);
429 struct tempnode *node;
431 node = (struct tempnode *) xmalloc (sizeof (struct tempnode));
435 (len && temp_file_prefix[len - 1] != '/') ? "/" : "");
437 if ((fd = mkstemp(name)) == -1)
439 error (0, errno, _("mkstemp error"));
446 node->next = temphead.next;
447 temphead.next = node;
451 /* Search through the list of temporary files for NAME;
452 remove it if it is found on the list. */
457 struct tempnode *node, *temp;
459 for (node = &temphead; node->next; node = node->next)
460 if (!strcmp (name, node->next->name))
467 node->next = temp->next;
468 free ((char *) temp);
472 /* Initialize the character class tables. */
479 for (i = 0; i < UCHAR_LIM; ++i)
487 if (!ISALNUM (i) && !ISBLANK (i))
488 nondictionary[i] = 1;
490 fold_toupper[i] = toupper (i);
496 /* Initialize BUF, allocating ALLOC bytes initially. */
499 initbuf (struct buffer *buf, int alloc)
502 buf->buf = xmalloc (buf->alloc);
503 buf->used = buf->left = 0;
506 /* Fill BUF reading from FP, moving buf->left bytes from the end
507 of buf->buf to the beginning first. If EOF is reached and the
508 file wasn't terminated by a newline, supply one. Return a count
509 of bytes buffered. */
512 fillbuf (struct buffer *buf, FILE *fp)
516 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
517 buf->used = buf->left;
519 while (!feof (fp) && (buf->used == 0 || !memchr (buf->buf, '\n', buf->used)))
521 if (buf->used == buf->alloc)
524 buf->buf = xrealloc (buf->buf, buf->alloc);
526 cc = fread (buf->buf + buf->used, 1, buf->alloc - buf->used, fp);
529 error (0, errno, _("read error"));
536 if (feof (fp) && buf->used && buf->buf[buf->used - 1] != '\n')
538 if (buf->used == buf->alloc)
541 buf->buf = xrealloc (buf->buf, buf->alloc);
543 buf->buf[buf->used++] = '\n';
549 /* Initialize LINES, allocating space for ALLOC lines initially.
550 LIMIT is the maximum possible number of lines to allocate space
554 initlines (struct lines *lines, int alloc, int limit)
556 lines->alloc = alloc;
557 lines->lines = (struct line *) xmalloc (lines->alloc * sizeof (struct line));
559 lines->limit = limit;
562 /* Return a pointer to the first character of the field specified
566 begfield (const struct line *line, const struct keyfield *key)
568 register char *ptr = line->text, *lim = ptr + line->length;
569 register int sword = key->sword, schar = key->schar;
572 while (ptr < lim && sword--)
574 while (ptr < lim && *ptr != tab)
580 while (ptr < lim && sword--)
582 while (ptr < lim && blanks[UCHAR (*ptr)])
584 while (ptr < lim && !blanks[UCHAR (*ptr)])
588 if (key->skipsblanks)
589 while (ptr < lim && blanks[UCHAR (*ptr)])
592 if (ptr + schar <= lim)
600 /* Return the limit of (a pointer to the first character after) the field
601 in LINE specified by KEY. */
604 limfield (const struct line *line, const struct keyfield *key)
606 register char *ptr = line->text, *lim = ptr + line->length;
607 register int eword = key->eword, echar = key->echar;
609 /* Note: from the POSIX spec:
610 The leading field separator itself is included in
611 a field when -t is not used. FIXME: move this comment up... */
613 /* Move PTR past EWORD fields or to one past the last byte on LINE,
614 whichever comes first. If there are more than EWORD fields, leave
615 PTR pointing at the beginning of the field having zero-based index,
616 EWORD. If a delimiter character was specified (via -t), then that
617 `beginning' is the first character following the delimiting TAB.
618 Otherwise, leave PTR pointing at the first `blank' character after
619 the preceding field. */
621 while (ptr < lim && eword--)
623 while (ptr < lim && *ptr != tab)
625 if (ptr < lim && (eword || echar > 0))
629 while (ptr < lim && eword--)
631 while (ptr < lim && blanks[UCHAR (*ptr)])
633 while (ptr < lim && !blanks[UCHAR (*ptr)])
637 /* Make LIM point to the end of (one byte past) the current field. */
641 newlim = memchr (ptr, tab, lim - ptr);
649 while (newlim < lim && blanks[UCHAR (*newlim)])
651 while (newlim < lim && !blanks[UCHAR (*newlim)])
656 /* If we're skipping leading blanks, don't start counting characters
657 until after skipping past any leading blanks. */
658 if (key->skipsblanks)
659 while (ptr < lim && blanks[UCHAR (*ptr)])
662 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
663 if (ptr + echar <= lim)
674 trim_trailing_blanks (const char *a_start, char **a_end)
676 while (*a_end > a_start && blanks[UCHAR (*(*a_end - 1))])
680 /* Find the lines in BUF, storing pointers and lengths in LINES.
681 Also replace newlines in BUF with NULs. */
684 findlines (struct buffer *buf, struct lines *lines)
686 register char *beg = buf->buf, *lim = buf->buf + buf->used, *ptr;
687 struct keyfield *key = keyhead.next;
691 while (beg < lim && (ptr = memchr (beg, '\n', lim - beg))
692 && lines->used < lines->limit)
694 /* There are various places in the code that rely on a NUL
695 being at the end of in-core lines; NULs inside the lines
696 will not cause trouble, though. */
699 if (lines->used == lines->alloc)
702 lines->lines = (struct line *)
703 xrealloc ((char *) lines->lines,
704 lines->alloc * sizeof (struct line));
707 lines->lines[lines->used].text = beg;
708 lines->lines[lines->used].length = ptr - beg;
710 /* Precompute the position of the first key for efficiency. */
714 lines->lines[lines->used].keylim =
715 limfield (&lines->lines[lines->used], key);
717 lines->lines[lines->used].keylim = ptr;
720 lines->lines[lines->used].keybeg =
721 begfield (&lines->lines[lines->used], key);
724 if (key->skipsblanks)
725 while (blanks[UCHAR (*beg)])
727 lines->lines[lines->used].keybeg = beg;
729 if (key->skipeblanks)
731 trim_trailing_blanks (lines->lines[lines->used].keybeg,
732 &lines->lines[lines->used].keylim);
737 lines->lines[lines->used].keybeg = 0;
738 lines->lines[lines->used].keylim = 0;
745 buf->left = lim - beg;
748 /* Compare strings A and B containing decimal fractions < 1. Each string
749 should begin with a decimal point followed immediately by the digits
750 of the fraction. Strings not of this form are considered to be zero. */
753 fraccompare (register const char *a, register const char *b)
755 register tmpa = UCHAR (*a), tmpb = UCHAR (*b);
758 if (tmpa == decimal_point && tmpb == decimal_point)
760 if (tmpa == '.' && tmpb == '.')
764 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
765 while (tmpa == tmpb && digits[tmpa]);
766 if (digits[tmpa] && digits[tmpb])
787 else if (tmpa == decimal_point)
789 else if (tmpa == '.')
800 else if (tmpb == decimal_point)
802 else if (tmpb == '.')
815 /* Compare strings A and B as numbers without explicitly converting them to
816 machine numbers. Comparatively slow for short strings, but asymptotically
820 numcompare (register const char *a, register const char *b)
822 register int tmpa, tmpb, loga, logb, tmp;
840 if (tmpa == decimal_point)
852 if (tmpb == decimal_point)
867 while (tmpa == tmpb && digits[tmpa])
868 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
871 if ((tmpa == decimal_point && !digits[tmpb]) ||
872 (tmpb == decimal_point && !digits[tmpa]))
874 if ((tmpa == '.' && !digits[tmpb]) || (tmpb == '.' && !digits[tmpa]))
876 return -fraccompare (a, b);
879 for (loga = 1; digits[UCHAR (*++a)]; ++loga)
885 for (logb = 1; digits[UCHAR (*++b)]; ++logb)
890 if ((tmp = logb - loga) != 0)
897 return COLLDIFF (tmpb, tmpa);
902 else if (tmpb == '-')
908 if (tmpb == decimal_point)
920 if (tmpa == decimal_point)
938 while (tmpa == tmpb && digits[tmpa])
939 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
942 if ((tmpa == decimal_point && !digits[tmpb]) ||
943 (tmpb == decimal_point && !digits[tmpa]))
945 if ((tmpa == '.' && !digits[tmpb]) || (tmpb == '.' && !digits[tmpa]))
947 return fraccompare (a, b);
950 for (loga = 1; digits[UCHAR (*++a)]; ++loga)
956 for (logb = 1; digits[UCHAR (*++b)]; ++logb)
961 if ((tmp = loga - logb) != 0)
968 return COLLDIFF (tmpa, tmpb);
976 general_numcompare (const char *sa, const char *sb)
979 /* FIXME: add option to warn about failed conversions. */
980 /* FIXME: maybe add option to try expensive FP conversion
981 only if A and B can't be compared more cheaply/accurately. */
982 if (xstrtod (sa, NULL, &a))
986 if (xstrtod (sb, NULL, &b))
990 return a == b ? 0 : a < b ? -1 : 1;
993 /* Return an integer <= 12 associated with month name S with length LEN,
994 0 if the name in S is not recognized. */
997 getmonth (const char *s, int len)
1000 register int i, lo = 0, hi = 12;
1002 while (len > 0 && blanks[UCHAR(*s)])
1008 for (i = 0; i < 3; ++i)
1009 month[i] = fold_toupper[UCHAR (s[i])];
1013 if (strcmp (month, monthtab[(lo + hi) / 2].name) < 0)
1017 if (!strcmp (month, monthtab[lo].name))
1018 return monthtab[lo].val;
1022 /* Compare two lines A and B trying every key in sequence until there
1023 are no more keys or a difference is found. */
1026 keycompare (const struct line *a, const struct line *b)
1028 register char *texta, *textb, *lima, *limb, *translate;
1029 register int *ignore;
1030 struct keyfield *key;
1031 int diff = 0, iter = 0, lena, lenb;
1033 for (key = keyhead.next; key; key = key->next, ++iter)
1035 ignore = key->ignore;
1036 translate = key->translate;
1038 /* Find the beginning and limit of each field. */
1039 if (iter || a->keybeg == NULL || b->keybeg == NULL)
1041 if (key->eword >= 0)
1042 lima = limfield (a, key), limb = limfield (b, key);
1044 lima = a->text + a->length, limb = b->text + b->length;
1046 if (key->sword >= 0)
1047 texta = begfield (a, key), textb = begfield (b, key);
1050 texta = a->text, textb = b->text;
1051 if (key->skipsblanks)
1053 while (texta < lima && blanks[UCHAR (*texta)])
1055 while (textb < limb && blanks[UCHAR (*textb)])
1062 /* For the first iteration only, the key positions have
1063 been precomputed for us. */
1064 texta = a->keybeg, lima = a->keylim;
1065 textb = b->keybeg, limb = b->keylim;
1068 /* Find the lengths. */
1069 lena = lima - texta, lenb = limb - textb;
1075 if (key->skipeblanks)
1077 char *a_end = texta + lena;
1078 char *b_end = textb + lenb;
1079 trim_trailing_blanks (texta, &a_end);
1080 trim_trailing_blanks (textb, &b_end);
1081 lena = a_end - texta;
1082 lenb = b_end - textb;
1085 /* Actually compare the fields. */
1090 char savea = *lima, saveb = *limb;
1092 *lima = *limb = '\0';
1093 diff = numcompare (texta, textb);
1094 *lima = savea, *limb = saveb;
1097 diff = numcompare (texta, textb);
1100 return key->reverse ? -diff : diff;
1103 else if (key->general_numeric)
1107 char savea = *lima, saveb = *limb;
1109 *lima = *limb = '\0';
1110 diff = general_numcompare (texta, textb);
1111 *lima = savea, *limb = saveb;
1114 diff = general_numcompare (texta, textb);
1117 return key->reverse ? -diff : diff;
1120 else if (key->month)
1122 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1124 return key->reverse ? -diff : diff;
1127 else if (ignore && translate)
1130 #define CMP_FUNC(A, B) COLLDIFF ((A), (B))
1132 #define CMP_FUNC(A, B) (A) - (B)
1134 #define CMP_WITH_IGNORE(A, B) \
1137 while (texta < lima && textb < limb) \
1139 while (texta < lima && ignore[UCHAR (*texta)]) \
1141 while (textb < limb && ignore[UCHAR (*textb)]) \
1143 if (texta < lima && textb < limb) \
1147 diff = CMP_FUNC((A), (B)); \
1154 if (texta == lima && textb < limb && !ignore[UCHAR (*textb)]) \
1156 else if (texta < lima && textb == limb \
1157 && !ignore[UCHAR (*texta)]) \
1163 while (texta < lima && ignore[UCHAR (*texta)]) \
1165 while (textb < limb && ignore[UCHAR (*textb)]) \
1168 if (texta == lima && textb < limb) \
1170 else if (texta < lima && textb == limb) \
1173 /* Relative lengths are meaningless if characters were ignored. \
1174 Handling this case here avoids what might be an invalid length \
1175 comparison below. */ \
1176 if (diff == 0 && texta == lima && textb == limb) \
1181 CMP_WITH_IGNORE (translate[UCHAR (*texta)], translate[UCHAR (*textb)]);
1183 CMP_WITH_IGNORE (*texta, *textb);
1185 while (texta < lima && textb < limb)
1187 if (translate[UCHAR (*texta++)] != translate[UCHAR (*textb++)])
1190 diff = COLLDIFF (translate[UCHAR (*--texta)],
1191 translate[UCHAR (*--textb)]);
1193 diff = (translate[UCHAR (*--texta)]
1194 - translate[UCHAR (*--textb)]);
1201 diff = collcmp (texta, textb, min (lena, lenb));
1203 diff = memcmp (texta, textb, min (lena, lenb));
1207 return key->reverse ? -diff : diff;
1208 if ((diff = lena - lenb) != 0)
1209 return key->reverse ? -diff : diff;
1215 /* Compare two lines A and B, returning negative, zero, or positive
1216 depending on whether A compares less than, equal to, or greater than B. */
1219 compare (register const struct line *a, register const struct line *b)
1221 int diff, tmpa, tmpb, mini;
1223 /* First try to compare on the specified keys (if any).
1224 The only two cases with no key at all are unadorned sort,
1225 and unadorned sort -r. */
1228 diff = keycompare (a, b);
1231 if (unique || stable)
1235 /* If the keys all compare equal (or no keys were specified)
1236 fall through to the default byte-by-byte comparison. */
1237 tmpa = a->length, tmpb = b->length;
1238 mini = min (tmpa, tmpb);
1243 char *ap = a->text, *bp = b->text;
1246 diff = UCHAR (*ap) - UCHAR (*bp);
1251 diff = collcmp (ap, bp, mini);
1253 diff = memcmp (ap, bp, mini);
1262 return reverse ? -diff : diff;
1265 /* Check that the lines read from the given FP come in order. Return
1266 1 if they do and 0 if there is a disorder.
1267 FIXME: return number of first out-of-order line if not sorted. */
1272 struct buffer buf; /* Input buffer. */
1273 struct lines lines; /* Lines scanned from the buffer. */
1274 struct line temp; /* Copy of previous line. */
1275 int cc; /* Character count. */
1276 int alloc, sorted = 1;
1278 initbuf (&buf, mergealloc);
1279 initlines (&lines, mergealloc / linelength + 1,
1280 LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1282 temp.text = xmalloc (alloc);
1284 cc = fillbuf (&buf, fp);
1288 findlines (&buf, &lines);
1292 struct line *prev_line; /* Pointer to previous line. */
1293 int cmp; /* Result of calling compare. */
1296 /* Compare each line in the buffer with its successor. */
1297 for (i = 0; i < lines.used - 1; ++i)
1299 cmp = compare (&lines.lines[i], &lines.lines[i + 1]);
1300 if ((unique && cmp >= 0) || (cmp > 0))
1307 /* Save the last line of the buffer and refill the buffer. */
1308 prev_line = lines.lines + (lines.used - 1);
1309 if (prev_line->length > alloc)
1311 while (prev_line->length + 1 > alloc)
1313 temp.text = xrealloc (temp.text, alloc);
1315 memcpy (temp.text, prev_line->text, prev_line->length + 1);
1316 temp.length = prev_line->length;
1317 temp.keybeg = temp.text + (prev_line->keybeg - prev_line->text);
1318 temp.keylim = temp.text + (prev_line->keylim - prev_line->text);
1320 cc = fillbuf (&buf, fp);
1324 findlines (&buf, &lines);
1325 /* Make sure the line saved from the old buffer contents is
1326 less than or equal to the first line of the new buffer. */
1327 cmp = compare (&temp, &lines.lines[0]);
1328 if ((unique && cmp >= 0) || (cmp > 0))
1338 free ((char *) lines.lines);
1343 /* Merge lines from FPS onto OFP. NFPS cannot be greater than NMERGE.
1344 Close FPS before returning. */
1347 mergefps (FILE **fps, register int nfps, FILE *ofp)
1349 struct buffer buffer[NMERGE]; /* Input buffers for each file. */
1350 struct lines lines[NMERGE]; /* Line tables for each buffer. */
1351 struct line saved; /* Saved line for unique check. */
1352 int savedflag = 0; /* True if there is a saved line. */
1353 int savealloc; /* Size allocated for the saved line. */
1354 int cur[NMERGE]; /* Current line in each line table. */
1355 int ord[NMERGE]; /* Table representing a permutation of fps,
1356 such that lines[ord[0]].lines[cur[ord[0]]]
1357 is the smallest line and will be next
1359 register int i, j, t;
1361 #ifdef lint /* Suppress `used before initialized' warning. */
1365 /* Allocate space for a saved line if necessary. */
1368 savealloc = linelength;
1369 saved.text = xmalloc (savealloc);
1372 /* Read initial lines from each input file. */
1373 for (i = 0; i < nfps; ++i)
1375 initbuf (&buffer[i], mergealloc);
1376 /* If a file is empty, eliminate it from future consideration. */
1377 while (i < nfps && !fillbuf (&buffer[i], fps[i]))
1381 for (j = i; j < nfps; ++j)
1382 fps[j] = fps[j + 1];
1385 free (buffer[i].buf);
1388 initlines (&lines[i], mergealloc / linelength + 1,
1389 LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1390 findlines (&buffer[i], &lines[i]);
1395 /* Set up the ord table according to comparisons among input lines.
1396 Since this only reorders two items if one is strictly greater than
1397 the other, it is stable. */
1398 for (i = 0; i < nfps; ++i)
1400 for (i = 1; i < nfps; ++i)
1401 if (compare (&lines[ord[i - 1]].lines[cur[ord[i - 1]]],
1402 &lines[ord[i]].lines[cur[ord[i]]]) > 0)
1403 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
1405 /* Repeatedly output the smallest line until no input remains. */
1408 /* If uniqified output is turned on, output only the first of
1409 an identical series of lines. */
1412 if (savedflag && compare (&saved, &lines[ord[0]].lines[cur[ord[0]]]))
1414 xfwrite (saved.text, 1, saved.length, ofp);
1420 if (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1422 while (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1424 saved.text = xrealloc (saved.text, savealloc);
1426 saved.length = lines[ord[0]].lines[cur[ord[0]]].length;
1427 memcpy (saved.text, lines[ord[0]].lines[cur[ord[0]]].text,
1429 if (lines[ord[0]].lines[cur[ord[0]]].keybeg != NULL)
1431 saved.keybeg = saved.text +
1432 (lines[ord[0]].lines[cur[ord[0]]].keybeg
1433 - lines[ord[0]].lines[cur[ord[0]]].text);
1435 if (lines[ord[0]].lines[cur[ord[0]]].keylim != NULL)
1437 saved.keylim = saved.text +
1438 (lines[ord[0]].lines[cur[ord[0]]].keylim
1439 - lines[ord[0]].lines[cur[ord[0]]].text);
1446 xfwrite (lines[ord[0]].lines[cur[ord[0]]].text, 1,
1447 lines[ord[0]].lines[cur[ord[0]]].length, ofp);
1451 /* Check if we need to read more lines into core. */
1452 if (++cur[ord[0]] == lines[ord[0]].used)
1453 if (fillbuf (&buffer[ord[0]], fps[ord[0]]))
1455 findlines (&buffer[ord[0]], &lines[ord[0]]);
1460 /* We reached EOF on fps[ord[0]]. */
1461 for (i = 1; i < nfps; ++i)
1462 if (ord[i] > ord[0])
1465 xfclose (fps[ord[0]]);
1466 free (buffer[ord[0]].buf);
1467 free ((char *) lines[ord[0]].lines);
1468 for (i = ord[0]; i < nfps; ++i)
1470 fps[i] = fps[i + 1];
1471 buffer[i] = buffer[i + 1];
1472 lines[i] = lines[i + 1];
1473 cur[i] = cur[i + 1];
1475 for (i = 0; i < nfps; ++i)
1476 ord[i] = ord[i + 1];
1480 /* The new line just read in may be larger than other lines
1481 already in core; push it back in the queue until we encounter
1482 a line larger than it. */
1483 for (i = 1; i < nfps; ++i)
1485 t = compare (&lines[ord[0]].lines[cur[ord[0]]],
1486 &lines[ord[i]].lines[cur[ord[i]]]);
1488 t = ord[0] - ord[i];
1493 for (j = 1; j < i; ++j)
1494 ord[j - 1] = ord[j];
1498 if (unique && savedflag)
1500 xfwrite (saved.text, 1, saved.length, ofp);
1506 /* Sort the array LINES with NLINES members, using TEMP for temporary space. */
1509 sortlines (struct line *lines, int nlines, struct line *temp)
1511 register struct line *lo, *hi, *t;
1512 register int nlo, nhi;
1516 if (compare (&lines[0], &lines[1]) > 0)
1517 *temp = lines[0], lines[0] = lines[1], lines[1] = *temp;
1527 sortlines (lo, nlo, temp);
1530 sortlines (hi, nhi, temp);
1535 if (compare (lo, hi) <= 0)
1536 *t++ = *lo++, --nlo;
1538 *t++ = *hi++, --nhi;
1542 for (lo = lines, nlo = nlines - nhi, t = temp; nlo; --nlo)
1546 /* Check that each of the NFILES FILES is ordered.
1547 Return a count of disordered files. */
1550 check (char **files, int nfiles)
1552 int i, disorders = 0;
1555 for (i = 0; i < nfiles; ++i)
1557 fp = xfopen (files[i], "r");
1560 fprintf (stderr, _("%s: disorder on %s\n"), program_name, files[i]);
1567 /* Merge NFILES FILES onto OFP. */
1570 merge (char **files, int nfiles, FILE *ofp)
1574 FILE *fps[NMERGE], *tfp;
1576 while (nfiles > NMERGE)
1579 for (i = 0; i < nfiles / NMERGE; ++i)
1581 for (j = 0; j < NMERGE; ++j)
1582 fps[j] = xfopen (files[i * NMERGE + j], "r");
1583 tfp = xtmpfopen (temp = tempname ());
1584 mergefps (fps, NMERGE, tfp);
1586 for (j = 0; j < NMERGE; ++j)
1587 zaptemp (files[i * NMERGE + j]);
1590 for (j = 0; j < nfiles % NMERGE; ++j)
1591 fps[j] = xfopen (files[i * NMERGE + j], "r");
1592 tfp = xtmpfopen (temp = tempname ());
1593 mergefps (fps, nfiles % NMERGE, tfp);
1595 for (j = 0; j < nfiles % NMERGE; ++j)
1596 zaptemp (files[i * NMERGE + j]);
1601 for (i = 0; i < nfiles; ++i)
1602 fps[i] = xfopen (files[i], "r");
1603 mergefps (fps, i, ofp);
1604 for (i = 0; i < nfiles; ++i)
1608 /* Sort NFILES FILES onto OFP. */
1611 sort (char **files, int nfiles, FILE *ofp)
1618 struct tempnode *node;
1619 int n_temp_files = 0;
1622 initbuf (&buf, sortalloc);
1623 initlines (&lines, sortalloc / linelength + 1,
1624 LINEALLOC / sizeof (struct line));
1626 tmp = (struct line *) xmalloc (ntmp * sizeof (struct line));
1630 fp = xfopen (*files++, "r");
1631 while (fillbuf (&buf, fp))
1633 findlines (&buf, &lines);
1634 if (lines.used > ntmp)
1636 while (lines.used > ntmp)
1638 tmp = (struct line *)
1639 xrealloc ((char *) tmp, ntmp * sizeof (struct line));
1641 sortlines (lines.lines, lines.used, tmp);
1642 if (feof (fp) && !nfiles && !n_temp_files && !buf.left)
1647 tfp = xtmpfopen (tempname ());
1649 for (i = 0; i < lines.used; ++i)
1650 if (!unique || i == 0
1651 || compare (&lines.lines[i], &lines.lines[i - 1]))
1653 xfwrite (lines.lines[i].text, 1, lines.lines[i].length, tfp);
1663 free ((char *) lines.lines);
1664 free ((char *) tmp);
1668 tempfiles = (char **) xmalloc (n_temp_files * sizeof (char *));
1670 for (node = temphead.next; i > 0; node = node->next)
1671 tempfiles[--i] = node->name;
1672 merge (tempfiles, n_temp_files, ofp);
1673 free ((char *) tempfiles);
1677 /* Insert key KEY at the end of the list (`keyhead'). */
1680 insertkey (struct keyfield *key)
1682 struct keyfield *k = &keyhead;
1691 badfieldspec (const char *s)
1693 error (2, 0, _("invalid field specification `%s'"), s);
1696 /* Handle interrupts and hangups. */
1699 sighandler (int sig)
1702 struct sigaction sigact;
1704 sigact.sa_handler = SIG_DFL;
1705 sigemptyset (&sigact.sa_mask);
1706 sigact.sa_flags = 0;
1707 sigaction (sig, &sigact, NULL);
1708 #else /* !SA_INTERRUPT */
1709 signal (sig, SIG_DFL);
1710 #endif /* SA_INTERRUPT */
1712 kill (getpid (), sig);
1715 /* Set the ordering options for KEY specified in S.
1716 Return the address of the first character in S that
1717 is not a valid ordering option.
1718 BLANKTYPE is the kind of blanks that 'b' should skip. */
1721 set_ordering (register const char *s, struct keyfield *key,
1722 enum blanktype blanktype)
1729 if (blanktype == bl_start || blanktype == bl_both)
1730 key->skipsblanks = 1;
1731 if (blanktype == bl_end || blanktype == bl_both)
1732 key->skipeblanks = 1;
1735 key->ignore = nondictionary;
1738 key->translate = fold_toupper;
1741 key->general_numeric = 1;
1744 key->ignore = nonprinting;
1751 if (blanktype == bl_start || blanktype == bl_both)
1752 key->skipsblanks = 1;
1753 if (blanktype == bl_end || blanktype == bl_both)
1754 key->skipeblanks = 1;
1768 main (int argc, char **argv)
1770 struct keyfield *key = NULL, gkey;
1773 int checkonly = 0, mergeonly = 0, nfiles = 0;
1774 char *minus = "-", *outfile = minus, **files, *tmp;
1777 struct sigaction oldact, newact;
1778 #endif /* SA_INTERRUPT */
1781 (void) setlocale(LC_ALL, "");
1782 decimal_point = localeconv()->decimal_point[0];
1784 program_name = argv[0];
1786 parse_long_options (argc, argv, "sort", version_string, usage);
1788 have_read_stdin = 0;
1791 temp_file_prefix = getenv ("TMPDIR");
1792 if (temp_file_prefix == NULL)
1793 temp_file_prefix = DEFAULT_TMPDIR;
1796 newact.sa_handler = sighandler;
1797 sigemptyset (&newact.sa_mask);
1798 newact.sa_flags = 0;
1800 sigaction (SIGINT, NULL, &oldact);
1801 if (oldact.sa_handler != SIG_IGN)
1802 sigaction (SIGINT, &newact, NULL);
1803 sigaction (SIGHUP, NULL, &oldact);
1804 if (oldact.sa_handler != SIG_IGN)
1805 sigaction (SIGHUP, &newact, NULL);
1806 sigaction (SIGPIPE, NULL, &oldact);
1807 if (oldact.sa_handler != SIG_IGN)
1808 sigaction (SIGPIPE, &newact, NULL);
1809 sigaction (SIGTERM, NULL, &oldact);
1810 if (oldact.sa_handler != SIG_IGN)
1811 sigaction (SIGTERM, &newact, NULL);
1812 #else /* !SA_INTERRUPT */
1813 if (signal (SIGINT, SIG_IGN) != SIG_IGN)
1814 signal (SIGINT, sighandler);
1815 if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
1816 signal (SIGHUP, sighandler);
1817 if (signal (SIGPIPE, SIG_IGN) != SIG_IGN)
1818 signal (SIGPIPE, sighandler);
1819 if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
1820 signal (SIGTERM, sighandler);
1821 #endif /* !SA_INTERRUPT */
1823 gkey.sword = gkey.eword = -1;
1825 gkey.translate = NULL;
1826 gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = 0;
1827 gkey.skipsblanks = gkey.skipeblanks = 0;
1829 files = (char **) xmalloc (sizeof (char *) * argc);
1831 for (i = 1; i < argc; ++i)
1833 if (argv[i][0] == '+')
1837 key = (struct keyfield *) xmalloc (sizeof (struct keyfield));
1840 key->translate = NULL;
1841 key->skipsblanks = key->skipeblanks = 0;
1842 key->numeric = key->general_numeric = key->month = key->reverse = 0;
1844 if (! (digits[UCHAR (*s)] || (*s == '.' && digits[UCHAR (s[1])])))
1845 badfieldspec (argv[i]);
1846 for (t = 0; digits[UCHAR (*s)]; ++s)
1847 t = 10 * t + *s - '0';
1850 for (++s; digits[UCHAR (*s)]; ++s)
1851 t2 = 10 * t2 + *s - '0';
1859 s = set_ordering (s, key, bl_start);
1861 badfieldspec (argv[i]);
1863 else if (argv[i][0] == '-' && argv[i][1])
1866 if (digits[UCHAR (*s)] || (*s == '.' && digits[UCHAR (s[1])]))
1870 for (t = 0; digits[UCHAR (*s)]; ++s)
1871 t = t * 10 + *s - '0';
1874 for (++s; digits[UCHAR (*s)]; ++s)
1875 t2 = t2 * 10 + *s - '0';
1878 s = set_ordering (s, key, bl_end);
1880 badfieldspec (argv[i]);
1887 s = set_ordering (s, &gkey, bl_both);
1901 error (2, 0, _("option `-k' requires an argument"));
1907 key = (struct keyfield *)
1908 xmalloc (sizeof (struct keyfield));
1909 memset (key, 0, sizeof (struct keyfield));
1912 key->translate = NULL;
1913 key->skipsblanks = key->skipeblanks = 0;
1914 key->numeric = key->month = key->reverse = 0;
1916 if (!digits[UCHAR (*s)])
1917 badfieldspec (argv[i]);
1918 for (t = 0; digits[UCHAR (*s)]; ++s)
1919 t = 10 * t + *s - '0';
1922 /* Provoke with `sort -k0' */
1923 error (0, 0, _("the starting field number argument \
1924 to the `-k' option must be positive"));
1925 badfieldspec (argv[i]);
1931 if (!digits[UCHAR (s[1])])
1933 /* Provoke with `sort -k1.' */
1934 error (0, 0, _("starting field spec has `.' but \
1935 lacks following character offset"));
1936 badfieldspec (argv[i]);
1938 for (++s; digits[UCHAR (*s)]; ++s)
1939 t2 = 10 * t2 + *s - '0';
1942 /* Provoke with `sort -k1.0' */
1943 error (0, 0, _("starting field character offset \
1944 argument to the `-k' option\nmust be positive"));
1945 badfieldspec (argv[i]);
1956 s = set_ordering (s, key, bl_start);
1963 badfieldspec (argv[i]);
1966 /* Skip over comma. */
1970 /* Provoke with `sort -k1,' */
1971 error (0, 0, _("field specification has `,' but \
1972 lacks following field spec"));
1973 badfieldspec (argv[i]);
1976 for (t = 0; digits[UCHAR (*s)]; ++s)
1977 t = t * 10 + *s - '0';
1980 /* Provoke with `sort -k1,0' */
1981 error (0, 0, _("ending field number argument \
1982 to the `-k' option must be positive"));
1983 badfieldspec (argv[i]);
1989 if (!digits[UCHAR (s[1])])
1991 /* Provoke with `sort -k1,1.' */
1992 error (0, 0, _("ending field spec has `.' \
1993 but lacks following character offset"));
1994 badfieldspec (argv[i]);
1996 for (++s; digits[UCHAR (*s)]; ++s)
1997 t2 = t2 * 10 + *s - '0';
2001 /* `-k 2,3' is equivalent to `+1 -3'. */
2006 s = set_ordering (s, key, bl_end);
2008 badfieldspec (argv[i]);
2022 error (2, 0, _("option `-o' requires an argument"));
2024 outfile = argv[++i];
2033 else if (i < argc - 1)
2039 error (2, 0, _("option `-t' requires an argument"));
2043 temp_file_prefix = ++s;
2047 temp_file_prefix = argv[++i];
2049 error (2, 0, _("option `-T' requires an argument"));
2057 /* Accept and ignore e.g. -y0 for compatibility with
2061 fprintf (stderr, _("%s: unrecognized option `-%c'\n"),
2069 else /* Not an option. */
2071 files[nfiles++] = argv[i];
2079 /* Inheritance of global options to individual keys. */
2080 for (key = keyhead.next; key; key = key->next)
2081 if (!key->ignore && !key->translate && !key->skipsblanks && !key->reverse
2082 && !key->skipeblanks && !key->month && !key->numeric
2083 && !key->general_numeric)
2085 key->ignore = gkey.ignore;
2086 key->translate = gkey.translate;
2087 key->skipsblanks = gkey.skipsblanks;
2088 key->skipeblanks = gkey.skipeblanks;
2089 key->month = gkey.month;
2090 key->numeric = gkey.numeric;
2091 key->general_numeric = gkey.general_numeric;
2092 key->reverse = gkey.reverse;
2095 if (!keyhead.next && (gkey.ignore || gkey.translate || gkey.skipsblanks
2096 || gkey.skipeblanks || gkey.month || gkey.numeric
2097 || gkey.general_numeric))
2099 reverse = gkey.reverse;
2108 exit (check (files, nfiles) != 0);
2110 if (strcmp (outfile, "-"))
2112 struct stat outstat;
2113 if (stat (outfile, &outstat) == 0)
2115 /* The following code prevents a race condition when
2116 people use the brain dead shell programming idiom:
2117 cat file | sort -o file
2118 This feature is provided for historical compatibility,
2119 but we strongly discourage ever relying on this in
2120 new shell programs. */
2122 /* Temporarily copy each input file that might be another name
2123 for the output file. When in doubt (e.g. a pipe), copy. */
2124 for (i = 0; i < nfiles; ++i)
2130 if (S_ISREG (outstat.st_mode) && strcmp (outfile, files[i]))
2133 if ((strcmp (files[i], "-")
2134 ? stat (files[i], &instat)
2135 : fstat (fileno (stdin), &instat)) != 0)
2137 error (0, errno, "%s", files[i]);
2141 if (S_ISREG (instat.st_mode)
2142 && (instat.st_ino != outstat.st_ino
2143 || instat.st_dev != outstat.st_dev))
2145 /* We know the files are distinct. */
2150 fp = xfopen (files[i], "r");
2152 ofp = xtmpfopen (tmp);
2153 while ((cc = fread (buf, 1, sizeof buf, fp)) > 0)
2154 xfwrite (buf, 1, cc, ofp);
2157 error (0, errno, "%s", files[i]);
2166 ofp = xfopen (outfile, "w");
2172 merge (files, nfiles, ofp);
2174 sort (files, nfiles, ofp);
2177 /* If we wait for the implicit flush on exit, and the parent process
2178 has closed stdout (e.g., exec >&- in a shell), then the output file
2179 winds up empty. I don't understand why. This is under SunOS,
2180 Solaris, Ultrix, and Irix. This premature fflush makes the output
2181 reappear. --karl@cs.umb.edu */
2182 if (fflush (ofp) < 0)
2183 error (1, errno, _("%s: write error"), outfile);
2185 if (have_read_stdin && fclose (stdin) == EOF)
2186 error (1, errno, outfile);
2187 if (ferror (stdout) || fclose (stdout) == EOF)
2188 error (1, errno, _("%s: write error"), outfile);