1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 1988, 1991, 1992, 1993, 1994, 1995 Free Software Foundation
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 Written December 1988 by Mike Haertel.
19 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
20 or (US mail) as Mike Haertel c/o Free Software Foundation. */
23 * $FreeBSD: src/gnu/usr.bin/sort/sort.c,v 1.15.2.4 2002/04/17 11:41:42 ache Exp $
24 * $DragonFly: src/gnu/usr.bin/sort/sort.c,v 1.3 2004/02/03 19:22:59 dillon Exp $
29 /* Get isblank from GNU libc. */
32 #include <sys/types.h>
40 #include "long-options.h"
57 /* Undefine, to avoid warning about redefinition on some systems. */
59 #define min(a, b) ((a) < (b) ? (a) : (b))
61 #define UCHAR_LIM (UCHAR_MAX + 1)
62 #define UCHAR(c) ((unsigned char) (c))
64 #ifndef DEFAULT_TMPDIR
65 #define DEFAULT_TMPDIR "/tmp"
68 /* The kind of blanks for '-b' to skip in various options. */
69 enum blanktype { bl_start, bl_end, bl_both };
71 /* Lines are held in core as counted strings. */
74 char *text; /* Text of the line. */
75 int length; /* Length not including final newline. */
76 char *keybeg; /* Start of first key. */
77 char *keylim; /* Limit of first key. */
80 /* Arrays of lines. */
83 struct line *lines; /* Dynamically allocated array of lines. */
84 int used; /* Number of slots used. */
85 int alloc; /* Number of slots allocated. */
86 int limit; /* Max number of slots to allocate. */
92 char *buf; /* Dynamically allocated buffer. */
93 int used; /* Number of bytes used. */
94 int alloc; /* Number of bytes allocated. */
95 int left; /* Number of bytes left after line parsing. */
100 int sword; /* Zero-origin 'word' to start at. */
101 int schar; /* Additional characters to skip. */
102 int skipsblanks; /* Skip leading white space at start. */
103 int eword; /* Zero-origin first word after field. */
104 int echar; /* Additional characters in field. */
105 int skipeblanks; /* Skip trailing white space at finish. */
106 int *ignore; /* Boolean array of characters to ignore. */
107 char *translate; /* Translation applied to characters. */
108 int numeric; /* Flag for numeric comparison. Handle
109 strings of digits with optional decimal
110 point, but no exponential notation. */
111 int general_numeric; /* Flag for general, numeric comparison.
112 Handle numbers in exponential notation. */
113 int month; /* Flag for comparison by month name. */
114 int reverse; /* Reverse the sense of comparison. */
115 struct keyfield *next; /* Next keyfield to try. */
124 /* The name this program was run with. */
127 /* Table of digits. */
128 static int digits[UCHAR_LIM];
130 /* Table of white space. */
131 static int blanks[UCHAR_LIM];
133 /* Table of non-printing characters. */
134 static int nonprinting[UCHAR_LIM];
136 /* Table of non-dictionary characters (not letters, digits, or blanks). */
137 static int nondictionary[UCHAR_LIM];
139 /* Translation table folding lower case to upper. */
140 static char fold_toupper[UCHAR_LIM];
142 /* Table mapping 3-letter month names to integers.
143 Alphabetic order allows binary search. */
144 static struct month const monthtab[] =
160 /* During the merge phase, the number of files to merge at once. */
163 /* Initial buffer size for in core sorting. Will not grow unless a
164 line longer than this is seen. */
165 static int sortalloc = 512 * 1024;
167 /* Initial buffer size for in core merge buffers. Bear in mind that
168 up to NMERGE * mergealloc bytes may be allocated for merge buffers. */
169 static int mergealloc = 16 * 1024;
171 /* Guess of average line length. */
172 static int linelength = 30;
174 /* Maximum number of elements for the array(s) of struct line's, in bytes. */
175 #define LINEALLOC (256 * 1024)
177 /* Prefix for temporary file names. */
178 static char *temp_file_prefix;
180 /* Flag to reverse the order of all comparisons. */
183 /* Flag for stable sort. This turns off the last ditch bytewise
184 comparison of lines, and instead leaves lines in the same order
185 they were read if all keys compare equal. */
188 /* Tab character separating fields. If NUL, then fields are separated
189 by the empty string between a non-whitespace character and a whitespace
193 /* Flag to remove consecutive duplicate lines from the output.
194 Only the last of a sequence of equal lines will be output. */
197 /* Nonzero if any of the input files are the standard input. */
198 static int have_read_stdin;
200 /* Lists of key field comparisons to be tried. */
201 static struct keyfield keyhead;
204 static unsigned char decimal_point;
207 COLLDIFF (int a, int b)
211 if ((unsigned char)a == (unsigned char)b)
215 return strcoll(s[0], s[1]);
219 collcmp(char *a, char *b, int mini)
233 #endif /* __DragonFly__ */
239 fprintf (stderr, _("Try `%s --help' for more information.\n"),
244 Usage: %s [OPTION]... [FILE]...\n\
248 Write sorted concatenation of all FILE(s) to standard output.\n\
250 +POS1 [-POS2] start a key at POS1, end it before POS2\n\
251 -M compare (unknown) < `JAN' < ... < `DEC', imply -b\n\
252 -T DIRECT use DIRECT for temporary files, not $TMPDIR or %s\n\
253 -b ignore leading blanks in sort fields or keys\n\
254 -c check if given files already sorted, do not sort\n\
255 -d consider only [a-zA-Z0-9 ] characters in keys\n\
256 -f fold lower case to upper case characters in keys\n\
257 -g compare according to general numerical value, imply -b\n\
258 -i consider only [\\040-\\0176] characters in keys\n\
259 -k POS1[,POS2] same as +POS1 [-POS2], but all positions counted from 1\n\
260 -m merge already sorted files, do not sort\n\
261 -n compare according to string numerical value, imply -b\n\
262 -o FILE write result on FILE instead of standard output\n\
263 -r reverse the result of comparisons\n\
264 -s stabilize sort by disabling last resort comparison\n\
265 -t SEP use SEParator instead of non- to whitespace transition\n\
266 -u with -c, check for strict ordering\n\
267 -u with -m, only output the first of an equal sequence\n\
268 --help display this help and exit\n\
269 --version output version information and exit\n\
271 POS is F[.C][OPTS], where F is the field number and C the character\n\
272 position in the field, both counted from zero. OPTS is made up of one\n\
273 or more of Mbdfinr, this effectively disable global -Mbdfinr settings\n\
274 for that key. If no key given, use the entire line as key. With no\n\
275 FILE, or when FILE is -, read standard input.\n\
282 /* The list of temporary files. */
283 static struct tempnode
286 struct tempnode *next;
289 /* Clean up any remaining temporary files. */
294 struct tempnode *node;
296 for (node = temphead.next; node; node = node->next)
300 /* Allocate N bytes of memory dynamically, with error checking. */
303 xmalloc (unsigned int n)
310 error (0, 0, _("virtual memory exhausted"));
317 /* Change the size of an allocated block of memory P to N bytes,
319 If P is NULL, run xmalloc.
320 If N is 0, run free and return NULL. */
323 xrealloc (char *p, unsigned int n)
335 error (0, 0, _("virtual memory exhausted"));
343 xtmpfopen (const char *file)
348 fd = open (file, O_WRONLY | O_CREAT | O_TRUNC, 0600);
349 if (fd < 0 || (fp = fdopen (fd, "w")) == NULL)
351 error (0, errno, "%s", file);
360 xfopen (const char *file, const char *how)
364 if (strcmp (file, "-") == 0)
370 if ((fp = fopen (file, how)) == NULL)
372 error (0, errno, "%s", file);
388 /* Allow reading stdin from tty more than once. */
392 else if (fp == stdout)
394 if (fflush (fp) != 0)
396 error (0, errno, _("flushing file"));
403 if (fclose (fp) != 0)
405 error (0, errno, _("error closing file"));
413 xfwrite (const char *buf, int size, int nelem, FILE *fp)
415 if (fwrite (buf, size, nelem, fp) != nelem)
417 error (0, errno, _("write error"));
423 /* Return a name for a temporary file. */
429 int len = strlen (temp_file_prefix);
430 char *name = xmalloc (len + 1 + sizeof ("sort") - 1 + 5 + 5 + 1);
431 struct tempnode *node;
433 node = (struct tempnode *) xmalloc (sizeof (struct tempnode));
437 (len && temp_file_prefix[len - 1] != '/') ? "/" : "");
439 if ((fd = mkstemp(name)) == -1)
441 error (0, errno, _("mkstemp error"));
448 node->next = temphead.next;
449 temphead.next = node;
453 /* Search through the list of temporary files for NAME;
454 remove it if it is found on the list. */
459 struct tempnode *node, *temp;
461 for (node = &temphead; node->next; node = node->next)
462 if (!strcmp (name, node->next->name))
469 node->next = temp->next;
470 free ((char *) temp);
474 /* Initialize the character class tables. */
481 for (i = 0; i < UCHAR_LIM; ++i)
489 if (!ISALNUM (i) && !ISBLANK (i))
490 nondictionary[i] = 1;
492 fold_toupper[i] = toupper (i);
498 /* Initialize BUF, allocating ALLOC bytes initially. */
501 initbuf (struct buffer *buf, int alloc)
504 buf->buf = xmalloc (buf->alloc);
505 buf->used = buf->left = 0;
508 /* Fill BUF reading from FP, moving buf->left bytes from the end
509 of buf->buf to the beginning first. If EOF is reached and the
510 file wasn't terminated by a newline, supply one. Return a count
511 of bytes buffered. */
514 fillbuf (struct buffer *buf, FILE *fp)
518 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
519 buf->used = buf->left;
521 while (!feof (fp) && (buf->used == 0 || !memchr (buf->buf, '\n', buf->used)))
523 if (buf->used == buf->alloc)
526 buf->buf = xrealloc (buf->buf, buf->alloc);
528 cc = fread (buf->buf + buf->used, 1, buf->alloc - buf->used, fp);
531 error (0, errno, _("read error"));
538 if (feof (fp) && buf->used && buf->buf[buf->used - 1] != '\n')
540 if (buf->used == buf->alloc)
543 buf->buf = xrealloc (buf->buf, buf->alloc);
545 buf->buf[buf->used++] = '\n';
551 /* Initialize LINES, allocating space for ALLOC lines initially.
552 LIMIT is the maximum possible number of lines to allocate space
556 initlines (struct lines *lines, int alloc, int limit)
558 lines->alloc = alloc;
559 lines->lines = (struct line *) xmalloc (lines->alloc * sizeof (struct line));
561 lines->limit = limit;
564 /* Return a pointer to the first character of the field specified
568 begfield (const struct line *line, const struct keyfield *key)
570 register char *ptr = line->text, *lim = ptr + line->length;
571 register int sword = key->sword, schar = key->schar;
574 while (ptr < lim && sword--)
576 while (ptr < lim && *ptr != tab)
582 while (ptr < lim && sword--)
584 while (ptr < lim && blanks[UCHAR (*ptr)])
586 while (ptr < lim && !blanks[UCHAR (*ptr)])
590 if (key->skipsblanks)
591 while (ptr < lim && blanks[UCHAR (*ptr)])
594 if (ptr + schar <= lim)
602 /* Return the limit of (a pointer to the first character after) the field
603 in LINE specified by KEY. */
606 limfield (const struct line *line, const struct keyfield *key)
608 register char *ptr = line->text, *lim = ptr + line->length;
609 register int eword = key->eword, echar = key->echar;
611 /* Note: from the POSIX spec:
612 The leading field separator itself is included in
613 a field when -t is not used. FIXME: move this comment up... */
615 /* Move PTR past EWORD fields or to one past the last byte on LINE,
616 whichever comes first. If there are more than EWORD fields, leave
617 PTR pointing at the beginning of the field having zero-based index,
618 EWORD. If a delimiter character was specified (via -t), then that
619 `beginning' is the first character following the delimiting TAB.
620 Otherwise, leave PTR pointing at the first `blank' character after
621 the preceding field. */
623 while (ptr < lim && eword--)
625 while (ptr < lim && *ptr != tab)
627 if (ptr < lim && (eword || echar > 0))
631 while (ptr < lim && eword--)
633 while (ptr < lim && blanks[UCHAR (*ptr)])
635 while (ptr < lim && !blanks[UCHAR (*ptr)])
639 /* Make LIM point to the end of (one byte past) the current field. */
643 newlim = memchr (ptr, tab, lim - ptr);
651 while (newlim < lim && blanks[UCHAR (*newlim)])
653 while (newlim < lim && !blanks[UCHAR (*newlim)])
658 /* If we're skipping leading blanks, don't start counting characters
659 until after skipping past any leading blanks. */
660 if (key->skipsblanks)
661 while (ptr < lim && blanks[UCHAR (*ptr)])
664 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
665 if (ptr + echar <= lim)
676 trim_trailing_blanks (const char *a_start, char **a_end)
678 while (*a_end > a_start && blanks[UCHAR (*(*a_end - 1))])
682 /* Find the lines in BUF, storing pointers and lengths in LINES.
683 Also replace newlines in BUF with NULs. */
686 findlines (struct buffer *buf, struct lines *lines)
688 register char *beg = buf->buf, *lim = buf->buf + buf->used, *ptr;
689 struct keyfield *key = keyhead.next;
693 while (beg < lim && (ptr = memchr (beg, '\n', lim - beg))
694 && lines->used < lines->limit)
696 /* There are various places in the code that rely on a NUL
697 being at the end of in-core lines; NULs inside the lines
698 will not cause trouble, though. */
701 if (lines->used == lines->alloc)
704 lines->lines = (struct line *)
705 xrealloc ((char *) lines->lines,
706 lines->alloc * sizeof (struct line));
709 lines->lines[lines->used].text = beg;
710 lines->lines[lines->used].length = ptr - beg;
712 /* Precompute the position of the first key for efficiency. */
716 lines->lines[lines->used].keylim =
717 limfield (&lines->lines[lines->used], key);
719 lines->lines[lines->used].keylim = ptr;
722 lines->lines[lines->used].keybeg =
723 begfield (&lines->lines[lines->used], key);
726 if (key->skipsblanks)
727 while (blanks[UCHAR (*beg)])
729 lines->lines[lines->used].keybeg = beg;
731 if (key->skipeblanks)
733 trim_trailing_blanks (lines->lines[lines->used].keybeg,
734 &lines->lines[lines->used].keylim);
739 lines->lines[lines->used].keybeg = 0;
740 lines->lines[lines->used].keylim = 0;
747 buf->left = lim - beg;
750 /* Compare strings A and B containing decimal fractions < 1. Each string
751 should begin with a decimal point followed immediately by the digits
752 of the fraction. Strings not of this form are considered to be zero. */
755 fraccompare (register const char *a, register const char *b)
757 register tmpa = UCHAR (*a), tmpb = UCHAR (*b);
760 if (tmpa == decimal_point && tmpb == decimal_point)
762 if (tmpa == '.' && tmpb == '.')
766 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
767 while (tmpa == tmpb && digits[tmpa]);
768 if (digits[tmpa] && digits[tmpb])
789 else if (tmpa == decimal_point)
791 else if (tmpa == '.')
802 else if (tmpb == decimal_point)
804 else if (tmpb == '.')
817 /* Compare strings A and B as numbers without explicitly converting them to
818 machine numbers. Comparatively slow for short strings, but asymptotically
822 numcompare (register const char *a, register const char *b)
824 register int tmpa, tmpb, loga, logb, tmp;
842 if (tmpa == decimal_point)
854 if (tmpb == decimal_point)
869 while (tmpa == tmpb && digits[tmpa])
870 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
873 if ((tmpa == decimal_point && !digits[tmpb]) ||
874 (tmpb == decimal_point && !digits[tmpa]))
876 if ((tmpa == '.' && !digits[tmpb]) || (tmpb == '.' && !digits[tmpa]))
878 return -fraccompare (a, b);
881 for (loga = 1; digits[UCHAR (*++a)]; ++loga)
887 for (logb = 1; digits[UCHAR (*++b)]; ++logb)
892 if ((tmp = logb - loga) != 0)
899 return COLLDIFF (tmpb, tmpa);
904 else if (tmpb == '-')
910 if (tmpb == decimal_point)
922 if (tmpa == decimal_point)
940 while (tmpa == tmpb && digits[tmpa])
941 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
944 if ((tmpa == decimal_point && !digits[tmpb]) ||
945 (tmpb == decimal_point && !digits[tmpa]))
947 if ((tmpa == '.' && !digits[tmpb]) || (tmpb == '.' && !digits[tmpa]))
949 return fraccompare (a, b);
952 for (loga = 1; digits[UCHAR (*++a)]; ++loga)
958 for (logb = 1; digits[UCHAR (*++b)]; ++logb)
963 if ((tmp = loga - logb) != 0)
970 return COLLDIFF (tmpa, tmpb);
978 general_numcompare (const char *sa, const char *sb)
981 /* FIXME: add option to warn about failed conversions. */
982 /* FIXME: maybe add option to try expensive FP conversion
983 only if A and B can't be compared more cheaply/accurately. */
984 if (xstrtod (sa, NULL, &a))
988 if (xstrtod (sb, NULL, &b))
992 return a == b ? 0 : a < b ? -1 : 1;
995 /* Return an integer <= 12 associated with month name S with length LEN,
996 0 if the name in S is not recognized. */
999 getmonth (const char *s, int len)
1002 register int i, lo = 0, hi = 12;
1004 while (len > 0 && blanks[UCHAR(*s)])
1010 for (i = 0; i < 3; ++i)
1011 month[i] = fold_toupper[UCHAR (s[i])];
1015 if (strcmp (month, monthtab[(lo + hi) / 2].name) < 0)
1019 if (!strcmp (month, monthtab[lo].name))
1020 return monthtab[lo].val;
1024 /* Compare two lines A and B trying every key in sequence until there
1025 are no more keys or a difference is found. */
1028 keycompare (const struct line *a, const struct line *b)
1030 register char *texta, *textb, *lima, *limb, *translate;
1031 register int *ignore;
1032 struct keyfield *key;
1033 int diff = 0, iter = 0, lena, lenb;
1035 for (key = keyhead.next; key; key = key->next, ++iter)
1037 ignore = key->ignore;
1038 translate = key->translate;
1040 /* Find the beginning and limit of each field. */
1041 if (iter || a->keybeg == NULL || b->keybeg == NULL)
1043 if (key->eword >= 0)
1044 lima = limfield (a, key), limb = limfield (b, key);
1046 lima = a->text + a->length, limb = b->text + b->length;
1048 if (key->sword >= 0)
1049 texta = begfield (a, key), textb = begfield (b, key);
1052 texta = a->text, textb = b->text;
1053 if (key->skipsblanks)
1055 while (texta < lima && blanks[UCHAR (*texta)])
1057 while (textb < limb && blanks[UCHAR (*textb)])
1064 /* For the first iteration only, the key positions have
1065 been precomputed for us. */
1066 texta = a->keybeg, lima = a->keylim;
1067 textb = b->keybeg, limb = b->keylim;
1070 /* Find the lengths. */
1071 lena = lima - texta, lenb = limb - textb;
1077 if (key->skipeblanks)
1079 char *a_end = texta + lena;
1080 char *b_end = textb + lenb;
1081 trim_trailing_blanks (texta, &a_end);
1082 trim_trailing_blanks (textb, &b_end);
1083 lena = a_end - texta;
1084 lenb = b_end - textb;
1087 /* Actually compare the fields. */
1092 char savea = *lima, saveb = *limb;
1094 *lima = *limb = '\0';
1095 diff = numcompare (texta, textb);
1096 *lima = savea, *limb = saveb;
1099 diff = numcompare (texta, textb);
1102 return key->reverse ? -diff : diff;
1105 else if (key->general_numeric)
1109 char savea = *lima, saveb = *limb;
1111 *lima = *limb = '\0';
1112 diff = general_numcompare (texta, textb);
1113 *lima = savea, *limb = saveb;
1116 diff = general_numcompare (texta, textb);
1119 return key->reverse ? -diff : diff;
1122 else if (key->month)
1124 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1126 return key->reverse ? -diff : diff;
1129 else if (ignore && translate)
1131 #ifdef __DragonFly__
1132 #define CMP_FUNC(A, B) COLLDIFF ((A), (B))
1134 #define CMP_FUNC(A, B) (A) - (B)
1136 #define CMP_WITH_IGNORE(A, B) \
1139 while (texta < lima && textb < limb) \
1141 while (texta < lima && ignore[UCHAR (*texta)]) \
1143 while (textb < limb && ignore[UCHAR (*textb)]) \
1145 if (texta < lima && textb < limb) \
1149 diff = CMP_FUNC((A), (B)); \
1156 if (texta == lima && textb < limb && !ignore[UCHAR (*textb)]) \
1158 else if (texta < lima && textb == limb \
1159 && !ignore[UCHAR (*texta)]) \
1165 while (texta < lima && ignore[UCHAR (*texta)]) \
1167 while (textb < limb && ignore[UCHAR (*textb)]) \
1170 if (texta == lima && textb < limb) \
1172 else if (texta < lima && textb == limb) \
1175 /* Relative lengths are meaningless if characters were ignored. \
1176 Handling this case here avoids what might be an invalid length \
1177 comparison below. */ \
1178 if (diff == 0 && texta == lima && textb == limb) \
1183 CMP_WITH_IGNORE (translate[UCHAR (*texta)], translate[UCHAR (*textb)]);
1185 CMP_WITH_IGNORE (*texta, *textb);
1187 while (texta < lima && textb < limb)
1189 if (translate[UCHAR (*texta++)] != translate[UCHAR (*textb++)])
1191 #ifdef __DragonFly__
1192 diff = COLLDIFF (translate[UCHAR (*--texta)],
1193 translate[UCHAR (*--textb)]);
1195 diff = (translate[UCHAR (*--texta)]
1196 - translate[UCHAR (*--textb)]);
1202 #ifdef __DragonFly__
1203 diff = collcmp (texta, textb, min (lena, lenb));
1205 diff = memcmp (texta, textb, min (lena, lenb));
1209 return key->reverse ? -diff : diff;
1210 if ((diff = lena - lenb) != 0)
1211 return key->reverse ? -diff : diff;
1217 /* Compare two lines A and B, returning negative, zero, or positive
1218 depending on whether A compares less than, equal to, or greater than B. */
1221 compare (register const struct line *a, register const struct line *b)
1223 int diff, tmpa, tmpb, mini;
1225 /* First try to compare on the specified keys (if any).
1226 The only two cases with no key at all are unadorned sort,
1227 and unadorned sort -r. */
1230 diff = keycompare (a, b);
1233 if (unique || stable)
1237 /* If the keys all compare equal (or no keys were specified)
1238 fall through to the default byte-by-byte comparison. */
1239 tmpa = a->length, tmpb = b->length;
1240 mini = min (tmpa, tmpb);
1245 char *ap = a->text, *bp = b->text;
1247 #ifndef __DragonFly__
1248 diff = UCHAR (*ap) - UCHAR (*bp);
1252 #ifdef __DragonFly__
1253 diff = collcmp (ap, bp, mini);
1255 diff = memcmp (ap, bp, mini);
1259 #ifndef __DragonFly__
1264 return reverse ? -diff : diff;
1267 /* Check that the lines read from the given FP come in order. Return
1268 1 if they do and 0 if there is a disorder.
1269 FIXME: return number of first out-of-order line if not sorted. */
1274 struct buffer buf; /* Input buffer. */
1275 struct lines lines; /* Lines scanned from the buffer. */
1276 struct line temp; /* Copy of previous line. */
1277 int cc; /* Character count. */
1278 int alloc, sorted = 1;
1280 initbuf (&buf, mergealloc);
1281 initlines (&lines, mergealloc / linelength + 1,
1282 LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1284 temp.text = xmalloc (alloc);
1286 cc = fillbuf (&buf, fp);
1290 findlines (&buf, &lines);
1294 struct line *prev_line; /* Pointer to previous line. */
1295 int cmp; /* Result of calling compare. */
1298 /* Compare each line in the buffer with its successor. */
1299 for (i = 0; i < lines.used - 1; ++i)
1301 cmp = compare (&lines.lines[i], &lines.lines[i + 1]);
1302 if ((unique && cmp >= 0) || (cmp > 0))
1309 /* Save the last line of the buffer and refill the buffer. */
1310 prev_line = lines.lines + (lines.used - 1);
1311 if (prev_line->length > alloc)
1313 while (prev_line->length + 1 > alloc)
1315 temp.text = xrealloc (temp.text, alloc);
1317 memcpy (temp.text, prev_line->text, prev_line->length + 1);
1318 temp.length = prev_line->length;
1319 temp.keybeg = temp.text + (prev_line->keybeg - prev_line->text);
1320 temp.keylim = temp.text + (prev_line->keylim - prev_line->text);
1322 cc = fillbuf (&buf, fp);
1326 findlines (&buf, &lines);
1327 /* Make sure the line saved from the old buffer contents is
1328 less than or equal to the first line of the new buffer. */
1329 cmp = compare (&temp, &lines.lines[0]);
1330 if ((unique && cmp >= 0) || (cmp > 0))
1340 free ((char *) lines.lines);
1345 /* Merge lines from FPS onto OFP. NFPS cannot be greater than NMERGE.
1346 Close FPS before returning. */
1349 mergefps (FILE **fps, register int nfps, FILE *ofp)
1351 struct buffer buffer[NMERGE]; /* Input buffers for each file. */
1352 struct lines lines[NMERGE]; /* Line tables for each buffer. */
1353 struct line saved; /* Saved line for unique check. */
1354 int savedflag = 0; /* True if there is a saved line. */
1355 int savealloc; /* Size allocated for the saved line. */
1356 int cur[NMERGE]; /* Current line in each line table. */
1357 int ord[NMERGE]; /* Table representing a permutation of fps,
1358 such that lines[ord[0]].lines[cur[ord[0]]]
1359 is the smallest line and will be next
1361 register int i, j, t;
1363 #ifdef lint /* Suppress `used before initialized' warning. */
1367 /* Allocate space for a saved line if necessary. */
1370 savealloc = linelength;
1371 saved.text = xmalloc (savealloc);
1374 /* Read initial lines from each input file. */
1375 for (i = 0; i < nfps; ++i)
1377 initbuf (&buffer[i], mergealloc);
1378 /* If a file is empty, eliminate it from future consideration. */
1379 while (i < nfps && !fillbuf (&buffer[i], fps[i]))
1383 for (j = i; j < nfps; ++j)
1384 fps[j] = fps[j + 1];
1387 free (buffer[i].buf);
1390 initlines (&lines[i], mergealloc / linelength + 1,
1391 LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1392 findlines (&buffer[i], &lines[i]);
1397 /* Set up the ord table according to comparisons among input lines.
1398 Since this only reorders two items if one is strictly greater than
1399 the other, it is stable. */
1400 for (i = 0; i < nfps; ++i)
1402 for (i = 1; i < nfps; ++i)
1403 if (compare (&lines[ord[i - 1]].lines[cur[ord[i - 1]]],
1404 &lines[ord[i]].lines[cur[ord[i]]]) > 0)
1405 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
1407 /* Repeatedly output the smallest line until no input remains. */
1410 /* If uniqified output is turned on, output only the first of
1411 an identical series of lines. */
1414 if (savedflag && compare (&saved, &lines[ord[0]].lines[cur[ord[0]]]))
1416 xfwrite (saved.text, 1, saved.length, ofp);
1422 if (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1424 while (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1426 saved.text = xrealloc (saved.text, savealloc);
1428 saved.length = lines[ord[0]].lines[cur[ord[0]]].length;
1429 memcpy (saved.text, lines[ord[0]].lines[cur[ord[0]]].text,
1431 if (lines[ord[0]].lines[cur[ord[0]]].keybeg != NULL)
1433 saved.keybeg = saved.text +
1434 (lines[ord[0]].lines[cur[ord[0]]].keybeg
1435 - lines[ord[0]].lines[cur[ord[0]]].text);
1437 if (lines[ord[0]].lines[cur[ord[0]]].keylim != NULL)
1439 saved.keylim = saved.text +
1440 (lines[ord[0]].lines[cur[ord[0]]].keylim
1441 - lines[ord[0]].lines[cur[ord[0]]].text);
1448 xfwrite (lines[ord[0]].lines[cur[ord[0]]].text, 1,
1449 lines[ord[0]].lines[cur[ord[0]]].length, ofp);
1453 /* Check if we need to read more lines into core. */
1454 if (++cur[ord[0]] == lines[ord[0]].used)
1455 if (fillbuf (&buffer[ord[0]], fps[ord[0]]))
1457 findlines (&buffer[ord[0]], &lines[ord[0]]);
1462 /* We reached EOF on fps[ord[0]]. */
1463 for (i = 1; i < nfps; ++i)
1464 if (ord[i] > ord[0])
1467 xfclose (fps[ord[0]]);
1468 free (buffer[ord[0]].buf);
1469 free ((char *) lines[ord[0]].lines);
1470 for (i = ord[0]; i < nfps; ++i)
1472 fps[i] = fps[i + 1];
1473 buffer[i] = buffer[i + 1];
1474 lines[i] = lines[i + 1];
1475 cur[i] = cur[i + 1];
1477 for (i = 0; i < nfps; ++i)
1478 ord[i] = ord[i + 1];
1482 /* The new line just read in may be larger than other lines
1483 already in core; push it back in the queue until we encounter
1484 a line larger than it. */
1485 for (i = 1; i < nfps; ++i)
1487 t = compare (&lines[ord[0]].lines[cur[ord[0]]],
1488 &lines[ord[i]].lines[cur[ord[i]]]);
1490 t = ord[0] - ord[i];
1495 for (j = 1; j < i; ++j)
1496 ord[j - 1] = ord[j];
1500 if (unique && savedflag)
1502 xfwrite (saved.text, 1, saved.length, ofp);
1508 /* Sort the array LINES with NLINES members, using TEMP for temporary space. */
1511 sortlines (struct line *lines, int nlines, struct line *temp)
1513 register struct line *lo, *hi, *t;
1514 register int nlo, nhi;
1518 if (compare (&lines[0], &lines[1]) > 0)
1519 *temp = lines[0], lines[0] = lines[1], lines[1] = *temp;
1529 sortlines (lo, nlo, temp);
1532 sortlines (hi, nhi, temp);
1537 if (compare (lo, hi) <= 0)
1538 *t++ = *lo++, --nlo;
1540 *t++ = *hi++, --nhi;
1544 for (lo = lines, nlo = nlines - nhi, t = temp; nlo; --nlo)
1548 /* Check that each of the NFILES FILES is ordered.
1549 Return a count of disordered files. */
1552 check (char **files, int nfiles)
1554 int i, disorders = 0;
1557 for (i = 0; i < nfiles; ++i)
1559 fp = xfopen (files[i], "r");
1562 fprintf (stderr, _("%s: disorder on %s\n"), program_name, files[i]);
1569 /* Merge NFILES FILES onto OFP. */
1572 merge (char **files, int nfiles, FILE *ofp)
1576 FILE *fps[NMERGE], *tfp;
1578 while (nfiles > NMERGE)
1581 for (i = 0; i < nfiles / NMERGE; ++i)
1583 for (j = 0; j < NMERGE; ++j)
1584 fps[j] = xfopen (files[i * NMERGE + j], "r");
1585 tfp = xtmpfopen (temp = tempname ());
1586 mergefps (fps, NMERGE, tfp);
1588 for (j = 0; j < NMERGE; ++j)
1589 zaptemp (files[i * NMERGE + j]);
1592 for (j = 0; j < nfiles % NMERGE; ++j)
1593 fps[j] = xfopen (files[i * NMERGE + j], "r");
1594 tfp = xtmpfopen (temp = tempname ());
1595 mergefps (fps, nfiles % NMERGE, tfp);
1597 for (j = 0; j < nfiles % NMERGE; ++j)
1598 zaptemp (files[i * NMERGE + j]);
1603 for (i = 0; i < nfiles; ++i)
1604 fps[i] = xfopen (files[i], "r");
1605 mergefps (fps, i, ofp);
1606 for (i = 0; i < nfiles; ++i)
1610 /* Sort NFILES FILES onto OFP. */
1613 sort (char **files, int nfiles, FILE *ofp)
1620 struct tempnode *node;
1621 int n_temp_files = 0;
1624 initbuf (&buf, sortalloc);
1625 initlines (&lines, sortalloc / linelength + 1,
1626 LINEALLOC / sizeof (struct line));
1628 tmp = (struct line *) xmalloc (ntmp * sizeof (struct line));
1632 fp = xfopen (*files++, "r");
1633 while (fillbuf (&buf, fp))
1635 findlines (&buf, &lines);
1636 if (lines.used > ntmp)
1638 while (lines.used > ntmp)
1640 tmp = (struct line *)
1641 xrealloc ((char *) tmp, ntmp * sizeof (struct line));
1643 sortlines (lines.lines, lines.used, tmp);
1644 if (feof (fp) && !nfiles && !n_temp_files && !buf.left)
1649 tfp = xtmpfopen (tempname ());
1651 for (i = 0; i < lines.used; ++i)
1652 if (!unique || i == 0
1653 || compare (&lines.lines[i], &lines.lines[i - 1]))
1655 xfwrite (lines.lines[i].text, 1, lines.lines[i].length, tfp);
1665 free ((char *) lines.lines);
1666 free ((char *) tmp);
1670 tempfiles = (char **) xmalloc (n_temp_files * sizeof (char *));
1672 for (node = temphead.next; i > 0; node = node->next)
1673 tempfiles[--i] = node->name;
1674 merge (tempfiles, n_temp_files, ofp);
1675 free ((char *) tempfiles);
1679 /* Insert key KEY at the end of the list (`keyhead'). */
1682 insertkey (struct keyfield *key)
1684 struct keyfield *k = &keyhead;
1693 badfieldspec (const char *s)
1695 error (2, 0, _("invalid field specification `%s'"), s);
1698 /* Handle interrupts and hangups. */
1701 sighandler (int sig)
1704 struct sigaction sigact;
1706 sigact.sa_handler = SIG_DFL;
1707 sigemptyset (&sigact.sa_mask);
1708 sigact.sa_flags = 0;
1709 sigaction (sig, &sigact, NULL);
1710 #else /* !SA_INTERRUPT */
1711 signal (sig, SIG_DFL);
1712 #endif /* SA_INTERRUPT */
1714 kill (getpid (), sig);
1717 /* Set the ordering options for KEY specified in S.
1718 Return the address of the first character in S that
1719 is not a valid ordering option.
1720 BLANKTYPE is the kind of blanks that 'b' should skip. */
1723 set_ordering (register const char *s, struct keyfield *key,
1724 enum blanktype blanktype)
1731 if (blanktype == bl_start || blanktype == bl_both)
1732 key->skipsblanks = 1;
1733 if (blanktype == bl_end || blanktype == bl_both)
1734 key->skipeblanks = 1;
1737 key->ignore = nondictionary;
1740 key->translate = fold_toupper;
1743 key->general_numeric = 1;
1746 key->ignore = nonprinting;
1753 if (blanktype == bl_start || blanktype == bl_both)
1754 key->skipsblanks = 1;
1755 if (blanktype == bl_end || blanktype == bl_both)
1756 key->skipeblanks = 1;
1770 main (int argc, char **argv)
1772 struct keyfield *key = NULL, gkey;
1775 int checkonly = 0, mergeonly = 0, nfiles = 0;
1776 char *minus = "-", *outfile = minus, **files, *tmp;
1779 struct sigaction oldact, newact;
1780 #endif /* SA_INTERRUPT */
1782 #ifdef __DragonFly__
1783 (void) setlocale(LC_ALL, "");
1784 decimal_point = localeconv()->decimal_point[0];
1786 program_name = argv[0];
1788 parse_long_options (argc, argv, "sort", version_string, usage);
1790 have_read_stdin = 0;
1793 temp_file_prefix = getenv ("TMPDIR");
1794 if (temp_file_prefix == NULL)
1795 temp_file_prefix = DEFAULT_TMPDIR;
1798 newact.sa_handler = sighandler;
1799 sigemptyset (&newact.sa_mask);
1800 newact.sa_flags = 0;
1802 sigaction (SIGINT, NULL, &oldact);
1803 if (oldact.sa_handler != SIG_IGN)
1804 sigaction (SIGINT, &newact, NULL);
1805 sigaction (SIGHUP, NULL, &oldact);
1806 if (oldact.sa_handler != SIG_IGN)
1807 sigaction (SIGHUP, &newact, NULL);
1808 sigaction (SIGPIPE, NULL, &oldact);
1809 if (oldact.sa_handler != SIG_IGN)
1810 sigaction (SIGPIPE, &newact, NULL);
1811 sigaction (SIGTERM, NULL, &oldact);
1812 if (oldact.sa_handler != SIG_IGN)
1813 sigaction (SIGTERM, &newact, NULL);
1814 #else /* !SA_INTERRUPT */
1815 if (signal (SIGINT, SIG_IGN) != SIG_IGN)
1816 signal (SIGINT, sighandler);
1817 if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
1818 signal (SIGHUP, sighandler);
1819 if (signal (SIGPIPE, SIG_IGN) != SIG_IGN)
1820 signal (SIGPIPE, sighandler);
1821 if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
1822 signal (SIGTERM, sighandler);
1823 #endif /* !SA_INTERRUPT */
1825 gkey.sword = gkey.eword = -1;
1827 gkey.translate = NULL;
1828 gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = 0;
1829 gkey.skipsblanks = gkey.skipeblanks = 0;
1831 files = (char **) xmalloc (sizeof (char *) * argc);
1833 for (i = 1; i < argc; ++i)
1835 if (argv[i][0] == '+')
1839 key = (struct keyfield *) xmalloc (sizeof (struct keyfield));
1842 key->translate = NULL;
1843 key->skipsblanks = key->skipeblanks = 0;
1844 key->numeric = key->general_numeric = key->month = key->reverse = 0;
1846 if (! (digits[UCHAR (*s)] || (*s == '.' && digits[UCHAR (s[1])])))
1847 badfieldspec (argv[i]);
1848 for (t = 0; digits[UCHAR (*s)]; ++s)
1849 t = 10 * t + *s - '0';
1852 for (++s; digits[UCHAR (*s)]; ++s)
1853 t2 = 10 * t2 + *s - '0';
1861 s = set_ordering (s, key, bl_start);
1863 badfieldspec (argv[i]);
1865 else if (argv[i][0] == '-' && argv[i][1])
1868 if (digits[UCHAR (*s)] || (*s == '.' && digits[UCHAR (s[1])]))
1872 for (t = 0; digits[UCHAR (*s)]; ++s)
1873 t = t * 10 + *s - '0';
1876 for (++s; digits[UCHAR (*s)]; ++s)
1877 t2 = t2 * 10 + *s - '0';
1880 s = set_ordering (s, key, bl_end);
1882 badfieldspec (argv[i]);
1889 s = set_ordering (s, &gkey, bl_both);
1903 error (2, 0, _("option `-k' requires an argument"));
1909 key = (struct keyfield *)
1910 xmalloc (sizeof (struct keyfield));
1911 memset (key, 0, sizeof (struct keyfield));
1914 key->translate = NULL;
1915 key->skipsblanks = key->skipeblanks = 0;
1916 key->numeric = key->month = key->reverse = 0;
1918 if (!digits[UCHAR (*s)])
1919 badfieldspec (argv[i]);
1920 for (t = 0; digits[UCHAR (*s)]; ++s)
1921 t = 10 * t + *s - '0';
1924 /* Provoke with `sort -k0' */
1925 error (0, 0, _("the starting field number argument \
1926 to the `-k' option must be positive"));
1927 badfieldspec (argv[i]);
1933 if (!digits[UCHAR (s[1])])
1935 /* Provoke with `sort -k1.' */
1936 error (0, 0, _("starting field spec has `.' but \
1937 lacks following character offset"));
1938 badfieldspec (argv[i]);
1940 for (++s; digits[UCHAR (*s)]; ++s)
1941 t2 = 10 * t2 + *s - '0';
1944 /* Provoke with `sort -k1.0' */
1945 error (0, 0, _("starting field character offset \
1946 argument to the `-k' option\nmust be positive"));
1947 badfieldspec (argv[i]);
1958 s = set_ordering (s, key, bl_start);
1965 badfieldspec (argv[i]);
1968 /* Skip over comma. */
1972 /* Provoke with `sort -k1,' */
1973 error (0, 0, _("field specification has `,' but \
1974 lacks following field spec"));
1975 badfieldspec (argv[i]);
1978 for (t = 0; digits[UCHAR (*s)]; ++s)
1979 t = t * 10 + *s - '0';
1982 /* Provoke with `sort -k1,0' */
1983 error (0, 0, _("ending field number argument \
1984 to the `-k' option must be positive"));
1985 badfieldspec (argv[i]);
1991 if (!digits[UCHAR (s[1])])
1993 /* Provoke with `sort -k1,1.' */
1994 error (0, 0, _("ending field spec has `.' \
1995 but lacks following character offset"));
1996 badfieldspec (argv[i]);
1998 for (++s; digits[UCHAR (*s)]; ++s)
1999 t2 = t2 * 10 + *s - '0';
2003 /* `-k 2,3' is equivalent to `+1 -3'. */
2008 s = set_ordering (s, key, bl_end);
2010 badfieldspec (argv[i]);
2024 error (2, 0, _("option `-o' requires an argument"));
2026 outfile = argv[++i];
2035 else if (i < argc - 1)
2041 error (2, 0, _("option `-t' requires an argument"));
2045 temp_file_prefix = ++s;
2049 temp_file_prefix = argv[++i];
2051 error (2, 0, _("option `-T' requires an argument"));
2059 /* Accept and ignore e.g. -y0 for compatibility with
2063 fprintf (stderr, _("%s: unrecognized option `-%c'\n"),
2071 else /* Not an option. */
2073 files[nfiles++] = argv[i];
2081 /* Inheritance of global options to individual keys. */
2082 for (key = keyhead.next; key; key = key->next)
2083 if (!key->ignore && !key->translate && !key->skipsblanks && !key->reverse
2084 && !key->skipeblanks && !key->month && !key->numeric
2085 && !key->general_numeric)
2087 key->ignore = gkey.ignore;
2088 key->translate = gkey.translate;
2089 key->skipsblanks = gkey.skipsblanks;
2090 key->skipeblanks = gkey.skipeblanks;
2091 key->month = gkey.month;
2092 key->numeric = gkey.numeric;
2093 key->general_numeric = gkey.general_numeric;
2094 key->reverse = gkey.reverse;
2097 if (!keyhead.next && (gkey.ignore || gkey.translate || gkey.skipsblanks
2098 || gkey.skipeblanks || gkey.month || gkey.numeric
2099 || gkey.general_numeric))
2101 reverse = gkey.reverse;
2110 exit (check (files, nfiles) != 0);
2112 if (strcmp (outfile, "-"))
2114 struct stat outstat;
2115 if (stat (outfile, &outstat) == 0)
2117 /* The following code prevents a race condition when
2118 people use the brain dead shell programming idiom:
2119 cat file | sort -o file
2120 This feature is provided for historical compatibility,
2121 but we strongly discourage ever relying on this in
2122 new shell programs. */
2124 /* Temporarily copy each input file that might be another name
2125 for the output file. When in doubt (e.g. a pipe), copy. */
2126 for (i = 0; i < nfiles; ++i)
2132 if (S_ISREG (outstat.st_mode) && strcmp (outfile, files[i]))
2135 if ((strcmp (files[i], "-")
2136 ? stat (files[i], &instat)
2137 : fstat (fileno (stdin), &instat)) != 0)
2139 error (0, errno, "%s", files[i]);
2143 if (S_ISREG (instat.st_mode)
2144 && (instat.st_ino != outstat.st_ino
2145 || instat.st_dev != outstat.st_dev))
2147 /* We know the files are distinct. */
2152 fp = xfopen (files[i], "r");
2154 ofp = xtmpfopen (tmp);
2155 while ((cc = fread (buf, 1, sizeof buf, fp)) > 0)
2156 xfwrite (buf, 1, cc, ofp);
2159 error (0, errno, "%s", files[i]);
2168 ofp = xfopen (outfile, "w");
2174 merge (files, nfiles, ofp);
2176 sort (files, nfiles, ofp);
2179 /* If we wait for the implicit flush on exit, and the parent process
2180 has closed stdout (e.g., exec >&- in a shell), then the output file
2181 winds up empty. I don't understand why. This is under SunOS,
2182 Solaris, Ultrix, and Irix. This premature fflush makes the output
2183 reappear. --karl@cs.umb.edu */
2184 if (fflush (ofp) < 0)
2185 error (1, errno, _("%s: write error"), outfile);
2187 if (have_read_stdin && fclose (stdin) == EOF)
2188 error (1, errno, outfile);
2189 if (ferror (stdout) || fclose (stdout) == EOF)
2190 error (1, errno, _("%s: write error"), outfile);