2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
48 static wchar_t **wmonths;
49 static char **cmonths;
51 /* initialise months */
54 initialise_months(void)
56 const nl_item item[12] = { ABMON_1, ABMON_2, ABMON_3, ABMON_4,
57 ABMON_5, ABMON_6, ABMON_7, ABMON_8, ABMON_9, ABMON_10,
62 if (mb_cur_max == 1) {
63 if (cmonths == NULL) {
66 cmonths = sort_malloc(sizeof(char*) * 12);
67 for (int i = 0; i < 12; i++) {
69 tmp = nl_langinfo(item[i]);
71 printf("month[%d]=%s\n", i, tmp);
76 for (unsigned int j = 0; j < len; j++)
83 if (wmonths == NULL) {
86 wmonths = sort_malloc(sizeof(wchar_t *) * 12);
87 for (int i = 0; i < 12; i++) {
89 tmp = nl_langinfo(item[i]);
91 printf("month[%d]=%s\n", i, tmp);
95 m = sort_malloc(SIZEOF_WCHAR_STRING(len + 1));
96 if (mbstowcs(m, tmp, len) ==
102 for (unsigned int j = 0; j < len; j++)
103 m[j] = towupper(m[j]);
111 * Compare two wide-character strings
114 wide_str_coll(const wchar_t *s1, const wchar_t *s2)
119 ret = wcscoll(s1, s2);
120 if (errno == EILSEQ) {
122 ret = wcscmp(s1, s2);
124 for (size_t i = 0; ; ++i) {
128 return ((c2 == L'\0') ? 0 : -1);
133 return ((int)(c1 - c2));
140 /* counterparts of wcs functions */
143 bwsprintf(FILE *f, struct bwstring *bws, const char *prefix, const char *suffix)
147 fprintf(f, "%s%s%s", prefix, bws->cdata.str, suffix);
149 fprintf(f, "%s%S%s", prefix, bws->wdata.str, suffix);
152 const void* bwsrawdata(const struct bwstring *bws)
155 return (&(bws->wdata));
158 size_t bwsrawlen(const struct bwstring *bws)
161 return ((mb_cur_max == 1) ? bws->cdata.len :
162 SIZEOF_WCHAR_STRING(bws->wdata.len));
166 bws_memsize(const struct bwstring *bws)
169 return ((mb_cur_max == 1) ?
170 (bws->cdata.len + 2 + sizeof(struct bwstring)) :
171 (SIZEOF_WCHAR_STRING(bws->wdata.len + 1) + sizeof(struct bwstring)));
175 bws_setlen(struct bwstring *bws, size_t newlen)
178 if (mb_cur_max == 1 && bws && newlen != bws->cdata.len &&
179 newlen <= bws->cdata.len) {
180 bws->cdata.len = newlen;
181 bws->cdata.str[newlen] = '\0';
182 } else if (bws && newlen != bws->wdata.len && newlen <= bws->wdata.len) {
183 bws->wdata.len = newlen;
184 bws->wdata.str[newlen] = L'\0';
189 * Allocate a new binary string of specified size
194 struct bwstring *ret;
196 if (mb_cur_max == 1) {
197 ret = sort_malloc(sizeof(struct bwstring) + 1 + sz);
199 ret->cdata.str[sz] = '\0';
202 sizeof(struct bwstring) + SIZEOF_WCHAR_STRING(sz + 1));
204 ret->wdata.str[sz] = L'\0';
211 * Create a copy of binary string.
212 * New string size equals the length of the old string.
215 bwsdup(const struct bwstring *s)
221 struct bwstring *ret = bwsalloc(BWSLEN(s));
224 memcpy(ret->cdata.str, s->cdata.str, (s->cdata.len));
226 memcpy(ret->wdata.str, s->wdata.str,
227 SIZEOF_WCHAR_STRING(s->wdata.len));
234 * Create a new binary string from a wide character buffer.
237 bwssbdup(const wchar_t *str, size_t len)
241 return ((len == 0) ? bwsalloc(0) : NULL);
243 struct bwstring *ret;
248 for (size_t i = 0; i < len; ++i)
249 ret->cdata.str[i] = (char)str[i];
251 memcpy(ret->wdata.str, str, SIZEOF_WCHAR_STRING(len));
258 * Create a new binary string from a raw binary buffer.
261 bwscsbdup(const unsigned char *str, size_t len)
263 struct bwstring *ret;
269 memcpy(ret->cdata.str, str, len);
273 size_t charlen, chars, cptr;
277 s = (const char *) str;
279 memset(&mbs, 0, sizeof(mbs));
282 size_t n = mb_cur_max;
286 charlen = mbrlen(s + cptr, n, &mbs);
293 ret->wdata.str[chars++] =
294 (unsigned char) s[cptr];
298 n = mbrtowc(ret->wdata.str + (chars++),
299 s + cptr, charlen, &mbs);
300 if ((n == (size_t)-1) || (n == (size_t)-2))
302 err(2, "mbrtowc error");
307 ret->wdata.len = chars;
308 ret->wdata.str[ret->wdata.len] = L'\0';
315 * De-allocate object memory
318 bwsfree(const struct bwstring *s)
326 * Copy content of src binary string to dst.
327 * If the capacity of the dst string is not sufficient,
328 * then the data is truncated.
331 bwscpy(struct bwstring *dst, const struct bwstring *src)
333 size_t nums = BWSLEN(src);
335 if (nums > BWSLEN(dst))
338 if (mb_cur_max == 1) {
339 memcpy(dst->cdata.str, src->cdata.str, nums);
340 dst->cdata.len = nums;
341 dst->cdata.str[dst->cdata.len] = '\0';
343 memcpy(dst->wdata.str, src->wdata.str,
344 SIZEOF_WCHAR_STRING(nums));
345 dst->wdata.len = nums;
346 dst->wdata.str[nums] = L'\0';
353 * Copy content of src binary string to dst,
354 * with specified number of symbols to be copied.
355 * If the capacity of the dst string is not sufficient,
356 * then the data is truncated.
359 bwsncpy(struct bwstring *dst, const struct bwstring *src, size_t size)
361 size_t nums = BWSLEN(src);
363 if (nums > BWSLEN(dst))
368 if (mb_cur_max == 1) {
369 memcpy(dst->cdata.str, src->cdata.str, nums);
370 dst->cdata.len = nums;
371 dst->cdata.str[nums] = '\0';
373 memcpy(dst->wdata.str, src->wdata.str,
374 SIZEOF_WCHAR_STRING(nums));
375 dst->wdata.len = nums;
376 dst->wdata.str[nums] = L'\0';
383 * Copy content of src binary string to dst,
384 * with specified number of symbols to be copied.
385 * An offset value can be specified, from the start of src string.
386 * If the capacity of the dst string is not sufficient,
387 * then the data is truncated.
390 bwsnocpy(struct bwstring *dst, const struct bwstring *src, size_t offset,
394 if (offset >= BWSLEN(src)) {
397 size_t nums = BWSLEN(src) - offset;
399 if (nums > BWSLEN(dst))
403 if (mb_cur_max == 1) {
404 memcpy(dst->cdata.str, src->cdata.str + offset, nums);
405 dst->cdata.len = nums;
406 dst->cdata.str[nums] = '\0';
408 memcpy(dst->wdata.str, src->wdata.str + offset,
409 SIZEOF_WCHAR_STRING(nums));
410 dst->wdata.len = nums;
411 dst->wdata.str[nums] = L'\0';
418 * Write binary string to the file.
419 * The output is terminated either with '\0' (zero_ended == true)
420 * or with '\n' (zero_ended == false).
423 bwsfwrite(struct bwstring *bws, FILE *f, bool zero_ended)
426 if (mb_cur_max == 1) {
427 size_t len = bws->cdata.len;
430 bws->cdata.str[len] = '\n';
432 if (fwrite(bws->cdata.str, len + 1, 1, f) < 1)
435 bws->cdata.str[len] = '\0';
436 } else if (fwrite(bws->cdata.str, len + 1, 1, f) < 1)
445 eols = zero_ended ? btowc('\0') : btowc('\n');
447 while (printed < BWSLEN(bws)) {
448 const wchar_t *s = bws->wdata.str + printed;
453 nums = fwprintf(f, L"%lc", *s);
461 nums = fwprintf(f, L"%ls", s);
468 fwprintf(f, L"%lc", eols);
469 return (printed + 1);
474 * Allocate and read a binary string from file.
475 * The strings are nl-ended or zero-ended, depending on the sort setting.
478 bwsfgetln(FILE *f, size_t *len, bool zero_ended, struct reader_buffer *rb)
482 eols = zero_ended ? btowc('\0') : btowc('\n');
484 if (!zero_ended && (mb_cur_max > 1)) {
487 ret = fgetwln(f, len);
495 if (ret[*len - 1] == (wchar_t)eols)
498 return (bwssbdup(ret, *len));
500 } else if (!zero_ended && (mb_cur_max == 1)) {
503 ret = fgetln(f, len);
511 if (ret[*len - 1] == '\n')
514 return (bwscsbdup((unsigned char *)ret, *len));
522 if (2 >= rb->fgetwln_z_buffer_size) {
523 rb->fgetwln_z_buffer_size += 256;
524 rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer,
525 sizeof(wchar_t) * rb->fgetwln_z_buffer_size);
527 rb->fgetwln_z_buffer[*len] = 0;
543 if (*len + 1 >= rb->fgetwln_z_buffer_size) {
544 rb->fgetwln_z_buffer_size += 256;
545 rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer,
546 SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size));
549 rb->fgetwln_z_buffer[*len] = c;
550 rb->fgetwln_z_buffer[++(*len)] = 0;
566 if (*len + 1 >= rb->fgetwln_z_buffer_size) {
567 rb->fgetwln_z_buffer_size += 256;
568 rb->fgetwln_z_buffer = sort_realloc(rb->fgetwln_z_buffer,
569 SIZEOF_WCHAR_STRING(rb->fgetwln_z_buffer_size));
572 rb->fgetwln_z_buffer[*len] = c;
573 rb->fgetwln_z_buffer[++(*len)] = 0;
577 /* we do not count the last 0 */
578 return (bwssbdup(rb->fgetwln_z_buffer, *len));
583 bwsncmp(const struct bwstring *bws1, const struct bwstring *bws2,
584 size_t offset, size_t len)
586 size_t cmp_len, len1, len2;
592 if (len1 <= offset) {
593 return ((len2 <= offset) ? 0 : -1);
609 if (mb_cur_max == 1) {
612 s1 = bws1->cdata.str + offset;
613 s2 = bws2->cdata.str + offset;
615 res = memcmp(s1, s2, cmp_len);
618 const wchar_t *s1, *s2;
620 s1 = bws1->wdata.str + offset;
621 s2 = bws2->wdata.str + offset;
623 res = memcmp(s1, s2, SIZEOF_WCHAR_STRING(cmp_len));
629 if (len1 < cmp_len && len1 < len2)
631 else if (len2 < cmp_len && len2 < len1)
639 bwscmp(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
641 size_t len1, len2, cmp_len;
655 res = bwsncmp(bws1, bws2, offset, cmp_len);
660 else if (len2 < len1)
668 bws_iterator_cmp(bwstring_iterator iter1, bwstring_iterator iter2, size_t len)
673 for (i = 0; i < len; ++i) {
674 c1 = bws_get_iter_value(iter1);
675 c2 = bws_get_iter_value(iter2);
678 iter1 = bws_iterator_inc(iter1, 1);
679 iter2 = bws_iterator_inc(iter2, 1);
686 bwscoll(const struct bwstring *bws1, const struct bwstring *bws2, size_t offset)
694 return ((len2 <= offset) ? 0 : -1);
702 if (mb_cur_max == 1) {
705 s1 = bws1->cdata.str + offset;
706 s2 = bws2->cdata.str + offset;
712 res = memcmp(s1, s2, len2);
715 } else if (len1 < len2) {
716 res = memcmp(s1, s2, len1);
720 res = memcmp(s1, s2, len1);
735 /* goto next non-zero part: */
736 while ((i < maxlen) &&
746 err(2, "bwscoll error 01");
749 } else if (s2[i] == 0)
752 res = strcoll((const char*)(s1 + i), (const char*)(s2 + i));
756 while ((i < maxlen) &&
769 } else if (s2[i] == 0)
773 err(2, "bwscoll error 02");
778 else if (len1 > len2)
784 const wchar_t *s1, *s2;
788 s1 = bws1->wdata.str + offset;
789 s2 = bws2->wdata.str + offset;
799 /* goto next non-zero part: */
800 while ((i < maxlen) &&
810 err(2, "bwscoll error 1");
813 } else if (s2[i] == 0)
816 res = wide_str_coll(s1 + i, s2 + i);
820 while ((i < maxlen) && s1[i] && s2[i])
832 } else if (s2[i] == 0)
836 err(2, "bwscoll error 2");
841 else if (len1 > len2)
851 * Correction of the system API
854 bwstod(struct bwstring *s0, bool *empty)
858 if (mb_cur_max == 1) {
863 end = s + s0->cdata.len;
866 while (isblank(*s) && s < end)
874 ret = strtod((char*)s, &ep);
880 wchar_t *end, *ep, *s;
883 end = s + s0->wdata.len;
886 while (iswblank(*s) && s < end)
894 ret = wcstod(s, &ep);
906 * A helper function for monthcoll. If a line matches
907 * a month name, it returns (number of the month - 1),
908 * while if there is no match, it just returns -1.
912 bws_month_score(const struct bwstring *s0)
915 if (mb_cur_max == 1) {
919 end = s + s0->cdata.len;
921 while (isblank(*s) && s < end)
924 for (int i = 11; i >= 0; --i) {
926 (s == strstr(s, cmonths[i])))
931 const wchar_t *end, *s;
934 end = s + s0->wdata.len;
936 while (iswblank(*s) && s < end)
939 for (int i = 11; i >= 0; --i) {
940 if (wmonths[i] && (s == wcsstr(s, wmonths[i])))
949 * Rips out leading blanks (-b).
952 ignore_leading_blanks(struct bwstring *str)
955 if (mb_cur_max == 1) {
956 char *dst, *end, *src;
958 src = str->cdata.str;
960 end = src + str->cdata.len;
962 while (src < end && isblank(*src))
968 newlen = BWSLEN(str) - (src - dst);
975 bws_setlen(str, newlen);
978 wchar_t *dst, *end, *src;
980 src = str->wdata.str;
982 end = src + str->wdata.len;
984 while (src < end && iswblank(*src))
989 size_t newlen = BWSLEN(str) - (src - dst);
996 bws_setlen(str, newlen);
1004 * Rips out nonprinting characters (-i).
1007 ignore_nonprinting(struct bwstring *str)
1009 size_t newlen = BWSLEN(str);
1011 if (mb_cur_max == 1) {
1012 char *dst, *end, *src;
1015 src = str->cdata.str;
1017 end = src + str->cdata.len;
1031 wchar_t *dst, *end, *src;
1034 src = str->wdata.str;
1036 end = src + str->wdata.len;
1050 bws_setlen(str, newlen);
1056 * Rips out any characters that are not alphanumeric characters
1060 dictionary_order(struct bwstring *str)
1062 size_t newlen = BWSLEN(str);
1064 if (mb_cur_max == 1) {
1065 char *dst, *end, *src;
1068 src = str->cdata.str;
1070 end = src + str->cdata.len;
1074 if (isalnum(c) || isblank(c)) {
1084 wchar_t *dst, *end, *src;
1087 src = str->wdata.str;
1089 end = src + str->wdata.len;
1093 if (iswalnum(c) || iswblank(c)) {
1103 bws_setlen(str, newlen);
1109 * Converts string to lower case (-f).
1112 ignore_case(struct bwstring *str)
1115 if (mb_cur_max == 1) {
1119 end = s + str->cdata.len;
1129 end = s + str->wdata.len;
1140 bws_disorder_warnx(struct bwstring *s, const char *fn, size_t pos)
1143 if (mb_cur_max == 1)
1144 warnx("%s:%zu: disorder: %s", fn, pos + 1, s->cdata.str);
1146 warnx("%s:%zu: disorder: %ls", fn, pos + 1, s->wdata.str);