Update to file-4.19.
[dragonfly.git] / contrib / file-4 / src / ascmagic.c
CommitLineData
ab0b56cc
JS
1/*
2 * Copyright (c) Ian F. Darwin 1986-1995.
3 * Software written by Ian F. Darwin and others;
4 * maintained 1995-present by Christos Zoulas and others.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice immediately at the beginning of the file, without modification,
11 * this list of conditions, and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28/*
29 * ASCII magic -- file types that we know based on keywords
30 * that can appear anywhere in the file.
31 *
32 * Extensively modified by Eric Fischer <enf@pobox.com> in July, 2000,
33 * to handle character codes other than ASCII on a unified basis.
34 *
35 * Joerg Wunsch <joerg@freebsd.org> wrote the original support for 8-bit
36 * international characters, now subsumed into this file.
37 */
38
39#include "file.h"
40#include "magic.h"
41#include <stdio.h>
42#include <string.h>
43#include <memory.h>
44#include <ctype.h>
45#include <stdlib.h>
46#ifdef HAVE_UNISTD_H
47#include <unistd.h>
48#endif
49#include "names.h"
50
51#ifndef lint
9b22a626 52FILE_RCSID("@(#)$Id: ascmagic.c,v 1.46 2006/10/20 21:04:15 christos Exp $")
ab0b56cc
JS
53#endif /* lint */
54
55typedef unsigned long unichar;
56
57#define MAXLINELEN 300 /* longest sane line length */
58#define ISSPC(x) ((x) == ' ' || (x) == '\t' || (x) == '\r' || (x) == '\n' \
59 || (x) == 0x85 || (x) == '\f')
60
61private int looks_ascii(const unsigned char *, size_t, unichar *, size_t *);
62private int looks_utf8(const unsigned char *, size_t, unichar *, size_t *);
63private int looks_unicode(const unsigned char *, size_t, unichar *, size_t *);
64private int looks_latin1(const unsigned char *, size_t, unichar *, size_t *);
65private int looks_extended(const unsigned char *, size_t, unichar *, size_t *);
66private void from_ebcdic(const unsigned char *, size_t, unsigned char *);
67private int ascmatch(const unsigned char *, const unichar *, size_t);
68
69
70protected int
71file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
72{
73 size_t i;
2be182fc
JS
74 unsigned char *nbuf = NULL;
75 unichar *ubuf = NULL;
ab0b56cc
JS
76 size_t ulen;
77 struct names *p;
2be182fc 78 int rv = -1;
ab0b56cc
JS
79
80 const char *code = NULL;
81 const char *code_mime = NULL;
82 const char *type = NULL;
83 const char *subtype = NULL;
84 const char *subtype_mime = NULL;
85
86 int has_escapes = 0;
87 int has_backspace = 0;
88 int seen_cr = 0;
89
90 int n_crlf = 0;
91 int n_lf = 0;
92 int n_cr = 0;
93 int n_nel = 0;
94
95 int last_line_end = -1;
96 int has_long_lines = 0;
97
98 /*
99 * Undo the NUL-termination kindly provided by process()
100 * but leave at least one byte to look at
101 */
ab0b56cc
JS
102 while (nbytes > 1 && buf[nbytes - 1] == '\0')
103 nbytes--;
104
9b22a626 105 if ((nbuf = calloc(1, (nbytes + 1) * sizeof(nbuf[0]))) == NULL)
2be182fc 106 goto done;
9b22a626 107 if ((ubuf = calloc(1, (nbytes + 1) * sizeof(ubuf[0]))) == NULL)
2be182fc 108 goto done;
ab0b56cc
JS
109
110 /*
111 * Then try to determine whether it's any character code we can
112 * identify. Each of these tests, if it succeeds, will leave
113 * the text converted into one-unichar-per-character Unicode in
114 * ubuf, and the number of characters converted in ulen.
115 */
116 if (looks_ascii(buf, nbytes, ubuf, &ulen)) {
117 code = "ASCII";
118 code_mime = "us-ascii";
119 type = "text";
120 } else if (looks_utf8(buf, nbytes, ubuf, &ulen)) {
121 code = "UTF-8 Unicode";
122 code_mime = "utf-8";
123 type = "text";
124 } else if ((i = looks_unicode(buf, nbytes, ubuf, &ulen)) != 0) {
125 if (i == 1)
126 code = "Little-endian UTF-16 Unicode";
127 else
128 code = "Big-endian UTF-16 Unicode";
129
130 type = "character data";
131 code_mime = "utf-16"; /* is this defined? */
132 } else if (looks_latin1(buf, nbytes, ubuf, &ulen)) {
133 code = "ISO-8859";
134 type = "text";
135 code_mime = "iso-8859-1";
136 } else if (looks_extended(buf, nbytes, ubuf, &ulen)) {
137 code = "Non-ISO extended-ASCII";
138 type = "text";
139 code_mime = "unknown";
140 } else {
141 from_ebcdic(buf, nbytes, nbuf);
142
143 if (looks_ascii(nbuf, nbytes, ubuf, &ulen)) {
144 code = "EBCDIC";
145 type = "character data";
146 code_mime = "ebcdic";
147 } else if (looks_latin1(nbuf, nbytes, ubuf, &ulen)) {
148 code = "International EBCDIC";
149 type = "character data";
150 code_mime = "ebcdic";
151 } else {
2be182fc
JS
152 rv = 0;
153 goto done; /* doesn't look like text at all */
ab0b56cc
JS
154 }
155 }
156
9b22a626
PA
157 if (nbytes <= 1) {
158 rv = 0;
159 goto done;
160 }
161
ab0b56cc
JS
162 /*
163 * for troff, look for . + letter + letter or .\";
164 * this must be done to disambiguate tar archives' ./file
165 * and other trash from real troff input.
166 *
167 * I believe Plan 9 troff allows non-ASCII characters in the names
168 * of macros, so this test might possibly fail on such a file.
169 */
170 if (*ubuf == '.') {
171 unichar *tp = ubuf + 1;
172
173 while (ISSPC(*tp))
174 ++tp; /* skip leading whitespace */
175 if ((tp[0] == '\\' && tp[1] == '\"') ||
176 (isascii((unsigned char)tp[0]) &&
177 isalnum((unsigned char)tp[0]) &&
178 isascii((unsigned char)tp[1]) &&
179 isalnum((unsigned char)tp[1]) &&
180 ISSPC(tp[2]))) {
181 subtype_mime = "text/troff";
182 subtype = "troff or preprocessor input";
183 goto subtype_identified;
184 }
185 }
186
187 if ((*buf == 'c' || *buf == 'C') && ISSPC(buf[1])) {
188 subtype_mime = "text/fortran";
189 subtype = "fortran program";
190 goto subtype_identified;
191 }
192
193 /* look for tokens from names.h - this is expensive! */
194
195 i = 0;
196 while (i < ulen) {
197 size_t end;
198
199 /*
200 * skip past any leading space
201 */
202 while (i < ulen && ISSPC(ubuf[i]))
203 i++;
204 if (i >= ulen)
205 break;
206
207 /*
208 * find the next whitespace
209 */
210 for (end = i + 1; end < nbytes; end++)
211 if (ISSPC(ubuf[end]))
212 break;
213
214 /*
215 * compare the word thus isolated against the token list
216 */
217 for (p = names; p < names + NNAMES; p++) {
218 if (ascmatch((const unsigned char *)p->name, ubuf + i,
219 end - i)) {
220 subtype = types[p->type].human;
221 subtype_mime = types[p->type].mime;
222 goto subtype_identified;
223 }
224 }
225
226 i = end;
227 }
228
229subtype_identified:
230
231 /*
232 * Now try to discover other details about the file.
233 */
234 for (i = 0; i < ulen; i++) {
235 if (ubuf[i] == '\n') {
236 if (seen_cr)
237 n_crlf++;
238 else
239 n_lf++;
240 last_line_end = i;
241 } else if (seen_cr)
242 n_cr++;
243
244 seen_cr = (ubuf[i] == '\r');
245 if (seen_cr)
246 last_line_end = i;
247
248 if (ubuf[i] == 0x85) { /* X3.64/ECMA-43 "next line" character */
249 n_nel++;
250 last_line_end = i;
251 }
252
253 /* If this line is _longer_ than MAXLINELEN, remember it. */
254 if (i > last_line_end + MAXLINELEN)
255 has_long_lines = 1;
256
257 if (ubuf[i] == '\033')
258 has_escapes = 1;
259 if (ubuf[i] == '\b')
260 has_backspace = 1;
261 }
262
263 /* Beware, if the data has been truncated, the final CR could have
264 been followed by a LF. If we have HOWMANY bytes, it indicates
265 that the data might have been truncated, probably even before
266 this function was called. */
267 if (seen_cr && nbytes < HOWMANY)
268 n_cr++;
269
270 if ((ms->flags & MAGIC_MIME)) {
271 if (subtype_mime) {
272 if (file_printf(ms, subtype_mime) == -1)
2be182fc 273 goto done;
ab0b56cc
JS
274 } else {
275 if (file_printf(ms, "text/plain") == -1)
2be182fc 276 goto done;
ab0b56cc
JS
277 }
278
279 if (code_mime) {
280 if (file_printf(ms, "; charset=") == -1)
2be182fc 281 goto done;
ab0b56cc 282 if (file_printf(ms, code_mime) == -1)
2be182fc 283 goto done;
ab0b56cc
JS
284 }
285 } else {
286 if (file_printf(ms, code) == -1)
2be182fc 287 goto done;
ab0b56cc
JS
288
289 if (subtype) {
290 if (file_printf(ms, " ") == -1)
2be182fc 291 goto done;
ab0b56cc 292 if (file_printf(ms, subtype) == -1)
2be182fc 293 goto done;
ab0b56cc
JS
294 }
295
296 if (file_printf(ms, " ") == -1)
2be182fc 297 goto done;
ab0b56cc 298 if (file_printf(ms, type) == -1)
2be182fc 299 goto done;
ab0b56cc
JS
300
301 if (has_long_lines)
302 if (file_printf(ms, ", with very long lines") == -1)
2be182fc 303 goto done;
ab0b56cc
JS
304
305 /*
306 * Only report line terminators if we find one other than LF,
307 * or if we find none at all.
308 */
309 if ((n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) ||
310 (n_crlf != 0 || n_cr != 0 || n_nel != 0)) {
311 if (file_printf(ms, ", with") == -1)
2be182fc 312 goto done;
ab0b56cc
JS
313
314 if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) {
315 if (file_printf(ms, " no") == -1)
2be182fc 316 goto done;
ab0b56cc
JS
317 } else {
318 if (n_crlf) {
319 if (file_printf(ms, " CRLF") == -1)
2be182fc 320 goto done;
ab0b56cc
JS
321 if (n_cr || n_lf || n_nel)
322 if (file_printf(ms, ",") == -1)
2be182fc 323 goto done;
ab0b56cc
JS
324 }
325 if (n_cr) {
326 if (file_printf(ms, " CR") == -1)
2be182fc 327 goto done;
ab0b56cc
JS
328 if (n_lf || n_nel)
329 if (file_printf(ms, ",") == -1)
2be182fc 330 goto done;
ab0b56cc
JS
331 }
332 if (n_lf) {
333 if (file_printf(ms, " LF") == -1)
2be182fc 334 goto done;
ab0b56cc
JS
335 if (n_nel)
336 if (file_printf(ms, ",") == -1)
2be182fc 337 goto done;
ab0b56cc
JS
338 }
339 if (n_nel)
340 if (file_printf(ms, " NEL") == -1)
2be182fc 341 goto done;
ab0b56cc
JS
342 }
343
344 if (file_printf(ms, " line terminators") == -1)
2be182fc 345 goto done;
ab0b56cc
JS
346 }
347
348 if (has_escapes)
349 if (file_printf(ms, ", with escape sequences") == -1)
2be182fc 350 goto done;
ab0b56cc
JS
351 if (has_backspace)
352 if (file_printf(ms, ", with overstriking") == -1)
2be182fc 353 goto done;
ab0b56cc 354 }
2be182fc
JS
355 rv = 1;
356done:
357 if (nbuf)
358 free(nbuf);
359 if (ubuf)
360 free(ubuf);
361
362 return rv;
ab0b56cc
JS
363}
364
365private int
366ascmatch(const unsigned char *s, const unichar *us, size_t ulen)
367{
368 size_t i;
369
370 for (i = 0; i < ulen; i++) {
371 if (s[i] != us[i])
372 return 0;
373 }
374
375 if (s[i])
376 return 0;
377 else
378 return 1;
379}
380
381/*
382 * This table reflects a particular philosophy about what constitutes
383 * "text," and there is room for disagreement about it.
384 *
385 * Version 3.31 of the file command considered a file to be ASCII if
386 * each of its characters was approved by either the isascii() or
387 * isalpha() function. On most systems, this would mean that any
388 * file consisting only of characters in the range 0x00 ... 0x7F
389 * would be called ASCII text, but many systems might reasonably
390 * consider some characters outside this range to be alphabetic,
391 * so the file command would call such characters ASCII. It might
392 * have been more accurate to call this "considered textual on the
393 * local system" than "ASCII."
394 *
395 * It considered a file to be "International language text" if each
396 * of its characters was either an ASCII printing character (according
397 * to the real ASCII standard, not the above test), a character in
398 * the range 0x80 ... 0xFF, or one of the following control characters:
399 * backspace, tab, line feed, vertical tab, form feed, carriage return,
400 * escape. No attempt was made to determine the language in which files
401 * of this type were written.
402 *
403 *
404 * The table below considers a file to be ASCII if all of its characters
405 * are either ASCII printing characters (again, according to the X3.4
406 * standard, not isascii()) or any of the following controls: bell,
407 * backspace, tab, line feed, form feed, carriage return, esc, nextline.
408 *
409 * I include bell because some programs (particularly shell scripts)
410 * use it literally, even though it is rare in normal text. I exclude
411 * vertical tab because it never seems to be used in real text. I also
412 * include, with hesitation, the X3.64/ECMA-43 control nextline (0x85),
413 * because that's what the dd EBCDIC->ASCII table maps the EBCDIC newline
414 * character to. It might be more appropriate to include it in the 8859
415 * set instead of the ASCII set, but it's got to be included in *something*
416 * we recognize or EBCDIC files aren't going to be considered textual.
417 * Some old Unix source files use SO/SI (^N/^O) to shift between Greek
418 * and Latin characters, so these should possibly be allowed. But they
419 * make a real mess on VT100-style displays if they're not paired properly,
420 * so we are probably better off not calling them text.
421 *
422 * A file is considered to be ISO-8859 text if its characters are all
423 * either ASCII, according to the above definition, or printing characters
424 * from the ISO-8859 8-bit extension, characters 0xA0 ... 0xFF.
425 *
426 * Finally, a file is considered to be international text from some other
427 * character code if its characters are all either ISO-8859 (according to
428 * the above definition) or characters in the range 0x80 ... 0x9F, which
429 * ISO-8859 considers to be control characters but the IBM PC and Macintosh
430 * consider to be printing characters.
431 */
432
433#define F 0 /* character never appears in text */
434#define T 1 /* character appears in plain ASCII text */
435#define I 2 /* character appears in ISO-8859 text */
436#define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
437
438private char text_chars[256] = {
439 /* BEL BS HT LF FF CR */
440 F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */
441 /* ESC */
442 F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */
443 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */
444 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */
445 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */
446 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */
447 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */
448 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */
449 /* NEL */
450 X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X, /* 0x8X */
451 X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */
452 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */
453 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */
454 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */
455 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */
456 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */
457 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I /* 0xfX */
458};
459
460private int
461looks_ascii(const unsigned char *buf, size_t nbytes, unichar *ubuf,
462 size_t *ulen)
463{
464 int i;
465
466 *ulen = 0;
467
468 for (i = 0; i < nbytes; i++) {
469 int t = text_chars[buf[i]];
470
471 if (t != T)
472 return 0;
473
474 ubuf[(*ulen)++] = buf[i];
475 }
476
477 return 1;
478}
479
480private int
481looks_latin1(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
482{
483 int i;
484
485 *ulen = 0;
486
487 for (i = 0; i < nbytes; i++) {
488 int t = text_chars[buf[i]];
489
490 if (t != T && t != I)
491 return 0;
492
493 ubuf[(*ulen)++] = buf[i];
494 }
495
496 return 1;
497}
498
499private int
500looks_extended(const unsigned char *buf, size_t nbytes, unichar *ubuf,
501 size_t *ulen)
502{
503 int i;
504
505 *ulen = 0;
506
507 for (i = 0; i < nbytes; i++) {
508 int t = text_chars[buf[i]];
509
510 if (t != T && t != I && t != X)
511 return 0;
512
513 ubuf[(*ulen)++] = buf[i];
514 }
515
516 return 1;
517}
518
519private int
520looks_utf8(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
521{
522 int i, n;
523 unichar c;
524 int gotone = 0;
525
526 *ulen = 0;
527
528 for (i = 0; i < nbytes; i++) {
529 if ((buf[i] & 0x80) == 0) { /* 0xxxxxxx is plain ASCII */
530 /*
531 * Even if the whole file is valid UTF-8 sequences,
532 * still reject it if it uses weird control characters.
533 */
534
535 if (text_chars[buf[i]] != T)
536 return 0;
537
538 ubuf[(*ulen)++] = buf[i];
539 } else if ((buf[i] & 0x40) == 0) { /* 10xxxxxx never 1st byte */
540 return 0;
541 } else { /* 11xxxxxx begins UTF-8 */
542 int following;
543
544 if ((buf[i] & 0x20) == 0) { /* 110xxxxx */
545 c = buf[i] & 0x1f;
546 following = 1;
547 } else if ((buf[i] & 0x10) == 0) { /* 1110xxxx */
548 c = buf[i] & 0x0f;
549 following = 2;
550 } else if ((buf[i] & 0x08) == 0) { /* 11110xxx */
551 c = buf[i] & 0x07;
552 following = 3;
553 } else if ((buf[i] & 0x04) == 0) { /* 111110xx */
554 c = buf[i] & 0x03;
555 following = 4;
556 } else if ((buf[i] & 0x02) == 0) { /* 1111110x */
557 c = buf[i] & 0x01;
558 following = 5;
559 } else
560 return 0;
561
562 for (n = 0; n < following; n++) {
563 i++;
564 if (i >= nbytes)
565 goto done;
566
567 if ((buf[i] & 0x80) == 0 || (buf[i] & 0x40))
568 return 0;
569
570 c = (c << 6) + (buf[i] & 0x3f);
571 }
572
573 ubuf[(*ulen)++] = c;
574 gotone = 1;
575 }
576 }
577done:
578 return gotone; /* don't claim it's UTF-8 if it's all 7-bit */
579}
580
581private int
582looks_unicode(const unsigned char *buf, size_t nbytes, unichar *ubuf,
583 size_t *ulen)
584{
585 int bigend;
586 int i;
587
588 if (nbytes < 2)
589 return 0;
590
591 if (buf[0] == 0xff && buf[1] == 0xfe)
592 bigend = 0;
593 else if (buf[0] == 0xfe && buf[1] == 0xff)
594 bigend = 1;
595 else
596 return 0;
597
598 *ulen = 0;
599
600 for (i = 2; i + 1 < nbytes; i += 2) {
601 /* XXX fix to properly handle chars > 65536 */
602
603 if (bigend)
604 ubuf[(*ulen)++] = buf[i + 1] + 256 * buf[i];
605 else
606 ubuf[(*ulen)++] = buf[i] + 256 * buf[i + 1];
607
608 if (ubuf[*ulen - 1] == 0xfffe)
609 return 0;
610 if (ubuf[*ulen - 1] < 128 &&
611 text_chars[(size_t)ubuf[*ulen - 1]] != T)
612 return 0;
613 }
614
615 return 1 + bigend;
616}
617
618#undef F
619#undef T
620#undef I
621#undef X
622
623/*
624 * This table maps each EBCDIC character to an (8-bit extended) ASCII
625 * character, as specified in the rationale for the dd(1) command in
626 * draft 11.2 (September, 1991) of the POSIX P1003.2 standard.
627 *
628 * Unfortunately it does not seem to correspond exactly to any of the
629 * five variants of EBCDIC documented in IBM's _Enterprise Systems
630 * Architecture/390: Principles of Operation_, SA22-7201-06, Seventh
631 * Edition, July, 1999, pp. I-1 - I-4.
632 *
633 * Fortunately, though, all versions of EBCDIC, including this one, agree
634 * on most of the printing characters that also appear in (7-bit) ASCII.
635 * Of these, only '|', '!', '~', '^', '[', and ']' are in question at all.
636 *
637 * Fortunately too, there is general agreement that codes 0x00 through
638 * 0x3F represent control characters, 0x41 a nonbreaking space, and the
639 * remainder printing characters.
640 *
641 * This is sufficient to allow us to identify EBCDIC text and to distinguish
642 * between old-style and internationalized examples of text.
643 */
644
645private unsigned char ebcdic_to_ascii[] = {
646 0, 1, 2, 3, 156, 9, 134, 127, 151, 141, 142, 11, 12, 13, 14, 15,
647 16, 17, 18, 19, 157, 133, 8, 135, 24, 25, 146, 143, 28, 29, 30, 31,
648128, 129, 130, 131, 132, 10, 23, 27, 136, 137, 138, 139, 140, 5, 6, 7,
649144, 145, 22, 147, 148, 149, 150, 4, 152, 153, 154, 155, 20, 21, 158, 26,
650' ', 160, 161, 162, 163, 164, 165, 166, 167, 168, 213, '.', '<', '(', '+', '|',
651'&', 169, 170, 171, 172, 173, 174, 175, 176, 177, '!', '$', '*', ')', ';', '~',
652'-', '/', 178, 179, 180, 181, 182, 183, 184, 185, 203, ',', '%', '_', '>', '?',
653186, 187, 188, 189, 190, 191, 192, 193, 194, '`', ':', '#', '@', '\'','=', '"',
654195, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 196, 197, 198, 199, 200, 201,
655202, 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', '^', 204, 205, 206, 207, 208,
656209, 229, 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 210, 211, 212, '[', 214, 215,
657216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, ']', 230, 231,
658'{', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 232, 233, 234, 235, 236, 237,
659'}', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 238, 239, 240, 241, 242, 243,
660'\\',159, 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 244, 245, 246, 247, 248, 249,
661'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 250, 251, 252, 253, 254, 255
662};
663
664#ifdef notdef
665/*
666 * The following EBCDIC-to-ASCII table may relate more closely to reality,
667 * or at least to modern reality. It comes from
668 *
669 * http://ftp.s390.ibm.com/products/oe/bpxqp9.html
670 *
671 * and maps the characters of EBCDIC code page 1047 (the code used for
672 * Unix-derived software on IBM's 390 systems) to the corresponding
673 * characters from ISO 8859-1.
674 *
675 * If this table is used instead of the above one, some of the special
676 * cases for the NEL character can be taken out of the code.
677 */
678
679private unsigned char ebcdic_1047_to_8859[] = {
6800x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F,
6810x10,0x11,0x12,0x13,0x9D,0x0A,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F,
6820x80,0x81,0x82,0x83,0x84,0x85,0x17,0x1B,0x88,0x89,0x8A,0x8B,0x8C,0x05,0x06,0x07,
6830x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9A,0x9B,0x14,0x15,0x9E,0x1A,
6840x20,0xA0,0xE2,0xE4,0xE0,0xE1,0xE3,0xE5,0xE7,0xF1,0xA2,0x2E,0x3C,0x28,0x2B,0x7C,
6850x26,0xE9,0xEA,0xEB,0xE8,0xED,0xEE,0xEF,0xEC,0xDF,0x21,0x24,0x2A,0x29,0x3B,0x5E,
6860x2D,0x2F,0xC2,0xC4,0xC0,0xC1,0xC3,0xC5,0xC7,0xD1,0xA6,0x2C,0x25,0x5F,0x3E,0x3F,
6870xF8,0xC9,0xCA,0xCB,0xC8,0xCD,0xCE,0xCF,0xCC,0x60,0x3A,0x23,0x40,0x27,0x3D,0x22,
6880xD8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xAB,0xBB,0xF0,0xFD,0xFE,0xB1,
6890xB0,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0xAA,0xBA,0xE6,0xB8,0xC6,0xA4,
6900xB5,0x7E,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0xA1,0xBF,0xD0,0x5B,0xDE,0xAE,
6910xAC,0xA3,0xA5,0xB7,0xA9,0xA7,0xB6,0xBC,0xBD,0xBE,0xDD,0xA8,0xAF,0x5D,0xB4,0xD7,
6920x7B,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xAD,0xF4,0xF6,0xF2,0xF3,0xF5,
6930x7D,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0xB9,0xFB,0xFC,0xF9,0xFA,0xFF,
6940x5C,0xF7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xB2,0xD4,0xD6,0xD2,0xD3,0xD5,
6950x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xB3,0xDB,0xDC,0xD9,0xDA,0x9F
696};
697#endif
698
699/*
700 * Copy buf[0 ... nbytes-1] into out[], translating EBCDIC to ASCII.
701 */
702private void
703from_ebcdic(const unsigned char *buf, size_t nbytes, unsigned char *out)
704{
705 int i;
706
707 for (i = 0; i < nbytes; i++) {
708 out[i] = ebcdic_to_ascii[buf[i]];
709 }
710}