Initial import from FreeBSD RELENG_4:
[dragonfly.git] / contrib / awk / node.c
CommitLineData
984263bc
MD
/*
 * node.c -- routines for node management
 */

/*
 * Copyright (C) 1986, 1988, 1989, 1991-2000 the Free Software Foundation, Inc.
 *
 * This file is part of GAWK, the GNU implementation of the
 * AWK Programming Language.
 *
 * GAWK is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * GAWK is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
 *
 * $FreeBSD: src/contrib/awk/node.c,v 1.4.2.1 2001/01/23 22:08:31 asmodai Exp $
 */
27
28#include "awk.h"
29
30/* r_force_number --- force a value to be numeric */
31
32AWKNUM
33r_force_number(n)
34register NODE *n;
35{
36 register char *cp;
37 register char *cpend;
38 char save;
39 char *ptr;
40 unsigned int newflags;
41 extern double strtod();
42
43#ifdef DEBUG
44 if (n == NULL)
45 cant_happen();
46 if (n->type != Node_val)
47 cant_happen();
48 if(n->flags == 0)
49 cant_happen();
50 if (n->flags & NUM)
51 return n->numbr;
52#endif
53
54 /* all the conditionals are an attempt to avoid the expensive strtod */
55
56 n->numbr = 0.0;
57 n->flags |= NUM;
58
59 if (n->stlen == 0)
60 return 0.0;
61
62 cp = n->stptr;
63 if (ISALPHA(*cp))
64 return 0.0;
65
66 cpend = cp + n->stlen;
67 while (cp < cpend && isspace(*cp))
68 cp++;
69 if (cp == cpend || isalpha(*cp))
70 return 0.0;
71
72 if (n->flags & MAYBE_NUM) {
73 newflags = NUMBER;
74 n->flags &= ~MAYBE_NUM;
75 } else
76 newflags = 0;
77 if (cpend - cp == 1) {
78 if (ISDIGIT(*cp)) {
79 n->numbr = (AWKNUM)(*cp - '0');
80 n->flags |= newflags;
81 }
82 return n->numbr;
83 }
84
85#ifdef NONDECDATA
86 errno = 0;
87 if (! do_traditional && isnondecimal(cp)) {
88 n->numbr = nondec2awknum(cp, cpend - cp);
89 goto finish;
90 }
91#endif /* NONDECDATA */
92
93 errno = 0;
94 save = *cpend;
95 *cpend = '\0';
96 n->numbr = (AWKNUM) strtod((const char *) cp, &ptr);
97
98 /* POSIX says trailing space is OK for NUMBER */
99 while (ISSPACE(*ptr))
100 ptr++;
101 *cpend = save;
102finish:
103 /* the >= should be ==, but for SunOS 3.5 strtod() */
104 if (errno == 0 && ptr >= cpend)
105 n->flags |= newflags;
106 else
107 errno = 0;
108
109 return n->numbr;
110}
111
/*
 * Strings for the single-digit integers 0 through 9, indexed by value.
 * format_val uses this table as a shortcut when converting small
 * non-negative integers to strings.  (More complicated variations on
 * this theme didn't seem to pay off, but systematic testing might be
 * in order at some point.)
 */
static const char *values[] = {
	"0", "1", "2", "3", "4",
	"5", "6", "7", "8", "9",
};
/* number of entries in values[] */
#define	NVAL	(sizeof(values)/sizeof(values[0]))
130
131/* format_val --- format a numeric value based on format */
132
133NODE *
134format_val(format, index, s)
135char *format;
136int index;
137register NODE *s;
138{
139 char buf[128];
140 register char *sp = buf;
141 double val;
142
143 /* not an integral value, or out of range */
144 if ((val = double_to_int(s->numbr)) != s->numbr
145 || val < LONG_MIN || val > LONG_MAX) {
146 /*
147 * Once upon a time, if GFMT_WORKAROUND wasn't defined,
148 * we just blindly did this:
149 * sprintf(sp, format, s->numbr);
150 * s->stlen = strlen(sp);
151 * s->stfmt = (char) index;
152 * but that's no good if, e.g., OFMT is %s. So we punt,
153 * and just always format the value ourselves.
154 */
155
156 NODE *dummy, *r;
157 unsigned short oflags;
158 extern NODE *format_tree P((const char *, int, NODE *));
159 extern NODE **fmt_list; /* declared in eval.c */
160
161 /* create dummy node for a sole use of format_tree */
162 getnode(dummy);
163 dummy->lnode = s;
164 dummy->rnode = NULL;
165 oflags = s->flags;
166 s->flags |= PERM; /* prevent from freeing by format_tree() */
167 r = format_tree(format, fmt_list[index]->stlen, dummy);
168 s->flags = oflags;
169 s->stfmt = (char) index;
170 s->stlen = r->stlen;
171 s->stptr = r->stptr;
172 freenode(r); /* Do not free_temp(r)! We want */
173 freenode(dummy); /* to keep s->stptr == r->stpr. */
174
175 goto no_malloc;
176 } else {
177 /* integral value */
178 /* force conversion to long only once */
179 register long num = (long) val;
180 if (num < NVAL && num >= 0) {
181 sp = (char *) values[num];
182 s->stlen = 1;
183 } else {
184 (void) sprintf(sp, "%ld", num);
185 s->stlen = strlen(sp);
186 }
187 s->stfmt = -1;
188 }
189 emalloc(s->stptr, char *, s->stlen + 2, "format_val");
190 memcpy(s->stptr, sp, s->stlen+1);
191no_malloc:
192 s->stref = 1;
193 s->flags |= STR;
194 return s;
195}
196
197/* r_force_string --- force a value to be a string */
198
199NODE *
200r_force_string(s)
201register NODE *s;
202{
203#ifdef DEBUG
204 if (s == NULL)
205 cant_happen();
206 if (s->type != Node_val)
207 cant_happen();
208/*
209 if ((s->flags & NUM) == 0)
210 cant_happen();
211*/
212 if (s->stref <= 0)
213 cant_happen();
214 if ((s->flags & STR) != 0
215 && (s->stfmt == -1 || s->stfmt == CONVFMTidx))
216 return s;
217#endif
218
219 return format_val(CONVFMT, CONVFMTidx, s);
220}
221
222/*
223 * dupnode:
224 * Duplicate a node. (For strings, "duplicate" means crank up the
225 * reference count.)
226 */
227
228NODE *
229dupnode(n)
230NODE *n;
231{
232 register NODE *r;
233
234 if ((n->flags & TEMP) != 0) {
235 n->flags &= ~TEMP;
236 n->flags |= MALLOC;
237 return n;
238 }
239 if ((n->flags & (MALLOC|STR)) == (MALLOC|STR)) {
240 if (n->stref < LONG_MAX)
241 n->stref++;
242 return n;
243 }
244 getnode(r);
245 *r = *n;
246 r->flags &= ~(PERM|TEMP|FIELD);
247 r->flags |= MALLOC;
248 if (n->type == Node_val && (n->flags & STR) != 0) {
249 r->stref = 1;
250 emalloc(r->stptr, char *, r->stlen + 2, "dupnode");
251 memcpy(r->stptr, n->stptr, r->stlen);
252 r->stptr[r->stlen] = '\0';
253 }
254 return r;
255}
256
257/* mk_number --- allocate a node with defined number */
258
259NODE *
260mk_number(x, flags)
261AWKNUM x;
262unsigned int flags;
263{
264 register NODE *r;
265
266 getnode(r);
267 r->type = Node_val;
268 r->numbr = x;
269 r->flags = flags | SCALAR;
270#ifdef DEBUG
271 r->stref = 1;
272 r->stptr = NULL;
273 r->stlen = 0;
274#endif
275 return r;
276}
277
278/* make_str_node --- make a string node */
279
280NODE *
281make_str_node(s, len, flags)
282char *s;
283size_t len;
284int flags;
285{
286 register NODE *r;
287
288 getnode(r);
289 r->type = Node_val;
290 r->flags = (STRING|STR|MALLOC|SCALAR);
291 if (flags & ALREADY_MALLOCED)
292 r->stptr = s;
293 else {
294 emalloc(r->stptr, char *, len + 2, s);
295 memcpy(r->stptr, s, len);
296 }
297 r->stptr[len] = '\0';
298
299 if ((flags & SCAN) != 0) { /* scan for escape sequences */
300 char *pf;
301 register char *ptm;
302 register int c;
303 register char *end;
304
305 end = &(r->stptr[len]);
306 for (pf = ptm = r->stptr; pf < end;) {
307 c = *pf++;
308 if (c == '\\') {
309 c = parse_escape(&pf);
310 if (c < 0) {
311 if (do_lint)
312 warning("backslash at end of string");
313 c = '\\';
314 }
315 *ptm++ = c;
316 } else
317 *ptm++ = c;
318 }
319 len = ptm - r->stptr;
320 erealloc(r->stptr, char *, len + 1, "make_str_node");
321 r->stptr[len] = '\0';
322 r->flags |= PERM;
323 }
324 r->stlen = len;
325 r->stref = 1;
326 r->stfmt = -1;
327
328 return r;
329}
330
331/* tmp_string --- allocate a temporary string */
332
333NODE *
334tmp_string(s, len)
335char *s;
336size_t len;
337{
338 register NODE *r;
339
340 r = make_string(s, len);
341 r->flags |= TEMP;
342 return r;
343}
344
345/* more_nodes --- allocate more nodes */
346
347#define NODECHUNK 100
348
349NODE *nextfree = NULL;
350
351NODE *
352more_nodes()
353{
354 register NODE *np;
355
356 /* get more nodes and initialize list */
357 emalloc(nextfree, NODE *, NODECHUNK * sizeof(NODE), "newnode");
358 for (np = nextfree; np <= &nextfree[NODECHUNK - 1]; np++) {
359 np->flags = 0;
360 np->nextp = np + 1;
361 }
362 --np;
363 np->nextp = NULL;
364 np = nextfree;
365 nextfree = nextfree->nextp;
366 return np;
367}
368
#ifdef DEBUG
/*
 * freenode --- release a node back to the pool
 *
 * Clears SCALAR in the node's flags.  In a normal build the node is then
 * pushed onto the front of the nextfree list; when MPROF is defined its
 * stref is zeroed and the node is handed to free() instead.
 */

void
freenode(it)
NODE *it;
{
	it->flags = it->flags & ~SCALAR;
#ifndef MPROF
	/* add it to head of freelist */
	it->nextp = nextfree;
	nextfree = it;
#else /* MPROF */
	it->stref = 0;
	free((char *) it);
#endif /* MPROF */
}
#endif /* DEBUG */
387
388/* unref --- remove reference to a particular node */
389
390void
391unref(tmp)
392register NODE *tmp;
393{
394 if (tmp == NULL)
395 return;
396 if ((tmp->flags & PERM) != 0)
397 return;
398 if ((tmp->flags & (MALLOC|TEMP)) != 0) {
399 tmp->flags &= ~TEMP;
400 if ((tmp->flags & STR) != 0) {
401 if (tmp->stref > 1) {
402 if (tmp->stref != LONG_MAX)
403 tmp->stref--;
404 return;
405 }
406 free(tmp->stptr);
407 }
408 freenode(tmp);
409 return;
410 }
411 if ((tmp->flags & FIELD) != 0) {
412 freenode(tmp);
413 return;
414 }
415}
416
417/*
418 * parse_escape:
419 *
420 * Parse a C escape sequence. STRING_PTR points to a variable containing a
421 * pointer to the string to parse. That pointer is updated past the
422 * characters we use. The value of the escape sequence is returned.
423 *
424 * A negative value means the sequence \ newline was seen, which is supposed to
425 * be equivalent to nothing at all.
426 *
427 * If \ is followed by a null character, we return a negative value and leave
428 * the string pointer pointing at the null character.
429 *
430 * If \ is followed by 000, we return 0 and leave the string pointer after the
431 * zeros. A value of 0 does not mean end of string.
432 *
433 * Posix doesn't allow \x.
434 */
435
436int
437parse_escape(string_ptr)
438char **string_ptr;
439{
440 register int c = *(*string_ptr)++;
441 register int i;
442 register int count;
443
444 switch (c) {
445 case 'a':
446 return BELL;
447 case 'b':
448 return '\b';
449 case 'f':
450 return '\f';
451 case 'n':
452 return '\n';
453 case 'r':
454 return '\r';
455 case 't':
456 return '\t';
457 case 'v':
458 return '\v';
459 case '\n':
460 return -2;
461 case 0:
462 (*string_ptr)--;
463 return -1;
464 case '0':
465 case '1':
466 case '2':
467 case '3':
468 case '4':
469 case '5':
470 case '6':
471 case '7':
472 i = c - '0';
473 count = 0;
474 while (++count < 3) {
475 if ((c = *(*string_ptr)++) >= '0' && c <= '7') {
476 i *= 8;
477 i += c - '0';
478 } else {
479 (*string_ptr)--;
480 break;
481 }
482 }
483 return i;
484 case 'x':
485 if (do_lint) {
486 static int didwarn = FALSE;
487
488 if (! didwarn) {
489 didwarn = TRUE;
490 warning("POSIX does not allow \"\\x\" escapes");
491 }
492 }
493 if (do_posix)
494 return ('x');
495 if (! isxdigit((*string_ptr)[0])) {
496 warning("no hex digits in \\x escape sequence");
497 return ('x');
498 }
499 i = 0;
500 for (;;) {
501 /* do outside test to avoid multiple side effects */
502 c = *(*string_ptr)++;
503 if (ISXDIGIT(c)) {
504 i *= 16;
505 if (ISDIGIT(c))
506 i += c - '0';
507 else if (ISUPPER(c))
508 i += c - 'A' + 10;
509 else
510 i += c - 'a' + 10;
511 } else {
512 (*string_ptr)--;
513 break;
514 }
515 }
516 return i;
517 default:
518 if (do_lint) {
519 static short warned[256];
520 unsigned char uc = (unsigned char) c;
521
522 /* N.B.: use unsigned char here to avoid Latin-1 problems */
523
524 if (! warned[uc]) {
525 warned[uc] = TRUE;
526
527 warning("escape sequence `\\%c' treated as plain `%c'", uc, uc);
528 }
529 }
530 return c;
531 }
532}