When checking for binary file, check if ch is not EOF only ch actually got
[freebsd.git] / usr.bin / dtc / input_buffer.cc
1 /*-
2  * Copyright (c) 2013 David Chisnall
3  * All rights reserved.
4  *
5  * This software was developed by SRI International and the University of
6  * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
7  * ("CTSRD"), as part of the DARPA CRASH research programme.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  *
30  * $FreeBSD$
31  */
32
33 #include "input_buffer.hh"
34 #include <ctype.h>
35 #include <limits.h>
36 #include <stdint.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <functional>
41 #ifndef NDEBUG
42 #include <iostream>
43 #endif
44
45
46 #include <sys/stat.h>
47 #include <sys/mman.h>
48 #include <assert.h>
49
50 #ifndef MAP_PREFAULT_READ
51 #define MAP_PREFAULT_READ 0
52 #endif
53
54 namespace dtc
55 {
56 void
57 input_buffer::skip_spaces()
58 {
59         if (cursor >= size) { return; }
60         if (cursor < 0) { return; }
61         char c = buffer[cursor];
62         while ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\f')
63                || (c == '\v') || (c == '\r'))
64         {
65                 cursor++;
66                 if (cursor > size)
67                 {
68                         c = '\0';
69                 }
70                 else
71                 {
72                         c = buffer[cursor];
73                 }
74         }
75 }
76
77 input_buffer
78 input_buffer::buffer_from_offset(int offset, int s)
79 {
80         if (s == 0)
81         {
82                 s = size - offset;
83         }
84         if (offset > size)
85         {
86                 return input_buffer();
87         }
88         if (s > (size-offset))
89         {
90                 return input_buffer();
91         }
92         return input_buffer(&buffer[offset], s);
93 }
94
95 bool
96 input_buffer::consume(const char *str)
97 {
98         int len = strlen(str);
99         if (len > size - cursor)
100         {
101                 return false;
102         }
103         else
104         {
105                 for (int i=0 ; i<len ; ++i)
106                 {
107                         if (str[i] != buffer[cursor + i])
108                         {
109                                 return false;
110                         }
111                 }
112                 cursor += len;
113                 return true;
114         }
115         return false;
116 }
117
118 bool
119 input_buffer::consume_integer(unsigned long long &outInt)
120 {
121         // The first character must be a digit.  Hex and octal strings
122         // are prefixed by 0 and 0x, respectively.
123         if (!isdigit((*this)[0]))
124         {
125                 return false;
126         }
127         char *end=0;
128         outInt = strtoull(&buffer[cursor], &end, 0);
129         if (end == &buffer[cursor])
130         {
131                 return false;
132         }
133         cursor = end - buffer;
134         return true;
135 }
136
137 namespace {
138
139 /**
140  * Convenience typedef for the type that we use for all values.
141  */
142 typedef unsigned long long valty;
143
144 /**
145  * Expression tree currently being parsed.
146  */
147 struct expression
148 {
149         /**
150          * Evaluate this node, taking into account operator precedence.
151          */
152         virtual valty operator()() = 0;
153         /**
154          * Returns the precedence of this node.  Lower values indicate higher
155          * precedence.
156          */
157         virtual int precedence() = 0;
158         virtual ~expression() {}
159 #ifndef NDEBUG
160         /**
161          * Dumps this expression to `std::cerr`, appending a newline if `nl` is
162          * `true`.
163          */
164         void dump(bool nl=false)
165         {
166                 if (this == nullptr)
167                 {
168                         std::cerr << "{nullptr}\n";
169                         return;
170                 }
171                 dump_impl();
172                 if (nl)
173                 {
174                         std::cerr << '\n';
175                 }
176         }
177         private:
178         /**
179          * Method that sublcasses override to implement the behaviour of `dump()`.
180          */
181         virtual void dump_impl() = 0;
182 #endif
183 };
184
185 /**
186  * Expression wrapping a single integer.  Leaf nodes in the expression tree.
187  */
188 class terminal_expr : public expression
189 {
190         /**
191          * The value that this wraps.
192          */
193         valty val;
194         /**
195          * Evaluate.  Trivially returns the value that this class wraps.
196          */
197         valty operator()() override
198         {
199                 return val;
200         }
201         int precedence() override
202         {
203                 return 0;
204         }
205         public:
206         /**
207          * Constructor.
208          */
209         terminal_expr(valty v) : val(v) {}
210 #ifndef NDEBUG
211         void dump_impl() override { std::cerr << val; }
212 #endif
213 };
214
215 /**
216  * Parenthetical expression.  Exists to make the contents opaque.
217  */
218 struct paren_expression : public expression
219 {
220         /**
221          * The expression within the parentheses.
222          */
223         expression_ptr subexpr;
224         /**
225          * Constructor.  Takes the child expression as the only argument.
226          */
227         paren_expression(expression_ptr p) : subexpr(std::move(p)) {}
228         int precedence() override
229         {
230                 return 0;
231         }
232         /**
233          * Evaluate - just forwards to the underlying expression.
234          */
235         valty operator()() override
236         {
237                 return (*subexpr)();
238         }
239 #ifndef NDEBUG
240         void dump_impl() override
241         {
242                 std::cerr << " (";
243                 subexpr->dump();
244                 std::cerr << ") ";
245         }
246 #endif
247 };
248
249 /**
250  * Template class for unary operators.  The `OpChar` template parameter is
251  * solely for debugging and makes it easy to print the expression.  The `Op`
252  * template parameter is a function object that implements the operator that
253  * this class provides.  Most of these are provided by the `<functional>`
254  * header.
255  */
256 template<char OpChar, class Op>
257 class unary_operator : public expression
258 {
259         /**
260          * The subexpression for this unary operator.
261          */
262         expression_ptr subexpr;
263         valty operator()() override
264         {
265                 Op op;
266                 return op((*subexpr)());
267         }
268         /**
269          * All unary operators have the same precedence.  They are all evaluated
270          * before binary expressions, but after parentheses.
271          */
272         int precedence() override
273         {
274                 return 3;
275         }
276         public:
277         unary_operator(expression_ptr p) : subexpr(std::move(p)) {}
278 #ifndef NDEBUG
279         void dump_impl() override
280         {
281                 std::cerr << OpChar;
282                 subexpr->dump();
283         }
284 #endif
285 };
286
287 /**
288  * Abstract base class for binary operators.  Allows the tree to be modified
289  * without knowing what the operations actually are.
290  */
291 struct binary_operator_base : public expression
292 {
293         /**
294          * The left side of the expression.
295          */
296         expression_ptr lhs;
297         /**
298          * The right side of the expression.
299          */
300         expression_ptr rhs;
301         /**
302          * Insert a node somewhere down the path of left children, until it would
303          * be preempting something that should execute first.
304          */
305         void insert_left(binary_operator_base *new_left)
306         {
307                 if (lhs->precedence() < new_left->precedence())
308                 {
309                         new_left->rhs = std::move(lhs);
310                         lhs.reset(new_left);
311                 }
312                 else
313                 {
314                         static_cast<binary_operator_base*>(lhs.get())->insert_left(new_left);
315                 }
316         }
317 };
318
319 /**
320  * Template class for binary operators.  The precedence and the operation are
321  * provided as template parameters.
322  */
323 template<int Precedence, class Op>
324 struct binary_operator : public binary_operator_base
325 {
326         valty operator()() override
327         {
328                 Op op;
329                 return op((*lhs)(), (*rhs)());
330         }
331         int precedence() override
332         {
333                 return Precedence;
334         }
335 #ifdef NDEBUG
336         /**
337          * Constructor.  Takes the name of the operator as an argument, for
338          * debugging.  Only stores it in debug mode.
339          */
340         binary_operator(const char *) {}
341 #else
342         const char *opName;
343         binary_operator(const char *o) : opName(o) {}
344         void dump_impl() override
345         {
346                 lhs->dump();
347                 std::cerr << opName;
348                 rhs->dump();
349         }
350 #endif
351 };
352
353 /**
354  * Ternary conditional operators (`cond ? true : false`) are a special case -
355  * there are no other ternary operators.
356  */
357 class ternary_conditional_operator : public expression
358 {
359         /**
360          * The condition for the clause.
361          */
362         expression_ptr cond;
363         /**
364          * The expression that this evaluates to if the condition is true.
365          */
366         expression_ptr lhs;
367         /**
368          * The expression that this evaluates to if the condition is false.
369          */
370         expression_ptr rhs;
371         valty operator()() override
372         {
373                 return (*cond)() ? (*lhs)() : (*rhs)();
374         }
375         int precedence() override
376         {
377                 // The actual precedence of a ternary conditional operator is 15, but
378                 // its associativity is the opposite way around to the other operators,
379                 // so we fudge it slightly.
380                 return 3;
381         }
382 #ifndef NDEBUG
383         void dump_impl() override
384         {
385                 cond->dump();
386                 std::cerr << " ? ";
387                 lhs->dump();
388                 std::cerr << " : ";
389                 rhs->dump();
390         }
391 #endif
392         public:
393         ternary_conditional_operator(expression_ptr c,
394                                      expression_ptr l,
395                                      expression_ptr r) :
396                 cond(std::move(c)), lhs(std::move(l)), rhs(std::move(r)) {}
397 };
398
399 template<typename T>
400 struct lshift
401 {
402         constexpr T operator()(const T &lhs, const T &rhs) const
403         {
404                 return lhs << rhs;
405         }
406 };
407 template<typename T>
408 struct rshift
409 {
410         constexpr T operator()(const T &lhs, const T &rhs) const
411         {
412                 return lhs >> rhs;
413         }
414 };
415 template<typename T>
416 struct unary_plus
417 {
418         constexpr T operator()(const T &val) const
419         {
420                 return +val;
421         }
422 };
423 // TODO: Replace with std::bit_not once we can guarantee C++14 as a baseline.
424 template<typename T>
425 struct bit_not
426 {
427         constexpr T operator()(const T &val) const
428         {
429                 return ~val;
430         }
431 };
432
433 } // anonymous namespace
434
435
436 expression_ptr input_buffer::parse_binary_expression(expression_ptr lhs)
437 {
438         next_token();
439         binary_operator_base *expr = nullptr;
440         char op = ((*this)[0]);
441         switch (op)
442         {
443                 default:
444                         return lhs;
445                 case '+':
446                         expr = new binary_operator<6, std::plus<valty>>("+");
447                         break;
448                 case '-':
449                         expr = new binary_operator<6, std::minus<valty>>("-");
450                         break;
451                 case '%':
452                         expr = new binary_operator<5, std::modulus<valty>>("%");
453                         break;
454                 case '*':
455                         expr = new binary_operator<5, std::multiplies<valty>>("*");
456                         break;
457                 case '/':
458                         expr = new binary_operator<5, std::divides<valty>>("/");
459                         break;
460                 case '<':
461                         cursor++;
462                         switch ((*this)[0])
463                         {
464                                 default:
465                                         parse_error("Invalid operator");
466                                         return nullptr;
467                                 case ' ':
468                                 case '(':
469                                 case '0'...'9':
470                                         cursor--;
471                                         expr = new binary_operator<8, std::less<valty>>("<");
472                                         break;
473                                 case '=':
474                                         expr = new binary_operator<8, std::less_equal<valty>>("<=");
475                                         break;
476                                 case '<':
477                                         expr = new binary_operator<7, lshift<valty>>("<<");
478                                         break;
479                         }
480                         break;
481                 case '>':
482                         cursor++;
483                         switch ((*this)[0])
484                         {
485                                 default:
486                                         parse_error("Invalid operator");
487                                         return nullptr;
488                                 case '(':
489                                 case ' ':
490                                 case '0'...'9':
491                                         cursor--;
492                                         expr = new binary_operator<8, std::greater<valty>>(">");
493                                         break;
494                                 case '=':
495                                         expr = new binary_operator<8, std::greater_equal<valty>>(">=");
496                                         break;
497                                 case '>':
498                                         expr = new binary_operator<7, rshift<valty>>(">>");
499                                         break;
500                                         return lhs;
501                         }
502                         break;
503                 case '=':
504                         if ((*this)[1] != '=')
505                         {
506                                 parse_error("Invalid operator");
507                                 return nullptr;
508                         }
509                         consume('=');
510                         expr = new binary_operator<9, std::equal_to<valty>>("==");
511                         break;
512                 case '!':
513                         if ((*this)[1] != '=')
514                         {
515                                 parse_error("Invalid operator");
516                                 return nullptr;
517                         }
518                         cursor++;
519                         expr = new binary_operator<9, std::not_equal_to<valty>>("!=");
520                         break;
521                 case '&':
522                         if ((*this)[1] == '&')
523                         {
524                                 expr = new binary_operator<13, std::logical_and<valty>>("&&");
525                         }
526                         else
527                         {
528                                 expr = new binary_operator<10, std::bit_and<valty>>("&");
529                         }
530                         break;
531                 case '|':
532                         if ((*this)[1] == '|')
533                         {
534                                 expr = new binary_operator<12, std::logical_or<valty>>("||");
535                         }
536                         else
537                         {
538                                 expr = new binary_operator<14, std::bit_or<valty>>("|");
539                         }
540                         break;
541                 case '?':
542                 {
543                         consume('?');
544                         expression_ptr true_case = parse_expression();
545                         next_token();
546                         if (!true_case || !consume(':'))
547                         {
548                                 parse_error("Expected : in ternary conditional operator");
549                                 return nullptr;
550                         }
551                         expression_ptr false_case = parse_expression();
552                         if (!false_case)
553                         {
554                                 parse_error("Expected false condition for ternary operator");
555                                 return nullptr;
556                         }
557                         return expression_ptr(new ternary_conditional_operator(std::move(lhs),
558                                                 std::move(true_case), std::move(false_case)));
559                 }
560         }
561         cursor++;
562         next_token();
563         expression_ptr e(expr);
564         expression_ptr rhs(parse_expression());
565         if (!rhs)
566         {
567                 return nullptr;
568         }
569         expr->lhs = std::move(lhs);
570         if (rhs->precedence() < expr->precedence())
571         {
572                 expr->rhs = std::move(rhs);
573         }
574         else
575         {
576                 // If we're a normal left-to-right expression, then we need to insert
577                 // this as the far-left child node of the rhs expression
578                 binary_operator_base *rhs_op =
579                         static_cast<binary_operator_base*>(rhs.get());
580                 rhs_op->insert_left(expr);
581                 e.release();
582                 return rhs;
583         }
584         return e;
585 }
586
587 expression_ptr input_buffer::parse_expression(bool stopAtParen)
588 {
589         next_token();
590         unsigned long long leftVal;
591         expression_ptr lhs;
592         switch ((*this)[0])
593         {
594                 case '0'...'9':
595                         if (!consume_integer(leftVal))
596                         {
597                                 return nullptr;
598                         }
599                         lhs.reset(new terminal_expr(leftVal));
600                         break;
601                 case '(':
602                 {
603                         consume('(');
604                         expression_ptr &&subexpr = parse_expression();
605                         if (!subexpr)
606                         {
607                                 return nullptr;
608                         }
609                         lhs.reset(new paren_expression(std::move(subexpr)));
610                         if (!consume(')'))
611                         {
612                                 return nullptr;
613                         }
614                         if (stopAtParen)
615                         {
616                                 return lhs;
617                         }
618                         break;
619                 }
620                 case '+':
621                 {
622                         consume('+');
623                         expression_ptr &&subexpr = parse_expression();
624                         if (!subexpr)
625                         {
626                                 return nullptr;
627                         }
628                         lhs.reset(new unary_operator<'+', unary_plus<valty>>(std::move(subexpr)));
629                         break;
630                 }
631                 case '-':
632                 {
633                         consume('-');
634                         expression_ptr &&subexpr = parse_expression();
635                         if (!subexpr)
636                         {
637                                 return nullptr;
638                         }
639                         lhs.reset(new unary_operator<'-', std::negate<valty>>(std::move(subexpr)));
640                         break;
641                 }
642                 case '!':
643                 {
644                         consume('!');
645                         expression_ptr &&subexpr = parse_expression();
646                         if (!subexpr)
647                         {
648                                 return nullptr;
649                         }
650                         lhs.reset(new unary_operator<'!', std::logical_not<valty>>(std::move(subexpr)));
651                         break;
652                 }
653                 case '~':
654                 {
655                         consume('~');
656                         expression_ptr &&subexpr = parse_expression();
657                         if (!subexpr)
658                         {
659                                 return nullptr;
660                         }
661                         lhs.reset(new unary_operator<'~', bit_not<valty>>(std::move(subexpr)));
662                         break;
663                 }
664         }
665         if (!lhs)
666         {
667                 return nullptr;
668         }
669         return parse_binary_expression(std::move(lhs));
670 }
671
672 bool
673 input_buffer::consume_integer_expression(unsigned long long &outInt)
674 {
675         switch ((*this)[0])
676         {
677                 case '(':
678                 {
679                         expression_ptr e(parse_expression(true));
680                         if (!e)
681                         {
682                                 return false;
683                         }
684                         outInt = (*e)();
685                         return true;
686                 }
687                 case '0'...'9':
688                         return consume_integer(outInt);
689                 default:
690                         return false;
691         }
692 }
693
694 bool
695 input_buffer::consume_hex_byte(uint8_t &outByte)
696 {
697         if (!ishexdigit((*this)[0]) && !ishexdigit((*this)[1]))
698         {
699                 return false;
700         }
701         outByte = (digittoint((*this)[0]) << 4) | digittoint((*this)[1]);
702         cursor += 2;
703         return true;
704 }
705
706 input_buffer&
707 input_buffer::next_token()
708 {
709         int start;
710         do {
711                 start = cursor;
712                 skip_spaces();
713                 // Parse /* comments
714                 if ((*this)[0] == '/' && (*this)[1] == '*')
715                 {
716                         // eat the start of the comment
717                         ++(*this);
718                         ++(*this);
719                         do {
720                                 // Find the ending * of */
721                                 while ((**this != '\0') && (**this != '*'))
722                                 {
723                                         ++(*this);
724                                 }
725                                 // Eat the *
726                                 ++(*this);
727                         } while ((**this != '\0') && (**this != '/'));
728                         // Eat the /
729                         ++(*this);
730                 }
731                 // Parse // comments
732                 if (((*this)[0] == '/' && (*this)[1] == '/'))
733                 {
734                         // eat the start of the comment
735                         ++(*this);
736                         ++(*this);
737                         // Find the ending of the line
738                         while (**this != '\n')
739                         {
740                                 ++(*this);
741                         }
742                         // Eat the \n
743                         ++(*this);
744                 }
745         } while (start != cursor);
746         return *this;
747 }
748
749 void
750 input_buffer::parse_error(const char *msg)
751 {
752         int line_count = 1;
753         int line_start = 0;
754         int line_end = cursor;
755         for (int i=cursor ; i>0 ; --i)
756         {
757                 if (buffer[i] == '\n')
758                 {
759                         line_count++;
760                         if (line_start == 0)
761                         {
762                                 line_start = i+1;
763                         }
764                 }
765         }
766         for (int i=cursor+1 ; i<size ; ++i)
767         {
768                 if (buffer[i] == '\n')
769                 {
770                         line_end = i;
771                         break;
772                 }
773         }
774         fprintf(stderr, "Error on line %d: %s\n", line_count, msg);
775         fwrite(&buffer[line_start], line_end-line_start, 1, stderr);
776         putc('\n', stderr);
777         for (int i=0 ; i<(cursor-line_start) ; ++i)
778         {
779                 char c = (buffer[i+line_start] == '\t') ? '\t' : ' ';
780                 putc(c, stderr);
781         }
782         putc('^', stderr);
783         putc('\n', stderr);
784 }
785 #ifndef NDEBUG
786 void
787 input_buffer::dump()
788 {
789         fprintf(stderr, "Current cursor: %d\n", cursor);
790         fwrite(&buffer[cursor], size-cursor, 1, stderr);
791 }
792 #endif
793
794 mmap_input_buffer::mmap_input_buffer(int fd) : input_buffer(0, 0)
795 {
796         struct stat sb;
797         if (fstat(fd, &sb))
798         {
799                 perror("Failed to stat file");
800         }
801         size = sb.st_size;
802         buffer = (const char*)mmap(0, size, PROT_READ, MAP_PRIVATE |
803                         MAP_PREFAULT_READ, fd, 0);
804         if (buffer == MAP_FAILED)
805         {
806                 perror("Failed to mmap file");
807                 exit(EXIT_FAILURE);
808         }
809 }
810
811 mmap_input_buffer::~mmap_input_buffer()
812 {
813         if (buffer != 0)
814         {
815                 munmap((void*)buffer, size);
816         }
817 }
818
819 stream_input_buffer::stream_input_buffer() : input_buffer(0, 0)
820 {
821         int c;
822         while ((c = fgetc(stdin)) != EOF)
823         {
824                 b.push_back(c);
825         }
826         buffer = b.data();
827         size = b.size();
828 }
829
830 } // namespace dtc
831