contrib/libucl/src/ucl_parser.c

   1 /* Copyright (c) 2013, Vsevolod Stakhov
   2  * All rights reserved.
   3  *
   4  * Redistribution and use in source and binary forms, with or without
   5  * modification, are permitted provided that the following conditions are met:
   6  *       * Redistributions of source code must retain the above copyright
   7  *         notice, this list of conditions and the following disclaimer.
   8  *       * Redistributions in binary form must reproduce the above copyright
   9  *         notice, this list of conditions and the following disclaimer in the
  10  *         documentation and/or other materials provided with the distribution.
  11  *
  12  * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
  13  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  14  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  15  * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
  16  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  17  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  18  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  19  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  20  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  21  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  22  */
  23
  24 #include "ucl.h"
  25 #include "ucl_internal.h"
  26 #include "ucl_chartable.h"
  27
/**
 * @file ucl_parser.c
 * The implementation of the UCL parser
 */
  32
  33 struct ucl_parser_saved_state {
  34         unsigned int line;
  35         unsigned int column;
  36         size_t remain;
  37         const unsigned char *pos;
  38 };
  39
  40 /**
  41  * Move up to len characters
  42  * @param parser
  43  * @param begin
  44  * @param len
  45  * @return new position in chunk
  46  */
  47 #define ucl_chunk_skipc(chunk, p)    do{                                        \
  48     if (*(p) == '\n') {                                                                         \
  49         (chunk)->line ++;                                                                       \
  50         (chunk)->column = 0;                                                            \
  51     }                                                                                                           \
  52     else (chunk)->column ++;                                                            \
  53     (p++);                                                                                                      \
  54     (chunk)->pos ++;                                                                            \
  55     (chunk)->remain --;                                                                         \
  56     } while (0)
  57
  58 static inline void
  59 ucl_set_err (struct ucl_chunk *chunk, int code, const char *str, UT_string **err)
  60 {
  61         if (chunk->pos < chunk->end) {
  62                 if (isgraph (*chunk->pos)) {
  63                         ucl_create_err (err, "error on line %d at column %d: '%s', character: '%c'",
  64                                         chunk->line, chunk->column, str, *chunk->pos);
  65                 }
  66                 else {
  67                         ucl_create_err (err, "error on line %d at column %d: '%s', character: '0x%02x'",
  68                                         chunk->line, chunk->column, str, (int)*chunk->pos);
  69                 }
  70         }
  71         else {
  72                 ucl_create_err (err, "error at the end of chunk: %s", str);
  73         }
  74 }
  75
  76 /**
  77  * Skip all comments from the current pos resolving nested and multiline comments
  78  * @param parser
  79  * @return
  80  */
  81 static bool
  82 ucl_skip_comments (struct ucl_parser *parser)
  83 {
  84         struct ucl_chunk *chunk = parser->chunks;
  85         const unsigned char *p;
  86         int comments_nested = 0;
  87
  88         p = chunk->pos;
  89
  90 start:
  91         if (*p == '#') {
  92                 if (parser->state != UCL_STATE_SCOMMENT &&
  93                                 parser->state != UCL_STATE_MCOMMENT) {
  94                         while (p < chunk->end) {
  95                                 if (*p == '\n') {
  96                                         ucl_chunk_skipc (chunk, p);
  97                                         goto start;
  98                                 }
  99                                 ucl_chunk_skipc (chunk, p);
 100                         }
 101                 }
 102         }
 103         else if (*p == '/' && chunk->remain >= 2) {
 104                 if (p[1] == '*') {
 105                         ucl_chunk_skipc (chunk, p);
 106                         comments_nested ++;
 107                         ucl_chunk_skipc (chunk, p);
 108
 109                         while (p < chunk->end) {
 110                                 if (*p == '*') {
 111                                         ucl_chunk_skipc (chunk, p);
 112                                         if (*p == '/') {
 113                                                 comments_nested --;
 114                                                 if (comments_nested == 0) {
 115                                                         ucl_chunk_skipc (chunk, p);
 116                                                         goto start;
 117                                                 }
 118                                         }
 119                                         ucl_chunk_skipc (chunk, p);
 120                                 }
 121                                 else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') {
 122                                         comments_nested ++;
 123                                         ucl_chunk_skipc (chunk, p);
 124                                         ucl_chunk_skipc (chunk, p);
 125                                         continue;
 126                                 }
 127                                 ucl_chunk_skipc (chunk, p);
 128                         }
 129                         if (comments_nested != 0) {
 130                                 ucl_set_err (chunk, UCL_ENESTED, "unfinished multiline comment", &parser->err);
 131                                 return false;
 132                         }
 133                 }
 134         }
 135
 136         return true;
 137 }
 138
 139 /**
 140  * Return multiplier for a character
 141  * @param c multiplier character
 142  * @param is_bytes if true use 1024 multiplier
 143  * @return multiplier
 144  */
 145 static inline unsigned long
 146 ucl_lex_num_multiplier (const unsigned char c, bool is_bytes) {
 147         const struct {
 148                 char c;
 149                 long mult_normal;
 150                 long mult_bytes;
 151         } multipliers[] = {
 152                         {'m', 1000 * 1000, 1024 * 1024},
 153                         {'k', 1000, 1024},
 154                         {'g', 1000 * 1000 * 1000, 1024 * 1024 * 1024}
 155         };
 156         int i;
 157
 158         for (i = 0; i < 3; i ++) {
 159                 if (tolower (c) == multipliers[i].c) {
 160                         if (is_bytes) {
 161                                 return multipliers[i].mult_bytes;
 162                         }
 163                         return multipliers[i].mult_normal;
 164                 }
 165         }
 166
 167         return 1;
 168 }
 169
 170
 171 /**
 172  * Return multiplier for time scaling
 173  * @param c
 174  * @return
 175  */
 176 static inline double
 177 ucl_lex_time_multiplier (const unsigned char c) {
 178         const struct {
 179                 char c;
 180                 double mult;
 181         } multipliers[] = {
 182                         {'m', 60},
 183                         {'h', 60 * 60},
 184                         {'d', 60 * 60 * 24},
 185                         {'w', 60 * 60 * 24 * 7},
 186                         {'y', 60 * 60 * 24 * 7 * 365}
 187         };
 188         int i;
 189
 190         for (i = 0; i < 5; i ++) {
 191                 if (tolower (c) == multipliers[i].c) {
 192                         return multipliers[i].mult;
 193                 }
 194         }
 195
 196         return 1;
 197 }
 198
 199 /**
 200  * Return true if a character is a end of an atom
 201  * @param c
 202  * @return
 203  */
 204 static inline bool
 205 ucl_lex_is_atom_end (const unsigned char c)
 206 {
 207         return ucl_test_character (c, UCL_CHARACTER_VALUE_END);
 208 }
 209
 210 static inline bool
 211 ucl_lex_is_comment (const unsigned char c1, const unsigned char c2)
 212 {
 213         if (c1 == '/') {
 214                 if (c2 == '*') {
 215                         return true;
 216                 }
 217         }
 218         else if (c1 == '#') {
 219                 return true;
 220         }
 221         return false;
 222 }
 223
 224 /**
 225  * Check variable found
 226  * @param parser
 227  * @param ptr
 228  * @param remain
 229  * @param out_len
 230  * @param strict
 231  * @param found
 232  * @return
 233  */
 234 static inline const char *
 235 ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain,
 236                 size_t *out_len, bool strict, bool *found)
 237 {
 238         struct ucl_variable *var;
 239         unsigned char *dst;
 240         size_t dstlen;
 241         bool need_free = false;
 242
 243         LL_FOREACH (parser->variables, var) {
 244                 if (strict) {
 245                         if (remain == var->var_len) {
 246                                 if (memcmp (ptr, var->var, var->var_len) == 0) {
 247                                         *out_len += var->value_len;
 248                                         *found = true;
 249                                         return (ptr + var->var_len);
 250                                 }
 251                         }
 252                 }
 253                 else {
 254                         if (remain >= var->var_len) {
 255                                 if (memcmp (ptr, var->var, var->var_len) == 0) {
 256                                         *out_len += var->value_len;
 257                                         *found = true;
 258                                         return (ptr + var->var_len);
 259                                 }
 260                         }
 261                 }
 262         }
 263
 264         /* XXX: can only handle ${VAR} */
 265         if (!(*found) && parser->var_handler != NULL && strict) {
 266                 /* Call generic handler */
 267                 if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free,
 268                                 parser->var_data)) {
 269                         *found = true;
 270                         if (need_free) {
 271                                 free (dst);
 272                         }
 273                         return (ptr + remain);
 274                 }
 275         }
 276
 277         return ptr;
 278 }
 279
 280 /**
 281  * Check for a variable in a given string
 282  * @param parser
 283  * @param ptr
 284  * @param remain
 285  * @param out_len
 286  * @param vars_found
 287  * @return
 288  */
 289 static const char *
 290 ucl_check_variable (struct ucl_parser *parser, const char *ptr,
 291                 size_t remain, size_t *out_len, bool *vars_found)
 292 {
 293         const char *p, *end, *ret = ptr;
 294         bool found = false;
 295
 296         if (*ptr == '{') {
 297                 /* We need to match the variable enclosed in braces */
 298                 p = ptr + 1;
 299                 end = ptr + remain;
 300                 while (p < end) {
 301                         if (*p == '}') {
 302                                 ret = ucl_check_variable_safe (parser, ptr + 1, p - ptr - 1,
 303                                                 out_len, true, &found);
 304                                 if (found) {
 305                                         /* {} must be excluded actually */
 306                                         ret ++;
 307                                         if (!*vars_found) {
 308                                                 *vars_found = true;
 309                                         }
 310                                 }
 311                                 else {
 312                                         *out_len += 2;
 313                                 }
 314                                 break;
 315                         }
 316                         p ++;
 317                 }
 318         }
 319         else if (*ptr != '$') {
 320                 /* Not count escaped dollar sign */
 321                 ret = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &found);
 322                 if (found && !*vars_found) {
 323                         *vars_found = true;
 324                 }
 325                 if (!found) {
 326                         (*out_len) ++;
 327                 }
 328         }
 329         else {
 330                 ret ++;
 331                 (*out_len) ++;
 332         }
 333
 334         return ret;
 335 }
 336
 337 /**
 338  * Expand a single variable
 339  * @param parser
 340  * @param ptr
 341  * @param remain
 342  * @param dest
 343  * @return
 344  */
 345 static const char *
 346 ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr,
 347                 size_t remain, unsigned char **dest)
 348 {
 349         unsigned char *d = *dest, *dst;
 350         const char *p = ptr + 1, *ret;
 351         struct ucl_variable *var;
 352         size_t dstlen;
 353         bool need_free = false;
 354         bool found = false;
 355         bool strict = false;
 356
 357         ret = ptr + 1;
 358         remain --;
 359
 360         if (*p == '$') {
 361                 *d++ = *p++;
 362                 *dest = d;
 363                 return p;
 364         }
 365         else if (*p == '{') {
 366                 p ++;
 367                 strict = true;
 368                 ret += 2;
 369                 remain -= 2;
 370         }
 371
 372         LL_FOREACH (parser->variables, var) {
 373                 if (remain >= var->var_len) {
 374                         if (memcmp (p, var->var, var->var_len) == 0) {
 375                                 memcpy (d, var->value, var->value_len);
 376                                 ret += var->var_len;
 377                                 d += var->value_len;
 378                                 found = true;
 379                                 break;
 380                         }
 381                 }
 382         }
 383         if (!found) {
 384                 if (strict && parser->var_handler != NULL) {
 385                         if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free,
 386                                                         parser->var_data)) {
 387                                 memcpy (d, dst, dstlen);
 388                                 ret += dstlen;
 389                                 d += remain;
 390                                 found = true;
 391                         }
 392                 }
 393
 394                 /* Leave variable as is */
 395                 if (!found) {
 396                         if (strict) {
 397                                 /* Copy '${' */
 398                                 memcpy (d, ptr, 2);
 399                                 d += 2;
 400                                 ret --;
 401                         }
 402                         else {
 403                                 memcpy (d, ptr, 1);
 404                                 d ++;
 405                         }
 406                 }
 407         }
 408
 409         *dest = d;
 410         return ret;
 411 }
 412
 413 /**
 414  * Expand variables in string
 415  * @param parser
 416  * @param dst
 417  * @param src
 418  * @param in_len
 419  * @return
 420  */
 421 static ssize_t
 422 ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst,
 423                 const char *src, size_t in_len)
 424 {
 425         const char *p, *end = src + in_len;
 426         unsigned char *d;
 427         size_t out_len = 0;
 428         bool vars_found = false;
 429
 430         p = src;
 431         while (p != end) {
 432                 if (*p == '$') {
 433                         p = ucl_check_variable (parser, p + 1, end - p - 1, &out_len, &vars_found);
 434                 }
 435                 else {
 436                         p ++;
 437                         out_len ++;
 438                 }
 439         }
 440
 441         if (!vars_found) {
 442                 /* Trivial case */
 443                 *dst = NULL;
 444                 return in_len;
 445         }
 446
 447         *dst = UCL_ALLOC (out_len + 1);
 448         if (*dst == NULL) {
 449                 return in_len;
 450         }
 451
 452         d = *dst;
 453         p = src;
 454         while (p != end) {
 455                 if (*p == '$') {
 456                         p = ucl_expand_single_variable (parser, p, end - p, &d);
 457                 }
 458                 else {
 459                         *d++ = *p++;
 460                 }
 461         }
 462
 463         *d = '\0';
 464
 465         return out_len;
 466 }
 467
 468 /**
 469  * Store or copy pointer to the trash stack
 470  * @param parser parser object
 471  * @param src src string
 472  * @param dst destination buffer (trash stack pointer)
 473  * @param dst_const const destination pointer (e.g. value of object)
 474  * @param in_len input length
 475  * @param need_unescape need to unescape source (and copy it)
 476  * @param need_lowercase need to lowercase value (and copy)
 477  * @param need_expand need to expand variables (and copy as well)
 478  * @return output length (excluding \0 symbol)
 479  */
 480 static inline ssize_t
 481 ucl_copy_or_store_ptr (struct ucl_parser *parser,
 482                 const unsigned char *src, unsigned char **dst,
 483                 const char **dst_const, size_t in_len,
 484                 bool need_unescape, bool need_lowercase, bool need_expand)
 485 {
 486         ssize_t ret = -1, tret;
 487         unsigned char *tmp;
 488
 489         if (need_unescape || need_lowercase ||
 490                         (need_expand && parser->variables != NULL) ||
 491                         !(parser->flags & UCL_PARSER_ZEROCOPY)) {
 492                 /* Copy string */
 493                 *dst = UCL_ALLOC (in_len + 1);
 494                 if (*dst == NULL) {
 495                         ucl_set_err (parser->chunks, 0, "cannot allocate memory for a string", &parser->err);
 496                         return false;
 497                 }
 498                 if (need_lowercase) {
 499                         ret = ucl_strlcpy_tolower (*dst, src, in_len + 1);
 500                 }
 501                 else {
 502                         ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1);
 503                 }
 504
 505                 if (need_unescape) {
 506                         ret = ucl_unescape_json_string (*dst, ret);
 507                 }
 508                 if (need_expand) {
 509                         tmp = *dst;
 510                         tret = ret;
 511                         ret = ucl_expand_variable (parser, dst, tmp, ret);
 512                         if (*dst == NULL) {
 513                                 /* Nothing to expand */
 514                                 *dst = tmp;
 515                                 ret = tret;
 516                         }
 517                 }
 518                 *dst_const = *dst;
 519         }
 520         else {
 521                 *dst_const = src;
 522                 ret = in_len;
 523         }
 524
 525         return ret;
 526 }
 527
 528 /**
 529  * Create and append an object at the specified level
 530  * @param parser
 531  * @param is_array
 532  * @param level
 533  * @return
 534  */
 535 static inline ucl_object_t *
 536 ucl_add_parser_stack (ucl_object_t *obj, struct ucl_parser *parser, bool is_array, int level)
 537 {
 538         struct ucl_stack *st;
 539
 540         if (!is_array) {
 541                 if (obj == NULL) {
 542                         obj = ucl_object_typed_new (UCL_OBJECT);
 543                 }
 544                 else {
 545                         obj->type = UCL_OBJECT;
 546                 }
 547                 obj->value.ov = ucl_hash_create ();
 548                 parser->state = UCL_STATE_KEY;
 549         }
 550         else {
 551                 if (obj == NULL) {
 552                         obj = ucl_object_typed_new (UCL_ARRAY);
 553                 }
 554                 else {
 555                         obj->type = UCL_ARRAY;
 556                 }
 557                 parser->state = UCL_STATE_VALUE;
 558         }
 559
 560         st = UCL_ALLOC (sizeof (struct ucl_stack));
 561         if (st == NULL) {
 562                 ucl_set_err (parser->chunks, 0, "cannot allocate memory for an object", &parser->err);
 563                 return NULL;
 564         }
 565         st->obj = obj;
 566         st->level = level;
 567         LL_PREPEND (parser->stack, st);
 568         parser->cur_obj = obj;
 569
 570         return obj;
 571 }
 572
 573 int
 574 ucl_maybe_parse_number (ucl_object_t *obj,
 575                 const char *start, const char *end, const char **pos,
 576                 bool allow_double, bool number_bytes, bool allow_time)
 577 {
 578         const char *p = start, *c = start;
 579         char *endptr;
 580         bool got_dot = false, got_exp = false, need_double = false,
 581                         is_time = false, valid_start = false, is_hex = false,
 582                         is_neg = false;
 583         double dv = 0;
 584         int64_t lv = 0;
 585
 586         if (*p == '-') {
 587                 is_neg = true;
 588                 c ++;
 589                 p ++;
 590         }
 591         while (p < end) {
 592                 if (is_hex && isxdigit (*p)) {
 593                         p ++;
 594                 }
 595                 else if (isdigit (*p)) {
 596                         valid_start = true;
 597                         p ++;
 598                 }
 599                 else if (!is_hex && (*p == 'x' || *p == 'X')) {
 600                         is_hex = true;
 601                         allow_double = false;
 602                         c = p + 1;
 603                 }
 604                 else if (allow_double) {
 605                         if (p == c) {
 606                                 /* Empty digits sequence, not a number */
 607                                 *pos = start;
 608                                 return EINVAL;
 609                         }
 610                         else if (*p == '.') {
 611                                 if (got_dot) {
 612                                         /* Double dots, not a number */
 613                                         *pos = start;
 614                                         return EINVAL;
 615                                 }
 616                                 else {
 617                                         got_dot = true;
 618                                         need_double = true;
 619                                         p ++;
 620                                 }
 621                         }
 622                         else if (*p == 'e' || *p == 'E') {
 623                                 if (got_exp) {
 624                                         /* Double exp, not a number */
 625                                         *pos = start;
 626                                         return EINVAL;
 627                                 }
 628                                 else {
 629                                         got_exp = true;
 630                                         need_double = true;
 631                                         p ++;
 632                                         if (p >= end) {
 633                                                 *pos = start;
 634                                                 return EINVAL;
 635                                         }
 636                                         if (!isdigit (*p) && *p != '+' && *p != '-') {
 637                                                 /* Wrong exponent sign */
 638                                                 *pos = start;
 639                                                 return EINVAL;
 640                                         }
 641                                         else {
 642                                                 p ++;
 643                                         }
 644                                 }
 645                         }
 646                         else {
 647                                 /* Got the end of the number, need to check */
 648                                 break;
 649                         }
 650                 }
 651                 else {
 652                         break;
 653                 }
 654         }
 655
 656         if (!valid_start) {
 657                 *pos = start;
 658                 return EINVAL;
 659         }
 660
 661         errno = 0;
 662         if (need_double) {
 663                 dv = strtod (c, &endptr);
 664         }
 665         else {
 666                 if (is_hex) {
 667                         lv = strtoimax (c, &endptr, 16);
 668                 }
 669                 else {
 670                         lv = strtoimax (c, &endptr, 10);
 671                 }
 672         }
 673         if (errno == ERANGE) {
 674                 *pos = start;
 675                 return ERANGE;
 676         }
 677
 678         /* Now check endptr */
 679         if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0' ||
 680                         ucl_test_character (*endptr, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
 681                 p = endptr;
 682                 goto set_obj;
 683         }
 684
 685         if (endptr < end && endptr != start) {
 686                 p = endptr;
 687                 switch (*p) {
 688                 case 'm':
 689                 case 'M':
 690                 case 'g':
 691                 case 'G':
 692                 case 'k':
 693                 case 'K':
 694                         if (end - p >= 2) {
 695                                 if (p[1] == 's' || p[1] == 'S') {
 696                                         /* Milliseconds */
 697                                         if (!need_double) {
 698                                                 need_double = true;
 699                                                 dv = lv;
 700                                         }
 701                                         is_time = true;
 702                                         if (p[0] == 'm' || p[0] == 'M') {
 703                                                 dv /= 1000.;
 704                                         }
 705                                         else {
 706                                                 dv *= ucl_lex_num_multiplier (*p, false);
 707                                         }
 708                                         p += 2;
 709                                         goto set_obj;
 710                                 }
 711                                 else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) {
 712                                         /* Bytes */
 713                                         if (need_double) {
 714                                                 need_double = false;
 715                                                 lv = dv;
 716                                         }
 717                                         lv *= ucl_lex_num_multiplier (*p, true);
 718                                         p += 2;
 719                                         goto set_obj;
 720                                 }
 721                                 else if (ucl_lex_is_atom_end (p[1])) {
 722                                         if (need_double) {
 723                                                 dv *= ucl_lex_num_multiplier (*p, false);
 724                                         }
 725                                         else {
 726                                                 lv *= ucl_lex_num_multiplier (*p, number_bytes);
 727                                         }
 728                                         p ++;
 729                                         goto set_obj;
 730                                 }
 731                                 else if (allow_time && end - p >= 3) {
 732                                         if (tolower (p[0]) == 'm' &&
 733                                                         tolower (p[1]) == 'i' &&
 734                                                         tolower (p[2]) == 'n') {
 735                                                 /* Minutes */
 736                                                 if (!need_double) {
 737                                                         need_double = true;
 738                                                         dv = lv;
 739                                                 }
 740                                                 is_time = true;
 741                                                 dv *= 60.;
 742                                                 p += 3;
 743                                                 goto set_obj;
 744                                         }
 745                                 }
 746                         }
 747                         else {
 748                                 if (need_double) {
 749                                         dv *= ucl_lex_num_multiplier (*p, false);
 750                                 }
 751                                 else {
 752                                         lv *= ucl_lex_num_multiplier (*p, number_bytes);
 753                                 }
 754                                 p ++;
 755                                 goto set_obj;
 756                         }
 757                         break;
 758                 case 'S':
 759                 case 's':
 760                         if (allow_time &&
 761                                         (p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
 762                                 if (!need_double) {
 763                                         need_double = true;
 764                                         dv = lv;
 765                                 }
 766                                 p ++;
 767                                 is_time = true;
 768                                 goto set_obj;
 769                         }
 770                         break;
 771                 case 'h':
 772                 case 'H':
 773                 case 'd':
 774                 case 'D':
 775                 case 'w':
 776                 case 'W':
 777                 case 'Y':
 778                 case 'y':
 779                         if (allow_time &&
 780                                         (p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
 781                                 if (!need_double) {
 782                                         need_double = true;
 783                                         dv = lv;
 784                                 }
 785                                 is_time = true;
 786                                 dv *= ucl_lex_time_multiplier (*p);
 787                                 p ++;
 788                                 goto set_obj;
 789                         }
 790                         break;
 791                 }
 792         }
 793
 794         *pos = c;
 795         return EINVAL;
 796
 797         set_obj:
 798         if (allow_double && (need_double || is_time)) {
 799                 if (!is_time) {
 800                         obj->type = UCL_FLOAT;
 801                 }
 802                 else {
 803                         obj->type = UCL_TIME;
 804                 }
 805                 obj->value.dv = is_neg ? (-dv) : dv;
 806         }
 807         else {
 808                 obj->type = UCL_INT;
 809                 obj->value.iv = is_neg ? (-lv) : lv;
 810         }
 811         *pos = p;
 812         return 0;
 813 }
 814
 815 /**
 816  * Parse possible number
 817  * @param parser
 818  * @param chunk
 819  * @return true if a number has been parsed
 820  */
 821 static bool
 822 ucl_lex_number (struct ucl_parser *parser,
 823                 struct ucl_chunk *chunk, ucl_object_t *obj)
 824 {
 825         const unsigned char *pos;
 826         int ret;
 827
 828         ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos,
 829                         true, false, ((parser->flags & UCL_PARSER_NO_TIME) == 0));
 830
 831         if (ret == 0) {
 832                 chunk->remain -= pos - chunk->pos;
 833                 chunk->column += pos - chunk->pos;
 834                 chunk->pos = pos;
 835                 return true;
 836         }
 837         else if (ret == ERANGE) {
 838                 ucl_set_err (chunk, ERANGE, "numeric value out of range", &parser->err);
 839         }
 840
 841         return false;
 842 }
 843
 844 /**
 845  * Parse quoted string with possible escapes
 846  * @param parser
 847  * @param chunk
 848  * @return true if a string has been parsed
 849  */
 850 static bool
 851 ucl_lex_json_string (struct ucl_parser *parser,
 852                 struct ucl_chunk *chunk, bool *need_unescape, bool *ucl_escape, bool *var_expand)
 853 {
 854         const unsigned char *p = chunk->pos;
 855         unsigned char c;
 856         int i;
 857
 858         while (p < chunk->end) {
 859                 c = *p;
 860                 if (c < 0x1F) {
 861                         /* Unmasked control character */
 862                         if (c == '\n') {
 863                                 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected newline", &parser->err);
 864                         }
 865                         else {
 866                                 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected control character", &parser->err);
 867                         }
 868                         return false;
 869                 }
 870                 else if (c == '\\') {
 871                         ucl_chunk_skipc (chunk, p);
 872                         c = *p;
 873                         if (p >= chunk->end) {
 874                                 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err);
 875                                 return false;
 876                         }
 877                         else if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) {
 878                                 if (c == 'u') {
 879                                         ucl_chunk_skipc (chunk, p);
 880                                         for (i = 0; i < 4 && p < chunk->end; i ++) {
 881                                                 if (!isxdigit (*p)) {
 882                                                         ucl_set_err (chunk, UCL_ESYNTAX, "invalid utf escape", &parser->err);
 883                                                         return false;
 884                                                 }
 885                                                 ucl_chunk_skipc (chunk, p);
 886                                         }
 887                                         if (p >= chunk->end) {
 888                                                 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err);
 889                                                 return false;
 890                                         }
 891                                 }
 892                                 else {
 893                                         ucl_chunk_skipc (chunk, p);
 894                                 }
 895                         }
 896                         *need_unescape = true;
 897                         *ucl_escape = true;
 898                         continue;
 899                 }
 900                 else if (c == '"') {
 901                         ucl_chunk_skipc (chunk, p);
 902                         return true;
 903                 }
 904                 else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) {
 905                         *ucl_escape = true;
 906                 }
 907                 else if (c == '$') {
 908                         *var_expand = true;
 909                 }
 910                 ucl_chunk_skipc (chunk, p);
 911         }
 912
 913         ucl_set_err (chunk, UCL_ESYNTAX, "no quote at the end of json string", &parser->err);
 914         return false;
 915 }
 916
 917 /**
 918  * Parse a key in an object
 919  * @param parser
 920  * @param chunk
 921  * @return true if a key has been parsed
 922  */
 923 static bool
 924 ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, bool *next_key, bool *end_of_object)
 925 {
 926         const unsigned char *p, *c = NULL, *end, *t;
 927         const char *key = NULL;
 928         bool got_quote = false, got_eq = false, got_semicolon = false,
 929                         need_unescape = false, ucl_escape = false, var_expand = false,
 930                         got_content = false, got_sep = false;
 931         ucl_object_t *nobj, *tobj;
 932         ucl_hash_t *container;
 933         ssize_t keylen;
 934
 935         p = chunk->pos;
 936
 937         if (*p == '.') {
 938                 /* It is macro actually */
 939                 ucl_chunk_skipc (chunk, p);
 940                 parser->prev_state = parser->state;
 941                 parser->state = UCL_STATE_MACRO_NAME;
 942                 return true;
 943         }
 944         while (p < chunk->end) {
 945                 /*
 946                  * A key must start with alpha, number, '/' or '_' and end with space character
 947                  */
 948                 if (c == NULL) {
 949                         if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
 950                                 if (!ucl_skip_comments (parser)) {
 951                                         return false;
 952                                 }
 953                                 p = chunk->pos;
 954                         }
 955                         else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
 956                                 ucl_chunk_skipc (chunk, p);
 957                         }
 958                         else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) {
 959                                 /* The first symbol */
 960                                 c = p;
 961                                 ucl_chunk_skipc (chunk, p);
 962                                 got_content = true;
 963                         }
 964                         else if (*p == '"') {
 965                                 /* JSON style key */
 966                                 c = p + 1;
 967                                 got_quote = true;
 968                                 got_content = true;
 969                                 ucl_chunk_skipc (chunk, p);
 970                         }
 971                         else if (*p == '}') {
 972                                 /* We have actually end of an object */
 973                                 *end_of_object = true;
 974                                 return true;
 975                         }
 976                         else if (*p == '.') {
 977                                 ucl_chunk_skipc (chunk, p);
 978                                 parser->prev_state = parser->state;
 979                                 parser->state = UCL_STATE_MACRO_NAME;
 980                                 return true;
 981                         }
 982                         else {
 983                                 /* Invalid identifier */
 984                                 ucl_set_err (chunk, UCL_ESYNTAX, "key must begin with a letter", &parser->err);
 985                                 return false;
 986                         }
 987                 }
 988                 else {
 989                         /* Parse the body of a key */
 990                         if (!got_quote) {
 991                                 if (ucl_test_character (*p, UCL_CHARACTER_KEY)) {
 992                                         got_content = true;
 993                                         ucl_chunk_skipc (chunk, p);
 994                                 }
 995                                 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) {
 996                                         end = p;
 997                                         break;
 998                                 }
 999                                 else {
1000                                         ucl_set_err (chunk, UCL_ESYNTAX, "invalid character in a key", &parser->err);
1001                                         return false;
1002                                 }
1003                         }
1004                         else {
1005                                 /* We need to parse json like quoted string */
1006                                 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1007                                         return false;
1008                                 }
1009                                 /* Always escape keys obtained via json */
1010                                 end = chunk->pos - 1;
1011                                 p = chunk->pos;
1012                                 break;
1013                         }
1014                 }
1015         }
1016
1017         if (p >= chunk->end && got_content) {
1018                 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err);
1019                 return false;
1020         }
1021         else if (!got_content) {
1022                 return true;
1023         }
1024         *end_of_object = false;
1025         /* We are now at the end of the key, need to parse the rest */
1026         while (p < chunk->end) {
1027                 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1028                         ucl_chunk_skipc (chunk, p);
1029                 }
1030                 else if (*p == '=') {
1031                         if (!got_eq && !got_semicolon) {
1032                                 ucl_chunk_skipc (chunk, p);
1033                                 got_eq = true;
1034                         }
1035                         else {
1036                                 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected '=' character", &parser->err);
1037                                 return false;
1038                         }
1039                 }
1040                 else if (*p == ':') {
1041                         if (!got_eq && !got_semicolon) {
1042                                 ucl_chunk_skipc (chunk, p);
1043                                 got_semicolon = true;
1044                         }
1045                         else {
1046                                 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected ':' character", &parser->err);
1047                                 return false;
1048                         }
1049                 }
1050                 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1051                         /* Check for comment */
1052                         if (!ucl_skip_comments (parser)) {
1053                                 return false;
1054                         }
1055                         p = chunk->pos;
1056                 }
1057                 else {
1058                         /* Start value */
1059                         break;
1060                 }
1061         }
1062
1063         if (p >= chunk->end && got_content) {
1064                 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err);
1065                 return false;
1066         }
1067
1068         got_sep = got_semicolon || got_eq;
1069
1070         if (!got_sep) {
1071                 /*
1072                  * Maybe we have more keys nested, so search for termination character.
1073                  * Possible choices:
1074                  * 1) key1 key2 ... keyN [:=] value <- we treat that as error
1075                  * 2) key1 ... keyN {} or [] <- we treat that as nested objects
1076                  * 3) key1 value[;,\n] <- we treat that as linear object
1077                  */
1078                 t = p;
1079                 *next_key = false;
1080                 while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) {
1081                         t ++;
1082                 }
1083                 /* Check first non-space character after a key */
1084                 if (*t != '{' && *t != '[') {
1085                         while (t < chunk->end) {
1086                                 if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') {
1087                                         break;
1088                                 }
1089                                 else if (*t == '{' || *t == '[') {
1090                                         *next_key = true;
1091                                         break;
1092                                 }
1093                                 t ++;
1094                         }
1095                 }
1096         }
1097
1098         /* Create a new object */
1099         nobj = ucl_object_new ();
1100         keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY],
1101                         &key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, false);
1102         if (keylen == -1) {
1103                 ucl_object_unref (nobj);
1104                 return false;
1105         }
1106         else if (keylen == 0) {
1107                 ucl_set_err (chunk, UCL_ESYNTAX, "empty keys are not allowed", &parser->err);
1108                 ucl_object_unref (nobj);
1109                 return false;
1110         }
1111
1112         container = parser->stack->obj->value.ov;
1113         nobj->key = key;
1114         nobj->keylen = keylen;
1115         tobj = __DECONST (ucl_object_t *, ucl_hash_search_obj (container, nobj));
1116         if (tobj == NULL) {
1117                 container = ucl_hash_insert_object (container, nobj);
1118                 nobj->prev = nobj;
1119                 nobj->next = NULL;
1120                 parser->stack->obj->len ++;
1121         }
1122         else {
1123                 DL_APPEND (tobj, nobj);
1124         }
1125
1126         if (ucl_escape) {
1127                 nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE;
1128         }
1129         parser->stack->obj->value.ov = container;
1130
1131         parser->cur_obj = nobj;
1132
1133         return true;
1134 }
1135
1136 /**
1137  * Parse a cl string
1138  * @param parser
1139  * @param chunk
1140  * @return true if a key has been parsed
1141  */
1142 static bool
1143 ucl_parse_string_value (struct ucl_parser *parser,
1144                 struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape)
1145 {
1146         const unsigned char *p;
1147         enum {
1148                 UCL_BRACE_ROUND = 0,
1149                 UCL_BRACE_SQUARE,
1150                 UCL_BRACE_FIGURE
1151         };
1152         int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}};
1153
1154         p = chunk->pos;
1155
1156         while (p < chunk->end) {
1157
1158                 /* Skip pairs of figure braces */
1159                 if (*p == '{') {
1160                         braces[UCL_BRACE_FIGURE][0] ++;
1161                 }
1162                 else if (*p == '}') {
1163                         braces[UCL_BRACE_FIGURE][1] ++;
1164                         if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) {
1165                                 /* This is not a termination symbol, continue */
1166                                 ucl_chunk_skipc (chunk, p);
1167                                 continue;
1168                         }
1169                 }
1170                 /* Skip pairs of square braces */
1171                 else if (*p == '[') {
1172                         braces[UCL_BRACE_SQUARE][0] ++;
1173                 }
1174                 else if (*p == ']') {
1175                         braces[UCL_BRACE_SQUARE][1] ++;
1176                         if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) {
1177                                 /* This is not a termination symbol, continue */
1178                                 ucl_chunk_skipc (chunk, p);
1179                                 continue;
1180                         }
1181                 }
1182                 else if (*p == '$') {
1183                         *var_expand = true;
1184                 }
1185                 else if (*p == '\\') {
1186                         *need_unescape = true;
1187                         ucl_chunk_skipc (chunk, p);
1188                         if (p < chunk->end) {
1189                                 ucl_chunk_skipc (chunk, p);
1190                         }
1191                         continue;
1192                 }
1193
1194                 if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1195                         break;
1196                 }
1197                 ucl_chunk_skipc (chunk, p);
1198         }
1199
1200         if (p >= chunk->end) {
1201                 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished value", &parser->err);
1202                 return false;
1203         }
1204
1205         return true;
1206 }
1207
1208 /**
1209  * Parse multiline string ending with \n{term}\n
1210  * @param parser
1211  * @param chunk
1212  * @param term
1213  * @param term_len
1214  * @return size of multiline string or 0 in case of error
1215  */
1216 static int
1217 ucl_parse_multiline_string (struct ucl_parser *parser,
1218                 struct ucl_chunk *chunk, const unsigned char *term,
1219                 int term_len, unsigned char const **beg,
1220                 bool *var_expand)
1221 {
1222         const unsigned char *p, *c;
1223         bool newline = false;
1224         int len = 0;
1225
1226         p = chunk->pos;
1227
1228         c = p;
1229
1230         while (p < chunk->end) {
1231                 if (newline) {
1232                         if (chunk->end - p < term_len) {
1233                                 return 0;
1234                         }
1235                         else if (memcmp (p, term, term_len) == 0 && (p[term_len] == '\n' || p[term_len] == '\r')) {
1236                                 len = p - c;
1237                                 chunk->remain -= term_len;
1238                                 chunk->pos = p + term_len;
1239                                 chunk->column = term_len;
1240                                 *beg = c;
1241                                 break;
1242                         }
1243                 }
1244                 if (*p == '\n') {
1245                         newline = true;
1246                 }
1247                 else {
1248                         if (*p == '$') {
1249                                 *var_expand = true;
1250                         }
1251                         newline = false;
1252                 }
1253                 ucl_chunk_skipc (chunk, p);
1254         }
1255
1256         return len;
1257 }
1258
1259 static ucl_object_t*
1260 ucl_get_value_object (struct ucl_parser *parser)
1261 {
1262         ucl_object_t *t, *obj = NULL;
1263
1264         if (parser->stack->obj->type == UCL_ARRAY) {
1265                 /* Object must be allocated */
1266                 obj = ucl_object_new ();
1267                 t = parser->stack->obj->value.av;
1268                 DL_APPEND (t, obj);
1269                 parser->cur_obj = obj;
1270                 parser->stack->obj->value.av = t;
1271                 parser->stack->obj->len ++;
1272         }
1273         else {
1274                 /* Object has been already allocated */
1275                 obj = parser->cur_obj;
1276         }
1277
1278         return obj;
1279 }
1280
1281 /**
1282  * Handle value data
1283  * @param parser
1284  * @param chunk
1285  * @return
1286  */
1287 static bool
1288 ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1289 {
1290         const unsigned char *p, *c;
1291         ucl_object_t *obj = NULL;
1292         unsigned int stripped_spaces;
1293         int str_len;
1294         bool need_unescape = false, ucl_escape = false, var_expand = false;
1295
1296         p = chunk->pos;
1297
1298         /* Skip any spaces and comments */
1299         if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) ||
1300                         (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1301                 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1302                         ucl_chunk_skipc (chunk, p);
1303                 }
1304                 if (!ucl_skip_comments (parser)) {
1305                         return false;
1306                 }
1307                 p = chunk->pos;
1308         }
1309
1310         while (p < chunk->end) {
1311                 c = p;
1312                 switch (*p) {
1313                 case '"':
1314                         obj = ucl_get_value_object (parser);
1315                         ucl_chunk_skipc (chunk, p);
1316                         if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1317                                 return false;
1318                         }
1319                         str_len = chunk->pos - c - 2;
1320                         obj->type = UCL_STRING;
1321                         if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, &obj->trash_stack[UCL_TRASH_VALUE],
1322                                         &obj->value.sv, str_len, need_unescape, false, var_expand)) == -1) {
1323                                 return false;
1324                         }
1325                         obj->len = str_len;
1326                         parser->state = UCL_STATE_AFTER_VALUE;
1327                         p = chunk->pos;
1328                         return true;
1329                         break;
1330                 case '{':
1331                         obj = ucl_get_value_object (parser);
1332                         /* We have a new object */
1333                         obj = ucl_add_parser_stack (obj, parser, false, parser->stack->level);
1334                         if (obj == NULL) {
1335                                 return false;
1336                         }
1337
1338                         ucl_chunk_skipc (chunk, p);
1339                         return true;
1340                         break;
1341                 case '[':
1342                         obj = ucl_get_value_object (parser);
1343                         /* We have a new array */
1344                         obj = ucl_add_parser_stack (obj, parser, true, parser->stack->level);
1345                         if (obj == NULL) {
1346                                 return false;
1347                         }
1348
1349                         ucl_chunk_skipc (chunk, p);
1350                         return true;
1351                         break;
1352                 case ']':
1353                         /* We have the array ending */
1354                         if (parser->stack && parser->stack->obj->type == UCL_ARRAY) {
1355                                 parser->state = UCL_STATE_AFTER_VALUE;
1356                                 return true;
1357                         }
1358                         else {
1359                                 goto parse_string;
1360                         }
1361                         break;
1362                 case '<':
1363                         obj = ucl_get_value_object (parser);
1364                         /* We have something like multiline value, which must be <<[A-Z]+\n */
1365                         if (chunk->end - p > 3) {
1366                                 if (memcmp (p, "<<", 2) == 0) {
1367                                         p += 2;
1368                                         /* We allow only uppercase characters in multiline definitions */
1369                                         while (p < chunk->end && *p >= 'A' && *p <= 'Z') {
1370                                                 p ++;
1371                                         }
1372                                         if (*p =='\n') {
1373                                                 /* Set chunk positions and start multiline parsing */
1374                                                 c += 2;
1375                                                 chunk->remain -= p - c;
1376                                                 chunk->pos = p + 1;
1377                                                 chunk->column = 0;
1378                                                 chunk->line ++;
1379                                                 if ((str_len = ucl_parse_multiline_string (parser, chunk, c,
1380                                                                 p - c, &c, &var_expand)) == 0) {
1381                                                         ucl_set_err (chunk, UCL_ESYNTAX, "unterminated multiline value", &parser->err);
1382                                                         return false;
1383                                                 }
1384                                                 obj->type = UCL_STRING;
1385                                                 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1386                                                         &obj->value.sv, str_len - 1, false, false, var_expand)) == -1) {
1387                                                         return false;
1388                                                 }
1389                                                 obj->len = str_len;
1390                                                 parser->state = UCL_STATE_AFTER_VALUE;
1391                                                 return true;
1392                                         }
1393                                 }
1394                         }
1395                         /* Fallback to ordinary strings */
1396                 default:
1397 parse_string:
1398                         if (obj == NULL) {
1399                                 obj = ucl_get_value_object (parser);
1400                         }
1401                         /* Parse atom */
1402                         if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) {
1403                                 if (!ucl_lex_number (parser, chunk, obj)) {
1404                                         if (parser->state == UCL_STATE_ERROR) {
1405                                                 return false;
1406                                         }
1407                                 }
1408                                 else {
1409                                         parser->state = UCL_STATE_AFTER_VALUE;
1410                                         return true;
1411                                 }
1412                                 /* Fallback to normal string */
1413                         }
1414
1415                         if (!ucl_parse_string_value (parser, chunk, &var_expand, &need_unescape)) {
1416                                 return false;
1417                         }
1418                         /* Cut trailing spaces */
1419                         stripped_spaces = 0;
1420                         while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces),
1421                                         UCL_CHARACTER_WHITESPACE)) {
1422                                 stripped_spaces ++;
1423                         }
1424                         str_len = chunk->pos - c - stripped_spaces;
1425                         if (str_len <= 0) {
1426                                 ucl_set_err (chunk, 0, "string value must not be empty", &parser->err);
1427                                 return false;
1428                         }
1429                         else if (str_len == 4 && memcmp (c, "null", 4) == 0) {
1430                                 obj->len = 0;
1431                                 obj->type = UCL_NULL;
1432                         }
1433                         else if (!ucl_maybe_parse_boolean (obj, c, str_len)) {
1434                                 obj->type = UCL_STRING;
1435                                 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1436                                                 &obj->value.sv, str_len, need_unescape,
1437                                                 false, var_expand)) == -1) {
1438                                         return false;
1439                                 }
1440                                 obj->len = str_len;
1441                         }
1442                         parser->state = UCL_STATE_AFTER_VALUE;
1443                         p = chunk->pos;
1444
1445                         return true;
1446                         break;
1447                 }
1448         }
1449
1450         return true;
1451 }
1452
1453 /**
1454  * Handle after value data
1455  * @param parser
1456  * @param chunk
1457  * @return
1458  */
1459 static bool
1460 ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1461 {
1462         const unsigned char *p;
1463         bool got_sep = false;
1464         struct ucl_stack *st;
1465
1466         p = chunk->pos;
1467
1468         while (p < chunk->end) {
1469                 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1470                         /* Skip whitespaces */
1471                         ucl_chunk_skipc (chunk, p);
1472                 }
1473                 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1474                         /* Skip comment */
1475                         if (!ucl_skip_comments (parser)) {
1476                                 return false;
1477                         }
1478                         /* Treat comment as a separator */
1479                         got_sep = true;
1480                         p = chunk->pos;
1481                 }
1482                 else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) {
1483                         if (*p == '}' || *p == ']') {
1484                                 if (parser->stack == NULL) {
1485                                         ucl_set_err (chunk, UCL_ESYNTAX, "end of array or object detected without corresponding start", &parser->err);
1486                                         return false;
1487                                 }
1488                                 if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) ||
1489                                                 (*p == ']' && parser->stack->obj->type == UCL_ARRAY)) {
1490
1491                                         /* Pop all nested objects from a stack */
1492                                         st = parser->stack;
1493                                         parser->stack = st->next;
1494                                         UCL_FREE (sizeof (struct ucl_stack), st);
1495
1496                                         while (parser->stack != NULL) {
1497                                                 st = parser->stack;
1498                                                 if (st->next == NULL || st->next->level == st->level) {
1499                                                         break;
1500                                                 }
1501                                                 parser->stack = st->next;
1502                                                 UCL_FREE (sizeof (struct ucl_stack), st);
1503                                         }
1504                                 }
1505                                 else {
1506                                         ucl_set_err (chunk, UCL_ESYNTAX, "unexpected terminating symbol detected", &parser->err);
1507                                         return false;
1508                                 }
1509
1510                                 if (parser->stack == NULL) {
1511                                         /* Ignore everything after a top object */
1512                                         return true;
1513                                 }
1514                                 else {
1515                                         ucl_chunk_skipc (chunk, p);
1516                                 }
1517                                 got_sep = true;
1518                         }
1519                         else {
1520                                 /* Got a separator */
1521                                 got_sep = true;
1522                                 ucl_chunk_skipc (chunk, p);
1523                         }
1524                 }
1525                 else {
1526                         /* Anything else */
1527                         if (!got_sep) {
1528                                 ucl_set_err (chunk, UCL_ESYNTAX, "delimiter is missing", &parser->err);
1529                                 return false;
1530                         }
1531                         return true;
1532                 }
1533         }
1534
1535         return true;
1536 }
1537
1538 /**
1539  * Handle macro data
1540  * @param parser
1541  * @param chunk
1542  * @return
1543  */
1544 static bool
1545 ucl_parse_macro_value (struct ucl_parser *parser,
1546                 struct ucl_chunk *chunk, struct ucl_macro *macro,
1547                 unsigned char const **macro_start, size_t *macro_len)
1548 {
1549         const unsigned char *p, *c;
1550         bool need_unescape = false, ucl_escape = false, var_expand = false;
1551
1552         p = chunk->pos;
1553
1554         switch (*p) {
1555         case '"':
1556                 /* We have macro value encoded in quotes */
1557                 c = p;
1558                 ucl_chunk_skipc (chunk, p);
1559                 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1560                         return false;
1561                 }
1562
1563                 *macro_start = c + 1;
1564                 *macro_len = chunk->pos - c - 2;
1565                 p = chunk->pos;
1566                 break;
1567         case '{':
1568                 /* We got a multiline macro body */
1569                 ucl_chunk_skipc (chunk, p);
1570                 /* Skip spaces at the beginning */
1571                 while (p < chunk->end) {
1572                         if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1573                                 ucl_chunk_skipc (chunk, p);
1574                         }
1575                         else {
1576                                 break;
1577                         }
1578                 }
1579                 c = p;
1580                 while (p < chunk->end) {
1581                         if (*p == '}') {
1582                                 break;
1583                         }
1584                         ucl_chunk_skipc (chunk, p);
1585                 }
1586                 *macro_start = c;
1587                 *macro_len = p - c;
1588                 ucl_chunk_skipc (chunk, p);
1589                 break;
1590         default:
1591                 /* Macro is not enclosed in quotes or braces */
1592                 c = p;
1593                 while (p < chunk->end) {
1594                         if (ucl_lex_is_atom_end (*p)) {
1595                                 break;
1596                         }
1597                         ucl_chunk_skipc (chunk, p);
1598                 }
1599                 *macro_start = c;
1600                 *macro_len = p - c;
1601                 break;
1602         }
1603
1604         /* We are at the end of a macro */
1605         /* Skip ';' and space characters and return to previous state */
1606         while (p < chunk->end) {
1607                 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') {
1608                         break;
1609                 }
1610                 ucl_chunk_skipc (chunk, p);
1611         }
1612         return true;
1613 }
1614
1615 /**
1616  * Handle the main states of rcl parser
1617  * @param parser parser structure
1618  * @param data the pointer to the beginning of a chunk
1619  * @param len the length of a chunk
1620  * @return true if chunk has been parsed and false in case of error
1621  */
1622 static bool
1623 ucl_state_machine (struct ucl_parser *parser)
1624 {
1625         ucl_object_t *obj;
1626         struct ucl_chunk *chunk = parser->chunks;
1627         const unsigned char *p, *c = NULL, *macro_start = NULL;
1628         unsigned char *macro_escaped;
1629         size_t macro_len = 0;
1630         struct ucl_macro *macro = NULL;
1631         bool next_key = false, end_of_object = false;
1632
1633         if (parser->top_obj == NULL) {
1634                 if (*chunk->pos == '[') {
1635                         obj = ucl_add_parser_stack (NULL, parser, true, 0);
1636                 }
1637                 else {
1638                         obj = ucl_add_parser_stack (NULL, parser, false, 0);
1639                 }
1640                 if (obj == NULL) {
1641                         return false;
1642                 }
1643                 parser->top_obj = obj;
1644                 parser->cur_obj = obj;
1645                 parser->state = UCL_STATE_INIT;
1646         }
1647
1648         p = chunk->pos;
1649         while (chunk->pos < chunk->end) {
1650                 switch (parser->state) {
1651                 case UCL_STATE_INIT:
1652                         /*
1653                          * At the init state we can either go to the parse array or object
1654                          * if we got [ or { correspondingly or can just treat new data as
1655                          * a key of newly created object
1656                          */
1657                         obj = parser->cur_obj;
1658                         if (!ucl_skip_comments (parser)) {
1659                                 parser->prev_state = parser->state;
1660                                 parser->state = UCL_STATE_ERROR;
1661                                 return false;
1662                         }
1663                         else {
1664                                 /* Skip any spaces */
1665                                 while (p < chunk->end && ucl_test_character (*p,
1666                                                 UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1667                                         ucl_chunk_skipc (chunk, p);
1668                                 }
1669                                 p = chunk->pos;
1670                                 if (*p == '[') {
1671                                         parser->state = UCL_STATE_VALUE;
1672                                         ucl_chunk_skipc (chunk, p);
1673                                 }
1674                                 else {
1675                                         parser->state = UCL_STATE_KEY;
1676                                         if (*p == '{') {
1677                                                 ucl_chunk_skipc (chunk, p);
1678                                         }
1679                                 }
1680                         }
1681                         break;
1682                 case UCL_STATE_KEY:
1683                         /* Skip any spaces */
1684                         while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1685                                 ucl_chunk_skipc (chunk, p);
1686                         }
1687                         if (*p == '}') {
1688                                 /* We have the end of an object */
1689                                 parser->state = UCL_STATE_AFTER_VALUE;
1690                                 continue;
1691                         }
1692                         if (parser->stack == NULL) {
1693                                 /* No objects are on stack, but we want to parse a key */
1694                                 ucl_set_err (chunk, UCL_ESYNTAX, "top object is finished but the parser "
1695                                                 "expects a key", &parser->err);
1696                                 parser->prev_state = parser->state;
1697                                 parser->state = UCL_STATE_ERROR;
1698                                 return false;
1699                         }
1700                         if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) {
1701                                 parser->prev_state = parser->state;
1702                                 parser->state = UCL_STATE_ERROR;
1703                                 return false;
1704                         }
1705                         if (end_of_object) {
1706                                 p = chunk->pos;
1707                                 parser->state = UCL_STATE_AFTER_VALUE;
1708                                 continue;
1709                         }
1710                         else if (parser->state != UCL_STATE_MACRO_NAME) {
1711                                 if (next_key && parser->stack->obj->type == UCL_OBJECT) {
1712                                         /* Parse more keys and nest objects accordingly */
1713                                         obj = ucl_add_parser_stack (parser->cur_obj, parser, false,
1714                                                         parser->stack->level + 1);
1715                                         if (obj == NULL) {
1716                                                 return false;
1717                                         }
1718                                 }
1719                                 else {
1720                                         parser->state = UCL_STATE_VALUE;
1721                                 }
1722                         }
1723                         else {
1724                                 c = chunk->pos;
1725                         }
1726                         p = chunk->pos;
1727                         break;
1728                 case UCL_STATE_VALUE:
1729                         /* We need to check what we do have */
1730                         if (!ucl_parse_value (parser, chunk)) {
1731                                 parser->prev_state = parser->state;
1732                                 parser->state = UCL_STATE_ERROR;
1733                                 return false;
1734                         }
1735                         /* State is set in ucl_parse_value call */
1736                         p = chunk->pos;
1737                         break;
1738                 case UCL_STATE_AFTER_VALUE:
1739                         if (!ucl_parse_after_value (parser, chunk)) {
1740                                 parser->prev_state = parser->state;
1741                                 parser->state = UCL_STATE_ERROR;
1742                                 return false;
1743                         }
1744                         if (parser->stack != NULL) {
1745                                 if (parser->stack->obj->type == UCL_OBJECT) {
1746                                         parser->state = UCL_STATE_KEY;
1747                                 }
1748                                 else {
1749                                         /* Array */
1750                                         parser->state = UCL_STATE_VALUE;
1751                                 }
1752                         }
1753                         else {
1754                                 /* Skip everything at the end */
1755                                 return true;
1756                         }
1757                         p = chunk->pos;
1758                         break;
1759                 case UCL_STATE_MACRO_NAME:
1760                         if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1761                                 ucl_chunk_skipc (chunk, p);
1762                         }
1763                         else if (p - c > 0) {
1764                                 /* We got macro name */
1765                                 macro_len = (size_t)(p - c);
1766                                 HASH_FIND (hh, parser->macroes, c, macro_len, macro);
1767                                 if (macro == NULL) {
1768                                         ucl_create_err (&parser->err, "error on line %d at column %d: "
1769                                                         "unknown macro: '%.*s', character: '%c'",
1770                                                                 chunk->line, chunk->column, (int)(p - c), c, *chunk->pos);
1771                                         parser->state = UCL_STATE_ERROR;
1772                                         return false;
1773                                 }
1774                                 /* Now we need to skip all spaces */
1775                                 while (p < chunk->end) {
1776                                         if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1777                                                 if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1778                                                         /* Skip comment */
1779                                                         if (!ucl_skip_comments (parser)) {
1780                                                                 return false;
1781                                                         }
1782                                                         p = chunk->pos;
1783                                                 }
1784                                                 break;
1785                                         }
1786                                         ucl_chunk_skipc (chunk, p);
1787                                 }
1788                                 parser->state = UCL_STATE_MACRO;
1789                         }
1790                         break;
1791                 case UCL_STATE_MACRO:
1792                         if (!ucl_parse_macro_value (parser, chunk, macro,
1793                                         &macro_start, &macro_len)) {
1794                                 parser->prev_state = parser->state;
1795                                 parser->state = UCL_STATE_ERROR;
1796                                 return false;
1797                         }
1798                         macro_len = ucl_expand_variable (parser, &macro_escaped, macro_start, macro_len);
1799                         parser->state = parser->prev_state;
1800                         if (macro_escaped == NULL) {
1801                                 if (!macro->handler (macro_start, macro_len, macro->ud)) {
1802                                         return false;
1803                                 }
1804                         }
1805                         else {
1806                                 if (!macro->handler (macro_escaped, macro_len, macro->ud)) {
1807                                         UCL_FREE (macro_len + 1, macro_escaped);
1808                                         return false;
1809                                 }
1810                                 UCL_FREE (macro_len + 1, macro_escaped);
1811                         }
1812                         p = chunk->pos;
1813                         break;
1814                 default:
1815                         /* TODO: add all states */
1816                         ucl_set_err (chunk, UCL_EINTERNAL, "internal error: parser is in an unknown state", &parser->err);
1817                         parser->state = UCL_STATE_ERROR;
1818                         return false;
1819                 }
1820         }
1821
1822         return true;
1823 }
1824
1825 struct ucl_parser*
1826 ucl_parser_new (int flags)
1827 {
1828         struct ucl_parser *new;
1829
1830         new = UCL_ALLOC (sizeof (struct ucl_parser));
1831         if (new == NULL) {
1832                 return NULL;
1833         }
1834         memset (new, 0, sizeof (struct ucl_parser));
1835
1836         ucl_parser_register_macro (new, "include", ucl_include_handler, new);
1837         ucl_parser_register_macro (new, "try_include", ucl_try_include_handler, new);
1838         ucl_parser_register_macro (new, "includes", ucl_includes_handler, new);
1839
1840         new->flags = flags;
1841
1842         /* Initial assumption about filevars */
1843         ucl_parser_set_filevars (new, NULL, false);
1844
1845         return new;
1846 }
1847
1848
1849 void
1850 ucl_parser_register_macro (struct ucl_parser *parser, const char *macro,
1851                 ucl_macro_handler handler, void* ud)
1852 {
1853         struct ucl_macro *new;
1854
1855         if (macro == NULL || handler == NULL) {
1856                 return;
1857         }
1858         new = UCL_ALLOC (sizeof (struct ucl_macro));
1859         if (new == NULL) {
1860                 return;
1861         }
1862         memset (new, 0, sizeof (struct ucl_macro));
1863         new->handler = handler;
1864         new->name = strdup (macro);
1865         new->ud = ud;
1866         HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new);
1867 }
1868
1869 void
1870 ucl_parser_register_variable (struct ucl_parser *parser, const char *var,
1871                 const char *value)
1872 {
1873         struct ucl_variable *new = NULL, *cur;
1874
1875         if (var == NULL) {
1876                 return;
1877         }
1878
1879         /* Find whether a variable already exists */
1880         LL_FOREACH (parser->variables, cur) {
1881                 if (strcmp (cur->var, var) == 0) {
1882                         new = cur;
1883                         break;
1884                 }
1885         }
1886
1887         if (value == NULL) {
1888
1889                 if (new != NULL) {
1890                         /* Remove variable */
1891                         LL_DELETE (parser->variables, new);
1892                         free (new->var);
1893                         free (new->value);
1894                         UCL_FREE (sizeof (struct ucl_variable), new);
1895                 }
1896                 else {
1897                         /* Do nothing */
1898                         return;
1899                 }
1900         }
1901         else {
1902                 if (new == NULL) {
1903                         new = UCL_ALLOC (sizeof (struct ucl_variable));
1904                         if (new == NULL) {
1905                                 return;
1906                         }
1907                         memset (new, 0, sizeof (struct ucl_variable));
1908                         new->var = strdup (var);
1909                         new->var_len = strlen (var);
1910                         new->value = strdup (value);
1911                         new->value_len = strlen (value);
1912
1913                         LL_PREPEND (parser->variables, new);
1914                 }
1915                 else {
1916                         free (new->value);
1917                         new->value = strdup (value);
1918                         new->value_len = strlen (value);
1919                 }
1920         }
1921 }
1922
1923 void
1924 ucl_parser_set_variables_handler (struct ucl_parser *parser,
1925                 ucl_variable_handler handler, void *ud)
1926 {
1927         parser->var_handler = handler;
1928         parser->var_data = ud;
1929 }
1930
1931 bool
1932 ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data,
1933                 size_t len)
1934 {
1935         struct ucl_chunk *chunk;
1936
1937         if (data == NULL || len == 0) {
1938                 ucl_create_err (&parser->err, "invalid chunk added");
1939                 return false;
1940         }
1941         if (parser->state != UCL_STATE_ERROR) {
1942                 chunk = UCL_ALLOC (sizeof (struct ucl_chunk));
1943                 if (chunk == NULL) {
1944                         ucl_create_err (&parser->err, "cannot allocate chunk structure");
1945                         return false;
1946                 }
1947                 chunk->begin = data;
1948                 chunk->remain = len;
1949                 chunk->pos = chunk->begin;
1950                 chunk->end = chunk->begin + len;
1951                 chunk->line = 1;
1952                 chunk->column = 0;
1953                 LL_PREPEND (parser->chunks, chunk);
1954                 parser->recursion ++;
1955                 if (parser->recursion > UCL_MAX_RECURSION) {
1956                         ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d",
1957                                         parser->recursion);
1958                         return false;
1959                 }
1960                 return ucl_state_machine (parser);
1961         }
1962
1963         ucl_create_err (&parser->err, "a parser is in an invalid state");
1964
1965         return false;
1966 }
1967
1968 bool
1969 ucl_parser_add_string (struct ucl_parser *parser, const char *data,
1970                 size_t len)
1971 {
1972         if (data == NULL) {
1973                 ucl_create_err (&parser->err, "invalid string added");
1974                 return false;
1975         }
1976         if (len == 0) {
1977                 len = strlen (data);
1978         }
1979
1980         return ucl_parser_add_chunk (parser, (const unsigned char *)data, len);
1981 }