1 /* Copyright (c) 2013, Vsevolod Stakhov
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 * * Redistributions of source code must retain the above copyright
7 * notice, this list of conditions and the following disclaimer.
8 * * Redistributions in binary form must reproduce the above copyright
9 * notice, this list of conditions and the following disclaimer in the
10 * documentation and/or other materials provided with the distribution.
12 * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
13 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
14 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
15 * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
16 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
19 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 #include "ucl_internal.h"
26 #include "ucl_chartable.h"
30 * The implementation of ucl parser
/* Saved parser state record; the only member visible in this excerpt is the `pos` byte cursor. */
33 struct ucl_parser_saved_state {
37 const unsigned char *pos;
41 * Move up to len characters
45 * @return new position in chunk
/*
 * Per-character skip helper taking a chunk and a cursor `p`.
 * The visible branches maintain the chunk's column counter: one resets it to 0,
 * the other increments it.
 * NOTE(review): the condition selecting the branch and the cursor advance are
 * outside this excerpt - confirm against the full macro.
 */
47 #define ucl_chunk_skipc(chunk, p) do{ \
50 (chunk)->column = 0; \
52 else (chunk)->column ++; \
/*
 * Build a parser error message in `err` through ucl_create_err.
 * While the chunk cursor is still before chunk->end the message reports the
 * line, the column, the caller's text `str` and the offending byte: one format
 * prints it as a character (guarded by isgraph), the other as a two-digit hex
 * code. A third format reports an error at the end of the chunk.
 * NOTE(review): `code` is not referenced on any line visible here.
 */
59 ucl_set_err (struct ucl_chunk *chunk, int code, const char *str, UT_string **err)
61 if (chunk->pos < chunk->end) {
62 if (isgraph (*chunk->pos)) {
63 ucl_create_err (err, "error on line %d at column %d: '%s', character: '%c'",
64 chunk->line, chunk->column, str, *chunk->pos);
67 ucl_create_err (err, "error on line %d at column %d: '%s', character: '0x%02x'",
68 chunk->line, chunk->column, str, (int)*chunk->pos);
72 ucl_create_err (err, "error at the end of chunk: %s", str);
77 * Skip all comments from the current pos resolving nested and multiline comments
/*
 * Skip comments at the current position of the parser's head chunk
 * (parser->chunks), advancing with ucl_chunk_skipc.
 * Scanning only happens when the parser state is neither UCL_STATE_SCOMMENT
 * nor UCL_STATE_MCOMMENT. Multiline comments may nest: `comments_nested`
 * tracks the depth, and a non-zero depth once the inner loop is over is
 * reported as UCL_ENESTED ("unfinished multiline comment") in parser->err.
 */
82 ucl_skip_comments (struct ucl_parser *parser)
84 struct ucl_chunk *chunk = parser->chunks;
85 const unsigned char *p;
86 int comments_nested = 0;
92 if (parser->state != UCL_STATE_SCOMMENT &&
93 parser->state != UCL_STATE_MCOMMENT) {
94 while (p < chunk->end) {
96 ucl_chunk_skipc (chunk, p);
99 ucl_chunk_skipc (chunk, p);
/* A slash only opens a multiline comment when at least two bytes remain */
103 else if (*p == '/' && chunk->remain >= 2) {
105 ucl_chunk_skipc (chunk, p);
107 ucl_chunk_skipc (chunk, p);
109 while (p < chunk->end) {
111 ucl_chunk_skipc (chunk, p);
114 if (comments_nested == 0) {
115 ucl_chunk_skipc (chunk, p);
119 ucl_chunk_skipc (chunk, p);
/* A nested opener inside a multiline comment: both of its characters are skipped */
121 else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') {
123 ucl_chunk_skipc (chunk, p);
124 ucl_chunk_skipc (chunk, p);
127 ucl_chunk_skipc (chunk, p);
129 if (comments_nested != 0) {
130 ucl_set_err (chunk, UCL_ENESTED, "unfinished multiline comment", &parser->err);
140 * Return multiplier for a character
141 * @param c multiplier character
142 * @param is_bytes if true use 1024 multiplier
/*
 * Look up the scale factor for a numeric suffix character.
 * The suffix is lowercased with tolower and compared against a table whose rows
 * hold a character, a decimal factor and a binary factor; the rows visible here
 * are 'm' (1000^2 or 1024^2) and 'g' (1000^3 or 1024^3), and the loop scans
 * three rows. A match returns either mult_bytes or mult_normal.
 * NOTE(review): the test choosing between the two returns is outside this
 * excerpt; presumably it is `is_bytes` - confirm.
 */
145 static inline unsigned long
146 ucl_lex_num_multiplier (const unsigned char c, bool is_bytes) {
152 {'m', 1000 * 1000, 1024 * 1024},
154 {'g', 1000 * 1000 * 1000, 1024 * 1024 * 1024}
158 for (i = 0; i < 3; i ++) {
159 if (tolower (c) == multipliers[i].c) {
161 return multipliers[i].mult_bytes;
163 return multipliers[i].mult_normal;
172 * Return multiplier for time scaling
/*
 * Look up the number of seconds represented by a time suffix character.
 * The suffix is lowercased with tolower and compared against a five-row table;
 * the rows visible in this excerpt are 'w' (one week) and 'y' (one year).
 * A year is 365 days: the previous 'y' entry also multiplied by 7 and so
 * yielded 365 weeks.
 */
177 ucl_lex_time_multiplier (const unsigned char c) {
185 {'w', 60 * 60 * 24 * 7},
186 {'y', 60 * 60 * 24 * 365}
190 for (i = 0; i < 5; i ++) {
191 if (tolower (c) == multipliers[i].c) {
192 return multipliers[i].mult;
200 * Return true if a character is the end of an atom
/* Report whether `c` belongs to the UCL_CHARACTER_VALUE_END class, as decided by ucl_test_character. */
205 ucl_lex_is_atom_end (const unsigned char c)
207 return ucl_test_character (c, UCL_CHARACTER_VALUE_END);
/*
 * Decide from two consecutive characters whether a comment starts here.
 * The only test visible in this excerpt is the branch taken when c1 is '#'.
 * NOTE(review): the handling of c2 and the return values are outside this excerpt.
 */
211 ucl_lex_is_comment (const unsigned char c1, const unsigned char c2)
218 else if (c1 == '#') {
225 * Check variable found
/*
 * Match the text at `ptr` (at most `remain` bytes) against the registered
 * variables in parser->variables.
 * Two comparisons are visible: one requires `remain` to equal the variable name
 * length, the other only requires `remain` to be at least that long. On a
 * memcmp match the variable's value length is added to *out_len and the
 * position just past the name is returned.
 * If nothing matched, a handler is installed and `strict` is set, the generic
 * parser->var_handler is called; that path returns ptr + remain.
 * NOTE(review): presumably `strict` selects the exact-length comparison - confirm.
 */
234 static inline const char *
235 ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain,
236 size_t *out_len, bool strict, bool *found)
238 struct ucl_variable *var;
241 bool need_free = false;
243 LL_FOREACH (parser->variables, var) {
245 if (remain == var->var_len) {
246 if (memcmp (ptr, var->var, var->var_len) == 0) {
247 *out_len += var->value_len;
249 return (ptr + var->var_len);
254 if (remain >= var->var_len) {
255 if (memcmp (ptr, var->var, var->var_len) == 0) {
256 *out_len += var->value_len;
258 return (ptr + var->var_len);
264 /* XXX: can only handle ${VAR} */
265 if (!(*found) && parser->var_handler != NULL && strict) {
266 /* Call generic handler */
267 if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free,
273 return (ptr + remain);
281 * Check for a variable in a given string
/*
 * Inspect a possible variable reference at `ptr` and account for its expanded
 * size in *out_len by delegating to ucl_check_variable_safe.
 * The braced form passes the text after the first character (ptr + 1, length
 * p - ptr - 1) with strict matching; when *ptr is not '$' the unbraced text is
 * checked with non-strict matching. `ret` starts as `ptr`.
 * NOTE(review): the statement guarded by `found && !*vars_found` is outside
 * this excerpt; presumably it sets *vars_found - confirm.
 */
290 ucl_check_variable (struct ucl_parser *parser, const char *ptr,
291 size_t remain, size_t *out_len, bool *vars_found)
293 const char *p, *end, *ret = ptr;
297 /* We need to match the variable enclosed in braces */
302 ret = ucl_check_variable_safe (parser, ptr + 1, p - ptr - 1,
303 out_len, true, &found);
305 /* {} must be excluded actually */
319 else if (*ptr != '$') {
320 /* Not count escaped dollar sign */
321 ret = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &found);
322 if (found && !*vars_found) {
338 * Expand a single variable
/*
 * Expand one variable reference starting at `ptr` into the output cursor *dest.
 * `p` starts one byte past `ptr`; a '{' at that position selects a separate
 * branch. Registered variables are searched with memcmp over the name, and a
 * match copies the variable's value to the output. When `strict` is set and a
 * handler is installed, parser->var_handler supplies the replacement bytes,
 * which are copied instead. Otherwise the text is left as is.
 */
346 ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr,
347 size_t remain, unsigned char **dest)
349 unsigned char *d = *dest, *dst;
350 const char *p = ptr + 1, *ret;
351 struct ucl_variable *var;
353 bool need_free = false;
365 else if (*p == '{') {
372 LL_FOREACH (parser->variables, var) {
373 if (remain >= var->var_len) {
374 if (memcmp (p, var->var, var->var_len) == 0) {
375 memcpy (d, var->value, var->value_len);
384 if (strict && parser->var_handler != NULL) {
385 if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free,
387 memcpy (d, dst, dstlen);
394 /* Leave variable as is */
414 * Expand variables in string
/*
 * Expand variables found in src[0 .. in_len) into a newly allocated buffer.
 * Two calls are visible: ucl_check_variable is used to accumulate the output
 * length and the vars_found flag, then *dst is allocated with out_len + 1
 * bytes and ucl_expand_single_variable writes the expansions through a cursor.
 * NOTE(review): the return value and the no-variable path are outside this excerpt.
 */
422 ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst,
423 const char *src, size_t in_len)
425 const char *p, *end = src + in_len;
428 bool vars_found = false;
433 p = ucl_check_variable (parser, p + 1, end - p - 1, &out_len, &vars_found);
447 *dst = UCL_ALLOC (out_len + 1);
456 p = ucl_expand_single_variable (parser, p, end - p, &d);
469 * Store or copy pointer to the trash stack
470 * @param parser parser object
471 * @param src src string
472 * @param dst destination buffer (trash stack pointer)
473 * @param dst_const const destination pointer (e.g. value of object)
474 * @param in_len input length
475 * @param need_unescape need to unescape source (and copy it)
476 * @param need_lowercase need to lowercase value (and copy)
477 * @param need_expand need to expand variables (and copy as well)
478 * @return output length (excluding \0 symbol)
/*
 * Produce the stored form of a string of `in_len` bytes at `src`.
 * A private copy of in_len + 1 bytes is allocated into *dst when unescaping or
 * lowercasing is requested, when expansion is requested and variables are
 * registered, or when the parser lacks the UCL_PARSER_ZEROCOPY flag. The copy
 * is made with ucl_strlcpy_tolower or ucl_strlcpy_unsafe, then optionally
 * unescaped in place and expanded through ucl_expand_variable.
 * `ret` starts at -1; allocation failure is reported through ucl_set_err.
 */
480 static inline ssize_t
481 ucl_copy_or_store_ptr (struct ucl_parser *parser,
482 const unsigned char *src, unsigned char **dst,
483 const char **dst_const, size_t in_len,
484 bool need_unescape, bool need_lowercase, bool need_expand)
486 ssize_t ret = -1, tret;
489 if (need_unescape || need_lowercase ||
490 (need_expand && parser->variables != NULL) ||
491 !(parser->flags & UCL_PARSER_ZEROCOPY)) {
493 *dst = UCL_ALLOC (in_len + 1);
495 ucl_set_err (parser->chunks, 0, "cannot allocate memory for a string", &parser->err);
498 if (need_lowercase) {
499 ret = ucl_strlcpy_tolower (*dst, src, in_len + 1);
502 ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1);
506 ret = ucl_unescape_json_string (*dst, ret);
511 ret = ucl_expand_variable (parser, dst, tmp, ret);
513 /* Nothing to expand */
529 * Create and append an object at the specified level
/*
 * Turn `obj` into a container and push a frame for it on the parser stack.
 * For an object the code creates a typed UCL_OBJECT or sets the type on the
 * given one, creates its hash and moves the parser to UCL_STATE_KEY; for an
 * array it does the same with UCL_ARRAY and UCL_STATE_VALUE. A ucl_stack frame
 * is then allocated (failure is reported through ucl_set_err), prepended to
 * parser->stack, and parser->cur_obj is pointed at the container.
 * NOTE(review): presumably a new object is created only when `obj` is NULL and
 * `is_array` selects the array path - those tests are outside this excerpt.
 */
535 static inline ucl_object_t *
536 ucl_add_parser_stack (ucl_object_t *obj, struct ucl_parser *parser, bool is_array, int level)
538 struct ucl_stack *st;
542 obj = ucl_object_typed_new (UCL_OBJECT);
545 obj->type = UCL_OBJECT;
547 obj->value.ov = ucl_hash_create ();
548 parser->state = UCL_STATE_KEY;
552 obj = ucl_object_typed_new (UCL_ARRAY);
555 obj->type = UCL_ARRAY;
557 parser->state = UCL_STATE_VALUE;
560 st = UCL_ALLOC (sizeof (struct ucl_stack));
562 ucl_set_err (parser->chunks, 0, "cannot allocate memory for an object", &parser->err);
567 LL_PREPEND (parser->stack, st);
568 parser->cur_obj = obj;
/*
 * Try to parse a number from [start, end) into `obj`.
 * The first pass walks the text classifying characters: hex digits once a hex
 * prefix was seen, decimal digits, an 'x'/'X' prefix (which disables doubles),
 * and - when doubles are allowed - a single '.' and a single exponent that must
 * be followed by a digit or a sign. The text is then converted with strtod or
 * strtoimax (base 16 or 10); ERANGE from the conversion is checked.
 * After the conversion the character at `endptr` is examined: an atom end, NUL
 * or whitespace finishes the number, otherwise suffixes are applied through
 * ucl_lex_num_multiplier (size suffixes, optional 'b'/'B'), the "min" suffix
 * and ucl_lex_time_multiplier (time suffixes, only when allow_time is set).
 * The result is stored as UCL_FLOAT or UCL_TIME in value.dv, or as an integer
 * in value.iv, negated when is_neg is set.
 *
 * The <ctype.h> classifiers receive their argument cast to unsigned char:
 * `p` points to plain char, and passing a negative char value to isdigit,
 * isxdigit or tolower is undefined behaviour.
 */
574 ucl_maybe_parse_number (ucl_object_t *obj,
575 const char *start, const char *end, const char **pos,
576 bool allow_double, bool number_bytes, bool allow_time)
578 const char *p = start, *c = start;
580 bool got_dot = false, got_exp = false, need_double = false,
581 is_time = false, valid_start = false, is_hex = false,
592 if (is_hex && isxdigit ((unsigned char)*p)) {
595 else if (isdigit ((unsigned char)*p)) {
599 else if (!is_hex && (*p == 'x' || *p == 'X')) {
601 allow_double = false;
604 else if (allow_double) {
606 /* Empty digits sequence, not a number */
610 else if (*p == '.') {
612 /* Double dots, not a number */
622 else if (*p == 'e' || *p == 'E') {
624 /* Double exp, not a number */
636 if (!isdigit ((unsigned char)*p) && *p != '+' && *p != '-') {
637 /* Wrong exponent sign */
647 /* Got the end of the number, need to check */
663 dv = strtod (c, &endptr);
667 lv = strtoimax (c, &endptr, 16);
670 lv = strtoimax (c, &endptr, 10);
673 if (errno == ERANGE) {
678 /* Now check endptr */
679 if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0' ||
680 ucl_test_character (*endptr, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
685 if (endptr < end && endptr != start) {
695 if (p[1] == 's' || p[1] == 'S') {
702 if (p[0] == 'm' || p[0] == 'M') {
706 dv *= ucl_lex_num_multiplier (*p, false);
711 else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) {
717 lv *= ucl_lex_num_multiplier (*p, true);
721 else if (ucl_lex_is_atom_end (p[1])) {
723 dv *= ucl_lex_num_multiplier (*p, false);
726 lv *= ucl_lex_num_multiplier (*p, number_bytes);
731 else if (allow_time && end - p >= 3) {
732 if (tolower ((unsigned char)p[0]) == 'm' &&
733 tolower ((unsigned char)p[1]) == 'i' &&
734 tolower ((unsigned char)p[2]) == 'n') {
749 dv *= ucl_lex_num_multiplier (*p, false);
752 lv *= ucl_lex_num_multiplier (*p, number_bytes);
761 (p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
780 (p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
786 dv *= ucl_lex_time_multiplier (*p);
798 if (allow_double && (need_double || is_time)) {
800 obj->type = UCL_FLOAT;
803 obj->type = UCL_TIME;
805 obj->value.dv = is_neg ? (-dv) : dv;
809 obj->value.iv = is_neg ? (-lv) : lv;
816 * Parse possible number
819 * @return true if a number has been parsed
/*
 * Parse a number at the chunk cursor by calling ucl_maybe_parse_number with
 * doubles allowed, byte suffixes off, and time suffixes enabled unless the
 * parser has the UCL_PARSER_NO_TIME flag.
 * The chunk's `remain` and `column` are adjusted by the number of bytes
 * between the old cursor and the returned position. An ERANGE result is
 * reported as "numeric value out of range".
 */
822 ucl_lex_number (struct ucl_parser *parser,
823 struct ucl_chunk *chunk, ucl_object_t *obj)
825 const unsigned char *pos;
828 ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos,
829 true, false, ((parser->flags & UCL_PARSER_NO_TIME) == 0));
832 chunk->remain -= pos - chunk->pos;
833 chunk->column += pos - chunk->pos;
837 else if (ret == ERANGE) {
838 ucl_set_err (chunk, ERANGE, "numeric value out of range", &parser->err);
845 * Parse quoted string with possible escapes
848 * @return true if a string has been parsed
/*
 * Scan a quoted JSON-style string starting at the chunk cursor.
 * Syntax errors (UCL_ESYNTAX) are raised for a newline or other control
 * character inside the string, for an escape cut off by the end of the chunk,
 * for a unicode escape containing a non-hex digit among its (up to four)
 * digits, and for reaching the end of the chunk without a closing quote.
 * *need_unescape is set once an escape sequence has been seen.
 * NOTE(review): the writes to *ucl_escape and *var_expand are outside this
 * excerpt.
 */
851 ucl_lex_json_string (struct ucl_parser *parser,
852 struct ucl_chunk *chunk, bool *need_unescape, bool *ucl_escape, bool *var_expand)
854 const unsigned char *p = chunk->pos;
858 while (p < chunk->end) {
861 /* Unmasked control character */
863 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected newline", &parser->err);
866 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected control character", &parser->err);
870 else if (c == '\\') {
871 ucl_chunk_skipc (chunk, p);
873 if (p >= chunk->end) {
874 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err);
877 else if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) {
879 ucl_chunk_skipc (chunk, p);
880 for (i = 0; i < 4 && p < chunk->end; i ++) {
881 if (!isxdigit (*p)) {
882 ucl_set_err (chunk, UCL_ESYNTAX, "invalid utf escape", &parser->err);
885 ucl_chunk_skipc (chunk, p);
887 if (p >= chunk->end) {
888 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err);
893 ucl_chunk_skipc (chunk, p);
896 *need_unescape = true;
901 ucl_chunk_skipc (chunk, p);
904 else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) {
910 ucl_chunk_skipc (chunk, p);
913 ucl_set_err (chunk, UCL_ESYNTAX, "no quote at the end of json string", &parser->err);
918 * Parse a key in an object
921 * @return true if a key has been parsed
/*
 * Parse one key of an object and attach a fresh value object for it.
 *
 * Stages visible in this excerpt:
 *  - a macro introducer switches the parser to UCL_STATE_MACRO_NAME (saving
 *    the previous state) instead of parsing a key;
 *  - leading comments and whitespace are skipped; a '}' sets *end_of_object;
 *    a key starts with a UCL_CHARACTER_KEY_START character or a double quote,
 *    anything else is "key must begin with a letter";
 *  - the key body is either a run of UCL_CHARACTER_KEY characters ended by a
 *    UCL_CHARACTER_KEY_SEP character, or a quoted string lexed by
 *    ucl_lex_json_string;
 *  - after the key, whitespace and comments are skipped and at most one '='
 *    or ':' separator is accepted (a second one is a syntax error);
 *  - a look-ahead over `t` searches for '{' or '[' before a terminator
 *    (',', ';', newline or carriage return);
 *  - a new object is created, the key is copied with ucl_copy_or_store_ptr
 *    (lowercased when UCL_PARSER_KEY_LOWERCASE is set), empty keys are
 *    rejected, and the object is inserted into the hash of the container on
 *    top of the parser stack, or appended with DL_APPEND to an existing entry
 *    found under the same key; parser->cur_obj then points at the new object.
 *
 * NOTE(review): the whitespace skip over `t` has no bound check against
 * chunk->end on its loop condition - confirm the buffer is terminated.
 */
924 ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, bool *next_key, bool *end_of_object)
926 const unsigned char *p, *c = NULL, *end, *t;
927 const char *key = NULL;
928 bool got_quote = false, got_eq = false, got_semicolon = false,
929 need_unescape = false, ucl_escape = false, var_expand = false,
930 got_content = false, got_sep = false;
931 ucl_object_t *nobj, *tobj;
932 ucl_hash_t *container;
938 /* It is macro actually */
939 ucl_chunk_skipc (chunk, p);
940 parser->prev_state = parser->state;
941 parser->state = UCL_STATE_MACRO_NAME;
944 while (p < chunk->end) {
946 * A key must start with alpha, number, '/' or '_' and end with space character
949 if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
950 if (!ucl_skip_comments (parser)) {
955 else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
956 ucl_chunk_skipc (chunk, p);
958 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) {
959 /* The first symbol */
961 ucl_chunk_skipc (chunk, p);
964 else if (*p == '"') {
969 ucl_chunk_skipc (chunk, p);
971 else if (*p == '}') {
972 /* We have actually end of an object */
973 *end_of_object = true;
976 else if (*p == '.') {
977 ucl_chunk_skipc (chunk, p);
978 parser->prev_state = parser->state;
979 parser->state = UCL_STATE_MACRO_NAME;
983 /* Invalid identifier */
984 ucl_set_err (chunk, UCL_ESYNTAX, "key must begin with a letter", &parser->err);
989 /* Parse the body of a key */
991 if (ucl_test_character (*p, UCL_CHARACTER_KEY)) {
993 ucl_chunk_skipc (chunk, p);
995 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) {
1000 ucl_set_err (chunk, UCL_ESYNTAX, "invalid character in a key", &parser->err);
1005 /* We need to parse json like quoted string */
1006 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1009 /* Always escape keys obtained via json */
1010 end = chunk->pos - 1;
1017 if (p >= chunk->end && got_content) {
1018 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err);
1021 else if (!got_content) {
1024 *end_of_object = false;
1025 /* We are now at the end of the key, need to parse the rest */
1026 while (p < chunk->end) {
1027 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1028 ucl_chunk_skipc (chunk, p);
1030 else if (*p == '=') {
1031 if (!got_eq && !got_semicolon) {
1032 ucl_chunk_skipc (chunk, p);
1036 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected '=' character", &parser->err);
1040 else if (*p == ':') {
1041 if (!got_eq && !got_semicolon) {
1042 ucl_chunk_skipc (chunk, p);
1043 got_semicolon = true;
1046 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected ':' character", &parser->err);
1050 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1051 /* Check for comment */
1052 if (!ucl_skip_comments (parser)) {
1063 if (p >= chunk->end && got_content) {
1064 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err);
1068 got_sep = got_semicolon || got_eq;
1072 * Maybe we have more keys nested, so search for termination character.
1074 * 1) key1 key2 ... keyN [:=] value <- we treat that as error
1075 * 2) key1 ... keyN {} or [] <- we treat that as nested objects
1076 * 3) key1 value[;,\n] <- we treat that as linear object
1080 while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) {
1083 /* Check first non-space character after a key */
1084 if (*t != '{' && *t != '[') {
1085 while (t < chunk->end) {
1086 if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') {
1089 else if (*t == '{' || *t == '[') {
1098 /* Create a new object */
1099 nobj = ucl_object_new ();
1100 keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY],
1101 &key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, false);
1103 ucl_object_unref (nobj);
1106 else if (keylen == 0) {
1107 ucl_set_err (chunk, UCL_ESYNTAX, "empty keys are not allowed", &parser->err);
1108 ucl_object_unref (nobj);
1112 container = parser->stack->obj->value.ov;
1114 nobj->keylen = keylen;
1115 tobj = __DECONST (ucl_object_t *, ucl_hash_search_obj (container, nobj));
1117 container = ucl_hash_insert_object (container, nobj);
1120 parser->stack->obj->len ++;
1123 DL_APPEND (tobj, nobj);
1127 nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE;
1129 parser->stack->obj->value.ov = container;
1131 parser->cur_obj = nobj;
1140 * @return true if a key has been parsed
/*
 * Scan an unquoted string value starting at the chunk cursor.
 * Opening and closing counts are kept per brace kind in `braces`; a closing
 * '}' or ']' that does not exceed the number of openers seen so far is part of
 * the value and is skipped rather than treated as a terminator. A backslash
 * sets *need_unescape and skips the following character as well. The scan
 * stops at an atom-end character or at the start of a comment; running off the
 * end of the chunk is reported as "unfinished value".
 * NOTE(review): the action taken for '$' is outside this excerpt; presumably it
 * sets *var_expand - confirm.
 */
1143 ucl_parse_string_value (struct ucl_parser *parser,
1144 struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape)
1146 const unsigned char *p;
1148 UCL_BRACE_ROUND = 0,
1152 int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}};
1156 while (p < chunk->end) {
1158 /* Skip pairs of figure braces */
1160 braces[UCL_BRACE_FIGURE][0] ++;
1162 else if (*p == '}') {
1163 braces[UCL_BRACE_FIGURE][1] ++;
1164 if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) {
1165 /* This is not a termination symbol, continue */
1166 ucl_chunk_skipc (chunk, p);
1170 /* Skip pairs of square braces */
1171 else if (*p == '[') {
1172 braces[UCL_BRACE_SQUARE][0] ++;
1174 else if (*p == ']') {
1175 braces[UCL_BRACE_SQUARE][1] ++;
1176 if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) {
1177 /* This is not a termination symbol, continue */
1178 ucl_chunk_skipc (chunk, p);
1182 else if (*p == '$') {
1185 else if (*p == '\\') {
1186 *need_unescape = true;
1187 ucl_chunk_skipc (chunk, p);
1188 if (p < chunk->end) {
1189 ucl_chunk_skipc (chunk, p);
1194 if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1197 ucl_chunk_skipc (chunk, p);
1200 if (p >= chunk->end) {
1201 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished value", &parser->err);
1209 * Parse multiline string ending with \n{term}\n
1214 * @return size of multiline string or 0 in case of error
/*
 * Scan a multiline (heredoc-style) string until the terminator `term` of
 * `term_len` bytes is found followed by a newline or carriage return.
 * When the terminator matches, the chunk's `remain`, `pos` and `column` are
 * moved past it. Other characters are consumed with ucl_chunk_skipc.
 * NOTE(review): when exactly term_len bytes remain, the terminator test reads
 * p[term_len], which is the byte at chunk->end; whether that byte is readable
 * depends on how the caller's buffer is terminated - confirm.
 */
1217 ucl_parse_multiline_string (struct ucl_parser *parser,
1218 struct ucl_chunk *chunk, const unsigned char *term,
1219 int term_len, unsigned char const **beg,
1222 const unsigned char *p, *c;
1223 bool newline = false;
1230 while (p < chunk->end) {
1232 if (chunk->end - p < term_len) {
1235 else if (memcmp (p, term, term_len) == 0 && (p[term_len] == '\n' || p[term_len] == '\r')) {
1237 chunk->remain -= term_len;
1238 chunk->pos = p + term_len;
1239 chunk->column = term_len;
1253 ucl_chunk_skipc (chunk, p);
/*
 * Return the object that should receive the value being parsed.
 * When the container on top of the parser stack is an array, a new object is
 * allocated, made the parser's current object, and the array's head pointer
 * and length are updated. Otherwise the already allocated parser->cur_obj is
 * used.
 * NOTE(review): the statement linking the new object into the array list `t`
 * is outside this excerpt.
 */
1259 static ucl_object_t*
1260 ucl_get_value_object (struct ucl_parser *parser)
1262 ucl_object_t *t, *obj = NULL;
1264 if (parser->stack->obj->type == UCL_ARRAY) {
1265 /* Object must be allocated */
1266 obj = ucl_object_new ();
1267 t = parser->stack->obj->value.av;
1269 parser->cur_obj = obj;
1270 parser->stack->obj->value.av = t;
1271 parser->stack->obj->len ++;
1274 /* Object has been already allocated */
1275 obj = parser->cur_obj;
/*
 * Parse one value at the chunk cursor and store it in the object returned by
 * ucl_get_value_object.
 *
 * Paths visible in this excerpt:
 *  - leading whitespace and comments are skipped first;
 *  - a quoted string is lexed with ucl_lex_json_string and stored as
 *    UCL_STRING through ucl_copy_or_store_ptr (quotes excluded);
 *  - a nested object or array is opened with ucl_add_parser_stack at the
 *    current stack level;
 *  - an array ending moves the parser to UCL_STATE_AFTER_VALUE when the top of
 *    the stack is an array;
 *  - a "<<" introducer followed by uppercase letters starts a multiline value
 *    handled by ucl_parse_multiline_string;
 *  - otherwise a number is tried with ucl_lex_number, falling back to an
 *    unquoted string whose trailing whitespace is stripped; the literal "null"
 *    becomes UCL_NULL, booleans are recognised by ucl_maybe_parse_boolean and
 *    everything else becomes UCL_STRING. An empty string is an error.
 * Successful paths leave the parser in UCL_STATE_AFTER_VALUE.
 *
 * NOTE(review): the trailing-space loop walks backwards from chunk->pos - 1
 * with no visible lower bound - confirm it cannot pass the value start.
 */
1288 ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1290 const unsigned char *p, *c;
1291 ucl_object_t *obj = NULL;
1292 unsigned int stripped_spaces;
1294 bool need_unescape = false, ucl_escape = false, var_expand = false;
1298 /* Skip any spaces and comments */
1299 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) ||
1300 (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1301 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1302 ucl_chunk_skipc (chunk, p);
1304 if (!ucl_skip_comments (parser)) {
1310 while (p < chunk->end) {
1314 obj = ucl_get_value_object (parser);
1315 ucl_chunk_skipc (chunk, p);
1316 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1319 str_len = chunk->pos - c - 2;
1320 obj->type = UCL_STRING;
1321 if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, &obj->trash_stack[UCL_TRASH_VALUE],
1322 &obj->value.sv, str_len, need_unescape, false, var_expand)) == -1) {
1326 parser->state = UCL_STATE_AFTER_VALUE;
1331 obj = ucl_get_value_object (parser);
1332 /* We have a new object */
1333 obj = ucl_add_parser_stack (obj, parser, false, parser->stack->level);
1338 ucl_chunk_skipc (chunk, p);
1342 obj = ucl_get_value_object (parser);
1343 /* We have a new array */
1344 obj = ucl_add_parser_stack (obj, parser, true, parser->stack->level);
1349 ucl_chunk_skipc (chunk, p);
1353 /* We have the array ending */
1354 if (parser->stack && parser->stack->obj->type == UCL_ARRAY) {
1355 parser->state = UCL_STATE_AFTER_VALUE;
1363 obj = ucl_get_value_object (parser);
1364 /* We have something like multiline value, which must be <<[A-Z]+\n */
1365 if (chunk->end - p > 3) {
1366 if (memcmp (p, "<<", 2) == 0) {
1368 /* We allow only uppercase characters in multiline definitions */
1369 while (p < chunk->end && *p >= 'A' && *p <= 'Z') {
1373 /* Set chunk positions and start multiline parsing */
1375 chunk->remain -= p - c;
1379 if ((str_len = ucl_parse_multiline_string (parser, chunk, c,
1380 p - c, &c, &var_expand)) == 0) {
1381 ucl_set_err (chunk, UCL_ESYNTAX, "unterminated multiline value", &parser->err);
1384 obj->type = UCL_STRING;
1385 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1386 &obj->value.sv, str_len - 1, false, false, var_expand)) == -1) {
1390 parser->state = UCL_STATE_AFTER_VALUE;
1395 /* Fallback to ordinary strings */
1399 obj = ucl_get_value_object (parser);
1402 if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) {
1403 if (!ucl_lex_number (parser, chunk, obj)) {
1404 if (parser->state == UCL_STATE_ERROR) {
1409 parser->state = UCL_STATE_AFTER_VALUE;
1412 /* Fallback to normal string */
1415 if (!ucl_parse_string_value (parser, chunk, &var_expand, &need_unescape)) {
1418 /* Cut trailing spaces */
1419 stripped_spaces = 0;
1420 while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces),
1421 UCL_CHARACTER_WHITESPACE)) {
1424 str_len = chunk->pos - c - stripped_spaces;
1426 ucl_set_err (chunk, 0, "string value must not be empty", &parser->err);
1429 else if (str_len == 4 && memcmp (c, "null", 4) == 0) {
1431 obj->type = UCL_NULL;
1433 else if (!ucl_maybe_parse_boolean (obj, c, str_len)) {
1434 obj->type = UCL_STRING;
1435 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1436 &obj->value.sv, str_len, need_unescape,
1437 false, var_expand)) == -1) {
1442 parser->state = UCL_STATE_AFTER_VALUE;
1454 * Handle after value data
/*
 * Consume what follows a value: whitespace, comments (treated as separators),
 * value-end characters and container terminators.
 * A '}' or ']' with an empty parser stack is a syntax error. When the
 * terminator matches the type of the container on top of the stack, that stack
 * frame is popped and freed, and further frames are popped until the next
 * frame is absent or sits at the same level. A mismatching terminator is
 * reported as "unexpected terminating symbol detected". Once the stack is
 * empty the remaining input is ignored. A value that is not followed by any
 * separator yields "delimiter is missing".
 */
1460 ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1462 const unsigned char *p;
1463 bool got_sep = false;
1464 struct ucl_stack *st;
1468 while (p < chunk->end) {
1469 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1470 /* Skip whitespaces */
1471 ucl_chunk_skipc (chunk, p);
1473 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1475 if (!ucl_skip_comments (parser)) {
1478 /* Treat comment as a separator */
1482 else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) {
1483 if (*p == '}' || *p == ']') {
1484 if (parser->stack == NULL) {
1485 ucl_set_err (chunk, UCL_ESYNTAX, "end of array or object detected without corresponding start", &parser->err);
1488 if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) ||
1489 (*p == ']' && parser->stack->obj->type == UCL_ARRAY)) {
1491 /* Pop all nested objects from a stack */
1493 parser->stack = st->next;
1494 UCL_FREE (sizeof (struct ucl_stack), st);
1496 while (parser->stack != NULL) {
1498 if (st->next == NULL || st->next->level == st->level) {
1501 parser->stack = st->next;
1502 UCL_FREE (sizeof (struct ucl_stack), st);
1506 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected terminating symbol detected", &parser->err);
1510 if (parser->stack == NULL) {
1511 /* Ignore everything after a top object */
1515 ucl_chunk_skipc (chunk, p);
1520 /* Got a separator */
1522 ucl_chunk_skipc (chunk, p);
1528 ucl_set_err (chunk, UCL_ESYNTAX, "delimiter is missing", &parser->err);
/*
 * Locate the argument of a macro and report it through *macro_start and
 * *macro_len.
 * Three forms are handled: a quoted string (lexed with ucl_lex_json_string,
 * reported without its quotes), a multiline body whose leading whitespace is
 * skipped, and a bare token that runs up to the next atom-end character.
 * Afterwards trailing whitespace and ';' characters are skipped.
 * NOTE(review): `macro` is not referenced on any line visible here.
 */
1545 ucl_parse_macro_value (struct ucl_parser *parser,
1546 struct ucl_chunk *chunk, struct ucl_macro *macro,
1547 unsigned char const **macro_start, size_t *macro_len)
1549 const unsigned char *p, *c;
1550 bool need_unescape = false, ucl_escape = false, var_expand = false;
1556 /* We have macro value encoded in quotes */
1558 ucl_chunk_skipc (chunk, p);
1559 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1563 *macro_start = c + 1;
1564 *macro_len = chunk->pos - c - 2;
1568 /* We got a multiline macro body */
1569 ucl_chunk_skipc (chunk, p);
1570 /* Skip spaces at the beginning */
1571 while (p < chunk->end) {
1572 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1573 ucl_chunk_skipc (chunk, p);
1580 while (p < chunk->end) {
1584 ucl_chunk_skipc (chunk, p);
1588 ucl_chunk_skipc (chunk, p);
1591 /* Macro is not enclosed in quotes or braces */
1593 while (p < chunk->end) {
1594 if (ucl_lex_is_atom_end (*p)) {
1597 ucl_chunk_skipc (chunk, p);
1604 /* We are at the end of a macro */
1605 /* Skip ';' and space characters and return to previous state */
1606 while (p < chunk->end) {
1607 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') {
1610 ucl_chunk_skipc (chunk, p);
1616 * Handle the main states of ucl parser
1617 * @param parser parser structure
1618 * @param data the pointer to the beginning of a chunk
1619 * @param len the length of a chunk
1620 * @return true if chunk has been parsed and false in case of error
/*
 * Drive the parser over its head chunk (parser->chunks) until the chunk is
 * exhausted or an error state is entered.
 *
 * On the first call (parser->top_obj is NULL) a top-level container is pushed
 * with ucl_add_parser_stack - an array when the input starts with '[', an
 * object otherwise - and the parser starts in UCL_STATE_INIT.
 *
 * States handled on the visible lines:
 *  - UCL_STATE_INIT: skip comments and whitespace, then move to
 *    UCL_STATE_VALUE or UCL_STATE_KEY;
 *  - key parsing: requires a non-empty stack, calls ucl_parse_key and, for
 *    nested keys inside an object, pushes a new object one level deeper;
 *  - UCL_STATE_VALUE: ucl_parse_value, which also sets the next state;
 *  - UCL_STATE_AFTER_VALUE: ucl_parse_after_value, then back to key or value
 *    parsing depending on the container on top of the stack;
 *  - UCL_STATE_MACRO_NAME: collect the macro name, look it up in
 *    parser->macroes and skip following whitespace and comments;
 *  - UCL_STATE_MACRO: fetch the macro argument, expand variables in it and
 *    invoke the macro handler, restoring the previous state.
 * Any other state is reported as an internal error.
 *
 * The macro argument calls pass the addresses of macro_start, macro_len and
 * macro_escaped; those address-of expressions had been mangled by a character
 * encoding error and are restored here.
 */
1623 ucl_state_machine (struct ucl_parser *parser)
1626 struct ucl_chunk *chunk = parser->chunks;
1627 const unsigned char *p, *c = NULL, *macro_start = NULL;
1628 unsigned char *macro_escaped;
1629 size_t macro_len = 0;
1630 struct ucl_macro *macro = NULL;
1631 bool next_key = false, end_of_object = false;
1633 if (parser->top_obj == NULL) {
1634 if (*chunk->pos == '[') {
1635 obj = ucl_add_parser_stack (NULL, parser, true, 0);
1638 obj = ucl_add_parser_stack (NULL, parser, false, 0);
1643 parser->top_obj = obj;
1644 parser->cur_obj = obj;
1645 parser->state = UCL_STATE_INIT;
1649 while (chunk->pos < chunk->end) {
1650 switch (parser->state) {
1651 case UCL_STATE_INIT:
1653 * At the init state we can either go to the parse array or object
1654 * if we got [ or { correspondingly or can just treat new data as
1655 * a key of newly created object
1657 obj = parser->cur_obj;
1658 if (!ucl_skip_comments (parser)) {
1659 parser->prev_state = parser->state;
1660 parser->state = UCL_STATE_ERROR;
1664 /* Skip any spaces */
1665 while (p < chunk->end && ucl_test_character (*p,
1666 UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1667 ucl_chunk_skipc (chunk, p);
1671 parser->state = UCL_STATE_VALUE;
1672 ucl_chunk_skipc (chunk, p);
1675 parser->state = UCL_STATE_KEY;
1677 ucl_chunk_skipc (chunk, p);
1683 /* Skip any spaces */
1684 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1685 ucl_chunk_skipc (chunk, p);
1688 /* We have the end of an object */
1689 parser->state = UCL_STATE_AFTER_VALUE;
1692 if (parser->stack == NULL) {
1693 /* No objects are on stack, but we want to parse a key */
1694 ucl_set_err (chunk, UCL_ESYNTAX, "top object is finished but the parser "
1695 "expects a key", &parser->err);
1696 parser->prev_state = parser->state;
1697 parser->state = UCL_STATE_ERROR;
1700 if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) {
1701 parser->prev_state = parser->state;
1702 parser->state = UCL_STATE_ERROR;
1705 if (end_of_object) {
1707 parser->state = UCL_STATE_AFTER_VALUE;
1710 else if (parser->state != UCL_STATE_MACRO_NAME) {
1711 if (next_key && parser->stack->obj->type == UCL_OBJECT) {
1712 /* Parse more keys and nest objects accordingly */
1713 obj = ucl_add_parser_stack (parser->cur_obj, parser, false,
1714 parser->stack->level + 1);
1720 parser->state = UCL_STATE_VALUE;
1728 case UCL_STATE_VALUE:
1729 /* We need to check what we do have */
1730 if (!ucl_parse_value (parser, chunk)) {
1731 parser->prev_state = parser->state;
1732 parser->state = UCL_STATE_ERROR;
1735 /* State is set in ucl_parse_value call */
1738 case UCL_STATE_AFTER_VALUE:
1739 if (!ucl_parse_after_value (parser, chunk)) {
1740 parser->prev_state = parser->state;
1741 parser->state = UCL_STATE_ERROR;
1744 if (parser->stack != NULL) {
1745 if (parser->stack->obj->type == UCL_OBJECT) {
1746 parser->state = UCL_STATE_KEY;
1750 parser->state = UCL_STATE_VALUE;
1754 /* Skip everything at the end */
1759 case UCL_STATE_MACRO_NAME:
1760 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1761 ucl_chunk_skipc (chunk, p);
1763 else if (p - c > 0) {
1764 /* We got macro name */
1765 macro_len = (size_t)(p - c);
1766 HASH_FIND (hh, parser->macroes, c, macro_len, macro);
1767 if (macro == NULL) {
1768 ucl_create_err (&parser->err, "error on line %d at column %d: "
1769 "unknown macro: '%.*s', character: '%c'",
1770 chunk->line, chunk->column, (int)(p - c), c, *chunk->pos);
1771 parser->state = UCL_STATE_ERROR;
1774 /* Now we need to skip all spaces */
1775 while (p < chunk->end) {
1776 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1777 if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1779 if (!ucl_skip_comments (parser)) {
1786 ucl_chunk_skipc (chunk, p);
1788 parser->state = UCL_STATE_MACRO;
1791 case UCL_STATE_MACRO:
1792 if (!ucl_parse_macro_value (parser, chunk, macro,
1793 &macro_start, &macro_len)) {
1794 parser->prev_state = parser->state;
1795 parser->state = UCL_STATE_ERROR;
1798 macro_len = ucl_expand_variable (parser, &macro_escaped, macro_start, macro_len);
1799 parser->state = parser->prev_state;
/* A NULL macro_escaped selects the handler call on the raw macro_start text; otherwise the expanded copy is passed and freed */
1800 if (macro_escaped == NULL) {
1801 if (!macro->handler (macro_start, macro_len, macro->ud)) {
1806 if (!macro->handler (macro_escaped, macro_len, macro->ud)) {
1807 UCL_FREE (macro_len + 1, macro_escaped);
1810 UCL_FREE (macro_len + 1, macro_escaped);
1815 /* TODO: add all states */
1816 ucl_set_err (chunk, UCL_EINTERNAL, "internal error: parser is in an unknown state", &parser->err);
1817 parser->state = UCL_STATE_ERROR;
/*
 * Allocate and zero a new parser, register the built-in "include",
 * "try_include" and "includes" macros (each with the parser itself as user
 * data), and initialise the file variables with ucl_parser_set_filevars.
 * NOTE(review): the use of `flags` and the return statement are outside this
 * excerpt.
 */
1826 ucl_parser_new (int flags)
1828 struct ucl_parser *new;
1830 new = UCL_ALLOC (sizeof (struct ucl_parser));
1834 memset (new, 0, sizeof (struct ucl_parser));
1836 ucl_parser_register_macro (new, "include", ucl_include_handler, new);
1837 ucl_parser_register_macro (new, "try_include", ucl_try_include_handler, new);
1838 ucl_parser_register_macro (new, "includes", ucl_includes_handler, new);
1842 /* Initial assumption about filevars */
1843 ucl_parser_set_filevars (new, NULL, false);
/*
 * Register a macro handler under the name `macro`.
 * A NULL name or handler takes the early-exit branch. Otherwise a zeroed
 * ucl_macro record is allocated, the handler is stored, the name is duplicated
 * with strdup and the record is added to parser->macroes keyed by that name.
 * NOTE(review): the result of strdup is not checked on the visible lines, and
 * the store of `ud` is outside this excerpt.
 */
1850 ucl_parser_register_macro (struct ucl_parser *parser, const char *macro,
1851 ucl_macro_handler handler, void* ud)
1853 struct ucl_macro *new;
1855 if (macro == NULL || handler == NULL) {
1858 new = UCL_ALLOC (sizeof (struct ucl_macro));
1862 memset (new, 0, sizeof (struct ucl_macro));
1863 new->handler = handler;
1864 new->name = strdup (macro);
1866 HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new);
/*
 * Register, replace or remove the variable named `var`.
 * The existing list parser->variables is searched by name with strcmp.
 * A NULL `value` leads to the removal path, which unlinks the record from the
 * list and frees it. Otherwise either a new zeroed record is allocated, filled
 * with duplicated name and value (and their lengths) and prepended to the
 * list, or the value and value length of an existing record are replaced.
 * NOTE(review): the release of the previous value on replacement is not
 * visible in this excerpt - confirm it is freed.
 */
1870 ucl_parser_register_variable (struct ucl_parser *parser, const char *var,
1873 struct ucl_variable *new = NULL, *cur;
1879 /* Find whether a variable already exists */
1880 LL_FOREACH (parser->variables, cur) {
1881 if (strcmp (cur->var, var) == 0) {
1887 if (value == NULL) {
1890 /* Remove variable */
1891 LL_DELETE (parser->variables, new);
1894 UCL_FREE (sizeof (struct ucl_variable), new);
1903 new = UCL_ALLOC (sizeof (struct ucl_variable));
1907 memset (new, 0, sizeof (struct ucl_variable));
1908 new->var = strdup (var);
1909 new->var_len = strlen (var);
1910 new->value = strdup (value);
1911 new->value_len = strlen (value);
1913 LL_PREPEND (parser->variables, new);
1917 new->value = strdup (value);
1918 new->value_len = strlen (value);
/* Install the generic variable handler and its user data pointer on the parser (var_handler / var_data). */
1924 ucl_parser_set_variables_handler (struct ucl_parser *parser,
1925 ucl_variable_handler handler, void *ud)
1927 parser->var_handler = handler;
1928 parser->var_data = ud;
/*
 * Feed a buffer of `len` bytes at `data` to the parser.
 * A NULL or empty buffer is rejected with "invalid chunk added". If the parser
 * is not in UCL_STATE_ERROR, a chunk record is allocated and set to cover
 * [data, data + len), prepended to parser->chunks, and the recursion counter
 * is incremented and checked against UCL_MAX_RECURSION before the state
 * machine is run. A parser already in the error state reports
 * "a parser is in an invalid state".
 * NOTE(review): the chunk stores the caller's pointer; no copy of the data is
 * made on the visible lines.
 */
1932 ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data,
1935 struct ucl_chunk *chunk;
1937 if (data == NULL || len == 0) {
1938 ucl_create_err (&parser->err, "invalid chunk added");
1941 if (parser->state != UCL_STATE_ERROR) {
1942 chunk = UCL_ALLOC (sizeof (struct ucl_chunk));
1943 if (chunk == NULL) {
1944 ucl_create_err (&parser->err, "cannot allocate chunk structure");
1947 chunk->begin = data;
1948 chunk->remain = len;
1949 chunk->pos = chunk->begin;
1950 chunk->end = chunk->begin + len;
1953 LL_PREPEND (parser->chunks, chunk);
1954 parser->recursion ++;
1955 if (parser->recursion > UCL_MAX_RECURSION) {
1956 ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d",
1960 return ucl_state_machine (parser);
1963 ucl_create_err (&parser->err, "a parser is in an invalid state");
/*
 * Feed a C string to the parser by forwarding it to ucl_parser_add_chunk.
 * One branch reports "invalid string added"; another computes the length with
 * strlen.
 * NOTE(review): the conditions guarding those branches are outside this
 * excerpt; presumably a NULL `data` is rejected and strlen is used when no
 * length was supplied - confirm.
 */
1969 ucl_parser_add_string (struct ucl_parser *parser, const char *data,
1973 ucl_create_err (&parser->err, "invalid string added");
1977 len = strlen (data);
1980 return ucl_parser_add_chunk (parser, (const unsigned char *)data, len);