gdb - Local mods (compile)
[dragonfly.git] / contrib / tre / lib / tre-internal.h
... / ...
CommitLineData
1/*
2 tre-internal.h - TRE internal definitions
3
4 This software is released under a BSD-style license.
5 See the file LICENSE for details and copyright.
6
7*/
8
9#ifndef TRE_INTERNAL_H
10#define TRE_INTERNAL_H 1
11
12#ifdef HAVE_WCHAR_H
13#include <wchar.h>
14#endif /* HAVE_WCHAR_H */
15
16#ifdef HAVE_WCTYPE_H
17#include <wctype.h>
18#endif /* !HAVE_WCTYPE_H */
19
20#include <ctype.h>
21#include <bitstring.h>
22#include "tre.h"
23#include "xlocale_private.h"
24
25#ifdef TRE_DEBUG
26#include <stdio.h>
27#define DPRINT(msg) do {printf msg; fflush(stdout);} while(/*CONSTCOND*/0)
28#else /* !TRE_DEBUG */
29#define DPRINT(msg) do { } while(/*CONSTCOND*/0)
30#endif /* !TRE_DEBUG */
31
32#define elementsof(x) ( sizeof(x) / sizeof(x[0]) )
33
34#ifdef HAVE_MBRTOWC
35#define tre_mbrtowc(pwc, s, n, ps) (mbrtowc((pwc), (s), (n), (ps)))
36#define tre_mbrtowc_l(pwc,s,n,ps,l) (mbrtowc_l((pwc), (s), (n), (ps), (l)))
37#else /* !HAVE_MBRTOWC */
38#ifdef HAVE_MBTOWC
39#define tre_mbrtowc(pwc, s, n, ps) (mbtowc((pwc), (s), (n)))
40#endif /* HAVE_MBTOWC */
41#endif /* !HAVE_MBRTOWC */
42
43#ifdef TRE_MULTIBYTE
44#ifdef HAVE_MBSTATE_T
45#define TRE_MBSTATE
46#endif /* TRE_MULTIBYTE */
47#endif /* HAVE_MBSTATE_T */
48
49/* Define the character types and functions. */
50#ifdef TRE_WCHAR
51
52/* Wide characters. */
53typedef wint_t tre_cint_t;
54#define TRE_CHAR_MAX WCHAR_MAX
55
56#ifdef TRE_MULTIBYTE
57#define TRE_MB_CUR_MAX MB_CUR_MAX
58#define TRE_MB_CUR_MAX_L MB_CUR_MAX_L
59#else /* !TRE_MULTIBYTE */
60#define TRE_MB_CUR_MAX 1
61#endif /* !TRE_MULTIBYTE */
62
63#define tre_isalnum iswalnum
64#define tre_isalpha iswalpha
65#ifdef HAVE_ISWBLANK
66#define tre_isblank iswblank
67#endif /* HAVE_ISWBLANK */
68#define tre_iscntrl iswcntrl
69#define tre_isdigit iswdigit
70#define tre_isgraph iswgraph
71#define tre_islower iswlower
72#define tre_isprint iswprint
73#define tre_ispunct iswpunct
74#define tre_isspace iswspace
75#define tre_isupper iswupper
76#define tre_isxdigit iswxdigit
77
78#define tre_tolower towlower
79#define tre_toupper towupper
80#define tre_strlen wcslen
81
82#define tre_isalnum_l iswalnum_l
83#define tre_isdigit_l iswdigit_l
84#define tre_islower_l iswlower_l
85#define tre_isupper_l iswupper_l
86#define tre_isxdigit_l iswxdigit_l
87#define tre_tolower_l towlower_l
88#define tre_toupper_l towupper_l
89
90#else /* !TRE_WCHAR */
91
92/* 8 bit characters. */
93typedef short tre_cint_t;
94#define TRE_CHAR_MAX 255
95#define TRE_MB_CUR_MAX 1
96
97#define tre_isalnum isalnum
98#define tre_isalpha isalpha
99#ifdef HAVE_ISASCII
100#define tre_isascii isascii
101#endif /* HAVE_ISASCII */
102#ifdef HAVE_ISBLANK
103#define tre_isblank isblank
104#endif /* HAVE_ISBLANK */
105#define tre_iscntrl iscntrl
106#define tre_isdigit isdigit
107#define tre_isgraph isgraph
108#define tre_islower islower
109#define tre_isprint isprint
110#define tre_ispunct ispunct
111#define tre_isspace isspace
112#define tre_isupper isupper
113#define tre_isxdigit isxdigit
114
115#define tre_tolower(c) (tre_cint_t)(tolower(c))
116#define tre_toupper(c) (tre_cint_t)(toupper(c))
117#define tre_strlen(s) (strlen((const char*)s))
118
119#endif /* !TRE_WCHAR */
120
121#if defined(TRE_WCHAR) && defined(HAVE_ISWCTYPE) && defined(HAVE_WCTYPE)
122#define TRE_USE_SYSTEM_WCTYPE 1
123#endif
124
125#ifdef TRE_USE_SYSTEM_WCTYPE
126/* Use system provided iswctype() and wctype(). */
127typedef wctype_t tre_ctype_t;
128#define tre_isctype iswctype
129#define tre_ctype wctype
130#define tre_isctype_l iswctype_l
131#define tre_ctype_l wctype_l
132#else /* !TRE_USE_SYSTEM_WCTYPE */
133/* Define our own versions of iswctype() and wctype(). */
134typedef int (*tre_ctype_t)(tre_cint_t);
135#define tre_isctype(c, type) ( (type)(c) )
136tre_ctype_t tre_ctype(const char *name);
137#endif /* !TRE_USE_SYSTEM_WCTYPE */
138
139typedef enum { STR_WIDE, STR_BYTE, STR_MBS } tre_str_type_t;
140
141/* Returns number of bytes to add to (char *)ptr to make it
142 properly aligned for the type. */
143#define ALIGN(ptr, type) \
144 ((((long)ptr) % sizeof(type)) \
145 ? (sizeof(type) - (((long)ptr) % sizeof(type))) \
146 : 0)
147
148#undef MAX
149#undef MIN
150#define MAX(a, b) (((a) >= (b)) ? (a) : (b))
151#define MIN(a, b) (((a) <= (b)) ? (a) : (b))
152
153/* Define STRF to the correct printf formatter for strings. */
154#ifdef TRE_WCHAR
155#define STRF "ls"
156#else /* !TRE_WCHAR */
157#define STRF "s"
158#endif /* !TRE_WCHAR */
159
160/* Types to handle bracket expressions. */
161typedef enum {
162 TRE_BRACKET_MATCH_TYPE_UNUSED = 0,
163 TRE_BRACKET_MATCH_TYPE_CHAR, /* Single character value */
164 TRE_BRACKET_MATCH_TYPE_RANGE_BEGIN, /* Collation range begin */
165 TRE_BRACKET_MATCH_TYPE_RANGE_END, /* Collation range end */
166 TRE_BRACKET_MATCH_TYPE_CLASS, /* Character class */
167 TRE_BRACKET_MATCH_TYPE_EQUIVALENCE, /* Collation equivalence value */
168} tre_bracket_match_type_t;
169
170typedef struct {
171 tre_bracket_match_type_t type;
172 tre_cint_t value;
173} tre_bracket_match_t;
174
175#define TRE_BRACKET_MATCH_FLAG_NEGATE 1
176
177typedef struct {
178 int num_bracket_matches;
179 int flags;
180 tre_bracket_match_t bracket_matches[0];
181} tre_bracket_match_list_t;
182
183#define SIZEOF_BRACKET_MATCH_LIST_N(n) (sizeof(tre_bracket_match_list_t) + \
184 sizeof(tre_bracket_match_t) * (n))
185#define SIZEOF_BRACKET_MATCH_LIST(l) SIZEOF_BRACKET_MATCH_LIST_N( \
186 (l)->num_bracket_matches)
187
188/* The "count" field is the number of time the tag was set, initially zero.
189 The "first" field contains the first set value (when "count" equals 1).
190 The "value" field contains the current value of the tag, if "count" is
191 greater than zero (the tag's current value is -1 if "count" is zero).
192 The "touch" field is the touch value, a montonically increasing value
193 (maintained by the caller) set each time the tag itself is set. */
194typedef struct {
195 int count;
196 int first;
197 int value;
198 int touch;
199} tre_tag_t;
200
201/* TNFA transition type. A TNFA state is an array of transitions,
202 the terminator is a transition with NULL `state'. */
203typedef struct tnfa_transition tre_tnfa_transition_t;
204
205struct tnfa_transition {
206 /* Range of accepted characters. */
207 tre_cint_t code_min;
208 tre_cint_t code_max;
209 /* Pointer to the destination state. */
210 tre_tnfa_transition_t *state;
211 /* ID number of the destination state. */
212 int state_id;
213 /* -1 terminated array of tags (or NULL). */
214 int *tags;
215 /* Matching parameters settings (or NULL). */
216 int *params;
217 /* Assertion bitmap. */
218 int assertions;
219 /* Assertion parameters. */
220 union {
221 /* Bracket matches. */
222 tre_bracket_match_list_t *bracket_match_list;
223 /* Back reference assertion. */
224 int backref;
225 } u;
226};
227
228
229/* Assertions. */
230#define ASSERT_AT_BOL 1 /* Beginning of line. */
231#define ASSERT_AT_EOL 2 /* End of line. */
232#define ASSERT_BRACKET_MATCH 4 /* Matches in `bracket_match_list'. */
233#define ASSERT_AT_BOW 8 /* Beginning of word. */
234#define ASSERT_AT_EOW 16 /* End of word. */
235#define ASSERT_AT_WB 32 /* Word boundary. */
236#define ASSERT_AT_WB_NEG 64 /* Not a word boundary. */
237#define ASSERT_BACKREF 128 /* A back reference in `backref'. */
238#define ASSERT_LAST 128
239
240/* Tag directions. */
241typedef enum {
242 TRE_TAG_MINIMIZE = 0,
243 TRE_TAG_MAXIMIZE,
244 TRE_TAG_LEFT_MAXIMIZE,
245} tre_tag_direction_t;
246
247/* Parameters that can be changed dynamically while matching. */
248typedef enum {
249 TRE_PARAM_COST_INS = 0,
250 TRE_PARAM_COST_DEL = 1,
251 TRE_PARAM_COST_SUBST = 2,
252 TRE_PARAM_COST_MAX = 3,
253 TRE_PARAM_MAX_INS = 4,
254 TRE_PARAM_MAX_DEL = 5,
255 TRE_PARAM_MAX_SUBST = 6,
256 TRE_PARAM_MAX_ERR = 7,
257 TRE_PARAM_DEPTH = 8,
258 TRE_PARAM_LAST = 9
259} tre_param_t;
260
261/* Unset matching parameter */
262#define TRE_PARAM_UNSET -1
263
264/* Signifies the default matching parameter value. */
265#define TRE_PARAM_DEFAULT -2
266
267/* Instructions to compute submatch register values from tag values
268 after a successful match. */
269struct tre_submatch_data {
270 /* Tag that gives the value for rm_so (submatch start offset). */
271 int so_tag;
272 /* Tag that gives the value for rm_eo (submatch end offset). */
273 int eo_tag;
274};
275
276typedef struct tre_submatch_data tre_submatch_data_t;
277
278
279/* LAST MATCHED structures */
280struct __previous_type;
281struct __previous_branch_type;
282struct __current_type;
283struct __current_branch_type;
284
285typedef struct __previous_branch_type {
286 struct __previous_branch_type *next;
287 struct __previous_type *last_matched; int n_last_matched; int cmp_tag; int n_tags;
288 int tot_branches;
289 int tot_last_matched;
290 int tot_tags;
291 bitstr_t tags[0];
292} tre_last_matched_branch_pre_t;
293
294typedef struct __previous_type {
295 struct __previous_type *next;
296 tre_last_matched_branch_pre_t *branches; int n_branches; int start_tag;
297 int tot_branches;
298 int tot_last_matched;
299 int tot_tags;
300} tre_last_matched_pre_t;
301
302typedef struct __current_branch_type {
303 int *tags;
304 struct __current_type *last_matched; int n_last_matched; int cmp_tag; int n_tags;
305} tre_last_matched_branch_t;
306
307typedef struct __current_type {
308 tre_last_matched_branch_t *branches; int n_branches; int start_tag;
309} tre_last_matched_t;
310
311/* TNFA definition. */
312typedef struct tnfa tre_tnfa_t;
313
314struct tnfa {
315 tre_tnfa_transition_t *transitions;
316 tre_tnfa_transition_t *initial;
317 tre_tnfa_transition_t *final;
318 tre_submatch_data_t *submatch_data;
319 tre_tag_direction_t *tag_directions;
320 int *minimal_tags;
321 tre_last_matched_branch_t *last_matched_branch;
322 locale_t loc;
323 unsigned int num_transitions;
324 int first_char;
325 unsigned int num_submatches;
326 unsigned int num_submatches_invisible;
327 int num_tags;
328 int num_minimals;
329 int end_tag;
330 int num_states;
331 int cflags;
332 int have_backrefs;
333 int num_reorder_tags;
334 int have_approx;
335 int params_depth;
336};
337
338int
339tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags,
340 locale_t loc);
341
342void
343tre_free(regex_t *preg);
344
345reg_errcode_t
346tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
347 const tre_tnfa_t *tnfa, const tre_tag_t *tags, int match_eo);
348
349reg_errcode_t
350tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len,
351 tre_str_type_t type, tre_tag_t *match_tags, int eflags,
352 int *match_end_ofs);
353
354reg_errcode_t
355tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
356 int len, tre_str_type_t type, tre_tag_t *match_tags,
357 int eflags, int *match_end_ofs);
358
359#ifdef TRE_APPROX
360reg_errcode_t
361tre_tnfa_run_approx(const tre_tnfa_t *tnfa, const void *string, int len,
362 tre_str_type_t type, tre_tag_t *match_tags,
363 regamatch_t *match, regaparams_t params,
364 int eflags, int *match_end_ofs);
365#endif /* TRE_APPROX */
366
367#endif /* TRE_INTERNAL_H */
368
369/* EOF */