1 /* search.c -- searching large bodies of text.
2 $Id: search.c,v 1.6 2002/03/23 20:45:24 karl Exp $
4 Copyright (C) 1993, 97, 98 Free Software Foundation, Inc.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 Written by Brian Fox (bfox@ai.mit.edu). */
27 /* The search functions take two arguments:
29 1) a string to search for, and
31 2) a pointer to a SEARCH_BINDING which contains the buffer, start,
32 and end of the search.
34 They return a long, which is the offset from the start of the buffer
35 at which the match was found. An offset of -1 indicates failure. */
37 /* A function which makes a binding with buffer and bounds. */
39 make_binding (buffer, start, end)
43 SEARCH_BINDING *binding;
45 binding = (SEARCH_BINDING *)xmalloc (sizeof (SEARCH_BINDING));
46 binding->buffer = buffer;
47 binding->start = start;
54 /* Make a copy of BINDING without duplicating the data. */
56 copy_binding (binding)
57 SEARCH_BINDING *binding;
61 copy = make_binding (binding->buffer, binding->start, binding->end);
62 copy->flags = binding->flags;
67 /* **************************************************************** */
69 /* The Actual Searching Functions */
71 /* **************************************************************** */
73 /* Search forwards or backwards for the text delimited by BINDING.
74 The search is forwards if BINDING->start is greater than BINDING->end. */
76 search (string, binding)
78 SEARCH_BINDING *binding;
82 /* If the search is backwards, then search backwards, otherwise forwards. */
83 if (binding->start > binding->end)
84 result = search_backward (string, binding);
86 result = search_forward (string, binding);
91 /* Search forwards for STRING through the text delimited in BINDING. */
93 search_forward (string, binding)
95 SEARCH_BINDING *binding;
97 register int c, i, len;
98 register char *buff, *end;
99 char *alternate = (char *)NULL;
101 len = strlen (string);
103 /* We match characters in the search buffer against STRING and ALTERNATE.
104 ALTERNATE is a case reversed version of STRING; this is cheaper than
105 case folding each character before comparison. Alternate is only
106 used if the case folding bit is turned on in the passed BINDING. */
108 if (binding->flags & S_FoldCase)
110 alternate = xstrdup (string);
112 for (i = 0; i < len; i++)
114 if (islower (alternate[i]))
115 alternate[i] = toupper (alternate[i]);
116 else if (isupper (alternate[i]))
117 alternate[i] = tolower (alternate[i]);
121 buff = binding->buffer + binding->start;
122 end = binding->buffer + binding->end + 1;
124 while (buff < (end - len))
126 for (i = 0; i < len; i++)
130 if ((c != string[i]) && (!alternate || c != alternate[i]))
138 if (binding->flags & S_SkipDest)
140 return ((long) (buff - binding->buffer));
152 /* Search for STRING backwards through the text delimited in BINDING. */
154 search_backward (input_string, binding)
156 SEARCH_BINDING *binding;
158 register int c, i, len;
159 register char *buff, *end;
161 char *alternate = (char *)NULL;
163 len = strlen (input_string);
165 /* Reverse the characters in the search string. */
166 string = (char *)xmalloc (1 + len);
167 for (c = 0, i = len - 1; input_string[c]; c++, i--)
168 string[i] = input_string[c];
172 /* We match characters in the search buffer against STRING and ALTERNATE.
173 ALTERNATE is a case reversed version of STRING; this is cheaper than
174 case folding each character before comparison. ALTERNATE is only
175 used if the case folding bit is turned on in the passed BINDING. */
177 if (binding->flags & S_FoldCase)
179 alternate = xstrdup (string);
181 for (i = 0; i < len; i++)
183 if (islower (alternate[i]))
184 alternate[i] = toupper (alternate[i]);
185 else if (isupper (alternate[i]))
186 alternate[i] = tolower (alternate[i]);
190 buff = binding->buffer + binding->start - 1;
191 end = binding->buffer + binding->end;
193 while (buff > (end + len))
195 for (i = 0; i < len; i++)
199 if (c != string[i] && (!alternate || c != alternate[i]))
209 if (binding->flags & S_SkipDest)
211 return ((long) (1 + (buff - binding->buffer)));
224 /* Find STRING in LINE, returning the offset of the end of the string.
225 Return an offset of -1 if STRING does not appear in LINE. The search
226 is bound by the end of the line (i.e., either NEWLINE or 0). */
228 string_in_line (string, line)
232 SEARCH_BINDING binding;
234 /* Find the end of the line. */
235 for (end = 0; line[end] && line[end] != '\n'; end++);
237 /* Search for STRING within these confines. */
238 binding.buffer = line;
241 binding.flags = S_FoldCase | S_SkipDest;
243 return (search_forward (string, &binding));
246 /* Return non-zero if STRING is the first text to appear at BINDING. */
248 looking_at (string, binding)
250 SEARCH_BINDING *binding;
254 search_end = search (string, binding);
256 /* If the string was not found, SEARCH_END is -1. If the string was found,
257 but not right away, SEARCH_END is != binding->start. Otherwise, the
258 string was found at binding->start. */
259 return (search_end == binding->start);
262 /* **************************************************************** */
264 /* Small String Searches */
266 /* **************************************************************** */
268 /* Function names that start with "skip" are passed a string, and return
269 an offset from the start of that string. Function names that start
270 with "find" are passed a SEARCH_BINDING, and return an absolute position
271 marker of the item being searched for. "Find" functions return a value
272 of -1 if the item being looked for couldn't be found. */
274 /* Return the index of the first non-whitespace character in STRING. */
276 skip_whitespace (string)
281 for (i = 0; string && whitespace (string[i]); i++);
285 /* Return the index of the first non-whitespace or newline character in
288 skip_whitespace_and_newlines (string)
293 for (i = 0; string && whitespace_or_newline (string[i]); i++);
297 /* Return the index of the first whitespace character in STRING. */
299 skip_non_whitespace (string)
304 for (i = 0; string && string[i] && !whitespace (string[i]); i++);
308 /* Return the index of the first non-node character in STRING. Note that
309 this function contains quite a bit of hair to ignore periods in some
310 special cases. This is because we here at GNU ship some info files which
311 contain nodenames that contain periods. No such nodename can start with
312 a period, or continue with whitespace, newline, or ')' immediately following
313 the period. If second argument NEWLINES_OKAY is non-zero, newlines should
314 be skipped while parsing out the nodename specification. */
316 skip_node_characters (string, newlines_okay)
320 register int c, i = 0;
324 /* Handle special case. This is when another function has parsed out the
325 filename component of the node name, and we just want to parse out the
326 nodename proper. In that case, a period at the start of the nodename
327 indicates an empty nodename. */
328 if (string && *string == '.')
331 if (string && *string == '(')
338 for (; string && (c = string[i]); i++)
350 /* If the character following the close paren is a space or period,
351 then this node name has no more characters associated with it. */
355 ((!newlines_okay) && (c == '\n')) ||
356 ((paren_seen && string[i - 1] == ')') &&
357 (c == ' ' || c == '.')) ||
361 /* This test causes a node name ending in a period, like `This.', not to
362 be found. The trailing . is stripped. This occurs in the jargon
363 file (`I see no X here.' is a node name). */
366 (whitespace_or_newline (string[i + 1])) ||
367 (string[i + 1] == ')'))))
374 /* **************************************************************** */
376 /* Searching FILE_BUFFER's */
378 /* **************************************************************** */
380 /* Return the absolute position of the first occurence of a node separator in
381 BINDING-buffer. The search starts at BINDING->start. Return -1 if no node
382 separator was found. */
384 find_node_separator (binding)
385 SEARCH_BINDING *binding;
390 body = binding->buffer;
392 /* A node is started by [^L]^_[^L]\n. That is to say, the C-l's are
393 optional, but the DELETE and NEWLINE are not. This separator holds
394 true for all separated elements in an Info file, including the tags
395 table (if present) and the indirect tags table (if present). */
396 for (i = binding->start; i < binding->end - 1; i++)
397 if (((body[i] == INFO_FF && body[i + 1] == INFO_COOKIE) &&
398 (body[i + 2] == '\n' ||
399 (body[i + 2] == INFO_FF && body[i + 3] == '\n'))) ||
400 ((body[i] == INFO_COOKIE) &&
401 (body[i + 1] == '\n' ||
402 (body[i + 1] == INFO_FF && body[i + 2] == '\n'))))
407 /* Return the length of the node separator characters that BODY is
408 currently pointing at. */
410 skip_node_separator (body)
417 if (body[i] == INFO_FF)
420 if (body[i++] != INFO_COOKIE)
423 if (body[i] == INFO_FF)
426 if (body[i++] != '\n')
432 /* Return the number of characters from STRING to the start of
440 for (i = 0; string && string[i] && string[i] != '\n'; i++);
442 if (string[i] == '\n')
448 /* Return the absolute position of the beginning of a tags table in this
449 binding starting the search at binding->start. */
451 find_tags_table (binding)
452 SEARCH_BINDING *binding;
454 SEARCH_BINDING search;
457 search.buffer = binding->buffer;
458 search.start = binding->start;
459 search.end = binding->end;
460 search.flags = S_FoldCase;
462 while ((position = find_node_separator (&search)) != -1 )
464 search.start = position;
465 search.start += skip_node_separator (search.buffer + search.start);
467 if (looking_at (TAGS_TABLE_BEG_LABEL, &search))
473 /* Return the absolute position of the node named NODENAME in BINDING.
474 This is a brute force search, and we wish to avoid it when possible.
475 This function is called when a tag (indirect or otherwise) doesn't
476 really point to the right node. It returns the absolute position of
477 the separator preceding the node. */
479 find_node_in_binding (nodename, binding)
481 SEARCH_BINDING *binding;
485 SEARCH_BINDING search;
487 namelen = strlen (nodename);
489 search.buffer = binding->buffer;
490 search.start = binding->start;
491 search.end = binding->end;
494 while ((position = find_node_separator (&search)) != -1)
496 search.start = position;
497 search.start += skip_node_separator (search.buffer + search.start);
499 offset = string_in_line (INFO_NODE_LABEL, search.buffer + search.start);
504 search.start += offset;
505 search.start += skip_whitespace (search.buffer + search.start);
506 offset = skip_node_characters
507 (search.buffer + search.start, DONT_SKIP_NEWLINES);
509 /* Notice that this is an exact match. You cannot grovel through
510 the buffer with this function looking for random nodes. */
511 if ((offset == namelen) &&
512 (search.buffer[search.start] == nodename[0]) &&
513 (strncmp (search.buffer + search.start, nodename, offset) == 0))