1 /* search.c -- searching large bodies of text.
2 $Id: search.c,v 1.3 2004/04/11 17:56:46 karl Exp $
4 Copyright (C) 1993, 1997, 1998, 2002, 2004 Free Software Foundation, Inc.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 Written by Brian Fox (bfox@ai.mit.edu). */
27 /* The search functions take two arguments:
29 1) a string to search for, and
31 2) a pointer to a SEARCH_BINDING which contains the buffer, start,
32 and end of the search.
34 They return a long, which is the offset from the start of the buffer
35 at which the match was found. An offset of -1 indicates failure. */
37 /* A function which makes a binding with buffer and bounds. */
39 make_binding (char *buffer, long int start, long int end)
41 SEARCH_BINDING *binding;
43 binding = (SEARCH_BINDING *)xmalloc (sizeof (SEARCH_BINDING));
44 binding->buffer = buffer;
45 binding->start = start;
52 /* Make a copy of BINDING without duplicating the data. */
54 copy_binding (SEARCH_BINDING *binding)
58 copy = make_binding (binding->buffer, binding->start, binding->end);
59 copy->flags = binding->flags;
64 /* **************************************************************** */
66 /* The Actual Searching Functions */
68 /* **************************************************************** */
70 /* Search forwards or backwards for the text delimited by BINDING.
71 The search is forwards if BINDING->start is greater than BINDING->end. */
73 search (char *string, SEARCH_BINDING *binding)
77 /* If the search is backwards, then search backwards, otherwise forwards. */
78 if (binding->start > binding->end)
79 result = search_backward (string, binding);
81 result = search_forward (string, binding);
86 /* Search forwards for STRING through the text delimited in BINDING. */
88 search_forward (char *string, SEARCH_BINDING *binding)
90 register int c, i, len;
91 register char *buff, *end;
92 char *alternate = (char *)NULL;
94 len = strlen (string);
96 /* We match characters in the search buffer against STRING and ALTERNATE.
97 ALTERNATE is a case reversed version of STRING; this is cheaper than
98 case folding each character before comparison. Alternate is only
99 used if the case folding bit is turned on in the passed BINDING. */
101 if (binding->flags & S_FoldCase)
103 alternate = xstrdup (string);
105 for (i = 0; i < len; i++)
107 if (islower (alternate[i]))
108 alternate[i] = toupper (alternate[i]);
109 else if (isupper (alternate[i]))
110 alternate[i] = tolower (alternate[i]);
114 buff = binding->buffer + binding->start;
115 end = binding->buffer + binding->end + 1;
117 while (buff < (end - len))
119 for (i = 0; i < len; i++)
123 if ((c != string[i]) && (!alternate || c != alternate[i]))
131 if (binding->flags & S_SkipDest)
133 return ((long) (buff - binding->buffer));
145 /* Search for STRING backwards through the text delimited in BINDING. */
147 search_backward (char *input_string, SEARCH_BINDING *binding)
149 register int c, i, len;
150 register char *buff, *end;
152 char *alternate = (char *)NULL;
154 len = strlen (input_string);
156 /* Reverse the characters in the search string. */
157 string = (char *)xmalloc (1 + len);
158 for (c = 0, i = len - 1; input_string[c]; c++, i--)
159 string[i] = input_string[c];
163 /* We match characters in the search buffer against STRING and ALTERNATE.
164 ALTERNATE is a case reversed version of STRING; this is cheaper than
165 case folding each character before comparison. ALTERNATE is only
166 used if the case folding bit is turned on in the passed BINDING. */
168 if (binding->flags & S_FoldCase)
170 alternate = xstrdup (string);
172 for (i = 0; i < len; i++)
174 if (islower (alternate[i]))
175 alternate[i] = toupper (alternate[i]);
176 else if (isupper (alternate[i]))
177 alternate[i] = tolower (alternate[i]);
181 buff = binding->buffer + binding->start - 1;
182 end = binding->buffer + binding->end;
184 while (buff > (end + len))
186 for (i = 0; i < len; i++)
190 if (c != string[i] && (!alternate || c != alternate[i]))
200 if (binding->flags & S_SkipDest)
202 return ((long) (1 + (buff - binding->buffer)));
215 /* Find STRING in LINE, returning the offset of the end of the string.
216 Return an offset of -1 if STRING does not appear in LINE. The search
217 is bound by the end of the line (i.e., either NEWLINE or 0). */
219 string_in_line (char *string, char *line)
222 SEARCH_BINDING binding;
224 /* Find the end of the line. */
225 for (end = 0; line[end] && line[end] != '\n'; end++);
227 /* Search for STRING within these confines. */
228 binding.buffer = line;
231 binding.flags = S_FoldCase | S_SkipDest;
233 return (search_forward (string, &binding));
236 /* Return non-zero if STRING is the first text to appear at BINDING. */
238 looking_at (char *string, SEARCH_BINDING *binding)
242 search_end = search (string, binding);
244 /* If the string was not found, SEARCH_END is -1. If the string was found,
245 but not right away, SEARCH_END is != binding->start. Otherwise, the
246 string was found at binding->start. */
247 return (search_end == binding->start);
250 /* **************************************************************** */
252 /* Small String Searches */
254 /* **************************************************************** */
256 /* Function names that start with "skip" are passed a string, and return
257 an offset from the start of that string. Function names that start
258 with "find" are passed a SEARCH_BINDING, and return an absolute position
259 marker of the item being searched for. "Find" functions return a value
260 of -1 if the item being looked for couldn't be found. */
262 /* Return the index of the first non-whitespace character in STRING. */
264 skip_whitespace (char *string)
268 for (i = 0; string && whitespace (string[i]); i++);
272 /* Return the index of the first non-whitespace or newline character in
275 skip_whitespace_and_newlines (char *string)
279 for (i = 0; string && whitespace_or_newline (string[i]); i++);
283 /* Return the index of the first whitespace character in STRING. */
285 skip_non_whitespace (char *string)
289 for (i = 0; string && string[i] && !whitespace (string[i]); i++);
293 /* Return the index of the first non-node character in STRING. Note that
294 this function contains quite a bit of hair to ignore periods in some
295 special cases. This is because we here at GNU ship some info files which
296 contain nodenames that contain periods. No such nodename can start with
297 a period, or continue with whitespace, newline, or ')' immediately following
298 the period. If second argument NEWLINES_OKAY is non-zero, newlines should
299 be skipped while parsing out the nodename specification. */
301 skip_node_characters (char *string, int newlines_okay)
303 register int c, i = 0;
307 /* Handle special case. This is when another function has parsed out the
308 filename component of the node name, and we just want to parse out the
309 nodename proper. In that case, a period at the start of the nodename
310 indicates an empty nodename. */
311 if (string && *string == '.')
314 if (string && *string == '(')
321 for (; string && (c = string[i]); i++)
333 /* If the character following the close paren is a space or period,
334 then this node name has no more characters associated with it. */
338 ((!newlines_okay) && (c == '\n')) ||
339 ((paren_seen && string[i - 1] == ')') &&
340 (c == ' ' || c == '.')) ||
344 /* This test causes a node name ending in a period, like `This.', not to
345 be found. The trailing . is stripped. This occurs in the jargon
346 file (`I see no X here.' is a node name). */
349 (whitespace_or_newline (string[i + 1])) ||
350 (string[i + 1] == ')'))))
357 /* **************************************************************** */
359 /* Searching FILE_BUFFER's */
361 /* **************************************************************** */
363 /* Return the absolute position of the first occurence of a node separator in
364 BINDING-buffer. The search starts at BINDING->start. Return -1 if no node
365 separator was found. */
367 find_node_separator (SEARCH_BINDING *binding)
372 body = binding->buffer;
374 /* A node is started by [^L]^_[^L]\n. That is to say, the C-l's are
375 optional, but the DELETE and NEWLINE are not. This separator holds
376 true for all separated elements in an Info file, including the tags
377 table (if present) and the indirect tags table (if present). */
378 for (i = binding->start; i < binding->end - 1; i++)
379 if (((body[i] == INFO_FF && body[i + 1] == INFO_COOKIE) &&
380 (body[i + 2] == '\n' ||
381 (body[i + 2] == INFO_FF && body[i + 3] == '\n'))) ||
382 ((body[i] == INFO_COOKIE) &&
383 (body[i + 1] == '\n' ||
384 (body[i + 1] == INFO_FF && body[i + 2] == '\n'))))
389 /* Return the length of the node separator characters that BODY is
390 currently pointing at. */
392 skip_node_separator (char *body)
398 if (body[i] == INFO_FF)
401 if (body[i++] != INFO_COOKIE)
404 if (body[i] == INFO_FF)
407 if (body[i++] != '\n')
413 /* Return the number of characters from STRING to the start of
416 skip_line (char *string)
420 for (i = 0; string && string[i] && string[i] != '\n'; i++);
422 if (string[i] == '\n')
428 /* Return the absolute position of the beginning of a tags table in this
429 binding starting the search at binding->start. */
431 find_tags_table (SEARCH_BINDING *binding)
433 SEARCH_BINDING tmp_search;
436 tmp_search.buffer = binding->buffer;
437 tmp_search.start = binding->start;
438 tmp_search.end = binding->end;
439 tmp_search.flags = S_FoldCase;
441 while ((position = find_node_separator (&tmp_search)) != -1 )
443 tmp_search.start = position;
444 tmp_search.start += skip_node_separator (tmp_search.buffer
447 if (looking_at (TAGS_TABLE_BEG_LABEL, &tmp_search))
453 /* Return the absolute position of the node named NODENAME in BINDING.
454 This is a brute force search, and we wish to avoid it when possible.
455 This function is called when a tag (indirect or otherwise) doesn't
456 really point to the right node. It returns the absolute position of
457 the separator preceding the node. */
459 find_node_in_binding (char *nodename, SEARCH_BINDING *binding)
463 SEARCH_BINDING tmp_search;
465 namelen = strlen (nodename);
467 tmp_search.buffer = binding->buffer;
468 tmp_search.start = binding->start;
469 tmp_search.end = binding->end;
470 tmp_search.flags = 0;
472 while ((position = find_node_separator (&tmp_search)) != -1)
474 tmp_search.start = position;
475 tmp_search.start += skip_node_separator
476 (tmp_search.buffer + tmp_search.start);
478 offset = string_in_line
479 (INFO_NODE_LABEL, tmp_search.buffer + tmp_search.start);
484 tmp_search.start += offset;
485 tmp_search.start += skip_whitespace (tmp_search.buffer + tmp_search.start);
486 offset = skip_node_characters
487 (tmp_search.buffer + tmp_search.start, DONT_SKIP_NEWLINES);
489 /* Notice that this is an exact match. You cannot grovel through
490 the buffer with this function looking for random nodes. */
491 if ((offset == namelen) &&
492 (tmp_search.buffer[tmp_search.start] == nodename[0]) &&
493 (strncmp (tmp_search.buffer + tmp_search.start, nodename, offset) == 0))