contrib/bmake/str.c

   1 /*      $NetBSD: str.c,v 1.81 2021/02/01 22:36:28 rillig Exp $  */
   2
   3 /*
   4  * Copyright (c) 1988, 1989, 1990, 1993
   5  *      The Regents of the University of California.  All rights reserved.
   6  *
   7  * This code is derived from software contributed to Berkeley by
   8  * Adam de Boor.
   9  *
  10  * Redistribution and use in source and binary forms, with or without
  11  * modification, are permitted provided that the following conditions
  12  * are met:
  13  * 1. Redistributions of source code must retain the above copyright
  14  *    notice, this list of conditions and the following disclaimer.
  15  * 2. Redistributions in binary form must reproduce the above copyright
  16  *    notice, this list of conditions and the following disclaimer in the
  17  *    documentation and/or other materials provided with the distribution.
  18  * 3. Neither the name of the University nor the names of its contributors
  19  *    may be used to endorse or promote products derived from this software
  20  *    without specific prior written permission.
  21  *
  22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  32  * SUCH DAMAGE.
  33  */
  34
  35 /*
  36  * Copyright (c) 1989 by Berkeley Softworks
  37  * All rights reserved.
  38  *
  39  * This code is derived from software contributed to Berkeley by
  40  * Adam de Boor.
  41  *
  42  * Redistribution and use in source and binary forms, with or without
  43  * modification, are permitted provided that the following conditions
  44  * are met:
  45  * 1. Redistributions of source code must retain the above copyright
  46  *    notice, this list of conditions and the following disclaimer.
  47  * 2. Redistributions in binary form must reproduce the above copyright
  48  *    notice, this list of conditions and the following disclaimer in the
  49  *    documentation and/or other materials provided with the distribution.
  50  * 3. All advertising materials mentioning features or use of this software
  51  *    must display the following acknowledgement:
  52  *      This product includes software developed by the University of
  53  *      California, Berkeley and its contributors.
  54  * 4. Neither the name of the University nor the names of its contributors
  55  *    may be used to endorse or promote products derived from this software
  56  *    without specific prior written permission.
  57  *
  58  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  59  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  60  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  61  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  62  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  63  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  64  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  65  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  66  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  67  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  68  * SUCH DAMAGE.
  69  */
  70
  71 #include "make.h"
  72
  73 /*      "@(#)str.c      5.8 (Berkeley) 6/1/90"  */
  74 MAKE_RCSID("$NetBSD: str.c,v 1.81 2021/02/01 22:36:28 rillig Exp $");
  75
  76 /* Return the concatenation of s1 and s2, freshly allocated. */
  77 char *
  78 str_concat2(const char *s1, const char *s2)
  79 {
  80         size_t len1 = strlen(s1);
  81         size_t len2 = strlen(s2);
  82         char *result = bmake_malloc(len1 + len2 + 1);
  83         memcpy(result, s1, len1);
  84         memcpy(result + len1, s2, len2 + 1);
  85         return result;
  86 }
  87
  88 /* Return the concatenation of s1, s2 and s3, freshly allocated. */
  89 char *
  90 str_concat3(const char *s1, const char *s2, const char *s3)
  91 {
  92         size_t len1 = strlen(s1);
  93         size_t len2 = strlen(s2);
  94         size_t len3 = strlen(s3);
  95         char *result = bmake_malloc(len1 + len2 + len3 + 1);
  96         memcpy(result, s1, len1);
  97         memcpy(result + len1, s2, len2);
  98         memcpy(result + len1 + len2, s3, len3 + 1);
  99         return result;
 100 }
 101
 102 /* Return the concatenation of s1, s2, s3 and s4, freshly allocated. */
 103 char *
 104 str_concat4(const char *s1, const char *s2, const char *s3, const char *s4)
 105 {
 106         size_t len1 = strlen(s1);
 107         size_t len2 = strlen(s2);
 108         size_t len3 = strlen(s3);
 109         size_t len4 = strlen(s4);
 110         char *result = bmake_malloc(len1 + len2 + len3 + len4 + 1);
 111         memcpy(result, s1, len1);
 112         memcpy(result + len1, s2, len2);
 113         memcpy(result + len1 + len2, s3, len3);
 114         memcpy(result + len1 + len2 + len3, s4, len4 + 1);
 115         return result;
 116 }
 117
 118 /*
 119  * Fracture a string into an array of words (as delineated by tabs or spaces)
 120  * taking quotation marks into account.
 121  *
 122  * If expand is TRUE, quotes are removed and escape sequences such as \r, \t,
 123  * etc... are expanded. In this case, return NULL on parse errors.
 124  *
 125  * Returns the fractured words, which must be freed later using Words_Free,
 126  * unless the returned Words.words was NULL.
 127  */
 128 Words
 129 Str_Words(const char *str, Boolean expand)
 130 {
 131         size_t str_len;
 132         char *words_buf;
 133         size_t words_cap;
 134         char **words;
 135         size_t words_len;
 136         char inquote;
 137         char *word_start;
 138         char *word_end;
 139         const char *str_p;
 140
 141         /* XXX: why only hspace, not whitespace? */
 142         cpp_skip_hspace(&str);  /* skip leading space chars. */
 143
 144         /* words_buf holds the words, separated by '\0'. */
 145         str_len = strlen(str);
 146         words_buf = bmake_malloc(str_len + 1);
 147
 148         words_cap = str_len / 5 > 50 ? str_len / 5 : 50;
 149         words = bmake_malloc((words_cap + 1) * sizeof(char *));
 150
 151         /*
 152          * copy the string; at the same time, parse backslashes,
 153          * quotes and build the word list.
 154          */
 155         words_len = 0;
 156         inquote = '\0';
 157         word_start = words_buf;
 158         word_end = words_buf;
 159         for (str_p = str;; str_p++) {
 160                 char ch = *str_p;
 161                 switch (ch) {
 162                 case '"':
 163                 case '\'':
 164                         if (inquote != '\0') {
 165                                 if (inquote == ch)
 166                                         inquote = '\0';
 167                                 else
 168                                         break;
 169                         } else {
 170                                 inquote = ch;
 171                                 /* Don't miss "" or '' */
 172                                 if (word_start == NULL && str_p[1] == inquote) {
 173                                         if (!expand) {
 174                                                 word_start = word_end;
 175                                                 *word_end++ = ch;
 176                                         } else
 177                                                 word_start = word_end + 1;
 178                                         str_p++;
 179                                         inquote = '\0';
 180                                         break;
 181                                 }
 182                         }
 183                         if (!expand) {
 184                                 if (word_start == NULL)
 185                                         word_start = word_end;
 186                                 *word_end++ = ch;
 187                         }
 188                         continue;
 189                 case ' ':
 190                 case '\t':
 191                 case '\n':
 192                         if (inquote != '\0')
 193                                 break;
 194                         if (word_start == NULL)
 195                                 continue;
 196                         /* FALLTHROUGH */
 197                 case '\0':
 198                         /*
 199                          * end of a token -- make sure there's enough words
 200                          * space and save off a pointer.
 201                          */
 202                         if (word_start == NULL)
 203                                 goto done;
 204
 205                         *word_end++ = '\0';
 206                         if (words_len == words_cap) {
 207                                 size_t new_size;
 208                                 words_cap *= 2;         /* ramp up fast */
 209                                 new_size = (words_cap + 1) * sizeof(char *);
 210                                 words = bmake_realloc(words, new_size);
 211                         }
 212                         words[words_len++] = word_start;
 213                         word_start = NULL;
 214                         if (ch == '\n' || ch == '\0') {
 215                                 if (expand && inquote != '\0') {
 216                                         free(words);
 217                                         free(words_buf);
 218                                         return (Words){ NULL, 0, NULL };
 219                                 }
 220                                 goto done;
 221                         }
 222                         continue;
 223                 case '\\':
 224                         if (!expand) {
 225                                 if (word_start == NULL)
 226                                         word_start = word_end;
 227                                 *word_end++ = '\\';
 228                                 /* catch '\' at end of line */
 229                                 if (str_p[1] == '\0')
 230                                         continue;
 231                                 ch = *++str_p;
 232                                 break;
 233                         }
 234
 235                         switch (ch = *++str_p) {
 236                         case '\0':
 237                         case '\n':
 238                                 /* hmmm; fix it up as best we can */
 239                                 ch = '\\';
 240                                 str_p--;
 241                                 break;
 242                         case 'b':
 243                                 ch = '\b';
 244                                 break;
 245                         case 'f':
 246                                 ch = '\f';
 247                                 break;
 248                         case 'n':
 249                                 ch = '\n';
 250                                 break;
 251                         case 'r':
 252                                 ch = '\r';
 253                                 break;
 254                         case 't':
 255                                 ch = '\t';
 256                                 break;
 257                         }
 258                         break;
 259                 }
 260                 if (word_start == NULL)
 261                         word_start = word_end;
 262                 *word_end++ = ch;
 263         }
 264 done:
 265         words[words_len] = NULL;        /* useful for argv */
 266         return (Words){ words, words_len, words_buf };
 267 }
 268
 269 /*
 270  * Str_Match -- Test if a string matches a pattern like "*.[ch]".
 271  * The following special characters are known *?\[] (as in fnmatch(3)).
 272  *
 273  * XXX: this function does not detect or report malformed patterns.
 274  */
 275 Boolean
 276 Str_Match(const char *str, const char *pat)
 277 {
 278         for (;;) {
 279                 /*
 280                  * See if we're at the end of both the pattern and the
 281                  * string. If so, we succeeded.  If we're at the end of the
 282                  * pattern but not at the end of the string, we failed.
 283                  */
 284                 if (*pat == '\0')
 285                         return *str == '\0';
 286                 if (*str == '\0' && *pat != '*')
 287                         return FALSE;
 288
 289                 /*
 290                  * A '*' in the pattern matches any substring.  We handle this
 291                  * by calling ourselves for each suffix of the string.
 292                  */
 293                 if (*pat == '*') {
 294                         pat++;
 295                         while (*pat == '*')
 296                                 pat++;
 297                         if (*pat == '\0')
 298                                 return TRUE;
 299                         while (*str != '\0') {
 300                                 if (Str_Match(str, pat))
 301                                         return TRUE;
 302                                 str++;
 303                         }
 304                         return FALSE;
 305                 }
 306
 307                 /* A '?' in the pattern matches any single character. */
 308                 if (*pat == '?')
 309                         goto thisCharOK;
 310
 311                 /*
 312                  * A '[' in the pattern matches a character from a list.
 313                  * The '[' is followed by the list of acceptable characters,
 314                  * or by ranges (two characters separated by '-'). In these
 315                  * character lists, the backslash is an ordinary character.
 316                  */
 317                 if (*pat == '[') {
 318                         Boolean neg = pat[1] == '^';
 319                         pat += neg ? 2 : 1;
 320
 321                         for (;;) {
 322                                 if (*pat == ']' || *pat == '\0') {
 323                                         if (neg)
 324                                                 break;
 325                                         return FALSE;
 326                                 }
 327                                 /*
 328                                  * XXX: This naive comparison makes the
 329                                  * control flow of the pattern parser
 330                                  * dependent on the actual value of the
 331                                  * string.  This is unpredictable.  It may be
 332                                  * though that the code only looks wrong but
 333                                  * actually all code paths result in the same
 334                                  * behavior.  This needs further tests.
 335                                  */
 336                                 if (*pat == *str)
 337                                         break;
 338                                 if (pat[1] == '-') {
 339                                         if (pat[2] == '\0')
 340                                                 return neg;
 341                                         if (*pat <= *str && pat[2] >= *str)
 342                                                 break;
 343                                         if (*pat >= *str && pat[2] <= *str)
 344                                                 break;
 345                                         pat += 2;
 346                                 }
 347                                 pat++;
 348                         }
 349                         if (neg && *pat != ']' && *pat != '\0')
 350                                 return FALSE;
 351                         while (*pat != ']' && *pat != '\0')
 352                                 pat++;
 353                         if (*pat == '\0')
 354                                 pat--;
 355                         goto thisCharOK;
 356                 }
 357
 358                 /*
 359                  * A backslash in the pattern matches the character following
 360                  * it exactly.
 361                  */
 362                 if (*pat == '\\') {
 363                         pat++;
 364                         if (*pat == '\0')
 365                                 return FALSE;
 366                 }
 367
 368                 if (*pat != *str)
 369                         return FALSE;
 370
 371         thisCharOK:
 372                 pat++;
 373                 str++;
 374         }
 375 }