contrib/ldns/compat/b32_pton.c

   1 /*
   2  * Copyright (c) 1996, 1998 by Internet Software Consortium.
   3  *
   4  * Permission to use, copy, modify, and distribute this software for any
   5  * purpose with or without fee is hereby granted, provided that the above
   6  * copyright notice and this permission notice appear in all copies.
   7  *
   8  * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
   9  * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
  10  * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
  11  * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
  12  * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
  13  * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
  14  * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
  15  * SOFTWARE.
  16  */
  17
  18 /*
  19  * Portions Copyright (c) 1995 by International Business Machines, Inc.
  20  *
  21  * International Business Machines, Inc. (hereinafter called IBM) grants
  22  * permission under its copyrights to use, copy, modify, and distribute this
  23  * Software with or without fee, provided that the above copyright notice and
  24  * all paragraphs of this notice appear in all copies, and that the name of IBM
  25  * not be used in connection with the marketing of any product incorporating
  26  * the Software or modifications thereof, without specific, written prior
  27  * permission.
  28  *
  29  * To the extent it has a right to do so, IBM grants an immunity from suit
  30  * under its patents, if any, for the use, sale or manufacture of products to
  31  * the extent that such products are used for performing Domain Name System
  32  * dynamic updates in TCP/IP networks by means of the Software.  No immunity is
  33  * granted for any product per se or for any other function of any product.
  34  *
  35  * THE SOFTWARE IS PROVIDED "AS IS", AND IBM DISCLAIMS ALL WARRANTIES,
  36  * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
  37  * PARTICULAR PURPOSE.  IN NO EVENT SHALL IBM BE LIABLE FOR ANY SPECIAL,
  38  * DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER ARISING
  39  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE, EVEN
  40  * IF IBM IS APPRISED OF THE POSSIBILITY OF SUCH DAMAGES.
  41  */
  42 #include <ldns/config.h>
  43
  44 #include <sys/types.h>
  45 #include <sys/param.h>
  46 #ifdef HAVE_SYS_SOCKET_H
  47 #include <sys/socket.h>
  48 #endif
  49
  50 #ifdef HAVE_NETINET_IN_H
  51 #include <netinet/in.h>
  52 #endif
  53 #ifdef HAVE_ARPA_INET_H
  54 #include <arpa/inet.h>
  55 #endif
  56
  57 #include <ctype.h>
  58 #include <stdio.h>
  59 #include <stdlib.h>
  60 #include <string.h>
  61
   62 /* Standard alphabet, stored lower-case; the specification's table form is "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567". */
   63 static const char Base32[] =
   64         "abcdefghijklmnopqrstuvwxyz234567";
   65 /* "Extended hex" alphabet, stored lower-case; the specification's table form is "0123456789ABCDEFGHIJKLMNOPQRSTUV". */
   66 static const char Base32_extended_hex[] =
   67         "0123456789abcdefghijklmnopqrstuv";
   68 static const char Pad32 = '=';  /* padding character that fills out a short final group */
  69
  70 /* (From RFC1521 and draft-ietf-dnssec-secext-03.txt)
  71 5.  Base 32 Encoding
  72
  73    The Base 32 encoding is designed to represent arbitrary sequences of
  74    octets in a form that needs to be case insensitive but need not be
  75    humanly readable.
  76
  77    A 33-character subset of US-ASCII is used, enabling 5 bits to be
  78    represented per printable character.  (The extra 33rd character, "=",
  79    is used to signify a special processing function.)
  80
  81    The encoding process represents 40-bit groups of input bits as output
  82    strings of 8 encoded characters.  Proceeding from left to right, a
  83    40-bit input group is formed by concatenating 5 8bit input groups.
  84    These 40 bits are then treated as 8 concatenated 5-bit groups, each
  85    of which is translated into a single digit in the base 32 alphabet.
  86    When encoding a bit stream via the base 32 encoding, the bit stream
  87    must be presumed to be ordered with the most-significant-bit first.
  88    That is, the first bit in the stream will be the high-order bit in
  89    the first 8bit byte, and the eighth bit will be the low-order bit in
  90    the first 8bit byte, and so on.
  91
  92    Each 5-bit group is used as an index into an array of 32 printable
  93    characters.  The character referenced by the index is placed in the
  94    output string.  These characters, identified in Table 3, below, are
  95    selected from US-ASCII digits and uppercase letters.
  96
  97                       Table 3: The Base 32 Alphabet
  98
  99          Value Encoding  Value Encoding  Value Encoding  Value Encoding
 100              0 A             9 J            18 S            27 3
 101              1 B            10 K            19 T            28 4
 102              2 C            11 L            20 U            29 5
 103              3 D            12 M            21 V            30 6
 104              4 E            13 N            22 W            31 7
 105              5 F            14 O            23 X
 106              6 G            15 P            24 Y         (pad) =
 107              7 H            16 Q            25 Z
 108              8 I            17 R            26 2
 109
 110
 111    Special processing is performed if fewer than 40 bits are available
 112    at the end of the data being encoded.  A full encoding quantum is
 113    always completed at the end of a body.  When fewer than 40 input bits
 114    are available in an input group, zero bits are added (on the right)
 115    to form an integral number of 5-bit groups.  Padding at the end of
 116    the data is performed using the "=" character.  Since all base 32
 117    input is an integral number of octets, only the following cases can
 118    arise:
 119
 120    (1) the final quantum of encoding input is an integral multiple of 40
 121    bits; here, the final unit of encoded output will be an integral
 122    multiple of 8 characters with no "=" padding,
 123
 124    (2) the final quantum of encoding input is exactly 8 bits; here, the
 125    final unit of encoded output will be two characters followed by six
 126    "=" padding characters,
 127
 128    (3) the final quantum of encoding input is exactly 16 bits; here, the
 129    final unit of encoded output will be four characters followed by four
 130    "=" padding characters,
 131
 132    (4) the final quantum of encoding input is exactly 24 bits; here, the
 133    final unit of encoded output will be five characters followed by
 134    three "=" padding characters, or
 135
 136    (5) the final quantum of encoding input is exactly 32 bits; here, the
 137    final unit of encoded output will be seven characters followed by one
 138    "=" padding character.
 139
 140
 141 6.  Base 32 Encoding with Extended Hex Alphabet
 142
 143    The following description of base 32 is due to [7].  This encoding
 144    should not be regarded as the same as the "base32" encoding, and
 145    should not be referred to as only "base32".
 146
  147    One property with this alphabet, that the base64 and base32 alphabets
  148    lack, is that encoded data maintains its sort order when the encoded
  149    data is compared bit-wise.
 150
 151    This encoding is identical to the previous one, except for the
 152    alphabet.  The new alphabet is found in table 4.
 153
 154                      Table 4: The "Extended Hex" Base 32 Alphabet
 155
 156          Value Encoding  Value Encoding  Value Encoding  Value Encoding
 157              0 0             9 9            18 I            27 R
 158              1 1            10 A            19 J            28 S
 159              2 2            11 B            20 K            29 T
 160              3 3            12 C            21 L            30 U
 161              4 4            13 D            22 M            31 V
 162              5 5            14 E            23 N
 163              6 6            15 F            24 O         (pad) =
 164              7 7            16 G            25 P
 165              8 8            17 H            26 Q
 166
 167
 168
 169
 170 */
 171 /* skips all whitespace anywhere.
 172    converts characters, four at a time, starting at (or after)
 173    src from base - 32 numbers into three 8 bit bytes in the target area.
 174    it returns the number of data bytes stored at the target, or -1 on error.
 175  */
 176
 177 int
 178 ldns_b32_pton_ar(char const *src, size_t hashed_owner_str_len, uint8_t *target, size_t targsize, const char B32_ar[])
 179 {
 180         int tarindex, state, ch;
 181         char *pos;
 182         int i = 0;
 183
 184         state = 0;
 185         tarindex = 0;
 186
 187         while ((ch = *src++) != '\0' && (i == 0 || i < (int) hashed_owner_str_len)) {
 188                 i++;
 189                 ch = tolower(ch);
 190                 if (isspace((unsigned char)ch))        /* Skip whitespace anywhere. */
 191                         continue;
 192
 193                 if (ch == Pad32)
 194                         break;
 195
 196                 pos = strchr(B32_ar, ch);
 197                 if (pos == 0) {
 198                         /* A non-base32 character. */
 199                         return (-ch);
 200                 }
 201
 202                 switch (state) {
 203                 case 0:
 204                         if (target) {
 205                                 if ((size_t)tarindex >= targsize) {
 206                                         return (-2);
 207                                 }
 208                                 target[tarindex] = (pos - B32_ar) << 3;
 209                         }
 210                         state = 1;
 211                         break;
 212                 case 1:
 213                         if (target) {
 214                                 if ((size_t)tarindex + 1 >= targsize) {
 215                                         return (-3);
 216                                 }
 217                                 target[tarindex]   |=  (pos - B32_ar) >> 2;
 218                                 target[tarindex+1]  = ((pos - B32_ar) & 0x03)
 219                                                         << 6 ;
 220                         }
 221                         tarindex++;
 222                         state = 2;
 223                         break;
 224                 case 2:
 225                         if (target) {
 226                                 if ((size_t)tarindex + 1 >= targsize) {
 227                                         return (-4);
 228                                 }
 229                                 target[tarindex]   |=  (pos - B32_ar) << 1;
 230                         }
 231                         /*tarindex++;*/
 232                         state = 3;
 233                         break;
 234                 case 3:
 235                         if (target) {
 236                                 if ((size_t)tarindex + 1 >= targsize) {
 237                                         return (-5);
 238                                 }
 239                                 target[tarindex]   |=  (pos - B32_ar) >> 4;
 240                                 target[tarindex+1]  = ((pos - B32_ar) & 0x0f) << 4 ;
 241                         }
 242                         tarindex++;
 243                         state = 4;
 244                         break;
 245                 case 4:
 246                         if (target) {
 247                                 if ((size_t)tarindex + 1 >= targsize) {
 248                                         return (-6);
 249                                 }
 250                                 target[tarindex]   |=  (pos - B32_ar) >> 1;
 251                                 target[tarindex+1]  = ((pos - B32_ar) & 0x01)
 252                                                         << 7 ;
 253                         }
 254                         tarindex++;
 255                         state = 5;
 256                         break;
 257                 case 5:
 258                         if (target) {
 259                                 if ((size_t)tarindex + 1 >= targsize) {
 260                                         return (-7);
 261                                 }
 262                                 target[tarindex]   |=  (pos - B32_ar) << 2;
 263                         }
 264                         state = 6;
 265                         break;
 266                 case 6:
 267                         if (target) {
 268                                 if ((size_t)tarindex + 1 >= targsize) {
 269                                         return (-8);
 270                                 }
 271                                 target[tarindex]   |=  (pos - B32_ar) >> 3;
 272                                 target[tarindex+1]  = ((pos - B32_ar) & 0x07)
 273                                                         << 5 ;
 274                         }
 275                         tarindex++;
 276                         state = 7;
 277                         break;
 278                 case 7:
 279                         if (target) {
 280                                 if ((size_t)tarindex + 1 >= targsize) {
 281                                         return (-9);
 282                                 }
 283                                 target[tarindex]   |=  (pos - B32_ar);
 284                         }
 285                         tarindex++;
 286                         state = 0;
 287                         break;
 288                 default:
 289                         abort();
 290                 }
 291         }
 292
 293         /*
 294          * We are done decoding Base-32 chars.  Let's see if we ended
 295          * on a byte boundary, and/or with erroneous trailing characters.
 296          */
 297
 298         if (ch == Pad32) {              /* We got a pad char. */
 299                 ch = *src++;            /* Skip it, get next. */
 300                 switch (state) {
 301                 case 0:         /* Invalid = in first position */
 302                 case 1:         /* Invalid = in second position */
 303                         return (-10);
 304
 305                 case 2:         /* Valid, means one byte of info */
 306                 case 3:
 307                         /* Skip any number of spaces. */
 308                         for ((void)NULL; ch != '\0'; ch = *src++)
 309                                 if (!isspace((unsigned char)ch))
 310                                         break;
 311                         /* Make sure there is another trailing = sign. */
 312                         if (ch != Pad32) {
 313                                 return (-11);
 314                         }
 315                         ch = *src++;            /* Skip the = */
 316                         /* Fall through to "single trailing =" case. */
 317                         /* FALLTHROUGH */
 318
 319                 case 4:         /* Valid, means two bytes of info */
 320                 case 5:
 321                 case 6:
 322                         /*
 323                          * We know this char is an =.  Is there anything but
 324                          * whitespace after it?
 325                          */
 326                         for ((void)NULL; ch != '\0'; ch = *src++)
 327                                 if (!(isspace((unsigned char)ch) || ch == '=')) {
 328                                         return (-12);
 329                                 }
 330
 331                 case 7:         /* Valid, means three bytes of info */
 332                         /*
 333                          * We know this char is an =.  Is there anything but
 334                          * whitespace after it?
 335                          */
 336                         for ((void)NULL; ch != '\0'; ch = *src++)
 337                                 if (!isspace((unsigned char)ch)) {
 338                                         return (-13);
 339                                 }
 340
 341                         /*
 342                          * Now make sure for cases 2 and 3 that the "extra"
 343                          * bits that slopped past the last full byte were
 344                          * zeros.  If we don't check them, they become a
 345                          * subliminal channel.
 346                          */
 347                         if (target && target[tarindex] != 0) {
 348                                 return (-14);
 349                         }
 350                 }
 351         } else {
 352                 /*
 353                  * We ended by seeing the end of the string.  Make sure we
 354                  * have no partial bytes lying around.
 355                  */
 356                 if (state != 0)
 357                         return (-15);
 358         }
 359
 360         return (tarindex);
 361 }
 362
 363 int
 364 ldns_b32_pton(char const *src, size_t hashed_owner_str_len, uint8_t *target, size_t targsize)
 365 {
 366         return ldns_b32_pton_ar(src, hashed_owner_str_len, target, targsize, Base32);
 367 }
 368
 369 /* deprecated, here for backwards compatibility */
 370 int
 371 b32_pton(char const *src, size_t hashed_owner_str_len, uint8_t *target, size_t targsize)
 372 {
 373         return ldns_b32_pton_ar(src, hashed_owner_str_len, target, targsize, Base32);
 374 }
 375
 376 int
 377 ldns_b32_pton_extended_hex(char const *src, size_t hashed_owner_str_len, uint8_t *target, size_t targsize)
 378 {
 379         return ldns_b32_pton_ar(src, hashed_owner_str_len, target, targsize, Base32_extended_hex);
 380 }
 381
 382 /* deprecated, here for backwards compatibility */
 383 int
 384 b32_pton_extended_hex(char const *src, size_t hashed_owner_str_len, uint8_t *target, size_t targsize)
 385 {
 386         return ldns_b32_pton_ar(src, hashed_owner_str_len, target, targsize, Base32_extended_hex);
 387 }