| 1 | /*- |
| 2 | * Copyright (c) 2003-2007 Tim Kientzle |
| 3 | * All rights reserved. |
| 4 | * |
| 5 | * Redistribution and use in source and binary forms, with or without |
| 6 | * modification, are permitted provided that the following conditions |
| 7 | * are met: |
| 8 | * 1. Redistributions of source code must retain the above copyright |
| 9 | * notice, this list of conditions and the following disclaimer. |
| 10 | * 2. Redistributions in binary form must reproduce the above copyright |
| 11 | * notice, this list of conditions and the following disclaimer in the |
| 12 | * documentation and/or other materials provided with the distribution. |
| 13 | * |
| 14 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR |
| 15 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| 16 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| 17 | * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, |
| 18 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| 19 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 20 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 21 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 22 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| 23 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 24 | */ |
| 25 | |
| 26 | #include "archive_platform.h" |
| 27 | __FBSDID("$FreeBSD: src/lib/libarchive/archive_read_support_format_tar.c,v 1.58 2007/07/12 15:00:28 cperciva Exp $"); |
| 28 | |
| 29 | #ifdef HAVE_ERRNO_H |
| 30 | #include <errno.h> |
| 31 | #endif |
| 32 | #include <stddef.h> |
| 33 | /* #include <stdint.h> */ /* See archive_platform.h */ |
| 34 | #ifdef HAVE_STDLIB_H |
| 35 | #include <stdlib.h> |
| 36 | #endif |
| 37 | #ifdef HAVE_STRING_H |
| 38 | #include <string.h> |
| 39 | #endif |
| 40 | |
| 41 | /* Obtain suitable wide-character manipulation functions. */ |
| 42 | #ifdef HAVE_WCHAR_H |
| 43 | #include <wchar.h> |
| 44 | #else |
/*
 * Fallback wcscmp() for platforms without <wchar.h>.
 *
 * Returns zero when the two NUL-terminated wide strings are equal and
 * the difference of the first mismatching characters otherwise.  Only
 * the zero / non-zero distinction is relied upon by this file.
 */
static int wcscmp(const wchar_t *s1, const wchar_t *s2)
{
	int delta;

	for (delta = *s1 - *s2; *s1 && delta == 0; ) {
		++s1;
		++s2;
		delta = (int)*s1 - (int)*s2;
	}
	return delta;
}
/*
 * Fallback wcsncmp() for platforms without <wchar.h>.
 *
 * Compares at most n wide characters of s1 and s2, stopping early at a
 * NUL.  Returns zero when the compared prefixes are equal and the
 * difference of the first mismatching characters otherwise.  Nothing is
 * read from either string when n is zero.
 */
static int wcsncmp(const wchar_t *s1, const wchar_t *s2, size_t n)
{
	for (; n > 0; n--, s1++, s2++) {
		if (*s1 != *s2)
			return (int)*s1 - (int)*s2;
		/* Both strings ended together: equal. */
		if (*s1 == 0)
			break;
	}
	return 0;
}
/*
 * Fallback wcslen() for platforms without <wchar.h>: number of wide
 * characters in s before the terminating NUL.
 */
static size_t wcslen(const wchar_t *s)
{
	size_t count = 0;

	while (s[count] != 0)
		count++;
	return count;
}
| 68 | #endif |
| 69 | |
| 70 | #include "archive.h" |
| 71 | #include "archive_entry.h" |
| 72 | #include "archive_private.h" |
| 73 | #include "archive_read_private.h" |
| 74 | |
/*
 * Layout of POSIX 'ustar' tar header.
 *
 * The fields below add up to 500 bytes; headers are read from the
 * archive as 512-byte records.  The checksum field occupies bytes
 * 148..155 of the record, which is the range checksum() replaces with
 * blanks when summing.  All fields are fixed-width character arrays;
 * the numeric ones visible in this file (size, checksum) are parsed
 * with tar_atol().
 */
struct archive_entry_header_ustar {
	char name[100];
	char mode[8];
	char uid[8];
	char gid[8];
	char size[12];
	char mtime[12];
	char checksum[8];
	char typeflag[1];	/* Selects the header handler in tar_read_header(). */
	char linkname[100];	/* "old format" header ends here */
	char magic[6];		/* For POSIX: "ustar\0" */
	char version[2];	/* For POSIX: "00" */
	char uname[32];
	char gname[32];
	char rdevmajor[8];
	char rdevminor[8];
	char prefix[155];
};
| 96 | |
/*
 * Structure of GNU tar header
 *
 * One sparse-map slot as stored in a GNU tar header: two 12-byte
 * character fields.  archive_entry_header_gnutar embeds four of these.
 */
struct gnu_sparse {
	char offset[12];
	char numbytes[12];
};
| 104 | |
/*
 * GNU tar header layout.  The first fields (name through linkname)
 * mirror archive_entry_header_ustar; GNU tar then uses a single 8-byte
 * magic where ustar has magic[6] + version[2], and replaces the ustar
 * 'prefix' area with its own time, sparse and size fields.
 */
struct archive_entry_header_gnutar {
	char name[100];
	char mode[8];
	char uid[8];
	char gid[8];
	char size[12];
	char mtime[12];
	char checksum[8];
	char typeflag[1];
	char linkname[100];
	char magic[8];	/* "ustar", then two blanks, then a NUL */
	char uname[32];
	char gname[32];
	char rdevmajor[8];
	char rdevminor[8];
	char atime[12];
	char ctime[12];
	char offset[12];
	char longnames[4];
	char unused[1];
	struct gnu_sparse sparse[4];	/* In-header sparse map slots. */
	char isextended[1];
	char realsize[12];
	/*
	 * GNU doesn't use POSIX 'prefix' field; they use the 'L' (longname)
	 * entry instead.
	 */
};
| 133 | |
/*
 * Data specific to this format.
 *
 * One node of the singly linked sparse map kept in struct tar.  While
 * data is read, 'offset' is advanced and 'remaining' is decreased by
 * the number of bytes handed to the caller; nodes whose 'remaining'
 * reaches zero are unlinked and freed.
 */
struct sparse_block {
	struct sparse_block *next;	/* Next block, or NULL at end of list. */
	off_t offset;			/* Reported as the data offset to the caller. */
	off_t remaining;		/* Bytes of this block not yet returned. */
};
| 142 | |
/*
 * Per-reader state for the tar format.  Allocated zero-filled when the
 * format is registered and released by archive_read_format_tar_cleanup().
 */
struct tar {
	struct archive_string acl_text;		/* Body of a Solaris 'A' ACL header. */
	struct archive_string entry_name;
	struct archive_string entry_linkname;
	struct archive_string entry_uname;
	struct archive_string entry_gname;
	struct archive_string longlink;		/* Body of a 'K' long-linkname header. */
	struct archive_string longname;		/* Body of an 'L' long-filename header. */
	struct archive_string pax_header;
	struct archive_string pax_global;
	struct archive_string line;
	wchar_t *pax_entry;			/* Heap buffer; freed at cleanup. */
	size_t pax_entry_length;
	int header_recursion_depth;		/* tar_read_header() nesting; capped at 32. */
	off_t entry_bytes_remaining;		/* Entry body bytes not yet consumed. */
	off_t entry_offset;			/* Reset to 0 at the start of each entry. */
	off_t entry_padding;			/* Skipped after the entry body. */
	off_t realsize;				/* Entry size; reported as the offset at EOF. */
	struct sparse_block *sparse_list;	/* Head of the sparse map. */
	struct sparse_block *sparse_last;	/* Presumably the list tail; reset to NULL with the list. */
	int64_t sparse_offset;
	int64_t sparse_numbytes;
	int sparse_gnu_major;			/* GNU sparse format version (major). */
	int sparse_gnu_minor;			/* GNU sparse format version (minor). */
	char sparse_gnu_pending;		/* Non-zero: a GNU sparse map still has to be read. */
};
| 169 | |
/*
 * Forward declarations of the file-local (static) helpers used by the
 * tar reader.
 */
static size_t	UTF8_mbrtowc(wchar_t *pwc, const char *s, size_t n);
static int	archive_block_is_null(const unsigned char *p);
static char	*base64_decode(const wchar_t *, size_t, size_t *);
static void	gnu_add_sparse_entry(struct tar *,
		    off_t offset, off_t remaining);
static int	gnu_sparse_old_read(struct archive_read *, struct tar *,
		    const struct archive_entry_header_gnutar *header);
static void	gnu_sparse_old_parse(struct tar *,
		    const struct gnu_sparse *sparse, int length);
static int	gnu_sparse_01_parse(struct tar *, const wchar_t *);
static ssize_t	gnu_sparse_10_read(struct archive_read *, struct tar *);
/* Handlers for the individual header types. */
static int	header_Solaris_ACL(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *);
static int	header_common(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *);
static int	header_old_tar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *);
static int	header_pax_extensions(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *);
static int	header_pax_global(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);
static int	header_longlink(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);
static int	header_longname(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);
static int	header_volume(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);
static int	header_ustar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);
static int	header_gnutar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);
/* Callbacks handed to __archive_read_register_format(). */
static int	archive_read_format_tar_bid(struct archive_read *);
static int	archive_read_format_tar_cleanup(struct archive_read *);
static int	archive_read_format_tar_read_data(struct archive_read *a,
		    const void **buff, size_t *size, off_t *offset);
static int	archive_read_format_tar_skip(struct archive_read *a);
static int	archive_read_format_tar_read_header(struct archive_read *,
		    struct archive_entry *);
static int	checksum(struct archive_read *, const void *);
static int	pax_attribute(struct tar *, struct archive_entry *,
		    wchar_t *key, wchar_t *value);
static int	pax_header(struct archive_read *, struct tar *,
		    struct archive_entry *, char *attr);
static void	pax_time(const wchar_t *, int64_t *sec, long *nanos);
static ssize_t	readline(struct archive_read *, struct tar *, const char **);
static int	read_body_to_string(struct archive_read *, struct tar *,
		    struct archive_string *, const void *h);
/* Numeric field parsers. */
static int64_t	tar_atol(const char *, unsigned);
static int64_t	tar_atol10(const wchar_t *, unsigned);
static int64_t	tar_atol256(const char *, unsigned);
static int64_t	tar_atol8(const char *, unsigned);
static int	tar_read_header(struct archive_read *, struct tar *,
		    struct archive_entry *);
static int	tohex(int c);
static char	*url_decode(const char *);
static int	utf8_decode(wchar_t *, const char *, size_t length);
static char	*wide_to_narrow(const wchar_t *wval);
| 227 | |
/*
 * Enable GNU tar support.  GNU tar is handled by the general tar
 * reader, so this simply forwards to archive_read_support_format_tar()
 * and returns its status.
 */
int
archive_read_support_format_gnutar(struct archive *a)
{
	int status;

	status = archive_read_support_format_tar(a);
	return (status);
}
| 233 | |
| 234 | |
| 235 | int |
| 236 | archive_read_support_format_tar(struct archive *_a) |
| 237 | { |
| 238 | struct archive_read *a = (struct archive_read *)_a; |
| 239 | struct tar *tar; |
| 240 | int r; |
| 241 | |
| 242 | tar = (struct tar *)malloc(sizeof(*tar)); |
| 243 | if (tar == NULL) { |
| 244 | archive_set_error(&a->archive, ENOMEM, |
| 245 | "Can't allocate tar data"); |
| 246 | return (ARCHIVE_FATAL); |
| 247 | } |
| 248 | memset(tar, 0, sizeof(*tar)); |
| 249 | |
| 250 | r = __archive_read_register_format(a, tar, |
| 251 | archive_read_format_tar_bid, |
| 252 | archive_read_format_tar_read_header, |
| 253 | archive_read_format_tar_read_data, |
| 254 | archive_read_format_tar_skip, |
| 255 | archive_read_format_tar_cleanup); |
| 256 | |
| 257 | if (r != ARCHIVE_OK) |
| 258 | free(tar); |
| 259 | return (ARCHIVE_OK); |
| 260 | } |
| 261 | |
| 262 | static int |
| 263 | archive_read_format_tar_cleanup(struct archive_read *a) |
| 264 | { |
| 265 | struct tar *tar; |
| 266 | struct sparse_block *p; |
| 267 | |
| 268 | tar = (struct tar *)(a->format->data); |
| 269 | while (tar->sparse_list != NULL) { |
| 270 | p = tar->sparse_list; |
| 271 | tar->sparse_list = p->next; |
| 272 | free(p); |
| 273 | } |
| 274 | archive_string_free(&tar->acl_text); |
| 275 | archive_string_free(&tar->entry_name); |
| 276 | archive_string_free(&tar->entry_linkname); |
| 277 | archive_string_free(&tar->entry_uname); |
| 278 | archive_string_free(&tar->entry_gname); |
| 279 | archive_string_free(&tar->line); |
| 280 | archive_string_free(&tar->pax_global); |
| 281 | archive_string_free(&tar->pax_header); |
| 282 | free(tar->pax_entry); |
| 283 | free(tar); |
| 284 | (a->format->data) = NULL; |
| 285 | return (ARCHIVE_OK); |
| 286 | } |
| 287 | |
| 288 | |
| 289 | static int |
| 290 | archive_read_format_tar_bid(struct archive_read *a) |
| 291 | { |
| 292 | int bid; |
| 293 | ssize_t bytes_read; |
| 294 | const void *h; |
| 295 | const struct archive_entry_header_ustar *header; |
| 296 | |
| 297 | /* |
| 298 | * If we're already reading a non-tar file, don't |
| 299 | * bother to bid. |
| 300 | */ |
| 301 | if (a->archive.archive_format != 0 && |
| 302 | (a->archive.archive_format & ARCHIVE_FORMAT_BASE_MASK) != |
| 303 | ARCHIVE_FORMAT_TAR) |
| 304 | return (0); |
| 305 | bid = 0; |
| 306 | |
| 307 | /* |
| 308 | * If we're already reading a tar format, start the bid at 1 as |
| 309 | * a failsafe. |
| 310 | */ |
| 311 | if ((a->archive.archive_format & ARCHIVE_FORMAT_BASE_MASK) == |
| 312 | ARCHIVE_FORMAT_TAR) |
| 313 | bid++; |
| 314 | |
| 315 | /* Now let's look at the actual header and see if it matches. */ |
| 316 | if (a->decompressor->read_ahead != NULL) |
| 317 | bytes_read = (a->decompressor->read_ahead)(a, &h, 512); |
| 318 | else |
| 319 | bytes_read = 0; /* Empty file. */ |
| 320 | if (bytes_read < 0) |
| 321 | return (ARCHIVE_FATAL); |
| 322 | if (bytes_read == 0 && bid > 0) { |
| 323 | /* An archive without a proper end-of-archive marker. */ |
| 324 | /* Hold our nose and bid 1 anyway. */ |
| 325 | return (1); |
| 326 | } |
| 327 | if (bytes_read < 512) { |
| 328 | /* If it's a new archive, then just return a zero bid. */ |
| 329 | if (bid == 0) |
| 330 | return (0); |
| 331 | /* |
| 332 | * If we already know this is a tar archive, |
| 333 | * then we have a problem. |
| 334 | */ |
| 335 | archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, |
| 336 | "Truncated tar archive"); |
| 337 | return (ARCHIVE_FATAL); |
| 338 | } |
| 339 | |
| 340 | /* If it's an end-of-archive mark, we can handle it. */ |
| 341 | if ((*(const char *)h) == 0 && archive_block_is_null((const unsigned char *)h)) { |
| 342 | /* If it's a known tar file, end-of-archive is definite. */ |
| 343 | if ((a->archive.archive_format & ARCHIVE_FORMAT_BASE_MASK) == |
| 344 | ARCHIVE_FORMAT_TAR) |
| 345 | return (512); |
| 346 | /* Empty archive? */ |
| 347 | return (1); |
| 348 | } |
| 349 | |
| 350 | /* If it's not an end-of-archive mark, it must have a valid checksum.*/ |
| 351 | if (!checksum(a, h)) |
| 352 | return (0); |
| 353 | bid += 48; /* Checksum is usually 6 octal digits. */ |
| 354 | |
| 355 | header = (const struct archive_entry_header_ustar *)h; |
| 356 | |
| 357 | /* Recognize POSIX formats. */ |
| 358 | if ((memcmp(header->magic, "ustar\0", 6) == 0) |
| 359 | &&(memcmp(header->version, "00", 2)==0)) |
| 360 | bid += 56; |
| 361 | |
| 362 | /* Recognize GNU tar format. */ |
| 363 | if ((memcmp(header->magic, "ustar ", 6) == 0) |
| 364 | &&(memcmp(header->version, " \0", 2)==0)) |
| 365 | bid += 56; |
| 366 | |
| 367 | /* Type flag must be null, digit or A-Z, a-z. */ |
| 368 | if (header->typeflag[0] != 0 && |
| 369 | !( header->typeflag[0] >= '0' && header->typeflag[0] <= '9') && |
| 370 | !( header->typeflag[0] >= 'A' && header->typeflag[0] <= 'Z') && |
| 371 | !( header->typeflag[0] >= 'a' && header->typeflag[0] <= 'z') ) |
| 372 | return (0); |
| 373 | bid += 2; /* 6 bits of variation in an 8-bit field leaves 2 bits. */ |
| 374 | |
| 375 | /* Sanity check: Look at first byte of mode field. */ |
| 376 | switch (255 & (unsigned)header->mode[0]) { |
| 377 | case 0: case 255: |
| 378 | /* Base-256 value: No further verification possible! */ |
| 379 | break; |
| 380 | case ' ': /* Not recommended, but not illegal, either. */ |
| 381 | break; |
| 382 | case '0': case '1': case '2': case '3': |
| 383 | case '4': case '5': case '6': case '7': |
| 384 | /* Octal Value. */ |
| 385 | /* TODO: Check format of remainder of this field. */ |
| 386 | break; |
| 387 | default: |
| 388 | /* Not a valid mode; bail out here. */ |
| 389 | return (0); |
| 390 | } |
| 391 | /* TODO: Sanity test uid/gid/size/mtime/rdevmajor/rdevminor fields. */ |
| 392 | |
| 393 | return (bid); |
| 394 | } |
| 395 | |
| 396 | /* |
| 397 | * The function invoked by archive_read_header(). This |
| 398 | * just sets up a few things and then calls the internal |
| 399 | * tar_read_header() function below. |
| 400 | */ |
| 401 | static int |
| 402 | archive_read_format_tar_read_header(struct archive_read *a, |
| 403 | struct archive_entry *entry) |
| 404 | { |
| 405 | /* |
| 406 | * When converting tar archives to cpio archives, it is |
| 407 | * essential that each distinct file have a distinct inode |
| 408 | * number. To simplify this, we keep a static count here to |
| 409 | * assign fake dev/inode numbers to each tar entry. Note that |
| 410 | * pax format archives may overwrite this with something more |
| 411 | * useful. |
| 412 | * |
| 413 | * Ideally, we would track every file read from the archive so |
| 414 | * that we could assign the same dev/ino pair to hardlinks, |
| 415 | * but the memory required to store a complete lookup table is |
| 416 | * probably not worthwhile just to support the relatively |
| 417 | * obscure tar->cpio conversion case. |
| 418 | */ |
| 419 | static int default_inode; |
| 420 | static int default_dev; |
| 421 | struct tar *tar; |
| 422 | struct sparse_block *sp; |
| 423 | const char *p; |
| 424 | int r; |
| 425 | size_t l; |
| 426 | ssize_t size; |
| 427 | |
| 428 | /* Assign default device/inode values. */ |
| 429 | archive_entry_set_dev(entry, 1 + default_dev); /* Don't use zero. */ |
| 430 | archive_entry_set_ino(entry, ++default_inode); /* Don't use zero. */ |
| 431 | /* Limit generated st_ino number to 16 bits. */ |
| 432 | if (default_inode >= 0xffff) { |
| 433 | ++default_dev; |
| 434 | default_inode = 0; |
| 435 | } |
| 436 | |
| 437 | tar = (struct tar *)(a->format->data); |
| 438 | tar->entry_offset = 0; |
| 439 | while (tar->sparse_list != NULL) { |
| 440 | sp = tar->sparse_list; |
| 441 | tar->sparse_list = sp->next; |
| 442 | free(sp); |
| 443 | } |
| 444 | tar->sparse_last = NULL; |
| 445 | |
| 446 | r = tar_read_header(a, tar, entry); |
| 447 | |
| 448 | /* |
| 449 | * Yuck. See comments for gnu_sparse_10_read for why this |
| 450 | * is here and not in _read_data where it "should" go. |
| 451 | */ |
| 452 | if (tar->sparse_gnu_pending |
| 453 | && tar->sparse_gnu_major == 1 |
| 454 | && tar->sparse_gnu_minor == 0) { |
| 455 | tar->sparse_gnu_pending = 0; |
| 456 | /* Read initial sparse map. */ |
| 457 | size = gnu_sparse_10_read(a, tar); |
| 458 | if (size < 0) |
| 459 | return (size); |
| 460 | tar->entry_bytes_remaining -= size; |
| 461 | tar->entry_padding += size; |
| 462 | } |
| 463 | |
| 464 | /* |
| 465 | * "non-sparse" files are really just sparse files with |
| 466 | * a single block. |
| 467 | */ |
| 468 | if (tar->sparse_list == NULL) |
| 469 | gnu_add_sparse_entry(tar, 0, tar->entry_bytes_remaining); |
| 470 | |
| 471 | tar->realsize = archive_entry_size(entry); |
| 472 | |
| 473 | if (r == ARCHIVE_OK) { |
| 474 | /* |
| 475 | * "Regular" entry with trailing '/' is really |
| 476 | * directory: This is needed for certain old tar |
| 477 | * variants and even for some broken newer ones. |
| 478 | */ |
| 479 | p = archive_entry_pathname(entry); |
| 480 | l = strlen(p); |
| 481 | if (archive_entry_filetype(entry) == AE_IFREG |
| 482 | && p[l-1] == '/') |
| 483 | archive_entry_set_filetype(entry, AE_IFDIR); |
| 484 | } |
| 485 | return (r); |
| 486 | } |
| 487 | |
| 488 | static int |
| 489 | archive_read_format_tar_read_data(struct archive_read *a, |
| 490 | const void **buff, size_t *size, off_t *offset) |
| 491 | { |
| 492 | ssize_t bytes_read; |
| 493 | struct tar *tar; |
| 494 | struct sparse_block *p; |
| 495 | |
| 496 | tar = (struct tar *)(a->format->data); |
| 497 | |
| 498 | if (tar->sparse_gnu_pending) { |
| 499 | if (tar->sparse_gnu_major == 1 && tar->sparse_gnu_minor == 0) { |
| 500 | /* |
| 501 | * <sigh> We should parse the sparse data |
| 502 | * here, but have to parse it as part of the |
| 503 | * header because of a bug in GNU tar 1.16.1. |
| 504 | */ |
| 505 | } else { |
| 506 | *size = 0; |
| 507 | *offset = 0; |
| 508 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
| 509 | "Unrecognized GNU sparse file format"); |
| 510 | return (ARCHIVE_WARN); |
| 511 | } |
| 512 | tar->sparse_gnu_pending = 0; |
| 513 | } |
| 514 | |
| 515 | /* Remove exhausted entries from sparse list. */ |
| 516 | while (tar->sparse_list != NULL && |
| 517 | tar->sparse_list->remaining == 0) { |
| 518 | p = tar->sparse_list; |
| 519 | tar->sparse_list = p->next; |
| 520 | free(p); |
| 521 | } |
| 522 | |
| 523 | /* If we're at end of file, return EOF. */ |
| 524 | if (tar->sparse_list == NULL || tar->entry_bytes_remaining == 0) { |
| 525 | if ((a->decompressor->skip)(a, tar->entry_padding) < 0) |
| 526 | return (ARCHIVE_FATAL); |
| 527 | tar->entry_padding = 0; |
| 528 | *buff = NULL; |
| 529 | *size = 0; |
| 530 | *offset = tar->realsize; |
| 531 | return (ARCHIVE_EOF); |
| 532 | } |
| 533 | |
| 534 | bytes_read = (a->decompressor->read_ahead)(a, buff, 1); |
| 535 | if (bytes_read == 0) { |
| 536 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
| 537 | "Truncated tar archive"); |
| 538 | return (ARCHIVE_FATAL); |
| 539 | } |
| 540 | if (bytes_read < 0) |
| 541 | return (ARCHIVE_FATAL); |
| 542 | if (bytes_read > tar->entry_bytes_remaining) |
| 543 | bytes_read = tar->entry_bytes_remaining; |
| 544 | /* Don't read more than is available in the |
| 545 | * current sparse block. */ |
| 546 | if (tar->sparse_list->remaining < bytes_read) |
| 547 | bytes_read = tar->sparse_list->remaining; |
| 548 | *size = bytes_read; |
| 549 | *offset = tar->sparse_list->offset; |
| 550 | tar->sparse_list->remaining -= bytes_read; |
| 551 | tar->sparse_list->offset += bytes_read; |
| 552 | tar->entry_bytes_remaining -= bytes_read; |
| 553 | (a->decompressor->consume)(a, bytes_read); |
| 554 | return (ARCHIVE_OK); |
| 555 | } |
| 556 | |
| 557 | static int |
| 558 | archive_read_format_tar_skip(struct archive_read *a) |
| 559 | { |
| 560 | off_t bytes_skipped; |
| 561 | struct tar* tar; |
| 562 | struct sparse_block *p; |
| 563 | |
| 564 | tar = (struct tar *)(a->format->data); |
| 565 | |
| 566 | /* |
| 567 | * Compression layer skip functions are required to either skip the |
| 568 | * length requested or fail, so we can rely upon the entire entry |
| 569 | * plus padding being skipped. |
| 570 | */ |
| 571 | bytes_skipped = (a->decompressor->skip)(a, tar->entry_bytes_remaining + |
| 572 | tar->entry_padding); |
| 573 | if (bytes_skipped < 0) |
| 574 | return (ARCHIVE_FATAL); |
| 575 | |
| 576 | tar->entry_bytes_remaining = 0; |
| 577 | tar->entry_padding = 0; |
| 578 | |
| 579 | /* Free the sparse list. */ |
| 580 | while (tar->sparse_list != NULL) { |
| 581 | p = tar->sparse_list; |
| 582 | tar->sparse_list = p->next; |
| 583 | free(p); |
| 584 | } |
| 585 | tar->sparse_last = NULL; |
| 586 | |
| 587 | return (ARCHIVE_OK); |
| 588 | } |
| 589 | |
| 590 | /* |
| 591 | * This function recursively interprets all of the headers associated |
| 592 | * with a single entry. |
| 593 | */ |
| 594 | static int |
| 595 | tar_read_header(struct archive_read *a, struct tar *tar, |
| 596 | struct archive_entry *entry) |
| 597 | { |
| 598 | ssize_t bytes; |
| 599 | int err; |
| 600 | const void *h; |
| 601 | const struct archive_entry_header_ustar *header; |
| 602 | |
| 603 | /* Read 512-byte header record */ |
| 604 | bytes = (a->decompressor->read_ahead)(a, &h, 512); |
| 605 | if (bytes < 512) { |
| 606 | /* |
| 607 | * If we're here, it's becase the _bid function accepted |
| 608 | * this file. So just call a short read end-of-archive |
| 609 | * and be done with it. |
| 610 | */ |
| 611 | return (ARCHIVE_EOF); |
| 612 | } |
| 613 | (a->decompressor->consume)(a, 512); |
| 614 | |
| 615 | /* Check for end-of-archive mark. */ |
| 616 | if (((*(const char *)h)==0) && archive_block_is_null((const unsigned char *)h)) { |
| 617 | /* Try to consume a second all-null record, as well. */ |
| 618 | bytes = (a->decompressor->read_ahead)(a, &h, 512); |
| 619 | if (bytes > 0) |
| 620 | (a->decompressor->consume)(a, bytes); |
| 621 | archive_set_error(&a->archive, 0, NULL); |
| 622 | return (ARCHIVE_EOF); |
| 623 | } |
| 624 | |
| 625 | /* |
| 626 | * Note: If the checksum fails and we return ARCHIVE_RETRY, |
| 627 | * then the client is likely to just retry. This is a very |
| 628 | * crude way to search for the next valid header! |
| 629 | * |
| 630 | * TODO: Improve this by implementing a real header scan. |
| 631 | */ |
| 632 | if (!checksum(a, h)) { |
| 633 | archive_set_error(&a->archive, EINVAL, "Damaged tar archive"); |
| 634 | return (ARCHIVE_RETRY); /* Retryable: Invalid header */ |
| 635 | } |
| 636 | |
| 637 | if (++tar->header_recursion_depth > 32) { |
| 638 | archive_set_error(&a->archive, EINVAL, "Too many special headers"); |
| 639 | return (ARCHIVE_WARN); |
| 640 | } |
| 641 | |
| 642 | /* Determine the format variant. */ |
| 643 | header = (const struct archive_entry_header_ustar *)h; |
| 644 | switch(header->typeflag[0]) { |
| 645 | case 'A': /* Solaris tar ACL */ |
| 646 | a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; |
| 647 | a->archive.archive_format_name = "Solaris tar"; |
| 648 | err = header_Solaris_ACL(a, tar, entry, h); |
| 649 | break; |
| 650 | case 'g': /* POSIX-standard 'g' header. */ |
| 651 | a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; |
| 652 | a->archive.archive_format_name = "POSIX pax interchange format"; |
| 653 | err = header_pax_global(a, tar, entry, h); |
| 654 | break; |
| 655 | case 'K': /* Long link name (GNU tar, others) */ |
| 656 | err = header_longlink(a, tar, entry, h); |
| 657 | break; |
| 658 | case 'L': /* Long filename (GNU tar, others) */ |
| 659 | err = header_longname(a, tar, entry, h); |
| 660 | break; |
| 661 | case 'V': /* GNU volume header */ |
| 662 | err = header_volume(a, tar, entry, h); |
| 663 | break; |
| 664 | case 'X': /* Used by SUN tar; same as 'x'. */ |
| 665 | a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; |
| 666 | a->archive.archive_format_name = |
| 667 | "POSIX pax interchange format (Sun variant)"; |
| 668 | err = header_pax_extensions(a, tar, entry, h); |
| 669 | break; |
| 670 | case 'x': /* POSIX-standard 'x' header. */ |
| 671 | a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; |
| 672 | a->archive.archive_format_name = "POSIX pax interchange format"; |
| 673 | err = header_pax_extensions(a, tar, entry, h); |
| 674 | break; |
| 675 | default: |
| 676 | if (memcmp(header->magic, "ustar \0", 8) == 0) { |
| 677 | a->archive.archive_format = ARCHIVE_FORMAT_TAR_GNUTAR; |
| 678 | a->archive.archive_format_name = "GNU tar format"; |
| 679 | err = header_gnutar(a, tar, entry, h); |
| 680 | } else if (memcmp(header->magic, "ustar", 5) == 0) { |
| 681 | if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) { |
| 682 | a->archive.archive_format = ARCHIVE_FORMAT_TAR_USTAR; |
| 683 | a->archive.archive_format_name = "POSIX ustar format"; |
| 684 | } |
| 685 | err = header_ustar(a, tar, entry, h); |
| 686 | } else { |
| 687 | a->archive.archive_format = ARCHIVE_FORMAT_TAR; |
| 688 | a->archive.archive_format_name = "tar (non-POSIX)"; |
| 689 | err = header_old_tar(a, tar, entry, h); |
| 690 | } |
| 691 | } |
| 692 | --tar->header_recursion_depth; |
| 693 | /* We return warnings or success as-is. Anything else is fatal. */ |
| 694 | if (err == ARCHIVE_WARN || err == ARCHIVE_OK) |
| 695 | return (err); |
| 696 | if (err == ARCHIVE_EOF) |
| 697 | /* EOF when recursively reading a header is bad. */ |
| 698 | archive_set_error(&a->archive, EINVAL, "Damaged tar archive"); |
| 699 | return (ARCHIVE_FATAL); |
| 700 | } |
| 701 | |
| 702 | /* |
| 703 | * Return true if block checksum is correct. |
| 704 | */ |
| 705 | static int |
| 706 | checksum(struct archive_read *a, const void *h) |
| 707 | { |
| 708 | const unsigned char *bytes; |
| 709 | const struct archive_entry_header_ustar *header; |
| 710 | int check, i, sum; |
| 711 | |
| 712 | (void)a; /* UNUSED */ |
| 713 | bytes = (const unsigned char *)h; |
| 714 | header = (const struct archive_entry_header_ustar *)h; |
| 715 | |
| 716 | /* |
| 717 | * Test the checksum. Note that POSIX specifies _unsigned_ |
| 718 | * bytes for this calculation. |
| 719 | */ |
| 720 | sum = tar_atol(header->checksum, sizeof(header->checksum)); |
| 721 | check = 0; |
| 722 | for (i = 0; i < 148; i++) |
| 723 | check += (unsigned char)bytes[i]; |
| 724 | for (; i < 156; i++) |
| 725 | check += 32; |
| 726 | for (; i < 512; i++) |
| 727 | check += (unsigned char)bytes[i]; |
| 728 | if (sum == check) |
| 729 | return (1); |
| 730 | |
| 731 | /* |
| 732 | * Repeat test with _signed_ bytes, just in case this archive |
| 733 | * was created by an old BSD, Solaris, or HP-UX tar with a |
| 734 | * broken checksum calculation. |
| 735 | */ |
| 736 | check = 0; |
| 737 | for (i = 0; i < 148; i++) |
| 738 | check += (signed char)bytes[i]; |
| 739 | for (; i < 156; i++) |
| 740 | check += 32; |
| 741 | for (; i < 512; i++) |
| 742 | check += (signed char)bytes[i]; |
| 743 | if (sum == check) |
| 744 | return (1); |
| 745 | |
| 746 | return (0); |
| 747 | } |
| 748 | |
| 749 | /* |
| 750 | * Return true if this block contains only nulls. |
| 751 | */ |
| 752 | static int |
| 753 | archive_block_is_null(const unsigned char *p) |
| 754 | { |
| 755 | unsigned i; |
| 756 | |
| 757 | for (i = 0; i < ARCHIVE_BYTES_PER_RECORD / sizeof(*p); i++) |
| 758 | if (*p++) |
| 759 | return (0); |
| 760 | return (1); |
| 761 | } |
| 762 | |
| 763 | /* |
| 764 | * Interpret 'A' Solaris ACL header |
| 765 | */ |
| 766 | static int |
| 767 | header_Solaris_ACL(struct archive_read *a, struct tar *tar, |
| 768 | struct archive_entry *entry, const void *h) |
| 769 | { |
| 770 | const struct archive_entry_header_ustar *header; |
| 771 | size_t size; |
| 772 | int err; |
| 773 | char *acl, *p; |
| 774 | wchar_t *wp; |
| 775 | |
| 776 | /* |
| 777 | * read_body_to_string adds a NUL terminator, but we need a little |
| 778 | * more to make sure that we don't overrun acl_text later. |
| 779 | */ |
| 780 | header = (const struct archive_entry_header_ustar *)h; |
| 781 | size = tar_atol(header->size, sizeof(header->size)); |
| 782 | err = read_body_to_string(a, tar, &(tar->acl_text), h); |
| 783 | if (err != ARCHIVE_OK) |
| 784 | return (err); |
| 785 | err = tar_read_header(a, tar, entry); |
| 786 | if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN)) |
| 787 | return (err); |
| 788 | |
| 789 | /* Skip leading octal number. */ |
| 790 | /* XXX TODO: Parse the octal number and sanity-check it. */ |
| 791 | p = acl = tar->acl_text.s; |
| 792 | while (*p != '\0' && p < acl + size) |
| 793 | p++; |
| 794 | p++; |
| 795 | |
| 796 | if (p >= acl + size) { |
| 797 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
| 798 | "Malformed Solaris ACL attribute"); |
| 799 | return(ARCHIVE_WARN); |
| 800 | } |
| 801 | |
| 802 | /* Skip leading octal number. */ |
| 803 | size -= (p - acl); |
| 804 | acl = p; |
| 805 | |
| 806 | while (*p != '\0' && p < acl + size) |
| 807 | p++; |
| 808 | |
| 809 | wp = (wchar_t *)malloc((p - acl + 1) * sizeof(wchar_t)); |
| 810 | if (wp == NULL) { |
| 811 | archive_set_error(&a->archive, ENOMEM, |
| 812 | "Can't allocate work buffer for ACL parsing"); |
| 813 | return (ARCHIVE_FATAL); |
| 814 | } |
| 815 | utf8_decode(wp, acl, p - acl); |
| 816 | err = __archive_entry_acl_parse_w(entry, wp, |
| 817 | ARCHIVE_ENTRY_ACL_TYPE_ACCESS); |
| 818 | free(wp); |
| 819 | return (err); |
| 820 | } |
| 821 | |
| 822 | /* |
| 823 | * Interpret 'K' long linkname header. |
| 824 | */ |
| 825 | static int |
| 826 | header_longlink(struct archive_read *a, struct tar *tar, |
| 827 | struct archive_entry *entry, const void *h) |
| 828 | { |
| 829 | int err; |
| 830 | |
| 831 | err = read_body_to_string(a, tar, &(tar->longlink), h); |
| 832 | if (err != ARCHIVE_OK) |
| 833 | return (err); |
| 834 | err = tar_read_header(a, tar, entry); |
| 835 | if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN)) |
| 836 | return (err); |
| 837 | /* Set symlink if symlink already set, else hardlink. */ |
| 838 | archive_entry_set_link(entry, tar->longlink.s); |
| 839 | return (ARCHIVE_OK); |
| 840 | } |
| 841 | |
| 842 | /* |
| 843 | * Interpret 'L' long filename header. |
| 844 | */ |
| 845 | static int |
| 846 | header_longname(struct archive_read *a, struct tar *tar, |
| 847 | struct archive_entry *entry, const void *h) |
| 848 | { |
| 849 | int err; |
| 850 | |
| 851 | err = read_body_to_string(a, tar, &(tar->longname), h); |
| 852 | if (err != ARCHIVE_OK) |
| 853 | return (err); |
| 854 | /* Read and parse "real" header, then override name. */ |
| 855 | err = tar_read_header(a, tar, entry); |
| 856 | if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN)) |
| 857 | return (err); |
| 858 | archive_entry_set_pathname(entry, tar->longname.s); |
| 859 | return (ARCHIVE_OK); |
| 860 | } |
| 861 | |
| 862 | |
/*
 * Handle a 'V' (GNU tar volume header) entry.
 *
 * Nothing from the volume header itself is used: the next header is
 * read in its place and that result is returned.
 */
static int
header_volume(struct archive_read *a, struct tar *tar,
    struct archive_entry *entry, const void *h)
{
	int status;

	(void)h; /* UNUSED */

	status = tar_read_header(a, tar, entry);
	return (status);
}
| 875 | |
| 876 | /* |
| 877 | * Read body of an archive entry into an archive_string object. |
| 878 | */ |
| 879 | static int |
| 880 | read_body_to_string(struct archive_read *a, struct tar *tar, |
| 881 | struct archive_string *as, const void *h) |
| 882 | { |
| 883 | off_t size, padded_size; |
| 884 | ssize_t bytes_read, bytes_to_copy; |
| 885 | const struct archive_entry_header_ustar *header; |
| 886 | const void *src; |
| 887 | char *dest; |
| 888 | |
| 889 | (void)tar; /* UNUSED */ |
| 890 | header = (const struct archive_entry_header_ustar *)h; |
| 891 | size = tar_atol(header->size, sizeof(header->size)); |
| 892 | if ((size > 1048576) || (size < 0)) { |
| 893 | archive_set_error(&a->archive, EINVAL, |
| 894 | "Special header too large"); |
| 895 | return (ARCHIVE_FATAL); |
| 896 | } |
| 897 | |
| 898 | /* Read the body into the string. */ |
| 899 | archive_string_ensure(as, size+1); |
| 900 | padded_size = (size + 511) & ~ 511; |
| 901 | dest = as->s; |
| 902 | while (padded_size > 0) { |
| 903 | bytes_read = (a->decompressor->read_ahead)(a, &src, padded_size); |
| 904 | if (bytes_read == 0) |
| 905 | return (ARCHIVE_EOF); |
| 906 | if (bytes_read < 0) |
| 907 | return (ARCHIVE_FATAL); |
| 908 | if (bytes_read > padded_size) |
| 909 | bytes_read = padded_size; |
| 910 | (a->decompressor->consume)(a, bytes_read); |
| 911 | bytes_to_copy = bytes_read; |
| 912 | if ((off_t)bytes_to_copy > size) |
| 913 | bytes_to_copy = (ssize_t)size; |
| 914 | memcpy(dest, src, bytes_to_copy); |
| 915 | dest += bytes_to_copy; |
| 916 | size -= bytes_to_copy; |
| 917 | padded_size -= bytes_read; |
| 918 | } |
| 919 | *dest = '\0'; |
| 920 | return (ARCHIVE_OK); |
| 921 | } |
| 922 | |
| 923 | /* |
| 924 | * Parse out common header elements. |
| 925 | * |
| 926 | * This would be the same as header_old_tar, except that the |
| 927 | * filename is handled slightly differently for old and POSIX |
| 928 | * entries (POSIX entries support a 'prefix'). This factoring |
| 929 | * allows header_old_tar and header_ustar |
| 930 | * to handle filenames differently, while still putting most of the |
| 931 | * common parsing into one place. |
| 932 | */ |
| 933 | static int |
| 934 | header_common(struct archive_read *a, struct tar *tar, |
| 935 | struct archive_entry *entry, const void *h) |
| 936 | { |
| 937 | const struct archive_entry_header_ustar *header; |
| 938 | char tartype; |
| 939 | |
| 940 | (void)a; /* UNUSED */ |
| 941 | |
| 942 | header = (const struct archive_entry_header_ustar *)h; |
| 943 | if (header->linkname[0]) |
| 944 | archive_strncpy(&(tar->entry_linkname), header->linkname, |
| 945 | sizeof(header->linkname)); |
| 946 | else |
| 947 | archive_string_empty(&(tar->entry_linkname)); |
| 948 | |
| 949 | /* Parse out the numeric fields (all are octal) */ |
| 950 | archive_entry_set_mode(entry, tar_atol(header->mode, sizeof(header->mode))); |
| 951 | archive_entry_set_uid(entry, tar_atol(header->uid, sizeof(header->uid))); |
| 952 | archive_entry_set_gid(entry, tar_atol(header->gid, sizeof(header->gid))); |
| 953 | tar->entry_bytes_remaining = tar_atol(header->size, sizeof(header->size)); |
| 954 | archive_entry_set_size(entry, tar->entry_bytes_remaining); |
| 955 | archive_entry_set_mtime(entry, tar_atol(header->mtime, sizeof(header->mtime)), 0); |
| 956 | |
| 957 | /* Handle the tar type flag appropriately. */ |
| 958 | tartype = header->typeflag[0]; |
| 959 | |
| 960 | switch (tartype) { |
| 961 | case '1': /* Hard link */ |
| 962 | archive_entry_set_hardlink(entry, tar->entry_linkname.s); |
| 963 | /* |
| 964 | * The following may seem odd, but: Technically, tar |
| 965 | * does not store the file type for a "hard link" |
| 966 | * entry, only the fact that it is a hard link. So, I |
| 967 | * leave the type zero normally. But, pax interchange |
| 968 | * format allows hard links to have data, which |
| 969 | * implies that the underlying entry is a regular |
| 970 | * file. |
| 971 | */ |
| 972 | if (archive_entry_size(entry) > 0) |
| 973 | archive_entry_set_filetype(entry, AE_IFREG); |
| 974 | |
| 975 | /* |
| 976 | * A tricky point: Traditionally, tar readers have |
| 977 | * ignored the size field when reading hardlink |
| 978 | * entries, and some writers put non-zero sizes even |
| 979 | * though the body is empty. POSIX.1-2001 broke with |
| 980 | * this tradition by permitting hardlink entries to |
| 981 | * store valid bodies in pax interchange format, but |
| 982 | * not in ustar format. Since there is no hard and |
| 983 | * fast way to distinguish pax interchange from |
| 984 | * earlier archives (the 'x' and 'g' entries are |
| 985 | * optional, after all), we need a heuristic. Here, I |
| 986 | * use the bid function to test whether or not there's |
| 987 | * a valid header following. Of course, if we know |
| 988 | * this is pax interchange format, then we must obey |
| 989 | * the size. |
| 990 | * |
| 991 | * This heuristic will only fail for a pax interchange |
| 992 | * archive that is storing hardlink bodies, no pax |
| 993 | * extended attribute entries have yet occurred, and |
| 994 | * we encounter a hardlink entry for a file that is |
| 995 | * itself an uncompressed tar archive. |
| 996 | */ |
| 997 | if (archive_entry_size(entry) > 0 && |
| 998 | a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE && |
| 999 | archive_read_format_tar_bid(a) > 50) { |
| 1000 | archive_entry_set_size(entry, 0); |
| 1001 | tar->entry_bytes_remaining = 0; |
| 1002 | } |
| 1003 | break; |
| 1004 | case '2': /* Symlink */ |
| 1005 | archive_entry_set_filetype(entry, AE_IFLNK); |
| 1006 | archive_entry_set_size(entry, 0); |
| 1007 | tar->entry_bytes_remaining = 0; |
| 1008 | archive_entry_set_symlink(entry, tar->entry_linkname.s); |
| 1009 | break; |
| 1010 | case '3': /* Character device */ |
| 1011 | archive_entry_set_filetype(entry, AE_IFCHR); |
| 1012 | archive_entry_set_size(entry, 0); |
| 1013 | tar->entry_bytes_remaining = 0; |
| 1014 | break; |
| 1015 | case '4': /* Block device */ |
| 1016 | archive_entry_set_filetype(entry, AE_IFBLK); |
| 1017 | archive_entry_set_size(entry, 0); |
| 1018 | tar->entry_bytes_remaining = 0; |
| 1019 | break; |
| 1020 | case '5': /* Dir */ |
| 1021 | archive_entry_set_filetype(entry, AE_IFDIR); |
| 1022 | archive_entry_set_size(entry, 0); |
| 1023 | tar->entry_bytes_remaining = 0; |
| 1024 | break; |
| 1025 | case '6': /* FIFO device */ |
| 1026 | archive_entry_set_filetype(entry, AE_IFIFO); |
| 1027 | archive_entry_set_size(entry, 0); |
| 1028 | tar->entry_bytes_remaining = 0; |
| 1029 | break; |
| 1030 | case 'D': /* GNU incremental directory type */ |
| 1031 | /* |
| 1032 | * No special handling is actually required here. |
| 1033 | * It might be nice someday to preprocess the file list and |
| 1034 | * provide it to the client, though. |
| 1035 | */ |
| 1036 | archive_entry_set_filetype(entry, AE_IFDIR); |
| 1037 | break; |
| 1038 | case 'M': /* GNU "Multi-volume" (remainder of file from last archive)*/ |
| 1039 | /* |
| 1040 | * As far as I can tell, this is just like a regular file |
| 1041 | * entry, except that the contents should be _appended_ to |
| 1042 | * the indicated file at the indicated offset. This may |
| 1043 | * require some API work to fully support. |
| 1044 | */ |
| 1045 | break; |
| 1046 | case 'N': /* Old GNU "long filename" entry. */ |
| 1047 | /* The body of this entry is a script for renaming |
| 1048 | * previously-extracted entries. Ugh. It will never |
| 1049 | * be supported by libarchive. */ |
| 1050 | archive_entry_set_filetype(entry, AE_IFREG); |
| 1051 | break; |
| 1052 | case 'S': /* GNU sparse files */ |
| 1053 | /* |
| 1054 | * Sparse files are really just regular files with |
| 1055 | * sparse information in the extended area. |
| 1056 | */ |
| 1057 | /* FALLTHROUGH */ |
| 1058 | default: /* Regular file and non-standard types */ |
| 1059 | /* |
| 1060 | * Per POSIX: non-recognized types should always be |
| 1061 | * treated as regular files. |
| 1062 | */ |
| 1063 | archive_entry_set_filetype(entry, AE_IFREG); |
| 1064 | break; |
| 1065 | } |
| 1066 | return (0); |
| 1067 | } |
| 1068 | |
| 1069 | /* |
| 1070 | * Parse out header elements for "old-style" tar archives. |
| 1071 | */ |
| 1072 | static int |
| 1073 | header_old_tar(struct archive_read *a, struct tar *tar, |
| 1074 | struct archive_entry *entry, const void *h) |
| 1075 | { |
| 1076 | const struct archive_entry_header_ustar *header; |
| 1077 | |
| 1078 | /* Copy filename over (to ensure null termination). */ |
| 1079 | header = (const struct archive_entry_header_ustar *)h; |
| 1080 | archive_strncpy(&(tar->entry_name), header->name, sizeof(header->name)); |
| 1081 | archive_entry_set_pathname(entry, tar->entry_name.s); |
| 1082 | |
| 1083 | /* Grab rest of common fields */ |
| 1084 | header_common(a, tar, entry, h); |
| 1085 | |
| 1086 | tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); |
| 1087 | return (0); |
| 1088 | } |
| 1089 | |
| 1090 | /* |
| 1091 | * Parse a file header for a pax extended archive entry. |
| 1092 | */ |
| 1093 | static int |
| 1094 | header_pax_global(struct archive_read *a, struct tar *tar, |
| 1095 | struct archive_entry *entry, const void *h) |
| 1096 | { |
| 1097 | int err; |
| 1098 | |
| 1099 | err = read_body_to_string(a, tar, &(tar->pax_global), h); |
| 1100 | if (err != ARCHIVE_OK) |
| 1101 | return (err); |
| 1102 | err = tar_read_header(a, tar, entry); |
| 1103 | return (err); |
| 1104 | } |
| 1105 | |
| 1106 | static int |
| 1107 | header_pax_extensions(struct archive_read *a, struct tar *tar, |
| 1108 | struct archive_entry *entry, const void *h) |
| 1109 | { |
| 1110 | int err, err2; |
| 1111 | |
| 1112 | err = read_body_to_string(a, tar, &(tar->pax_header), h); |
| 1113 | if (err != ARCHIVE_OK) |
| 1114 | return (err); |
| 1115 | |
| 1116 | /* Parse the next header. */ |
| 1117 | err = tar_read_header(a, tar, entry); |
| 1118 | if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN)) |
| 1119 | return (err); |
| 1120 | |
| 1121 | /* |
| 1122 | * TODO: Parse global/default options into 'entry' struct here |
| 1123 | * before handling file-specific options. |
| 1124 | * |
| 1125 | * This design (parse standard header, then overwrite with pax |
| 1126 | * extended attribute data) usually works well, but isn't ideal; |
| 1127 | * it would be better to parse the pax extended attributes first |
| 1128 | * and then skip any fields in the standard header that were |
| 1129 | * defined in the pax header. |
| 1130 | */ |
| 1131 | err2 = pax_header(a, tar, entry, tar->pax_header.s); |
| 1132 | err = err_combine(err, err2); |
| 1133 | tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); |
| 1134 | return (err); |
| 1135 | } |
| 1136 | |
| 1137 | |
| 1138 | /* |
| 1139 | * Parse a file header for a Posix "ustar" archive entry. This also |
| 1140 | * handles "pax" or "extended ustar" entries. |
| 1141 | */ |
| 1142 | static int |
| 1143 | header_ustar(struct archive_read *a, struct tar *tar, |
| 1144 | struct archive_entry *entry, const void *h) |
| 1145 | { |
| 1146 | const struct archive_entry_header_ustar *header; |
| 1147 | struct archive_string *as; |
| 1148 | |
| 1149 | header = (const struct archive_entry_header_ustar *)h; |
| 1150 | |
| 1151 | /* Copy name into an internal buffer to ensure null-termination. */ |
| 1152 | as = &(tar->entry_name); |
| 1153 | if (header->prefix[0]) { |
| 1154 | archive_strncpy(as, header->prefix, sizeof(header->prefix)); |
| 1155 | if (as->s[archive_strlen(as) - 1] != '/') |
| 1156 | archive_strappend_char(as, '/'); |
| 1157 | archive_strncat(as, header->name, sizeof(header->name)); |
| 1158 | } else |
| 1159 | archive_strncpy(as, header->name, sizeof(header->name)); |
| 1160 | |
| 1161 | archive_entry_set_pathname(entry, as->s); |
| 1162 | |
| 1163 | /* Handle rest of common fields. */ |
| 1164 | header_common(a, tar, entry, h); |
| 1165 | |
| 1166 | /* Handle POSIX ustar fields. */ |
| 1167 | archive_strncpy(&(tar->entry_uname), header->uname, |
| 1168 | sizeof(header->uname)); |
| 1169 | archive_entry_set_uname(entry, tar->entry_uname.s); |
| 1170 | |
| 1171 | archive_strncpy(&(tar->entry_gname), header->gname, |
| 1172 | sizeof(header->gname)); |
| 1173 | archive_entry_set_gname(entry, tar->entry_gname.s); |
| 1174 | |
| 1175 | /* Parse out device numbers only for char and block specials. */ |
| 1176 | if (header->typeflag[0] == '3' || header->typeflag[0] == '4') { |
| 1177 | archive_entry_set_rdevmajor(entry, |
| 1178 | tar_atol(header->rdevmajor, sizeof(header->rdevmajor))); |
| 1179 | archive_entry_set_rdevminor(entry, |
| 1180 | tar_atol(header->rdevminor, sizeof(header->rdevminor))); |
| 1181 | } |
| 1182 | |
| 1183 | tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); |
| 1184 | |
| 1185 | return (0); |
| 1186 | } |
| 1187 | |
| 1188 | |
| 1189 | /* |
| 1190 | * Parse the pax extended attributes record. |
| 1191 | * |
| 1192 | * Returns non-zero if there's an error in the data. |
| 1193 | */ |
| 1194 | static int |
| 1195 | pax_header(struct archive_read *a, struct tar *tar, |
| 1196 | struct archive_entry *entry, char *attr) |
| 1197 | { |
| 1198 | size_t attr_length, l, line_length; |
| 1199 | char *line, *p; |
| 1200 | wchar_t *key, *wp, *value; |
| 1201 | int err, err2; |
| 1202 | |
| 1203 | attr_length = strlen(attr); |
| 1204 | err = ARCHIVE_OK; |
| 1205 | while (attr_length > 0) { |
| 1206 | /* Parse decimal length field at start of line. */ |
| 1207 | line_length = 0; |
| 1208 | l = attr_length; |
| 1209 | line = p = attr; /* Record start of line. */ |
| 1210 | while (l>0) { |
| 1211 | if (*p == ' ') { |
| 1212 | p++; |
| 1213 | l--; |
| 1214 | break; |
| 1215 | } |
| 1216 | if (*p < '0' || *p > '9') { |
| 1217 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
| 1218 | "Ignoring malformed pax extended attributes"); |
| 1219 | return (ARCHIVE_WARN); |
| 1220 | } |
| 1221 | line_length *= 10; |
| 1222 | line_length += *p - '0'; |
| 1223 | if (line_length > 999999) { |
| 1224 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
| 1225 | "Rejecting pax extended attribute > 1MB"); |
| 1226 | return (ARCHIVE_WARN); |
| 1227 | } |
| 1228 | p++; |
| 1229 | l--; |
| 1230 | } |
| 1231 | |
| 1232 | /* |
| 1233 | * Parsed length must be no bigger than available data, |
| 1234 | * at least 1, and the last character of the line must |
| 1235 | * be '\n'. |
| 1236 | */ |
| 1237 | if (line_length > attr_length |
| 1238 | || line_length < 1 |
| 1239 | || attr[line_length - 1] != '\n') |
| 1240 | { |
| 1241 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
| 1242 | "Ignoring malformed pax extended attribute"); |
| 1243 | return (ARCHIVE_WARN); |
| 1244 | } |
| 1245 | |
| 1246 | /* Ensure pax_entry buffer is big enough. */ |
| 1247 | if (tar->pax_entry_length <= line_length) { |
| 1248 | wchar_t *old_entry = tar->pax_entry; |
| 1249 | |
| 1250 | if (tar->pax_entry_length <= 0) |
| 1251 | tar->pax_entry_length = 1024; |
| 1252 | while (tar->pax_entry_length <= line_length + 1) |
| 1253 | tar->pax_entry_length *= 2; |
| 1254 | |
| 1255 | old_entry = tar->pax_entry; |
| 1256 | tar->pax_entry = (wchar_t *)realloc(tar->pax_entry, |
| 1257 | tar->pax_entry_length * sizeof(wchar_t)); |
| 1258 | if (tar->pax_entry == NULL) { |
| 1259 | free(old_entry); |
| 1260 | archive_set_error(&a->archive, ENOMEM, |
| 1261 | "No memory"); |
| 1262 | return (ARCHIVE_FATAL); |
| 1263 | } |
| 1264 | } |
| 1265 | |
| 1266 | /* Decode UTF-8 to wchar_t, null-terminate result. */ |
| 1267 | if (utf8_decode(tar->pax_entry, p, |
| 1268 | line_length - (p - attr) - 1)) { |
| 1269 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
| 1270 | "Invalid UTF8 character in pax extended attribute"); |
| 1271 | err = err_combine(err, ARCHIVE_WARN); |
| 1272 | } |
| 1273 | |
| 1274 | /* Null-terminate 'key' value. */ |
| 1275 | wp = key = tar->pax_entry; |
| 1276 | if (key[0] == L'=') |
| 1277 | return (-1); |
| 1278 | while (*wp && *wp != L'=') |
| 1279 | ++wp; |
| 1280 | if (*wp == L'\0') { |
| 1281 | archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, |
| 1282 | "Invalid pax extended attributes"); |
| 1283 | return (ARCHIVE_WARN); |
| 1284 | } |
| 1285 | *wp = 0; |
| 1286 | |
| 1287 | /* Identify null-terminated 'value' portion. */ |
| 1288 | value = wp + 1; |
| 1289 | |
| 1290 | /* Identify this attribute and set it in the entry. */ |
| 1291 | err2 = pax_attribute(tar, entry, key, value); |
| 1292 | err = err_combine(err, err2); |
| 1293 | |
| 1294 | /* Skip to next line */ |
| 1295 | attr += line_length; |
| 1296 | attr_length -= line_length; |
| 1297 | } |
| 1298 | return (err); |
| 1299 | } |
| 1300 | |
/*
 * Store one "LIBARCHIVE.xattr.<name>" pax attribute in 'entry' as an
 * extended attribute.
 *
 * The part of 'name' after the "LIBARCHIVE.xattr." prefix is
 * converted to a narrow string and URL-decoded; 'value' is base64
 * decoded.
 *
 * Returns 0 on success, 3 if 'name' lacks the prefix or has nothing
 * after it, 2 if the name cannot be converted or decoded, and 1 if
 * the value cannot be decoded.
 */
static int
pax_attribute_xattr(struct archive_entry *entry,
    wchar_t *name, wchar_t *value)
{
	char *narrow, *attr_name;
	void *attr_value;
	size_t attr_size;
	int rc;

	/* Need the 17-character prefix plus at least one more character. */
	if (wcsncmp(name, L"LIBARCHIVE.xattr.", 17) != 0 ||
	    wcslen(name) < 18)
		return 3;
	name += 17;

	/* URL-decode name */
	narrow = wide_to_narrow(name);
	if (narrow == NULL)
		return 2;
	attr_name = url_decode(narrow);
	free(narrow);
	if (attr_name == NULL)
		return 2;

	/* Base-64 decode value */
	attr_value = base64_decode(value, wcslen(value), &attr_size);
	if (attr_value != NULL) {
		archive_entry_xattr_add_entry(entry, attr_name,
		    attr_value, attr_size);
		free(attr_value);
		rc = 0;
	} else {
		rc = 1;
	}

	free(attr_name);
	return rc;
}
| 1337 | |
| 1338 | /* |
| 1339 | * Parse a single key=value attribute. key/value pointers are |
| 1340 | * assumed to point into reasonably long-lived storage. |
| 1341 | * |
| 1342 | * Note that POSIX reserves all-lowercase keywords. Vendor-specific |
| 1343 | * extensions should always have keywords of the form "VENDOR.attribute" |
| 1344 | * In particular, it's quite feasible to support many different |
| 1345 | * vendor extensions here. I'm using "LIBARCHIVE" for extensions |
| 1346 | * unique to this library. |
| 1347 | * |
| 1348 | * Investigate other vendor-specific extensions and see if |
| 1349 | * any of them look useful. |
| 1350 | */ |
| 1351 | static int |
| 1352 | pax_attribute(struct tar *tar, struct archive_entry *entry, |
| 1353 | wchar_t *key, wchar_t *value) |
| 1354 | { |
| 1355 | int64_t s; |
| 1356 | long n; |
| 1357 | |
| 1358 | switch (key[0]) { |
| 1359 | case 'G': |
| 1360 | /* GNU "0.0" sparse pax format. */ |
| 1361 | if (wcscmp(key, L"GNU.sparse.numblocks") == 0) { |
| 1362 | tar->sparse_offset = -1; |
| 1363 | tar->sparse_numbytes = -1; |
| 1364 | tar->sparse_gnu_major = 0; |
| 1365 | tar->sparse_gnu_minor = 0; |
| 1366 | } |
| 1367 | if (wcscmp(key, L"GNU.sparse.offset") == 0) { |
| 1368 | tar->sparse_offset = tar_atol10(value, wcslen(value)); |
| 1369 | if (tar->sparse_numbytes != -1) { |
| 1370 | gnu_add_sparse_entry(tar, |
| 1371 | tar->sparse_offset, tar->sparse_numbytes); |
| 1372 | tar->sparse_offset = -1; |
| 1373 | tar->sparse_numbytes = -1; |
| 1374 | } |
| 1375 | } |
| 1376 | if (wcscmp(key, L"GNU.sparse.numbytes") == 0) { |
| 1377 | tar->sparse_numbytes = tar_atol10(value, wcslen(value)); |
| 1378 | if (tar->sparse_numbytes != -1) { |
| 1379 | gnu_add_sparse_entry(tar, |
| 1380 | tar->sparse_offset, tar->sparse_numbytes); |
| 1381 | tar->sparse_offset = -1; |
| 1382 | tar->sparse_numbytes = -1; |
| 1383 | } |
| 1384 | } |
| 1385 | if (wcscmp(key, L"GNU.sparse.size") == 0) |
| 1386 | archive_entry_set_size(entry, |
| 1387 | tar_atol10(value, wcslen(value))); |
| 1388 | |
| 1389 | /* GNU "0.1" sparse pax format. */ |
| 1390 | if (wcscmp(key, L"GNU.sparse.map") == 0) { |
| 1391 | tar->sparse_gnu_major = 0; |
| 1392 | tar->sparse_gnu_minor = 1; |
| 1393 | if (gnu_sparse_01_parse(tar, value) != ARCHIVE_OK) |
| 1394 | return (ARCHIVE_WARN); |
| 1395 | } |
| 1396 | |
| 1397 | /* GNU "1.0" sparse pax format */ |
| 1398 | if (wcscmp(key, L"GNU.sparse.major") == 0) { |
| 1399 | tar->sparse_gnu_major = tar_atol10(value, wcslen(value)); |
| 1400 | tar->sparse_gnu_pending = 1; |
| 1401 | } |
| 1402 | if (wcscmp(key, L"GNU.sparse.minor") == 0) { |
| 1403 | tar->sparse_gnu_minor = tar_atol10(value, wcslen(value)); |
| 1404 | tar->sparse_gnu_pending = 1; |
| 1405 | } |
| 1406 | if (wcscmp(key, L"GNU.sparse.name") == 0) |
| 1407 | archive_entry_copy_pathname_w(entry, value); |
| 1408 | if (wcscmp(key, L"GNU.sparse.realsize") == 0) |
| 1409 | archive_entry_set_size(entry, |
| 1410 | tar_atol10(value, wcslen(value))); |
| 1411 | break; |
| 1412 | case 'L': |
| 1413 | /* Our extensions */ |
| 1414 | /* TODO: Handle arbitrary extended attributes... */ |
| 1415 | /* |
| 1416 | if (strcmp(key, "LIBARCHIVE.xxxxxxx")==0) |
| 1417 | archive_entry_set_xxxxxx(entry, value); |
| 1418 | */ |
| 1419 | if (wcsncmp(key, L"LIBARCHIVE.xattr.", 17)==0) |
| 1420 | pax_attribute_xattr(entry, key, value); |
| 1421 | break; |
| 1422 | case 'S': |
| 1423 | /* We support some keys used by the "star" archiver */ |
| 1424 | if (wcscmp(key, L"SCHILY.acl.access")==0) |
| 1425 | __archive_entry_acl_parse_w(entry, value, |
| 1426 | ARCHIVE_ENTRY_ACL_TYPE_ACCESS); |
| 1427 | else if (wcscmp(key, L"SCHILY.acl.default")==0) |
| 1428 | __archive_entry_acl_parse_w(entry, value, |
| 1429 | ARCHIVE_ENTRY_ACL_TYPE_DEFAULT); |
| 1430 | else if (wcscmp(key, L"SCHILY.devmajor")==0) |
| 1431 | archive_entry_set_rdevmajor(entry, tar_atol10(value, wcslen(value))); |
| 1432 | else if (wcscmp(key, L"SCHILY.devminor")==0) |
| 1433 | archive_entry_set_rdevminor(entry, tar_atol10(value, wcslen(value))); |
| 1434 | else if (wcscmp(key, L"SCHILY.fflags")==0) |
| 1435 | archive_entry_copy_fflags_text_w(entry, value); |
| 1436 | else if (wcscmp(key, L"SCHILY.dev")==0) |
| 1437 | archive_entry_set_dev(entry, tar_atol10(value, wcslen(value))); |
| 1438 | else if (wcscmp(key, L"SCHILY.ino")==0) |
| 1439 | archive_entry_set_ino(entry, tar_atol10(value, wcslen(value))); |
| 1440 | else if (wcscmp(key, L"SCHILY.nlink")==0) |
| 1441 | archive_entry_set_nlink(entry, tar_atol10(value, wcslen(value))); |
| 1442 | break; |
| 1443 | case 'a': |
| 1444 | if (wcscmp(key, L"atime")==0) { |
| 1445 | pax_time(value, &s, &n); |
| 1446 | archive_entry_set_atime(entry, s, n); |
| 1447 | } |
| 1448 | break; |
| 1449 | case 'c': |
| 1450 | if (wcscmp(key, L"ctime")==0) { |
| 1451 | pax_time(value, &s, &n); |
| 1452 | archive_entry_set_ctime(entry, s, n); |
| 1453 | } else if (wcscmp(key, L"charset")==0) { |
| 1454 | /* TODO: Publish charset information in entry. */ |
| 1455 | } else if (wcscmp(key, L"comment")==0) { |
| 1456 | /* TODO: Publish comment in entry. */ |
| 1457 | } |
| 1458 | break; |
| 1459 | case 'g': |
| 1460 | if (wcscmp(key, L"gid")==0) |
| 1461 | archive_entry_set_gid(entry, tar_atol10(value, wcslen(value))); |
| 1462 | else if (wcscmp(key, L"gname")==0) |
| 1463 | archive_entry_copy_gname_w(entry, value); |
| 1464 | break; |
| 1465 | case 'l': |
| 1466 | /* pax interchange doesn't distinguish hardlink vs. symlink. */ |
| 1467 | if (wcscmp(key, L"linkpath")==0) { |
| 1468 | if (archive_entry_hardlink(entry)) |
| 1469 | archive_entry_copy_hardlink_w(entry, value); |
| 1470 | else |
| 1471 | archive_entry_copy_symlink_w(entry, value); |
| 1472 | } |
| 1473 | break; |
| 1474 | case 'm': |
| 1475 | if (wcscmp(key, L"mtime")==0) { |
| 1476 | pax_time(value, &s, &n); |
| 1477 | archive_entry_set_mtime(entry, s, n); |
| 1478 | } |
| 1479 | break; |
| 1480 | case 'p': |
| 1481 | if (wcscmp(key, L"path")==0) |
| 1482 | archive_entry_copy_pathname_w(entry, value); |
| 1483 | break; |
| 1484 | case 'r': |
| 1485 | /* POSIX has reserved 'realtime.*' */ |
| 1486 | break; |
| 1487 | case 's': |
| 1488 | /* POSIX has reserved 'security.*' */ |
| 1489 | /* Someday: if (wcscmp(key, L"security.acl")==0) { ... } */ |
| 1490 | if (wcscmp(key, L"size")==0) { |
| 1491 | tar->entry_bytes_remaining = tar_atol10(value, wcslen(value)); |
| 1492 | archive_entry_set_size(entry, tar->entry_bytes_remaining); |
| 1493 | } |
| 1494 | tar->entry_bytes_remaining = 0; |
| 1495 | |
| 1496 | break; |
| 1497 | case 'u': |
| 1498 | if (wcscmp(key, L"uid")==0) |
| 1499 | archive_entry_set_uid(entry, tar_atol10(value, wcslen(value))); |
| 1500 | else if (wcscmp(key, L"uname")==0) |
| 1501 | archive_entry_copy_uname_w(entry, value); |
| 1502 | break; |
| 1503 | } |
| 1504 | return (0); |
| 1505 | } |
| 1506 | |
| 1507 | |
| 1508 | |
/*
 * Parse a decimal time value, which may include a fractional portion.
 *
 * 'p' points at text of the form [-]digits[.digits].  The whole
 * seconds are stored in *ps with the sign applied, and up to nine
 * fractional digits are stored in *pn as nanoseconds (always
 * non-negative; further digits are ignored).
 *
 * If the seconds do not fit in an int64_t the magnitude saturates at
 * INT64_MAX (so the result is INT64_MAX or -INT64_MAX) and parsing
 * stops there, leaving *pn at 0.
 */
static void
pax_time(const wchar_t *p, int64_t *ps, long *pn)
{
	char digit;
	int64_t s;
	unsigned long l;
	int sign;
	int64_t limit, last_digit_limit;

	/* Largest value that can still take one more decimal digit. */
	limit = INT64_MAX / 10;
	last_digit_limit = INT64_MAX % 10;

	s = 0;
	sign = 1;
	if (*p == '-') {
		sign = -1;
		p++;
	}
	while (*p >= '0' && *p <= '9') {
		digit = *p - '0';
		if (s > limit ||
		    (s == limit && digit > last_digit_limit)) {
			/* Would overflow: clamp instead of wrapping. */
			s = INT64_MAX;
			break;
		}
		s = (s * 10) + digit;
		++p;
	}

	*ps = s * sign;

	/* Calculate nanoseconds. */
	*pn = 0;

	if (*p != '.')
		return;

	/* 'l' is the weight of the next fractional digit, in ns. */
	l = 100000000UL;
	do {
		++p;
		if (*p >= '0' && *p <= '9')
			*pn += (*p - '0') * l;
		else
			break;
	} while (l /= 10);
}
| 1558 | |
| 1559 | /* |
| 1560 | * Parse GNU tar header |
| 1561 | */ |
| 1562 | static int |
| 1563 | header_gnutar(struct archive_read *a, struct tar *tar, |
| 1564 | struct archive_entry *entry, const void *h) |
| 1565 | { |
| 1566 | const struct archive_entry_header_gnutar *header; |
| 1567 | |
| 1568 | (void)a; |
| 1569 | |
| 1570 | /* |
| 1571 | * GNU header is like POSIX ustar, except 'prefix' is |
| 1572 | * replaced with some other fields. This also means the |
| 1573 | * filename is stored as in old-style archives. |
| 1574 | */ |
| 1575 | |
| 1576 | /* Grab fields common to all tar variants. */ |
| 1577 | header_common(a, tar, entry, h); |
| 1578 | |
| 1579 | /* Copy filename over (to ensure null termination). */ |
| 1580 | header = (const struct archive_entry_header_gnutar *)h; |
| 1581 | archive_strncpy(&(tar->entry_name), header->name, |
| 1582 | sizeof(header->name)); |
| 1583 | archive_entry_set_pathname(entry, tar->entry_name.s); |
| 1584 | |
| 1585 | /* Fields common to ustar and GNU */ |
| 1586 | /* XXX Can the following be factored out since it's common |
| 1587 | * to ustar and gnu tar? Is it okay to move it down into |
| 1588 | * header_common, perhaps? */ |
| 1589 | archive_strncpy(&(tar->entry_uname), |
| 1590 | header->uname, sizeof(header->uname)); |
| 1591 | archive_entry_set_uname(entry, tar->entry_uname.s); |
| 1592 | |
| 1593 | archive_strncpy(&(tar->entry_gname), |
| 1594 | header->gname, sizeof(header->gname)); |
| 1595 | archive_entry_set_gname(entry, tar->entry_gname.s); |
| 1596 | |
| 1597 | /* Parse out device numbers only for char and block specials */ |
| 1598 | if (header->typeflag[0] == '3' || header->typeflag[0] == '4') { |
| 1599 | archive_entry_set_rdevmajor(entry, |
| 1600 | tar_atol(header->rdevmajor, sizeof(header->rdevmajor))); |
| 1601 | archive_entry_set_rdevminor(entry, |
| 1602 | tar_atol(header->rdevminor, sizeof(header->rdevminor))); |
| 1603 | } else |
| 1604 | archive_entry_set_rdev(entry, 0); |
| 1605 | |
| 1606 | tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining); |
| 1607 | |
| 1608 | /* Grab GNU-specific fields. */ |
| 1609 | archive_entry_set_atime(entry, |
| 1610 | tar_atol(header->atime, sizeof(header->atime)), 0); |
| 1611 | archive_entry_set_ctime(entry, |
| 1612 | tar_atol(header->ctime, sizeof(header->ctime)), 0); |
| 1613 | if (header->realsize[0] != 0) { |
| 1614 | archive_entry_set_size(entry, |
| 1615 | tar_atol(header->realsize, sizeof(header->realsize))); |
| 1616 | } |
| 1617 | |
| 1618 | if (header->sparse[0].offset[0] != 0) { |
| 1619 | gnu_sparse_old_read(a, tar, header); |
| 1620 | } else { |
| 1621 | if (header->isextended[0] != 0) { |
| 1622 | /* XXX WTF? XXX */ |
| 1623 | } |
| 1624 | } |
| 1625 | |
| 1626 | return (0); |
| 1627 | } |
| 1628 | |
| 1629 | static void |
| 1630 | gnu_add_sparse_entry(struct tar *tar, off_t offset, off_t remaining) |
| 1631 | { |
| 1632 | struct sparse_block *p; |
| 1633 | |
| 1634 | p = (struct sparse_block *)malloc(sizeof(*p)); |
| 1635 | if (p == NULL) |
| 1636 | __archive_errx(1, "Out of memory"); |
| 1637 | memset(p, 0, sizeof(*p)); |
| 1638 | if (tar->sparse_last != NULL) |
| 1639 | tar->sparse_last->next = p; |
| 1640 | else |
| 1641 | tar->sparse_list = p; |
| 1642 | tar->sparse_last = p; |
| 1643 | p->offset = offset; |
| 1644 | p->remaining = remaining; |
| 1645 | } |
| 1646 | |
| 1647 | /* |
| 1648 | * GNU tar old-format sparse data. |
| 1649 | * |
| 1650 | * GNU old-format sparse data is stored in a fixed-field |
| 1651 | * format. Offset/size values are 11-byte octal fields (same |
| 1652 | * format as 'size' field in ustar header). These are |
| 1653 | * stored in the header, allocating subsequent header blocks |
| 1654 | * as needed. Extending the header in this way is a pretty |
| 1655 | * severe POSIX violation; this design has earned GNU tar a |
| 1656 | * lot of criticism. |
| 1657 | */ |
| 1658 | |
| 1659 | static int |
| 1660 | gnu_sparse_old_read(struct archive_read *a, struct tar *tar, |
| 1661 | const struct archive_entry_header_gnutar *header) |
| 1662 | { |
| 1663 | ssize_t bytes_read; |
| 1664 | const void *data; |
| 1665 | struct extended { |
| 1666 | struct gnu_sparse sparse[21]; |
| 1667 | char isextended[1]; |
| 1668 | char padding[7]; |
| 1669 | }; |
| 1670 | const struct extended *ext; |
| 1671 | |
| 1672 | gnu_sparse_old_parse(tar, header->sparse, 4); |
| 1673 | if (header->isextended[0] == 0) |
| 1674 | return (ARCHIVE_OK); |
| 1675 | |
| 1676 | do { |
| 1677 | bytes_read = (a->decompressor->read_ahead)(a, &data, 512); |
| 1678 | if (bytes_read < 0) |
| 1679 | return (ARCHIVE_FATAL); |
| 1680 | if (bytes_read < 512) { |
| 1681 | archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, |
| 1682 | "Truncated tar archive " |
| 1683 | "detected while reading sparse file data"); |
| 1684 | return (ARCHIVE_FATAL); |
| 1685 | } |
| 1686 | (a->decompressor->consume)(a, 512); |
| 1687 | ext = (const struct extended *)data; |
| 1688 | gnu_sparse_old_parse(tar, ext->sparse, 21); |
| 1689 | } while (ext->isextended[0] != 0); |
| 1690 | if (tar->sparse_list != NULL) |
| 1691 | tar->entry_offset = tar->sparse_list->offset; |
| 1692 | return (ARCHIVE_OK); |
| 1693 | } |
| 1694 | |
| 1695 | static void |
| 1696 | gnu_sparse_old_parse(struct tar *tar, |
| 1697 | const struct gnu_sparse *sparse, int length) |
| 1698 | { |
| 1699 | while (length > 0 && sparse->offset[0] != 0) { |
| 1700 | gnu_add_sparse_entry(tar, |
| 1701 | tar_atol(sparse->offset, sizeof(sparse->offset)), |
| 1702 | tar_atol(sparse->numbytes, sizeof(sparse->numbytes))); |
| 1703 | sparse++; |
| 1704 | length--; |
| 1705 | } |
| 1706 | } |
| 1707 | |
| 1708 | /* |
| 1709 | * GNU tar sparse format 0.0 |
| 1710 | * |
| 1711 | * Beginning with GNU tar 1.15, sparse files are stored using |
| 1712 | * information in the pax extended header. The GNU tar maintainers |
| 1713 | * have gone through a number of variations in the process of working |
| 1714 | * out this scheme; fortunately, they're all numbered. |
| 1715 | * |
| 1716 | * Sparse format 0.0 uses attribute GNU.sparse.numblocks to store the |
| 1717 | * number of blocks, and GNU.sparse.offset/GNU.sparse.numbytes to |
| 1718 | * store offset/size for each block. The repeated instances of these |
| 1719 | * latter fields violate the pax specification (which frowns on |
| 1720 | * duplicate keys), so this format was quickly replaced. |
| 1721 | */ |
| 1722 | |
| 1723 | /* |
| 1724 | * GNU tar sparse format 0.1 |
| 1725 | * |
| 1726 | * This version replaced the offset/numbytes attributes with |
| 1727 | * a single "map" attribute that stored a list of integers. This |
| 1728 | * format had two problems: First, the "map" attribute could be very |
| 1729 | * long, which caused problems for some implementations. More |
| 1730 | * importantly, the sparse data was lost when extracted by archivers |
| 1731 | * that didn't recognize this extension. |
| 1732 | */ |
| 1733 | |
| 1734 | static int |
| 1735 | gnu_sparse_01_parse(struct tar *tar, const wchar_t *p) |
| 1736 | { |
| 1737 | const wchar_t *e; |
| 1738 | off_t offset = -1, size = -1; |
| 1739 | |
| 1740 | for (;;) { |
| 1741 | e = p; |
| 1742 | while (*e != '\0' && *e != ',') { |
| 1743 | if (*e < '0' || *e > '9') |
| 1744 | return (ARCHIVE_WARN); |
| 1745 | e++; |
| 1746 | } |
| 1747 | if (offset < 0) { |
| 1748 | offset = tar_atol10(p, e - p); |
| 1749 | if (offset < 0) |
| 1750 | return (ARCHIVE_WARN); |
| 1751 | } else { |
| 1752 | size = tar_atol10(p, e - p); |
| 1753 | if (size < 0) |
| 1754 | return (ARCHIVE_WARN); |
| 1755 | gnu_add_sparse_entry(tar, offset, size); |
| 1756 | offset = -1; |
| 1757 | } |
| 1758 | if (*e == '\0') |
| 1759 | return (ARCHIVE_OK); |
| 1760 | p = e + 1; |
| 1761 | } |
| 1762 | } |
| 1763 | |
| 1764 | /* |
| 1765 | * GNU tar sparse format 1.0 |
| 1766 | * |
| 1767 | * The idea: The offset/size data is stored as a series of base-10 |
| 1768 | * ASCII numbers prepended to the file data, so that dearchivers that |
| 1769 | * don't support this format will extract the block map along with the |
| 1770 | * data and a separate post-process can restore the sparseness. |
| 1771 | * |
| 1772 | * Unfortunately, GNU tar 1.16 adds bogus padding to the end of the |
| 1773 | * entry that depends on the size of the map; this means we have to |
| 1774 | * parse the sparse map when we read the header (otherwise, entry_skip |
| 1775 | * will fail). This is why sparse_10_read is called from read_header |
| 1776 | * above, instead of at the beginning of read_data, where it "should" |
| 1777 | * go. |
| 1778 | * |
| 1779 | * This variant also replaced GNU.sparse.size with GNU.sparse.realsize |
| 1780 | * and introduced the GNU.sparse.major/GNU.sparse.minor attributes. |
| 1781 | */ |
| 1782 | |
| 1783 | /* |
| 1784 | * Read the next line from the input, and parse it as a decimal |
| 1785 | * integer followed by '\n'. Returns positive integer value or |
| 1786 | * negative on error. |
| 1787 | */ |
| 1788 | static int64_t |
| 1789 | gnu_sparse_10_atol(struct archive_read *a, struct tar *tar, |
| 1790 | ssize_t *total_read) |
| 1791 | { |
| 1792 | int64_t l, limit, last_digit_limit; |
| 1793 | const char *p; |
| 1794 | ssize_t bytes_read; |
| 1795 | int base, digit; |
| 1796 | |
| 1797 | base = 10; |
| 1798 | limit = INT64_MAX / base; |
| 1799 | last_digit_limit = INT64_MAX % base; |
| 1800 | |
| 1801 | bytes_read = readline(a, tar, &p); |
| 1802 | if (bytes_read <= 0) |
| 1803 | return (ARCHIVE_FATAL); |
| 1804 | *total_read += bytes_read; |
| 1805 | |
| 1806 | l = 0; |
| 1807 | while (bytes_read > 0) { |
| 1808 | if (*p == '\n') |
| 1809 | return (l); |
| 1810 | if (*p < '0' || *p >= '0' + base) |
| 1811 | return (ARCHIVE_WARN); |
| 1812 | digit = *p - '0'; |
| 1813 | if (l > limit || (l == limit && digit > last_digit_limit)) |
| 1814 | l = UINT64_MAX; /* Truncate on overflow. */ |
| 1815 | else |
| 1816 | l = (l * base) + digit; |
| 1817 | p++; |
| 1818 | bytes_read--; |
| 1819 | } |
| 1820 | /* TODO: Error message. */ |
| 1821 | return (ARCHIVE_WARN); |
| 1822 | } |
| 1823 | |
| 1824 | /* |
| 1825 | * Returns number of bytes consumed to read the sparse block data. |
| 1826 | */ |
| 1827 | static ssize_t |
| 1828 | gnu_sparse_10_read(struct archive_read *a, struct tar *tar) |
| 1829 | { |
| 1830 | ssize_t bytes_read = 0; |
| 1831 | int entries; |
| 1832 | off_t offset, size, to_skip; |
| 1833 | |
| 1834 | /* Parse entries. */ |
| 1835 | entries = gnu_sparse_10_atol(a, tar, &bytes_read); |
| 1836 | if (entries < 0) |
| 1837 | return (ARCHIVE_FATAL); |
| 1838 | /* Parse the individual entries. */ |
| 1839 | while (entries-- > 0) { |
| 1840 | /* Parse offset/size */ |
| 1841 | offset = gnu_sparse_10_atol(a, tar, &bytes_read); |
| 1842 | if (offset < 0) |
| 1843 | return (ARCHIVE_FATAL); |
| 1844 | size = gnu_sparse_10_atol(a, tar, &bytes_read); |
| 1845 | if (size < 0) |
| 1846 | return (ARCHIVE_FATAL); |
| 1847 | /* Add a new sparse entry. */ |
| 1848 | gnu_add_sparse_entry(tar, offset, size); |
| 1849 | } |
| 1850 | /* Skip rest of block... */ |
| 1851 | to_skip = 0x1ff & -bytes_read; |
| 1852 | if (to_skip != (a->decompressor->skip)(a, to_skip)) |
| 1853 | return (ARCHIVE_FATAL); |
| 1854 | return (bytes_read + to_skip); |
| 1855 | } |
| 1856 | |
/*-
 * Convert text->integer.
 *
 * Traditional tar formats (including POSIX) specify base-8 for
 * all of the standard numeric fields.  This is a significant limitation
 * in practice:
 *   = file size is limited to 8GB
 *   = rdevmajor and rdevminor are limited to 21 bits
 *   = uid/gid are limited to 21 bits
 *
 * There are two workarounds for this:
 *   = pax extended headers, which use variable-length string fields
 *   = GNU tar and STAR both allow either base-8 or base-256 in
 *      most fields.  The high bit is set to indicate base-256.
 *
 * On read, this implementation supports both extensions.
 *
 * This dispatcher looks only at the high bit of the first byte:
 * set selects the base-256 parser, clear selects the octal parser.
 */
static int64_t
tar_atol(const char *p, unsigned char_cnt)
{
	/*
	 * Technically, GNU tar considers a field to be in base-256
	 * only if the first byte is 0xff or 0x80.
	 */
	return ((*p & 0x80) != 0
	    ? tar_atol256(p, char_cnt)
	    : tar_atol8(p, char_cnt));
}
| 1885 | |
/*
 * Parse an octal field of at most 'char_cnt' characters: optional
 * leading spaces/tabs, an optional '-', then octal digits.  Parsing
 * stops at the first non-octal character or at the end of the field,
 * whichever comes first; no byte beyond p[char_cnt - 1] is examined.
 * A magnitude that does not fit saturates at INT64_MAX (then the sign
 * is applied).
 *
 * Note that this implementation does not (and should not!) obey
 * locale settings; you cannot simply substitute strtol here, since
 * it does obey locale.
 */
static int64_t
tar_atol8(const char *p, unsigned char_cnt)
{
	int64_t l, limit, last_digit_limit;
	int digit, sign, base;

	base = 8;
	limit = INT64_MAX / base;
	last_digit_limit = INT64_MAX % base;

	/* Leading blanks count against the field width. */
	while (char_cnt > 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}
	sign = 1;
	if (char_cnt > 0 && *p == '-') {
		sign = -1;
		p++;
		char_cnt--;
	}

	l = 0;
	/* Check the remaining width before touching *p. */
	while (char_cnt > 0 && *p >= '0' && *p < '0' + base) {
		digit = *p - '0';
		if (l > limit || (l == limit && digit > last_digit_limit)) {
			l = INT64_MAX; /* Truncate on overflow. */
			break;
		}
		l = (l * base) + digit;
		p++;
		char_cnt--;
	}
	return (sign < 0) ? -l : l;
}
| 1921 | |
/*
 * Parse a decimal number from at most 'char_cnt' wide characters:
 * optional leading spaces/tabs, an optional '-', then decimal digits.
 * Parsing stops at the first non-digit or at the end of the counted
 * range, whichever comes first; no character beyond p[char_cnt - 1]
 * is examined.  A magnitude that does not fit saturates at INT64_MAX
 * (then the sign is applied).
 *
 * Note that this implementation does not (and should not!) obey
 * locale settings; you cannot simply substitute strtol here, since
 * it does obey locale.
 */
static int64_t
tar_atol10(const wchar_t *p, unsigned char_cnt)
{
	int64_t l, limit, last_digit_limit;
	int base, digit, sign;

	base = 10;
	limit = INT64_MAX / base;
	last_digit_limit = INT64_MAX % base;

	/* Leading blanks count against the character budget. */
	while (char_cnt > 0 && (*p == L' ' || *p == L'\t')) {
		p++;
		char_cnt--;
	}
	sign = 1;
	if (char_cnt > 0 && *p == L'-') {
		sign = -1;
		p++;
		char_cnt--;
	}

	l = 0;
	/* Check the remaining budget before touching *p. */
	while (char_cnt > 0 && *p >= L'0' && *p <= L'9') {
		digit = (int)(*p - L'0');
		if (l > limit || (l == limit && digit > last_digit_limit)) {
			l = INT64_MAX; /* Truncate on overflow. */
			break;
		}
		l = (l * base) + digit;
		p++;
		char_cnt--;
	}
	return (sign < 0) ? -l : l;
}
| 1957 | |
/*
 * Parse a base-256 integer.  This is just a straight signed binary
 * value in big-endian order, except that the high-order bit of the
 * first byte is ignored; bit 6 of that byte is the sign bit.
 *
 * The value is built with multiplication and addition rather than
 * shifts, because left-shifting a negative value is undefined in C.
 * Values that do not fit saturate at INT64_MAX or INT64_MIN.  An
 * empty field (char_cnt == 0) yields 0 without reading any byte.
 */
static int64_t
tar_atol256(const char *_p, unsigned char_cnt)
{
	int64_t l, upper_limit, lower_limit;
	const unsigned char *p = (const unsigned char *)_p;

	/* Without this guard the pre-decrement below would wrap around. */
	if (char_cnt == 0)
		return (0);

	upper_limit = INT64_MAX / 256;
	lower_limit = INT64_MIN / 256;

	/* Low six bits of the first byte, sign-extended from bit 6. */
	l = (int64_t)(*p & 0x3f);
	if ((*p & 0x40) != 0)
		l -= 0x40;
	p++;

	while (--char_cnt > 0) {
		if (l > upper_limit) {
			l = INT64_MAX; /* Truncate on overflow */
			break;
		} else if (l < lower_limit) {
			l = INT64_MIN;
			break;
		}
		/* In range: l * 256 + byte cannot overflow here. */
		l = (l * 256) + (int64_t)*p++;
	}
	return (l);
}
| 1993 | |
| 1994 | /* |
| 1995 | * Returns length of line (including trailing newline) |
| 1996 | * or negative on error. 'start' argument is updated to |
| 1997 | * point to first character of line. This avoids copying |
| 1998 | * when possible. |
| 1999 | */ |
| 2000 | static ssize_t |
| 2001 | readline(struct archive_read *a, struct tar *tar, const char **start) |
| 2002 | { |
| 2003 | ssize_t bytes_read; |
| 2004 | ssize_t total_size = 0; |
| 2005 | const void *t; |
| 2006 | const char *s; |
| 2007 | void *p; |
| 2008 | |
| 2009 | bytes_read = (a->decompressor->read_ahead)(a, &t, 1); |
| 2010 | if (bytes_read <= 0) |
| 2011 | return (ARCHIVE_FATAL); |
| 2012 | s = t; /* Start of line? */ |
| 2013 | p = memchr(t, '\n', bytes_read); |
| 2014 | /* If we found '\n' in the read buffer, return pointer to that. */ |
| 2015 | if (p != NULL) { |
| 2016 | bytes_read = 1 + ((const char *)p) - s; |
| 2017 | (a->decompressor->consume)(a, bytes_read); |
| 2018 | *start = s; |
| 2019 | return (bytes_read); |
| 2020 | } |
| 2021 | /* Otherwise, we need to accumulate in a line buffer. */ |
| 2022 | for (;;) { |
| 2023 | archive_string_ensure(&tar->line, total_size + bytes_read); |
| 2024 | memcpy(tar->line.s + total_size, t, bytes_read); |
| 2025 | (a->decompressor->consume)(a, bytes_read); |
| 2026 | total_size += bytes_read; |
| 2027 | /* If we found '\n', clean up and return. */ |
| 2028 | if (p != NULL) { |
| 2029 | *start = tar->line.s; |
| 2030 | return (total_size); |
| 2031 | } |
| 2032 | /* Read some more. */ |
| 2033 | bytes_read = (a->decompressor->read_ahead)(a, &t, 1); |
| 2034 | if (bytes_read <= 0) |
| 2035 | return (ARCHIVE_FATAL); |
| 2036 | s = t; /* Start of line? */ |
| 2037 | p = memchr(t, '\n', bytes_read); |
| 2038 | /* If we found '\n', trim the read. */ |
| 2039 | if (p != NULL) { |
| 2040 | bytes_read = 1 + ((const char *)p) - s; |
| 2041 | } |
| 2042 | } |
| 2043 | } |
| 2044 | |
/*
 * Decode up to 'length' bytes of UTF-8 from 'src' into the
 * wide-character buffer 'dest', stopping early at a NUL character,
 * and terminate the result with L'\0'.
 *
 * Each decoding step consumes at least one input byte and stores one
 * wide character, so 'dest' needs room for up to length + 1 elements.
 * Always returns 0; undecodable input is replaced by UTF8_mbrtowc()
 * rather than reported.
 */
static int
utf8_decode(wchar_t *dest, const char *src, size_t length)
{
	size_t used;

	for (; length > 0; src += used, length -= used, dest++) {
		used = UTF8_mbrtowc(dest, src, length);
		/* Zero means end-of-string. */
		if (used == 0)
			break;
	}
	*dest = L'\0';
	return (0);
}
| 2063 | |
/*
 * Copied and simplified from FreeBSD libc/locale.
 *
 * Decode one UTF-8 sequence of at most 'n' bytes at 's' into *pwc.
 *
 * Returns the number of input bytes consumed, or 0 when any argument
 * is NULL/zero or the decoded character is NUL.  An invalid lead
 * byte, a sequence cut short by 'n', or an invalid continuation byte
 * stores '?' and consumes exactly one byte.
 */
static size_t
UTF8_mbrtowc(wchar_t *pwc, const char *s, size_t n)
{
	const unsigned char *bytes = (const unsigned char *)s;
	unsigned long code;
	unsigned lead;
	int seq_len, k;

	if (s == NULL || n == 0 || pwc == NULL)
		return (0);

	/*
	 * The lead byte determines how many octets make up this
	 * character; 0 marks a byte that cannot start a sequence.
	 */
	lead = bytes[0];
	if (lead < 0x80)
		seq_len = 1;		/* 0xxxxxxx */
	else if (lead < 0xc0)
		seq_len = 0;		/* 10xxxxxx: stray continuation */
	else if (lead < 0xe0)
		seq_len = 2;		/* 110xxxxx */
	else if (lead < 0xf0)
		seq_len = 3;		/* 1110xxxx */
	else if (lead < 0xf8)
		seq_len = 4;		/* 11110xxx */
	else if (lead < 0xfc)
		seq_len = 5;		/* 111110xx */
	else if (lead < 0xfe)
		seq_len = 6;		/* 1111110x */
	else
		seq_len = 0;		/* 0xfe, 0xff */

	/* Invalid lead byte, or sequence truncated by 'n': emit '?'. */
	if (seq_len == 0 || n < (size_t)seq_len) {
		*pwc = '?';
		return (1);
	}

	/* Payload bits of the lead byte: 7 for ASCII, 7 - seq_len else. */
	if (seq_len == 1)
		code = lead;
	else
		code = lead & (0x7fu >> seq_len);

	/* Append six payload bits per continuation byte. */
	for (k = 1; k < seq_len; k++) {
		if ((bytes[k] & 0xc0) != 0x80) {
			/* Invalid intermediate byte; consume one byte and
			 * emit '?' */
			*pwc = '?';
			return (1);
		}
		code = (code << 6) | (bytes[k] & 0x3f);
	}

	/* Assign the value to the output; out-of-range values
	 * just get truncated. */
	*pwc = (wchar_t)code;
#ifdef WCHAR_MAX
	/*
	 * If platform has WCHAR_MAX, we can do something
	 * more sensible with out-of-range values.
	 */
	if (code >= WCHAR_MAX)
		*pwc = '?';
#endif
	/* Return number of bytes input consumed: 0 for end-of-string. */
	return (code == 0 ? 0 : (size_t)seq_len);
}
| 2143 | |
| 2144 | |
/*
 * base64_decode - Base64 decode
 *
 * This accepts most variations of base-64 encoding, including:
 *    * with or without line breaks
 *    * with or without the final group padded with '=' or '_' characters
 * (The most economical Base-64 variant does not pad the last group and
 * omits line breaks; RFC1341 used for MIME requires both.)
 *
 * Decodes the first 'len' wide characters of 'src'.  Returns a
 * malloc()ed buffer that the caller must free and stores the number
 * of decoded bytes in *out_len; returns NULL with *out_len == 0 if
 * the allocation fails.  Characters outside the base-64 alphabet are
 * skipped, and the first padding character ends decoding.
 */
static char *
base64_decode(const wchar_t *src, size_t len, size_t *out_len)
{
	static const char alphabet[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    "abcdefghijklmnopqrstuvwxyz"
	    "0123456789+/";
	/* Maps a character code to its 6-bit value; 0xff = not a digit. */
	static unsigned char sextet_of[128];
	char *out, *d;

	/* 'B' decodes to 1 only once the table has been filled in. */
	if (sextet_of['B'] != 1) {
		size_t i;
		memset(sextet_of, 0xff, sizeof(sextet_of));
		for (i = 0; i < 64; i++)
			sextet_of[(unsigned char)alphabet[i]] = i;
	}

	/* Three output bytes per four input characters, rounded up. */
	/* Note that we may not use all of this... */
	out = (char *)malloc((len * 3 + 3) / 4);
	if (out == NULL) {
		*out_len = 0;
		return (NULL);
	}

	for (d = out; len > 0; ) {
		int bits = 0;	/* Sextets collected so far. */
		int have = 0;	/* Number of sextets in 'bits'. */

		/* Collect the next group of (up to) four characters. */
		while (have < 4 && len > 0) {
			wchar_t c = *src;

			/* '=' or '_' padding indicates final group. */
			if (c == '=' || c == '_') {
				len = 0;
				break;
			}
			src++;
			len--;
			/* Skip illegal characters (including line breaks) */
			if (c > 127 || c < 32 || sextet_of[c] == 0xff)
				continue;
			bits = (bits << 6) | sextet_of[c];
			have++;
		}

		/* Align a short group properly. */
		bits <<= 6 * (4 - have);

		/* Unpack the group: n sextets give n - 1 whole bytes. */
		if (have >= 2)
			d[0] = (bits >> 16) & 0xff;
		if (have >= 3)
			d[1] = (bits >> 8) & 0xff;
		if (have == 4)
			d[2] = bits & 0xff;
		/* A lone sextet is invalid and contributes nothing. */
		d += have * 3 / 4;
	}

	*out_len = d - out;
	return (out);
}
| 2224 | |
/*
 * Convert a wide-character string to a newly allocated multibyte
 * string in the current locale.  The caller frees the result.
 * Returns NULL only if memory cannot be allocated.
 *
 * This is a little tricky because the C99 standard wcstombs()
 * function returns the number of bytes that were converted,
 * not the number that should be converted.  As a result,
 * we can never accurately size the output buffer (without
 * doing a tedious output size calculation in advance).
 * This approach (try a conversion, then try again if it fails)
 * will almost always succeed on the first try, and is thus
 * much faster, at the cost of sometimes requiring multiple
 * passes while we expand the buffer.
 */
static char *
wide_to_narrow(const wchar_t *wval)
{
	/*
	 * Guess an output buffer size.  The "+ 1" keeps the guess
	 * nonzero even for an empty string; with a zero guess the
	 * "buffer exhausted" test below could never fail and doubling
	 * would never grow the buffer.
	 */
	size_t alloc_length = wcslen(wval) * 3 + 1;

	for (;;) {
		size_t converted_length;
		/* Zero-filled, so the result is terminated on every path. */
		char *mbs_val = (char *)calloc(alloc_length + 1, 1);

		if (mbs_val == NULL)
			return (NULL);
		converted_length = wcstombs(mbs_val, wval, alloc_length);

		if (converted_length == (size_t)-1) {
			/*
			 * Unconvertible character.  As before, still hand
			 * back a string; it now holds only what wcstombs()
			 * stored, with no uninitialized bytes.
			 * NOTE(review): callers may prefer NULL here --
			 * confirm against the call sites.
			 */
			mbs_val[alloc_length] = '\0';
			return (mbs_val);
		}
		if (converted_length < alloc_length) {
			/* Everything fit; ensure a trailing null. */
			mbs_val[converted_length] = '\0';
			return (mbs_val);
		}

		/* We exhausted the buffer: resize and try again. */
		free(mbs_val);
		alloc_length *= 2;
	}
}
| 2261 | |
/*
 * Return a newly allocated copy of 'in' with every "%XX" escape
 * (two hex digits, either case) replaced by the byte it encodes.
 * A '%' not followed by two hex digits is copied unchanged.
 * The caller frees the result.  Returns NULL if 'in' is NULL or if
 * memory cannot be allocated.
 */
static char *
url_decode(const char *in)
{
	char *out, *d;
	const char *s;

	if (in == NULL)
		return (NULL);
	/* Decoding never lengthens the string. */
	out = (char *)malloc(strlen(in) + 1);
	if (out == NULL)
		return (NULL);
	for (s = in, d = out; *s != '\0'; ) {
		if (*s == '%') {
			/* Try to convert % escape */
			int digit1 = tohex(s[1]);
			/*
			 * Look at s[2] only when s[1] was a hex digit:
			 * if s[1] is the terminator, s[2] lies beyond
			 * the end of the string.
			 */
			int digit2 = (digit1 >= 0) ? tohex(s[2]) : -1;
			if (digit1 >= 0 && digit2 >= 0) {
				/* Looks good, consume three chars */
				s += 3;
				/* Convert output */
				*d++ = (char)((digit1 << 4) | digit2);
				continue;
			}
			/* Else fall through and treat '%' as normal char */
		}
		*d++ = *s++;
	}
	*d = '\0';
	return (out);
}
| 2290 | |
/*
 * Return the value (0..15) of the hexadecimal digit 'c', accepting
 * both upper and lower case, or -1 if 'c' is not a hex digit.
 */
static int
tohex(int c)
{
	int value = -1;

	if (c >= 'a' && c <= 'f')
		value = 10 + (c - 'a');
	else if (c >= 'A' && c <= 'F')
		value = 10 + (c - 'A');
	else if (c >= '0' && c <= '9')
		value = c - '0';
	return (value);
}