2 * Copyright (c) 2003-2010 Tim Kientzle
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #include "archive_platform.h"
27 __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_open_filename.c 201093 2009-12-28 02:28:44Z kientzle $");
29 #ifdef HAVE_SYS_IOCTL_H
30 #include <sys/ioctl.h>
32 #ifdef HAVE_SYS_STAT_H
53 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
55 #elif defined(__NetBSD__) || defined(__OpenBSD__)
56 #include <sys/disklabel.h>
58 #elif defined(__DragonFly__)
59 #include <sys/diskslice.h>
63 #include "archive_string.h"
69 struct read_file_data {
73 mode_t st_mode; /* Mode bits for opened file. */
75 enum fnt_e { FNT_STDIN, FNT_MBS, FNT_WCS } filename_type;
77 char m[1];/* MBS filename. */
78 wchar_t w[1];/* WCS filename. */
79 } filename; /* Must be last! */
82 static int file_close(struct archive *, void *);
83 static int file_open_filename(struct archive *, enum fnt_e, const void *,
85 static ssize_t file_read(struct archive *, void *, const void **buff);
86 static int64_t file_seek(struct archive *, void *, int64_t request, int);
87 static int64_t file_skip(struct archive *, void *, int64_t request);
88 static int64_t file_skip_lseek(struct archive *, void *, int64_t request);
91 archive_read_open_file(struct archive *a, const char *filename,
94 return (archive_read_open_filename(a, filename, block_size));
98 archive_read_open_filename(struct archive *a, const char *filename,
101 enum fnt_e filename_type;
103 if (filename == NULL || filename[0] == '\0') {
104 filename_type = FNT_STDIN;
106 filename_type = FNT_MBS;
107 return (file_open_filename(a, filename_type, filename, block_size));
111 archive_read_open_filename_w(struct archive *a, const wchar_t *wfilename,
114 enum fnt_e filename_type;
116 if (wfilename == NULL || wfilename[0] == L'\0') {
117 filename_type = FNT_STDIN;
119 #if defined(_WIN32) && !defined(__CYGWIN__)
120 filename_type = FNT_WCS;
123 * POSIX systems do not support a wchar_t interface for
124 * the open() system call, so we have to translate a wchar_t
125 * filename to a multi-byte one and use it.
127 struct archive_string fn;
130 archive_string_init(&fn);
131 if (archive_string_append_from_wcs(&fn, wfilename,
132 wcslen(wfilename)) != 0) {
134 archive_set_error(a, errno,
135 "Can't allocate memory");
137 archive_set_error(a, EINVAL,
138 "Failed to convert a wide-character"
139 " filename to a multi-byte filename");
140 archive_string_free(&fn);
141 return (ARCHIVE_FATAL);
143 r = file_open_filename(a, FNT_MBS, fn.s, block_size);
144 archive_string_free(&fn);
148 return (file_open_filename(a, filename_type, wfilename, block_size));
152 file_open_filename(struct archive *a, enum fnt_e filename_type,
153 const void *_filename, size_t block_size)
156 struct read_file_data *mine;
158 const char *filename = NULL;
159 const wchar_t *wfilename = NULL;
161 int is_disk_like = 0;
162 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
163 off_t mediasize = 0; /* FreeBSD-specific, so off_t okay here. */
164 #elif defined(__NetBSD__) || defined(__OpenBSD__)
166 #elif defined(__DragonFly__)
170 archive_clear_error(a);
171 if (filename_type == FNT_STDIN) {
172 /* We used to delegate stdin support by
173 * directly calling archive_read_open_fd(a,0,block_size)
174 * here, but that doesn't (and shouldn't) handle the
175 * end-of-file flush when reading stdin from a pipe.
176 * Basically, read_open_fd() is intended for folks who
177 * are willing to handle such details themselves. This
178 * API is intended to be a little smarter for folks who
179 * want easy handling of the common case.
182 #if defined(__CYGWIN__) || defined(_WIN32)
183 setmode(0, O_BINARY);
186 } else if (filename_type == FNT_MBS) {
187 filename = (const char *)_filename;
188 fd = open(filename, O_RDONLY | O_BINARY);
190 archive_set_error(a, errno,
191 "Failed to open '%s'", filename);
192 return (ARCHIVE_FATAL);
195 #if defined(_WIN32) && !defined(__CYGWIN__)
196 wfilename = (const wchar_t *)_filename;
197 fd = _wopen(wfilename, O_RDONLY | O_BINARY);
198 if (fd < 0 && errno == ENOENT) {
200 fullpath = __la_win_permissive_name_w(wfilename);
201 if (fullpath != NULL) {
202 fd = _wopen(fullpath, O_RDONLY | O_BINARY);
207 archive_set_error(a, errno,
208 "Failed to open '%S'", wfilename);
209 return (ARCHIVE_FATAL);
212 archive_set_error(a, ARCHIVE_ERRNO_MISC,
213 "Unexpedted operation in archive_read_open_filename");
214 return (ARCHIVE_FATAL);
217 if (fstat(fd, &st) != 0) {
218 if (filename_type == FNT_WCS)
219 archive_set_error(a, errno, "Can't stat '%S'",
222 archive_set_error(a, errno, "Can't stat '%s'",
224 return (ARCHIVE_FATAL);
228 * Determine whether the input looks like a disk device or a
229 * tape device. The results are used below to select an I/O
231 * = "disk-like" devices support arbitrary lseek() and will
232 * support I/O requests of any size. So we get easy skipping
233 * and can cheat on block sizes to get better performance.
234 * = "tape-like" devices require strict blocking and use
235 * specialized ioctls for seeking.
236 * = "socket-like" devices cannot seek at all but can improve
237 * performance by using nonblocking I/O to read "whatever is
238 * available right now".
240 * Right now, we only specially recognize disk-like devices,
241 * but it should be straightforward to add probes and strategy
242 * here for tape-like and socket-like devices.
244 if (S_ISREG(st.st_mode)) {
245 /* Safety: Tell the extractor not to overwrite the input. */
246 archive_read_extract_set_skip_file(a, st.st_dev, st.st_ino);
247 /* Regular files act like disks. */
250 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
251 /* FreeBSD: if it supports DIOCGMEDIASIZE ioctl, it's disk-like. */
252 else if (S_ISCHR(st.st_mode) &&
253 ioctl(fd, DIOCGMEDIASIZE, &mediasize) == 0 &&
257 #elif defined(__NetBSD__) || defined(__OpenBSD__)
258 /* Net/OpenBSD: if it supports DIOCGDINFO ioctl, it's disk-like. */
259 else if ((S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) &&
260 ioctl(fd, DIOCGDINFO, &dl) == 0 &&
261 dl.d_partitions[DISKPART(st.st_rdev)].p_size > 0) {
264 #elif defined(__DragonFly__)
265 /* DragonFly BSD: if it supports DIOCGPART ioctl, it's disk-like. */
266 else if (S_ISCHR(st.st_mode) &&
267 ioctl(fd, DIOCGPART, &pi) == 0 &&
271 #elif defined(__linux__)
272 /* Linux: All block devices are disk-like. */
273 else if (S_ISBLK(st.st_mode) &&
274 lseek(fd, 0, SEEK_CUR) == 0 &&
275 lseek(fd, 0, SEEK_SET) == 0 &&
276 lseek(fd, 0, SEEK_END) > 0 &&
277 lseek(fd, 0, SEEK_SET) == 0) {
281 /* TODO: Add an "is_tape_like" variable and appropriate tests. */
283 if (filename_type == FNT_WCS)
284 mine = (struct read_file_data *)calloc(1,
285 sizeof(*mine) + wcslen(wfilename) * sizeof(wchar_t));
287 mine = (struct read_file_data *)calloc(1,
288 sizeof(*mine) + strlen(filename));
289 /* Disk-like devices prefer power-of-two block sizes. */
290 /* Use provided block_size as a guide so users have some control. */
292 size_t new_block_size = 64 * 1024;
293 while (new_block_size < block_size
294 && new_block_size < 64 * 1024 * 1024)
296 block_size = new_block_size;
298 buffer = malloc(block_size);
299 if (mine == NULL || buffer == NULL) {
300 archive_set_error(a, ENOMEM, "No memory");
303 return (ARCHIVE_FATAL);
305 if (filename_type == FNT_WCS)
306 wcscpy(mine->filename.w, wfilename);
308 strcpy(mine->filename.m, filename);
309 mine->filename_type = filename_type;
310 mine->block_size = block_size;
311 mine->buffer = buffer;
313 /* Remember mode so close can decide whether to flush. */
314 mine->st_mode = st.st_mode;
316 /* Disk-like inputs can use lseek(). */
318 archive_read_set_seek_callback(a, file_seek);
322 archive_read_set_read_callback(a, file_read);
323 archive_read_set_skip_callback(a, file_skip);
324 archive_read_set_close_callback(a, file_close);
325 archive_read_set_callback_data(a, mine);
326 return (archive_read_open1(a));
330 file_read(struct archive *a, void *client_data, const void **buff)
332 struct read_file_data *mine = (struct read_file_data *)client_data;
335 /* TODO: If a recent lseek() operation has left us
336 * mis-aligned, read and return a short block to try to get
337 * us back in alignment. */
339 /* TODO: Someday, try mmap() here; if that succeeds, give
340 * the entire file to libarchive as a single block. That
341 * could be a lot faster than block-by-block manual I/O. */
343 /* TODO: We might be able to improve performance on pipes and
344 * sockets by setting non-blocking I/O and just accepting
345 * whatever we get here instead of waiting for a full block
348 *buff = mine->buffer;
350 bytes_read = read(mine->fd, mine->buffer, mine->block_size);
351 if (bytes_read < 0) {
354 else if (mine->filename_type == FNT_STDIN)
355 archive_set_error(a, errno,
356 "Error reading stdin");
357 else if (mine->filename_type == FNT_MBS)
358 archive_set_error(a, errno,
359 "Error reading '%s'", mine->filename.m);
361 archive_set_error(a, errno,
362 "Error reading '%S'", mine->filename.w);
369 * Regular files and disk-like block devices can use simple lseek
370 * without needing to round the request to the block size.
372 * TODO: This can leave future reads mis-aligned. Since we know the
373 * offset here, we should store it and use it in file_read() above
374 * to determine whether we should perform a short read to get back
375 * into alignment. Long series of mis-aligned reads can negatively
376 * impact disk throughput. (Of course, the performance impact should
377 * be carefully tested; extra code complexity is only worthwhile if
378 * it does provide measurable improvement.)
380 * TODO: Be lazy about the actual seek. There are a few pathological
381 * cases where libarchive makes a bunch of seek requests in a row
382 * without any intervening reads. This isn't a huge performance
383 * problem, since the kernel handles seeks lazily already, but
384 * it would be very slightly faster if we simply remembered the
385 * seek request here and then actually performed the seek at the
386 * top of the read callback above.
389 file_skip_lseek(struct archive *a, void *client_data, int64_t request)
391 struct read_file_data *mine = (struct read_file_data *)client_data;
392 #if defined(_WIN32) && !defined(__CYGWIN__)
393 /* We use _lseeki64() on Windows. */
394 int64_t old_offset, new_offset;
396 off_t old_offset, new_offset;
399 /* We use off_t here because lseek() is declared that way. */
401 /* TODO: Deal with case where off_t isn't 64 bits.
402 * This shouldn't be a problem on Linux or other POSIX
403 * systems, since the configuration logic for libarchive
404 * tries to obtain a 64-bit off_t. It's still an issue
405 * on Windows, though, so it might suffice to just use
406 * _lseeki64() on Windows.
408 if ((old_offset = lseek(mine->fd, 0, SEEK_CUR)) >= 0 &&
409 (new_offset = lseek(mine->fd, request, SEEK_CUR)) >= 0)
410 return (new_offset - old_offset);
412 /* If lseek() fails, don't bother trying again. */
415 /* Let libarchive recover with read+discard */
419 /* If the input is corrupted or truncated, fail. */
420 if (mine->filename_type == FNT_STDIN)
421 archive_set_error(a, errno, "Error seeking in stdin");
422 else if (mine->filename_type == FNT_MBS)
423 archive_set_error(a, errno, "Error seeking in '%s'",
426 archive_set_error(a, errno, "Error seeking in '%S'",
433 * TODO: Implement another file_skip_XXXX that uses MTIO ioctls to
434 * accelerate operation on tape drives.
438 file_skip(struct archive *a, void *client_data, int64_t request)
440 struct read_file_data *mine = (struct read_file_data *)client_data;
442 /* Delegate skip requests. */
444 return (file_skip_lseek(a, client_data, request));
446 /* If we can't skip, return 0; libarchive will read+discard instead. */
451 * TODO: Store the offset and use it in the read callback.
454 file_seek(struct archive *a, void *client_data, int64_t request, int whence)
456 struct read_file_data *mine = (struct read_file_data *)client_data;
459 /* We use off_t here because lseek() is declared that way. */
460 /* See above for notes about when off_t is less than 64 bits. */
461 r = lseek(mine->fd, request, whence);
465 /* If the input is corrupted or truncated, fail. */
466 if (mine->filename_type == FNT_STDIN)
467 archive_set_error(a, errno, "Error seeking in stdin");
468 else if (mine->filename_type == FNT_MBS)
469 archive_set_error(a, errno, "Error seeking in '%s'",
472 archive_set_error(a, errno, "Error seeking in '%S'",
474 return (ARCHIVE_FATAL);
478 file_close(struct archive *a, void *client_data)
480 struct read_file_data *mine = (struct read_file_data *)client_data;
482 (void)a; /* UNUSED */
484 /* Only flush and close if open succeeded. */
487 * Sometimes, we should flush the input before closing.
488 * Regular files: faster to just close without flush.
489 * Disk-like devices: Ditto.
490 * Tapes: must not flush (user might need to
491 * read the "next" item on a non-rewind device).
492 * Pipes and sockets: must flush (otherwise, the
493 * program feeding the pipe or socket may complain).
494 * Here, I flush everything except for regular files and
497 if (!S_ISREG(mine->st_mode)
498 && !S_ISCHR(mine->st_mode)
499 && !S_ISBLK(mine->st_mode)) {
502 bytesRead = read(mine->fd, mine->buffer,
504 } while (bytesRead > 0);
506 /* If a named file was opened, then it needs to be closed. */
507 if (mine->filename_type != FNT_STDIN)