2 * Copyright (c) 2003-2007 Tim Kientzle
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 * $FreeBSD: src/lib/libarchive/archive_entry.h,v 1.24 2007/12/30 04:58:21 kientzle Exp $
28 #ifndef ARCHIVE_ENTRY_H_INCLUDED
29 #define ARCHIVE_ENTRY_H_INCLUDED
31 #include <sys/types.h>
32 #include <stddef.h> /* for wchar_t */
41 * Description of an archive entry.
43 * Basically, a "struct stat" with a few text fields added in.
45 * TODO: Add "comment", "charset", and possibly other entries that are
46 * supported by "pax interchange" format. However, GNU, ustar, cpio,
47 * and other variants don't support these features, so they're not an
48 * excruciatingly high priority right now.
50 * TODO: "pax interchange" format allows essentially arbitrary
51 * key/value attributes to be attached to any entry. Supporting
52 * such extensions may make this library useful for special
53 * applications (e.g., a package manager could attach special
54 * package-management attributes to each entry).
59 * File-type constants. These are returned from archive_entry_filetype()
60 * and passed to archive_entry_set_filetype().
62 * These values match S_XXX defines on every platform I've checked,
63 * including Windows, AIX, Linux, Solaris, and BSD. They're
64 * (re)defined here because platforms generally don't define the ones
65 * they don't support. For example, Windows doesn't define S_IFLNK or
66 * S_IFBLK. Instead of having a mass of conditional logic and system
67 * checks to define any S_XXX values that aren't supported locally,
68 * I've just defined a new set of such constants so that
69 * libarchive-based applications can manipulate and identify archive
70 * entries properly even if the hosting platform can't store them on
73 * These values are also used directly within some portable formats,
74 * such as cpio. If you find a platform that varies from these, the
75 * correct solution is to leave these alone and translate from these
76 * portable values to platform-native values when entries are read from
/*
 * Portable file-type codes: the values archive_entry_filetype() returns
 * and archive_entry_set_filetype() accepts.  Each AE_IFxxx name parallels
 * the S_IFxxx name from the host's stat interface, but the octal values
 * are pinned here so they stay available (and identical) on platforms
 * that do not define every S_IFxxx constant themselves.
 *
 * NOTE(review): by analogy with S_IFMT, AE_IFMT is presumably the mask
 * that isolates the type bits from a full mode value -- confirm against
 * the implementation of archive_entry_filetype().
 */
79 #define AE_IFMT 0170000
80 #define AE_IFREG 0100000
81 #define AE_IFLNK 0120000
82 #define AE_IFSOCK 0140000
83 #define AE_IFCHR 0020000
84 #define AE_IFBLK 0060000
85 #define AE_IFDIR 0040000
86 #define AE_IFIFO 0010000
89 * Basic object manipulation
/*
 * Object lifecycle.  archive_entry_new() takes no arguments and hands
 * back an entry pointer; archive_entry_free() takes an entry and returns
 * nothing.  archive_entry_clear() and archive_entry_clone() both take an
 * entry and return an entry pointer.
 *
 * NOTE(review): this header does not say what the returned pointer is on
 * failure (presumably NULL), nor whether archive_entry_clear() returns
 * its own argument -- confirm in the implementation before relying on it.
 */
92 struct archive_entry *archive_entry_clear(struct archive_entry *);
93 /* The 'clone' function does a deep copy; all of the strings are copied too. */
94 struct archive_entry *archive_entry_clone(struct archive_entry *);
95 void archive_entry_free(struct archive_entry *);
96 struct archive_entry *archive_entry_new(void);
99 * Retrieve fields from an archive_entry.
/*
 * Read accessors.  Every function takes the entry as its first (usually
 * only) argument and returns one field by value or as a pointer to
 * constant text.
 *
 *  - Functions ending in "_w" return const wchar_t * where the plain
 *    variant returns const char *.
 *  - archive_entry_fflags() is the only accessor here that reports its
 *    result through out-parameters (the "set" and "clear" words) rather
 *    than through the return value.
 *  - The entry pointer is not const-qualified in any of these prototypes.
 *
 * NOTE(review): the *_nsec() accessors presumably return the sub-second
 * part of the matching timestamp in nanoseconds -- confirm.
 * NOTE(review): the lifetime of the returned strings is not stated in
 * this header; check the implementation before keeping a returned
 * pointer across other calls on the same entry.
 */
102 time_t archive_entry_atime(struct archive_entry *);
103 long archive_entry_atime_nsec(struct archive_entry *);
104 time_t archive_entry_ctime(struct archive_entry *);
105 long archive_entry_ctime_nsec(struct archive_entry *);
106 dev_t archive_entry_dev(struct archive_entry *);
107 dev_t archive_entry_devmajor(struct archive_entry *);
108 dev_t archive_entry_devminor(struct archive_entry *);
109 mode_t archive_entry_filetype(struct archive_entry *);
110 void archive_entry_fflags(struct archive_entry *,
111 unsigned long * /* set */,
112 unsigned long * /* clear */);
113 const char *archive_entry_fflags_text(struct archive_entry *);
114 gid_t archive_entry_gid(struct archive_entry *);
115 const char *archive_entry_gname(struct archive_entry *);
116 const wchar_t *archive_entry_gname_w(struct archive_entry *);
117 const char *archive_entry_hardlink(struct archive_entry *);
118 const wchar_t *archive_entry_hardlink_w(struct archive_entry *);
119 ino_t archive_entry_ino(struct archive_entry *);
120 mode_t archive_entry_mode(struct archive_entry *);
121 time_t archive_entry_mtime(struct archive_entry *);
122 long archive_entry_mtime_nsec(struct archive_entry *);
123 unsigned int archive_entry_nlink(struct archive_entry *);
124 const char *archive_entry_pathname(struct archive_entry *);
125 const wchar_t *archive_entry_pathname_w(struct archive_entry *);
126 dev_t archive_entry_rdev(struct archive_entry *);
127 dev_t archive_entry_rdevmajor(struct archive_entry *);
128 dev_t archive_entry_rdevminor(struct archive_entry *);
129 int64_t archive_entry_size(struct archive_entry *);
130 const char *archive_entry_strmode(struct archive_entry *);
131 const char *archive_entry_symlink(struct archive_entry *);
132 const wchar_t *archive_entry_symlink_w(struct archive_entry *);
133 uid_t archive_entry_uid(struct archive_entry *);
134 const char *archive_entry_uname(struct archive_entry *);
135 const wchar_t *archive_entry_uname_w(struct archive_entry *);
138 * Set fields in an archive_entry.
140 * Note that string 'set' functions do not copy the string, only the pointer.
141 * In contrast, 'copy' functions do copy the object pointed to.
/*
 * Scalar setters, first group.  All return void, so none of them can
 * report a failure to the caller.
 *
 *  - The time setters take a time_t plus a long.
 *    NOTE(review): the long is presumably the nanosecond part matching
 *    the *_nsec() getters -- confirm.
 *  - archive_entry_set_filetype() takes unsigned int although
 *    archive_entry_filetype() returns mode_t.
 *  - archive_entry_set_fflags() takes the "set" and "clear" words by
 *    value; the matching getter returns them through pointers.
 */
144 void archive_entry_set_atime(struct archive_entry *, time_t, long);
145 void archive_entry_set_ctime(struct archive_entry *, time_t, long);
146 void archive_entry_set_dev(struct archive_entry *, dev_t);
147 void archive_entry_set_devmajor(struct archive_entry *, dev_t);
148 void archive_entry_set_devminor(struct archive_entry *, dev_t);
149 void archive_entry_set_filetype(struct archive_entry *, unsigned int);
150 void archive_entry_set_fflags(struct archive_entry *,
151 unsigned long /* set */, unsigned long /* clear */);
152 /* Returns pointer to start of first invalid token, or NULL if none. */
153 /* Note that all recognized tokens are processed, regardless. */
154 const wchar_t *archive_entry_copy_fflags_text_w(struct archive_entry *,
/*
 * Remaining setters.  All return void.
 *
 * String fields come in up to three flavours:
 *   archive_entry_set_X(entry, const char *)      stores only the pointer
 *   archive_entry_copy_X(entry, const char *)     copies the string
 *   archive_entry_copy_X_w(entry, const wchar_t *) copies a wide string
 * Because a "set" call keeps only the pointer, the caller's string has to
 * stay valid for as long as the entry may use it; prefer the "copy" form
 * when the source buffer is temporary.
 *
 * NOTE(review): archive_entry_set_ino() takes unsigned long while
 * archive_entry_ino() returns ino_t; where ino_t is wider than unsigned
 * long a value may not round-trip -- confirm whether that matters here.
 * NOTE(review): the "link" functions (set_link, copy_link, copy_link_w)
 * have no getter of the same name in this header; how they relate to the
 * hardlink and symlink fields is not visible here -- check the
 * implementation.
 */
156 void archive_entry_set_gid(struct archive_entry *, gid_t);
157 void archive_entry_set_gname(struct archive_entry *, const char *);
158 void archive_entry_copy_gname(struct archive_entry *, const char *);
159 void archive_entry_copy_gname_w(struct archive_entry *, const wchar_t *);
160 void archive_entry_set_hardlink(struct archive_entry *, const char *);
161 void archive_entry_copy_hardlink(struct archive_entry *, const char *);
162 void archive_entry_copy_hardlink_w(struct archive_entry *, const wchar_t *);
163 void archive_entry_set_ino(struct archive_entry *, unsigned long);
164 void archive_entry_set_link(struct archive_entry *, const char *);
165 void archive_entry_copy_link(struct archive_entry *, const char *);
166 void archive_entry_copy_link_w(struct archive_entry *, const wchar_t *);
167 void archive_entry_set_mode(struct archive_entry *, mode_t);
168 void archive_entry_set_mtime(struct archive_entry *, time_t, long);
169 void archive_entry_set_nlink(struct archive_entry *, unsigned int);
170 void archive_entry_set_pathname(struct archive_entry *, const char *);
171 void archive_entry_copy_pathname(struct archive_entry *, const char *);
172 void archive_entry_copy_pathname_w(struct archive_entry *, const wchar_t *);
173 void archive_entry_set_perm(struct archive_entry *, mode_t);
174 void archive_entry_set_rdev(struct archive_entry *, dev_t);
175 void archive_entry_set_rdevmajor(struct archive_entry *, dev_t);
176 void archive_entry_set_rdevminor(struct archive_entry *, dev_t);
177 void archive_entry_set_size(struct archive_entry *, int64_t);
178 void archive_entry_set_symlink(struct archive_entry *, const char *);
179 void archive_entry_copy_symlink(struct archive_entry *, const char *);
180 void archive_entry_copy_symlink_w(struct archive_entry *, const wchar_t *);
181 void archive_entry_set_uid(struct archive_entry *, uid_t);
182 void archive_entry_set_uname(struct archive_entry *, const char *);
183 void archive_entry_copy_uname(struct archive_entry *, const char *);
184 void archive_entry_copy_uname_w(struct archive_entry *, const wchar_t *);
187 * Routines to bulk copy fields to/from a platform-native "struct
188 * stat." Libarchive used to just store a struct stat inside of each
189 * archive_entry object, but this created issues when trying to
190 * manipulate archives on systems different than the ones they were
193 * TODO: On Linux, provide both stat32 and stat64 versions of these functions.
/*
 * Bulk conversion between an entry and the platform's struct stat:
 * archive_entry_stat() returns a pointer to a constant struct stat for
 * the entry, archive_entry_copy_stat() takes one as input.
 *
 * NOTE(review): struct stat is only named here, not defined; callers
 * presumably need the platform's stat header in scope before using these
 * -- confirm which header this file expects.
 * NOTE(review): who owns the structure returned by archive_entry_stat()
 * and how long it stays valid is not stated in this header -- confirm.
 */
195 const struct stat *archive_entry_stat(struct archive_entry *);
196 void archive_entry_copy_stat(struct archive_entry *, const struct stat *);
199 * ACL routines. This used to simply store and return text-format ACL
200 * strings, but that proved insufficient for a number of reasons:
201 * = clients need control over uname/uid and gname/gid mappings
202 * = there are many different ACL text formats
203 * = would like to be able to read/convert archives containing ACLs
204 * on platforms that lack ACL libraries
206 * This last point, in particular, forces me to implement a reasonably
207 * complete set of ACL support routines.
209 * TODO: Extend this to support NFSv4/NTFS permissions. That should
210 * allow full ACL support on Mac OS, in particular, which uses
211 * POSIX.1e-style interfaces to manipulate NFSv4/NTFS permissions.
215 * Permission bits mimic POSIX.1e. Note that I've not followed POSIX.1e's
216 * "permset"/"perm" abstract type nonsense. A permset is just a simple
217 * bitmap, following long-standing Unix tradition.
/*
 * ACL constants, in three independent groups:
 *
 *  - Permission bits (1, 2, 4): single-bit values meant to be OR-ed
 *    together into a "permset" bitmap.
 *  - Type flags (256, 512): also single-bit values, distinct from the
 *    permission bits, so ACCESS and DEFAULT can be requested separately
 *    or together.
 *  - Tags (10001..10006): consecutive identifiers, not bit flags; an ACL
 *    entry carries exactly one of them.
 */
219 #define ARCHIVE_ENTRY_ACL_EXECUTE 1
220 #define ARCHIVE_ENTRY_ACL_WRITE 2
221 #define ARCHIVE_ENTRY_ACL_READ 4
223 /* We need to be able to specify either or both of these. */
224 #define ARCHIVE_ENTRY_ACL_TYPE_ACCESS 256
225 #define ARCHIVE_ENTRY_ACL_TYPE_DEFAULT 512
227 /* Tag values mimic POSIX.1e */
228 #define ARCHIVE_ENTRY_ACL_USER 10001 /* Specified user. */
229 #define ARCHIVE_ENTRY_ACL_USER_OBJ 10002 /* User who owns the file. */
230 #define ARCHIVE_ENTRY_ACL_GROUP 10003 /* Specified group. */
231 #define ARCHIVE_ENTRY_ACL_GROUP_OBJ 10004 /* Group who owns the file. */
232 #define ARCHIVE_ENTRY_ACL_MASK 10005 /* Modify group access. */
233 #define ARCHIVE_ENTRY_ACL_OTHER 10006 /* Public. */
236 * Set the ACL by clearing it and adding entries one at a time.
237 * Unlike the POSIX.1e ACL routines, you must specify the type
238 * (access/default) for each entry. Internally, the ACL data is just
239 * a soup of entries. API calls here allow you to retrieve just the
240 * entries of interest. This design (which goes against the spirit of
241 * POSIX.1e) is useful for handling archive formats that combine
242 * default and access information in a single ACL list.
/*
 * Building an ACL: call archive_entry_acl_clear() once, then add entries
 * one at a time.  Every added entry names its own type (access/default),
 * a permset bitmap, a tag, a qualifier and a name; the "_w" form takes
 * the name as a wide string.
 *
 * All three functions return void, so a failure to record an entry is
 * not reported through the return value.
 *
 * NOTE(review): "qual" is presumably the numeric uid/gid that goes with
 * a USER or GROUP tag -- confirm.
 * NOTE(review): whether the name is copied or only referenced is not
 * stated in this header -- confirm before passing a temporary buffer.
 */
244 void archive_entry_acl_clear(struct archive_entry *);
245 void archive_entry_acl_add_entry(struct archive_entry *,
246 int /* type */, int /* permset */, int /* tag */,
247 int /* qual */, const char * /* name */);
248 void archive_entry_acl_add_entry_w(struct archive_entry *,
249 int /* type */, int /* permset */, int /* tag */,
250 int /* qual */, const wchar_t * /* name */);
253 * To retrieve the ACL, first "reset", then repeatedly ask for the
254 * "next" entry. The want_type parameter allows you to request only
255 * access entries or only default entries.
/*
 * Iterating an ACL: call archive_entry_acl_reset() with the wanted
 * type(s), then call archive_entry_acl_next() (or the "_w" form for a
 * wide-character name) repeatedly.  Each "next" call reports one entry
 * through its five out-parameters: type, permset, tag, qual and name.
 *
 * NOTE(review): the meaning of the int return values (entry count,
 * status code, end-of-list marker) is not stated in this header --
 * confirm in the implementation before testing against a constant.
 * NOTE(review): the lifetime of the string stored through the name
 * out-parameter is not stated here -- confirm.
 */
257 int archive_entry_acl_reset(struct archive_entry *, int /* want_type */);
258 int archive_entry_acl_next(struct archive_entry *, int /* want_type */,
259 int * /* type */, int * /* permset */, int * /* tag */,
260 int * /* qual */, const char ** /* name */);
261 int archive_entry_acl_next_w(struct archive_entry *, int /* want_type */,
262 int * /* type */, int * /* permset */, int * /* tag */,
263 int * /* qual */, const wchar_t ** /* name */);
266 * Construct a text-format ACL. The flags argument is a bitmask that
267 * can include any of the following:
269 * ARCHIVE_ENTRY_ACL_TYPE_ACCESS - Include access entries.
270 * ARCHIVE_ENTRY_ACL_TYPE_DEFAULT - Include default entries.
271 * ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID - Include extra numeric ID field in
272 * each ACL entry. (As used by 'star'.)
273 * ARCHIVE_ENTRY_ACL_STYLE_MARK_DEFAULT - Include "default:" before each
/*
 * Formatting flags for the text-format ACL.  They are single-bit values
 * (1024, 2048) that do not collide with ARCHIVE_ENTRY_ACL_TYPE_ACCESS
 * (256) or ARCHIVE_ENTRY_ACL_TYPE_DEFAULT (512), so type selection and
 * style can share one flags bitmask.
 */
276 #define ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID 1024
277 #define ARCHIVE_ENTRY_ACL_STYLE_MARK_DEFAULT 2048
278 const wchar_t *archive_entry_acl_text_w(struct archive_entry *,
281 /* Return a count of entries matching 'want_type' */
/*
 * NOTE(review): 'want_type' is presumably ARCHIVE_ENTRY_ACL_TYPE_ACCESS,
 * ARCHIVE_ENTRY_ACL_TYPE_DEFAULT, or both OR-ed together, as for the
 * reset/next calls -- confirm.
 */
282 int archive_entry_acl_count(struct archive_entry *, int /* want_type */);
285 * Private ACL parser. This is private because it handles some
286 * very weird formats that clients should not be messing with.
287 * Clients should only deal with their platform-native formats.
288 * Because of the need to support many formats cleanly, new arguments
289 * are likely to get added on a regular basis. Clients who try to use
290 * this interface are likely to be surprised when it changes.
294 * TODO: Move this declaration out of the public header and into
295 * a private header. Warnings above are silly.
/*
 * Internal ACL text parser: takes the entry, a wide-character string and
 * an ACL type, and returns an int.
 *
 * The name starts with a double underscore, which C reserves for the
 * implementation; that, together with the surrounding warnings, marks it
 * as not part of the supported client API.
 *
 * NOTE(review): the meaning of the return value is not stated in this
 * header -- confirm in the implementation.
 */
297 int __archive_entry_acl_parse_w(struct archive_entry *,
298 const wchar_t *, int /* type */);
301 * extended attributes
/*
 * Takes the entry and returns nothing.
 * NOTE(review): by analogy with archive_entry_acl_clear(), this
 * presumably discards every extended attribute recorded on the entry --
 * confirm in the implementation.
 */
304 void archive_entry_xattr_clear(struct archive_entry *);
305 void archive_entry_xattr_add_entry(struct archive_entry *,
306 const char * /* name */, const void * /* value */,
310 * To retrieve the xattr list, first "reset", then repeatedly ask for the
/*
 * Reading extended attributes: "reset" first, then call "next"
 * repeatedly.  archive_entry_xattr_next() reports one attribute through
 * its out-parameters: the name, a pointer to the value bytes, and a
 * size_t.
 *
 * NOTE(review): the unnamed size_t * presumably receives the length of
 * the value in bytes -- confirm.
 * NOTE(review): the meaning of the int return values of reset/next is
 * not stated in this header -- confirm before testing against a
 * constant.
 */
314 int archive_entry_xattr_count(struct archive_entry *);
315 int archive_entry_xattr_reset(struct archive_entry *);
316 int archive_entry_xattr_next(struct archive_entry *,
317 const char ** /* name */, const void ** /* value */, size_t *);
320 * Utility to detect hardlinks.
322 * The 'struct archive_entry_linkresolver' is a cache of archive entries
323 * for files with multiple links. Here's how to use it:
324 * 1. Create a lookup object with archive_entry_linkresolver_new()
325 * 2. Set the appropriate strategy.
326 * 3. Hand each archive_entry to archive_entry_linkify().
327 * That function will return 0, 1, or 2 entries that should
329 * 4. Call archive_entry_linkify(resolver, NULL) until
330 * no more entries are returned.
331 * 5. Call archive_entry_linkresolver_free(resolver) to free resources.
333 * The entries returned have their hardlink and size fields updated
334 * appropriately. If an entry is passed in that does not refer to
335 * a file with multiple links, it is returned unchanged. The intention
336 * is that you should be able to simply filter all entries through
339 * To make things more efficient, be sure that each entry has a valid
340 * nlinks value. The hardlink cache uses this to track when all links
341 * have been found. If the nlinks value is zero, it will keep every
342 * name in the cache indefinitely, which can use a lot of memory.
344 * Note that archive_entry_size() is reset to zero if the file
345 * body should not be written to the archive. Pay attention!
/*
 * Forward declaration only: the structure's members are not visible in
 * this header, so client code can hold and pass pointers to a resolver
 * but cannot inspect or allocate one directly.
 */
347 struct archive_entry_linkresolver;
350 * This machine supports three different strategies for marking
351 * hardlinks. The names come from the best-known
352 * formats that rely on each strategy:
354 * "Old cpio" is the simplest, it always returns any entry unmodified.
355 * As far as I know, only cpio formats use this. Old cpio archives
356 * store every link with the full body; the onus is on the dearchiver
357 * to detect and properly link the files as they are restored.
358 * "tar" is also pretty simple; it caches a copy the first time it sees
359 * any link. Subsequent appearances are modified to be hardlink
360 * references without any body to the first one. Used by all tar
361 * formats, although the newest tar formats permit the "old cpio" strategy
362 * as well. This strategy is very simple for the dearchiver,
363 * and reasonably straightforward for the archiver.
364 * "new cpio" is trickier. It stores the body only with the last
365 * occurrence. The complication is that we might not
366 * see every link to a particular file in a single session, so
367 * there's no easy way to know when we've seen the last occurrence.
368 * The solution here is to queue one link until we see the next.
369 * At the end of the session, you can enumerate any remaining
370 * entries by calling archive_entry_linkify(resolver, NULL) and store those
371 * bodies. If you have a file with three links l1, l2, and l3,
372 * you'll get the following behavior if you see all three links:
373 * linkify(l1) => NULL (the resolver stores l1 internally)
374 * linkify(l2) => l1 (resolver stores l2, you write l1)
375 * linkify(l3) => l2, l3 (all links seen, you can write both).
376 * If you only see l1 and l2, you'll get this behavior:
377 * linkify(l1) => NULL
379 * linkify(NULL) => l2 (at end, you retrieve remaining links)
380 * As the name suggests, this strategy is used by newer cpio variants.
381 * It's noticeably more complex for the archiver, slightly more complex
382 * for the dearchiver than the tar strategy, but makes it straightforward
383 * to restore a file using any link by simply continuing to scan until
384 * you see a link that is stored with a body. In contrast, the tar
385 * strategy requires you to rescan the archive from the beginning to
386 * correctly extract an arbitrary link.
/*
 * Strategy selectors for archive_entry_linkresolver_set_strategy(), one
 * per hardlink-marking strategy ("tar", "old cpio", "new cpio").  These
 * are plain enumerated values (0, 1, 2), not bit flags, so exactly one
 * is passed at a time.
 */
388 #define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR 0
389 #define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 1
390 #define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 2
/*
 * Link-resolver API.
 *
 *  - archive_entry_linkresolver_new() takes no arguments and returns a
 *    resolver pointer.
 *  - archive_entry_linkresolver_set_strategy() takes the resolver and
 *    one of the ARCHIVE_ENTRY_LINKIFY_LIKE_* values.
 *  - archive_entry_linkresolver_free() takes the resolver and returns
 *    nothing.
 *  - archive_entry_linkify() returns void and takes two
 *    struct archive_entry ** arguments, so the zero, one or two entries
 *    it yields have to come back through those two pointers.
 *
 * NOTE(review): which of the two pointer arguments carries the input
 * entry, and how "no entry" is represented on return (presumably NULL),
 * is not stated in this header -- confirm in the implementation.
 * NOTE(review): the result of archive_entry_linkresolver_new() on
 * allocation failure is presumably NULL -- confirm.
 */
392 struct archive_entry_linkresolver *archive_entry_linkresolver_new(void);
393 void archive_entry_linkresolver_set_strategy(
394 struct archive_entry_linkresolver *, int /* strategy */);
395 void archive_entry_linkresolver_free(struct archive_entry_linkresolver *);
396 void archive_entry_linkify(struct archive_entry_linkresolver *,
397 struct archive_entry **, struct archive_entry **);
400 * DEPRECATED: This will be removed in libarchive 3.0. It was an
401 * early attempt at providing library-level hardlink recognition
402 * support, but it only handles the tar strategy and cannot easily
403 * be extended, so it's being replaced with the "linkify" function.
/*
 * Older single-call interface: takes a resolver and an entry and returns
 * a pointer to constant text.  New code should use
 * archive_entry_linkify() instead.
 *
 * NOTE(review): the returned string is presumably the pathname of a
 * previously seen link to the same file, with NULL meaning "no earlier
 * link" -- this header does not say so; confirm in the implementation.
 */
405 const char *archive_entry_linkresolve(struct archive_entry_linkresolver *,
406 struct archive_entry *);
412 #endif /* !ARCHIVE_ENTRY_H_INCLUDED */