4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2012, 2020 by Delphix. All rights reserved.
24 * Copyright (c) 2013 Steven Hartland. All rights reserved.
25 * Copyright (c) 2017 Datto Inc.
26 * Copyright 2017 RackTop Systems.
27 * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
28 * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
32 * LibZFS_Core (lzc) is intended to replace most functionality in libzfs.
33 * It has the following characteristics:
35 * - Thread Safe. libzfs_core is accessible concurrently from multiple
36 * threads. This is accomplished primarily by avoiding global data
37 * (e.g. caching). Since it's thread-safe, there is no reason for a
38 * process to have multiple libzfs "instances". Therefore, we store
39 * our few pieces of data (e.g. the file descriptor) in global
40 * variables. The fd is reference-counted so that the libzfs_core
41 * library can be "initialized" multiple times (e.g. by different
42 * consumers within the same process).
44 * - Committed Interface. The libzfs_core interface will be committed,
45 * therefore consumers can compile against it and be confident that
46 * their code will continue to work on future releases of this code.
47 * Currently, the interface is Evolving (not Committed), but we intend
48 * to commit to it once it is more complete and we determine that it
49 * meets the needs of all consumers.
51 * - Programmatic Error Handling. libzfs_core communicates errors with
52 * defined error numbers, and doesn't print anything to stdout/stderr.
54 * - Thin Layer. libzfs_core is a thin layer, marshaling arguments
55 * to/from the kernel ioctls. There is generally a 1:1 correspondence
56 * between libzfs_core functions and ioctls to ZFS_DEV.
58 * - Clear Atomicity. Because libzfs_core functions are generally 1:1
59 * with kernel ioctls, and kernel ioctls are generally atomic, each
60 * libzfs_core function is atomic. For example, creating multiple
61 * snapshots with a single call to lzc_snapshot() is atomic -- it
62 * can't fail with only some of the requested snapshots created, even
63 * in the event of power loss or system crash.
65 * - Continued libzfs Support. Some higher-level operations (e.g.
66 * support for "zfs send -R") are too complicated to fit the scope of
67 * libzfs_core. This functionality will continue to live in libzfs.
68 * Where appropriate, libzfs will use the underlying atomic operations
69 * of libzfs_core. For example, libzfs may implement "zfs send -R |
70 * zfs receive" by using individual "send one snapshot", rename,
71 * destroy, and "receive one snapshot" operations in libzfs_core.
72 * /sbin/zfs and /sbin/zpool will link with both libzfs and
73 * libzfs_core. Other consumers should aim to use only libzfs_core,
74 * since that will be the supported, stable interface going forwards.
77 #include <libzfs_core.h>
89 #include <sys/nvpair.h>
90 #include <sys/param.h>
91 #include <sys/types.h>
93 #include <sys/zfs_ioctl.h>
/* Value returned by max_pipe_buffer() on its fallback path. */
95 #define BIG_PIPE_SIZE (64 * 1024) /* From sys/pipe.h */
/* Held by libzfs_core_init()/libzfs_core_fini() while they test g_refcount. */
99 static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;
/* Library reference count; the entry points assert that it is > 0. */
100 static int g_refcount;
/*
 * ioctl override consulted by lzc_ioctl(): when the requested ioctl equals
 * fail_ioc_cmd, fail_ioc_err is returned instead of issuing the ioctl.
 * ZFS_IOC_LAST means that no override has been recorded.
 */
103 static zfs_ioc_t fail_ioc_cmd = ZFS_IOC_LAST;
104 static zfs_errno_t fail_ioc_err;
/*
 * Parse the ZFS_IOC_TEST environment variable ("cmd:err") and, when it names
 * an ioctl number below ZFS_IOC_LAST, record it in fail_ioc_cmd/fail_ioc_err.
 * Parsing is only attempted while fail_ioc_cmd still holds ZFS_IOC_LAST.
 */
107 libzfs_core_debug_ioc(void)
110 * To test running newer user space binaries with kernel's
111 * that don't yet support an ioctl or a new ioctl arg we
112 * provide an override to intentionally fail an ioctl.
115 * The override variable, ZFS_IOC_TEST, is of the form "cmd:err"
117 * For example, to fail a ZFS_IOC_POOL_CHECKPOINT with a
118 * ZFS_ERR_IOC_CMD_UNAVAIL, the string would be "0x5a4d:1029"
120 * $ sudo sh -c "ZFS_IOC_TEST=0x5a4d:1029 zpool checkpoint tank"
121 * cannot checkpoint 'tank': the loaded zfs module does not support
122 * this operation. A reboot may be required to enable this operation.
124 if (fail_ioc_cmd == ZFS_IOC_LAST) {
125 char *ioc_test = getenv("ZFS_IOC_TEST");
126 unsigned int ioc_num = 0, ioc_err = 0;
/*
 * "%i" accepts decimal, 0x-prefixed hex and 0-prefixed octal input.
 * NOTE(review): "%i" formally expects int *, but unsigned int * is passed.
 */
128 if (ioc_test != NULL &&
129 sscanf(ioc_test, "%i:%i", &ioc_num, &ioc_err) == 2 &&
130 ioc_num < ZFS_IOC_LAST) {
131 fail_ioc_cmd = ioc_num;
132 fail_ioc_err = ioc_err;
/*
 * Initialize the library under g_lock.  When g_refcount is 0, ZFS_DEV is
 * opened read/write with close-on-exec and the descriptor stored in g_fd.
 */
139 libzfs_core_init(void)
141 (void) pthread_mutex_lock(&g_lock);
142 if (g_refcount == 0) {
143 g_fd = open(ZFS_DEV, O_RDWR|O_CLOEXEC);
/* Early unlock; presumably the open() failure path (elided here) -- confirm. */
145 (void) pthread_mutex_unlock(&g_lock);
/* Refresh the ZFS_IOC_TEST override while g_lock is still held. */
152 libzfs_core_debug_ioc();
154 (void) pthread_mutex_unlock(&g_lock);
/*
 * Tear-down counterpart of libzfs_core_init(), run under g_lock.  The caller
 * must hold a reference (asserted).  The g_refcount == 0 && g_fd != -1 branch
 * handles the last reference; its statements are elided from this excerpt.
 */
159 libzfs_core_fini(void)
161 (void) pthread_mutex_lock(&g_lock);
162 ASSERT3S(g_refcount, >, 0);
166 if (g_refcount == 0 && g_fd != -1) {
170 (void) pthread_mutex_unlock(&g_lock);
/*
 * Issue an nvlist-based ioctl on g_fd.
 *
 * ioc:     ioctl to issue.
 * name:    copied (truncating) into zc_name.
 * source:  optional input nvlist; packed and passed via zc_nvlist_src.
 * resultp: optional; when non-NULL a destination buffer is allocated and,
 *          if the kernel reports it filled, unpacked into *resultp.
 *
 * When ioc matches the ZFS_IOC_TEST override, fail_ioc_err is returned
 * without issuing the ioctl.
 */
174 lzc_ioctl(zfs_ioc_t ioc, const char *name,
175 nvlist_t *source, nvlist_t **resultp)
177 zfs_cmd_t zc = {"\0"};
182 ASSERT3S(g_refcount, >, 0);
183 VERIFY3S(g_fd, !=, -1);
186 if (ioc == fail_ioc_cmd)
187 return (fail_ioc_err);
191 (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
193 if (source != NULL) {
194 packed = fnvlist_pack(source, &size);
195 zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
196 zc.zc_nvlist_src_size = size;
199 if (resultp != NULL) {
/*
 * Initial result-buffer size: channel programs take it from a uint64
 * looked up in 'source'; every other ioctl starts at
 * MAX(2 * packed input size, 128K).
 */
201 if (ioc == ZFS_IOC_CHANNEL_PROGRAM) {
202 zc.zc_nvlist_dst_size = fnvlist_lookup_uint64(source,
205 zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024);
207 zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
208 malloc(zc.zc_nvlist_dst_size);
209 if (zc.zc_nvlist_dst == (uint64_t)0) {
215 while (lzc_ioctl_fd(g_fd, ioc, &zc) != 0) {
217 * If ioctl exited with ENOMEM, we retry the ioctl after
218 * increasing the size of the destination nvlist.
220 * Channel programs that exit with ENOMEM ran over the
221 * lua memory sandbox; they should not be retried.
223 if (errno == ENOMEM && resultp != NULL &&
224 ioc != ZFS_IOC_CHANNEL_PROGRAM) {
225 free((void *)(uintptr_t)zc.zc_nvlist_dst);
226 zc.zc_nvlist_dst_size *= 2;
227 zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
228 malloc(zc.zc_nvlist_dst_size);
229 if (zc.zc_nvlist_dst == (uint64_t)0) {
/*
 * NOTE(review): no resultp != NULL test is visible on this condition; it
 * appears to rely on zc_nvlist_dst_filled staying clear when no destination
 * buffer was supplied -- confirm.
 */
238 if (zc.zc_nvlist_dst_filled) {
239 *resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
240 zc.zc_nvlist_dst_size);
/* Release the packed input and the destination buffer. */
245 fnvlist_pack_free(packed, size);
246 free((void *)(uintptr_t)zc.zc_nvlist_dst);
/*
 * Create 'fsname' via ZFS_IOC_CREATE.  The arguments carry "type" (int32)
 * and "props"; when 'wkeydata' is non-NULL it is wrapped in a separate
 * nvlist that is added under ZPOOL_HIDDEN_ARGS.
 */
251 lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props,
252 uint8_t *wkeydata, uint_t wkeylen)
255 nvlist_t *hidden_args = NULL;
256 nvlist_t *args = fnvlist_alloc();
258 fnvlist_add_int32(args, "type", (dmu_objset_type_t)type);
260 fnvlist_add_nvlist(args, "props", props);
262 if (wkeydata != NULL) {
263 hidden_args = fnvlist_alloc();
264 fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata,
266 fnvlist_add_nvlist(args, ZPOOL_HIDDEN_ARGS, hidden_args);
269 error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL);
/* hidden_args is still NULL here when no wkeydata was supplied. */
270 nvlist_free(hidden_args);
/*
 * Create 'fsname' from 'origin' via ZFS_IOC_CLONE, passing 'origin' as
 * "origin" and 'props' as "props".
 */
276 lzc_clone(const char *fsname, const char *origin, nvlist_t *props)
279 nvlist_t *hidden_args = NULL;
280 nvlist_t *args = fnvlist_alloc();
282 fnvlist_add_string(args, "origin", origin);
284 fnvlist_add_nvlist(args, "props", props);
285 error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL);
/*
 * NOTE(review): hidden_args is never assigned in the visible code, so this
 * call operates on NULL -- the variable looks vestigial; confirm.
 */
286 nvlist_free(hidden_args);
/*
 * Promote 'fsname' with ZFS_IOC_PROMOTE, using a hand-built zfs_cmd_t.
 * If the ioctl fails with EEXIST and 'snapnamebuf' is non-NULL, the string
 * returned in zc_string is copied into it (bounded by 'snapnamelen').
 */
292 lzc_promote(const char *fsname, char *snapnamebuf, int snapnamelen)
295 * The promote ioctl is still legacy, so we need to construct our
296 * own zfs_cmd_t rather than using lzc_ioctl().
298 zfs_cmd_t zc = {"\0"};
300 ASSERT3S(g_refcount, >, 0);
301 VERIFY3S(g_fd, !=, -1);
303 (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
304 if (lzc_ioctl_fd(g_fd, ZFS_IOC_PROMOTE, &zc) != 0) {
/* Only the EEXIST case reports a name back to the caller. */
306 if (error == EEXIST && snapnamebuf != NULL)
307 (void) strlcpy(snapnamebuf, zc.zc_string, snapnamelen);
/*
 * Rename 'source' to 'target' with ZFS_IOC_RENAME, using a hand-built
 * zfs_cmd_t: the names travel in zc_name and zc_value, each truncated to
 * the field size by strlcpy().
 */
314 lzc_rename(const char *source, const char *target)
316 zfs_cmd_t zc = {"\0"};
319 ASSERT3S(g_refcount, >, 0);
320 VERIFY3S(g_fd, !=, -1);
321 (void) strlcpy(zc.zc_name, source, sizeof (zc.zc_name));
322 (void) strlcpy(zc.zc_value, target, sizeof (zc.zc_value));
323 error = lzc_ioctl_fd(g_fd, ZFS_IOC_RENAME, &zc);
/* Destroy 'fsname' via ZFS_IOC_DESTROY, sending an empty argument nvlist. */
330 lzc_destroy(const char *fsname)
333 nvlist_t *args = fnvlist_alloc();
334 error = lzc_ioctl(ZFS_IOC_DESTROY, fsname, args, NULL);
342 * The keys in the snaps nvlist are the snapshots to be created.
343 * They must all be in the same pool.
345 * The props nvlist is properties to set. Currently only user properties
346 * are supported. { user:prop_name -> string value }
348 * The returned results nvlist will have an entry for each snapshot that failed.
349 * The value will be the (int32) error code.
351 * The return value will be 0 if all snapshots were created, otherwise it will
352 * be the errno of a (unspecified) snapshot that failed.
355 lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist)
/*
 * The ioctl is addressed to the pool, whose name is taken from the first
 * key of 'snaps' by cutting it at the first '/' or '@'.
 */
360 char pool[ZFS_MAX_DATASET_NAME_LEN];
364 /* determine the pool name */
365 elem = nvlist_next_nvpair(snaps, NULL);
368 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
369 pool[strcspn(pool, "/@")] = '\0';
371 args = fnvlist_alloc();
372 fnvlist_add_nvlist(args, "snaps", snaps);
374 fnvlist_add_nvlist(args, "props", props);
/* Per-snapshot failures are returned through 'errlist'. */
376 error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist);
383 * Destroys snapshots.
385 * The keys in the snaps nvlist are the snapshots to be destroyed.
386 * They must all be in the same pool.
388 * Snapshots that do not exist will be silently ignored.
390 * If 'defer' is not set, and a snapshot has user holds or clones, the
391 * destroy operation will fail and none of the snapshots will be
394 * If 'defer' is set, and a snapshot has user holds or clones, it will be
395 * marked for deferred destruction, and will be destroyed when the last hold
396 * or clone is removed/destroyed.
398 * The return value will be 0 if all snapshots were destroyed (or marked for
399 * later destruction if 'defer' is set) or didn't exist to begin with.
401 * Otherwise the return value will be the errno of a (unspecified) snapshot
402 * that failed, no snapshots will be destroyed, and the errlist will have an
403 * entry for each snapshot that failed. The value in the errlist will be
404 * the (int32) error code.
407 lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist)
/*
 * The ioctl is addressed to the pool, whose name is taken from the first
 * key of 'snaps' by cutting it at the first '/' or '@'.
 */
412 char pool[ZFS_MAX_DATASET_NAME_LEN];
414 /* determine the pool name */
415 elem = nvlist_next_nvpair(snaps, NULL);
418 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
419 pool[strcspn(pool, "/@")] = '\0';
421 args = fnvlist_alloc();
422 fnvlist_add_nvlist(args, "snaps", snaps);
/*
 * "defer" is a value-less boolean key; presumably added only when the
 * 'defer' argument is set (the guard is elided from this excerpt).
 */
424 fnvlist_add_boolean(args, "defer");
426 error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist);
/*
 * Issue ZFS_IOC_SPACE_SNAPS on 'lastsnap' with "firstsnap" in the arguments
 * and store the "used" value of the result through 'usedp'.
 */
433 lzc_snaprange_space(const char *firstsnap, const char *lastsnap,
439 char fs[ZFS_MAX_DATASET_NAME_LEN];
442 /* determine the fs name */
443 (void) strlcpy(fs, firstsnap, sizeof (fs));
/* Locate the '@' in the scratch copy; what is done with 'atp' is elided here. */
444 atp = strchr(fs, '@');
449 args = fnvlist_alloc();
450 fnvlist_add_string(args, "firstsnap", firstsnap);
452 err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result);
455 *usedp = fnvlist_lookup_uint64(result, "used");
456 fnvlist_free(result);
/*
 * Report whether ZFS_IOC_OBJSET_STATS succeeds for 'dataset': the result
 * is nonzero exactly when the ioctl returns 0.
 */
462 lzc_exists(const char *dataset)
465 * The objset_stats ioctl is still legacy, so we need to construct our
466 * own zfs_cmd_t rather than using lzc_ioctl().
468 zfs_cmd_t zc = {"\0"};
470 ASSERT3S(g_refcount, >, 0);
471 VERIFY3S(g_fd, !=, -1);
473 (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
474 return (lzc_ioctl_fd(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0);
479 * It was added to preserve the function signature in case it is
480 * needed in the future.
483 lzc_sync(const char *pool_name, nvlist_t *innvl, nvlist_t **outnvl)
/*
 * 'innvl' is forwarded unchanged; the ioctl is issued without a result
 * nvlist (NULL), so this call returns nothing through 'outnvl'.
 */
486 return (lzc_ioctl(ZFS_IOC_POOL_SYNC, pool_name, innvl, NULL));
490 * Create "user holds" on snapshots. If there is a hold on a snapshot,
491 * the snapshot can not be destroyed. (However, it can be marked for deletion
492 * by lzc_destroy_snaps(defer=B_TRUE).)
494 * The keys in the nvlist are snapshot names.
495 * The snapshots must all be in the same pool.
496 * The value is the name of the hold (string type).
498 * If cleanup_fd is not -1, it must be the result of open(ZFS_DEV, O_EXCL).
499 * In this case, when the cleanup_fd is closed (including on process
500 * termination), the holds will be released. If the system is shut down
501 * uncleanly, the holds will be released when the pool is next opened
504 * Holds for snapshots which don't exist will be skipped and have an entry
505 * added to errlist, but will not cause an overall failure.
507 * The return value will be 0 if all holds, for snapshots that existed,
508 * were successfully created.
510 * Otherwise the return value will be the errno of a (unspecified) hold that
511 * failed and no holds will be created.
513 * In all cases the errlist will have an entry for each hold that failed
514 * (name = snapshot), with its value being the error code (int32).
517 lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist)
/*
 * The ioctl is addressed to the pool, whose name is taken from the first
 * key of 'holds' by cutting it at the first '/' or '@'.
 */
519 char pool[ZFS_MAX_DATASET_NAME_LEN];
524 /* determine the pool name */
525 elem = nvlist_next_nvpair(holds, NULL);
528 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
529 pool[strcspn(pool, "/@")] = '\0';
531 args = fnvlist_alloc();
532 fnvlist_add_nvlist(args, "holds", holds);
/* "cleanup_fd" is only sent when the caller supplied a descriptor. */
533 if (cleanup_fd != -1)
534 fnvlist_add_int32(args, "cleanup_fd", cleanup_fd);
536 error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist);
542 * Release "user holds" on snapshots. If the snapshot has been marked for
543 * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have
544 * any clones, and all the user holds are removed, then the snapshot will be
547 * The keys in the nvlist are snapshot names.
548 * The snapshots must all be in the same pool.
549 * The value is an nvlist whose keys are the holds to remove.
551 * Holds which failed to release because they didn't exist will have an entry
552 * added to errlist, but will not cause an overall failure.
554 * The return value will be 0 if the holds nvlist was empty or if all holds
555 * that existed were successfully removed.
557 * Otherwise the return value will be the errno of a (unspecified) hold that
558 * failed to release and no holds will be released.
560 * In all cases the errlist will have an entry for each hold that failed to
564 lzc_release(nvlist_t *holds, nvlist_t **errlist)
/*
 * 'holds' itself is sent as the ioctl argument nvlist; the pool name is
 * taken from its first key by cutting it at the first '/' or '@'.
 */
566 char pool[ZFS_MAX_DATASET_NAME_LEN];
569 /* determine the pool name */
570 elem = nvlist_next_nvpair(holds, NULL);
573 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
574 pool[strcspn(pool, "/@")] = '\0';
576 return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist));
580 * Retrieve list of user holds on the specified snapshot.
582 * On success, *holdsp will be set to an nvlist which the caller must free.
583 * The keys are the names of the holds, and the value is the creation time
584 * of the hold (uint64) in seconds since the epoch.
587 lzc_get_holds(const char *snapname, nvlist_t **holdsp)
/* No input nvlist is sent; the ioctl result is returned through 'holdsp'. */
589 return (lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, NULL, holdsp));
/*
 * Try to enlarge the pipe buffer of 'infd'.  The target size defaults to
 * 1 MiB and is replaced by the value in /proc/sys/fs/pipe-max-size when
 * that file parses.  The fallback path returns BIG_PIPE_SIZE instead.
 */
593 max_pipe_buffer(int infd)
596 static unsigned int max;
598 max = 1048576; /* fs/pipe.c default */
600 FILE *procf = fopen("/proc/sys/fs/pipe-max-size", "re");
602 if (fscanf(procf, "%u", &max) <= 0) {
603 /* ignore error: max untouched if parse fails */
/*
 * A failed F_GETPIPE_SZ (-1) converts to UINT_MAX in 'cur', so the
 * cur < max test below is false and no resize is attempted.
 */
609 unsigned int cur = fcntl(infd, F_GETPIPE_SZ);
610 if (cur < max && fcntl(infd, F_SETPIPE_SZ, max) != -1)
614 /* FreeBSD automatically resizes */
616 return (BIG_PIPE_SIZE);
/* Arguments for send_worker(): the two descriptors it splices between. */
621 struct send_worker_ctx {
622 int from; /* read end of pipe, with send data; closed on exit */
623 int to; /* original arbitrary output fd; mustn't be a pipe */
/*
 * Thread body: splice() data from ctx->from to ctx->to, up to one pipe
 * buffer at a time, until splice() returns 0 or -1.  The thread's return
 * value is 0, or the errno of the failing splice(), cast to a pointer.
 */
627 send_worker(void *arg)
629 struct send_worker_ctx *ctx = arg;
630 unsigned int bufsiz = max_pipe_buffer(ctx->from);
633 while ((rd = splice(ctx->from, NULL, ctx->to, NULL, bufsiz,
634 SPLICE_F_MOVE | SPLICE_F_MORE)) > 0)
/* Capture errno right after the loop, before anything can overwrite it. */
637 int err = (rd == -1) ? errno : 0;
639 return ((void *)(uintptr_t)err);
644 * Since Linux 5.10, 4d03e3cc59828c82ee89ea6e27a2f3cdf95aaadf
645 * ("fs: don't allow kernel reads and writes without iter ops"),
646 * ZFS_IOC_SEND* will EINVAL when writing to /dev/null, /dev/zero, &c.
648 * This wrapper transparently executes func() with a pipe
649 * by spawning a thread to copy from that pipe to the original output
652 * Returns the error from func(), if nonzero,
653 * otherwise the error from the thread.
655 * No-op if orig_fd is -1 or already a pipe (though the pipe's buffer size
656 * is still bumped), and on non-Linux platforms; as such, it is safe to
657 * wrap/call wrapped functions in a wrapped context.
660 lzc_send_wrapper(int (*func)(int, void *), int orig_fd, void *data)
/*
 * Visible flow: fstat() orig_fd; if it is -1 or a FIFO, call func() on it
 * directly.  A descriptor opened O_RDONLY is rejected with EBADF.
 * Otherwise a pipe is created, send_worker() drains its read end into
 * orig_fd on a separate thread, and func() is given the write end.
 */
664 if (orig_fd != -1 && fstat(orig_fd, &sb) == -1)
666 if (orig_fd == -1 || S_ISFIFO(sb.st_mode)) {
668 (void) max_pipe_buffer(orig_fd);
669 return (func(orig_fd, data));
671 if ((fcntl(orig_fd, F_GETFL) & O_ACCMODE) == O_RDONLY)
672 return (errno = EBADF);
675 if (pipe2(rw, O_CLOEXEC) == -1)
679 pthread_t send_thread;
680 struct send_worker_ctx ctx = {.from = rw[0], .to = orig_fd};
681 if ((err = pthread_create(&send_thread, NULL, send_worker, &ctx))
/* pthread_create() returns its error code directly; mirror it into errno. */
685 return (errno = err);
688 err = func(rw[1], data);
692 pthread_join(send_thread, &send_err);
/* func()'s error wins; the worker's error is used only if func() succeeded. */
693 if (err == 0 && send_err != 0)
694 errno = err = (uintptr_t)send_err;
698 return (func(orig_fd, data));
703 * Generate a zfs send stream for the specified snapshot and write it to
704 * the specified file descriptor.
706 * "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap")
708 * If "from" is NULL, a full (non-incremental) stream will be sent.
709 * If "from" is non-NULL, it must be the full name of a snapshot or
710 * bookmark to send an incremental from (e.g. "pool/fs@earlier_snap" or
711 * "pool/fs#earlier_bmark"). If non-NULL, the specified snapshot or
712 * bookmark must represent an earlier point in the history of "snapname").
713 * It can be an earlier snapshot in the same filesystem or zvol as "snapname",
714 * or it can be the origin of "snapname"'s filesystem, or an earlier
715 * snapshot in the origin, etc.
717 * "fd" is the file descriptor to write the send stream to.
719 * If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted
720 * to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT
721 * records with drr_blksz > 128K.
723 * If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted
724 * to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA,
725 * which the receiving system must support (as indicated by support
726 * for the "embedded_data" feature).
728 * If "flags" contains LZC_SEND_FLAG_COMPRESS, the stream is generated by using
729 * compressed WRITE records for blocks which are compressed on disk and in
730 * memory. If the lz4_compress feature is active on the sending system, then
731 * the receiving system must have that feature enabled as well.
733 * If "flags" contains LZC_SEND_FLAG_RAW, the stream is generated, for encrypted
734 * datasets, by sending data exactly as it exists on disk. This allows backups
735 * to be taken even if encryption keys are not currently loaded.
738 lzc_send(const char *snapname, const char *from, int fd,
739 enum lzc_send_flags flags)
/* Convenience form: forwards with a zero resume object and offset. */
741 return (lzc_send_resume_redacted(snapname, from, fd, flags, 0, 0,
/*
 * Variant of lzc_send() that also takes 'redactbook'; forwards to
 * lzc_send_resume_redacted() with a zero resume object and offset.
 */
746 lzc_send_redacted(const char *snapname, const char *from, int fd,
747 enum lzc_send_flags flags, const char *redactbook)
749 return (lzc_send_resume_redacted(snapname, from, fd, flags, 0, 0,
/*
 * Variant of lzc_send() that takes a resume position; forwards to
 * lzc_send_resume_redacted().
 */
754 lzc_send_resume(const char *snapname, const char *from, int fd,
755 enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff)
757 return (lzc_send_resume_redacted(snapname, from, fd, flags, resumeobj,
762 * snapname: The name of the "tosnap", or the snapshot whose contents we are
764 * from: The name of the "fromsnap", or the incremental source.
765 * fd: File descriptor to write the stream to.
766 * flags: flags that determine features to be used by the stream.
767 * resumeobj: Object to resume from, for resuming send
768 * resumeoff: Offset to resume from, for resuming send.
769 * redactnv: nvlist of string -> boolean(ignored) containing the names of all
770 * the snapshots that we should redact with respect to.
771 * redactbook: Name of the redaction bookmark to create.
776 lzc_send_resume_redacted_cb_impl(const char *snapname, const char *from, int fd,
777 enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
778 const char *redactbook)
/*
 * Build the ZFS_IOC_SEND_NEW argument nvlist and issue it on 'snapname'.
 * Each LZC_SEND_FLAG_* bit that is set becomes a value-less boolean key.
 */
783 args = fnvlist_alloc();
784 fnvlist_add_int32(args, "fd", fd);
786 fnvlist_add_string(args, "fromsnap", from);
787 if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
788 fnvlist_add_boolean(args, "largeblockok");
789 if (flags & LZC_SEND_FLAG_EMBED_DATA)
790 fnvlist_add_boolean(args, "embedok");
791 if (flags & LZC_SEND_FLAG_COMPRESS)
792 fnvlist_add_boolean(args, "compressok");
793 if (flags & LZC_SEND_FLAG_RAW)
794 fnvlist_add_boolean(args, "rawok");
795 if (flags & LZC_SEND_FLAG_SAVED)
796 fnvlist_add_boolean(args, "savedok");
/* The resume keys are only sent when at least one of them is nonzero. */
797 if (resumeobj != 0 || resumeoff != 0) {
798 fnvlist_add_uint64(args, "resume_object", resumeobj);
799 fnvlist_add_uint64(args, "resume_offset", resumeoff);
801 if (redactbook != NULL)
802 fnvlist_add_string(args, "redactbook", redactbook);
804 err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
/*
 * Argument bundle that lzc_send_resume_redacted() passes through
 * lzc_send_wrapper() to lzc_send_resume_redacted_cb().
 */
809 struct lzc_send_resume_redacted {
810 const char *snapname;
812 enum lzc_send_flags flags;
815 const char *redactbook;
/*
 * lzc_send_wrapper() callback: unpack the argument bundle in 'arg' and call
 * lzc_send_resume_redacted_cb_impl() with the descriptor chosen by the
 * wrapper.
 */
819 lzc_send_resume_redacted_cb(int fd, void *arg)
821 struct lzc_send_resume_redacted *zsrr = arg;
822 return (lzc_send_resume_redacted_cb_impl(zsrr->snapname, zsrr->from,
823 fd, zsrr->flags, zsrr->resumeobj, zsrr->resumeoff,
/*
 * Bundle the arguments into a stack structure and run the send through
 * lzc_send_wrapper(), which decides which descriptor the callback
 * actually writes to.
 */
828 lzc_send_resume_redacted(const char *snapname, const char *from, int fd,
829 enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
830 const char *redactbook)
832 struct lzc_send_resume_redacted zsrr = {
833 .snapname = snapname,
836 .resumeobj = resumeobj,
837 .resumeoff = resumeoff,
838 .redactbook = redactbook,
840 return (lzc_send_wrapper(lzc_send_resume_redacted_cb, fd, &zsrr));
844 * "from" can be NULL, a snapshot, or a bookmark.
846 * If from is NULL, a full (non-incremental) stream will be estimated. This
847 * is calculated very efficiently.
849 * If from is a snapshot, lzc_send_space uses the deadlists attached to
850 * each snapshot to efficiently estimate the stream size.
852 * If from is a bookmark, the indirect blocks in the destination snapshot
853 * are traversed, looking for blocks with a birth time since the creation TXG of
854 * the snapshot this bookmark was created from. This will result in
855 * significantly more I/O and be less efficient than a send space estimation on
856 * an equivalent snapshot. This process is also used if redact_snaps is
862 lzc_send_space_resume_redacted_cb_impl(const char *snapname, const char *from,
863 enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
864 uint64_t resume_bytes, const char *redactbook, int fd, uint64_t *spacep)
/*
 * Build the ZFS_IOC_SEND_SPACE argument nvlist, issue it on 'snapname' and
 * store the "space" value of the result through 'spacep'.
 * NOTE(review): unlike the ZFS_IOC_SEND_NEW path, no "savedok" key is added
 * for LZC_SEND_FLAG_SAVED here, and the source is keyed "from" rather than
 * "fromsnap" -- confirm both are intended.
 */
870 args = fnvlist_alloc();
872 fnvlist_add_string(args, "from", from);
873 if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
874 fnvlist_add_boolean(args, "largeblockok");
875 if (flags & LZC_SEND_FLAG_EMBED_DATA)
876 fnvlist_add_boolean(args, "embedok");
877 if (flags & LZC_SEND_FLAG_COMPRESS)
878 fnvlist_add_boolean(args, "compressok");
879 if (flags & LZC_SEND_FLAG_RAW)
880 fnvlist_add_boolean(args, "rawok");
/* "bytes" accompanies the resume keys and is sent only together with them. */
881 if (resumeobj != 0 || resumeoff != 0) {
882 fnvlist_add_uint64(args, "resume_object", resumeobj);
883 fnvlist_add_uint64(args, "resume_offset", resumeoff);
884 fnvlist_add_uint64(args, "bytes", resume_bytes);
886 if (redactbook != NULL)
887 fnvlist_add_string(args, "redactbook", redactbook);
889 fnvlist_add_int32(args, "fd", fd);
891 err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
894 *spacep = fnvlist_lookup_uint64(result, "space");
/*
 * Argument bundle that lzc_send_space_resume_redacted() passes through
 * lzc_send_wrapper() to lzc_send_space_resume_redacted_cb().
 */
899 struct lzc_send_space_resume_redacted {
900 const char *snapname;
902 enum lzc_send_flags flags;
905 uint64_t resume_bytes;
906 const char *redactbook;
/*
 * lzc_send_wrapper() callback: unpack the argument bundle in 'arg' and call
 * lzc_send_space_resume_redacted_cb_impl() with the descriptor chosen by
 * the wrapper.
 */
911 lzc_send_space_resume_redacted_cb(int fd, void *arg)
913 struct lzc_send_space_resume_redacted *zssrr = arg;
914 return (lzc_send_space_resume_redacted_cb_impl(zssrr->snapname,
915 zssrr->from, zssrr->flags, zssrr->resumeobj, zssrr->resumeoff,
916 zssrr->resume_bytes, zssrr->redactbook, fd, zssrr->spacep));
/*
 * Bundle the arguments into a stack structure and run the space estimate
 * through lzc_send_wrapper() with lzc_send_space_resume_redacted_cb() as
 * the callback.
 */
920 lzc_send_space_resume_redacted(const char *snapname, const char *from,
921 enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
922 uint64_t resume_bytes, const char *redactbook, int fd, uint64_t *spacep)
924 struct lzc_send_space_resume_redacted zssrr = {
925 .snapname = snapname,
928 .resumeobj = resumeobj,
929 .resumeoff = resumeoff,
930 .resume_bytes = resume_bytes,
931 .redactbook = redactbook,
934 return (lzc_send_wrapper(lzc_send_space_resume_redacted_cb,
/*
 * Convenience form of lzc_send_space_resume_redacted(): zero resume object,
 * resume offset and resume byte count.
 */
939 lzc_send_space(const char *snapname, const char *from,
940 enum lzc_send_flags flags, uint64_t *spacep)
942 return (lzc_send_space_resume_redacted(snapname, from, flags, 0, 0, 0,
/*
 * Read from 'fd' into 'buf' with read(); 'ilen' is the requested length.
 * A negative read() result, or a nonzero remaining length, takes the branch
 * at the end (presumably the failure return -- elided here, confirm).
 */
947 recv_read(int fd, void *buf, int ilen)
954 rv = read(fd, cp, len);
959 if (rv < 0 || len != 0)
966 * Linux adds ZFS_IOC_RECV_NEW for resumable and raw streams and preserves the
967 * legacy ZFS_IOC_RECV user/kernel interface. The new interface supports all
968 * stream options but is currently only used for resumable streams. This way
969 * updated user space utilities will interoperate with older kernel modules.
971 * Non-Linux OpenZFS platforms have opted to modify the legacy interface.
974 recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops,
975 uint8_t *wkeydata, uint_t wkeylen, const char *origin, boolean_t force,
976 boolean_t resumable, boolean_t raw, int input_fd,
977 const dmu_replay_record_t *begin_record, uint64_t *read_bytes,
978 uint64_t *errflags, nvlist_t **errors)
/*
 * Common receive implementation behind the lzc_receive*() entry points.
 * Derives the target filesystem name from 'snapname', obtains the BEGIN
 * record (read from input_fd when begin_record is NULL, otherwise copied
 * from the caller), then issues either ZFS_IOC_RECV_NEW (nvlist based) or
 * the legacy ZFS_IOC_RECV, filling read_bytes/errflags/errors when those
 * pointers were supplied.
 */
980 dmu_replay_record_t drr;
981 char fsname[MAXPATHLEN];
984 boolean_t payload = B_FALSE;
986 ASSERT3S(g_refcount, >, 0);
987 VERIFY3S(g_fd, !=, -1);
989 /* Set 'fsname' to the name of containing filesystem */
990 (void) strlcpy(fsname, snapname, sizeof (fsname));
991 atp = strchr(fsname, '@');
996 /* If the fs does not exist, try its parent. */
997 if (!lzc_exists(fsname)) {
998 char *slashp = strrchr(fsname, '/');
1005 * It is not uncommon for gigabytes to be processed by zfs receive.
1006 * Speculatively increase the buffer size if supported by the platform.
1009 if (fstat(input_fd, &sb) == -1)
1011 if (S_ISFIFO(sb.st_mode))
1012 (void) max_pipe_buffer(input_fd);
1015 * The begin_record is normally a non-byteswapped BEGIN record.
1016 * For resumable streams it may be set to any non-byteswapped
1017 * dmu_replay_record_t.
1019 if (begin_record == NULL) {
1020 error = recv_read(input_fd, &drr, sizeof (drr));
1024 drr = *begin_record;
1025 payload = (begin_record->drr_payloadlen != 0);
1029 * All receives with a payload should use the new interface.
1031 if (resumable || raw || wkeydata != NULL || payload) {
/* New interface: inputs travel in 'innvl', results come back in 'outnvl'. */
1032 nvlist_t *outnvl = NULL;
1033 nvlist_t *innvl = fnvlist_alloc();
1035 fnvlist_add_string(innvl, "snapname", snapname);
1037 if (recvdprops != NULL)
1038 fnvlist_add_nvlist(innvl, "props", recvdprops);
1040 if (localprops != NULL)
1041 fnvlist_add_nvlist(innvl, "localprops", localprops);
1043 if (wkeydata != NULL) {
1045 * wkeydata must be placed in the special
1046 * ZPOOL_HIDDEN_ARGS nvlist so that it
1047 * will not be printed to the zpool history.
1049 nvlist_t *hidden_args = fnvlist_alloc();
1050 fnvlist_add_uint8_array(hidden_args, "wkeydata",
1052 fnvlist_add_nvlist(innvl, ZPOOL_HIDDEN_ARGS,
1054 nvlist_free(hidden_args);
1057 if (origin != NULL && strlen(origin))
1058 fnvlist_add_string(innvl, "origin", origin);
1060 fnvlist_add_byte_array(innvl, "begin_record",
1061 (uchar_t *)&drr, sizeof (drr));
1063 fnvlist_add_int32(innvl, "input_fd", input_fd);
1066 fnvlist_add_boolean(innvl, "force");
1069 fnvlist_add_boolean(innvl, "resumable");
1072 error = lzc_ioctl(ZFS_IOC_RECV_NEW, fsname, innvl, &outnvl);
1074 if (error == 0 && read_bytes != NULL)
1075 error = nvlist_lookup_uint64(outnvl, "read_bytes",
1078 if (error == 0 && errflags != NULL)
1079 error = nvlist_lookup_uint64(outnvl, "error_flags",
1082 if (error == 0 && errors != NULL) {
1084 error = nvlist_lookup_nvlist(outnvl, "errors", &nvl);
1086 *errors = fnvlist_dup(nvl);
1089 fnvlist_free(innvl);
1090 fnvlist_free(outnvl);
1092 zfs_cmd_t zc = {"\0"};
1093 char *packed = NULL;
1096 ASSERT3S(g_refcount, >, 0);
1098 (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
1099 (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
1101 if (recvdprops != NULL) {
1102 packed = fnvlist_pack(recvdprops, &size);
1103 zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
1104 zc.zc_nvlist_src_size = size;
1107 if (localprops != NULL) {
/*
 * NOTE(review): 'packed' and 'size' are reused here.  If recvdprops was
 * packed above, only this second buffer is reachable by the
 * fnvlist_pack_free() below, so the first one looks leaked -- confirm.
 */
1108 packed = fnvlist_pack(localprops, &size);
1109 zc.zc_nvlist_conf = (uint64_t)(uintptr_t)packed;
1110 zc.zc_nvlist_conf_size = size;
1114 (void) strlcpy(zc.zc_string, origin,
1115 sizeof (zc.zc_string));
1117 ASSERT3S(drr.drr_type, ==, DRR_BEGIN);
1118 zc.zc_begin_record = drr.drr_u.drr_begin;
1120 zc.zc_cookie = input_fd;
1121 zc.zc_cleanup_fd = -1;
1122 zc.zc_action_handle = 0;
1124 zc.zc_nvlist_dst_size = 128 * 1024;
/* NOTE(review): no NULL check on this malloc() is visible before the ioctl. */
1125 zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
1126 malloc(zc.zc_nvlist_dst_size);
1128 error = lzc_ioctl_fd(g_fd, ZFS_IOC_RECV, &zc);
1132 if (read_bytes != NULL)
1133 *read_bytes = zc.zc_cookie;
1135 if (errflags != NULL)
1136 *errflags = zc.zc_obj;
1139 VERIFY0(nvlist_unpack(
1140 (void *)(uintptr_t)zc.zc_nvlist_dst,
1141 zc.zc_nvlist_dst_size, errors, KM_SLEEP));
1145 fnvlist_pack_free(packed, size);
1146 free((void *)(uintptr_t)zc.zc_nvlist_dst);
1153 * The simplest receive case: receive from the specified fd, creating the
1154 * specified snapshot. Apply the specified properties as "received" properties
1155 * (which can be overridden by locally-set properties). If the stream is a
1156 * clone, its origin snapshot must be specified by 'origin'. The 'force'
1157 * flag will cause the target filesystem to be rolled back or destroyed if
1158 * necessary to receive.
1160 * Return 0 on success or an errno on failure.
1162 * Note: this interface does not work on dedup'd streams
1163 * (those with DMU_BACKUP_FEATURE_DEDUP).
1166 lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
1167 boolean_t force, boolean_t raw, int fd)
/*
 * Forwards to recv_impl() as a non-resumable receive: no local props, no
 * wrapping key, no caller-supplied begin record and no output parameters.
 */
1169 return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
1170 B_FALSE, raw, fd, NULL, NULL, NULL, NULL));
1174 * Like lzc_receive, but if the receive fails due to premature stream
1175 * termination, the intermediate state will be preserved on disk. In this
1176 * case, ECKSUM will be returned. The receive may subsequently be resumed
1177 * with a resuming send stream generated by lzc_send_resume().
1180 lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin,
1181 boolean_t force, boolean_t raw, int fd)
/* Same call as lzc_receive(), except that 'resumable' is passed as B_TRUE. */
1183 return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
1184 B_TRUE, raw, fd, NULL, NULL, NULL, NULL));
1188 * Like lzc_receive, but allows the caller to read the begin record and then to
1189 * pass it in. That could be useful if the caller wants to derive, for example,
1190 * the snapname or the origin parameters based on the information contained in
1192 * The begin record must be in its original form as read from the stream,
1193 * in other words, it should not be byteswapped.
1195 * The 'resumable' parameter provides the same behavior as
1196 * lzc_receive_resumable.
1199 lzc_receive_with_header(const char *snapname, nvlist_t *props,
1200 const char *origin, boolean_t force, boolean_t resumable, boolean_t raw,
1201 int fd, const dmu_replay_record_t *begin_record)
/*
 * A NULL begin_record is handled separately first (the statement taken in
 * that case is elided from this excerpt); otherwise the record is handed
 * to recv_impl() unchanged, with no output parameters requested.
 */
1203 if (begin_record == NULL)
1206 return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
1207 resumable, raw, fd, begin_record, NULL, NULL, NULL));
1211 * Like lzc_receive, but allows the caller to pass all supported arguments
1212 * and retrieve all values returned. The only additional input parameter
1213 * is 'cleanup_fd' which is used to set a cleanup-on-exit file descriptor.
1215 * The following parameters all provide return values. Several may be set
1216 * in the failure case and will contain additional information.
1218 * The 'read_bytes' value will be set to the total number of bytes read.
1220 * The 'errflags' value will contain zprop_errflags_t flags which are
1221 * used to describe any failures.
1223 * The 'action_handle' and 'cleanup_fd' are no longer used, and are ignored.
1225 * The 'errors' nvlist contains an entry for each unapplied received
1226 * property. Callers are responsible for freeing this nvlist.
1229 lzc_receive_one(const char *snapname, nvlist_t *props,
1230 const char *origin, boolean_t force, boolean_t resumable, boolean_t raw,
1231 int input_fd, const dmu_replay_record_t *begin_record, int cleanup_fd,
1232 uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
1235 (void) action_handle, (void) cleanup_fd;
1236 return (recv_impl(snapname, props, NULL, NULL, 0, origin, force,
1237 resumable, raw, input_fd, begin_record,
1238 read_bytes, errflags, errors));
1242 * Like lzc_receive_one, but allows the caller to pass an additional 'cmdprops'
1245 * The 'cmdprops' nvlist contains both override ('zfs receive -o') and
1246 * exclude ('zfs receive -x') properties. Callers are responsible for freeing
1250 lzc_receive_with_cmdprops(const char *snapname, nvlist_t *props,
1251 nvlist_t *cmdprops, uint8_t *wkeydata, uint_t wkeylen, const char *origin,
1252 boolean_t force, boolean_t resumable, boolean_t raw, int input_fd,
1253 const dmu_replay_record_t *begin_record, int cleanup_fd,
1254 uint64_t *read_bytes, uint64_t *errflags, uint64_t *action_handle,
1257 (void) action_handle, (void) cleanup_fd;
1258 return (recv_impl(snapname, props, cmdprops, wkeydata, wkeylen, origin,
1259 force, resumable, raw, input_fd, begin_record,
1260 read_bytes, errflags, errors));
1264 * Roll back this filesystem or volume to its most recent snapshot.
1265 * If snapnamebuf is not NULL, it will be filled in with the name
1266 * of the most recent snapshot.
1267 * Note that the latest snapshot may change if a new one is concurrently
1268 * created or the current one is destroyed. lzc_rollback_to can be used
1269 * to roll back to a specific latest snapshot.
1271 * Return 0 on success or an errno on failure.
1274 lzc_rollback(const char *fsname, char *snapnamebuf, int snapnamelen)
1280 args = fnvlist_alloc();
1281 err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
1283 if (err == 0 && snapnamebuf != NULL) {
1284 const char *snapname = fnvlist_lookup_string(result, "target");
1285 (void) strlcpy(snapnamebuf, snapname, snapnamelen);
1287 nvlist_free(result);
1293 * Roll back this filesystem or volume to the specified snapshot,
1296 * Return 0 on success or an errno on failure.
1299 lzc_rollback_to(const char *fsname, const char *snapname)
1305 args = fnvlist_alloc();
1306 fnvlist_add_string(args, "target", snapname);
1307 err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
1309 nvlist_free(result);
1314 * Creates new bookmarks from existing snapshot or bookmark.
1316 * The bookmarks nvlist maps from the full name of the new bookmark to
1317 * the full name of the source snapshot or bookmark.
1318 * All the bookmarks and snapshots must be in the same pool.
1319 * The new bookmarks names must be unique.
1320 * => see function dsl_bookmark_create_nvl_validate
1322 * The returned results nvlist will have an entry for each bookmark that failed.
1323 * The value will be the (int32) error code.
1325 * The return value will be 0 if all bookmarks were created, otherwise it will
1326 * be the errno of a (undetermined) bookmarks that failed.
1329 lzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist)
1333 char pool[ZFS_MAX_DATASET_NAME_LEN];
1335 /* determine pool name from first bookmark */
1336 elem = nvlist_next_nvpair(bookmarks, NULL);
1339 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
1340 pool[strcspn(pool, "/#")] = '\0';
1342 error = lzc_ioctl(ZFS_IOC_BOOKMARK, pool, bookmarks, errlist);
1348 * Retrieve bookmarks.
1350 * Retrieve the list of bookmarks for the given file system. The props
1351 * parameter is an nvlist of property names (with no values) that will be
1352 * returned for each bookmark.
1354 * The following are valid properties on bookmarks, most of which are numbers
1355 * (represented as uint64 in the nvlist), except redact_snaps, which is a
1356 * uint64 array, and redact_complete, which is a boolean
1358 * "guid" - globally unique identifier of the snapshot it refers to
1359 * "createtxg" - txg when the snapshot it refers to was created
1360 * "creation" - timestamp when the snapshot it refers to was created
1361 * "ivsetguid" - IVset guid for identifying encrypted snapshots
1362 * "redact_snaps" - list of guids of the redaction snapshots for the specified
1363 * bookmark. If the bookmark is not a redaction bookmark, the nvlist will
1364 * not contain an entry for this value. If it is redacted with respect to
1365 * no snapshots, it will contain value -> NULL uint64 array
1366 * "redact_complete" - boolean value; true if the redaction bookmark is
1367 * complete, false otherwise.
1369 * The format of the returned nvlist as follows:
1370 * <short name of bookmark> -> {
1371 * <name of property> -> {
1375 * "redact_snaps" -> {
1376 * "value" -> uint64 array
1378 * "redact_complete" -> {
1379 * "value" -> boolean value
1384 lzc_get_bookmarks(const char *fsname, nvlist_t *props, nvlist_t **bmarks)
1386 return (lzc_ioctl(ZFS_IOC_GET_BOOKMARKS, fsname, props, bmarks));
1390 * Get bookmark properties.
1392 * Given a bookmark's full name, retrieve all properties for the bookmark.
1394 * The format of the returned property list is as follows:
1396 * <name of property> -> {
1400 * "redact_snaps" -> {
1401 * "value" -> uint64 array
1405 lzc_get_bookmark_props(const char *bookmark, nvlist_t **props)
1409 nvlist_t *innvl = fnvlist_alloc();
1410 error = lzc_ioctl(ZFS_IOC_GET_BOOKMARK_PROPS, bookmark, innvl, props);
1411 fnvlist_free(innvl);
1417 * Destroys bookmarks.
1419 * The keys in the bmarks nvlist are the bookmarks to be destroyed.
1420 * They must all be in the same pool. Bookmarks are specified as
1423 * Bookmarks that do not exist will be silently ignored.
1425 * The return value will be 0 if all bookmarks that existed were destroyed.
1427 * Otherwise the return value will be the errno of a (undetermined) bookmark
1428 * that failed, no bookmarks will be destroyed, and the errlist will have an
1429 * entry for each bookmarks that failed. The value in the errlist will be
1430 * the (int32) error code.
1433 lzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist)
1437 char pool[ZFS_MAX_DATASET_NAME_LEN];
1439 /* determine the pool name */
1440 elem = nvlist_next_nvpair(bmarks, NULL);
1443 (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
1444 pool[strcspn(pool, "/#")] = '\0';
1446 error = lzc_ioctl(ZFS_IOC_DESTROY_BOOKMARKS, pool, bmarks, errlist);
1452 lzc_channel_program_impl(const char *pool, const char *program, boolean_t sync,
1453 uint64_t instrlimit, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl)
1458 args = fnvlist_alloc();
1459 fnvlist_add_string(args, ZCP_ARG_PROGRAM, program);
1460 fnvlist_add_nvlist(args, ZCP_ARG_ARGLIST, argnvl);
1461 fnvlist_add_boolean_value(args, ZCP_ARG_SYNC, sync);
1462 fnvlist_add_uint64(args, ZCP_ARG_INSTRLIMIT, instrlimit);
1463 fnvlist_add_uint64(args, ZCP_ARG_MEMLIMIT, memlimit);
1464 error = lzc_ioctl(ZFS_IOC_CHANNEL_PROGRAM, pool, args, outnvl);
1471 * Executes a channel program.
1473 * If this function returns 0 the channel program was successfully loaded and
1474 * ran without failing. Note that individual commands the channel program ran
1475 * may have failed and the channel program is responsible for reporting such
1476 * errors through outnvl if they are important.
1478 * This method may also return:
1480 * EINVAL The program contains syntax errors, or an invalid memory or time
1481 * limit was given. No part of the channel program was executed.
1482 * If caused by syntax errors, 'outnvl' contains information about the
1485 * ECHRNG The program was executed, but encountered a runtime error, such as
1486 * calling a function with incorrect arguments, invoking the error()
1487 * function directly, failing an assert() command, etc. Some portion
1488 * of the channel program may have executed and committed changes.
1489 * Information about the failure can be found in 'outnvl'.
1491 * ENOMEM The program fully executed, but the output buffer was not large
1492 * enough to store the returned value. No output is returned through
1495 * ENOSPC The program was terminated because it exceeded its memory usage
1496 * limit. Some portion of the channel program may have executed and
1497 * committed changes to disk. No output is returned through 'outnvl'.
1499 * ETIME The program was terminated because it exceeded its Lua instruction
1500 * limit. Some portion of the channel program may have executed and
1501 * committed changes to disk. No output is returned through 'outnvl'.
1504 lzc_channel_program(const char *pool, const char *program, uint64_t instrlimit,
1505 uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl)
1507 return (lzc_channel_program_impl(pool, program, B_TRUE, instrlimit,
1508 memlimit, argnvl, outnvl));
1512 * Creates a checkpoint for the specified pool.
1514 * If this function returns 0 the pool was successfully checkpointed.
1516 * This method may also return:
1518 * ZFS_ERR_CHECKPOINT_EXISTS
1519 * The pool already has a checkpoint. A pools can only have one
1520 * checkpoint at most, at any given time.
1522 * ZFS_ERR_DISCARDING_CHECKPOINT
1523 * ZFS is in the middle of discarding a checkpoint for this pool.
1524 * The pool can be checkpointed again once the discard is done.
1526 * ZFS_DEVRM_IN_PROGRESS
1527 * A vdev is currently being removed. The pool cannot be
1528 * checkpointed until the device removal is done.
1531 * One or more top-level vdevs exceed the maximum vdev size
1532 * supported for this feature.
1535 lzc_pool_checkpoint(const char *pool)
1539 nvlist_t *result = NULL;
1540 nvlist_t *args = fnvlist_alloc();
1542 error = lzc_ioctl(ZFS_IOC_POOL_CHECKPOINT, pool, args, &result);
1545 fnvlist_free(result);
1551 * Discard the checkpoint from the specified pool.
1553 * If this function returns 0 the checkpoint was successfully discarded.
1555 * This method may also return:
1557 * ZFS_ERR_NO_CHECKPOINT
1558 * The pool does not have a checkpoint.
1560 * ZFS_ERR_DISCARDING_CHECKPOINT
1561 * ZFS is already in the middle of discarding the checkpoint.
1564 lzc_pool_checkpoint_discard(const char *pool)
1568 nvlist_t *result = NULL;
1569 nvlist_t *args = fnvlist_alloc();
1571 error = lzc_ioctl(ZFS_IOC_POOL_DISCARD_CHECKPOINT, pool, args, &result);
1574 fnvlist_free(result);
1580 * Executes a read-only channel program.
1582 * A read-only channel program works programmatically the same way as a
1583 * normal channel program executed with lzc_channel_program(). The only
1584 * difference is it runs exclusively in open-context and therefore can
1585 * return faster. The downside to that, is that the program cannot change
1586 * on-disk state by calling functions from the zfs.sync submodule.
1588 * The return values of this function (and their meaning) are exactly the
1589 * same as the ones described in lzc_channel_program().
1592 lzc_channel_program_nosync(const char *pool, const char *program,
1593 uint64_t timeout, uint64_t memlimit, nvlist_t *argnvl, nvlist_t **outnvl)
1595 return (lzc_channel_program_impl(pool, program, B_FALSE, timeout,
1596 memlimit, argnvl, outnvl));
1600 lzc_get_vdev_prop(const char *poolname, nvlist_t *innvl, nvlist_t **outnvl)
1602 return (lzc_ioctl(ZFS_IOC_VDEV_GET_PROPS, poolname, innvl, outnvl));
1606 lzc_set_vdev_prop(const char *poolname, nvlist_t *innvl, nvlist_t **outnvl)
1608 return (lzc_ioctl(ZFS_IOC_VDEV_SET_PROPS, poolname, innvl, outnvl));
1612 * Performs key management functions
1614 * crypto_cmd should be a value from dcp_cmd_t. If the command specifies to
1615 * load or change a wrapping key, the key should be specified in the
1616 * hidden_args nvlist so that it is not logged.
1619 lzc_load_key(const char *fsname, boolean_t noop, uint8_t *wkeydata,
1624 nvlist_t *hidden_args;
1626 if (wkeydata == NULL)
1629 ioc_args = fnvlist_alloc();
1630 hidden_args = fnvlist_alloc();
1631 fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata, wkeylen);
1632 fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args);
1634 fnvlist_add_boolean(ioc_args, "noop");
1635 error = lzc_ioctl(ZFS_IOC_LOAD_KEY, fsname, ioc_args, NULL);
1636 nvlist_free(hidden_args);
1637 nvlist_free(ioc_args);
1643 lzc_unload_key(const char *fsname)
1645 return (lzc_ioctl(ZFS_IOC_UNLOAD_KEY, fsname, NULL, NULL));
1649 lzc_change_key(const char *fsname, uint64_t crypt_cmd, nvlist_t *props,
1650 uint8_t *wkeydata, uint_t wkeylen)
1653 nvlist_t *ioc_args = fnvlist_alloc();
1654 nvlist_t *hidden_args = NULL;
1656 fnvlist_add_uint64(ioc_args, "crypt_cmd", crypt_cmd);
1658 if (wkeydata != NULL) {
1659 hidden_args = fnvlist_alloc();
1660 fnvlist_add_uint8_array(hidden_args, "wkeydata", wkeydata,
1662 fnvlist_add_nvlist(ioc_args, ZPOOL_HIDDEN_ARGS, hidden_args);
1666 fnvlist_add_nvlist(ioc_args, "props", props);
1668 error = lzc_ioctl(ZFS_IOC_CHANGE_KEY, fsname, ioc_args, NULL);
1669 nvlist_free(hidden_args);
1670 nvlist_free(ioc_args);
1676 lzc_reopen(const char *pool_name, boolean_t scrub_restart)
1678 nvlist_t *args = fnvlist_alloc();
1681 fnvlist_add_boolean_value(args, "scrub_restart", scrub_restart);
1683 error = lzc_ioctl(ZFS_IOC_POOL_REOPEN, pool_name, args, NULL);
1689 * Changes initializing state.
1691 * vdevs should be a list of (<key>, guid) where guid is a uint64 vdev GUID.
1692 * The key is ignored.
1694 * If there are errors related to vdev arguments, per-vdev errors are returned
1695 * in an nvlist with the key "vdevs". Each error is a (guid, errno) pair where
1696 * guid is stringified with PRIu64, and errno is one of the following as
1698 * - ENODEV if the device was not found
1699 * - EINVAL if the devices is not a leaf or is not concrete (e.g. missing)
1700 * - EROFS if the device is not writeable
1701 * - EBUSY start requested but the device is already being either
1702 * initialized or trimmed
1703 * - ESRCH cancel/suspend requested but device is not being initialized
1705 * If the errlist is empty, then return value will be:
1706 * - EINVAL if one or more arguments was invalid
1707 * - Other spa_open failures
1708 * - 0 if the operation succeeded
1711 lzc_initialize(const char *poolname, pool_initialize_func_t cmd_type,
1712 nvlist_t *vdevs, nvlist_t **errlist)
1716 nvlist_t *args = fnvlist_alloc();
1717 fnvlist_add_uint64(args, ZPOOL_INITIALIZE_COMMAND, (uint64_t)cmd_type);
1718 fnvlist_add_nvlist(args, ZPOOL_INITIALIZE_VDEVS, vdevs);
1720 error = lzc_ioctl(ZFS_IOC_POOL_INITIALIZE, poolname, args, errlist);
1728 * Changes TRIM state.
1730 * vdevs should be a list of (<key>, guid) where guid is a uint64 vdev GUID.
1731 * The key is ignored.
1733 * If there are errors related to vdev arguments, per-vdev errors are returned
1734 * in an nvlist with the key "vdevs". Each error is a (guid, errno) pair where
1735 * guid is stringified with PRIu64, and errno is one of the following as
1737 * - ENODEV if the device was not found
1738 * - EINVAL if the devices is not a leaf or is not concrete (e.g. missing)
1739 * - EROFS if the device is not writeable
1740 * - EBUSY start requested but the device is already being either trimmed
1742 * - ESRCH cancel/suspend requested but device is not being initialized
1743 * - EOPNOTSUPP if the device does not support TRIM (or secure TRIM)
1745 * If the errlist is empty, then return value will be:
1746 * - EINVAL if one or more arguments was invalid
1747 * - Other spa_open failures
1748 * - 0 if the operation succeeded
1751 lzc_trim(const char *poolname, pool_trim_func_t cmd_type, uint64_t rate,
1752 boolean_t secure, nvlist_t *vdevs, nvlist_t **errlist)
1756 nvlist_t *args = fnvlist_alloc();
1757 fnvlist_add_uint64(args, ZPOOL_TRIM_COMMAND, (uint64_t)cmd_type);
1758 fnvlist_add_nvlist(args, ZPOOL_TRIM_VDEVS, vdevs);
1759 fnvlist_add_uint64(args, ZPOOL_TRIM_RATE, rate);
1760 fnvlist_add_boolean_value(args, ZPOOL_TRIM_SECURE, secure);
1762 error = lzc_ioctl(ZFS_IOC_POOL_TRIM, poolname, args, errlist);
1770 * Create a redaction bookmark named bookname by redacting snapshot with respect
1771 * to all the snapshots in snapnv.
1774 lzc_redact(const char *snapshot, const char *bookname, nvlist_t *snapnv)
1776 nvlist_t *args = fnvlist_alloc();
1777 fnvlist_add_string(args, "bookname", bookname);
1778 fnvlist_add_nvlist(args, "snapnv", snapnv);
1779 int error = lzc_ioctl(ZFS_IOC_REDACT, snapshot, args, NULL);
1785 wait_common(const char *pool, zpool_wait_activity_t activity, boolean_t use_tag,
1786 uint64_t tag, boolean_t *waited)
1788 nvlist_t *args = fnvlist_alloc();
1789 nvlist_t *result = NULL;
1791 fnvlist_add_int32(args, ZPOOL_WAIT_ACTIVITY, activity);
1793 fnvlist_add_uint64(args, ZPOOL_WAIT_TAG, tag);
1795 int error = lzc_ioctl(ZFS_IOC_WAIT, pool, args, &result);
1797 if (error == 0 && waited != NULL)
1798 *waited = fnvlist_lookup_boolean_value(result,
1802 fnvlist_free(result);
1808 lzc_wait(const char *pool, zpool_wait_activity_t activity, boolean_t *waited)
1810 return (wait_common(pool, activity, B_FALSE, 0, waited));
1814 lzc_wait_tag(const char *pool, zpool_wait_activity_t activity, uint64_t tag,
1817 return (wait_common(pool, activity, B_TRUE, tag, waited));
1821 lzc_wait_fs(const char *fs, zfs_wait_activity_t activity, boolean_t *waited)
1823 nvlist_t *args = fnvlist_alloc();
1824 nvlist_t *result = NULL;
1826 fnvlist_add_int32(args, ZFS_WAIT_ACTIVITY, activity);
1828 int error = lzc_ioctl(ZFS_IOC_WAIT_FS, fs, args, &result);
1830 if (error == 0 && waited != NULL)
1831 *waited = fnvlist_lookup_boolean_value(result,
1835 fnvlist_free(result);
1841 * Set the bootenv contents for the given pool.
1844 lzc_set_bootenv(const char *pool, const nvlist_t *env)
1846 return (lzc_ioctl(ZFS_IOC_SET_BOOTENV, pool, (nvlist_t *)env, NULL));
1850 * Get the contents of the bootenv of the given pool.
1853 lzc_get_bootenv(const char *pool, nvlist_t **outnvl)
1855 return (lzc_ioctl(ZFS_IOC_GET_BOOTENV, pool, NULL, outnvl));