4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC.
23 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
24 * Rewritten for Linux by Brian Behlendorf <behlendorf1@llnl.gov>.
27 * ZFS volume emulation driver.
29 * Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes.
30 * Volumes are accessed through the symbolic links named:
32 * /dev/<pool_name>/<dataset_name>
34 * Volumes are persistent through reboot and module load. No user command
35 * needs to be run before opening and using a device.
37 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
38 * Copyright (c) 2016 Actifio, Inc. All rights reserved.
42 #include <sys/dmu_traverse.h>
43 #include <sys/dsl_dataset.h>
44 #include <sys/dsl_prop.h>
46 #include <sys/zfeature.h>
47 #include <sys/zil_impl.h>
48 #include <sys/dmu_tx.h>
50 #include <sys/zfs_rlock.h>
51 #include <sys/zfs_znode.h>
53 #include <linux/blkdev_compat.h>
/*
 * Module tunables.  zvol_major is the block device major number used for
 * all zvols.  zvol_prefetch_bytes is prefetched from each end of a newly
 * created volume to speed up blkid(8) scanning (see __zvol_create_minor()).
 * zvol_max_discard_blocks bounds the max discard size advertised to the
 * block layer.  zvol_inhibit_dev -- presumably inhibits device node
 * creation when non-zero; its consumer is not visible in this chunk
 * (TODO confirm).
 */
55 unsigned int zvol_inhibit_dev = 0;
56 unsigned int zvol_major = ZVOL_MAJOR;
57 unsigned int zvol_prefetch_bytes = (128 * 1024);
58 unsigned long zvol_max_discard_blocks = 16384;
/* Protects zvol_state_list; asserted held by every list walker below. */
60 static kmutex_t zvol_state_lock;
61 static list_t zvol_state_list;
/* Ownership tag passed to dmu_objset_own()/dmu_objset_disown(). */
62 static char *zvol_tag = "zvol_tag";
65 * The in-core state of each volume.
67 typedef struct zvol_state {
68 char zv_name[MAXNAMELEN]; /* name */
69 uint64_t zv_volsize; /* advertised space */
70 uint64_t zv_volblocksize; /* volume block size */
71 objset_t *zv_objset; /* objset handle */
72 uint32_t zv_flags; /* ZVOL_* flags */
73 uint32_t zv_open_count; /* open counts */
74 uint32_t zv_changed; /* disk changed */
75 zilog_t *zv_zilog; /* ZIL handle */
76 znode_t zv_znode; /* for range locking */
77 dmu_buf_t *zv_dbuf; /* bonus handle */
78 dev_t zv_dev; /* device id */
79 struct gendisk *zv_disk; /* generic disk */
80 struct request_queue *zv_queue; /* request queue */
81 list_node_t zv_next; /* next zvol_state_t linkage */
/* zv_flags bit: volume is read-only (set in zvol_first_open()). */
84 #define ZVOL_RDONLY 0x1
87 * Find the next available range of ZVOL_MINORS minor numbers. The
88 * zvol_state_list is kept in ascending minor order so we simply need
89 * to scan the list for the first gap in the sequence. This allows us
90 * to recycle minor number as devices are created and removed.
93 zvol_find_minor(unsigned *minor)
/* Caller must hold zvol_state_lock so the list cannot change under us. */
98 ASSERT(MUTEX_HELD(&zvol_state_lock));
99 for (zv = list_head(&zvol_state_list); zv != NULL;
100 zv = list_next(&zvol_state_list, zv), *minor += ZVOL_MINORS) {
/* First gap in the ascending minor sequence is the answer. */
101 if (MINOR(zv->zv_dev) != MINOR(*minor))
105 /* All minors are in use */
106 if (*minor >= (1 << MINORBITS))
107 return (SET_ERROR(ENXIO));
113 * Find a zvol_state_t given the full major+minor dev_t.
115 static zvol_state_t *
116 zvol_find_by_dev(dev_t dev)
/* Linear scan of the global list; lock must already be held. */
120 ASSERT(MUTEX_HELD(&zvol_state_lock));
121 for (zv = list_head(&zvol_state_list); zv != NULL;
122 zv = list_next(&zvol_state_list, zv)) {
123 if (zv->zv_dev == dev)
131 * Find a zvol_state_t given the name provided at zvol_alloc() time.
133 static zvol_state_t *
134 zvol_find_by_name(const char *name)
/* Linear scan by dataset name; lock must already be held. */
138 ASSERT(MUTEX_HELD(&zvol_state_lock));
139 for (zv = list_head(&zvol_state_list); zv != NULL;
140 zv = list_next(&zvol_state_list, zv)) {
/* zv_name is MAXNAMELEN bytes, so bound the comparison accordingly. */
141 if (strncmp(zv->zv_name, name, MAXNAMELEN) == 0)
150 * Given a path, return TRUE if path is a ZVOL.
153 zvol_is_zvol(const char *device)
155 struct block_device *bdev;
/* Resolve the path to a block device and compare its major number. */
158 bdev = lookup_bdev(device);
162 major = MAJOR(bdev->bd_dev);
/* Every zvol shares the module-wide zvol_major. */
165 if (major == zvol_major)
172 * ZFS_IOC_CREATE callback handles dmu zvol and zap object creation.
175 zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
177 zfs_creat_t *zct = arg;
178 nvlist_t *nvprops = zct->zct_props;
180 uint64_t volblocksize, volsize;
/*
 * volsize is mandatory in the creation properties; volblocksize is
 * optional and falls back to the property's default value.
 */
182 VERIFY(nvlist_lookup_uint64(nvprops,
183 zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) == 0);
184 if (nvlist_lookup_uint64(nvprops,
185 zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize) != 0)
186 volblocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE);
189 * These properties must be removed from the list so the generic
190 * property setting step won't apply to them.
192 VERIFY(nvlist_remove_all(nvprops,
193 zfs_prop_to_name(ZFS_PROP_VOLSIZE)) == 0);
194 (void) nvlist_remove_all(nvprops,
195 zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE));
/* Create the data object and the ZAP that stores the "size" property. */
197 error = dmu_object_claim(os, ZVOL_OBJ, DMU_OT_ZVOL, volblocksize,
201 error = zap_create_claim(os, ZVOL_ZAP_OBJ, DMU_OT_ZVOL_PROP,
205 error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx);
210 * ZFS_IOC_OBJSET_STATS entry point.
/*
 * Populate nv with the volume's size (from the ZVOL ZAP) and block size
 * (from the data object's dmu_object_info_t).
 */
213 zvol_get_stats(objset_t *os, nvlist_t *nv)
216 dmu_object_info_t *doi;
219 error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &val);
221 return (SET_ERROR(error));
223 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLSIZE, val);
/* doi is heap-allocated to keep this large struct off the stack. */
224 doi = kmem_alloc(sizeof (dmu_object_info_t), KM_SLEEP);
225 error = dmu_object_info(os, ZVOL_OBJ, doi);
228 dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLBLOCKSIZE,
229 doi->doi_data_block_size);
232 kmem_free(doi, sizeof (dmu_object_info_t));
234 return (SET_ERROR(error));
/*
 * Propagate a new volume size to the in-core state, the gendisk
 * capacity, and the (possibly open) block device.
 */
238 zvol_size_changed(zvol_state_t *zv, uint64_t volsize)
240 struct block_device *bdev;
242 bdev = bdget_disk(zv->zv_disk, 0);
/* Capacity is expressed in 512-byte sectors, hence the >> 9. */
245 set_capacity(zv->zv_disk, volsize >> 9);
246 zv->zv_volsize = volsize;
247 check_disk_size_change(zv->zv_disk, bdev);
253 * Sanity check volume size.
256 zvol_check_volsize(uint64_t volsize, uint64_t blocksize)
259 return (SET_ERROR(EINVAL));
/* The volume size must be an exact multiple of its block size. */
261 if (volsize % blocksize != 0)
262 return (SET_ERROR(EINVAL));
/* Last addressable byte must fit in the maximum file offset. */
265 if (volsize - 1 > MAXOFFSET_T)
266 return (SET_ERROR(EOVERFLOW));
272 * Ensure the zap is flushed then inform the VFS of the capacity change.
275 zvol_update_volsize(uint64_t volsize, objset_t *os)
281 ASSERT(MUTEX_HELD(&zvol_state_lock));
283 tx = dmu_tx_create(os);
284 dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
/* Mark netfree so the assign can succeed even on a nearly-full pool. */
285 dmu_tx_mark_netfree(tx);
286 error = dmu_tx_assign(tx, TXG_WAIT);
289 return (SET_ERROR(error));
291 txg = dmu_tx_get_txg(tx);
/* Persist the new size in the ZVOL ZAP before advertising it. */
293 error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1,
297 txg_wait_synced(dmu_objset_pool(os), txg);
/* On shrink, free the now out-of-range tail of the data object. */
300 error = dmu_free_long_range(os,
301 ZVOL_OBJ, volsize, DMU_OBJECT_END);
/* Apply a size change to a live (in-core) zvol. */
307 zvol_update_live_volsize(zvol_state_t *zv, uint64_t volsize)
309 zvol_size_changed(zv, volsize);
312 * We should post an event here describing the expansion. However,
313 * the zfs_ereport_post() interface doesn't nicely support posting
314 * events for zvols, it assumes events relate to vdevs or zios.
321 * Set ZFS_PROP_VOLSIZE set entry point.
324 zvol_set_volsize(const char *name, uint64_t volsize)
326 zvol_state_t *zv = NULL;
329 dmu_object_info_t *doi;
331 boolean_t owned = B_FALSE;
/* Refuse to resize a read-only volume. */
333 error = dsl_prop_get_integer(name,
334 zfs_prop_to_name(ZFS_PROP_READONLY), &readonly, NULL);
336 return (SET_ERROR(error));
338 return (SET_ERROR(EROFS));
340 mutex_enter(&zvol_state_lock);
341 zv = zvol_find_by_name(name);
/*
 * If there is no live minor (or it has no objset) take a temporary
 * ownership of the objset for the duration of the resize.
 */
343 if (zv == NULL || zv->zv_objset == NULL) {
344 if ((error = dmu_objset_own(name, DMU_OST_ZVOL, B_FALSE,
346 mutex_exit(&zvol_state_lock);
347 return (SET_ERROR(error));
356 doi = kmem_alloc(sizeof (dmu_object_info_t), KM_SLEEP);
/* Validate the new size against the volume's actual block size. */
358 if ((error = dmu_object_info(os, ZVOL_OBJ, doi)) ||
359 (error = zvol_check_volsize(volsize, doi->doi_data_block_size)))
362 error = zvol_update_volsize(volsize, os);
363 kmem_free(doi, sizeof (dmu_object_info_t));
/* Only a live minor needs its gendisk/bdev capacity refreshed. */
365 if (error == 0 && zv != NULL)
366 error = zvol_update_live_volsize(zv, volsize);
369 dmu_objset_disown(os, FTAG);
371 zv->zv_objset = NULL;
373 mutex_exit(&zvol_state_lock);
378 * Sanity check volume block size.
381 zvol_check_volblocksize(const char *name, uint64_t volblocksize)
383 /* Record sizes above 128k need the feature to be enabled */
384 if (volblocksize > SPA_OLD_MAXBLOCKSIZE) {
/* Pool must be opened to query its feature flags. */
388 if ((error = spa_open(name, &spa, FTAG)) != 0)
391 if (!spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_BLOCKS)) {
392 spa_close(spa, FTAG);
393 return (SET_ERROR(ENOTSUP));
397 * We don't allow setting the property above 1MB,
398 * unless the tunable has been changed.
400 if (volblocksize > zfs_max_recordsize)
401 return (SET_ERROR(EDOM));
403 spa_close(spa, FTAG);
/* Must fall inside the SPA limits (and, per EDOM, be well formed). */
406 if (volblocksize < SPA_MINBLOCKSIZE ||
407 volblocksize > SPA_MAXBLOCKSIZE ||
409 return (SET_ERROR(EDOM));
415 * Set ZFS_PROP_VOLBLOCKSIZE set entry point.
418 zvol_set_volblocksize(const char *name, uint64_t volblocksize)
424 mutex_enter(&zvol_state_lock);
426 zv = zvol_find_by_name(name);
/* No live minor for this name: nothing to change. */
428 error = SET_ERROR(ENXIO);
432 if (zv->zv_flags & ZVOL_RDONLY) {
433 error = SET_ERROR(EROFS);
437 tx = dmu_tx_create(zv->zv_objset);
438 dmu_tx_hold_bonus(tx, ZVOL_OBJ);
439 error = dmu_tx_assign(tx, TXG_WAIT);
443 error = dmu_object_set_blocksize(zv->zv_objset, ZVOL_OBJ,
444 volblocksize, 0, tx);
/*
 * ENOTSUP here means the object already has data blocks and its
 * block size can no longer change; report that as EBUSY.
 */
445 if (error == ENOTSUP)
446 error = SET_ERROR(EBUSY);
449 zv->zv_volblocksize = volblocksize;
452 mutex_exit(&zvol_state_lock);
454 return (SET_ERROR(error));
458 * Replay a TX_TRUNCATE ZIL transaction if asked. TX_TRUNCATE is how we
459 * implement DKIOCFREE/free-long-range.
462 zvol_replay_truncate(zvol_state_t *zv, lr_truncate_t *lr, boolean_t byteswap)
464 uint64_t offset, length;
/* Log records from an opposite-endian system need byteswapping first. */
467 byteswap_uint64_array(lr, sizeof (*lr));
469 offset = lr->lr_offset;
470 length = lr->lr_length;
472 return (dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, offset, length));
476 * Replay a TX_WRITE ZIL transaction that didn't get committed
477 * after a system failure
480 zvol_replay_write(zvol_state_t *zv, lr_write_t *lr, boolean_t byteswap)
482 objset_t *os = zv->zv_objset;
483 char *data = (char *)(lr + 1); /* data follows lr_write_t */
484 uint64_t off = lr->lr_offset;
485 uint64_t len = lr->lr_length;
/* Only the fixed-size record header is byteswapped, not the payload. */
490 byteswap_uint64_array(lr, sizeof (*lr));
492 tx = dmu_tx_create(os);
493 dmu_tx_hold_write(tx, ZVOL_OBJ, off, len);
494 error = dmu_tx_assign(tx, TXG_WAIT);
498 dmu_write(os, ZVOL_OBJ, off, len, data, tx);
502 return (SET_ERROR(error));
/* Catch-all replay handler for record types a zvol never generates. */
506 zvol_replay_err(zvol_state_t *zv, lr_t *lr, boolean_t byteswap)
508 return (SET_ERROR(ENOTSUP));
512 * Callback vectors for replaying records.
513 * Only TX_WRITE and TX_TRUNCATE are needed for zvol.
515 zil_replay_func_t zvol_replay_vector[TX_MAX_TYPE] = {
516 (zil_replay_func_t)zvol_replay_err, /* no such transaction type */
517 (zil_replay_func_t)zvol_replay_err, /* TX_CREATE */
518 (zil_replay_func_t)zvol_replay_err, /* TX_MKDIR */
519 (zil_replay_func_t)zvol_replay_err, /* TX_MKXATTR */
520 (zil_replay_func_t)zvol_replay_err, /* TX_SYMLINK */
521 (zil_replay_func_t)zvol_replay_err, /* TX_REMOVE */
522 (zil_replay_func_t)zvol_replay_err, /* TX_RMDIR */
523 (zil_replay_func_t)zvol_replay_err, /* TX_LINK */
524 (zil_replay_func_t)zvol_replay_err, /* TX_RENAME */
525 (zil_replay_func_t)zvol_replay_write, /* TX_WRITE */
526 (zil_replay_func_t)zvol_replay_truncate, /* TX_TRUNCATE */
527 (zil_replay_func_t)zvol_replay_err, /* TX_SETATTR */
528 (zil_replay_func_t)zvol_replay_err, /* TX_ACL */
532 * zvol_log_write() handles synchronous writes using TX_WRITE ZIL transactions.
534 * We store data in the log buffers if it's small enough.
535 * Otherwise we will later flush the data out via dmu_sync().
/* Writes at or below this size are copied into the log record itself. */
537 ssize_t zvol_immediate_write_sz = 32768;
540 zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
541 uint64_t size, int sync)
543 uint32_t blocksize = zv->zv_volblocksize;
544 zilog_t *zilog = zv->zv_zilog;
546 ssize_t immediate_write_sz;
/* During ZIL replay, do not generate new log records. */
548 if (zil_replaying(zilog, tx))
/* Throughput logbias disables immediate writes entirely. */
551 immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
552 ? 0 : zvol_immediate_write_sz;
553 slogging = spa_has_slogs(zilog->zl_spa) &&
554 (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY);
560 itx_wr_state_t write_state;
563 * Unlike zfs_log_write() we can be called with
564 * up to DMU_MAX_ACCESS/2 (5MB) writes.
/*
 * Choose the write state: indirect (dmu_sync) for large aligned
 * writes to a pool without a separate log, otherwise copy the
 * data into the record now (WR_COPIED) or at commit (WR_NEED_COPY).
 */
566 if (blocksize > immediate_write_sz && !slogging &&
567 size >= blocksize && offset % blocksize == 0) {
568 write_state = WR_INDIRECT; /* uses dmu_sync */
571 write_state = WR_COPIED;
572 len = MIN(ZIL_MAX_LOG_DATA, size);
574 write_state = WR_NEED_COPY;
575 len = MIN(ZIL_MAX_LOG_DATA, size);
578 itx = zil_itx_create(TX_WRITE, sizeof (*lr) +
579 (write_state == WR_COPIED ? len : 0));
580 lr = (lr_write_t *)&itx->itx_lr;
/*
 * If the WR_COPIED read fails, fall back to WR_NEED_COPY with a
 * freshly allocated record that carries no inline payload.
 */
581 if (write_state == WR_COPIED && dmu_read(zv->zv_objset,
582 ZVOL_OBJ, offset, len, lr+1, DMU_READ_NO_PREFETCH) != 0) {
583 zil_itx_destroy(itx);
584 itx = zil_itx_create(TX_WRITE, sizeof (*lr));
585 lr = (lr_write_t *)&itx->itx_lr;
586 write_state = WR_NEED_COPY;
589 itx->itx_wr_state = write_state;
590 if (write_state == WR_NEED_COPY)
592 lr->lr_foid = ZVOL_OBJ;
593 lr->lr_offset = offset;
596 BP_ZERO(&lr->lr_blkptr);
/* itx_private is handed back to zvol_get_data() at commit time. */
598 itx->itx_private = zv;
599 itx->itx_sync = sync;
601 (void) zil_itx_assign(zilog, itx, tx);
/*
 * Write uio into the volume, chunked to at most DMU_MAX_ACCESS/2 per
 * transaction, logging each chunk to the ZIL and committing when sync.
 */
609 zvol_write(zvol_state_t *zv, uio_t *uio, boolean_t sync)
611 uint64_t volsize = zv->zv_volsize;
615 ASSERT(zv && zv->zv_open_count > 0);
/* Range-lock the whole request so concurrent I/O cannot interleave. */
617 rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid,
620 while (uio->uio_resid > 0 && uio->uio_loffset < volsize) {
621 uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1);
622 uint64_t off = uio->uio_loffset;
623 dmu_tx_t *tx = dmu_tx_create(zv->zv_objset);
625 if (bytes > volsize - off) /* don't write past the end */
626 bytes = volsize - off;
628 dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes);
630 /* This will only fail for ENOSPC */
631 error = dmu_tx_assign(tx, TXG_WAIT);
636 error = dmu_write_uio_dbuf(zv->zv_dbuf, uio, bytes, tx);
638 zvol_log_write(zv, tx, off, bytes, sync);
644 zfs_range_unlock(rl);
/* For synchronous semantics, force the ZIL to stable storage. */
646 zil_commit(zv->zv_zilog, ZVOL_OBJ);
651 * Log a DKIOCFREE/free-long-range to the ZIL with TX_TRUNCATE.
654 zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, uint64_t len,
659 zilog_t *zilog = zv->zv_zilog;
/* During ZIL replay, do not generate new log records. */
661 if (zil_replaying(zilog, tx))
664 itx = zil_itx_create(TX_TRUNCATE, sizeof (*lr));
665 lr = (lr_truncate_t *)&itx->itx_lr;
666 lr->lr_foid = ZVOL_OBJ;
670 itx->itx_sync = sync;
671 zil_itx_assign(zilog, itx, tx);
/*
 * Handle a discard (TRIM) bio by freeing the covered range of the
 * backing DMU object.  Returns 0 or an errno.
 */
675 zvol_discard(struct bio *bio)
677 zvol_state_t *zv = bio->bi_bdev->bd_disk->private_data;
/* Sector-based bio geometry converted to a byte range. */
678 uint64_t start = BIO_BI_SECTOR(bio) << 9;
679 uint64_t size = BIO_BI_SIZE(bio);
680 uint64_t end = start + size;
685 ASSERT(zv && zv->zv_open_count > 0);
687 if (end > zv->zv_volsize)
688 return (SET_ERROR(EIO));
691 * Align the request to volume block boundaries when REQ_SECURE is
692 * available, but not requested. If we don't, then this will force
693 * dnode_free_range() to zero out the unaligned parts, which is slow
694 * (read-modify-write) and useless since we are not freeing any space
695 * by doing so. Kernels that do not support REQ_SECURE (2.6.32 through
696 * 2.6.35) will not receive this optimization.
699 if (!(bio->bi_rw & REQ_SECURE)) {
700 start = P2ROUNDUP(start, zv->zv_volblocksize);
701 end = P2ALIGN(end, zv->zv_volblocksize);
709 rl = zfs_range_lock(&zv->zv_znode, start, size, RL_WRITER);
710 tx = dmu_tx_create(zv->zv_objset);
/* Mark netfree: a discard only frees space, so allow it on full pools. */
711 dmu_tx_mark_netfree(tx);
712 error = dmu_tx_assign(tx, TXG_WAIT);
/* Log the truncate so the free is replayable after a crash. */
716 zvol_log_truncate(zv, tx, start, size, B_TRUE);
718 error = dmu_free_long_range(zv->zv_objset,
719 ZVOL_OBJ, start, size);
722 zfs_range_unlock(rl);
/*
 * Read from the volume into uio, chunked to at most DMU_MAX_ACCESS/2
 * per dmu_read_uio_dbuf() call, under a reader range lock.
 */
728 zvol_read(zvol_state_t *zv, uio_t *uio)
730 uint64_t volsize = zv->zv_volsize;
734 ASSERT(zv && zv->zv_open_count > 0);
736 rl = zfs_range_lock(&zv->zv_znode, uio->uio_loffset, uio->uio_resid,
738 while (uio->uio_resid > 0 && uio->uio_loffset < volsize) {
739 uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1);
741 /* don't read past the end */
742 if (bytes > volsize - uio->uio_loffset)
743 bytes = volsize - uio->uio_loffset;
745 error = dmu_read_uio_dbuf(zv->zv_dbuf, uio, bytes);
747 /* convert checksum errors into IO errors */
749 error = SET_ERROR(EIO);
753 zfs_range_unlock(rl);
/*
 * make_request_fn entry point for the zvol request queue: translate the
 * bio into a uio and dispatch to discard/flush/read/write handlers.
 * The return type/value is kernel-version dependent (see the #ifdefs).
 */
757 static MAKE_REQUEST_FN_RET
758 zvol_request(struct request_queue *q, struct bio *bio)
761 zvol_state_t *zv = q->queuedata;
/* Mark this thread so memory reclaim cannot recurse into the filesystem. */
762 fstrans_cookie_t cookie = spl_fstrans_mark();
763 int rw = bio_data_dir(bio);
764 #ifdef HAVE_GENERIC_IO_ACCT
765 unsigned long start = jiffies;
/* Build a bvec-backed uio describing the bio's data and byte range. */
769 uio.uio_bvec = &bio->bi_io_vec[BIO_BI_IDX(bio)];
770 uio.uio_skip = BIO_BI_SKIP(bio);
771 uio.uio_resid = BIO_BI_SIZE(bio);
772 uio.uio_iovcnt = bio->bi_vcnt - BIO_BI_IDX(bio);
773 uio.uio_loffset = BIO_BI_SECTOR(bio) << 9;
774 uio.uio_limit = MAXOFFSET_T;
775 uio.uio_segflg = UIO_BVEC;
/* Reject data requests that extend past the end of the volume. */
777 if (bio_has_data(bio) && uio.uio_loffset + uio.uio_resid >
780 "%s: bad access: offset=%llu, size=%lu\n",
781 zv->zv_disk->disk_name,
782 (long long unsigned)uio.uio_loffset,
783 (long unsigned)uio.uio_resid);
784 error = SET_ERROR(EIO);
788 generic_start_io_acct(rw, bio_sectors(bio), &zv->zv_disk->part0);
791 if (unlikely(zv->zv_flags & ZVOL_RDONLY)) {
792 error = SET_ERROR(EROFS);
796 if (bio->bi_rw & VDEV_REQ_DISCARD) {
797 error = zvol_discard(bio);
802 * Some requests are just for flush and nothing else.
804 if (uio.uio_resid == 0) {
805 if (bio->bi_rw & VDEV_REQ_FLUSH)
806 zil_commit(zv->zv_zilog, ZVOL_OBJ);
/* FUA/FLUSH or sync=always forces a synchronous ZIL write. */
810 error = zvol_write(zv, &uio,
811 ((bio->bi_rw & (VDEV_REQ_FUA|VDEV_REQ_FLUSH)) ||
812 zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS));
814 error = zvol_read(zv, &uio);
817 generic_end_io_acct(rw, &zv->zv_disk->part0, start);
/* Complete the bio; kernel convention expects a negative errno. */
819 BIO_END_IO(bio, -error);
820 spl_fstrans_unmark(cookie);
821 #ifdef HAVE_MAKE_REQUEST_FN_RET_INT
823 #elif defined(HAVE_MAKE_REQUEST_FN_RET_QC)
824 return (BLK_QC_T_NONE);
/*
 * Completion callback for zvol_get_data(): release the dbuf hold and
 * range lock, record the block in the ZIL on success, and free the zgd.
 */
829 zvol_get_done(zgd_t *zgd, int error)
832 dmu_buf_rele(zgd->zgd_db, zgd);
834 zfs_range_unlock(zgd->zgd_rl);
836 if (error == 0 && zgd->zgd_bp)
837 zil_add_block(zgd->zgd_zilog, zgd->zgd_bp);
839 kmem_free(zgd, sizeof (zgd_t));
843 * Get data to generate a TX_WRITE intent log record.
846 zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
848 zvol_state_t *zv = arg;
849 objset_t *os = zv->zv_objset;
850 uint64_t object = ZVOL_OBJ;
851 uint64_t offset = lr->lr_offset;
852 uint64_t size = lr->lr_length;
853 blkptr_t *bp = &lr->lr_blkptr;
861 zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
862 zgd->zgd_zilog = zv->zv_zilog;
/* Reader lock: we only need the range stable while it is copied/synced. */
863 zgd->zgd_rl = zfs_range_lock(&zv->zv_znode, offset, size, RL_READER);
866 * Write records come in two flavors: immediate and indirect.
867 * For small writes it's cheaper to store the data with the
868 * log record (immediate); for large writes it's cheaper to
869 * sync the data and get a pointer to it (indirect) so that
870 * we don't have to write the data twice.
872 if (buf != NULL) { /* immediate write */
873 error = dmu_read(os, object, offset, size, buf,
874 DMU_READ_NO_PREFETCH);
/* Indirect write: align to the volume block and dmu_sync() it. */
876 size = zv->zv_volblocksize;
877 offset = P2ALIGN_TYPED(offset, size, uint64_t);
878 error = dmu_buf_hold(os, object, offset, zgd, &db,
879 DMU_READ_NO_PREFETCH);
881 blkptr_t *obp = dmu_buf_get_blkptr(db);
883 ASSERT(BP_IS_HOLE(bp));
888 zgd->zgd_bp = &lr->lr_blkptr;
891 ASSERT(db->db_offset == offset);
892 ASSERT(db->db_size == size);
894 error = dmu_sync(zio, lr->lr_common.lrc_txg,
/* zvol_get_done() releases everything the zgd holds. */
902 zvol_get_done(zgd, error);
904 return (SET_ERROR(error));
908 * The zvol_state_t's are inserted in increasing MINOR(dev_t) order.
911 zvol_insert(zvol_state_t *zv_insert)
913 zvol_state_t *zv = NULL;
915 ASSERT(MUTEX_HELD(&zvol_state_lock));
/* Minor must be the first of its ZVOL_MINORS-aligned range. */
916 ASSERT3U(MINOR(zv_insert->zv_dev) & ZVOL_MINOR_MASK, ==, 0);
917 for (zv = list_head(&zvol_state_list); zv != NULL;
918 zv = list_next(&zvol_state_list, zv)) {
919 if (MINOR(zv->zv_dev) > MINOR(zv_insert->zv_dev))
923 list_insert_before(&zvol_state_list, zv, zv_insert);
927 * Simply remove the zvol from the list of zvols.
930 zvol_remove(zvol_state_t *zv_remove)
932 ASSERT(MUTEX_HELD(&zvol_state_lock));
933 list_remove(&zvol_state_list, zv_remove);
/*
 * Called on the first open of a zvol: own the objset, read the size,
 * hold the bonus buffer, open the ZIL, and set the read-only state.
 * Returns a negative errno (note the -SET_ERROR/SET_ERROR(-error) forms).
 */
937 zvol_first_open(zvol_state_t *zv)
946 * In all other cases the spa_namespace_lock is taken before the
947 * bdev->bd_mutex lock. But in this case the Linux __blkdev_get()
948 * function calls fops->open() with the bdev->bd_mutex lock held.
950 * To avoid a potential lock inversion deadlock we preemptively
951 * try to take the spa_namespace_lock(). Normally it will not
952 * be contended and this is safe because spa_open_common() handles
953 * the case where the caller already holds the spa_namespace_lock.
955 * When it is contended we risk a lock inversion if we were to
956 * block waiting for the lock. Luckily, the __blkdev_get()
957 * function allows us to return -ERESTARTSYS which will result in
958 * bdev->bd_mutex being dropped, reacquired, and fops->open() being
959 * called again. This process can be repeated safely until both
960 * locks are acquired.
962 if (!mutex_owned(&spa_namespace_lock)) {
963 locked = mutex_tryenter(&spa_namespace_lock);
965 return (-SET_ERROR(ERESTARTSYS));
968 error = dsl_prop_get_integer(zv->zv_name, "readonly", &ro, NULL);
972 /* lie and say we're read-only */
973 error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, 1, zvol_tag, &os);
977 error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
979 dmu_objset_disown(os, zvol_tag);
984 error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf);
986 dmu_objset_disown(os, zvol_tag);
990 set_capacity(zv->zv_disk, volsize >> 9);
991 zv->zv_volsize = volsize;
992 zv->zv_zilog = zil_open(os, zvol_get_data);
/* Snapshots and non-writeable pools are always exposed read-only. */
994 if (ro || dmu_objset_is_snapshot(os) ||
995 !spa_writeable(dmu_objset_spa(os))) {
996 set_disk_ro(zv->zv_disk, 1);
997 zv->zv_flags |= ZVOL_RDONLY;
999 set_disk_ro(zv->zv_disk, 0);
1000 zv->zv_flags &= ~ZVOL_RDONLY;
1005 mutex_exit(&spa_namespace_lock);
1007 return (SET_ERROR(-error));
/*
 * Called on the last close of a zvol: tear down everything that
 * zvol_first_open() set up (ZIL, bonus hold, objset ownership).
 */
1011 zvol_last_close(zvol_state_t *zv)
1013 zil_close(zv->zv_zilog);
1014 zv->zv_zilog = NULL;
1016 dmu_buf_rele(zv->zv_dbuf, zvol_tag);
/* Flush dirty data so the dataset can be evicted/disowned cleanly. */
1022 if (dsl_dataset_is_dirty(dmu_objset_ds(zv->zv_objset)) &&
1023 !(zv->zv_flags & ZVOL_RDONLY))
1024 txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);
1025 (void) dmu_objset_evict_dbufs(zv->zv_objset);
1027 dmu_objset_disown(zv->zv_objset, zvol_tag);
1028 zv->zv_objset = NULL;
/*
 * block_device_operations open handler.  Takes zvol_state_lock unless
 * the caller (zvol_create_minor() path) already holds it.
 */
1032 zvol_open(struct block_device *bdev, fmode_t flag)
1035 int error = 0, drop_mutex = 0;
1038 * If the caller is already holding the mutex do not take it
1039 * again, this will happen as part of zvol_create_minor().
1040 * Once add_disk() is called the device is live and the kernel
1041 * will attempt to open it to read the partition information.
1043 if (!mutex_owned(&zvol_state_lock)) {
1044 mutex_enter(&zvol_state_lock);
1049 * Obtain a copy of private_data under the lock to make sure
1050 * that either the result of zvol_free() setting
1051 * bdev->bd_disk->private_data to NULL is observed, or zvol_free()
1052 * is not called on this zv because of the positive zv_open_count.
1054 zv = bdev->bd_disk->private_data;
1060 if (zv->zv_open_count == 0) {
1061 error = zvol_first_open(zv);
/* Writable opens of a read-only zvol are refused. */
1066 if ((flag & FMODE_WRITE) && (zv->zv_flags & ZVOL_RDONLY)) {
1068 goto out_open_count;
1071 zv->zv_open_count++;
1073 check_disk_change(bdev);
/* Error path: if we were the first opener, undo zvol_first_open(). */
1076 if (zv->zv_open_count == 0)
1077 zvol_last_close(zv);
1081 mutex_exit(&zvol_state_lock);
1083 return (SET_ERROR(error));
1086 #ifdef HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID
/*
 * block_device_operations release handler; return type is void or int
 * depending on kernel version (see the #ifdefs).
 */
1091 zvol_release(struct gendisk *disk, fmode_t mode)
1093 zvol_state_t *zv = disk->private_data;
1096 ASSERT(zv && zv->zv_open_count > 0);
1098 if (!mutex_owned(&zvol_state_lock)) {
1099 mutex_enter(&zvol_state_lock);
1103 zv->zv_open_count--;
1104 if (zv->zv_open_count == 0)
1105 zvol_last_close(zv);
1108 mutex_exit(&zvol_state_lock);
1110 #ifndef HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID
1110 #ifndef HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID
1116 zvol_ioctl(struct block_device *bdev, fmode_t mode,
1117 unsigned int cmd, unsigned long arg)
1119 zvol_state_t *zv = bdev->bd_disk->private_data;
1122 ASSERT(zv && zv->zv_open_count > 0);
1126 zil_commit(zv->zv_zilog, ZVOL_OBJ);
1129 error = copy_to_user((void *)arg, zv->zv_name, MAXNAMELEN);
1138 return (SET_ERROR(error));
1141 #ifdef CONFIG_COMPAT
1143 zvol_compat_ioctl(struct block_device *bdev, fmode_t mode,
1144 unsigned cmd, unsigned long arg)
1146 return (zvol_ioctl(bdev, mode, cmd, arg));
1149 #define zvol_compat_ioctl NULL
/* Report the zv_changed flag to the block layer's media-change poll. */
1152 static int zvol_media_changed(struct gendisk *disk)
1154 zvol_state_t *zv = disk->private_data;
1156 ASSERT(zv && zv->zv_open_count > 0);
1158 return (zv->zv_changed);
/* Refresh the gendisk capacity from the current in-core volume size. */
1161 static int zvol_revalidate_disk(struct gendisk *disk)
1163 zvol_state_t *zv = disk->private_data;
1165 ASSERT(zv && zv->zv_open_count > 0);
1168 set_capacity(zv->zv_disk, zv->zv_volsize >> 9);
1174 * Provide a simple virtual geometry for legacy compatibility. For devices
1175 * smaller than 1 MiB a small head and sector count is used to allow very
1176 * tiny devices. For devices over 1 MiB a standard head and sector count
1177 * is used to keep the cylinders count reasonable.
1180 zvol_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1182 zvol_state_t *zv = bdev->bd_disk->private_data;
1185 ASSERT(zv && zv->zv_open_count > 0);
1187 sectors = get_capacity(zv->zv_disk);
/* 2048 sectors == 1 MiB; picks between the two fake geometries. */
1189 if (sectors > 2048) {
1198 geo->cylinders = sectors / (geo->heads * geo->sectors);
/*
 * blk_register_region() probe callback: map a dev_t to the kobject of
 * the matching zvol's gendisk (with a get_disk() reference), or NULL.
 */
1203 static struct kobject *
1204 zvol_probe(dev_t dev, int *part, void *arg)
1207 struct kobject *kobj;
1209 mutex_enter(&zvol_state_lock);
1210 zv = zvol_find_by_dev(dev);
1211 kobj = zv ? get_disk(zv->zv_disk) : NULL;
1212 mutex_exit(&zvol_state_lock);
1217 #ifdef HAVE_BDEV_BLOCK_DEVICE_OPERATIONS
/* block_device_operations for kernels with the bdev-based prototypes. */
1218 static struct block_device_operations zvol_ops = {
1220 .release = zvol_release,
1221 .ioctl = zvol_ioctl,
1222 .compat_ioctl = zvol_compat_ioctl,
1223 .media_changed = zvol_media_changed,
1224 .revalidate_disk = zvol_revalidate_disk,
1225 .getgeo = zvol_getgeo,
1226 .owner = THIS_MODULE,
1229 #else /* HAVE_BDEV_BLOCK_DEVICE_OPERATIONS */
/*
 * Legacy inode/file based block_device_operations wrappers for old
 * kernels: each one just translates the arguments and forwards to the
 * bdev-based handler above.
 */
1232 zvol_open_by_inode(struct inode *inode, struct file *file)
1234 return (zvol_open(inode->i_bdev, file->f_mode));
1238 zvol_release_by_inode(struct inode *inode, struct file *file)
1240 return (zvol_release(inode->i_bdev->bd_disk, file->f_mode));
1244 zvol_ioctl_by_inode(struct inode *inode, struct file *file,
1245 unsigned int cmd, unsigned long arg)
1247 if (file == NULL || inode == NULL)
1248 return (SET_ERROR(-EINVAL));
1250 return (zvol_ioctl(inode->i_bdev, file->f_mode, cmd, arg));
1253 #ifdef CONFIG_COMPAT
1255 zvol_compat_ioctl_by_inode(struct file *file,
1256 unsigned int cmd, unsigned long arg)
1259 return (SET_ERROR(-EINVAL));
1261 return (zvol_compat_ioctl(file->f_dentry->d_inode->i_bdev,
1262 file->f_mode, cmd, arg));
1265 #define zvol_compat_ioctl_by_inode NULL
/* block_device_operations for kernels with the inode/file prototypes. */
1268 static struct block_device_operations zvol_ops = {
1269 .open = zvol_open_by_inode,
1270 .release = zvol_release_by_inode,
1271 .ioctl = zvol_ioctl_by_inode,
1272 .compat_ioctl = zvol_compat_ioctl_by_inode,
1273 .media_changed = zvol_media_changed,
1274 .revalidate_disk = zvol_revalidate_disk,
1275 .getgeo = zvol_getgeo,
1276 .owner = THIS_MODULE,
1278 #endif /* HAVE_BDEV_BLOCK_DEVICE_OPERATIONS */
1281 * Allocate memory for a new zvol_state_t and setup the required
1282 * request queue and generic disk structures for the block device.
1284 static zvol_state_t *
1285 zvol_alloc(dev_t dev, const char *name)
1289 zv = kmem_zalloc(sizeof (zvol_state_t), KM_SLEEP);
1291 list_link_init(&zv->zv_next);
1293 zv->zv_queue = blk_alloc_queue(GFP_ATOMIC);
1294 if (zv->zv_queue == NULL)
/* bio-based (make_request) queue rather than a request_fn queue. */
1297 blk_queue_make_request(zv->zv_queue, zvol_request);
1299 #ifdef HAVE_BLK_QUEUE_FLUSH
1300 blk_queue_flush(zv->zv_queue, VDEV_REQ_FLUSH | VDEV_REQ_FUA);
1302 blk_queue_ordered(zv->zv_queue, QUEUE_ORDERED_DRAIN, NULL);
1303 #endif /* HAVE_BLK_QUEUE_FLUSH */
1305 zv->zv_disk = alloc_disk(ZVOL_MINORS);
1306 if (zv->zv_disk == NULL)
1309 zv->zv_queue->queuedata = zv;
1311 zv->zv_open_count = 0;
1312 strlcpy(zv->zv_name, name, MAXNAMELEN);
/* Range-lock machinery lives in an embedded znode_t. */
1314 mutex_init(&zv->zv_znode.z_range_lock, NULL, MUTEX_DEFAULT, NULL);
1315 avl_create(&zv->zv_znode.z_range_avl, zfs_range_compare,
1316 sizeof (rl_t), offsetof(rl_t, r_node));
1317 zv->zv_znode.z_is_zvol = TRUE;
1319 zv->zv_disk->major = zvol_major;
1320 zv->zv_disk->first_minor = (dev & MINORMASK);
1321 zv->zv_disk->fops = &zvol_ops;
1322 zv->zv_disk->private_data = zv;
1323 zv->zv_disk->queue = zv->zv_queue;
1324 snprintf(zv->zv_disk->disk_name, DISK_NAME_LEN, "%s%d",
1325 ZVOL_DEV_NAME, (dev & MINORMASK));
/* Error path: unwind the queue allocation and the state struct. */
1330 blk_cleanup_queue(zv->zv_queue);
1332 kmem_free(zv, sizeof (zvol_state_t));
1338 * Cleanup then free a zvol_state_t which was created by zvol_alloc().
1341 zvol_free(zvol_state_t *zv)
1343 ASSERT(MUTEX_HELD(&zvol_state_lock));
1344 ASSERT(zv->zv_open_count == 0);
1346 avl_destroy(&zv->zv_znode.z_range_avl);
1347 mutex_destroy(&zv->zv_znode.z_range_lock);
/* Clear private_data so a racing zvol_open() observes the teardown. */
1349 zv->zv_disk->private_data = NULL;
1351 del_gendisk(zv->zv_disk);
1352 blk_cleanup_queue(zv->zv_queue);
1353 put_disk(zv->zv_disk);
1355 kmem_free(zv, sizeof (zvol_state_t));
/*
 * Return ENODEV when the given name is a snapshot whose parent dataset
 * has snapdev=hidden, i.e. no minor should be created for it.
 */
1359 __zvol_snapdev_hidden(const char *name)
1366 parent = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1367 (void) strlcpy(parent, name, MAXPATHLEN);
/* Truncate at '@' to get the parent dataset's name. */
1369 if ((atp = strrchr(parent, '@')) != NULL) {
1371 error = dsl_prop_get_integer(parent, "snapdev", &snapdev, NULL);
1372 if ((error == 0) && (snapdev == ZFS_SNAPDEV_HIDDEN))
1373 error = SET_ERROR(ENODEV);
1376 kmem_free(parent, MAXPATHLEN);
1378 return (SET_ERROR(error));
/*
 * Create the gendisk/request-queue minor for a named volume.  Must be
 * called with zvol_state_lock held; the lock is dropped briefly around
 * add_disk() (see the comment near the end).
 */
1382 __zvol_create_minor(const char *name, boolean_t ignore_snapdev)
1386 dmu_object_info_t *doi;
1392 ASSERT(MUTEX_HELD(&zvol_state_lock));
/* A minor already exists for this name. */
1394 zv = zvol_find_by_name(name);
1396 error = SET_ERROR(EEXIST);
1400 if (ignore_snapdev == B_FALSE) {
1401 error = __zvol_snapdev_hidden(name);
1406 doi = kmem_alloc(sizeof (dmu_object_info_t), KM_SLEEP);
1408 error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, zvol_tag, &os);
1412 error = dmu_object_info(os, ZVOL_OBJ, doi);
1414 goto out_dmu_objset_disown;
1416 error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
1418 goto out_dmu_objset_disown;
1420 error = zvol_find_minor(&minor);
1422 goto out_dmu_objset_disown;
1424 zv = zvol_alloc(MKDEV(zvol_major, minor), name);
1426 error = SET_ERROR(EAGAIN);
1427 goto out_dmu_objset_disown;
1430 if (dmu_objset_is_snapshot(os))
1431 zv->zv_flags |= ZVOL_RDONLY;
1433 zv->zv_volblocksize = doi->doi_data_block_size;
1434 zv->zv_volsize = volsize;
1437 set_capacity(zv->zv_disk, zv->zv_volsize >> 9);
/* Advertise queue limits: I/O sizes, block size, and discard support. */
1439 blk_queue_max_hw_sectors(zv->zv_queue, (DMU_MAX_ACCESS / 4) >> 9);
1440 blk_queue_max_segments(zv->zv_queue, UINT16_MAX);
1441 blk_queue_max_segment_size(zv->zv_queue, UINT_MAX);
1442 blk_queue_physical_block_size(zv->zv_queue, zv->zv_volblocksize);
1443 blk_queue_io_opt(zv->zv_queue, zv->zv_volblocksize);
1444 blk_queue_max_discard_sectors(zv->zv_queue,
1445 (zvol_max_discard_blocks * zv->zv_volblocksize) >> 9);
1446 blk_queue_discard_granularity(zv->zv_queue, zv->zv_volblocksize);
1447 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zv->zv_queue);
1448 #ifdef QUEUE_FLAG_NONROT
1449 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zv->zv_queue);
1451 #ifdef QUEUE_FLAG_ADD_RANDOM
1452 queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zv->zv_queue);
/* Replay (or destroy, if replay is disabled) any pending ZIL records. */
1455 if (spa_writeable(dmu_objset_spa(os))) {
1456 if (zil_replay_disable)
1457 zil_destroy(dmu_objset_zil(os), B_FALSE);
1459 zil_replay(os, zv, zvol_replay_vector);
1463 * When udev detects the addition of the device it will immediately
1464 * invoke blkid(8) to determine the type of content on the device.
1465 * Prefetching the blocks commonly scanned by blkid(8) will speed
1468 len = MIN(MAX(zvol_prefetch_bytes, 0), SPA_MAXBLOCKSIZE);
1470 dmu_prefetch(os, ZVOL_OBJ, 0, 0, len, ZIO_PRIORITY_SYNC_READ);
1471 dmu_prefetch(os, ZVOL_OBJ, 0, volsize - len, len,
1472 ZIO_PRIORITY_SYNC_READ);
1475 zv->zv_objset = NULL;
1476 out_dmu_objset_disown:
1477 dmu_objset_disown(os, zvol_tag);
1479 kmem_free(doi, sizeof (dmu_object_info_t));
1485 * Drop the lock to prevent deadlock with sys_open() ->
1486 * zvol_open(), which first takes bd_disk->bd_mutex and then
1487 * takes zvol_state_lock, whereas this code path first takes
1488 * zvol_state_lock, and then takes bd_disk->bd_mutex.
1490 mutex_exit(&zvol_state_lock);
1491 add_disk(zv->zv_disk);
1492 mutex_enter(&zvol_state_lock);
1495 return (SET_ERROR(error));
1499 * Create a block device minor node and setup the linkage between it
1500 * and the specified volume. Once this function returns the block
1501 * device is live and ready for use.
1504 zvol_create_minor(const char *name)
1508 mutex_enter(&zvol_state_lock);
1509 error = __zvol_create_minor(name, B_FALSE);
1510 mutex_exit(&zvol_state_lock);
1512 return (SET_ERROR(error));
1516 __zvol_remove_minor(const char *name)
1520 ASSERT(MUTEX_HELD(&zvol_state_lock));
1522 zv = zvol_find_by_name(name);
1524 return (SET_ERROR(ENXIO));
1526 if (zv->zv_open_count > 0)
1527 return (SET_ERROR(EBUSY));
1536 * Remove a block device minor node for the specified volume.
1539 zvol_remove_minor(const char *name)
1543 mutex_enter(&zvol_state_lock);
1544 error = __zvol_remove_minor(name);
1545 mutex_exit(&zvol_state_lock);
1547 return (SET_ERROR(error));
1551 * Rename a block device minor mode for the specified volume.
1554 __zvol_rename_minor(zvol_state_t *zv, const char *newname)
1556 int readonly = get_disk_ro(zv->zv_disk);
1558 ASSERT(MUTEX_HELD(&zvol_state_lock));
1560 strlcpy(zv->zv_name, newname, sizeof (zv->zv_name));
1563 * The block device's read-only state is briefly changed causing
1564 * a KOBJ_CHANGE uevent to be issued. This ensures udev detects
1565 * the name change and fixes the symlinks. This does not change
1566 * ZVOL_RDONLY in zv->zv_flags so the actual read-only state never
1567 * changes. This would normally be done using kobject_uevent() but
1568 * that is a GPL-only symbol which is why we need this workaround.
1570 set_disk_ro(zv->zv_disk, !readonly);
1571 set_disk_ro(zv->zv_disk, readonly);
/*
 * dmu_objset_find() callback: create a minor for each dataset visited.
 * Errors from individual minors are deliberately ignored so one bad
 * dataset does not abort the traversal (best effort).
 */
static int
zvol_create_minors_cb(const char *dsname, void *arg)
{
	(void) zvol_create_minor(dsname);

	return (0);
}
1583 * Create minors for specified dataset including children and snapshots.
1586 zvol_create_minors(const char *name)
1589 fstrans_cookie_t cookie;
1591 if (zvol_inhibit_dev)
1594 cookie = spl_fstrans_mark();
1595 error = dmu_objset_find((char *)name, zvol_create_minors_cb,
1596 NULL, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
1597 spl_fstrans_unmark(cookie);
1599 return (SET_ERROR(error));
1603 * Remove minors for specified dataset including children and snapshots.
1606 zvol_remove_minors(const char *name)
1608 zvol_state_t *zv, *zv_next;
1609 int namelen = ((name) ? strlen(name) : 0);
1611 if (zvol_inhibit_dev)
1614 mutex_enter(&zvol_state_lock);
1616 for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) {
1617 zv_next = list_next(&zvol_state_list, zv);
1619 if (name == NULL || strcmp(zv->zv_name, name) == 0 ||
1620 (strncmp(zv->zv_name, name, namelen) == 0 &&
1621 (zv->zv_name[namelen] == '/' ||
1622 zv->zv_name[namelen] == '@'))) {
1624 /* If in use, leave alone */
1625 if (zv->zv_open_count > 0)
1633 mutex_exit(&zvol_state_lock);
1637 * Rename minors for specified dataset including children and snapshots.
1640 zvol_rename_minors(const char *oldname, const char *newname)
1642 zvol_state_t *zv, *zv_next;
1643 int oldnamelen, newnamelen;
1646 if (zvol_inhibit_dev)
1649 oldnamelen = strlen(oldname);
1650 newnamelen = strlen(newname);
1651 name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
1653 mutex_enter(&zvol_state_lock);
1655 for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) {
1656 zv_next = list_next(&zvol_state_list, zv);
1658 /* If in use, leave alone */
1659 if (zv->zv_open_count > 0)
1662 if (strcmp(zv->zv_name, oldname) == 0) {
1663 __zvol_rename_minor(zv, newname);
1664 } else if (strncmp(zv->zv_name, oldname, oldnamelen) == 0 &&
1665 (zv->zv_name[oldnamelen] == '/' ||
1666 zv->zv_name[oldnamelen] == '@')) {
1667 snprintf(name, MAXNAMELEN, "%s%c%s", newname,
1668 zv->zv_name[oldnamelen],
1669 zv->zv_name + oldnamelen + 1);
1670 __zvol_rename_minor(zv, name);
1674 mutex_exit(&zvol_state_lock);
1676 kmem_free(name, MAXNAMELEN);
1680 snapdev_snapshot_changed_cb(const char *dsname, void *arg) {
1681 uint64_t snapdev = *(uint64_t *) arg;
1683 if (strchr(dsname, '@') == NULL)
1687 case ZFS_SNAPDEV_VISIBLE:
1688 mutex_enter(&zvol_state_lock);
1689 (void) __zvol_create_minor(dsname, B_TRUE);
1690 mutex_exit(&zvol_state_lock);
1692 case ZFS_SNAPDEV_HIDDEN:
1693 (void) zvol_remove_minor(dsname);
1701 zvol_set_snapdev(const char *dsname, uint64_t snapdev) {
1702 fstrans_cookie_t cookie;
1704 if (zvol_inhibit_dev)
1705 /* caller should continue to modify snapdev property */
1708 cookie = spl_fstrans_mark();
1709 (void) dmu_objset_find((char *) dsname, snapdev_snapshot_changed_cb,
1710 &snapdev, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
1711 spl_fstrans_unmark(cookie);
1713 /* caller should continue to modify snapdev property */
1722 list_create(&zvol_state_list, sizeof (zvol_state_t),
1723 offsetof(zvol_state_t, zv_next));
1725 mutex_init(&zvol_state_lock, NULL, MUTEX_DEFAULT, NULL);
1727 error = register_blkdev(zvol_major, ZVOL_DRIVER);
1729 printk(KERN_INFO "ZFS: register_blkdev() failed %d\n", error);
1733 blk_register_region(MKDEV(zvol_major, 0), 1UL << MINORBITS,
1734 THIS_MODULE, zvol_probe, NULL, NULL);
1739 mutex_destroy(&zvol_state_lock);
1740 list_destroy(&zvol_state_list);
1742 return (SET_ERROR(error));
1748 zvol_remove_minors(NULL);
1749 blk_unregister_region(MKDEV(zvol_major, 0), 1UL << MINORBITS);
1750 unregister_blkdev(zvol_major, ZVOL_DRIVER);
1751 mutex_destroy(&zvol_state_lock);
1752 list_destroy(&zvol_state_list);
1755 module_param(zvol_inhibit_dev, uint, 0644);
1756 MODULE_PARM_DESC(zvol_inhibit_dev, "Do not create zvol device nodes");
1758 module_param(zvol_major, uint, 0444);
1759 MODULE_PARM_DESC(zvol_major, "Major number for zvol device");
1761 module_param(zvol_max_discard_blocks, ulong, 0444);
1762 MODULE_PARM_DESC(zvol_max_discard_blocks, "Max number of blocks to discard");
1764 module_param(zvol_prefetch_bytes, uint, 0644);
1765 MODULE_PARM_DESC(zvol_prefetch_bytes, "Prefetch N bytes at zvol start+end");