2 * Copyright (c) 2011-2015 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7 * by Daniel Flores (GSOC 2013 - mentored by Matthew Dillon, compression)
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
19 * 3. Neither the name of The DragonFly Project nor the names of its
20 * contributors may be used to endorse or promote products derived
21 * from this software without specific, prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
27 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
33 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * Per-node backend for kernel filesystem interface.
39 * This executes a VOP concurrently on multiple nodes, each node via its own
40 * thread, and competes to advance the original request. The original
41 * request is retired the moment all requirements are met, even if the
42 * operation is still in-progress on some nodes.
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/kernel.h>
47 #include <sys/fcntl.h>
50 #include <sys/namei.h>
51 #include <sys/mount.h>
52 #include <sys/vnode.h>
53 #include <sys/mountctl.h>
54 #include <sys/dirent.h>
56 #include <sys/objcache.h>
57 #include <sys/event.h>
59 #include <vfs/fifofs/fifo.h>
64 * Backend for hammer2_vfs_root()
66 * This is called when a newly mounted PFS has not yet synchronized
67 * to the inode_tid and modify_tid.
/*
 * Per-node worker for the ipcluster XOP.
 *
 * arg is viewed through its xop_ipcluster member.  clindex is handed
 * unchanged to hammer2_inode_chain() and hammer2_xop_feed(); presumably
 * it selects the cluster node this worker serves (TODO confirm).
 *
 * The chain for xop->head.ip1 is obtained with the ALWAYS and SHARED
 * resolve flags, fed to the XOP together with an error code, and then
 * the chain reference is dropped.
 */
70 hammer2_xop_ipcluster(hammer2_xop_t *arg, int clindex)
72 hammer2_xop_ipcluster_t *xop = &arg->xop_ipcluster;
73 hammer2_chain_t *chain;
76 chain = hammer2_inode_chain(xop->head.ip1, clindex,
77 HAMMER2_RESOLVE_ALWAYS |
78 HAMMER2_RESOLVE_SHARED);
/* Hand the chain (and error) to the collector, then drop our ref. */
84 hammer2_xop_feed(&xop->head, chain, clindex, error);
86 hammer2_chain_drop(chain);
90 * Backend for hammer2_vop_readdir()
/*
 * Per-node worker for the readdir XOP.
 *
 * arg is viewed through its xop_readdir member; clindex is passed to
 * hammer2_inode_chain() and hammer2_xop_feed().
 *
 * The chain of xop->head.ip1 is acquired with the ALWAYS and SHARED
 * resolve flags and used as the lookup parent.  Two lookups appear: one
 * for exactly lkey and one ranging from lkey to HAMMER2_KEY_MAX
 * (presumably the second is a fallback when the first finds nothing;
 * TODO confirm).  Each chain is passed to hammer2_xop_feed() with an
 * error argument of 0 and the scan advances with hammer2_chain_next()
 * using HAMMER2_LOOKUP_SHARED and HAMMER2_LOOKUP_NOUNLOCK.
 *
 * On the way out the chain is dropped, the parent is unlocked and
 * dropped, and a NULL chain is fed together with error.
 */
93 hammer2_xop_readdir(hammer2_xop_t *arg, int clindex)
95 hammer2_xop_readdir_t *xop = &arg->xop_readdir;
96 hammer2_chain_t *parent;
97 hammer2_chain_t *chain;
98 hammer2_key_t key_next;
100 int cache_index = -1;
/* Debug trace, emitted only when bit 0x0020 of hammer2_debug is set. */
104 if (hammer2_debug & 0x0020)
105 kprintf("xop_readdir %p lkey=%016jx\n", xop, lkey);
108 * The inode's chain is the iterator. If we cannot acquire it our
109 * contribution ends here.
111 parent = hammer2_inode_chain(xop->head.ip1, clindex,
112 HAMMER2_RESOLVE_ALWAYS |
113 HAMMER2_RESOLVE_SHARED);
114 if (parent == NULL) {
115 kprintf("xop_readdir: NULL parent\n");
120 * Directory scan [re]start and loop, the feed inherits the chain's
121 * lock so do not unlock it on the iteration.
123 chain = hammer2_chain_lookup(&parent, &key_next, lkey, lkey,
124 &cache_index, HAMMER2_LOOKUP_SHARED);
126 chain = hammer2_chain_lookup(&parent, &key_next,
127 lkey, HAMMER2_KEY_MAX,
129 HAMMER2_LOOKUP_SHARED);
132 error = hammer2_xop_feed(&xop->head, chain, clindex, 0);
135 chain = hammer2_chain_next(&parent, chain, &key_next,
136 key_next, HAMMER2_KEY_MAX,
138 HAMMER2_LOOKUP_SHARED |
139 HAMMER2_LOOKUP_NOUNLOCK);
142 hammer2_chain_drop(chain);
143 hammer2_chain_unlock(parent);
144 hammer2_chain_drop(parent);
/* Final feed: a NULL chain plus the error code. */
146 hammer2_xop_feed(&xop->head, NULL, clindex, error);
150 * Backend for hammer2_vop_nresolve()
/*
 * Per-node worker for the nresolve XOP: find the directory entry named
 * xop->head.name1 (length xop->head.name1_len) under xop->head.ip1.
 *
 * The directory chain is acquired with the ALWAYS and SHARED resolve
 * flags.  The name is hashed with hammer2_dirhash() and the key range
 * lhc through lhc + HAMMER2_DIRHASH_LOMASK is searched; a candidate
 * matches when its bref type is HAMMER2_BREF_TYPE_INODE and its stored
 * name length and name bytes equal the requested name.
 *
 * When the entry found has type HAMMER2_OBJTYPE_HARDLINK it is passed
 * through hammer2_chain_hardlink_find().  The resulting chain and error
 * are fed to the XOP, the chain is dropped without being unlocked, and
 * the parent is unlocked and dropped.
 */
153 hammer2_xop_nresolve(hammer2_xop_t *arg, int clindex)
155 hammer2_xop_nresolve_t *xop = &arg->xop_nresolve;
156 hammer2_chain_t *parent;
157 hammer2_chain_t *chain;
158 const hammer2_inode_data_t *ripdata;
161 hammer2_key_t key_next;
163 int cache_index = -1; /* XXX */
166 parent = hammer2_inode_chain(xop->head.ip1, clindex,
167 HAMMER2_RESOLVE_ALWAYS |
168 HAMMER2_RESOLVE_SHARED);
169 if (parent == NULL) {
170 kprintf("xop_nresolve: NULL parent\n");
175 name = xop->head.name1;
176 name_len = xop->head.name1_len;
179 * Lookup the directory entry
181 lhc = hammer2_dirhash(name, name_len);
182 chain = hammer2_chain_lookup(&parent, &key_next,
183 lhc, lhc + HAMMER2_DIRHASH_LOMASK,
185 HAMMER2_LOOKUP_ALWAYS |
186 HAMMER2_LOOKUP_SHARED);
/* Candidate test: inode bref with identical name length and bytes. */
188 ripdata = &chain->data->ipdata;
189 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
190 ripdata->meta.name_len == name_len &&
191 bcmp(ripdata->filename, name, name_len) == 0) {
194 chain = hammer2_chain_next(&parent, chain, &key_next,
196 lhc + HAMMER2_DIRHASH_LOMASK,
198 HAMMER2_LOOKUP_ALWAYS |
199 HAMMER2_LOOKUP_SHARED);
203 * If the entry is a hardlink pointer, resolve it.
207 if (chain->data->ipdata.meta.type == HAMMER2_OBJTYPE_HARDLINK) {
208 error = hammer2_chain_hardlink_find(
211 HAMMER2_RESOLVE_SHARED);
215 error = hammer2_xop_feed(&xop->head, chain, clindex, error);
217 /* leave lock intact for feed */
218 hammer2_chain_drop(chain);
221 hammer2_chain_unlock(parent);
222 hammer2_chain_drop(parent);
227 * Backend for hammer2_vop_nremove(), hammer2_vop_nrmdir(), and helper
228 * for hammer2_vop_nrename().
230 * This function locates and removes the directory entry. If the
231 * entry is a hardlink pointer, this function will also remove the
232 * hardlink target if the target's nlinks is 1.
234 * The frontend is responsible for moving open inodes to the hidden directory
235 * and for decrementing nlinks.
/*
 * Per-node worker for the unlink XOP: find the directory entry named
 * xop->head.name1 under xop->head.ip1 and delete it.
 *
 * The directory chain is acquired with HAMMER2_RESOLVE_ALWAYS only (no
 * SHARED flag).  The entry is located by hashing the name with
 * hammer2_dirhash() and scanning lhc through
 * lhc + HAMMER2_DIRHASH_LOMASK for an inode bref whose stored name
 * matches.
 *
 * The object type used for the directory typing tests comes from
 * meta.type, or from meta.target_type when the entry is a
 * HAMMER2_OBJTYPE_HARDLINK pointer.  A hardlink pointer is afterwards
 * resolved with hammer2_chain_hardlink_find() and the target is deleted
 * as well when its nlinks, read as int64_t, is <= 1.
 *
 * Before feeding, the chain is unlocked and relocked with the ALWAYS
 * and SHARED resolve flags.  After the feed the chain is dropped and
 * the parent is unlocked and dropped.
 *
 * NOTE(review): the NULL-parent diagnostic prints xop_nresolve rather
 * than xop_unlink; looks like a copy/paste leftover (confirm).
 */
238 hammer2_xop_unlink(hammer2_xop_t *arg, int clindex)
240 hammer2_xop_unlink_t *xop = &arg->xop_unlink;
241 hammer2_chain_t *parent;
242 hammer2_chain_t *chain;
243 const hammer2_inode_data_t *ripdata;
247 hammer2_key_t key_next;
249 int cache_index = -1; /* XXX */
253 * Requires exclusive lock
255 parent = hammer2_inode_chain(xop->head.ip1, clindex,
256 HAMMER2_RESOLVE_ALWAYS);
257 if (parent == NULL) {
258 kprintf("xop_nresolve: NULL parent\n");
263 name = xop->head.name1;
264 name_len = xop->head.name1_len;
267 * Lookup the directory entry
269 lhc = hammer2_dirhash(name, name_len);
270 chain = hammer2_chain_lookup(&parent, &key_next,
271 lhc, lhc + HAMMER2_DIRHASH_LOMASK,
273 HAMMER2_LOOKUP_ALWAYS);
275 ripdata = &chain->data->ipdata;
276 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
277 ripdata->meta.name_len == name_len &&
278 bcmp(ripdata->filename, name, name_len) == 0) {
281 chain = hammer2_chain_next(&parent, chain, &key_next,
283 lhc + HAMMER2_DIRHASH_LOMASK,
285 HAMMER2_LOOKUP_ALWAYS);
289 * If the directory entry is a HARDLINK pointer then obtain the
290 * underlying file type for the directory typing tests and delete
291 * the HARDLINK pointer chain permanently. The frontend is left
292 * responsible for handling nlinks on and deleting the actual inode.
294 * If the directory entry is the actual inode then use its type
295 * for the directory typing tests and delete the chain, permanency
296 * depends on whether the inode is open or not.
298 * Check directory typing and delete the entry. Note that
299 * nlinks adjustments are made on the real inode by the frontend,
304 int dopermanent = xop->dopermanent;
306 type = chain->data->ipdata.meta.type;
307 if (type == HAMMER2_OBJTYPE_HARDLINK) {
308 type = chain->data->ipdata.meta.target_type;
309 dopermanent |= HAMMER2_DELETE_PERMANENT;
311 if (type == HAMMER2_OBJTYPE_DIRECTORY &&
315 if (type != HAMMER2_OBJTYPE_DIRECTORY &&
/*
 * NOTE(review): the local dopermanent was given
 * HAMMER2_DELETE_PERMANENT above for hardlink pointers, yet the call
 * below passes xop->dopermanent, so that flag appears to be lost.
 * Confirm whether the local was the intended argument.
 */
319 hammer2_chain_delete(parent, chain,
320 xop->head.mtid, xop->dopermanent);
325 * If the entry is a hardlink pointer, resolve it. If this is the
326 * last link, delete it. We aren't the frontend so we can't adjust
330 if (chain->data->ipdata.meta.type == HAMMER2_OBJTYPE_HARDLINK) {
331 error = hammer2_chain_hardlink_find(
336 (int64_t)chain->data->ipdata.meta.nlinks <= 1) {
337 hammer2_chain_delete(parent, chain,
345 * Chains passed to feed are expected to be locked shared.
348 hammer2_chain_unlock(chain);
349 hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS |
350 HAMMER2_RESOLVE_SHARED);
354 * We always return the hardlink target (the real inode) for
358 hammer2_xop_feed(&xop->head, chain, clindex, error);
360 hammer2_chain_drop(chain);
362 hammer2_chain_unlock(parent);
363 hammer2_chain_drop(parent);
368 * Backend for hammer2_vop_nlink() and hammer2_vop_nrename()
370 * Convert the target {dip,ip} to a hardlink target and replace
371 * the original namespace with a hardlink pointer.
/*
 * Per-node worker for the nlink XOP.
 *
 * Visible steps, in order:
 *
 *  1. Obtain the chain for ip, step to its parent with
 *     hammer2_chain_getparent(), obtain the chain for ip again and
 *     delete it from that parent (flags 0).
 *  2. When the name_key of the deleted chain has
 *     HAMMER2_DIRHASH_VISIBLE set, create a new INODE bref under the
 *     same parent and fill it in as a hardlink pointer: zeroed inode
 *     data, then the original name_key, name_len and filename,
 *     target_type set to the original type, type HARDLINK, inum taken
 *     from ip, version ONE, nlinks 1 and op_flags DIRECTDATA.
 *  3. Rename the deleted chain to the textual form of the inode number
 *     and key it by the inode number.
 *  4. Obtain the chain for xop->head.ip3, look up the inode number key
 *     under it, then create the chain there.
 *  5. Feed a NULL chain with the resulting error and release the
 *     parent and chain locks and references.
 *
 * NOTE(review): the comment after the final create talks about using
 * xop->lhc, while the create call passes wipdata->meta.name_key (set to
 * ip->meta.inum just before).  The comment may be stale; confirm.
 */
374 hammer2_xop_nlink(hammer2_xop_t *arg, int clindex)
376 hammer2_xop_nlink_t *xop = &arg->xop_nlink;
378 hammer2_inode_data_t *wipdata;
379 hammer2_chain_t *parent;
380 hammer2_chain_t *chain;
381 hammer2_chain_t *tmp;
383 hammer2_key_t key_dummy;
384 int cache_index = -1;
388 * We need the precise parent chain to issue the deletion.
392 parent = hammer2_inode_chain(ip, clindex, HAMMER2_RESOLVE_ALWAYS);
394 hammer2_chain_getparent(&parent, HAMMER2_RESOLVE_ALWAYS);
395 if (parent == NULL) {
400 chain = hammer2_inode_chain(ip, clindex, HAMMER2_RESOLVE_ALWAYS);
405 hammer2_chain_delete(parent, chain, xop->head.mtid, 0);
408 * Replace the namespace with a hardlink pointer if the chain being
409 * moved is not already a hardlink target.
411 if (chain->data->ipdata.meta.name_key & HAMMER2_DIRHASH_VISIBLE) {
413 error = hammer2_chain_create(&parent, &tmp, pmp,
415 HAMMER2_BREF_TYPE_INODE,
/* Build the hardlink pointer in the freshly created chain. */
420 hammer2_chain_modify(tmp, xop->head.mtid, 0);
421 wipdata = &tmp->data->ipdata;
422 bzero(wipdata, sizeof(*wipdata));
423 wipdata->meta.name_key = chain->data->ipdata.meta.name_key;
424 wipdata->meta.name_len = chain->data->ipdata.meta.name_len;
425 bcopy(chain->data->ipdata.filename, wipdata->filename,
426 chain->data->ipdata.meta.name_len);
427 wipdata->meta.target_type = chain->data->ipdata.meta.type;
428 wipdata->meta.type = HAMMER2_OBJTYPE_HARDLINK;
429 wipdata->meta.inum = ip->meta.inum;
430 wipdata->meta.version = HAMMER2_INODE_VERSION_ONE;
431 wipdata->meta.nlinks = 1;
432 wipdata->meta.op_flags = HAMMER2_OPFLAG_DIRECTDATA;
434 hammer2_chain_unlock(tmp);
435 hammer2_chain_drop(tmp);
438 hammer2_chain_unlock(parent);
439 hammer2_chain_drop(parent);
442 * Ok, back to the deleted chain. We must reconnect this chain
443 * as a hardlink target to cdir (ip3).
445 * WARNING! Frontend assumes filename length is 18 bytes.
447 hammer2_chain_modify(chain, xop->head.mtid, 0);
448 wipdata = &chain->data->ipdata;
/* The format is 0x followed by the inum zero-padded to 16 hex digits. */
449 ksnprintf(wipdata->filename, sizeof(wipdata->filename),
450 "0x%016jx", (intmax_t)ip->meta.inum);
451 wipdata->meta.name_len = strlen(wipdata->filename);
452 wipdata->meta.name_key = ip->meta.inum;
455 * We must seek parent properly for the create.
457 parent = hammer2_inode_chain(xop->head.ip3, clindex,
458 HAMMER2_RESOLVE_ALWAYS);
459 if (parent == NULL) {
463 tmp = hammer2_chain_lookup(&parent, &key_dummy,
464 ip->meta.inum, ip->meta.inum,
467 hammer2_chain_unlock(tmp);
468 hammer2_chain_drop(tmp);
472 error = hammer2_chain_create(&parent, &chain, pmp,
473 wipdata->meta.name_key, 0,
474 HAMMER2_BREF_TYPE_INODE,
478 * To avoid having to scan the collision space we can simply
479 * reuse the inode's original name_key. But ip->meta.name_key
480 * may have already been updated by the front-end, so use xop->lhc.
482 * (frontend is responsible for fixing up ip->pip).
485 hammer2_xop_feed(&xop->head, NULL, clindex, error);
487 hammer2_chain_unlock(parent);
488 hammer2_chain_drop(parent);
491 hammer2_chain_unlock(chain);
492 hammer2_chain_drop(chain);
497 * Backend for hammer2_vop_nrename()
499 * This handles the final step of renaming, either renaming the
500 * actual inode or renaming the hardlink pointer.
/*
 * Per-node worker for the nrename XOP.
 *
 * Locating the chain to move:
 *  - When xop->ip_key has HAMMER2_DIRHASH_VISIBLE set, the chain for ip
 *    and its direct parent (via hammer2_chain_getparent()) are used.
 *  - Otherwise xop->head.ip1 is searched for xop->head.name1 using
 *    hammer2_dirhash() and the key range lhc through
 *    lhc + HAMMER2_DIRHASH_LOMASK, matching on inode bref type, name
 *    length and name bytes.
 *
 * The chain is then deleted from that parent (flags 0) and the parent
 * is unlocked, dropped and set to NULL.
 *
 * When the stored name_key differs from xop->lhc, or name1 and name2
 * differ in length or content, the chain is modified: the filename
 * buffer is zeroed, name2 is copied in, and name_key and name_len are
 * set to xop->lhc and xop->head.name2_len.
 *
 * Finally the chain for xop->head.ip3 is obtained, a lookup is issued
 * under it, the chain is created there, a NULL chain is fed with the
 * resulting error, and the parent and chain are unlocked and dropped.
 *
 * NOTE(review): in the namespace-search path the parent is acquired
 * with HAMMER2_RESOLVE_SHARED and is later handed to
 * hammer2_chain_delete(); the other deleting backends in this file
 * acquire their parent without SHARED.  Confirm a shared lock is
 * sufficient here.
 */
503 hammer2_xop_nrename(hammer2_xop_t *arg, int clindex)
505 hammer2_xop_nrename_t *xop = &arg->xop_nrename;
507 hammer2_chain_t *parent;
508 hammer2_chain_t *chain;
509 hammer2_chain_t *tmp;
511 hammer2_key_t key_dummy;
512 int cache_index = -1;
516 * We need the precise parent chain to issue the deletion.
518 * If this is not a hardlink target we can act on the inode,
519 * otherwise we have to locate the hardlink pointer.
525 if (xop->ip_key & HAMMER2_DIRHASH_VISIBLE) {
527 * Find ip's direct parent chain.
529 parent = hammer2_inode_chain(ip, clindex,
530 HAMMER2_RESOLVE_ALWAYS);
532 hammer2_chain_getparent(&parent,
533 HAMMER2_RESOLVE_ALWAYS);
534 if (parent == NULL) {
538 chain = hammer2_inode_chain(ip, clindex,
539 HAMMER2_RESOLVE_ALWAYS);
546 * head.ip1 is fdip, do a namespace search.
548 const hammer2_inode_data_t *ripdata;
550 hammer2_key_t key_next;
554 parent = hammer2_inode_chain(xop->head.ip1, clindex,
555 HAMMER2_RESOLVE_ALWAYS |
556 HAMMER2_RESOLVE_SHARED);
557 if (parent == NULL) {
558 kprintf("xop_nrename: NULL parent\n");
562 name = xop->head.name1;
563 name_len = xop->head.name1_len;
566 * Lookup the directory entry
568 lhc = hammer2_dirhash(name, name_len);
569 chain = hammer2_chain_lookup(&parent, &key_next,
570 lhc, lhc + HAMMER2_DIRHASH_LOMASK,
572 HAMMER2_LOOKUP_ALWAYS);
574 ripdata = &chain->data->ipdata;
575 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
576 ripdata->meta.name_len == name_len &&
577 bcmp(ripdata->filename, name, name_len) == 0) {
580 chain = hammer2_chain_next(&parent, chain, &key_next,
582 lhc + HAMMER2_DIRHASH_LOMASK,
584 HAMMER2_LOOKUP_ALWAYS);
589 * Delete it, then create it in the new namespace.
591 hammer2_chain_delete(parent, chain, xop->head.mtid, 0);
592 hammer2_chain_unlock(parent);
593 hammer2_chain_drop(parent);
594 parent = NULL; /* safety */
598 * Ok, back to the deleted chain. We must reconnect this chain
599 * to tdir (ip3). The chain (a real inode or a hardlink pointer)
600 * is not otherwise modified.
602 * Frontend is expected to replicate the same inode meta data
605 * NOTE! This chain may not represent the actual inode, it
606 * can be a hardlink pointer.
608 * XXX in-inode parent directory specification?
610 if (chain->data->ipdata.meta.name_key != xop->lhc ||
611 xop->head.name1_len != xop->head.name2_len ||
612 bcmp(xop->head.name1, xop->head.name2, xop->head.name1_len) != 0) {
613 hammer2_inode_data_t *wipdata;
615 hammer2_chain_modify(chain, xop->head.mtid, 0);
616 wipdata = &chain->data->ipdata;
618 bzero(wipdata->filename, sizeof(wipdata->filename));
619 bcopy(xop->head.name2, wipdata->filename, xop->head.name2_len);
620 wipdata->meta.name_key = xop->lhc;
621 wipdata->meta.name_len = xop->head.name2_len;
625 * We must seek parent properly for the create.
627 parent = hammer2_inode_chain(xop->head.ip3, clindex,
628 HAMMER2_RESOLVE_ALWAYS);
629 if (parent == NULL) {
633 tmp = hammer2_chain_lookup(&parent, &key_dummy,
637 hammer2_chain_unlock(tmp);
638 hammer2_chain_drop(tmp);
643 error = hammer2_chain_create(&parent, &chain, pmp,
645 HAMMER2_BREF_TYPE_INODE,
649 * (frontend is responsible for fixing up ip->pip).
652 hammer2_xop_feed(&xop->head, NULL, clindex, error);
654 hammer2_chain_unlock(parent);
655 hammer2_chain_drop(parent);
658 hammer2_chain_unlock(chain);
659 hammer2_chain_drop(chain);
664 * Directory collision resolver scan helper (backend, threaded).
666 * Used by the inode create code to locate an unused lhc.
/*
 * Per-node worker for the scanlhc XOP: feed every chain found under
 * xop->head.ip1 in a key range that ends at
 * xop->lhc + HAMMER2_DIRHASH_LOMASK.
 *
 * The directory chain is acquired with the ALWAYS and SHARED resolve
 * flags.  Each chain found is passed to hammer2_xop_feed() and the scan
 * advances with hammer2_chain_next() using the ALWAYS, SHARED and
 * NOUNLOCK lookup flags.  Afterwards a NULL chain is fed with error and
 * the parent is unlocked and dropped.
 *
 * NOTE(review): the NULL-parent diagnostic prints xop_nresolve rather
 * than xop_scanlhc; looks like a copy/paste leftover (confirm).
 */
669 hammer2_xop_scanlhc(hammer2_xop_t *arg, int clindex)
671 hammer2_xop_scanlhc_t *xop = &arg->xop_scanlhc;
672 hammer2_chain_t *parent;
673 hammer2_chain_t *chain;
674 hammer2_key_t key_next;
675 int cache_index = -1; /* XXX */
678 parent = hammer2_inode_chain(xop->head.ip1, clindex,
679 HAMMER2_RESOLVE_ALWAYS |
680 HAMMER2_RESOLVE_SHARED);
681 if (parent == NULL) {
682 kprintf("xop_nresolve: NULL parent\n");
689 * Lookup all possibly conflicting directory entries, the feed
690 * inherits the chain's lock so do not unlock it on the iteration.
692 chain = hammer2_chain_lookup(&parent, &key_next,
694 xop->lhc + HAMMER2_DIRHASH_LOMASK,
696 HAMMER2_LOOKUP_ALWAYS |
697 HAMMER2_LOOKUP_SHARED);
699 error = hammer2_xop_feed(&xop->head, chain, clindex,
702 hammer2_chain_drop(chain);
703 chain = NULL; /* safety */
706 chain = hammer2_chain_next(&parent, chain, &key_next,
708 xop->lhc + HAMMER2_DIRHASH_LOMASK,
710 HAMMER2_LOOKUP_ALWAYS |
711 HAMMER2_LOOKUP_SHARED |
712 HAMMER2_LOOKUP_NOUNLOCK);
715 hammer2_xop_feed(&xop->head, NULL, clindex, error);
717 hammer2_chain_unlock(parent);
718 hammer2_chain_drop(parent);
723 * Generic lookup of a specific key.
725 * Used by the inode hidden directory code to find the hidden directory.
/*
 * Per-node worker for the lookup XOP: a single hammer2_chain_lookup()
 * under xop->head.ip1.
 *
 * The directory chain is acquired with the ALWAYS and SHARED resolve
 * flags and one lookup is issued with the ALWAYS and SHARED lookup
 * flags.  Two feeds are visible: one passing the chain together with
 * chain->error, and one passing a NULL chain with ENOENT (presumably
 * the found and not-found cases respectively; TODO confirm).  The chain
 * is then dropped without being unlocked and the parent is unlocked and
 * dropped.
 *
 * NOTE(review): arg is viewed through its xop_scanlhc member and the
 * comment inside the body speaks of conflicting directory entries; both
 * look carried over from the scanlhc backend.  Confirm the shared
 * argument layout is intentional.
 */
728 hammer2_xop_lookup(hammer2_xop_t *arg, int clindex)
730 hammer2_xop_scanlhc_t *xop = &arg->xop_scanlhc;
731 hammer2_chain_t *parent;
732 hammer2_chain_t *chain;
733 hammer2_key_t key_next;
734 int cache_index = -1; /* XXX */
737 parent = hammer2_inode_chain(xop->head.ip1, clindex,
738 HAMMER2_RESOLVE_ALWAYS |
739 HAMMER2_RESOLVE_SHARED);
741 if (parent == NULL) {
747 * Lookup all possibly conflicting directory entries, the feed
748 * inherits the chain's lock so do not unlock it on the iteration.
750 chain = hammer2_chain_lookup(&parent, &key_next,
753 HAMMER2_LOOKUP_ALWAYS |
754 HAMMER2_LOOKUP_SHARED);
756 hammer2_xop_feed(&xop->head, chain, clindex, chain->error);
758 hammer2_xop_feed(&xop->head, NULL, clindex, ENOENT);
762 /* leave lock intact for feed */
763 hammer2_chain_drop(chain);
766 hammer2_chain_unlock(parent);
767 hammer2_chain_drop(parent);
/*
 * Per-node worker for the scanall XOP: feed every chain found under
 * xop->head.ip1 whose key lies between xop->key_beg and xop->key_end.
 *
 * The inode chain is acquired with the ALWAYS and SHARED resolve flags.
 * The initial lookup uses the SHARED and NODIRECT lookup flags; each
 * chain is fed with an error argument of 0 and the scan advances with
 * hammer2_chain_next(), restarting from key_next and adding the
 * NOUNLOCK flag.  On the way out the chain is dropped, the parent is
 * unlocked and dropped, and a NULL chain is fed with error.
 *
 * NOTE(review): the NULL-parent diagnostic prints xop_readdir rather
 * than xop_scanall; looks like a copy/paste leftover (confirm).
 */
775 hammer2_xop_scanall(hammer2_xop_t *arg, int clindex)
777 hammer2_xop_scanall_t *xop = &arg->xop_scanall;
778 hammer2_chain_t *parent;
779 hammer2_chain_t *chain;
780 hammer2_key_t key_next;
781 int cache_index = -1;
785 * The inode's chain is the iterator. If we cannot acquire it our
786 * contribution ends here.
788 parent = hammer2_inode_chain(xop->head.ip1, clindex,
789 HAMMER2_RESOLVE_ALWAYS |
790 HAMMER2_RESOLVE_SHARED);
791 if (parent == NULL) {
792 kprintf("xop_readdir: NULL parent\n");
797 * Generic scan of exact records. Note that indirect blocks are
798 * automatically recursed and will not be returned.
800 chain = hammer2_chain_lookup(&parent, &key_next,
801 xop->key_beg, xop->key_end,
802 &cache_index, HAMMER2_LOOKUP_SHARED |
803 HAMMER2_LOOKUP_NODIRECT);
805 error = hammer2_xop_feed(&xop->head, chain, clindex, 0);
808 chain = hammer2_chain_next(&parent, chain, &key_next,
809 key_next, xop->key_end,
811 HAMMER2_LOOKUP_SHARED |
812 HAMMER2_LOOKUP_NODIRECT |
813 HAMMER2_LOOKUP_NOUNLOCK);
816 hammer2_chain_drop(chain);
817 hammer2_chain_unlock(parent);
818 hammer2_chain_drop(parent);
820 hammer2_xop_feed(&xop->head, NULL, clindex, error);