2 * Copyright (c) 2011-2015 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7 * by Daniel Flores (GSOC 2013 - mentored by Matthew Dillon, compression)
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
19 * 3. Neither the name of The DragonFly Project nor the names of its
20 * contributors may be used to endorse or promote products derived
21 * from this software without specific, prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
27 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
33 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * Per-node backend for kernel filesystem interface.
39 * This executes a VOP concurrently on multiple nodes, each node via its own
40 * thread, and competes to advance the original request. The original
41 * request is retired the moment all requirements are met, even if the
42 * operation is still in-progress on some nodes.
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/kernel.h>
47 #include <sys/fcntl.h>
50 #include <sys/namei.h>
51 #include <sys/mount.h>
52 #include <sys/vnode.h>
53 #include <sys/mountctl.h>
54 #include <sys/dirent.h>
56 #include <sys/objcache.h>
57 #include <sys/event.h>
59 #include <vfs/fifofs/fifo.h>
64 * Determine if the specified directory is empty. Returns 0 on success.
66 * May return 0, ENOTEMPTY, or EAGAIN.
/*
 * checkdirempty() - emptiness test used by hammer2_xop_unlink() before a
 * directory is deleted.
 *
 * oparent - parent chain of ochain.  It is unlocked and re-locked here
 *           when ochain is a DIRENT that has to be resolved to its inode.
 * ochain  - chain for the directory being removed.
 * clindex - cluster index, forwarded to hammer2_chain_inode_find().
 *
 * The unlink caller treats a non-zero result as either EAGAIN or
 * ENOTEMPTY.
 */
70 checkdirempty(hammer2_chain_t *oparent, hammer2_chain_t *ochain, int clindex)
72 hammer2_chain_t *parent;
73 hammer2_chain_t *chain;
74 hammer2_key_t key_next;
80 chain = hammer2_chain_lookup_init(ochain, 0);
/*
 * A DIRENT only carries the inode number of the directory, so the
 * inode chain itself is looked up by inum.
 */
82 if (chain->bref.type == HAMMER2_BREF_TYPE_DIRENT) {
84 hammer2_chain_unlock(oparent);
85 inum = chain->bref.embed.dirent.inum;
87 error = hammer2_chain_inode_find(chain->pmp, inum,
91 hammer2_chain_unlock(parent);
92 hammer2_chain_drop(parent);
/*
 * oparent was unlocked above; after re-locking, verify that ochain is
 * still its child.  If it is not, the resolved chain is released and
 * the race is logged.
 */
95 hammer2_chain_lock(oparent, HAMMER2_RESOLVE_ALWAYS);
96 if (ochain->parent != oparent) {
98 hammer2_chain_unlock(chain);
99 hammer2_chain_drop(chain);
101 kprintf("H2EAGAIN\n");
109 * Determine if the directory is empty or not by checking its
110 * visible namespace (the area which contains directory entries).
115 chain = hammer2_chain_lookup(&parent, &key_next,
116 HAMMER2_DIRHASH_VISIBLE,
122 hammer2_chain_unlock(chain);
123 hammer2_chain_drop(chain);
127 hammer2_chain_lookup_done(parent);
133 * Backend for hammer2_vfs_root()
135 * This is called when a newly mounted PFS has not yet synchronized
136 * to the inode_tid and modify_tid.
/*
 * hammer2_xop_ipcluster() - per-node worker that hands this node's chain
 * for the inode xop->head.ip1 to the frontend.
 *
 * thr - backend thread; thr->clindex is the cluster index it serves.
 * arg - XOP union, accessed through its xop_ipcluster member.
 */
139 hammer2_xop_ipcluster(hammer2_thread_t *thr, hammer2_xop_t *arg)
141 hammer2_xop_ipcluster_t *xop = &arg->xop_ipcluster;
142 hammer2_chain_t *chain;
/* Acquire the inode's chain for this cluster index, locked shared. */
145 chain = hammer2_inode_chain(xop->head.ip1, thr->clindex,
146 HAMMER2_RESOLVE_ALWAYS |
147 HAMMER2_RESOLVE_SHARED);
149 error = chain->error;
/* Report the chain together with its error state, then release it. */
153 hammer2_xop_feed(&xop->head, chain, thr->clindex, error);
155 hammer2_chain_unlock(chain);
156 hammer2_chain_drop(chain);
161 * Backend for hammer2_vop_readdir()
/*
 * hammer2_xop_readdir() - per-node worker that walks the directory
 * xop->head.ip1 and feeds each chain it finds to the frontend, followed
 * by a terminating NULL feed that carries the final error.
 *
 * thr - backend thread; thr->clindex is the cluster index it serves.
 * arg - XOP union, accessed through its xop_readdir member.
 *
 * The scan begins at lkey: an exact-key lookup (lkey..lkey) and a ranged
 * lookup (lkey..HAMMER2_KEY_MAX) are both present, and iteration continues
 * from key_next with hammer2_chain_next().
 */
164 hammer2_xop_readdir(hammer2_thread_t *thr, hammer2_xop_t *arg)
166 hammer2_xop_readdir_t *xop = &arg->xop_readdir;
167 hammer2_chain_t *parent;
168 hammer2_chain_t *chain;
169 hammer2_key_t key_next;
171 int cache_index = -1;
/* Debug trace, emitted only when bit 0x0020 of hammer2_debug is set. */
175 if (hammer2_debug & 0x0020)
176 kprintf("xop_readdir %p lkey=%016jx\n", xop, lkey);
179 * The inode's chain is the iterator. If we cannot acquire it our
180 * contribution ends here.
182 parent = hammer2_inode_chain(xop->head.ip1, thr->clindex,
183 HAMMER2_RESOLVE_ALWAYS |
184 HAMMER2_RESOLVE_SHARED);
185 if (parent == NULL) {
186 kprintf("xop_readdir: NULL parent\n");
191 * Directory scan [re]start and loop, the feed inherits the chain's
192 * lock so do not unlock it on the iteration.
194 chain = hammer2_chain_lookup(&parent, &key_next, lkey, lkey,
195 &cache_index, HAMMER2_LOOKUP_SHARED);
197 chain = hammer2_chain_lookup(&parent, &key_next,
198 lkey, HAMMER2_KEY_MAX,
200 HAMMER2_LOOKUP_SHARED);
203 error = hammer2_xop_feed(&xop->head, chain, thr->clindex, 0);
206 chain = hammer2_chain_next(&parent, chain, &key_next,
207 key_next, HAMMER2_KEY_MAX,
209 HAMMER2_LOOKUP_SHARED);
/* Release the chain the scan ended on, then the directory chain. */
212 hammer2_chain_unlock(chain);
213 hammer2_chain_drop(chain);
215 hammer2_chain_unlock(parent);
216 hammer2_chain_drop(parent);
/* Terminate the feed: NULL chain plus the final scan status. */
218 hammer2_xop_feed(&xop->head, NULL, thr->clindex, error);
222 * Backend for hammer2_vop_nresolve()
/*
 * hammer2_xop_nresolve() - per-node worker that resolves the name
 * xop->head.name1 inside directory xop->head.ip1 and feeds the result.
 *
 * thr - backend thread; thr->clindex is the cluster index it serves.
 * arg - XOP union, accessed through its xop_nresolve member.
 *
 * The name is hashed with hammer2_dirhash().  Chains in the key range
 * [lhc, lhc + HAMMER2_DIRHASH_LOMASK] are candidates and are compared
 * against the name with hammer2_chain_dirent_test().  A DIRENT result is
 * resolved to its inode via hammer2_chain_inode_find() before feeding.
 */
225 hammer2_xop_nresolve(hammer2_thread_t *thr, hammer2_xop_t *arg)
227 hammer2_xop_nresolve_t *xop = &arg->xop_nresolve;
228 hammer2_chain_t *parent;
229 hammer2_chain_t *chain;
232 hammer2_key_t key_next;
234 int cache_index = -1; /* XXX */
/* Acquire the directory's chain for this cluster index, locked shared. */
237 parent = hammer2_inode_chain(xop->head.ip1, thr->clindex,
238 HAMMER2_RESOLVE_ALWAYS |
239 HAMMER2_RESOLVE_SHARED);
240 if (parent == NULL) {
241 kprintf("xop_nresolve: NULL parent\n");
/* Name being resolved. */
246 name = xop->head.name1;
247 name_len = xop->head.name1_len;
250 * Lookup the directory entry
252 lhc = hammer2_dirhash(name, name_len);
253 chain = hammer2_chain_lookup(&parent, &key_next,
254 lhc, lhc + HAMMER2_DIRHASH_LOMASK,
256 HAMMER2_LOOKUP_ALWAYS |
257 HAMMER2_LOOKUP_SHARED);
/* A matching hash range is not enough; the stored name is compared too. */
259 if (hammer2_chain_dirent_test(chain, name, name_len))
261 chain = hammer2_chain_next(&parent, chain, &key_next,
263 lhc + HAMMER2_DIRHASH_LOMASK,
265 HAMMER2_LOOKUP_ALWAYS |
266 HAMMER2_LOOKUP_SHARED);
270 * If the entry is a hardlink pointer, resolve it.
274 if (chain->bref.type == HAMMER2_BREF_TYPE_DIRENT) {
/* lhc is reused here to carry the target inode number. */
275 lhc = chain->bref.embed.dirent.inum;
276 error = hammer2_chain_inode_find(chain->pmp,
279 HAMMER2_LOOKUP_SHARED,
285 error = hammer2_xop_feed(&xop->head, chain, thr->clindex, error);
/* Release the result chain and then the directory chain. */
287 hammer2_chain_unlock(chain);
288 hammer2_chain_drop(chain);
291 hammer2_chain_unlock(parent);
292 hammer2_chain_drop(parent);
297 * Backend for hammer2_vop_nremove(), hammer2_vop_nrmdir(), helper
298 * for hammer2_vop_nrename(), and backend for pfs_delete.
300 * This function locates and removes a directory entry, and will lookup
301 * and return the underlying inode. For directory entries the underlying
302 * inode is not removed. If the directory entry is the actual inode itself,
303 * it may be conditionally removed and returned.
305 * WARNING! Any target inode's nlinks may not be synchronized to the
306 * in-memory inode. hammer2_inode_unlink_finisher() is
307 * responsible for the final disposition of the actual inode.
309 * The frontend is responsible for moving open-but-deleted inodes to the
310 * mount's hidden directory and for decrementing nlinks.
/*
 * hammer2_xop_unlink() - per-node worker that removes the directory entry
 * xop->head.name1 from directory xop->head.ip1 and feeds the underlying
 * inode chain, together with the resulting error, back to the frontend.
 *
 * thr - backend thread; thr->clindex is the cluster index it serves.
 * arg - XOP union, accessed through its xop_unlink member.
 *
 * xop->dopermanent is a bit field: bit 0 is passed to
 * hammer2_chain_delete() as its flags argument, and bit 1 ("doforce")
 * lets a directory be deleted without the checkdirempty() test.  DIRENT
 * entries always get HAMMER2_DELETE_PERMANENT added.
 *
 * The entry is found by hashing the name with hammer2_dirhash() and
 * testing candidates in [lhc, lhc + HAMMER2_DIRHASH_LOMASK] with
 * hammer2_chain_dirent_test().
 */
313 hammer2_xop_unlink(hammer2_thread_t *thr, hammer2_xop_t *arg)
315 hammer2_xop_unlink_t *xop = &arg->xop_unlink;
316 hammer2_chain_t *parent;
317 hammer2_chain_t *chain;
320 hammer2_key_t key_next;
322 int cache_index = -1; /* XXX */
327 * Requires exclusive lock
329 parent = hammer2_inode_chain(xop->head.ip1, thr->clindex,
330 HAMMER2_RESOLVE_ALWAYS);
332 if (parent == NULL) {
333 kprintf("xop_unlink: NULL parent\n");
/* Name of the entry being removed. */
337 name = xop->head.name1;
338 name_len = xop->head.name1_len;
341 * Lookup the directory entry
343 lhc = hammer2_dirhash(name, name_len);
344 chain = hammer2_chain_lookup(&parent, &key_next,
345 lhc, lhc + HAMMER2_DIRHASH_LOMASK,
347 HAMMER2_LOOKUP_ALWAYS);
/* A matching hash range is not enough; the stored name is compared too. */
349 if (hammer2_chain_dirent_test(chain, name, name_len))
351 chain = hammer2_chain_next(&parent, chain, &key_next,
353 lhc + HAMMER2_DIRHASH_LOMASK,
355 HAMMER2_LOOKUP_ALWAYS);
359 * The directory entry will either be a BREF_TYPE_DIRENT or a
360 * BREF_TYPE_INODE. We always permanently delete DIRENTs, but
361 * must go by xop->dopermanent for BREF_TYPE_INODE.
363 * Note that the target chain's nlinks may not be synchronized with
364 * the in-memory hammer2_inode_t structure, so we don't try to do
365 * anything fancy here.
369 int dopermanent = xop->dopermanent & 1;
370 int doforce = xop->dopermanent & 2;
374 * If the directory entry is the actual inode then use its
375 * type for the directory typing tests, otherwise if it is
376 * a directory entry, pull the type field from the entry.
378 * Directory entries are always permanently deleted
379 * (because they aren't the actual inode).
381 if (chain->bref.type == HAMMER2_BREF_TYPE_DIRENT) {
/* DIRENT: the object type is recorded in the entry itself. */
382 type = chain->bref.embed.dirent.type;
383 dopermanent |= HAMMER2_DELETE_PERMANENT;
/* Inode chain: the object type comes from the inode meta-data. */
385 type = chain->data->ipdata.meta.type;
389 * Check directory typing and delete the entry. Note that
390 * nlinks adjustments are made on the real inode by the
391 * frontend, not here.
393 * Unfortunately, checkdirempty() may have to unlock (parent).
394 * If it no longer matches chain->parent after re-locking,
395 * EAGAIN is returned.
397 if (type == HAMMER2_OBJTYPE_DIRECTORY && doforce) {
399 * If doforce then execute the operation even if
400 * the directory is not empty.
402 error = chain->error;
403 hammer2_chain_delete(parent, chain,
404 xop->head.mtid, dopermanent);
405 } else if (type == HAMMER2_OBJTYPE_DIRECTORY &&
406 (error = checkdirempty(parent, chain, thr->clindex)) != 0) {
408 * error may be EAGAIN or ENOTEMPTY
410 if (error == EAGAIN) {
/* The parent/chain relationship changed; back out of both. */
411 hammer2_chain_unlock(chain);
412 hammer2_chain_drop(chain);
413 hammer2_chain_unlock(parent);
414 hammer2_chain_drop(parent);
417 } else if (type == HAMMER2_OBJTYPE_DIRECTORY &&
420 } else if (type != HAMMER2_OBJTYPE_DIRECTORY &&
425 * Delete the directory entry. chain might also
426 * be a directly-embedded inode.
428 error = chain->error;
429 hammer2_chain_delete(parent, chain,
430 xop->head.mtid, dopermanent);
435 * If chain is a directory entry we must resolve it. We do not try
436 * to manipulate the contents as it might not be synchronized with
437 * the frontend hammer2_inode_t, nor do we try to lookup the
438 * frontend hammer2_inode_t here (we are the backend!).
440 if (chain && chain->bref.type == HAMMER2_BREF_TYPE_DIRENT) {
/* lhc is reused here to carry the inode number the entry points at. */
443 lhc = chain->bref.embed.dirent.inum;
445 error2 = hammer2_chain_inode_find(chain->pmp, lhc,
449 kprintf("inode_find: %016jx %p failed\n",
451 error2 = 0; /* silently ignore */
458 * Return the inode target for further action. Typically used by
459 * hammer2_inode_unlink_finisher().
462 hammer2_xop_feed(&xop->head, chain, thr->clindex, error);
/* Release the fed chain and then the directory chain. */
464 hammer2_chain_unlock(chain);
465 hammer2_chain_drop(chain);
469 hammer2_chain_unlock(parent);
470 hammer2_chain_drop(parent);
476 * Backend for hammer2_vop_nrename()
478 * This handles the final step of renaming, either renaming the
479 * actual inode or renaming the directory entry.
/*
 * hammer2_xop_nrename() - per-node worker for the final rename step: the
 * chain is deleted from its current parent, its name/key are rewritten if
 * they changed, and it is re-created under directory xop->head.ip3.
 *
 * thr - backend thread; thr->clindex is the cluster index it serves.
 * arg - XOP union, accessed through its xop_nrename member.
 *
 * Locating the chain:
 *  - When xop->ip_key has HAMMER2_DIRHASH_VISIBLE set, the inode's own
 *    chain is used and its parent is obtained with
 *    hammer2_chain_getparent().
 *  - Otherwise xop->head.name1 is hashed and searched for in
 *    xop->head.ip1, as in the other name-based XOPs.
 *
 * Rewriting the name (only when the key or the name differs):
 *  - INODE chains get filename, meta.name_key and meta.name_len updated.
 *  - DIRENT chains keep a name that fits in bref.check.buf inline there;
 *    a longer name is placed in the chain's data buffer after a resize.
 *
 * A throwaway lookup (tmp) is issued on the destination parent before the
 * create; any chain it returns is released immediately.  Only the error
 * is fed back, with a NULL chain.
 */
482 hammer2_xop_nrename(hammer2_thread_t *thr, hammer2_xop_t *arg)
484 hammer2_xop_nrename_t *xop = &arg->xop_nrename;
486 hammer2_chain_t *parent;
487 hammer2_chain_t *chain;
488 hammer2_chain_t *tmp;
490 hammer2_key_t key_dummy;
491 int cache_index = -1;
495 * We need the precise parent chain to issue the deletion.
497 * If this is a directory entry we must locate the underlying
498 * inode. If it is an embedded inode we can act directly on it.
504 if (xop->ip_key & HAMMER2_DIRHASH_VISIBLE) {
506 * Find ip's direct parent chain.
508 parent = hammer2_inode_chain(ip, thr->clindex,
509 HAMMER2_RESOLVE_ALWAYS);
511 hammer2_chain_getparent(&parent,
512 HAMMER2_RESOLVE_ALWAYS);
513 if (parent == NULL) {
517 chain = hammer2_inode_chain(ip, thr->clindex,
518 HAMMER2_RESOLVE_ALWAYS);
525 * The directory entry for the head.ip1 inode
526 * is in fdip, do a namespace search.
529 hammer2_key_t key_next;
533 parent = hammer2_inode_chain(xop->head.ip1, thr->clindex,
534 HAMMER2_RESOLVE_ALWAYS);
535 if (parent == NULL) {
536 kprintf("xop_nrename: NULL parent\n");
/* Source name, i.e. the entry being moved. */
540 name = xop->head.name1;
541 name_len = xop->head.name1_len;
544 * Lookup the directory entry
546 lhc = hammer2_dirhash(name, name_len);
547 chain = hammer2_chain_lookup(&parent, &key_next,
548 lhc, lhc + HAMMER2_DIRHASH_LOMASK,
550 HAMMER2_LOOKUP_ALWAYS);
/* A matching hash range is not enough; the stored name is compared too. */
552 if (hammer2_chain_dirent_test(chain, name, name_len))
554 chain = hammer2_chain_next(&parent, chain, &key_next,
556 lhc + HAMMER2_DIRHASH_LOMASK,
558 HAMMER2_LOOKUP_ALWAYS);
563 /* XXX shouldn't happen, but does under fsstress */
564 kprintf("hammer2_xop_rename: \"%s\" -> \"%s\" ENOENT\n",
572 * Delete it, then create it in the new namespace.
574 hammer2_chain_delete(parent, chain, xop->head.mtid, 0);
/* The source parent is released once the chain has been deleted from it. */
575 hammer2_chain_unlock(parent);
576 hammer2_chain_drop(parent);
577 parent = NULL; /* safety */
580 * Ok, back to the deleted chain. We must reconnect this chain
581 * to tdir (ip3) and adjust the filename. The chain (a real inode
582 * or a directory entry) is not otherwise modified.
584 * The frontend is expected to replicate the same inode meta data
585 * modifications if necessary.
587 if (chain->bref.key != xop->lhc ||
588 xop->head.name1_len != xop->head.name2_len ||
589 bcmp(xop->head.name1, xop->head.name2, xop->head.name1_len) != 0) {
590 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
591 hammer2_inode_data_t *wipdata;
593 hammer2_chain_modify(chain, xop->head.mtid, 0, 0);
594 wipdata = &chain->data->ipdata;
596 bzero(wipdata->filename, sizeof(wipdata->filename));
597 bcopy(xop->head.name2, wipdata->filename,
598 xop->head.name2_len);
599 wipdata->meta.name_key = xop->lhc;
600 wipdata->meta.name_len = xop->head.name2_len;
/*
 * DIRENT: a name that fits in bref.check.buf is stored inline there,
 * a longer one goes into the chain's data buffer.
 */
602 if (chain->bref.type == HAMMER2_BREF_TYPE_DIRENT) {
603 if (xop->head.name2_len <= sizeof(chain->bref.check.buf)) {
604 hammer2_chain_resize(chain, xop->head.mtid, 0,
606 hammer2_chain_modify(chain, xop->head.mtid,
608 bzero(chain->bref.check.buf,
609 sizeof(chain->bref.check.buf));
610 bcopy(xop->head.name2, chain->bref.check.buf,
611 xop->head.name2_len);
613 hammer2_chain_resize(chain, xop->head.mtid, 0,
614 hammer2_getradix(HAMMER2_ALLOC_MIN), 0);
615 hammer2_chain_modify(chain, xop->head.mtid,
617 bzero(chain->data->buf,
618 sizeof(chain->data->buf));
619 bcopy(xop->head.name2, chain->data->buf,
620 xop->head.name2_len);
622 chain->bref.embed.dirent.namlen = xop->head.name2_len;
627 * If an embedded inode, adjust iparent directly.
629 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
630 chain->data->ipdata.meta.iparent != xop->head.ip3->meta.inum) {
631 hammer2_inode_data_t *wipdata;
633 hammer2_chain_modify(chain, xop->head.mtid, 0, 0);
634 wipdata = &chain->data->ipdata;
636 wipdata->meta.iparent = xop->head.ip3->meta.inum;
640 * We must seek parent properly for the create.
642 parent = hammer2_inode_chain(xop->head.ip3, thr->clindex,
643 HAMMER2_RESOLVE_ALWAYS);
644 if (parent == NULL) {
648 tmp = hammer2_chain_lookup(&parent, &key_dummy,
652 hammer2_chain_unlock(tmp);
653 hammer2_chain_drop(tmp);
658 error = hammer2_chain_create(&parent, &chain,
659 pmp, HAMMER2_METH_DEFAULT,
661 HAMMER2_BREF_TYPE_INODE,
663 xop->head.mtid, 0, 0);
/* No chain is fed back for a rename, only the completion status. */
665 hammer2_xop_feed(&xop->head, NULL, thr->clindex, error);
/* Release the destination parent and then the renamed chain. */
667 hammer2_chain_unlock(parent);
668 hammer2_chain_drop(parent);
671 hammer2_chain_unlock(chain);
672 hammer2_chain_drop(chain);
677 * Directory collision resolver scan helper (backend, threaded).
679 * Used by the inode create code to locate an unused lhc.
/*
 * hammer2_xop_scanlhc() - per-node worker that feeds the chains found in
 * directory xop->head.ip1 up to key xop->lhc + HAMMER2_DIRHASH_LOMASK,
 * followed by a terminating NULL feed that carries the final error.
 *
 * thr - backend thread; thr->clindex is the cluster index it serves.
 * arg - XOP union, accessed through its xop_scanlhc member.
 */
682 hammer2_xop_scanlhc(hammer2_thread_t *thr, hammer2_xop_t *arg)
684 hammer2_xop_scanlhc_t *xop = &arg->xop_scanlhc;
685 hammer2_chain_t *parent;
686 hammer2_chain_t *chain;
687 hammer2_key_t key_next;
688 int cache_index = -1; /* XXX */
/* Acquire the directory's chain for this cluster index, locked shared. */
691 parent = hammer2_inode_chain(xop->head.ip1, thr->clindex,
692 HAMMER2_RESOLVE_ALWAYS |
693 HAMMER2_RESOLVE_SHARED);
694 if (parent == NULL) {
695 kprintf("xop_scanlhc: NULL parent\n");
702 * Lookup all possibly conflicting directory entries, the feed
703 * inherits the chain's lock so do not unlock it on the iteration.
705 chain = hammer2_chain_lookup(&parent, &key_next,
707 xop->lhc + HAMMER2_DIRHASH_LOMASK,
709 HAMMER2_LOOKUP_ALWAYS |
710 HAMMER2_LOOKUP_SHARED);
712 error = hammer2_xop_feed(&xop->head, chain, thr->clindex,
715 hammer2_chain_unlock(chain);
716 hammer2_chain_drop(chain);
717 chain = NULL; /* safety */
720 chain = hammer2_chain_next(&parent, chain, &key_next,
722 xop->lhc + HAMMER2_DIRHASH_LOMASK,
724 HAMMER2_LOOKUP_ALWAYS |
725 HAMMER2_LOOKUP_SHARED);
/* Terminate the feed: NULL chain plus the final scan status. */
728 hammer2_xop_feed(&xop->head, NULL, thr->clindex, error);
/* Release the directory chain. */
730 hammer2_chain_unlock(parent);
731 hammer2_chain_drop(parent);
736 * Generic lookup of a specific key.
738 * Used by the inode hidden directory code to find the hidden directory.
/*
 * hammer2_xop_lookup() - per-node worker that performs one key lookup in
 * xop->head.ip1 and feeds the result.
 *
 * thr - backend thread; thr->clindex is the cluster index it serves.
 * arg - XOP union.
 *
 * NOTE(review): the XOP is accessed through the xop_scanlhc member (and
 * type) rather than a lookup-specific one - presumably the two share a
 * layout on purpose; confirm against the xop definitions.
 */
741 hammer2_xop_lookup(hammer2_thread_t *thr, hammer2_xop_t *arg)
743 hammer2_xop_scanlhc_t *xop = &arg->xop_scanlhc;
744 hammer2_chain_t *parent;
745 hammer2_chain_t *chain;
746 hammer2_key_t key_next;
747 int cache_index = -1; /* XXX */
/* Acquire the inode's chain for this cluster index, locked shared. */
750 parent = hammer2_inode_chain(xop->head.ip1, thr->clindex,
751 HAMMER2_RESOLVE_ALWAYS |
752 HAMMER2_RESOLVE_SHARED);
754 if (parent == NULL) {
760 * Lookup all possibly conflicting directory entries, the feed
761 * inherits the chain's lock so do not unlock it on the iteration.
763 chain = hammer2_chain_lookup(&parent, &key_next,
766 HAMMER2_LOOKUP_ALWAYS |
767 HAMMER2_LOOKUP_SHARED);
/*
 * Result feed: a found chain is reported with its own error state,
 * a miss is reported as a NULL chain with ENOENT.
 */
769 hammer2_xop_feed(&xop->head, chain, thr->clindex, chain->error);
771 hammer2_xop_feed(&xop->head, NULL, thr->clindex, ENOENT);
/* Only one lookup is issued here; release its chain and then the parent. */
775 hammer2_chain_unlock(chain);
776 hammer2_chain_drop(chain);
779 hammer2_chain_unlock(parent);
780 hammer2_chain_drop(parent);
787 * WARNING! Fed chains must be locked shared so ownership can be transferred
788 * and to prevent frontend/backend stalls that would occur with an
789 * exclusive lock. The shared lock also allows chain->data to be
/*
 * hammer2_xop_scanall() - per-node worker that scans xop->head.ip1 over
 * the key range [xop->key_beg, xop->key_end] and feeds every chain found,
 * followed by a terminating NULL feed that carries the final error.
 *
 * thr - backend thread; thr->clindex is the cluster index it serves.
 * arg - XOP union, accessed through its xop_scanall member.
 *
 * The caller chooses the lock modes through xop->resolve_flags and
 * xop->lookup_flags; both are asserted to include their SHARED bit.
 */
793 hammer2_xop_scanall(hammer2_thread_t *thr, hammer2_xop_t *arg)
795 hammer2_xop_scanall_t *xop = &arg->xop_scanall;
796 hammer2_chain_t *parent;
797 hammer2_chain_t *chain;
798 hammer2_key_t key_next;
799 int cache_index = -1;
803 * Assert required flags.
805 KKASSERT(xop->resolve_flags & HAMMER2_RESOLVE_SHARED);
806 KKASSERT(xop->lookup_flags & HAMMER2_LOOKUP_SHARED);
809 * The inode's chain is the iterator. If we cannot acquire it our
810 * contribution ends here.
812 parent = hammer2_inode_chain(xop->head.ip1, thr->clindex,
814 if (parent == NULL) {
815 kprintf("xop_scanall: NULL parent\n");
820 * Generic scan of exact records. Note that indirect blocks are
821 * automatically recursed and will not be returned.
823 chain = hammer2_chain_lookup(&parent, &key_next,
824 xop->key_beg, xop->key_end,
825 &cache_index, xop->lookup_flags);
827 error = hammer2_xop_feed(&xop->head, chain, thr->clindex, 0);
830 chain = hammer2_chain_next(&parent, chain, &key_next,
831 key_next, xop->key_end,
832 &cache_index, xop->lookup_flags);
/* Release the chain the scan ended on, then the inode's chain. */
835 hammer2_chain_unlock(chain);
836 hammer2_chain_drop(chain);
838 hammer2_chain_unlock(parent);
839 hammer2_chain_drop(parent);
/* Terminate the feed: NULL chain plus the final scan status. */
841 hammer2_xop_feed(&xop->head, NULL, thr->clindex, error);