hammer2 - serialized flush work part 4
[dragonfly.git] / sys / vfs / hammer2 / hammer2_inode.c
... / ...
CommitLineData
1/*
2 * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
17 * distribution.
18 * 3. Neither the name of The DragonFly Project nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific, prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35#include <sys/cdefs.h>
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/types.h>
39#include <sys/lock.h>
40#include <sys/uuid.h>
41
42#include "hammer2.h"
43
44/*
45 * Adding a ref to an inode is only legal if the inode already has at least
46 * one ref.
47 */
48void
49hammer2_inode_ref(hammer2_inode_t *ip)
50{
51 atomic_add_int(&ip->refs, 1);
52}
53
54/*
55 * Drop an inode reference, freeing the inode when the last reference goes
56 * away.
57 */
58void
59hammer2_inode_drop(hammer2_inode_t *ip)
60{
61 hammer2_mount_t *hmp;
62 hammer2_inode_t *pip;
63 hammer2_chain_t *chain;
64 u_int refs;
65
66 for (;;) {
67 refs = ip->refs;
68 cpu_ccfence();
69 if (refs == 1) {
70 if (atomic_cmpset_int(&ip->refs, 1, 0)) {
71 KKASSERT(ip->topo_cst.count == 0);
72
73 hmp = ip->hmp;
74 ip->hmp = NULL;
75 pip = ip->pip;
76 ip->pip = NULL;
77 chain = ip->chain;
78 ip->chain = NULL;
79 if (chain)
80 hammer2_chain_drop(hmp, chain);
81
82 /*
83 * We have to drop pip (if non-NULL) to
84 * dispose of our implied reference from
85 * ip->pip. We can simply loop on it.
86 */
87 kfree(ip, hmp->minode);
88 if (pip == NULL)
89 break;
90 ip = pip;
91 /* continue */
92 }
93 } else {
94 if (atomic_cmpset_int(&ip->refs, refs, refs - 1))
95 break;
96 }
97 }
98}
99
100/*
101 * Get the vnode associated with the given inode, allocating the vnode if
102 * necessary. The vnode will be returned exclusively locked.
103 *
104 * The caller must lock the inode (shared or exclusive).
105 *
106 * Great care must be taken to avoid deadlocks and vnode acquisition/reclaim
107 * races.
108 */
109struct vnode *
110hammer2_igetv(hammer2_inode_t *ip, int *errorp)
111{
112 hammer2_inode_data_t *ipdata;
113 hammer2_pfsmount_t *pmp;
114 struct vnode *vp;
115 ccms_state_t ostate;
116
117 pmp = ip->pmp;
118 KKASSERT(pmp != NULL);
119 *errorp = 0;
120 ipdata = &ip->chain->data->ipdata;
121
122 for (;;) {
123 /*
124 * Attempt to reuse an existing vnode assignment. It is
125 * possible to race a reclaim so the vget() may fail. The
126 * inode must be unlocked during the vget() to avoid a
127 * deadlock against a reclaim.
128 */
129 vp = ip->vp;
130 if (vp) {
131 /*
132 * Inode must be unlocked during the vget() to avoid
133 * possible deadlocks, but leave the ip ref intact.
134 *
135 * vnode is held to prevent destruction during the
136 * vget(). The vget() can still fail if we lost
137 * a reclaim race on the vnode.
138 */
139 vhold_interlocked(vp);
140 ostate = hammer2_inode_lock_temp_release(ip);
141 if (vget(vp, LK_EXCLUSIVE)) {
142 vdrop(vp);
143 hammer2_inode_lock_restore(ip, ostate);
144 continue;
145 }
146 hammer2_inode_lock_restore(ip, ostate);
147 vdrop(vp);
148 /* vp still locked and ref from vget */
149 if (ip->vp != vp) {
150 kprintf("hammer2: igetv race %p/%p\n",
151 ip->vp, vp);
152 vput(vp);
153 continue;
154 }
155 *errorp = 0;
156 break;
157 }
158
159 /*
160 * No vnode exists, allocate a new vnode. Beware of
161 * allocation races. This function will return an
162 * exclusively locked and referenced vnode.
163 */
164 *errorp = getnewvnode(VT_HAMMER2, pmp->mp, &vp, 0, 0);
165 if (*errorp) {
166 kprintf("hammer2: igetv getnewvnode failed %d\n",
167 *errorp);
168 vp = NULL;
169 break;
170 }
171
172 /*
173 * Lock the inode and check for an allocation race.
174 */
175 ostate = hammer2_inode_lock_upgrade(ip);
176 if (ip->vp != NULL) {
177 vp->v_type = VBAD;
178 vx_put(vp);
179 hammer2_inode_lock_restore(ip, ostate);
180 continue;
181 }
182
183 switch (ipdata->type) {
184 case HAMMER2_OBJTYPE_DIRECTORY:
185 vp->v_type = VDIR;
186 break;
187 case HAMMER2_OBJTYPE_REGFILE:
188 vp->v_type = VREG;
189 vinitvmio(vp, ipdata->size,
190 HAMMER2_LBUFSIZE,
191 (int)ipdata->size & HAMMER2_LBUFMASK);
192 break;
193 case HAMMER2_OBJTYPE_SOFTLINK:
194 /*
195 * XXX for now we are using the generic file_read
196 * and file_write code so we need a buffer cache
197 * association.
198 */
199 vp->v_type = VLNK;
200 vinitvmio(vp, ipdata->size,
201 HAMMER2_LBUFSIZE,
202 (int)ipdata->size & HAMMER2_LBUFMASK);
203 break;
204 /* XXX FIFO */
205 default:
206 panic("hammer2: unhandled objtype %d", ipdata->type);
207 break;
208 }
209
210 if (ip == pmp->iroot)
211 vsetflags(vp, VROOT);
212
213 vp->v_data = ip;
214 ip->vp = vp;
215 hammer2_inode_ref(ip); /* vp association */
216 hammer2_inode_lock_restore(ip, ostate);
217 break;
218 }
219
220 /*
221 * Return non-NULL vp and *errorp == 0, or NULL vp and *errorp != 0.
222 */
223 if (hammer2_debug & 0x0002) {
224 kprintf("igetv vp %p refs %d aux %d\n",
225 vp, vp->v_sysref.refcnt, vp->v_auxrefs);
226 }
227 return (vp);
228}
229
230/*
231 * The passed-in chain must be locked and the returned inode will also be
232 * locked. A ref is added to both the chain and the inode.
233 *
234 * The hammer2_inode structure regulates the interface between the high level
235 * kernel VNOPS API and the filesystem backend (the chains).
236 *
237 * NOTE! This routine allocates the hammer2_inode structure
238 * unconditionally, and thus there might be several which
239 * are associated with the same chain. Particularly for hardlinks
240 * but this can also happen temporarily for normal files and
241 * directories.
242 *
243 * WARNING! This routine sucks up the chain's lock (makes it part of the
244 * inode lock from the point of view of the inode lock API),
245 * so callers need to be careful.
246 *
247 * WARNING! The mount code is allowed to pass dip == NULL for iroot and
248 * is allowed to pass pmp == NULL and dip == NULL for sroot.
249 */
250hammer2_inode_t *
251hammer2_inode_get(hammer2_mount_t *hmp, hammer2_pfsmount_t *pmp,
252 hammer2_inode_t *dip, hammer2_chain_t *chain)
253{
254 hammer2_inode_t *nip;
255
256 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE);
257
258 nip = kmalloc(sizeof(*nip), hmp->minode, M_WAITOK | M_ZERO);
259
260 nip->chain = chain;
261 hammer2_chain_ref(hmp, chain); /* nip->chain */
262 nip->pip = dip; /* can be NULL */
263 if (dip)
264 hammer2_inode_ref(dip); /* ref dip for nip->pip */
265
266 nip->pmp = pmp;
267 nip->hmp = hmp;
268
269 /*
270 * ref and lock on nip gives it state compatible to after a
271 * hammer2_inode_lock_ex() call.
272 */
273 nip->refs = 1;
274 ccms_cst_init(&nip->topo_cst, &nip->chain);
275 ccms_thread_lock(&nip->topo_cst, CCMS_STATE_EXCLUSIVE);
276 /* combination of thread lock and chain lock == inode lock */
277
278 return (nip);
279}
280
281/*
282 * Put away an inode, disconnecting it from its chain. The inode must be
283 * exclusively locked.
284 *
285 * The inode will be unlocked by this function. Note however that any related
286 * chain returned by the hammer2_inode_lock_*() call will NOT be unlocked
287 * by this function. The related chain is dropped to undo the ref that
288 * hammer2_inode_get() put on it.
289 *
290 * passed_chain is unlocked normally and does not have to be directly
291 * associated with (ip). This is simply so the API works the same as
292 * the hammer2_inode_unlock_ex() API. NULL is ok.
293 */
294void
295hammer2_inode_put(hammer2_inode_t *ip, hammer2_chain_t *passed_chain)
296{
297 hammer2_mount_t *hmp = ip->hmp;
298 hammer2_inode_t *pip;
299 hammer2_chain_t *chain;
300
301 /*
302 * Disconnect chain
303 */
304 if ((chain = ip->chain) != NULL) {
305 ip->chain = NULL;
306 hammer2_chain_drop(hmp, chain); /* from *_get() */
307 }
308 KKASSERT(ip->topo_cst.count == -1); /* one excl lock allowed */
309
310 /*
311 * Disconnect pip
312 */
313 if ((pip = ip->pip) != NULL) {
314 ip->pip = NULL;
315 hammer2_inode_drop(pip);
316 }
317
318 /*
319 * clean up the ip, we use an inode_unlock_ex-compatible API.
320 */
321 hammer2_inode_unlock_ex(ip, passed_chain);
322}
323
324/*
325 * Create a new inode in the specified directory using the vattr to
326 * figure out the type of inode.
327 *
328 * If no error occurs the new inode with its chain locked is returned in
329 * *nipp, otherwise an error is returned and *nipp is set to NULL.
330 *
331 * If vap and/or cred are NULL the related fields are not set and the
332 * inode type defaults to a directory. This is used when creating PFSs
333 * under the super-root, so the inode number is set to 1 in this case.
334 *
335 * dip is not locked on entry.
336 */
337int
338hammer2_inode_create(hammer2_inode_t *dip,
339 struct vattr *vap, struct ucred *cred,
340 const uint8_t *name, size_t name_len,
341 hammer2_inode_t **nipp, hammer2_chain_t **nchainp)
342{
343 hammer2_inode_data_t *nipdata;
344 hammer2_mount_t *hmp;
345 hammer2_chain_t *chain;
346 hammer2_chain_t *parent;
347 hammer2_inode_t *nip;
348 hammer2_key_t lhc;
349 int error;
350 uid_t xuid;
351 uuid_t dip_uid;
352 uuid_t dip_gid;
353 uint32_t dip_mode;
354
355 hmp = dip->hmp;
356 lhc = hammer2_dirhash(name, name_len);
357
358 /*
359 * Locate the inode or indirect block to create the new
360 * entry in. At the same time check for key collisions
361 * and iterate until we don't get one.
362 */
363retry:
364 parent = hammer2_inode_lock_ex(dip);
365
366 dip_uid = parent->data->ipdata.uid;
367 dip_gid = parent->data->ipdata.gid;
368 dip_mode = parent->data->ipdata.mode;
369
370 error = 0;
371 while (error == 0) {
372 chain = hammer2_chain_lookup(hmp, &parent, lhc, lhc, 0);
373 if (chain == NULL)
374 break;
375 if ((lhc & HAMMER2_DIRHASH_VISIBLE) == 0)
376 error = ENOSPC;
377 if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK)
378 error = ENOSPC;
379 hammer2_chain_unlock(hmp, chain);
380 chain = NULL;
381 ++lhc;
382 }
383 if (error == 0) {
384 chain = hammer2_chain_create(hmp, parent, NULL, lhc, 0,
385 HAMMER2_BREF_TYPE_INODE,
386 HAMMER2_INODE_BYTES,
387 &error);
388 }
389
390 hammer2_inode_unlock_ex(dip, parent);
391
392 /*
393 * Handle the error case
394 */
395 if (error) {
396 KKASSERT(chain == NULL);
397 if (error == EAGAIN) {
398 hammer2_chain_wait(hmp, parent);
399 goto retry;
400 }
401 *nipp = NULL;
402 *nchainp = NULL;
403 return (error);
404 }
405
406 /*
407 * Set up the new inode.
408 *
409 * NOTE: *_get() integrates chain's lock into the inode lock.
410 */
411 nip = hammer2_inode_get(dip->hmp, dip->pmp, dip, chain);
412 *nipp = nip;
413 *nchainp = chain;
414 nipdata = &chain->data->ipdata;
415
416 hammer2_voldata_lock(hmp);
417 if (vap) {
418 nipdata->type = hammer2_get_obj_type(vap->va_type);
419 nipdata->inum = hmp->voldata.alloc_tid++;
420 /* XXX modify/lock */
421 } else {
422 nipdata->type = HAMMER2_OBJTYPE_DIRECTORY;
423 nipdata->inum = 1;
424 }
425 hammer2_voldata_unlock(hmp);
426 nipdata->version = HAMMER2_INODE_VERSION_ONE;
427 hammer2_update_time(&nipdata->ctime);
428 nipdata->mtime = nipdata->ctime;
429 if (vap)
430 nipdata->mode = vap->va_mode;
431 nipdata->nlinks = 1;
432 if (vap) {
433 if (dip) {
434 xuid = hammer2_to_unix_xid(&dip_uid);
435 xuid = vop_helper_create_uid(dip->pmp->mp,
436 dip_mode,
437 xuid,
438 cred,
439 &vap->va_mode);
440 } else {
441 xuid = 0;
442 }
443 if (vap->va_vaflags & VA_UID_UUID_VALID)
444 nipdata->uid = vap->va_uid_uuid;
445 else if (vap->va_uid != (uid_t)VNOVAL)
446 hammer2_guid_to_uuid(&nipdata->uid, vap->va_uid);
447 else
448 hammer2_guid_to_uuid(&nipdata->uid, xuid);
449
450 if (vap->va_vaflags & VA_GID_UUID_VALID)
451 nipdata->gid = vap->va_gid_uuid;
452 else if (vap->va_gid != (gid_t)VNOVAL)
453 hammer2_guid_to_uuid(&nipdata->gid, vap->va_gid);
454 else if (dip)
455 nipdata->gid = dip_gid;
456 }
457
458 /*
459 * Regular files and softlinks allow a small amount of data to be
460 * directly embedded in the inode. This flag will be cleared if
461 * the size is extended past the embedded limit.
462 */
463 if (nipdata->type == HAMMER2_OBJTYPE_REGFILE ||
464 nipdata->type == HAMMER2_OBJTYPE_SOFTLINK) {
465 nipdata->op_flags |= HAMMER2_OPFLAG_DIRECTDATA;
466 }
467
468 KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
469 bcopy(name, nipdata->filename, name_len);
470 nipdata->name_key = lhc;
471 nipdata->name_len = name_len;
472
473 return (0);
474}
475
476/*
477 * Create a duplicate of the inode (chain) in the specified target directory
478 * (dip), return the duplicated chain in *nchainp (locked). chain is locked
479 * on call and remains locked on return.
480 *
481 * If name is NULL the inode is duplicated as a hidden directory entry.
482 *
483 * XXX name needs to be NULL for now.
484 */
485int
486hammer2_inode_duplicate(hammer2_inode_t *dip,
487 hammer2_chain_t *ochain, hammer2_chain_t **nchainp)
488{
489 hammer2_inode_data_t *nipdata;
490 hammer2_mount_t *hmp;
491 hammer2_chain_t *parent;
492 hammer2_chain_t *chain;
493 hammer2_key_t lhc;
494 int error = 0;
495
496 hmp = dip->hmp;
497 lhc = ochain->data->ipdata.inum;
498 *nchainp = NULL;
499 KKASSERT((lhc & HAMMER2_DIRHASH_VISIBLE) == 0);
500
501 /*
502 * Locate the inode or indirect block to create the new
503 * entry in.
504 *
505 * There should be no key collisions with invisible inode keys.
506 */
507retry:
508 parent = dip->chain;
509 hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
510 chain = hammer2_chain_lookup(hmp, &parent, lhc, lhc, 0);
511 if (chain) {
512 hammer2_chain_unlock(hmp, chain);
513 chain = NULL;
514 error = ENOSPC;
515 }
516
517 /*
518 * Create entry in common parent directory.
519 */
520 if (error == 0) {
521 chain = hammer2_chain_create(hmp, parent, NULL, lhc, 0,
522 HAMMER2_BREF_TYPE_INODE, /* n/a */
523 HAMMER2_INODE_BYTES, /* n/a */
524 &error);
525 }
526
527 /*
528 * Clean up, but we need to retain a ref on parent so we can wait
529 * on it for certain errors.
530 */
531 if (error == EAGAIN)
532 hammer2_chain_ref(hmp, parent);
533 hammer2_chain_unlock(hmp, parent);
534
535 /*
536 * Handle the error case
537 */
538 if (error) {
539 KKASSERT(chain == NULL);
540 if (error == EAGAIN) {
541 hammer2_chain_wait(hmp, parent);
542 hammer2_chain_drop(hmp, parent);
543 goto retry;
544 }
545 return (error);
546 }
547
548 /*
549 * XXX This is currently a horrible hack. Well, if we wanted to
550 * duplicate a file, i.e. as in a snapshot, we definitely
551 * would have to flush it first.
552 *
553 * For hardlink target generation we can theoretically move any
554 * active chain structures without flushing, but that gets really
555 * iffy for code which follows chain->parent and ip->pip links.
556 *
557 * XXX only works with files. Duplicating a directory hierarchy
558 * requires a flush but doesn't deal with races post-flush.
559 * Well, it would work I guess, but you might catch some files
560 * mid-operation.
561 *
562 * We cannot leave ochain with any in-memory chains because (for a
563 * hardlink), ochain will become a OBJTYPE_HARDLINK which is just a
564 * pointer to the real hardlink's inum and can't have any sub-chains.
565 * XXX might be 0-ref chains left.
566 */
567 hammer2_chain_flush(hmp, ochain, 0);
568 /*KKASSERT(RB_EMPTY(&ochain.rbhead));*/
569
570 hammer2_chain_modify(hmp, chain, 0);
571 nipdata = &chain->data->ipdata;
572 *nipdata = ochain->data->ipdata;
573
574 /*
575 * Directory entries are inodes but this is a hidden hardlink
576 * target. The name isn't used but to ease debugging give it
577 * a name after its inode number.
578 */
579 ksnprintf(nipdata->filename, sizeof(nipdata->filename),
580 "0x%016jx", (intmax_t)nipdata->inum);
581 nipdata->name_len = strlen(nipdata->filename);
582 nipdata->name_key = lhc;
583
584 *nchainp = chain;
585
586 return (0);
587}
588
589/*
590 * Connect *chainp to the media topology represented by (dip, name, len).
591 * A directory entry is created which points to *chainp. *chainp is then
592 * unlocked and set to NULL.
593 *
594 * If *chainp is not currently connected we simply connect it up.
595 *
596 * If *chainp is already connected we create a OBJTYPE_HARDLINK entry which
597 * points to chain's inode number. *chainp is expected to be the terminus of
598 * the hardlink sitting as a hidden file in a common parent directory
599 * in this situation.
600 *
601 * The caller always wants to reference the hardlink terminus, not the
602 * hardlink pointer that we might be creating, so we do NOT replace
603 * *chainp here, we simply unlock and NULL it out.
604 */
605int
606hammer2_inode_connect(hammer2_inode_t *dip, hammer2_chain_t **chainp,
607 const uint8_t *name, size_t name_len)
608{
609 hammer2_inode_data_t *ipdata;
610 hammer2_mount_t *hmp;
611 hammer2_chain_t *nchain;
612 hammer2_chain_t *parent;
613 hammer2_chain_t *ochain;
614 hammer2_key_t lhc;
615 int error;
616 int hlink;
617
618 hmp = dip->hmp;
619
620 ochain = *chainp;
621 *chainp = NULL;
622
623 /*
624 * Since ochain is either disconnected from the topology or represents
625 * a hardlink terminus which is always a parent of or equal to dip,
626 * we should be able to safely lock dip->chain for our setup.
627 */
628retry:
629 parent = dip->chain;
630 hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
631
632 lhc = hammer2_dirhash(name, name_len);
633 hlink = (ochain->parent != NULL);
634
635 /*
636 * In fake mode flush oip so we can just snapshot it downbelow.
637 */
638 if (hlink && hammer2_hardlink_enable < 0)
639 hammer2_chain_flush(hmp, ochain, 0);
640
641 /*
642 * Locate the inode or indirect block to create the new
643 * entry in. At the same time check for key collisions
644 * and iterate until we don't get one.
645 */
646 error = 0;
647 while (error == 0) {
648 nchain = hammer2_chain_lookup(hmp, &parent, lhc, lhc, 0);
649 if (nchain == NULL)
650 break;
651 if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK)
652 error = ENOSPC;
653 hammer2_chain_unlock(hmp, nchain);
654 nchain = NULL;
655 ++lhc;
656 }
657
658 /*
659 * Passing a non-NULL chain to hammer2_chain_create() reconnects the
660 * existing chain instead of creating a new one. The chain's bref
661 * will be properly updated.
662 */
663 if (error == 0) {
664 if (hlink) {
665 nchain = hammer2_chain_create(hmp, parent,
666 NULL, lhc, 0,
667 HAMMER2_BREF_TYPE_INODE,
668 HAMMER2_INODE_BYTES,
669 &error);
670 } else {
671 /*
672 * NOTE: reconnects oip->chain to the media
673 * topology and returns its argument
674 * (oip->chain).
675 *
676 * No additional locks or refs are obtained on
677 * the returned chain so don't double-unlock!
678 */
679 nchain = hammer2_chain_create(hmp, parent,
680 ochain, lhc, 0,
681 HAMMER2_BREF_TYPE_INODE,
682 HAMMER2_INODE_BYTES,
683 &error);
684 }
685 }
686
687 /*
688 * Unlock stuff. This is a bit messy, if we have an EAGAIN error
689 * we need to wait for operations on parent to finish.
690 */
691 if (error == EAGAIN)
692 hammer2_chain_ref(hmp, parent);
693 hammer2_chain_unlock(hmp, parent);
694
695 /*
696 * ochain still active.
697 *
698 * Handle the error case
699 */
700 if (error) {
701 KKASSERT(nchain == NULL);
702 if (error == EAGAIN) {
703 hammer2_chain_wait(hmp, parent);
704 hammer2_chain_drop(hmp, parent);
705 goto retry;
706 }
707 hammer2_chain_unlock(hmp, ochain);
708 return (error);
709 }
710
711 /*
712 * Directory entries are inodes so if the name has changed we have
713 * to update the inode.
714 *
715 * When creating an OBJTYPE_HARDLINK entry remember to unlock the
716 * chain, the caller will access the hardlink via the actual hardlink
717 * target file and not the hardlink pointer entry.
718 */
719 if (hlink && hammer2_hardlink_enable >= 0) {
720 /*
721 * Create the HARDLINK pointer. oip represents the hardlink
722 * target in this situation.
723 *
724 * NOTE: *_get() integrates chain's lock into the inode lock.
725 */
726 hammer2_chain_modify(hmp, nchain, 0);
727 KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
728 ipdata = &nchain->data->ipdata;
729 bcopy(name, ipdata->filename, name_len);
730 ipdata->name_key = lhc;
731 ipdata->name_len = name_len;
732 ipdata->target_type = ochain->data->ipdata.type;
733 ipdata->type = HAMMER2_OBJTYPE_HARDLINK;
734 ipdata->inum = ochain->data->ipdata.inum;
735 ipdata->nlinks = 1;
736 kprintf("created hardlink %*.*s\n",
737 (int)name_len, (int)name_len, name);
738 hammer2_chain_unlock(hmp, nchain);
739 } else if (hlink && hammer2_hardlink_enable < 0) {
740 /*
741 * Create a snapshot (hardlink fake mode for debugging).
742 *
743 * NOTE: *_get() integrates nchain's lock into the inode lock.
744 */
745 hammer2_chain_modify(hmp, nchain, 0);
746 KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
747 ipdata = &nchain->data->ipdata;
748 *ipdata = ochain->data->ipdata;
749 bcopy(name, ipdata->filename, name_len);
750 ipdata->name_key = lhc;
751 ipdata->name_len = name_len;
752 kprintf("created fake hardlink %*.*s\n",
753 (int)name_len, (int)name_len, name);
754 hammer2_chain_unlock(hmp, nchain);
755 } else {
756 /*
757 * Normally disconnected inode (e.g. during a rename) that
758 * was reconnected. We must fixup the name stored in
759 * oip.
760 *
761 * We are using oip as chain, already locked by caller,
762 * do not unlock it.
763 */
764 hammer2_chain_modify(hmp, ochain, 0);
765 ipdata = &ochain->data->ipdata;
766
767 if (ipdata->name_len != name_len ||
768 bcmp(ipdata->filename, name, name_len) != 0) {
769 KKASSERT(name_len < HAMMER2_INODE_MAXNAME);
770 bcopy(name, ipdata->filename, name_len);
771 ipdata->name_key = lhc;
772 ipdata->name_len = name_len;
773 }
774 ipdata->nlinks = 1;
775 }
776 hammer2_chain_unlock(hmp, ochain);
777 return (0);
778}
779
780/*
781 * Unlink the file from the specified directory inode. The directory inode
782 * does not need to be locked. The caller should pass a non-NULL (ip)
783 * representing the object being removed only if the related vnode is
784 * potentially inactive (not referenced in the caller's active path),
785 * so we can vref/vrele it to trigger the VOP_INACTIVE path and properly
786 * recycle it.
787 *
788 * isdir determines whether a directory/non-directory check should be made.
789 * No check is made if isdir is set to -1.
790 *
791 * If retain_chain is non-NULL this function can fail with an EAGAIN if it
792 * catches the object in the middle of a flush.
793 */
794int
795hammer2_unlink_file(hammer2_inode_t *dip,
796 const uint8_t *name, size_t name_len,
797 int isdir, hammer2_chain_t *retain_chain)
798{
799 hammer2_inode_data_t *ipdata;
800 hammer2_mount_t *hmp;
801 hammer2_chain_t *parent;
802 hammer2_chain_t *ochain;
803 hammer2_chain_t *chain;
804 hammer2_chain_t *dparent;
805 hammer2_chain_t *dchain;
806 hammer2_key_t lhc;
807 int error;
808 int parent_ref;
809 uint8_t type;
810
811 parent_ref = 0;
812 error = 0;
813 ochain = NULL;
814 hmp = dip->hmp;
815 lhc = hammer2_dirhash(name, name_len);
816
817 /*
818 * Search for the filename in the directory
819 */
820 parent = hammer2_inode_lock_ex(dip);
821 chain = hammer2_chain_lookup(hmp, &parent,
822 lhc, lhc + HAMMER2_DIRHASH_LOMASK,
823 0);
824 while (chain) {
825 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
826 name_len == chain->data->ipdata.name_len &&
827 bcmp(name, chain->data->ipdata.filename, name_len) == 0) {
828 break;
829 }
830 chain = hammer2_chain_next(hmp, &parent, chain,
831 lhc, lhc + HAMMER2_DIRHASH_LOMASK,
832 0);
833 }
834 hammer2_inode_unlock_ex(dip, NULL); /* retain parent */
835
836 /*
837 * Not found or wrong type (isdir < 0 disables the type check).
838 * If a hardlink pointer, type checks use the hardlink target.
839 */
840 if (chain == NULL) {
841 error = ENOENT;
842 goto done;
843 }
844 if ((type = chain->data->ipdata.type) == HAMMER2_OBJTYPE_HARDLINK)
845 type = chain->data->ipdata.target_type;
846
847 if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 0) {
848 error = ENOTDIR;
849 goto done;
850 }
851 if (type != HAMMER2_OBJTYPE_DIRECTORY && isdir == 1) {
852 error = EISDIR;
853 goto done;
854 }
855
856 /*
857 * Hardlink must be resolved. We can't hold parent locked while we
858 * do this or we could deadlock.
859 *
860 * On success chain will be adjusted to point at the hardlink target
861 * and ochain will point to the hardlink pointer in the original
862 * directory. Otherwise chain remains pointing to the original.
863 */
864 if (chain->data->ipdata.type == HAMMER2_OBJTYPE_HARDLINK) {
865 KKASSERT(parent_ref == 0);
866 hammer2_chain_unlock(hmp, parent);
867 parent = NULL;
868 error = hammer2_hardlink_find(dip, &chain, &ochain);
869 }
870
871 /*
872 * If this is a directory the directory must be empty. However, if
873 * isdir < 0 we are doing a rename and the directory does not have
874 * to be empty.
875 *
876 * NOTE: We check the full key range here which covers both visible
877 * and invisible entries. Theoretically there should be no
878 * invisible (hardlink target) entries if there are no visible
879 * entries.
880 */
881 if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir >= 0) {
882 dparent = chain;
883 hammer2_chain_lock(hmp, dparent, HAMMER2_RESOLVE_ALWAYS);
884 dchain = hammer2_chain_lookup(hmp, &dparent,
885 0, (hammer2_key_t)-1,
886 HAMMER2_LOOKUP_NODATA);
887 if (dchain) {
888 hammer2_chain_unlock(hmp, dchain);
889 hammer2_chain_unlock(hmp, dparent);
890 error = ENOTEMPTY;
891 goto done;
892 }
893 hammer2_chain_unlock(hmp, dparent);
894 dparent = NULL;
895 /* dchain NULL */
896 }
897
898 /*
899 * Ok, we can now unlink the chain. We always decrement nlinks even
900 * if the entry can be deleted in case someone has the file open and
901 * does an fstat().
902 *
903 * The chain itself will no longer be in the on-media topology but
904 * can still be flushed to the media (e.g. if an open descriptor
905 * remains). When the last vnode/ip ref goes away the chain will
906 * be marked unmodified, avoiding any further (now unnecesary) I/O.
907 *
908 * A non-NULL ochain indicates a hardlink.
909 */
910 if (ochain) {
911 /*
912 * Delete the original hardlink pointer.
913 *
914 * NOTE: parent from above is NULL when ochain != NULL
915 * so we can reuse it.
916 */
917 hammer2_chain_lock(hmp, ochain, HAMMER2_RESOLVE_ALWAYS);
918 parent_ref = 1;
919 for (;;) {
920 parent = ochain->parent;
921 hammer2_chain_ref(hmp, parent);
922 hammer2_chain_unlock(hmp, ochain);
923 hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
924 hammer2_chain_lock(hmp, ochain, HAMMER2_RESOLVE_ALWAYS);
925 if (ochain->parent == parent)
926 break;
927 hammer2_chain_unlock(hmp, parent);
928 hammer2_chain_drop(hmp, parent);
929 }
930
931 if (ochain == retain_chain && ochain->flushing) {
932 hammer2_chain_unlock(hmp, ochain);
933 error = EAGAIN;
934 goto done;
935 }
936 hammer2_chain_delete(hmp, parent, ochain,
937 (ochain == retain_chain));
938 hammer2_chain_unlock(hmp, ochain);
939 hammer2_chain_unlock(hmp, parent);
940 hammer2_chain_drop(hmp, parent);
941 parent = NULL;
942
943 /*
944 * Then decrement nlinks on hardlink target, deleting
945 * the target when nlinks drops to 0.
946 */
947 if (chain->data->ipdata.nlinks == 1) {
948 dparent = chain->parent;
949 hammer2_chain_ref(hmp, chain);
950 hammer2_chain_unlock(hmp, chain);
951 hammer2_chain_lock(hmp, dparent,
952 HAMMER2_RESOLVE_ALWAYS);
953 hammer2_chain_lock(hmp, chain,
954 HAMMER2_RESOLVE_ALWAYS);
955 hammer2_chain_drop(hmp, chain);
956 hammer2_chain_modify(hmp, chain, 0);
957 --chain->data->ipdata.nlinks;
958 hammer2_chain_delete(hmp, dparent, chain, 0);
959 hammer2_chain_unlock(hmp, dparent);
960 } else {
961 hammer2_chain_modify(hmp, chain, 0);
962 --chain->data->ipdata.nlinks;
963 }
964 } else {
965 /*
966 * Otherwise this was not a hardlink and we can just
967 * remove the entry and decrement nlinks.
968 *
969 * NOTE: *_get() integrates chain's lock into the inode lock.
970 */
971 ipdata = &chain->data->ipdata;
972 if (chain == retain_chain && chain->flushing) {
973 error = EAGAIN;
974 goto done;
975 }
976 hammer2_chain_modify(hmp, chain, 0);
977 --ipdata->nlinks;
978 hammer2_chain_delete(hmp, parent, chain,
979 (retain_chain == chain));
980 }
981
982 error = 0;
983done:
984 if (chain)
985 hammer2_chain_unlock(hmp, chain);
986 if (parent) {
987 hammer2_chain_unlock(hmp, parent);
988 if (parent_ref)
989 hammer2_chain_drop(hmp, parent);
990 }
991 if (ochain)
992 hammer2_chain_drop(hmp, ochain);
993
994 return error;
995}
996
997/*
998 * Calculate the allocation size for the file fragment straddling EOF
999 */
1000int
1001hammer2_inode_calc_alloc(hammer2_key_t filesize)
1002{
1003 int frag = (int)filesize & HAMMER2_PBUFMASK;
1004 int radix;
1005
1006 if (frag == 0)
1007 return(0);
1008 for (radix = HAMMER2_MINALLOCRADIX; frag > (1 << radix); ++radix)
1009 ;
1010 return (radix);
1011}
1012
1013/*
1014 * Given an unlocked ip consolidate for hardlink creation, adding (nlinks)
1015 * to the file's link count and potentially relocating the file to a
1016 * directory common to ip->pip and tdip.
1017 *
1018 * If the file has to be relocated ip->chain will also be adjusted.
1019 */
1020int
1021hammer2_hardlink_consolidate(hammer2_inode_t *ip, hammer2_chain_t **chainp,
1022 hammer2_inode_t *tdip, int nlinks)
1023{
1024 hammer2_inode_data_t *ipdata;
1025 hammer2_mount_t *hmp;
1026 hammer2_inode_t *fdip;
1027 hammer2_inode_t *cdip;
1028 hammer2_chain_t *chain;
1029 hammer2_chain_t *nchain;
1030 hammer2_chain_t *parent;
1031 int error;
1032
1033 hmp = tdip->hmp;
1034 *chainp = NULL;
1035 chain = hammer2_inode_lock_ex(ip);
1036
1037 if (nlinks == 0 && /* no hardlink needed */
1038 (chain->data->ipdata.name_key & HAMMER2_DIRHASH_VISIBLE)) {
1039 hammer2_inode_unlock_ex(ip, NULL);
1040 *chainp = chain;
1041 return (0);
1042 }
1043 if (hammer2_hardlink_enable < 0) { /* fake hardlinks */
1044 hammer2_inode_unlock_ex(ip, NULL);
1045 *chainp = chain;
1046 return (0);
1047 }
1048 if (hammer2_hardlink_enable == 0) { /* disallow hardlinks */
1049 hammer2_inode_unlock_ex(ip, chain);
1050 return (ENOTSUP);
1051 }
1052
1053 /*
1054 * cdip will be returned with a ref, but not locked.
1055 */
1056 fdip = ip->pip;
1057 cdip = hammer2_inode_common_parent(hmp, fdip, tdip);
1058
1059 /*
1060 * If no change in the hardlink's target directory is required and
1061 * this is already a hardlink target, all we need to do is adjust
1062 * the link count.
1063 */
1064 if (cdip == fdip &&
1065 (chain->data->ipdata.name_key & HAMMER2_DIRHASH_VISIBLE) == 0) {
1066 if (nlinks) {
1067 hammer2_chain_modify(hmp, chain, 0);
1068 chain->data->ipdata.nlinks += nlinks;
1069 }
1070 *chainp = chain;
1071 error = 0;
1072 goto done;
1073 }
1074
1075 /*
1076 * We either have to move an existing hardlink target or we have
1077 * to create a fresh hardlink target.
1078 *
1079 * Hardlink targets are hidden inodes in a parent directory common
1080 * to all directory entries referencing the hardlink.
1081 */
1082 error = hammer2_inode_duplicate(cdip, chain, &nchain);
1083 if (error == 0) {
1084 /*
1085 * Bump nlinks on duplicated hidden inode.
1086 */
1087 hammer2_chain_modify(hmp, nchain, 0);
1088 nchain->data->ipdata.nlinks += nlinks;
1089
1090 /*
1091 * If the old chain is not a hardlink target then replace
1092 * it with a OBJTYPE_HARDLINK pointer.
1093 *
1094 * If the old chain IS a hardlink target then delete it.
1095 */
1096 if (chain->data->ipdata.name_key & HAMMER2_DIRHASH_VISIBLE) {
1097 hammer2_chain_modify(hmp, chain, 0);
1098 ipdata = &chain->data->ipdata;
1099 ipdata->target_type = ipdata->type;
1100 ipdata->type = HAMMER2_OBJTYPE_HARDLINK;
1101 ipdata->uflags = 0;
1102 ipdata->rmajor = 0;
1103 ipdata->rminor = 0;
1104 ipdata->ctime = 0;
1105 ipdata->mtime = 0;
1106 ipdata->atime = 0;
1107 ipdata->btime = 0;
1108 bzero(&ipdata->uid, sizeof(ipdata->uid));
1109 bzero(&ipdata->gid, sizeof(ipdata->gid));
1110 ipdata->op_flags = HAMMER2_OPFLAG_DIRECTDATA;
1111 ipdata->cap_flags = 0;
1112 ipdata->mode = 0;
1113 ipdata->size = 0;
1114 ipdata->nlinks = 1;
1115 ipdata->iparent = 0; /* XXX */
1116 ipdata->pfs_type = 0;
1117 ipdata->pfs_inum = 0;
1118 bzero(&ipdata->pfs_clid, sizeof(ipdata->pfs_clid));
1119 bzero(&ipdata->pfs_fsid, sizeof(ipdata->pfs_fsid));
1120 ipdata->data_quota = 0;
1121 ipdata->data_count = 0;
1122 ipdata->inode_quota = 0;
1123 ipdata->inode_count = 0;
1124 ipdata->attr_tid = 0;
1125 ipdata->dirent_tid = 0;
1126 bzero(&ipdata->u, sizeof(ipdata->u));
1127 /* XXX transaction ids */
1128 } else {
1129 kprintf("DELETE INVISIBLE\n");
1130 for (;;) {
1131 parent = chain->parent;
1132 hammer2_chain_ref(hmp, parent);
1133 hammer2_chain_ref(hmp, chain);
1134 hammer2_chain_unlock(hmp, chain);
1135 hammer2_chain_lock(hmp, parent,
1136 HAMMER2_RESOLVE_ALWAYS);
1137 hammer2_chain_lock(hmp, chain,
1138 HAMMER2_RESOLVE_ALWAYS);
1139 hammer2_chain_drop(hmp, chain);
1140 if (chain->parent == parent)
1141 break;
1142 hammer2_chain_unlock(hmp, parent);
1143 hammer2_chain_drop(hmp, parent);
1144 }
1145 hammer2_chain_delete(hmp, parent, chain, 0);
1146 hammer2_chain_unlock(hmp, parent);
1147 hammer2_chain_drop(hmp, parent);
1148 }
1149
1150 /*
1151 * Replace ip->chain with nchain (ip is still locked).
1152 */
1153 hammer2_chain_ref(hmp, nchain); /* ip->chain */
1154 if (ip->chain)
1155 hammer2_chain_drop(hmp, ip->chain); /* ip->chain */
1156 ip->chain = nchain;
1157
1158 hammer2_chain_unlock(hmp, chain);
1159 *chainp = nchain;
1160 } else {
1161 hammer2_chain_unlock(hmp, chain);
1162 }
1163
1164 /*
1165 * Cleanup, chain/nchain already dealt with.
1166 */
1167done:
1168 hammer2_inode_unlock_ex(ip, NULL);
1169 hammer2_inode_drop(cdip);
1170
1171 return (error);
1172}
1173
1174/*
1175 * If (*ochainp) is non-NULL it points to the forward OBJTYPE_HARDLINK
1176 * inode while (*chainp) points to the resolved (hidden hardlink
1177 * target) inode. In this situation when nlinks is 1 we wish to
1178 * deconsolidate the hardlink, moving it back to the directory that now
1179 * represents the only remaining link.
1180 */
1181int
1182hammer2_hardlink_deconsolidate(hammer2_inode_t *dip,
1183 hammer2_chain_t **chainp,
1184 hammer2_chain_t **ochainp)
1185{
1186 if (*ochainp == NULL)
1187 return (0);
1188 /* XXX */
1189 return (0);
1190}
1191
1192/*
1193 * The caller presents a locked *chainp pointing to a HAMMER2_BREF_TYPE_INODE
1194 * with an obj_type of HAMMER2_OBJTYPE_HARDLINK. This routine will gobble
1195 * the *chainp and return a new locked *chainp representing the file target
1196 * (the original *chainp will be unlocked).
1197 *
1198 * When a match is found the chain representing the original HARDLINK
1199 * will be returned in *ochainp with a ref, but not locked.
1200 *
1201 * When no match is found *chainp is set to NULL and EIO is returned.
1202 * (*ochainp) will still be set to the original chain with a ref but not
1203 * locked.
1204 */
1205int
1206hammer2_hardlink_find(hammer2_inode_t *dip, hammer2_chain_t **chainp,
1207 hammer2_chain_t **ochainp)
1208{
1209 hammer2_mount_t *hmp = dip->hmp;
1210 hammer2_chain_t *chain = *chainp;
1211 hammer2_chain_t *parent;
1212 hammer2_inode_t *ip;
1213 hammer2_inode_t *pip;
1214 hammer2_key_t lhc;
1215
1216 pip = dip;
1217 hammer2_inode_ref(pip); /* for loop */
1218 hammer2_chain_ref(hmp, chain); /* for (*ochainp) */
1219
1220 *ochainp = chain;
1221
1222 /*
1223 * Locate the hardlink. pip is referenced and not locked,
1224 * ipp.
1225 *
1226 * chain is reused.
1227 */
1228 lhc = chain->data->ipdata.inum;
1229 hammer2_chain_unlock(hmp, chain);
1230 chain = NULL;
1231
1232 while ((ip = pip) != NULL) {
1233 parent = hammer2_inode_lock_ex(ip);
1234 hammer2_inode_drop(ip); /* loop */
1235 KKASSERT(parent->bref.type == HAMMER2_BREF_TYPE_INODE);
1236 chain = hammer2_chain_lookup(hmp, &parent, lhc, lhc, 0);
1237 hammer2_chain_unlock(hmp, parent);
1238 if (chain)
1239 break;
1240 pip = ip->pip; /* safe, ip held locked */
1241 if (pip)
1242 hammer2_inode_ref(pip); /* loop */
1243 hammer2_inode_unlock_ex(ip, NULL);
1244 }
1245
1246 /*
1247 * chain is locked, ip is locked. Unlock ip, return the locked
1248 * chain. *ipp is already set w/a ref count and not locked.
1249 *
1250 * (parent is already unlocked).
1251 */
1252 hammer2_inode_unlock_ex(ip, NULL);
1253 *chainp = chain;
1254 if (chain) {
1255 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INODE);
1256 /* already locked */
1257 return (0);
1258 } else {
1259 return (EIO);
1260 }
1261}
1262
1263/*
1264 * Find the directory common to both fdip and tdip, hold and return
1265 * its inode.
1266 */
1267hammer2_inode_t *
1268hammer2_inode_common_parent(hammer2_mount_t *hmp,
1269 hammer2_inode_t *fdip, hammer2_inode_t *tdip)
1270{
1271 hammer2_inode_t *scan1;
1272 hammer2_inode_t *scan2;
1273
1274 /*
1275 * We used to have a depth field but it complicated matters too
1276 * much for directory renames. So now its ugly. Check for
1277 * simple cases before giving up and doing it the expensive way.
1278 *
1279 * XXX need a bottom-up topology stability lock
1280 */
1281 if (fdip == tdip || fdip == tdip->pip) {
1282 hammer2_inode_ref(fdip);
1283 return(fdip);
1284 }
1285 if (fdip->pip == tdip) {
1286 hammer2_inode_ref(tdip);
1287 return(tdip);
1288 }
1289
1290 /*
1291 * XXX not MPSAFE
1292 */
1293 for (scan1 = fdip; scan1->pmp == fdip->pmp; scan1 = scan1->pip) {
1294 scan2 = tdip;
1295 while (scan2->pmp == tdip->pmp) {
1296 if (scan1 == scan2) {
1297 hammer2_inode_ref(scan1);
1298 return(scan1);
1299 }
1300 scan2 = scan2->pip;
1301 }
1302 }
1303 panic("hammer2_inode_common_parent: no common parent %p %p\n",
1304 fdip, tdip);
1305 /* NOT REACHED */
1306 return(NULL);
1307}