hammer2 - Remove LOCK* debugging
sys/vfs/hammer2/hammer2_vnops.c
/*
 * Copyright (c) 2011-2015 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
 * by Daniel Flores (GSOC 2013 - mentored by Matthew Dillon, compression)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Kernel Filesystem interface
 *
 * NOTE! local ipdata pointers must be reloaded on any modifying operation
 *       to the inode as its underlying chain may have changed.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/mountctl.h>
#include <sys/dirent.h>
#include <sys/uio.h>
#include <sys/objcache.h>
#include <sys/event.h>
#include <sys/file.h>
#include <vfs/fifofs/fifo.h>

#include "hammer2.h"

static int hammer2_read_file(hammer2_inode_t *ip, struct uio *uio,
                                int seqcount);
static int hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
                                int ioflag, int seqcount);
static void hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize);
static void hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize);

struct objcache *cache_xops;

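/*
 * Post a kqueue event on the vnode if any event flags are pending.
 */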
static __inline
void
hammer2_knote(struct vnode *vp, int flags)
{
        if (flags)
                KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags);
}

/*
 * Last reference to a vnode is going away but it is still cached.
 */
static
int
hammer2_vop_inactive(struct vop_inactive_args *ap)
{
        hammer2_inode_t *ip;
        struct vnode *vp;

        vp = ap->a_vp;
        ip = VTOI(vp);

        /*
         * Degenerate case
         */
        if (ip == NULL) {
                vrecycle(vp);
                return (0);
        }

        /*
         * Check for deleted inodes and recycle immediately on the last
         * release.  Be sure to destroy any left-over buffer cache buffers
         * so we do not waste time trying to flush them.
         *
         * Note that deleting the file block chains under the inode chain
         * would just be a waste of energy, so don't do it.
         *
         * WARNING: nvtruncbuf() can only be safely called without the inode
         *          lock held due to the way our write thread works.
         */
        if (ip->flags & HAMMER2_INODE_ISUNLINKED) {
                hammer2_key_t lbase;
                int nblksize;

                /*
                 * Detect updates to the embedded data which may be
                 * synchronized by the strategy code.  Simply mark the
                 * inode modified so it gets picked up by our normal flush.
                 */
                nblksize = hammer2_calc_logical(ip, 0, &lbase, NULL);
                nvtruncbuf(vp, 0, nblksize, 0, 0);
                vrecycle(vp);
        }
        return (0);
}

/*
 * Reclaim a vnode so that it can be reused; after the inode is
 * disassociated, the filesystem must manage it alone.
 */
static
int
hammer2_vop_reclaim(struct vop_reclaim_args *ap)
{
        hammer2_inode_t *ip;
        hammer2_pfs_t *pmp;
        struct vnode *vp;

        vp = ap->a_vp;
        ip = VTOI(vp);
        if (ip == NULL) {
                return(0);
        }
        pmp = ip->pmp;

        /*
         * The final close of a deleted file or directory marks it for
         * destruction.  The DELETED flag allows the flusher to shortcut
         * any modified blocks still unflushed (that is, just ignore them).
         *
         * HAMMER2 usually does not try to optimize the freemap by returning
         * deleted blocks to it as it does not usually know how many snapshots
         * might be referencing portions of the file/dir.
         */
        vp->v_data = NULL;
        ip->vp = NULL;

        /*
         * NOTE! We do not attempt to flush chains here, flushing is
         *       really fragile and could also deadlock.
         */
        vclrisdirty(vp);

        /*
         * This occurs if the inode was unlinked while open.  Reclamation of
         * these inodes requires processing we cannot safely do here so add
         * the inode to the sideq in that situation.
         *
         * A modified inode may require chain synchronization which will no
         * longer be driven by a sync or fsync without the vnode, also use
         * the sideq for that.
         *
         * A reclaim can occur at any time so we cannot safely start a
         * transaction to handle reclamation of unlinked files.  Instead,
         * the ip is left with a reference and placed on a linked list and
         * handled later on.
         */

        if ((ip->flags & (HAMMER2_INODE_ISUNLINKED |
                          HAMMER2_INODE_MODIFIED |
                          HAMMER2_INODE_RESIZED)) &&
            (ip->flags & HAMMER2_INODE_ISDELETED) == 0) {
                hammer2_inode_sideq_t *ipul;

                ipul = kmalloc(sizeof(*ipul), pmp->minode, M_WAITOK | M_ZERO);
                ipul->ip = ip;

                hammer2_spin_ex(&pmp->list_spin);
                if ((ip->flags & HAMMER2_INODE_ONSIDEQ) == 0) {
                        /* ref -> sideq */
                        atomic_set_int(&ip->flags, HAMMER2_INODE_ONSIDEQ);
                        TAILQ_INSERT_TAIL(&pmp->sideq, ipul, entry);
                        hammer2_spin_unex(&pmp->list_spin);
                } else {
                        hammer2_spin_unex(&pmp->list_spin);
                        kfree(ipul, pmp->minode);
                        hammer2_inode_drop(ip);         /* vp ref */
                }
                /* retain ref from vp for ipul */
        } else {
                hammer2_inode_drop(ip);                 /* vp ref */
        }

        /*
         * XXX handle background sync when ip dirty, kernel will no longer
         * notify us regarding this inode because there is no longer a
         * vnode attached to it.
         */

        return (0);
}

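/*
 * Flush the vnode's dirty buffers and, if the inode is flagged modified,
 * synchronize its in-memory state to the backing chains within a normal
 * (non-flush) transaction.
 */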
static
int
hammer2_vop_fsync(struct vop_fsync_args *ap)
{
        hammer2_inode_t *ip;
        struct vnode *vp;

        vp = ap->a_vp;
        ip = VTOI(vp);

#if 0
        /* XXX can't do this yet */
        hammer2_trans_init(ip->pmp, HAMMER2_TRANS_ISFLUSH);
        vfsync(vp, ap->a_waitfor, 1, NULL, NULL);
#endif
        hammer2_trans_init(ip->pmp, 0);
        vfsync(vp, ap->a_waitfor, 1, NULL, NULL);

        /*
         * Calling chain_flush here creates a lot of duplicative
         * COW operations due to non-optimal vnode ordering.
         *
         * Only do it for an actual fsync() syscall.  The other forms
         * which call this function will eventually call chain_flush
         * on the volume root as a catch-all, which is far more optimal.
         */
        hammer2_inode_lock(ip, 0);
        if (ip->flags & HAMMER2_INODE_MODIFIED)
                hammer2_inode_chain_sync(ip);
        hammer2_inode_unlock(ip);
        hammer2_trans_done(ip->pmp);

        return (0);
}

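/*
 * Check access permissions against the inode's uid, gid, mode and
 * user flags under a shared inode lock.
 */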
static
int
hammer2_vop_access(struct vop_access_args *ap)
{
        hammer2_inode_t *ip = VTOI(ap->a_vp);
        uid_t uid;
        gid_t gid;
        int error;

        hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED);
        uid = hammer2_to_unix_xid(&ip->meta.uid);
        gid = hammer2_to_unix_xid(&ip->meta.gid);
        error = vop_helper_access(ap, uid, gid, ip->meta.mode, ip->meta.uflags);
        hammer2_inode_unlock(ip);

        return (error);
}

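/*
 * Retrieve vnode attributes from the inode meta-data under a shared
 * inode lock.
 */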
static
int
hammer2_vop_getattr(struct vop_getattr_args *ap)
{
        hammer2_pfs_t *pmp;
        hammer2_inode_t *ip;
        struct vnode *vp;
        struct vattr *vap;
        hammer2_chain_t *chain;
        int i;

        vp = ap->a_vp;
        vap = ap->a_vap;

        ip = VTOI(vp);
        pmp = ip->pmp;

        hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED);

        vap->va_fsid = pmp->mp->mnt_stat.f_fsid.val[0];
        vap->va_fileid = ip->meta.inum;
        vap->va_mode = ip->meta.mode;
        vap->va_nlink = ip->meta.nlinks;
        vap->va_uid = hammer2_to_unix_xid(&ip->meta.uid);
        vap->va_gid = hammer2_to_unix_xid(&ip->meta.gid);
        vap->va_rmajor = 0;
        vap->va_rminor = 0;
        vap->va_size = ip->meta.size;   /* protected by shared lock */
        vap->va_blocksize = HAMMER2_PBUFSIZE;
        vap->va_flags = ip->meta.uflags;
        hammer2_time_to_timespec(ip->meta.ctime, &vap->va_ctime);
        hammer2_time_to_timespec(ip->meta.mtime, &vap->va_mtime);
        hammer2_time_to_timespec(ip->meta.mtime, &vap->va_atime);
        vap->va_gen = 1;
        vap->va_bytes = 0;
        if (ip->meta.type == HAMMER2_OBJTYPE_DIRECTORY) {
                /*
                 * Can't really calculate directory use sans the files under
                 * it, just assume one block for now.
                 */
                vap->va_bytes += HAMMER2_INODE_BYTES;
        } else {
                for (i = 0; i < ip->cluster.nchains; ++i) {
                        if ((chain = ip->cluster.array[i].chain) != NULL) {
                                if (vap->va_bytes <
                                    chain->bref.embed.stats.data_count) {
                                        vap->va_bytes =
                                            chain->bref.embed.stats.data_count;
                                }
                        }
                }
        }
        vap->va_type = hammer2_get_vtype(ip->meta.type);
        vap->va_filerev = 0;
        vap->va_uid_uuid = ip->meta.uid;
        vap->va_gid_uuid = ip->meta.gid;
        vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
                          VA_FSID_UUID_VALID;

        hammer2_inode_unlock(ip);

        return (0);
}

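/*
 * Set vnode attributes (chflags, chown, truncate/extend, chmod, mtime)
 * inside a normal transaction with the inode locked exclusively.
 */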
static
int
hammer2_vop_setattr(struct vop_setattr_args *ap)
{
        hammer2_inode_t *ip;
        struct vnode *vp;
        struct vattr *vap;
        int error;
        int kflags = 0;
        uint64_t ctime;

        vp = ap->a_vp;
        vap = ap->a_vap;
        hammer2_update_time(&ctime);

        ip = VTOI(vp);

        if (ip->pmp->ronly)
                return(EROFS);

        hammer2_pfs_memory_wait(ip->pmp);
        hammer2_trans_init(ip->pmp, 0);
        hammer2_inode_lock(ip, 0);
        error = 0;

        if (vap->va_flags != VNOVAL) {
                uint32_t flags;

                flags = ip->meta.uflags;
                error = vop_helper_setattr_flags(&flags, vap->va_flags,
                                     hammer2_to_unix_xid(&ip->meta.uid),
                                     ap->a_cred);
                if (error == 0) {
                        if (ip->meta.uflags != flags) {
                                hammer2_inode_modify(ip);
                                ip->meta.uflags = flags;
                                ip->meta.ctime = ctime;
                                kflags |= NOTE_ATTRIB;
                        }
                        if (ip->meta.uflags & (IMMUTABLE | APPEND)) {
                                error = 0;
                                goto done;
                        }
                }
                goto done;
        }
        if (ip->meta.uflags & (IMMUTABLE | APPEND)) {
                error = EPERM;
                goto done;
        }
        if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
                mode_t cur_mode = ip->meta.mode;
                uid_t cur_uid = hammer2_to_unix_xid(&ip->meta.uid);
                gid_t cur_gid = hammer2_to_unix_xid(&ip->meta.gid);
                uuid_t uuid_uid;
                uuid_t uuid_gid;

                error = vop_helper_chown(ap->a_vp, vap->va_uid, vap->va_gid,
                                         ap->a_cred,
                                         &cur_uid, &cur_gid, &cur_mode);
                if (error == 0) {
                        hammer2_guid_to_uuid(&uuid_uid, cur_uid);
                        hammer2_guid_to_uuid(&uuid_gid, cur_gid);
                        if (bcmp(&uuid_uid, &ip->meta.uid, sizeof(uuid_uid)) ||
                            bcmp(&uuid_gid, &ip->meta.gid, sizeof(uuid_gid)) ||
                            ip->meta.mode != cur_mode
                        ) {
                                hammer2_inode_modify(ip);
                                ip->meta.uid = uuid_uid;
                                ip->meta.gid = uuid_gid;
                                ip->meta.mode = cur_mode;
                                ip->meta.ctime = ctime;
                        }
                        kflags |= NOTE_ATTRIB;
                }
        }

        /*
         * Resize the file
         */
        if (vap->va_size != VNOVAL && ip->meta.size != vap->va_size) {
                switch(vp->v_type) {
                case VREG:
                        if (vap->va_size == ip->meta.size)
                                break;
                        if (vap->va_size < ip->meta.size) {
                                hammer2_mtx_ex(&ip->truncate_lock);
                                hammer2_truncate_file(ip, vap->va_size);
                                hammer2_mtx_unlock(&ip->truncate_lock);
                                kflags |= NOTE_WRITE;
                        } else {
                                hammer2_extend_file(ip, vap->va_size);
                                kflags |= NOTE_WRITE | NOTE_EXTEND;
                        }
                        hammer2_inode_modify(ip);
                        ip->meta.mtime = ctime;
                        break;
                default:
                        error = EINVAL;
                        goto done;
                }
        }
#if 0
        /* atime not supported */
        if (vap->va_atime.tv_sec != VNOVAL) {
                hammer2_inode_modify(ip);
                ip->meta.atime = hammer2_timespec_to_time(&vap->va_atime);
                kflags |= NOTE_ATTRIB;
        }
#endif
        if (vap->va_mode != (mode_t)VNOVAL) {
                mode_t cur_mode = ip->meta.mode;
                uid_t cur_uid = hammer2_to_unix_xid(&ip->meta.uid);
                gid_t cur_gid = hammer2_to_unix_xid(&ip->meta.gid);

                error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred,
                                         cur_uid, cur_gid, &cur_mode);
                if (error == 0 && ip->meta.mode != cur_mode) {
                        hammer2_inode_modify(ip);
                        ip->meta.mode = cur_mode;
                        ip->meta.ctime = ctime;
                        kflags |= NOTE_ATTRIB;
                }
        }

        if (vap->va_mtime.tv_sec != VNOVAL) {
                hammer2_inode_modify(ip);
                ip->meta.mtime = hammer2_timespec_to_time(&vap->va_mtime);
                kflags |= NOTE_ATTRIB;
        }

done:
        /*
         * If a truncation occurred we must call hammer2_inode_chain_sync()
         * now in order to trim the related data chains, otherwise a later
         * expansion can cause havoc.
         *
         * If an extension occurred that changed the DIRECTDATA state, we
         * must call hammer2_inode_chain_sync() now in order to prepare the
         * inode's indirect block table.
         */
        if (ip->flags & HAMMER2_INODE_RESIZED)
                hammer2_inode_chain_sync(ip);

        /*
         * Cleanup.
         */
        hammer2_inode_unlock(ip);
        hammer2_trans_done(ip->pmp);
        hammer2_knote(ip->vp, kflags);

        return (error);
}

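/*
 * Read directory entries via an XOP cluster scan, handling the
 * artificial '.' and '..' entries first.
 */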
static
int
hammer2_vop_readdir(struct vop_readdir_args *ap)
{
        hammer2_xop_readdir_t *xop;
        hammer2_blockref_t bref;
        hammer2_inode_t *ip;
        hammer2_tid_t inum;
        hammer2_key_t lkey;
        struct uio *uio;
        off_t *cookies;
        off_t saveoff;
        int cookie_index;
        int ncookies;
        int error;
        int eofflag;
        int r;

        ip = VTOI(ap->a_vp);
        uio = ap->a_uio;
        saveoff = uio->uio_offset;
        eofflag = 0;
        error = 0;

        /*
         * Set up directory entry cookies if requested
         */
        if (ap->a_ncookies) {
                ncookies = uio->uio_resid / 16 + 1;
                if (ncookies > 1024)
                        ncookies = 1024;
                cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
        } else {
                ncookies = -1;
                cookies = NULL;
        }
        cookie_index = 0;

        hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED);

        /*
         * Handle artificial entries.  To ensure that only positive 64 bit
         * quantities are returned to userland we always strip off bit 63.
         * The hash code is designed such that codes 0x0000-0x7FFF are not
         * used, allowing us to use these codes for artificial entries.
         *
         * Entry 0 is used for '.' and entry 1 is used for '..'.  Do not
         * allow '..' to cross the mount point into (e.g.) the super-root.
         */
        if (saveoff == 0) {
                inum = ip->meta.inum & HAMMER2_DIRHASH_USERMSK;
                r = vop_write_dirent(&error, uio, inum, DT_DIR, 1, ".");
                if (r)
                        goto done;
                if (cookies)
                        cookies[cookie_index] = saveoff;
                ++saveoff;
                ++cookie_index;
                if (cookie_index == ncookies)
                        goto done;
        }

        if (saveoff == 1) {
                /*
                 * Be careful with lockorder when accessing ".."
                 *
                 * (ip is the current dir. xip is the parent dir).
                 */
                inum = ip->meta.inum & HAMMER2_DIRHASH_USERMSK;
                if (ip != ip->pmp->iroot)
                        inum = ip->meta.iparent & HAMMER2_DIRHASH_USERMSK;
                r = vop_write_dirent(&error, uio, inum, DT_DIR, 2, "..");
                if (r)
                        goto done;
                if (cookies)
                        cookies[cookie_index] = saveoff;
                ++saveoff;
                ++cookie_index;
                if (cookie_index == ncookies)
                        goto done;
        }

        lkey = saveoff | HAMMER2_DIRHASH_VISIBLE;
        if (hammer2_debug & 0x0020)
                kprintf("readdir: lkey %016jx\n", lkey);
        if (error)
                goto done;

        /*
         * Use XOP for cluster scan.
         *
         * parent is the inode cluster, already locked for us.  Don't
         * double lock shared locks as this will screw up upgrades.
         */
        xop = hammer2_xop_alloc(ip, 0);
        xop->lkey = lkey;
        hammer2_xop_start(&xop->head, hammer2_xop_readdir);

        for (;;) {
                const hammer2_inode_data_t *ripdata;
                const char *dname;
                int dtype;

                error = hammer2_xop_collect(&xop->head, 0);
                if (error)
                        break;
                if (cookie_index == ncookies)
                        break;
                if (hammer2_debug & 0x0020)
                        kprintf("cluster chain %p %p\n",
                                xop->head.cluster.focus,
                                (xop->head.cluster.focus ?
                                 xop->head.cluster.focus->data : (void *)-1));
                hammer2_cluster_bref(&xop->head.cluster, &bref);

                if (bref.type == HAMMER2_BREF_TYPE_INODE) {
                        ripdata =
                            &hammer2_cluster_rdata(&xop->head.cluster)->ipdata;
                        dtype = hammer2_get_dtype(ripdata->meta.type);
                        saveoff = bref.key & HAMMER2_DIRHASH_USERMSK;
                        r = vop_write_dirent(&error, uio,
                                             ripdata->meta.inum &
                                              HAMMER2_DIRHASH_USERMSK,
                                             dtype,
                                             ripdata->meta.name_len,
                                             ripdata->filename);
                        if (r)
                                break;
                        if (cookies)
                                cookies[cookie_index] = saveoff;
                        ++cookie_index;
                } else if (bref.type == HAMMER2_BREF_TYPE_DIRENT) {
                        dtype = hammer2_get_dtype(bref.embed.dirent.type);
                        saveoff = bref.key & HAMMER2_DIRHASH_USERMSK;
                        if (bref.embed.dirent.namlen <=
                            sizeof(bref.check.buf)) {
                                dname = bref.check.buf;
                        } else {
                                dname =
                                 hammer2_cluster_rdata(&xop->head.cluster)->buf;
                        }
                        r = vop_write_dirent(&error, uio,
                                             bref.embed.dirent.inum,
                                             dtype,
                                             bref.embed.dirent.namlen,
                                             dname);
                        if (r)
                                break;
                        if (cookies)
                                cookies[cookie_index] = saveoff;
                        ++cookie_index;
                } else {
                        /* XXX chain error */
                        kprintf("bad chain type readdir %d\n", bref.type);
                }
        }
        hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
        if (error == ENOENT) {
                error = 0;
                eofflag = 1;
                saveoff = (hammer2_key_t)-1;
        } else {
                saveoff = bref.key & HAMMER2_DIRHASH_USERMSK;
        }
done:
        hammer2_inode_unlock(ip);
        if (ap->a_eofflag)
                *ap->a_eofflag = eofflag;
        if (hammer2_debug & 0x0020)
                kprintf("readdir: done at %016jx\n", saveoff);
        uio->uio_offset = saveoff & ~HAMMER2_DIRHASH_VISIBLE;
        if (error && cookie_index == 0) {
                if (cookies) {
                        kfree(cookies, M_TEMP);
                        *ap->a_ncookies = 0;
                        *ap->a_cookies = NULL;
                }
        } else {
                if (cookies) {
                        *ap->a_ncookies = cookie_index;
                        *ap->a_cookies = cookies;
                }
        }
        return (error);
}

/*
 * hammer2_vop_readlink { vp, uio, cred }
 */
static
int
hammer2_vop_readlink(struct vop_readlink_args *ap)
{
        struct vnode *vp;
        hammer2_inode_t *ip;
        int error;

        vp = ap->a_vp;
        if (vp->v_type != VLNK)
                return (EINVAL);
        ip = VTOI(vp);

        error = hammer2_read_file(ip, ap->a_uio, 0);
        return (error);
}

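/*
 * hammer2_vop_read { vp, uio, ioflag, cred }
 */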
static
int
hammer2_vop_read(struct vop_read_args *ap)
{
        struct vnode *vp;
        hammer2_inode_t *ip;
        struct uio *uio;
        int error;
        int seqcount;
        int bigread;

        /*
         * Read operations supported on this vnode?
         */
        vp = ap->a_vp;
        if (vp->v_type != VREG)
                return (EINVAL);

        /*
         * Misc
         */
        ip = VTOI(vp);
        uio = ap->a_uio;
        error = 0;

        seqcount = ap->a_ioflag >> 16;
        bigread = (uio->uio_resid > 100 * 1024 * 1024);

        error = hammer2_read_file(ip, uio, seqcount);
        return (error);
}

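/*
 * hammer2_vop_write { vp, uio, ioflag, cred }
 */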
static
int
hammer2_vop_write(struct vop_write_args *ap)
{
        hammer2_inode_t *ip;
        thread_t td;
        struct vnode *vp;
        struct uio *uio;
        int error;
        int seqcount;

        /*
         * Write operations supported on this vnode?
         */
        vp = ap->a_vp;
        if (vp->v_type != VREG)
                return (EINVAL);

        /*
         * Misc
         */
        ip = VTOI(vp);
        uio = ap->a_uio;
        error = 0;
        if (ip->pmp->ronly) {
                return (EROFS);
        }

        seqcount = ap->a_ioflag >> 16;

        /*
         * Check resource limit
         */
        if (uio->uio_resid > 0 && (td = uio->uio_td) != NULL && td->td_proc &&
            uio->uio_offset + uio->uio_resid >
             td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
                lwpsignal(td->td_proc, td->td_lwp, SIGXFSZ);
                return (EFBIG);
        }

        /*
         * The transaction interlocks against flush initiations
         * (note: but will run concurrently with the actual flush).
         *
         * To avoid deadlocking against the VM system, we must flag any
         * transaction related to the buffer cache or other direct
         * VM page manipulation.
         */
        if (uio->uio_segflg == UIO_NOCOPY)
                hammer2_trans_init(ip->pmp, HAMMER2_TRANS_BUFCACHE);
        else
                hammer2_trans_init(ip->pmp, 0);
        error = hammer2_write_file(ip, uio, ap->a_ioflag, seqcount);
        hammer2_trans_done(ip->pmp);

        return (error);
}

/*
 * Perform read operations on a file or symlink given an UNLOCKED
 * inode and uio.  The passed ip must not be locked.
 */
static
int
hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, int seqcount)
{
        hammer2_off_t size;
        struct buf *bp;
        int error;

        error = 0;

        /*
         * UIO read loop.
         *
         * WARNING! Assumes that the kernel interlocks size changes at the
         *          vnode level.
         */
        hammer2_mtx_sh(&ip->lock);
        hammer2_mtx_sh(&ip->truncate_lock);
        size = ip->meta.size;
        hammer2_mtx_unlock(&ip->lock);

        while (uio->uio_resid > 0 && uio->uio_offset < size) {
                hammer2_key_t lbase;
                hammer2_key_t leof;
                int lblksize;
                int loff;
                int n;

                lblksize = hammer2_calc_logical(ip, uio->uio_offset,
                                                &lbase, &leof);

#if 1
                error = cluster_read(ip->vp, leof, lbase, lblksize,
                                     uio->uio_resid, seqcount * MAXBSIZE,
                                     &bp);
#else
                if (uio->uio_segflg == UIO_NOCOPY) {
                        bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0);
                        if (bp->b_flags & B_CACHE) {
                                int i;
                                int j = 0;
                                if (bp->b_xio.xio_npages != 16)
                                        kprintf("NPAGES BAD\n");
                                for (i = 0; i < bp->b_xio.xio_npages; ++i) {
                                        vm_page_t m;
                                        m = bp->b_xio.xio_pages[i];
                                        if (m == NULL || m->valid == 0) {
                                                kprintf("bp %016jx %016jx pg %d inv",
                                                        lbase, leof, i);
                                                if (m)
                                                        kprintf("m->object %p/%p", m->object, ip->vp->v_object);
                                                kprintf("\n");
                                                j = 1;
                                        }
                                }
                                if (j)
                                        kprintf("b_flags %08x, b_error %d\n", bp->b_flags, bp->b_error);
                        }
                        bqrelse(bp);
                }
                error = bread(ip->vp, lbase, lblksize, &bp);
#endif
                if (error) {
                        brelse(bp);
                        break;
                }
                loff = (int)(uio->uio_offset - lbase);
                n = lblksize - loff;
                if (n > uio->uio_resid)
                        n = uio->uio_resid;
                if (n > size - uio->uio_offset)
                        n = (int)(size - uio->uio_offset);
                bp->b_flags |= B_AGE;
                uiomovebp(bp, (char *)bp->b_data + loff, n, uio);
                bqrelse(bp);
        }
        hammer2_mtx_unlock(&ip->truncate_lock);

        return (error);
}

/*
 * Write to the file represented by the inode via the logical buffer cache.
 * The inode may represent a regular file or a symlink.
 *
 * The inode must not be locked.
 */
static
int
hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
                   int ioflag, int seqcount)
{
        hammer2_key_t old_eof;
        hammer2_key_t new_eof;
        struct buf *bp;
        int kflags;
        int error;
        int modified;

        /*
         * Setup if append
         *
         * WARNING! Assumes that the kernel interlocks size changes at the
         *          vnode level.
         */
        hammer2_mtx_ex(&ip->lock);
        hammer2_mtx_sh(&ip->truncate_lock);
        if (ioflag & IO_APPEND)
                uio->uio_offset = ip->meta.size;
        old_eof = ip->meta.size;

        /*
         * Extend the file if necessary.  If the write fails at some point
         * we will truncate it back down to cover as much as we were able
         * to write.
         *
         * Doing this now makes it easier to calculate buffer sizes in
         * the loop.
         */
        kflags = 0;
        error = 0;
        modified = 0;

        if (uio->uio_offset + uio->uio_resid > old_eof) {
                new_eof = uio->uio_offset + uio->uio_resid;
                modified = 1;
                hammer2_extend_file(ip, new_eof);
                kflags |= NOTE_EXTEND;
        } else {
                new_eof = old_eof;
        }
        hammer2_mtx_unlock(&ip->lock);

        /*
         * UIO write loop
         */
        while (uio->uio_resid > 0) {
                hammer2_key_t lbase;
                int trivial;
                int endofblk;
                int lblksize;
                int loff;
                int n;

                /*
                 * Don't allow the buffer build to blow out the buffer
                 * cache.
                 */
                if ((ioflag & IO_RECURSE) == 0)
                        bwillwrite(HAMMER2_PBUFSIZE);

                /*
                 * This nominally tells us how much we can cluster and
                 * what the logical buffer size needs to be.  Currently
                 * we don't try to cluster the write and just handle one
                 * block at a time.
                 */
                lblksize = hammer2_calc_logical(ip, uio->uio_offset,
                                                &lbase, NULL);
                loff = (int)(uio->uio_offset - lbase);

                KKASSERT(lblksize <= 65536);

                /*
                 * Calculate bytes to copy this transfer and whether the
                 * copy completely covers the buffer or not.
                 */
                trivial = 0;
                n = lblksize - loff;
                if (n > uio->uio_resid) {
                        n = uio->uio_resid;
                        if (loff == lbase && uio->uio_offset + n == new_eof)
                                trivial = 1;
                        endofblk = 0;
                } else {
                        if (loff == 0)
                                trivial = 1;
                        endofblk = 1;
                }
                if (lbase >= new_eof)
                        trivial = 1;

                /*
                 * Get the buffer
                 */
                if (uio->uio_segflg == UIO_NOCOPY) {
                        /*
                         * Issuing a write with the same data backing the
                         * buffer.  Instantiate the buffer to collect the
                         * backing vm pages, then read-in any missing bits.
                         *
                         * This case is used by vop_stdputpages().
                         */
                        bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0);
                        if ((bp->b_flags & B_CACHE) == 0) {
                                bqrelse(bp);
                                error = bread(ip->vp, lbase, lblksize, &bp);
                        }
                } else if (trivial) {
                        /*
                         * Even though we are entirely overwriting the buffer
                         * we may still have to zero it out to avoid a
                         * mmap/write visibility issue.
                         */
                        bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0);
                        if ((bp->b_flags & B_CACHE) == 0)
                                vfs_bio_clrbuf(bp);
                } else {
                        /*
                         * Partial overwrite, read in any missing bits then
                         * replace the portion being written.
                         *
                         * (The strategy code will detect zero-fill physical
                         * blocks for this case).
                         */
                        error = bread(ip->vp, lbase, lblksize, &bp);
                        if (error == 0)
                                bheavy(bp);
                }

                if (error) {
                        brelse(bp);
                        break;
                }

                /*
                 * Ok, copy the data in
                 */
                error = uiomovebp(bp, bp->b_data + loff, n, uio);
                kflags |= NOTE_WRITE;
                modified = 1;
                if (error) {
                        brelse(bp);
                        break;
                }

                /*
                 * WARNING: Pageout daemon will issue UIO_NOCOPY writes
                 *          with IO_SYNC or IO_ASYNC set.  These writes
                 *          must be handled as the pageout daemon expects.
                 *
                 * NOTE!    H2 relies on cluster_write() here because it
                 *          cannot preallocate disk blocks at the logical
                 *          level due to not knowing what the compression
                 *          size will be at this time.
                 *
                 *          We must use cluster_write() here and we depend
                 *          on the write-behind feature to flush buffers
                 *          appropriately.  If we let the buffer daemons do
                 *          it the block allocations will be all over the
                 *          map.
                 */
                if (ioflag & IO_SYNC) {
                        bwrite(bp);
                } else if ((ioflag & IO_DIRECT) && endofblk) {
                        bawrite(bp);
                } else if (ioflag & IO_ASYNC) {
                        bawrite(bp);
                } else if (ip->vp->v_mount->mnt_flag & MNT_NOCLUSTERW) {
                        bdwrite(bp);
                } else {
#if 1
                        bp->b_flags |= B_CLUSTEROK;
                        cluster_write(bp, new_eof, lblksize, seqcount);
#else
                        bp->b_flags |= B_CLUSTEROK;
                        bdwrite(bp);
#endif
                }
        }

        /*
         * Cleanup.  If we extended the file EOF but failed to write through,
         * the entire write is a failure and we have to back-up.
         */
        if (error && new_eof != old_eof) {
                hammer2_mtx_unlock(&ip->truncate_lock);
                hammer2_mtx_ex(&ip->lock);
                hammer2_mtx_ex(&ip->truncate_lock);
                hammer2_truncate_file(ip, old_eof);
                if (ip->flags & HAMMER2_INODE_MODIFIED)
                        hammer2_inode_chain_sync(ip);
                hammer2_mtx_unlock(&ip->lock);
        } else if (modified) {
                hammer2_mtx_ex(&ip->lock);
                hammer2_inode_modify(ip);
                hammer2_update_time(&ip->meta.mtime);
                if (ip->flags & HAMMER2_INODE_MODIFIED)
                        hammer2_inode_chain_sync(ip);
                hammer2_mtx_unlock(&ip->lock);
                hammer2_knote(ip->vp, kflags);
        }
        hammer2_trans_assert_strategy(ip->pmp);
        hammer2_mtx_unlock(&ip->truncate_lock);

        return error;
}

/*
 * Truncate the size of a file.  The inode must be locked on entry; the
 * code below transiently releases ip->lock around the nvtruncbuf() call.
 *
 * We must unconditionally set HAMMER2_INODE_RESIZED to properly
 * ensure that any on-media data beyond the new file EOF has been destroyed.
 *
 * WARNING: nvtruncbuf() can only be safely called without the inode lock
 *          held due to the way our write thread works.  If the truncation
 *          occurs in the middle of a buffer, nvtruncbuf() is responsible
 *          for dirtying that buffer and zeroing out trailing bytes.
 *
 * WARNING! Assumes that the kernel interlocks size changes at the
 *          vnode level.
 *
 * WARNING! Caller assumes responsibility for removing dead blocks
 *          if INODE_RESIZED is set.
 */
static
void
hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize)
{
        hammer2_key_t lbase;
        int nblksize;

        hammer2_mtx_unlock(&ip->lock);
        if (ip->vp) {
                nblksize = hammer2_calc_logical(ip, nsize, &lbase, NULL);
                nvtruncbuf(ip->vp, nsize,
                           nblksize, (int)nsize & (nblksize - 1),
                           0);
        }
        hammer2_mtx_ex(&ip->lock);
        KKASSERT((ip->flags & HAMMER2_INODE_RESIZED) == 0);
        ip->osize = ip->meta.size;
        ip->meta.size = nsize;
        atomic_set_int(&ip->flags, HAMMER2_INODE_RESIZED);
        hammer2_inode_modify(ip);
}

/*
 * Extend the size of a file.  The inode must be locked on entry; the
 * code below transiently releases ip->lock around the nvextendbuf() call.
 *
 * Even though the file size is changing, we do not have to set the
 * INODE_RESIZED bit unless the file size crosses the EMBEDDED_BYTES
 * boundary.  When this occurs a hammer2_inode_chain_sync() is required
 * to prepare the inode cluster's indirect block table, otherwise
 * async execution of the strategy code will implode on us.
 *
 * WARNING! Assumes that the kernel interlocks size changes at the
 *          vnode level.
 *
 * WARNING! Caller assumes responsibility for transitioning out
 *          of the inode DIRECTDATA mode if INODE_RESIZED is set.
 */
static
void
hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize)
{
        hammer2_key_t lbase;
        hammer2_key_t osize;
        int oblksize;
        int nblksize;

        KKASSERT((ip->flags & HAMMER2_INODE_RESIZED) == 0);
        hammer2_inode_modify(ip);
        osize = ip->meta.size;
        ip->osize = osize;
        ip->meta.size = nsize;

        if (osize <= HAMMER2_EMBEDDED_BYTES && nsize > HAMMER2_EMBEDDED_BYTES) {
                atomic_set_int(&ip->flags, HAMMER2_INODE_RESIZED);
                hammer2_inode_chain_sync(ip);
        }

        hammer2_mtx_unlock(&ip->lock);
        if (ip->vp) {
                oblksize = hammer2_calc_logical(ip, osize, &lbase, NULL);
                nblksize = hammer2_calc_logical(ip, nsize, &lbase, NULL);
                nvextendbuf(ip->vp,
                            osize, nsize,
                            oblksize, nblksize,
                            -1, -1, 0);
        }
        hammer2_mtx_ex(&ip->lock);
}

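/*
 * hammer2_vop_nresolve { nch, dvp, cred }
 */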
static
int
hammer2_vop_nresolve(struct vop_nresolve_args *ap)
{
        hammer2_xop_nresolve_t *xop;
        hammer2_inode_t *ip;
        hammer2_inode_t *dip;
        struct namecache *ncp;
        struct vnode *vp;
        int error;

        dip = VTOI(ap->a_dvp);
        xop = hammer2_xop_alloc(dip, 0);

        ncp = ap->a_nch->ncp;
        hammer2_xop_setname(&xop->head, ncp->nc_name, ncp->nc_nlen);

        /*
         * Note: In DragonFly the kernel handles '.' and '..'.
         */
        hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);
        hammer2_xop_start(&xop->head, hammer2_xop_nresolve);

        error = hammer2_xop_collect(&xop->head, 0);
        if (error) {
                ip = NULL;
        } else {
                ip = hammer2_inode_get(dip->pmp, dip, &xop->head.cluster, -1);
        }
        hammer2_inode_unlock(dip);

        /*
         * Acquire the related vnode
         *
         * NOTE: For error processing, only ENOENT resolves the namecache
         *       entry to NULL, otherwise we just return the error and
         *       leave the namecache unresolved.
         *
         * NOTE: multiple hammer2_inode structures can be aliased to the
         *       same chain element, for example for hardlinks.  This
         *       use case does not 'reattach' inode associations that
         *       might already exist, but always allocates a new one.
         *
         * WARNING: inode structure is locked exclusively via inode_get
         *          but chain was locked shared.  inode_unlock()
         *          will handle it properly.
         */
        if (ip) {
                vp = hammer2_igetv(ip, &error);
                if (error == 0) {
                        vn_unlock(vp);
                        cache_setvp(ap->a_nch, vp);
                } else if (error == ENOENT) {
                        cache_setvp(ap->a_nch, NULL);
                }
                hammer2_inode_unlock(ip);

                /*
                 * The vp should not be released until after we've disposed
                 * of our locks, because it might cause vop_inactive() to
                 * be called.
                 */
                if (vp)
                        vrele(vp);
        } else {
                error = ENOENT;
                cache_setvp(ap->a_nch, NULL);
        }
        hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
        KASSERT(error || ap->a_nch->ncp->nc_vp != NULL,
                ("resolve error %d/%p ap %p\n",
                 error, ap->a_nch->ncp->nc_vp, ap));

        return error;
}

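/*
 * Resolve '..' using the iparent inode number stored in the directory
 * inode's meta-data.
 */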
static
int
hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
{
        hammer2_inode_t *dip;
        hammer2_tid_t inum;
        int error;

        dip = VTOI(ap->a_dvp);
        inum = dip->meta.iparent;
        *ap->a_vpp = NULL;

        if (inum) {
                error = hammer2_vfs_vget(ap->a_dvp->v_mount, NULL,
                                         inum, ap->a_vpp);
        } else {
                error = ENOENT;
        }
        return error;
}

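/*
 * hammer2_vop_nmkdir { nch, dvp, vpp, cred, vap }
 */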
1267 static
1268 int
1269 hammer2_vop_nmkdir(struct vop_nmkdir_args *ap)
1270 {
1271         hammer2_inode_t *dip;
1272         hammer2_inode_t *nip;
1273         struct namecache *ncp;
1274         const uint8_t *name;
1275         size_t name_len;
1276         hammer2_tid_t inum;
1277         int error;
1278
1279         dip = VTOI(ap->a_dvp);
1280         if (dip->pmp->ronly)
1281                 return (EROFS);
1282
1283         ncp = ap->a_nch->ncp;
1284         name = ncp->nc_name;
1285         name_len = ncp->nc_nlen;
1286
1287         hammer2_pfs_memory_wait(dip->pmp);
1288         hammer2_trans_init(dip->pmp, 0);
1289
1290         inum = hammer2_trans_newinum(dip->pmp);
1291
1292         /*
1293          * Create the actual inode as a hidden file in the iroot, then
1294          * create the directory entry.  The creation of the actual inode
1295          * sets its nlinks to 1 which is the value we desire.
1296          */
1297         nip = hammer2_inode_create(dip->pmp->iroot, dip, ap->a_vap, ap->a_cred,
1298                                    NULL, 0, inum,
1299                                    inum, 0, 0,
1300                                    0, &error);
1301         if (error == 0) {
1302                 error = hammer2_dirent_create(dip, name, name_len,
1303                                               nip->meta.inum, nip->meta.type);
1304         }
1305
1306         if (error) {
1307                 KKASSERT(nip == NULL);
1308                 *ap->a_vpp = NULL;
1309         } else {
1310                 *ap->a_vpp = hammer2_igetv(nip, &error);
1311                 hammer2_inode_unlock(nip);
1312         }
1313
1314         /*
1315          * Update dip's mtime
1316          */
1317         if (error == 0) {
1318                 uint64_t mtime;
1319
1320                 hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);
1321                 hammer2_update_time(&mtime);
1322                 hammer2_inode_modify(dip);
1323                 dip->meta.mtime = mtime;
1324                 hammer2_inode_unlock(dip);
1325         }
1326
1327         hammer2_trans_done(dip->pmp);
1328
1329         if (error == 0) {
1330                 cache_setunresolved(ap->a_nch);
1331                 cache_setvp(ap->a_nch, *ap->a_vpp);
1332                 hammer2_knote(ap->a_dvp, NOTE_WRITE | NOTE_LINK);
1333         }
1334         return error;
1335 }
1336
1337 static
1338 int
1339 hammer2_vop_open(struct vop_open_args *ap)
1340 {
1341         return vop_stdopen(ap);
1342 }
1343
1344 /*
1345  * hammer2_vop_advlock { vp, id, op, fl, flags }
1346  */
1347 static
1348 int
1349 hammer2_vop_advlock(struct vop_advlock_args *ap)
1350 {
1351         hammer2_inode_t *ip = VTOI(ap->a_vp);
1352         hammer2_off_t size;
1353
1354         size = ip->meta.size;
1355         return (lf_advlock(ap, &ip->advlock, size));
1356 }
1357
1358 static
1359 int
1360 hammer2_vop_close(struct vop_close_args *ap)
1361 {
1362         return vop_stdclose(ap);
1363 }
1364
1365 /*
1366  * hammer2_vop_nlink { nch, dvp, vp, cred }
1367  *
1368  * Create a hardlink from (vp) to {dvp, nch}.
1369  */
1370 static
1371 int
1372 hammer2_vop_nlink(struct vop_nlink_args *ap)
1373 {
1374         hammer2_inode_t *tdip;  /* target directory to create link in */
1375         hammer2_inode_t *ip;    /* inode we are hardlinking to */
1376         struct namecache *ncp;
1377         const uint8_t *name;
1378         size_t name_len;
1379         int error;
1380
1381         if (ap->a_dvp->v_mount != ap->a_vp->v_mount)
1382                 return(EXDEV);
1383
1384         tdip = VTOI(ap->a_dvp);
1385         if (tdip->pmp->ronly)
1386                 return (EROFS);
1387
1388         ncp = ap->a_nch->ncp;
1389         name = ncp->nc_name;
1390         name_len = ncp->nc_nlen;
1391
1392         /*
1393          * ip represents the file being hardlinked.  The file could be a
1394          * normal file or a hardlink target if it has already been hardlinked.
1395          * (with the new semantics, it will almost always be a hardlink
1396          * target).
1397          *
1398          * Bump nlinks and potentially also create or move the hardlink
1399          * target in the parent directory common to (ip) and (tdip).  The
1400          * consolidation code can modify ip->cluster.  The returned cluster
1401          * is locked.
1402          */
1403         ip = VTOI(ap->a_vp);
1404         KASSERT(ip->pmp, ("ip->pmp is NULL %p %p", ip, ip->pmp));
1405         hammer2_pfs_memory_wait(ip->pmp);
1406         hammer2_trans_init(ip->pmp, 0);
1407
1408         /*
1409          * Target should be an indexed inode or there's no way we will ever
1410          * be able to find it!
1411          */
1412         KKASSERT((ip->meta.name_key & HAMMER2_DIRHASH_VISIBLE) == 0);
1413
1414         error = 0;
1415
1416         /*
1417          * Can return NULL and error == EXDEV if the common parent
1418          * crosses a directory with the xlink flag set.
1419          */
1420         hammer2_inode_lock(tdip, 0);
1421         hammer2_inode_lock(ip, 0);
1422
1423         /*
1424          * Create the directory entry and bump nlinks.
1425          */
1426         if (error == 0) {
1427                 error = hammer2_dirent_create(tdip, name, name_len,
1428                                               ip->meta.inum, ip->meta.type);
1429                 hammer2_inode_modify(ip);
1430                 ++ip->meta.nlinks;
1431         }
1432         if (error == 0) {
1433                 /*
1434                  * Update dip's mtime
1435                  */
1436                 uint64_t mtime;
1437
1438                 hammer2_update_time(&mtime);
1439                 hammer2_inode_modify(tdip);
1440                 tdip->meta.mtime = mtime;
1441
1442                 cache_setunresolved(ap->a_nch);
1443                 cache_setvp(ap->a_nch, ap->a_vp);
1444         }
1445         hammer2_inode_unlock(ip);
1446         hammer2_inode_unlock(tdip);
1447
1448         hammer2_trans_done(ip->pmp);
1449         hammer2_knote(ap->a_vp, NOTE_LINK);
1450         hammer2_knote(ap->a_dvp, NOTE_WRITE);
1451
1452         return error;
1453 }
1454
1455 /*
1456  * hammer2_vop_ncreate { nch, dvp, vpp, cred, vap }
1457  *
1458  * The operating system has already ensured that the directory entry
1459  * does not exist and done all appropriate namespace locking.
1460  */
1461 static
1462 int
1463 hammer2_vop_ncreate(struct vop_ncreate_args *ap)
1464 {
1465         hammer2_inode_t *dip;
1466         hammer2_inode_t *nip;
1467         struct namecache *ncp;
1468         const uint8_t *name;
1469         size_t name_len;
1470         hammer2_tid_t inum;
1471         int error;
1472
1473         dip = VTOI(ap->a_dvp);
1474         if (dip->pmp->ronly)
1475                 return (EROFS);
1476
1477         ncp = ap->a_nch->ncp;
1478         name = ncp->nc_name;
1479         name_len = ncp->nc_nlen;
1480         hammer2_pfs_memory_wait(dip->pmp);
1481         hammer2_trans_init(dip->pmp, 0);
1482
1483         inum = hammer2_trans_newinum(dip->pmp);
1484
1485         /*
1486          * Create the actual inode as a hidden file in the iroot, then
1487          * create the directory entry.  The creation of the actual inode
1488          * sets its nlinks to 1 which is the value we desire.
1489          */
1490         nip = hammer2_inode_create(dip->pmp->iroot, dip, ap->a_vap, ap->a_cred,
1491                                    NULL, 0, inum,
1492                                    inum, 0, 0,
1493                                    0, &error);
1494
1495         if (error == 0) {
1496                 error = hammer2_dirent_create(dip, name, name_len,
1497                                               nip->meta.inum, nip->meta.type);
1498         }
1499         if (error) {
1500                 KKASSERT(nip == NULL);
1501                 *ap->a_vpp = NULL;
1502         } else {
1503                 *ap->a_vpp = hammer2_igetv(nip, &error);
1504                 hammer2_inode_unlock(nip);
1505         }
1506
1507         /*
1508          * Update dip's mtime
1509          */
1510         if (error == 0) {
1511                 uint64_t mtime;
1512
1513                 hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);
1514                 hammer2_update_time(&mtime);
1515                 hammer2_inode_modify(dip);
1516                 dip->meta.mtime = mtime;
1517                 hammer2_inode_unlock(dip);
1518         }
1519
1520         hammer2_trans_done(dip->pmp);
1521
1522         if (error == 0) {
1523                 cache_setunresolved(ap->a_nch);
1524                 cache_setvp(ap->a_nch, *ap->a_vpp);
1525                 hammer2_knote(ap->a_dvp, NOTE_WRITE);
1526         }
1527         return error;
1528 }
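
/*
 * The create-style VOPs (ncreate, nmknod, nsymlink, nmkdir) all share
 * the same two-step shape: allocate the inode, indexed by its inode
 * number under the PFS iroot, then add a directory entry recording
 * only (name, inum, type).  A condensed sketch of the shared pattern,
 * using a hypothetical helper name:
 */
#if 0
static int
hammer2_create_and_link_sketch(hammer2_inode_t *dip, const uint8_t *name,
                               size_t name_len, struct vattr *vap,
                               struct ucred *cred, hammer2_inode_t **nipp)
{
        hammer2_tid_t inum;
        int error;

        inum = hammer2_trans_newinum(dip->pmp);

        /* step 1: the inode itself, created with nlinks == 1 */
        *nipp = hammer2_inode_create(dip->pmp->iroot, dip, vap, cred,
                                     NULL, 0, inum, inum, 0, 0, 0, &error);
        if (error)
                return (error);         /* *nipp is NULL on error */

        /* step 2: the directory entry only points at the inode number */
        return (hammer2_dirent_create(dip, name, name_len,
                                      (*nipp)->meta.inum,
                                      (*nipp)->meta.type));
}
#endif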
1529
1530 /*
1531  * Make a device node (typically a fifo)
1532  */
1533 static
1534 int
1535 hammer2_vop_nmknod(struct vop_nmknod_args *ap)
1536 {
1537         hammer2_inode_t *dip;
1538         hammer2_inode_t *nip;
1539         struct namecache *ncp;
1540         const uint8_t *name;
1541         size_t name_len;
1542         hammer2_tid_t inum;
1543         int error;
1544
1545         dip = VTOI(ap->a_dvp);
1546         if (dip->pmp->ronly)
1547                 return (EROFS);
1548
1549         ncp = ap->a_nch->ncp;
1550         name = ncp->nc_name;
1551         name_len = ncp->nc_nlen;
1552         hammer2_pfs_memory_wait(dip->pmp);
1553         hammer2_trans_init(dip->pmp, 0);
1554
1555         /*
1556          * Create the device inode and then create the directory entry.
1557          */
1558         inum = hammer2_trans_newinum(dip->pmp);
1559         nip = hammer2_inode_create(dip->pmp->iroot, dip, ap->a_vap, ap->a_cred,
1560                                    NULL, 0, inum,
1561                                    inum, 0, 0,
1562                                    0, &error);
1563         if (error == 0) {
1564                 error = hammer2_dirent_create(dip, name, name_len,
1565                                               nip->meta.inum, nip->meta.type);
1566         }
1567
1569         if (error) {
1570                 KKASSERT(nip == NULL);
1571                 *ap->a_vpp = NULL;
1572         } else {
1573                 *ap->a_vpp = hammer2_igetv(nip, &error);
1574                 hammer2_inode_unlock(nip);
1575         }
1576
1577         /*
1578          * Update dip's mtime
1579          */
1580         if (error == 0) {
1581                 uint64_t mtime;
1582
1583                 hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);
1584                 hammer2_update_time(&mtime);
1585                 hammer2_inode_modify(dip);
1586                 dip->meta.mtime = mtime;
1587                 hammer2_inode_unlock(dip);
1588         }
1589
1590         hammer2_trans_done(dip->pmp);
1591
1592         if (error == 0) {
1593                 cache_setunresolved(ap->a_nch);
1594                 cache_setvp(ap->a_nch, *ap->a_vpp);
1595                 hammer2_knote(ap->a_dvp, NOTE_WRITE);
1596         }
1597         return error;
1598 }
1599
1600 /*
1601  * hammer2_vop_nsymlink { nch, dvp, vpp, cred, vap, target }
1602  */
1603 static
1604 int
1605 hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
1606 {
1607         hammer2_inode_t *dip;
1608         hammer2_inode_t *nip;
1609         struct namecache *ncp;
1610         const uint8_t *name;
1611         size_t name_len;
1612         hammer2_tid_t inum;
1613         int error;
1614
1615         dip = VTOI(ap->a_dvp);
1616         if (dip->pmp->ronly)
1617                 return (EROFS);
1618
1619         ncp = ap->a_nch->ncp;
1620         name = ncp->nc_name;
1621         name_len = ncp->nc_nlen;
1622         hammer2_pfs_memory_wait(dip->pmp);
1623         hammer2_trans_init(dip->pmp, 0);
1624
1625         ap->a_vap->va_type = VLNK;      /* enforce type */
1626
1627         /*
1628          * Create the softlink as an inode and then create the directory
1629          * entry.
1630          */
1631         inum = hammer2_trans_newinum(dip->pmp);
1632
1633         nip = hammer2_inode_create(dip->pmp->iroot, dip, ap->a_vap, ap->a_cred,
1634                                    NULL, 0, inum,
1635                                    inum, 0, 0,
1636                                    0, &error);
1637         if (error == 0) {
1638                 error = hammer2_dirent_create(dip, name, name_len,
1639                                               nip->meta.inum, nip->meta.type);
1640         }
1641
1643         if (error) {
1644                 KKASSERT(nip == NULL);
1645                 *ap->a_vpp = NULL;
1646                 hammer2_trans_done(dip->pmp);
1647                 return error;
1648         }
1649         *ap->a_vpp = hammer2_igetv(nip, &error);
1650
1651         /*
1652          * Build the softlink (stored like file data) and finalize the namecache.
1653          */
1654         if (error == 0) {
1655                 size_t bytes;
1656                 struct uio auio;
1657                 struct iovec aiov;
1658
1659                 bytes = strlen(ap->a_target);
1660
1661                 hammer2_inode_unlock(nip);
1662                 bzero(&auio, sizeof(auio));
1663                 bzero(&aiov, sizeof(aiov));
1664                 auio.uio_iov = &aiov;
1665                 auio.uio_segflg = UIO_SYSSPACE;
1666                 auio.uio_rw = UIO_WRITE;
1667                 auio.uio_resid = bytes;
1668                 auio.uio_iovcnt = 1;
1669                 auio.uio_td = curthread;
1670                 aiov.iov_base = ap->a_target;
1671                 aiov.iov_len = bytes;
1672                 error = hammer2_write_file(nip, &auio, IO_APPEND, 0);
1673                 /* XXX handle error */
1674                 error = 0;
1675         } else {
1676                 hammer2_inode_unlock(nip);
1677         }
1678
1679         /*
1680          * Update dip's mtime
1681          */
1682         if (error == 0) {
1683                 uint64_t mtime;
1684
1685                 hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);
1686                 hammer2_update_time(&mtime);
1687                 hammer2_inode_modify(dip);
1688                 dip->meta.mtime = mtime;
1689                 hammer2_inode_unlock(dip);
1690         }
1691
1692         hammer2_trans_done(dip->pmp);
1693
1694         /*
1695          * Finalize namecache
1696          */
1697         if (error == 0) {
1698                 cache_setunresolved(ap->a_nch);
1699                 cache_setvp(ap->a_nch, *ap->a_vpp);
1700                 hammer2_knote(ap->a_dvp, NOTE_WRITE);
1701         }
1702         return error;
1703 }
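
/*
 * The uio construction in nsymlink above is the standard idiom for
 * pushing a kernel-space buffer through the regular file write path.
 * Factored out as a sketch (hypothetical helper name, with the write
 * error propagated rather than ignored as in the caller):
 */
#if 0
static int
hammer2_write_kbuf_sketch(hammer2_inode_t *ip, void *buf, size_t bytes)
{
        struct uio auio;
        struct iovec aiov;

        bzero(&auio, sizeof(auio));
        bzero(&aiov, sizeof(aiov));
        aiov.iov_base = buf;                    /* kernel buffer */
        aiov.iov_len = bytes;
        auio.uio_iov = &aiov;
        auio.uio_iovcnt = 1;
        auio.uio_segflg = UIO_SYSSPACE;         /* no copyin needed */
        auio.uio_rw = UIO_WRITE;
        auio.uio_resid = bytes;
        auio.uio_td = curthread;
        return (hammer2_write_file(ip, &auio, IO_APPEND, 0));
}
#endif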
1704
1705 /*
1706  * hammer2_vop_nremove { nch, dvp, cred }
1707  */
1708 static
1709 int
1710 hammer2_vop_nremove(struct vop_nremove_args *ap)
1711 {
1712         hammer2_xop_unlink_t *xop;
1713         hammer2_inode_t *dip;
1714         hammer2_inode_t *ip;
1715         struct namecache *ncp;
1716         int error;
1717         int isopen;
1718
1719         dip = VTOI(ap->a_dvp);
1720         if (dip->pmp->ronly)
1721                 return(EROFS);
1722
1723         ncp = ap->a_nch->ncp;
1724
1725         hammer2_pfs_memory_wait(dip->pmp);
1726         hammer2_trans_init(dip->pmp, 0);
1727         hammer2_inode_lock(dip, 0);
1728
1729         /*
1730          * The unlink XOP unlinks the path from the directory and
1731          * locates and returns the cluster associated with the real inode.
1732          * We have to handle nlinks here on the frontend.
1733          */
1734         xop = hammer2_xop_alloc(dip, HAMMER2_XOP_MODIFYING);
1735         hammer2_xop_setname(&xop->head, ncp->nc_name, ncp->nc_nlen);
1736
1737         /*
1738          * The namecache entry is locked so nobody can use this namespace.
1739          * Calculate isopen to determine if this namespace has an open vp
1740          * associated with it and resolve the vp only if it does.
1741          *
1742          * We try to avoid resolving the vnode if nobody has it open, but
1743          * note that the test is via this namespace only.
1744          */
1745         isopen = cache_isopen(ap->a_nch);
1746         xop->isdir = 0;
1747         xop->dopermanent = 0;
1748         hammer2_xop_start(&xop->head, hammer2_xop_unlink);
1749
1750         /*
1751          * Collect the real inode and adjust nlinks, destroy the real
1752          * inode if nlinks transitions to 0 and it was the real inode
1753          * (else it has already been removed).
1754          */
1755         error = hammer2_xop_collect(&xop->head, 0);
1756         hammer2_inode_unlock(dip);
1757
1758         if (error == 0) {
1759                 ip = hammer2_inode_get(dip->pmp, dip, &xop->head.cluster, -1);
1760                 hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
1761                 if (ip) {
1762                         hammer2_inode_unlink_finisher(ip, isopen);
1763                         hammer2_inode_unlock(ip);
1764                 }
1765         } else {
1766                 hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
1767         }
1768
1769         /*
1770          * Update dip's mtime
1771          */
1772         if (error == 0) {
1773                 uint64_t mtime;
1774
1775                 hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);
1776                 hammer2_update_time(&mtime);
1777                 hammer2_inode_modify(dip);
1778                 dip->meta.mtime = mtime;
1779                 hammer2_inode_unlock(dip);
1780         }
1781
1782         hammer2_inode_run_sideq(dip->pmp);
1783         hammer2_trans_done(dip->pmp);
1784         if (error == 0) {
1785                 cache_unlink(ap->a_nch);
1786                 hammer2_knote(ap->a_dvp, NOTE_WRITE);
1787         }
1788         return (error);
1789 }
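
/*
 * nremove/nrmdir illustrate the general XOP frontend pattern: allocate
 * an XOP against the directory, attach the name, dispatch to the
 * backend threads, then collect the result and retire the XOP exactly
 * once on every path.  A bare-bones sketch (hypothetical function
 * name; the real callers also deal with the returned inode):
 */
#if 0
static int
hammer2_unlink_sketch(hammer2_inode_t *dip, const char *name, size_t nlen)
{
        hammer2_xop_unlink_t *xop;
        int error;

        xop = hammer2_xop_alloc(dip, HAMMER2_XOP_MODIFYING);
        hammer2_xop_setname(&xop->head, name, nlen);
        xop->isdir = 0;
        xop->dopermanent = 0;
        hammer2_xop_start(&xop->head, hammer2_xop_unlink);

        error = hammer2_xop_collect(&xop->head, 0);

        /* retire on both the success and the failure path */
        hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
        return (error);
}
#endif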
1790
1791 /*
1792  * hammer2_vop_nrmdir { nch, dvp, cred }
1793  */
1794 static
1795 int
1796 hammer2_vop_nrmdir(struct vop_nrmdir_args *ap)
1797 {
1798         hammer2_xop_unlink_t *xop;
1799         hammer2_inode_t *dip;
1800         hammer2_inode_t *ip;
1801         struct namecache *ncp;
1802         int isopen;
1803         int error;
1804
1805         dip = VTOI(ap->a_dvp);
1806         if (dip->pmp->ronly)
1807                 return(EROFS);
1808
1809         hammer2_pfs_memory_wait(dip->pmp);
1810         hammer2_trans_init(dip->pmp, 0);
1811         hammer2_inode_lock(dip, 0);
1812
1813         xop = hammer2_xop_alloc(dip, HAMMER2_XOP_MODIFYING);
1814
1815         ncp = ap->a_nch->ncp;
1816         hammer2_xop_setname(&xop->head, ncp->nc_name, ncp->nc_nlen);
1817         isopen = cache_isopen(ap->a_nch);
1818         xop->isdir = 1;
1819         xop->dopermanent = 0;
1820         hammer2_xop_start(&xop->head, hammer2_xop_unlink);
1821
1822         /*
1823          * Collect the real inode and adjust nlinks, destroy the real
1824          * inode if nlinks transitions to 0 and it was the real inode
1825          * (else it has already been removed).
1826          */
1827         error = hammer2_xop_collect(&xop->head, 0);
1828         hammer2_inode_unlock(dip);
1829
1830         if (error == 0) {
1831                 ip = hammer2_inode_get(dip->pmp, dip, &xop->head.cluster, -1);
1832                 hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
1833                 if (ip) {
1834                         hammer2_inode_unlink_finisher(ip, isopen);
1835                         hammer2_inode_unlock(ip);
1836                 }
1837         } else {
1838                 hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
1839         }
1840
1841         /*
1842          * Update dip's mtime
1843          */
1844         if (error == 0) {
1845                 uint64_t mtime;
1846
1847                 hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);
1848                 hammer2_update_time(&mtime);
1849                 hammer2_inode_modify(dip);
1850                 dip->meta.mtime = mtime;
1851                 hammer2_inode_unlock(dip);
1852         }
1853
1854         hammer2_inode_run_sideq(dip->pmp);
1855         hammer2_trans_done(dip->pmp);
1856         if (error == 0) {
1857                 cache_unlink(ap->a_nch);
1858                 hammer2_knote(ap->a_dvp, NOTE_WRITE | NOTE_LINK);
1859         }
1860         return (error);
1861 }
1862
1863 /*
1864  * hammer2_vop_nrename { fnch, tnch, fdvp, tdvp, cred }
1865  */
1866 static
1867 int
1868 hammer2_vop_nrename(struct vop_nrename_args *ap)
1869 {
1870         struct namecache *fncp;
1871         struct namecache *tncp;
1872         hammer2_inode_t *fdip;  /* source directory */
1873         hammer2_inode_t *tdip;  /* target directory */
1874         hammer2_inode_t *ip;    /* file being renamed */
1875         hammer2_inode_t *tip;   /* replaced target during rename or NULL */
1876         const uint8_t *fname;
1877         size_t fname_len;
1878         const uint8_t *tname;
1879         size_t tname_len;
1880         int error;
1881         int update_tdip;
1882         int update_fdip;
1883         hammer2_key_t tlhc;
1884
1885         if (ap->a_fdvp->v_mount != ap->a_tdvp->v_mount)
1886                 return(EXDEV);
1887         if (ap->a_fdvp->v_mount != ap->a_fnch->ncp->nc_vp->v_mount)
1888                 return(EXDEV);
1889
1890         fdip = VTOI(ap->a_fdvp);        /* source directory */
1891         tdip = VTOI(ap->a_tdvp);        /* target directory */
1892
1893         if (fdip->pmp->ronly)
1894                 return(EROFS);
1895
1896         fncp = ap->a_fnch->ncp;         /* entry name in source */
1897         fname = fncp->nc_name;
1898         fname_len = fncp->nc_nlen;
1899
1900         tncp = ap->a_tnch->ncp;         /* entry name in target */
1901         tname = tncp->nc_name;
1902         tname_len = tncp->nc_nlen;
1903
1904         hammer2_pfs_memory_wait(tdip->pmp);
1905         hammer2_trans_init(tdip->pmp, 0);
1906
1907         update_tdip = 0;
1908         update_fdip = 0;
1909
1910         ip = VTOI(fncp->nc_vp);
1911         hammer2_inode_ref(ip);          /* extra ref */
1912
1913         /*
1914          * Lookup the target name to determine if a directory entry
1915          * is being overwritten.  We only hold related inode locks
1916          * temporarily, the operating system is expected to protect
1917          * against rename races.
1918          */
1919         tip = tncp->nc_vp ? VTOI(tncp->nc_vp) : NULL;
1920         if (tip)
1921                 hammer2_inode_ref(tip); /* extra ref */
1922
1923         /*
1924          * Lock all involved inodes.  (The old common-parent/xlink
1925          * EXDEV failure case no longer exists here.)
1926          *
1927          * For now try to avoid deadlocks with a simple pointer address
1928          * test.  (tip) can be NULL.
1929          */
1930         error = 0;
1931         if (fdip <= tdip) {
1932                 hammer2_inode_lock(fdip, 0);
1933                 hammer2_inode_lock(tdip, 0);
1934         } else {
1935                 hammer2_inode_lock(tdip, 0);
1936                 hammer2_inode_lock(fdip, 0);
1937         }
1938         if (tip) {
1939                 if (ip <= tip) {
1940                         hammer2_inode_lock(ip, 0);
1941                         hammer2_inode_lock(tip, 0);
1942                 } else {
1943                         hammer2_inode_lock(tip, 0);
1944                         hammer2_inode_lock(ip, 0);
1945                 }
1946         } else {
1947                 hammer2_inode_lock(ip, 0);
1948         }
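
        /*
         * All involved inodes are now locked.  Each pair (the two
         * directories, then the file and any replaced target) was taken
         * in ascending pointer-address order so concurrent renames tend
         * to acquire the locks in the same order; as noted above this
         * is a simple heuristic against ABBA deadlocks rather than a
         * strict global lock order.
         */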
1949
1950 #if 0
1951         /*
1952          * Delete the target namespace.
1953          *
1954          * REMOVED - NOW FOLDED INTO XOP_NRENAME OPERATION
1955          */
1956         {
1957                 hammer2_xop_unlink_t *xop2;
1958                 hammer2_inode_t *tip;
1959                 int isopen;
1960
1961                 /*
1962                  * The unlink XOP unlinks the path from the directory and
1963                  * locates and returns the cluster associated with the real
1964                  * inode.  We have to handle nlinks here on the frontend.
1965                  */
1966                 xop2 = hammer2_xop_alloc(tdip, HAMMER2_XOP_MODIFYING);
1967                 hammer2_xop_setname(&xop2->head, tname, tname_len);
1968                 isopen = cache_isopen(ap->a_tnch);
1969                 xop2->isdir = -1;
1970                 xop2->dopermanent = 0;
1971                 hammer2_xop_start(&xop2->head, hammer2_xop_unlink);
1972
1973                 /*
1974                  * Collect the real inode and adjust nlinks, destroy the real
1975                  * inode if nlinks transitions to 0 and it was the real inode
1976                  * (else it has already been removed).
1977                  */
1978                 tnch_error = hammer2_xop_collect(&xop2->head, 0);
1979                 /* hammer2_inode_unlock(tdip); */
1980
1981                 if (tnch_error == 0) {
1982                         tip = hammer2_inode_get(tdip->pmp, NULL,
1983                                                 &xop2->head.cluster, -1);
1984                         hammer2_xop_retire(&xop2->head, HAMMER2_XOPMASK_VOP);
1985                         if (tip) {
1986                                 hammer2_inode_unlink_finisher(tip, isopen);
1987                                 hammer2_inode_unlock(tip);
1988                         }
1989                 } else {
1990                         hammer2_xop_retire(&xop2->head, HAMMER2_XOPMASK_VOP);
1991                 }
1992                 /* hammer2_inode_lock(tdip, 0); */
1993
1994                 if (tnch_error && tnch_error != ENOENT) {
1995                         error = tnch_error;
1996                         goto done2;
1997                 }
1998                 update_tdip = 1;
1999         }
2000 #endif
2001
2002         /*
2003          * Resolve the collision space for (tdip, tname, tname_len)
2004          *
2005          * tdip must be held exclusively locked to prevent races since
2006          * multiple filenames can end up in the same collision space.
2007          */
2008         {
2009                 hammer2_xop_scanlhc_t *sxop;
2010                 hammer2_tid_t lhcbase;
2011
2012                 tlhc = hammer2_dirhash(tname, tname_len);
2013                 lhcbase = tlhc;
2014                 sxop = hammer2_xop_alloc(tdip, HAMMER2_XOP_MODIFYING);
2015                 sxop->lhc = tlhc;
2016                 hammer2_xop_start(&sxop->head, hammer2_xop_scanlhc);
2017                 while ((error = hammer2_xop_collect(&sxop->head, 0)) == 0) {
2018                         if (tlhc != sxop->head.cluster.focus->bref.key)
2019                                 break;
2020                         ++tlhc;
2021                 }
2022                 hammer2_xop_retire(&sxop->head, HAMMER2_XOPMASK_VOP);
2023
2024                 if (error) {
2025                         if (error != ENOENT)
2026                                 goto done2;
2027                         ++tlhc;
2028                         error = 0;
2029                 }
2030                 if ((lhcbase ^ tlhc) & ~HAMMER2_DIRHASH_LOMASK) {
2031                         error = ENOSPC;
2032                         goto done2;
2033                 }
2034         }
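
        /*
         * On success, tlhc holds a unique directory key: the hash of
         * the target name with its low HAMMER2_DIRHASH_LOMASK bits
         * bumped as needed to step past keys already present.  If the
         * increment would carry out of the low-mask bits, the collision
         * space is full and ENOSPC is returned above.
         */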
2035
2036         /*
2037          * Ready to go, issue the rename to the backend.  Note that meta-data
2038          * updates to the related inodes occur separately from the rename
2039          * operation.
2040          *
2041          * NOTE: While it is not necessary to update ip->meta.name*, doing
2042          *       so aids catastrophic recovery and debugging.
2043          */
2044         if (error == 0) {
2045                 hammer2_xop_nrename_t *xop4;
2046
2047                 xop4 = hammer2_xop_alloc(fdip, HAMMER2_XOP_MODIFYING);
2048                 xop4->lhc = tlhc;
2049                 xop4->ip_key = ip->meta.name_key;
2050                 hammer2_xop_setip2(&xop4->head, ip);
2051                 hammer2_xop_setip3(&xop4->head, tdip);
2052                 hammer2_xop_setname(&xop4->head, fname, fname_len);
2053                 hammer2_xop_setname2(&xop4->head, tname, tname_len);
2054                 hammer2_xop_start(&xop4->head, hammer2_xop_nrename);
2055
2056                 error = hammer2_xop_collect(&xop4->head, 0);
2057                 hammer2_xop_retire(&xop4->head, HAMMER2_XOPMASK_VOP);
2058
2059                 if (error == ENOENT)
2060                         error = 0;
2061
2062                 /*
2063                  * Update inode meta-data.
2064                  *
2065                  * WARNING!  The in-memory inode (ip) structure does not
2066                  *           maintain a copy of the inode's filename buffer.
2067                  */
2068                 if (error == 0 &&
2069                     (ip->meta.name_key & HAMMER2_DIRHASH_VISIBLE)) {
2070                         hammer2_inode_modify(ip);
2071                         ip->meta.name_len = tname_len;
2072                         ip->meta.name_key = tlhc;
2073                 }
2074                 if (error == 0) {
2075                         hammer2_inode_modify(ip);
2076                         ip->meta.iparent = tdip->meta.inum;
2077                 }
2078                 update_fdip = 1;
2079                 update_tdip = 1;
2080         }
2081
2082 done2:
2083         /*
2084          * If no error, the backend has replaced the target directory entry.
2085          * We must adjust nlinks on the replaced target, if one existed.
2086          */
2087         if (error == 0 && tip) {
2088                 int isopen;
2089
2090                 isopen = cache_isopen(ap->a_tnch);
2091                 hammer2_inode_unlink_finisher(tip, isopen);
2092         }
2093
2094         /*
2095          * Update directory mtimes to reflect that something changed.
2096          */
2097         if (update_fdip || update_tdip) {
2098                 uint64_t mtime;
2099
2100                 hammer2_update_time(&mtime);
2101                 if (update_fdip) {
2102                         hammer2_inode_modify(fdip);
2103                         fdip->meta.mtime = mtime;
2104                 }
2105                 if (update_tdip) {
2106                         hammer2_inode_modify(tdip);
2107                         tdip->meta.mtime = mtime;
2108                 }
2109         }
2110         if (tip) {
2111                 hammer2_inode_unlock(tip);
2112                 hammer2_inode_drop(tip);
2113         }
2114         hammer2_inode_unlock(ip);
2115         hammer2_inode_unlock(tdip);
2116         hammer2_inode_unlock(fdip);
2117         hammer2_inode_drop(ip);
2118         hammer2_inode_run_sideq(fdip->pmp);
2119
2120         hammer2_trans_done(tdip->pmp);
2121
2122         /*
2123          * Issue the namecache update after unlocking all the internal
2124          * hammer2 structures, otherwise we might deadlock.
2125          */
2126         if (error == 0 && tip) {
2127                 cache_unlink(ap->a_tnch);
2128                 cache_setunresolved(ap->a_tnch);
2129         }
2130         if (error == 0) {
2131                 cache_rename(ap->a_fnch, ap->a_tnch);
2132                 hammer2_knote(ap->a_fdvp, NOTE_WRITE);
2133                 hammer2_knote(ap->a_tdvp, NOTE_WRITE);
2134                 hammer2_knote(fncp->nc_vp, NOTE_RENAME);
2135         }
2136
2137         return (error);
2138 }
2139
2140 /*
2141  * hammer2_vop_ioctl { vp, command, data, fflag, cred }
2142  */
2143 static
2144 int
2145 hammer2_vop_ioctl(struct vop_ioctl_args *ap)
2146 {
2147         hammer2_inode_t *ip;
2148         int error;
2149
2150         ip = VTOI(ap->a_vp);
2151
2152         error = hammer2_ioctl(ip, ap->a_command, (void *)ap->a_data,
2153                               ap->a_fflag, ap->a_cred);
2154         return (error);
2155 }
2156
2157 static
2158 int
2159 hammer2_vop_mountctl(struct vop_mountctl_args *ap)
2160 {
2161         struct mount *mp;
2162         hammer2_pfs_t *pmp;
2163         int rc;
2164
2165         switch (ap->a_op) {
2166         case (MOUNTCTL_SET_EXPORT):
2167                 mp = ap->a_head.a_ops->head.vv_mount;
2168                 pmp = MPTOPMP(mp);
2169
2170                 if (ap->a_ctllen != sizeof(struct export_args))
2171                         rc = (EINVAL);
2172                 else
2173                         rc = vfs_export(mp, &pmp->export,
2174                                         (const struct export_args *)ap->a_ctl);
2175                 break;
2176         default:
2177                 rc = vop_stdmountctl(ap);
2178                 break;
2179         }
2180         return (rc);
2181 }
2182
2183 /*
2184  * KQFILTER
2185  */
2186 static void filt_hammer2detach(struct knote *kn);
2187 static int filt_hammer2read(struct knote *kn, long hint);
2188 static int filt_hammer2write(struct knote *kn, long hint);
2189 static int filt_hammer2vnode(struct knote *kn, long hint);
2190
2191 static struct filterops hammer2read_filtops =
2192         { FILTEROP_ISFD | FILTEROP_MPSAFE,
2193           NULL, filt_hammer2detach, filt_hammer2read };
2194 static struct filterops hammer2write_filtops =
2195         { FILTEROP_ISFD | FILTEROP_MPSAFE,
2196           NULL, filt_hammer2detach, filt_hammer2write };
2197 static struct filterops hammer2vnode_filtops =
2198         { FILTEROP_ISFD | FILTEROP_MPSAFE,
2199           NULL, filt_hammer2detach, filt_hammer2vnode };
2200
2201 static
2202 int
2203 hammer2_vop_kqfilter(struct vop_kqfilter_args *ap)
2204 {
2205         struct vnode *vp = ap->a_vp;
2206         struct knote *kn = ap->a_kn;
2207
2208         switch (kn->kn_filter) {
2209         case EVFILT_READ:
2210                 kn->kn_fop = &hammer2read_filtops;
2211                 break;
2212         case EVFILT_WRITE:
2213                 kn->kn_fop = &hammer2write_filtops;
2214                 break;
2215         case EVFILT_VNODE:
2216                 kn->kn_fop = &hammer2vnode_filtops;
2217                 break;
2218         default:
2219                 return (EOPNOTSUPP);
2220         }
2221
2222         kn->kn_hook = (caddr_t)vp;
2223
2224         knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
2225
2226         return(0);
2227 }
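
/*
 * For reference, the filters above are what service a userland
 * EVFILT_VNODE registration.  A minimal monitoring program (userland
 * code, not part of this file or the kernel build) might look like
 * this:
 */
#if 0
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>

int
main(int argc, char **argv)
{
        struct kevent kev;
        int kq, fd;

        if (argc != 2) {
                fprintf(stderr, "usage: %s file\n", argv[0]);
                exit(1);
        }
        if ((fd = open(argv[1], O_RDONLY)) < 0 || (kq = kqueue()) < 0)
                exit(1);

        /* watch for the notes the hammer2 VOPs post via hammer2_knote() */
        EV_SET(&kev, fd, EVFILT_VNODE, EV_ADD | EV_CLEAR,
               NOTE_WRITE | NOTE_LINK | NOTE_RENAME, 0, NULL);
        if (kevent(kq, &kev, 1, NULL, 0, NULL) < 0)
                exit(1);

        for (;;) {
                if (kevent(kq, NULL, 0, &kev, 1, NULL) <= 0)
                        break;
                printf("vnode event fflags=0x%x\n", (unsigned)kev.fflags);
        }
        return (0);
}
#endif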
2228
2229 static void
2230 filt_hammer2detach(struct knote *kn)
2231 {
2232         struct vnode *vp = (void *)kn->kn_hook;
2233
2234         knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
2235 }
2236
2237 static int
2238 filt_hammer2read(struct knote *kn, long hint)
2239 {
2240         struct vnode *vp = (void *)kn->kn_hook;
2241         hammer2_inode_t *ip = VTOI(vp);
2242         off_t off;
2243
2244         if (hint == NOTE_REVOKE) {
2245                 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
2246                 return(1);
2247         }
2248         off = ip->meta.size - kn->kn_fp->f_offset;
2249         kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
2250         if (kn->kn_sfflags & NOTE_OLDAPI)
2251                 return(1);
2252         return (kn->kn_data != 0);
2253 }
2254
2256 static int
2257 filt_hammer2write(struct knote *kn, long hint)
2258 {
2259         if (hint == NOTE_REVOKE)
2260                 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
2261         kn->kn_data = 0;
2262         return (1);
2263 }
2264
2265 static int
2266 filt_hammer2vnode(struct knote *kn, long hint)
2267 {
2268         if (kn->kn_sfflags & hint)
2269                 kn->kn_fflags |= hint;
2270         if (hint == NOTE_REVOKE) {
2271                 kn->kn_flags |= (EV_EOF | EV_NODATA);
2272                 return (1);
2273         }
2274         return (kn->kn_fflags != 0);
2275 }
2276
2277 /*
2278  * FIFO VOPS
2279  */
2280 static
2281 int
2282 hammer2_vop_markatime(struct vop_markatime_args *ap)
2283 {
2284         hammer2_inode_t *ip;
2285         struct vnode *vp;
2286
2287         vp = ap->a_vp;
2288         ip = VTOI(vp);
2289
2290         if (ip->pmp->ronly)
2291                 return(EROFS);
2292         return(0);
2293 }
2294
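/*
 * kqfilter for fifos: let fifofs try the filter first and fall back
 * to the generic hammer2 vnode filters only if fifofs rejects it.
 */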
2295 static
2296 int
2297 hammer2_vop_fifokqfilter(struct vop_kqfilter_args *ap)
2298 {
2299         int error;
2300
2301         error = VOCALL(&fifo_vnode_vops, &ap->a_head);
2302         if (error)
2303                 error = hammer2_vop_kqfilter(ap);
2304         return(error);
2305 }
2306
2307 /*
2308  * VOPS vector
2309  */
2310 struct vop_ops hammer2_vnode_vops = {
2311         .vop_default    = vop_defaultop,
2312         .vop_fsync      = hammer2_vop_fsync,
2313         .vop_getpages   = vop_stdgetpages,
2314         .vop_putpages   = vop_stdputpages,
2315         .vop_access     = hammer2_vop_access,
2316         .vop_advlock    = hammer2_vop_advlock,
2317         .vop_close      = hammer2_vop_close,
2318         .vop_nlink      = hammer2_vop_nlink,
2319         .vop_ncreate    = hammer2_vop_ncreate,
2320         .vop_nsymlink   = hammer2_vop_nsymlink,
2321         .vop_nremove    = hammer2_vop_nremove,
2322         .vop_nrmdir     = hammer2_vop_nrmdir,
2323         .vop_nrename    = hammer2_vop_nrename,
2324         .vop_getattr    = hammer2_vop_getattr,
2325         .vop_setattr    = hammer2_vop_setattr,
2326         .vop_readdir    = hammer2_vop_readdir,
2327         .vop_readlink   = hammer2_vop_readlink,
2330         .vop_read       = hammer2_vop_read,
2331         .vop_write      = hammer2_vop_write,
2332         .vop_open       = hammer2_vop_open,
2333         .vop_inactive   = hammer2_vop_inactive,
2334         .vop_reclaim    = hammer2_vop_reclaim,
2335         .vop_nresolve   = hammer2_vop_nresolve,
2336         .vop_nlookupdotdot = hammer2_vop_nlookupdotdot,
2337         .vop_nmkdir     = hammer2_vop_nmkdir,
2338         .vop_nmknod     = hammer2_vop_nmknod,
2339         .vop_ioctl      = hammer2_vop_ioctl,
2340         .vop_mountctl   = hammer2_vop_mountctl,
2341         .vop_bmap       = hammer2_vop_bmap,
2342         .vop_strategy   = hammer2_vop_strategy,
2343         .vop_kqfilter   = hammer2_vop_kqfilter
2344 };
2345
2346 struct vop_ops hammer2_spec_vops = {
2347         .vop_default =          vop_defaultop,
2348         .vop_fsync =            hammer2_vop_fsync,
2349         .vop_read =             vop_stdnoread,
2350         .vop_write =            vop_stdnowrite,
2351         .vop_access =           hammer2_vop_access,
2352         .vop_close =            hammer2_vop_close,
2353         .vop_markatime =        hammer2_vop_markatime,
2354         .vop_getattr =          hammer2_vop_getattr,
2355         .vop_inactive =         hammer2_vop_inactive,
2356         .vop_reclaim =          hammer2_vop_reclaim,
2357         .vop_setattr =          hammer2_vop_setattr
2358 };
2359
2360 struct vop_ops hammer2_fifo_vops = {
2361         .vop_default =          fifo_vnoperate,
2362         .vop_fsync =            hammer2_vop_fsync,
2363 #if 0
2364         .vop_read =             hammer2_vop_fiforead,
2365         .vop_write =            hammer2_vop_fifowrite,
2366 #endif
2367         .vop_access =           hammer2_vop_access,
2368 #if 0
2369         .vop_close =            hammer2_vop_fifoclose,
2370 #endif
2371         .vop_markatime =        hammer2_vop_markatime,
2372         .vop_getattr =          hammer2_vop_getattr,
2373         .vop_inactive =         hammer2_vop_inactive,
2374         .vop_reclaim =          hammer2_vop_reclaim,
2375         .vop_setattr =          hammer2_vop_setattr,
2376         .vop_kqfilter =         hammer2_vop_fifokqfilter
2377 };
2378