hammer2 - Refactor frontend part 2/many
[dragonfly.git] / sys / vfs / hammer2 / hammer2_vnops.c
1 /*
2  * Copyright (c) 2011-2015 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  * by Daniel Flores (GSOC 2013 - mentored by Matthew Dillon, compression) 
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in
17  *    the documentation and/or other materials provided with the
18  *    distribution.
19  * 3. Neither the name of The DragonFly Project nor the names of its
20  *    contributors may be used to endorse or promote products derived
21  *    from this software without specific, prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
27  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
29  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
31  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
33  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 /*
37  * Kernel Filesystem interface
38  *
39  * NOTE! local ipdata pointers must be reloaded on any modifying operation
40  *       to the inode as its underlying chain may have changed.
41  */
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/kernel.h>
46 #include <sys/fcntl.h>
47 #include <sys/buf.h>
48 #include <sys/proc.h>
49 #include <sys/namei.h>
50 #include <sys/mount.h>
51 #include <sys/vnode.h>
52 #include <sys/mountctl.h>
53 #include <sys/dirent.h>
54 #include <sys/uio.h>
55 #include <sys/objcache.h>
56 #include <sys/event.h>
57 #include <sys/file.h>
58 #include <vfs/fifofs/fifo.h>
59
60 #include "hammer2.h"
61
/*
 * Forward declarations for the local file I/O and resize helpers
 * defined later in this file.
 */
static int hammer2_read_file(hammer2_inode_t *ip, struct uio *uio,
				int seqcount);
static int hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
				int ioflag, int seqcount);
static void hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize);
static void hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize);

/* objcache backing per-VOP scratch structures */
struct objcache *cache_vop_info;
70
71 static __inline
72 void
73 hammer2_knote(struct vnode *vp, int flags)
74 {
75         if (flags)
76                 KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags);
77 }
78
79 /*
80  * Last reference to a vnode is going away but it is still cached.
81  */
82 static
83 int
84 hammer2_vop_inactive(struct vop_inactive_args *ap)
85 {
86         hammer2_inode_t *ip;
87         struct vnode *vp;
88
89         LOCKSTART;
90         vp = ap->a_vp;
91         ip = VTOI(vp);
92
93         /*
94          * Degenerate case
95          */
96         if (ip == NULL) {
97                 vrecycle(vp);
98                 LOCKSTOP;
99                 return (0);
100         }
101
102         /*
103          * Check for deleted inodes and recycle immediately on the last
104          * release.  Be sure to destroy any left-over buffer cache buffers
105          * so we do not waste time trying to flush them.
106          *
107          * WARNING: nvtruncbuf() can only be safely called without the inode
108          *          lock held due to the way our write thread works.
109          */
110         if (ip->flags & HAMMER2_INODE_ISUNLINKED) {
111                 hammer2_key_t lbase;
112                 int nblksize;
113
114                 /*
115                  * Detect updates to the embedded data which may be
116                  * synchronized by the strategy code.  Simply mark the
117                  * inode modified so it gets picked up by our normal flush.
118                  */
119                 nblksize = hammer2_calc_logical(ip, 0, &lbase, NULL);
120                 nvtruncbuf(vp, 0, nblksize, 0, 0);
121                 vrecycle(vp);
122         }
123         LOCKSTOP;
124         return (0);
125 }
126
/*
 * Reclaim a vnode so that it can be reused; after the inode is
 * disassociated, the filesystem must manage it alone.
 *
 * Disassociates vp from ip.  If the inode is marked ISUNLINKED its
 * final destruction is deferred to the per-PFS unlink queue because a
 * reclaim may occur at a point where starting a transaction is unsafe.
 */
static
int
hammer2_vop_reclaim(struct vop_reclaim_args *ap)
{
	hammer2_inode_t *ip;
	hammer2_pfs_t *pmp;
	struct vnode *vp;

	LOCKSTART;
	vp = ap->a_vp;
	ip = VTOI(vp);
	if (ip == NULL) {
		/* vnode was never associated with an inode */
		LOCKSTOP;
		return(0);
	}
	pmp = ip->pmp;

	/*
	 * The final close of a deleted file or directory marks it for
	 * destruction.  The DELETED flag allows the flusher to shortcut
	 * any modified blocks still unflushed (that is, just ignore them).
	 *
	 * HAMMER2 usually does not try to optimize the freemap by returning
	 * deleted blocks to it as it does not usually know how many snapshots
	 * might be referencing portions of the file/dir.
	 */
	vp->v_data = NULL;
	ip->vp = NULL;

	/*
	 * NOTE! We do not attempt to flush chains here, flushing is
	 *       really fragile and could also deadlock.
	 */
	vclrisdirty(vp);

	/*
	 * Once reclaimed the inode is disconnected from the normal flush
	 * mechanism and must be tracked separately.
	 *
	 * A reclaim can occur at any time so we cannot safely start a
	 * transaction to handle reclamation of unlinked files.  Instead,
	 * the ip is left with a reference and placed on a linked list and
	 * handled later on.
	 */
	if (ip->flags & HAMMER2_INODE_ISUNLINKED) {
		hammer2_inode_unlink_t *ipul;

		ipul = kmalloc(sizeof(*ipul), pmp->minode, M_WAITOK | M_ZERO);
		ipul->ip = ip;

		hammer2_spin_ex(&pmp->list_spin);
		TAILQ_INSERT_TAIL(&pmp->unlinkq, ipul, entry);
		hammer2_spin_unex(&pmp->list_spin);
		/* retain ref from vp for ipul */
	} else {
		hammer2_inode_drop(ip);			/* vp ref */
	}

	/*
	 * XXX handle background sync when ip dirty, kernel will no longer
	 * notify us regarding this inode because there is no longer a
	 * vnode attached to it.
	 */

	LOCKSTOP;
	return (0);
}
198
/*
 * hammer2_vop_fsync { vp, waitfor }
 *
 * Flush the vnode's dirty buffers inside a normal (non-ISFLUSH)
 * transaction, then synchronize inode meta-data to the media copy if
 * the inode was resized or has a pending mtime update.
 */
static
int
hammer2_vop_fsync(struct vop_fsync_args *ap)
{
	hammer2_inode_t *ip;
	hammer2_trans_t trans;
	hammer2_cluster_t *cluster;
	struct vnode *vp;

	LOCKSTART;
	vp = ap->a_vp;
	ip = VTOI(vp);

#if 0
	/* XXX can't do this yet */
	hammer2_trans_init(&trans, ip->pmp, HAMMER2_TRANS_ISFLUSH);
	vfsync(vp, ap->a_waitfor, 1, NULL, NULL);
#endif
	hammer2_trans_init(&trans, ip->pmp, 0);
	vfsync(vp, ap->a_waitfor, 1, NULL, NULL);

	/*
	 * Calling chain_flush here creates a lot of duplicative
	 * COW operations due to non-optimal vnode ordering.
	 *
	 * Only do it for an actual fsync() syscall.  The other forms
	 * which call this function will eventually call chain_flush
	 * on the volume root as a catch-all, which is far more optimal.
	 */
	cluster = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS);
	/* MODIFIED is cleared before the meta-data sync below */
	atomic_clear_int(&ip->flags, HAMMER2_INODE_MODIFIED);
	/*vclrisdirty(vp);*/
	if (ip->flags & (HAMMER2_INODE_RESIZED|HAMMER2_INODE_MTIME))
		hammer2_inode_fsync(&trans, ip, cluster);

	hammer2_inode_unlock(ip, cluster);
	hammer2_trans_done(&trans);

	LOCKSTOP;
	return (0);
}
240
241 static
242 int
243 hammer2_vop_access(struct vop_access_args *ap)
244 {
245         hammer2_inode_t *ip = VTOI(ap->a_vp);
246         const hammer2_inode_data_t *ripdata;
247         hammer2_cluster_t *cluster;
248         uid_t uid;
249         gid_t gid;
250         int error;
251
252         LOCKSTART;
253         cluster = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS |
254                                          HAMMER2_RESOLVE_SHARED);
255         ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
256         uid = hammer2_to_unix_xid(&ripdata->uid);
257         gid = hammer2_to_unix_xid(&ripdata->gid);
258         error = vop_helper_access(ap, uid, gid, ripdata->mode, ripdata->uflags);
259         hammer2_inode_unlock(ip, cluster);
260
261         LOCKSTOP;
262         return (error);
263 }
264
265 static
266 int
267 hammer2_vop_getattr(struct vop_getattr_args *ap)
268 {
269         const hammer2_inode_data_t *ripdata;
270         hammer2_cluster_t *cluster;
271         hammer2_pfs_t *pmp;
272         hammer2_inode_t *ip;
273         hammer2_blockref_t bref;
274         struct vnode *vp;
275         struct vattr *vap;
276
277         LOCKSTART;
278         vp = ap->a_vp;
279         vap = ap->a_vap;
280
281         ip = VTOI(vp);
282         pmp = ip->pmp;
283
284         cluster = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS |
285                                          HAMMER2_RESOLVE_SHARED);
286         ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
287         KKASSERT(hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE);
288         hammer2_cluster_bref(cluster, &bref);
289
290         vap->va_fsid = pmp->mp->mnt_stat.f_fsid.val[0];
291         vap->va_fileid = ripdata->inum;
292         vap->va_mode = ripdata->mode;
293         vap->va_nlink = ripdata->nlinks;
294         vap->va_uid = hammer2_to_unix_xid(&ripdata->uid);
295         vap->va_gid = hammer2_to_unix_xid(&ripdata->gid);
296         vap->va_rmajor = 0;
297         vap->va_rminor = 0;
298         vap->va_size = ip->size;        /* protected by shared lock */
299         vap->va_blocksize = HAMMER2_PBUFSIZE;
300         vap->va_flags = ripdata->uflags;
301         hammer2_time_to_timespec(ripdata->ctime, &vap->va_ctime);
302         hammer2_time_to_timespec(ripdata->mtime, &vap->va_mtime);
303         hammer2_time_to_timespec(ripdata->mtime, &vap->va_atime);
304         vap->va_gen = 1;
305         vap->va_bytes = bref.data_count;
306         vap->va_type = hammer2_get_vtype(ripdata);
307         vap->va_filerev = 0;
308         vap->va_uid_uuid = ripdata->uid;
309         vap->va_gid_uuid = ripdata->gid;
310         vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
311                           VA_FSID_UUID_VALID;
312
313         hammer2_inode_unlock(ip, cluster);
314
315         LOCKSTOP;
316         return (0);
317 }
318
/*
 * hammer2_vop_setattr { vp, vap, cred }
 *
 * Apply attribute changes (flags, ownership, size, mtime, mode) to the
 * inode.  Every modification goes through hammer2_cluster_modify_ip(),
 * after which the local ripdata pointer must be reloaded (aliased to
 * wipdata) because the underlying chain may have changed.
 *
 * dosync requests a cluster modsync before unlock; domtime flags an
 * additional deferred mtime/MODIFIED update in the cleanup path.
 */
static
int
hammer2_vop_setattr(struct vop_setattr_args *ap)
{
	const hammer2_inode_data_t *ripdata;
	hammer2_inode_data_t *wipdata;
	hammer2_inode_t *ip;
	hammer2_cluster_t *cluster;
	hammer2_trans_t trans;
	struct vnode *vp;
	struct vattr *vap;
	int error;
	int kflags = 0;		/* kqueue notification flags */
	int domtime = 0;	/* deferred mtime update needed */
	int dosync = 0;		/* cluster modsync needed */
	uint64_t ctime;

	LOCKSTART;
	vp = ap->a_vp;
	vap = ap->a_vap;
	hammer2_update_time(&ctime);

	ip = VTOI(vp);

	if (ip->pmp->ronly) {
		LOCKSTOP;
		return(EROFS);
	}

	hammer2_pfs_memory_wait(ip->pmp);
	hammer2_trans_init(&trans, ip->pmp, 0);
	cluster = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS);
	ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
	error = 0;

	/*
	 * chflags is handled exclusively: after applying (or failing to
	 * apply) the flag change we jump straight to cleanup.
	 *
	 * NOTE(review): the unconditional goto below means no other
	 * attribute in *vap is processed when va_flags is set — confirm
	 * this matches caller expectations.
	 */
	if (vap->va_flags != VNOVAL) {
		u_int32_t flags;

		flags = ripdata->uflags;
		error = vop_helper_setattr_flags(&flags, vap->va_flags,
					 hammer2_to_unix_xid(&ripdata->uid),
					 ap->a_cred);
		if (error == 0) {
			if (ripdata->uflags != flags) {
				wipdata = hammer2_cluster_modify_ip(&trans, ip,
								    cluster, 0);
				wipdata->uflags = flags;
				wipdata->ctime = ctime;
				kflags |= NOTE_ATTRIB;
				dosync = 1;
				ripdata = wipdata;	/* RELOAD */
			}
			if (ripdata->uflags & (IMMUTABLE | APPEND)) {
				error = 0;
				goto done;
			}
		}
		goto done;
	}
	/* immutable/append-only inodes reject all other attribute changes */
	if (ripdata->uflags & (IMMUTABLE | APPEND)) {
		error = EPERM;
		goto done;
	}
	/*
	 * chown/chgrp.  uids/gids are stored as uuids on-media; compare
	 * the converted values to avoid a gratuitous modify.
	 */
	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
		mode_t cur_mode = ripdata->mode;
		uid_t cur_uid = hammer2_to_unix_xid(&ripdata->uid);
		gid_t cur_gid = hammer2_to_unix_xid(&ripdata->gid);
		uuid_t uuid_uid;
		uuid_t uuid_gid;

		error = vop_helper_chown(ap->a_vp, vap->va_uid, vap->va_gid,
					 ap->a_cred,
					 &cur_uid, &cur_gid, &cur_mode);
		if (error == 0) {
			hammer2_guid_to_uuid(&uuid_uid, cur_uid);
			hammer2_guid_to_uuid(&uuid_gid, cur_gid);
			if (bcmp(&uuid_uid, &ripdata->uid, sizeof(uuid_uid)) ||
			    bcmp(&uuid_gid, &ripdata->gid, sizeof(uuid_gid)) ||
			    ripdata->mode != cur_mode
			) {
				wipdata = hammer2_cluster_modify_ip(&trans, ip,
								    cluster, 0);
				wipdata->uid = uuid_uid;
				wipdata->gid = uuid_gid;
				wipdata->mode = cur_mode;
				wipdata->ctime = ctime;
				dosync = 1;
				ripdata = wipdata;	/* RELOAD */
			}
			kflags |= NOTE_ATTRIB;
		}
	}

	/*
	 * Resize the file
	 */
	if (vap->va_size != VNOVAL && ip->size != vap->va_size) {
		switch(vp->v_type) {
		case VREG:
			if (vap->va_size == ip->size)
				break;
			/*
			 * truncate/extend require the inode unlocked
			 * (see the nvtruncbuf warning in vop_inactive).
			 */
			hammer2_inode_unlock(ip, cluster);
			if (vap->va_size < ip->size) {
				hammer2_truncate_file(ip, vap->va_size);
			} else {
				hammer2_extend_file(ip, vap->va_size);
			}
			cluster = hammer2_inode_lock(ip,
						     HAMMER2_RESOLVE_ALWAYS);
			/* RELOAD */
			ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
			domtime = 1;
			break;
		default:
			error = EINVAL;
			goto done;
		}
	}
#if 0
	/* atime not supported */
	if (vap->va_atime.tv_sec != VNOVAL) {
		wipdata = hammer2_cluster_modify_ip(&trans, ip, cluster, 0);
		wipdata->atime = hammer2_timespec_to_time(&vap->va_atime);
		kflags |= NOTE_ATTRIB;
		dosync = 1;
		ripdata = wipdata;
	}
#endif
	if (vap->va_mtime.tv_sec != VNOVAL) {
		wipdata = hammer2_cluster_modify_ip(&trans, ip, cluster, 0);
		wipdata->mtime = hammer2_timespec_to_time(&vap->va_mtime);
		kflags |= NOTE_ATTRIB;
		domtime = 0;	/* explicit mtime supersedes deferred one */
		dosync = 1;
		ripdata = wipdata;	/* RELOAD */
	}
	if (vap->va_mode != (mode_t)VNOVAL) {
		mode_t cur_mode = ripdata->mode;
		uid_t cur_uid = hammer2_to_unix_xid(&ripdata->uid);
		gid_t cur_gid = hammer2_to_unix_xid(&ripdata->gid);

		error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred,
					 cur_uid, cur_gid, &cur_mode);
		if (error == 0 && ripdata->mode != cur_mode) {
			wipdata = hammer2_cluster_modify_ip(&trans, ip,
							    cluster, 0);
			wipdata->mode = cur_mode;
			wipdata->ctime = ctime;
			kflags |= NOTE_ATTRIB;
			dosync = 1;
			ripdata = wipdata;	/* RELOAD */
		}
	}

	/*
	 * If a truncation occurred we must call inode_fsync() now in order
	 * to trim the related data chains, otherwise a later expansion can
	 * cause havoc.
	 */
	if (dosync) {
		hammer2_cluster_modsync(cluster);
		dosync = 0;
	}
	hammer2_inode_fsync(&trans, ip, cluster);

	/*
	 * Cleanup.  If domtime is set an additional inode modification
	 * must be flagged.  All other modifications will have already
	 * set INODE_MODIFIED and called vsetisdirty().
	 */
done:
	if (domtime) {
		atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED |
					   HAMMER2_INODE_MTIME);
		vsetisdirty(ip->vp);
	}
	if (dosync)
		hammer2_cluster_modsync(cluster);
	hammer2_inode_unlock(ip, cluster);
	hammer2_trans_done(&trans);
	hammer2_knote(ip->vp, kflags);

	LOCKSTOP;
	return (error);
}
504
/*
 * hammer2_vop_readdir { vp, uio, cred, eofflag, ncookies, cookies }
 *
 * Emit directory entries into the uio.  Artificial "." and ".." entries
 * occupy offsets 0 and 1; real entries are scanned by directory hash
 * key.  Cookies are allocated up front when requested and handed to
 * the caller (or freed) in the cleanup path.
 */
static
int
hammer2_vop_readdir(struct vop_readdir_args *ap)
{
	const hammer2_inode_data_t *ripdata;
	hammer2_inode_t *ip;
	hammer2_inode_t *xip;
	hammer2_cluster_t *cparent;
	hammer2_cluster_t *cluster;
	hammer2_cluster_t *xcluster;
	hammer2_blockref_t bref;
	hammer2_tid_t inum;
	hammer2_key_t key_next;
	hammer2_key_t lkey;
	struct uio *uio;
	off_t *cookies;
	off_t saveoff;
	int cookie_index;
	int ncookies;
	int error;
	int dtype;
	int r;

	LOCKSTART;
	ip = VTOI(ap->a_vp);
	uio = ap->a_uio;
	saveoff = uio->uio_offset;

	/*
	 * Setup directory entry cookies if requested
	 */
	if (ap->a_ncookies) {
		ncookies = uio->uio_resid / 16 + 1;
		if (ncookies > 1024)
			ncookies = 1024;
		cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
	} else {
		ncookies = -1;
		cookies = NULL;
	}
	cookie_index = 0;

	cparent = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS |
					 HAMMER2_RESOLVE_SHARED);

	ripdata = &hammer2_cluster_rdata(cparent)->ipdata;

	/*
	 * Handle artificial entries.  To ensure that only positive 64 bit
	 * quantities are returned to userland we always strip off bit 63.
	 * The hash code is designed such that codes 0x0000-0x7FFF are not
	 * used, allowing us to use these codes for artificial entries.
	 *
	 * Entry 0 is used for '.' and entry 1 is used for '..'.  Do not
	 * allow '..' to cross the mount point into (e.g.) the super-root.
	 */
	error = 0;
	cluster = (void *)(intptr_t)-1;	/* non-NULL for early goto done case */

	if (saveoff == 0) {
		inum = ripdata->inum & HAMMER2_DIRHASH_USERMSK;
		r = vop_write_dirent(&error, uio, inum, DT_DIR, 1, ".");
		if (r)
			goto done;
		if (cookies)
			cookies[cookie_index] = saveoff;
		++saveoff;
		++cookie_index;
		if (cookie_index == ncookies)
			goto done;
	}

	if (saveoff == 1) {
		/*
		 * Be careful with lockorder when accessing ".."
		 *
		 * (ip is the current dir. xip is the parent dir).
		 */
		inum = ripdata->inum & HAMMER2_DIRHASH_USERMSK;
		while (ip->pip != NULL && ip != ip->pmp->iroot) {
			xip = ip->pip;
			hammer2_inode_ref(xip);
			/*
			 * Drop ip's lock, lock parent then child, and
			 * retry if the parent changed in the interim.
			 */
			hammer2_inode_unlock(ip, cparent);
			xcluster = hammer2_inode_lock(xip,
						      HAMMER2_RESOLVE_ALWAYS |
						      HAMMER2_RESOLVE_SHARED);

			cparent = hammer2_inode_lock(ip,
						      HAMMER2_RESOLVE_ALWAYS |
						      HAMMER2_RESOLVE_SHARED);
			hammer2_inode_drop(xip);
			ripdata = &hammer2_cluster_rdata(cparent)->ipdata;
			if (xip == ip->pip) {
				inum = hammer2_cluster_rdata(xcluster)->
					ipdata.inum & HAMMER2_DIRHASH_USERMSK;
				hammer2_inode_unlock(xip, xcluster);
				break;
			}
			hammer2_inode_unlock(xip, xcluster);
		}
		r = vop_write_dirent(&error, uio, inum, DT_DIR, 2, "..");
		if (r)
			goto done;
		if (cookies)
			cookies[cookie_index] = saveoff;
		++saveoff;
		++cookie_index;
		if (cookie_index == ncookies)
			goto done;
	}

	lkey = saveoff | HAMMER2_DIRHASH_VISIBLE;
	if (hammer2_debug & 0x0020)
		kprintf("readdir: lkey %016jx\n", lkey);

	/*
	 * parent is the inode cluster, already locked for us.  Don't
	 * double lock shared locks as this will screw up upgrades.
	 */
	if (error) {
		goto done;
	}
	cluster = hammer2_cluster_lookup(cparent, &key_next, lkey, lkey,
				     HAMMER2_LOOKUP_SHARED);
	if (cluster == NULL) {
		cluster = hammer2_cluster_lookup(cparent, &key_next,
					     lkey, (hammer2_key_t)-1,
					     HAMMER2_LOOKUP_SHARED);
	}
	if (cluster)
		hammer2_cluster_bref(cluster, &bref);
	while (cluster) {
		if (hammer2_debug & 0x0020)
			kprintf("readdir: p=%p chain=%p %016jx (next %016jx)\n",
				cparent->focus, cluster->focus,
				bref.key, key_next);

		if (bref.type == HAMMER2_BREF_TYPE_INODE) {
			ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
			dtype = hammer2_get_dtype(ripdata);
			saveoff = bref.key & HAMMER2_DIRHASH_USERMSK;
			r = vop_write_dirent(&error, uio,
					     ripdata->inum &
					      HAMMER2_DIRHASH_USERMSK,
					     dtype,
					     ripdata->name_len,
					     ripdata->filename);
			if (r)
				break;
			if (cookies)
				cookies[cookie_index] = saveoff;
			++cookie_index;
		} else {
			/* XXX chain error */
			kprintf("bad chain type readdir %d\n", bref.type);
		}

		/*
		 * Keys may not be returned in order so once we have a
		 * placemarker (cluster) the scan must allow the full range
		 * or some entries will be missed.
		 */
		cluster = hammer2_cluster_next(cparent, cluster, &key_next,
					       key_next, (hammer2_key_t)-1,
					       HAMMER2_LOOKUP_SHARED);
		if (cluster) {
			hammer2_cluster_bref(cluster, &bref);
			saveoff = (bref.key & HAMMER2_DIRHASH_USERMSK) + 1;
		} else {
			saveoff = (hammer2_key_t)-1;
		}
		if (cookie_index == ncookies)
			break;
	}
	if (cluster) {
		hammer2_cluster_unlock(cluster);
		hammer2_cluster_drop(cluster);
	}
done:
	hammer2_inode_unlock(ip, cparent);
	if (ap->a_eofflag)
		*ap->a_eofflag = (cluster == NULL);
	if (hammer2_debug & 0x0020)
		kprintf("readdir: done at %016jx\n", saveoff);
	uio->uio_offset = saveoff & ~HAMMER2_DIRHASH_VISIBLE;
	if (error && cookie_index == 0) {
		/* nothing emitted; release cookies to the caller as empty */
		if (cookies) {
			kfree(cookies, M_TEMP);
			*ap->a_ncookies = 0;
			*ap->a_cookies = NULL;
		}
	} else {
		if (cookies) {
			*ap->a_ncookies = cookie_index;
			*ap->a_cookies = cookies;
		}
	}
	LOCKSTOP;
	return (error);
}
705
706 /*
707  * hammer2_vop_readlink { vp, uio, cred }
708  */
709 static
710 int
711 hammer2_vop_readlink(struct vop_readlink_args *ap)
712 {
713         struct vnode *vp;
714         hammer2_inode_t *ip;
715         int error;
716
717         vp = ap->a_vp;
718         if (vp->v_type != VLNK)
719                 return (EINVAL);
720         ip = VTOI(vp);
721
722         error = hammer2_read_file(ip, ap->a_uio, 0);
723         return (error);
724 }
725
726 static
727 int
728 hammer2_vop_read(struct vop_read_args *ap)
729 {
730         struct vnode *vp;
731         hammer2_inode_t *ip;
732         struct uio *uio;
733         int error;
734         int seqcount;
735         int bigread;
736
737         /*
738          * Read operations supported on this vnode?
739          */
740         vp = ap->a_vp;
741         if (vp->v_type != VREG)
742                 return (EINVAL);
743
744         /*
745          * Misc
746          */
747         ip = VTOI(vp);
748         uio = ap->a_uio;
749         error = 0;
750
751         seqcount = ap->a_ioflag >> 16;
752         bigread = (uio->uio_resid > 100 * 1024 * 1024);
753
754         error = hammer2_read_file(ip, uio, seqcount);
755         return (error);
756 }
757
758 static
759 int
760 hammer2_vop_write(struct vop_write_args *ap)
761 {
762         hammer2_inode_t *ip;
763         hammer2_trans_t trans;
764         thread_t td;
765         struct vnode *vp;
766         struct uio *uio;
767         int error;
768         int seqcount;
769         int bigwrite;
770
771         /*
772          * Read operations supported on this vnode?
773          */
774         vp = ap->a_vp;
775         if (vp->v_type != VREG)
776                 return (EINVAL);
777
778         /*
779          * Misc
780          */
781         ip = VTOI(vp);
782         uio = ap->a_uio;
783         error = 0;
784         if (ip->pmp->ronly) {
785                 return (EROFS);
786         }
787
788         seqcount = ap->a_ioflag >> 16;
789         bigwrite = (uio->uio_resid > 100 * 1024 * 1024);
790
791         /*
792          * Check resource limit
793          */
794         if (uio->uio_resid > 0 && (td = uio->uio_td) != NULL && td->td_proc &&
795             uio->uio_offset + uio->uio_resid >
796              td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
797                 lwpsignal(td->td_proc, td->td_lwp, SIGXFSZ);
798                 return (EFBIG);
799         }
800
801         bigwrite = (uio->uio_resid > 100 * 1024 * 1024);
802
803         /*
804          * The transaction interlocks against flushes initiations
805          * (note: but will run concurrently with the actual flush).
806          */
807         hammer2_trans_init(&trans, ip->pmp, 0);
808         error = hammer2_write_file(ip, uio, ap->a_ioflag, seqcount);
809         hammer2_trans_done(&trans);
810
811         return (error);
812 }
813
814 /*
815  * Perform read operations on a file or symlink given an UNLOCKED
816  * inode and uio.
817  *
818  * The passed ip is not locked.
819  */
820 static
821 int
822 hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, int seqcount)
823 {
824         hammer2_off_t size;
825         struct buf *bp;
826         int error;
827
828         error = 0;
829
830         /*
831          * UIO read loop.
832          *
833          * WARNING! Assumes that the kernel interlocks size changes at the
834          *          vnode level.
835          */
836         hammer2_mtx_sh(&ip->lock);
837         size = ip->size;
838         hammer2_mtx_unlock(&ip->lock);
839
840         while (uio->uio_resid > 0 && uio->uio_offset < size) {
841                 hammer2_key_t lbase;
842                 hammer2_key_t leof;
843                 int lblksize;
844                 int loff;
845                 int n;
846
847                 lblksize = hammer2_calc_logical(ip, uio->uio_offset,
848                                                 &lbase, &leof);
849
850                 error = cluster_read(ip->vp, leof, lbase, lblksize,
851                                      uio->uio_resid, seqcount * BKVASIZE,
852                                      &bp);
853
854                 if (error)
855                         break;
856                 loff = (int)(uio->uio_offset - lbase);
857                 n = lblksize - loff;
858                 if (n > uio->uio_resid)
859                         n = uio->uio_resid;
860                 if (n > size - uio->uio_offset)
861                         n = (int)(size - uio->uio_offset);
862                 bp->b_flags |= B_AGE;
863                 uiomove((char *)bp->b_data + loff, n, uio);
864                 bqrelse(bp);
865         }
866         return (error);
867 }
868
/*
 * Write to the file represented by the inode via the logical buffer cache.
 * The inode may represent a regular file or a symlink.
 *
 * The inode must not be locked.
 *
 * ip       - inode being written (locked/unlocked internally as needed)
 * uio      - data source; uio_offset/uio_resid select the byte range
 * ioflag   - IO_APPEND, IO_SYNC, IO_ASYNC, IO_DIRECT, IO_RECURSE
 * seqcount - sequential-access hint (not referenced by this routine)
 *
 * Returns 0 on success or an errno.  If the file was extended and the
 * write then failed, the EOF is rolled back to its pre-call value.
 */
static
int
hammer2_write_file(hammer2_inode_t *ip,
		   struct uio *uio, int ioflag, int seqcount)
{
	hammer2_key_t old_eof;		/* EOF on entry, for rollback */
	hammer2_key_t new_eof;		/* EOF after possible extension */
	struct buf *bp;
	int kflags;			/* accumulated kqueue note flags */
	int error;
	int modified;			/* any data/size change occurred */

	/*
	 * Setup if append
	 *
	 * WARNING! Assumes that the kernel interlocks size changes at the
	 *          vnode level.
	 */
	hammer2_mtx_ex(&ip->lock);
	if (ioflag & IO_APPEND)
		uio->uio_offset = ip->size;
	old_eof = ip->size;
	hammer2_mtx_unlock(&ip->lock);

	/*
	 * Extend the file if necessary.  If the write fails at some point
	 * we will truncate it back down to cover as much as we were able
	 * to write.
	 *
	 * Doing this now makes it easier to calculate buffer sizes in
	 * the loop.
	 */
	kflags = 0;
	error = 0;
	modified = 0;

	if (uio->uio_offset + uio->uio_resid > old_eof) {
		new_eof = uio->uio_offset + uio->uio_resid;
		modified = 1;
		hammer2_extend_file(ip, new_eof);
		kflags |= NOTE_EXTEND;
	} else {
		new_eof = old_eof;
	}
	
	/*
	 * UIO write loop
	 */
	while (uio->uio_resid > 0) {
		hammer2_key_t lbase;
		int trivial;	/* copy fully covers the buffer; no read-in */
		int endofblk;	/* this copy reaches the end of the block */
		int lblksize;
		int loff;	/* byte offset of the copy within the block */
		int n;		/* bytes to copy this iteration */

		/*
		 * Don't allow the buffer build to blow out the buffer
		 * cache.
		 */
		if ((ioflag & IO_RECURSE) == 0)
			bwillwrite(HAMMER2_PBUFSIZE);

		/*
		 * This nominally tells us how much we can cluster and
		 * what the logical buffer size needs to be.  Currently
		 * we don't try to cluster the write and just handle one
		 * block at a time.
		 */
		lblksize = hammer2_calc_logical(ip, uio->uio_offset,
						&lbase, NULL);
		loff = (int)(uio->uio_offset - lbase);
		
		/* Logical blocks are at most 64KB in this scheme */
		KKASSERT(lblksize <= 65536);

		/*
		 * Calculate bytes to copy this transfer and whether the
		 * copy completely covers the buffer or not.
		 *
		 * NOTE(review): the first branch compares loff (an int
		 * byte offset within the block) against lbase (a 64-bit
		 * block base key); they are equal essentially only when
		 * both are zero.  Confirm this was intended rather than
		 * "loff == 0" as used in the else branch.
		 */
		trivial = 0;
		n = lblksize - loff;
		if (n > uio->uio_resid) {
			n = uio->uio_resid;
			if (loff == lbase && uio->uio_offset + n == new_eof)
				trivial = 1;
			endofblk = 0;
		} else {
			if (loff == 0)
				trivial = 1;
			endofblk = 1;
		}

		/*
		 * Get the buffer
		 */
		if (uio->uio_segflg == UIO_NOCOPY) {
			/*
			 * Issuing a write with the same data backing the
			 * buffer.  Instantiate the buffer to collect the
			 * backing vm pages, then read-in any missing bits.
			 *
			 * This case is used by vop_stdputpages().
			 */
			bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0);
			if ((bp->b_flags & B_CACHE) == 0) {
				bqrelse(bp);
				error = bread(ip->vp, lbase, lblksize, &bp);
			}
		} else if (trivial) {
			/*
			 * Even though we are entirely overwriting the buffer
			 * we may still have to zero it out to avoid a
			 * mmap/write visibility issue.
			 */
			bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0);
			if ((bp->b_flags & B_CACHE) == 0)
				vfs_bio_clrbuf(bp);
		} else {
			/*
			 * Partial overwrite, read in any missing bits then
			 * replace the portion being written.
			 *
			 * (The strategy code will detect zero-fill physical
			 * blocks for this case).
			 */
			error = bread(ip->vp, lbase, lblksize, &bp);
			if (error == 0)
				bheavy(bp);
		}

		if (error) {
			brelse(bp);
			break;
		}

		/*
		 * Ok, copy the data in
		 */
		error = uiomove(bp->b_data + loff, n, uio);
		kflags |= NOTE_WRITE;
		modified = 1;
		if (error) {
			brelse(bp);
			break;
		}

		/*
		 * WARNING: Pageout daemon will issue UIO_NOCOPY writes
		 *          with IO_SYNC or IO_ASYNC set.  These writes
		 *          must be handled as the pageout daemon expects.
		 */
		if (ioflag & IO_SYNC) {
			bwrite(bp);
		} else if ((ioflag & IO_DIRECT) && endofblk) {
			bawrite(bp);
		} else if (ioflag & IO_ASYNC) {
			bawrite(bp);
		} else {
			bdwrite(bp);
		}
	}

	/*
	 * Cleanup.  If we extended the file EOF but failed to write through
	 * the entire write is a failure and we have to back-up.
	 */
	if (error && new_eof != old_eof) {
		hammer2_truncate_file(ip, old_eof);
	} else if (modified) {
		/* Successful (possibly partial) write: stamp mtime */
		hammer2_mtx_ex(&ip->lock);
		hammer2_update_time(&ip->mtime);
		atomic_set_int(&ip->flags, HAMMER2_INODE_MTIME);
		hammer2_mtx_unlock(&ip->lock);
	}
	atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED);
	hammer2_knote(ip->vp, kflags);
	vsetisdirty(ip->vp);
	hammer2_trans_assert_strategy(ip->pmp);

	return error;
}
1056
1057 /*
1058  * Truncate the size of a file.  The inode must not be locked.
1059  *
1060  * NOTE:    Caller handles setting HAMMER2_INODE_MODIFIED
1061  *
1062  * WARNING: nvtruncbuf() can only be safely called without the inode lock
1063  *          held due to the way our write thread works.
1064  *
1065  * WARNING! Assumes that the kernel interlocks size changes at the
1066  *          vnode level.
1067  */
1068 static
1069 void
1070 hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize)
1071 {
1072         hammer2_key_t lbase;
1073         int nblksize;
1074
1075         LOCKSTART;
1076         if (ip->vp) {
1077                 nblksize = hammer2_calc_logical(ip, nsize, &lbase, NULL);
1078                 nvtruncbuf(ip->vp, nsize,
1079                            nblksize, (int)nsize & (nblksize - 1),
1080                            0);
1081         }
1082         hammer2_mtx_ex(&ip->lock);
1083         ip->size = nsize;
1084         atomic_set_int(&ip->flags, HAMMER2_INODE_RESIZED);
1085         hammer2_mtx_unlock(&ip->lock);
1086         LOCKSTOP;
1087 }
1088
1089 /*
1090  * Extend the size of a file.  The inode must not be locked.
1091  *
1092  * WARNING! Assumes that the kernel interlocks size changes at the
1093  *          vnode level.
1094  *
1095  * NOTE: Caller handles setting HAMMER2_INODE_MODIFIED
1096  */
1097 static
1098 void
1099 hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize)
1100 {
1101         hammer2_key_t lbase;
1102         hammer2_key_t osize;
1103         int oblksize;
1104         int nblksize;
1105
1106         LOCKSTART;
1107         hammer2_mtx_ex(&ip->lock);
1108         osize = ip->size;
1109         ip->size = nsize;
1110         hammer2_mtx_unlock(&ip->lock);
1111
1112         if (ip->vp) {
1113                 oblksize = hammer2_calc_logical(ip, osize, &lbase, NULL);
1114                 nblksize = hammer2_calc_logical(ip, nsize, &lbase, NULL);
1115                 nvextendbuf(ip->vp,
1116                             osize, nsize,
1117                             oblksize, nblksize,
1118                             -1, -1, 0);
1119         }
1120         atomic_set_int(&ip->flags, HAMMER2_INODE_RESIZED);
1121         LOCKSTOP;
1122 }
1123
/*
 * hammer2_vop_nresolve { nch, dvp, cred }
 *
 * Resolve a name in directory dvp: scan the directory hash range for a
 * matching inode, chase hardlink pointer entries to the real hardlink
 * target, acquire the vnode and attach it to the namecache entry.
 *
 * Returns 0 on success (namecache resolved to the vnode), ENOENT when
 * the name does not exist (namecache resolved to NULL), or another
 * errno leaving the namecache unresolved.
 */
static
int
hammer2_vop_nresolve(struct vop_nresolve_args *ap)
{
	hammer2_inode_t *ip;
	hammer2_inode_t *dip;
	hammer2_cluster_t *cparent;
	hammer2_cluster_t *cluster;
	const hammer2_inode_data_t *ripdata;
	hammer2_key_t key_next;
	hammer2_key_t lhc;		/* directory hash key for the name */
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	int error = 0;
	struct vnode *vp;

	LOCKSTART;
	dip = VTOI(ap->a_dvp);
	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;
	lhc = hammer2_dirhash(name, name_len);

	/*
	 * Note: In DragonFly the kernel handles '.' and '..'.
	 */
	cparent = hammer2_inode_lock(dip, HAMMER2_RESOLVE_ALWAYS |
					  HAMMER2_RESOLVE_SHARED);

	/*
	 * Hash collisions occupy the range lhc..lhc+DIRHASH_LOMASK; scan
	 * it for an inode with an exact filename match.
	 */
	cluster = hammer2_cluster_lookup(cparent, &key_next,
					 lhc, lhc + HAMMER2_DIRHASH_LOMASK,
					 HAMMER2_LOOKUP_SHARED);
	while (cluster) {
		if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE) {
			ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
			if (ripdata->name_len == name_len &&
			    bcmp(ripdata->filename, name, name_len) == 0) {
				break;
			}
		}
		cluster = hammer2_cluster_next(cparent, cluster, &key_next,
					       key_next,
					       lhc + HAMMER2_DIRHASH_LOMASK,
					       HAMMER2_LOOKUP_SHARED);
	}
	hammer2_inode_unlock(dip, cparent);

	/*
	 * Resolve hardlink entries before acquiring the inode.
	 */
	if (cluster) {
		ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
		if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK) {
			hammer2_tid_t inum = ripdata->inum;
			error = hammer2_hardlink_find(dip, NULL, &cluster);
			if (error) {
				kprintf("hammer2: unable to find hardlink "
					"0x%016jx\n", inum);
				/*
				 * NOTE(review): early return with no
				 * explicit cluster release here --
				 * presumably hammer2_hardlink_find()
				 * consumes/replaces the cluster on
				 * failure; confirm no cluster leak.
				 */
				LOCKSTOP;

				return error;
			}
		}
	}

	/*
	 * nresolve needs to resolve hardlinks, the original cluster is not
	 * sufficient.
	 */
	if (cluster) {
		ip = hammer2_inode_get(dip->pmp, dip, cluster);
		ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
		if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK) {
			/*
			 * Still a hardlink pointer: swap the cluster for
			 * a fresh lock on the inode itself (ref held
			 * across the unlock/relock window).
			 */
			kprintf("nresolve: fixup hardlink\n");
			hammer2_inode_ref(ip);
			hammer2_inode_unlock(ip, NULL);
			hammer2_cluster_unlock(cluster);
			hammer2_cluster_drop(cluster);
			cluster = hammer2_inode_lock(ip,
						     HAMMER2_RESOLVE_ALWAYS);
			ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
			hammer2_inode_drop(ip);
			kprintf("nresolve: fixup to type %02x\n",
				ripdata->type);
		}
	} else {
		ip = NULL;
	}

#if 0
	/*
	 * Deconsolidate any hardlink whos nlinks == 1.  Ignore errors.
	 * If an error occurs chain and ip are left alone.
	 *
	 * XXX upgrade shared lock?
	 */
	if (ochain && chain &&
	    chain->data->ipdata.nlinks == 1 && !dip->pmp->ronly) {
		kprintf("hammer2: need to unconsolidate hardlink for %s\n",
			chain->data->ipdata.filename);
		/* XXX retain shared lock on dip? (currently not held) */
		hammer2_trans_init(&trans, dip->pmp, 0);
		hammer2_hardlink_deconsolidate(&trans, dip, &chain, &ochain);
		hammer2_trans_done(&trans);
	}
#endif

	/*
	 * Acquire the related vnode
	 *
	 * NOTE: For error processing, only ENOENT resolves the namecache
	 *       entry to NULL, otherwise we just return the error and
	 *       leave the namecache unresolved.
	 *
	 * NOTE: multiple hammer2_inode structures can be aliased to the
	 *       same chain element, for example for hardlinks.  This
	 *       use case does not 'reattach' inode associations that
	 *       might already exist, but always allocates a new one.
	 *
	 * WARNING: inode structure is locked exclusively via inode_get
	 *          but chain was locked shared.  inode_unlock()
	 *          will handle it properly.
	 */
	if (cluster) {
		vp = hammer2_igetv(ip, cluster, &error);
		if (error == 0) {
			vn_unlock(vp);
			cache_setvp(ap->a_nch, vp);
		} else if (error == ENOENT) {
			cache_setvp(ap->a_nch, NULL);
		}
		hammer2_inode_unlock(ip, cluster);

		/*
		 * The vp should not be released until after we've disposed
		 * of our locks, because it might cause vop_inactive() to
		 * be called.
		 */
		if (vp)
			vrele(vp);
	} else {
		error = ENOENT;
		cache_setvp(ap->a_nch, NULL);
	}
	KASSERT(error || ap->a_nch->ncp->nc_vp != NULL,
		("resolve error %d/%p ap %p\n",
		 error, ap->a_nch->ncp->nc_vp, ap));
	LOCKSTOP;
	return error;
}
1275
1276 static
1277 int
1278 hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
1279 {
1280         hammer2_inode_t *dip;
1281         hammer2_inode_t *ip;
1282         hammer2_cluster_t *cparent;
1283         int error;
1284
1285         LOCKSTART;
1286         dip = VTOI(ap->a_dvp);
1287
1288         if ((ip = dip->pip) == NULL) {
1289                 *ap->a_vpp = NULL;
1290                 LOCKSTOP;
1291                 return ENOENT;
1292         }
1293         cparent = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS);
1294         *ap->a_vpp = hammer2_igetv(ip, cparent, &error);
1295         hammer2_inode_unlock(ip, cparent);
1296
1297         LOCKSTOP;
1298         return error;
1299 }
1300
1301 static
1302 int
1303 hammer2_vop_nmkdir(struct vop_nmkdir_args *ap)
1304 {
1305         hammer2_inode_t *dip;
1306         hammer2_inode_t *nip;
1307         hammer2_trans_t trans;
1308         hammer2_cluster_t *cluster;
1309         struct namecache *ncp;
1310         const uint8_t *name;
1311         size_t name_len;
1312         int error;
1313
1314         LOCKSTART;
1315         dip = VTOI(ap->a_dvp);
1316         if (dip->pmp->ronly) {
1317                 LOCKSTOP;
1318                 return (EROFS);
1319         }
1320
1321         ncp = ap->a_nch->ncp;
1322         name = ncp->nc_name;
1323         name_len = ncp->nc_nlen;
1324         cluster = NULL;
1325
1326         hammer2_pfs_memory_wait(dip->pmp);
1327         hammer2_trans_init(&trans, dip->pmp, HAMMER2_TRANS_NEWINODE);
1328         nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred,
1329                                    name, name_len,
1330                                    &cluster, 0, &error);
1331         if (error) {
1332                 KKASSERT(nip == NULL);
1333                 *ap->a_vpp = NULL;
1334         } else {
1335                 *ap->a_vpp = hammer2_igetv(nip, cluster, &error);
1336                 hammer2_inode_unlock(nip, cluster);
1337         }
1338         hammer2_trans_done(&trans);
1339
1340         if (error == 0) {
1341                 cache_setunresolved(ap->a_nch);
1342                 cache_setvp(ap->a_nch, *ap->a_vpp);
1343         }
1344         LOCKSTOP;
1345         return error;
1346 }
1347
/*
 * hammer2_vop_open { vp, mode, cred, fp }
 *
 * No hammer2-specific open-time state; defer to the stock handler.
 */
static
int
hammer2_vop_open(struct vop_open_args *ap)
{
	return (vop_stdopen(ap));
}
1354
1355 /*
1356  * hammer2_vop_advlock { vp, id, op, fl, flags }
1357  */
1358 static
1359 int
1360 hammer2_vop_advlock(struct vop_advlock_args *ap)
1361 {
1362         hammer2_inode_t *ip = VTOI(ap->a_vp);
1363         const hammer2_inode_data_t *ripdata;
1364         hammer2_cluster_t *cparent;
1365         hammer2_off_t size;
1366
1367         cparent = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS |
1368                                          HAMMER2_RESOLVE_SHARED);
1369         ripdata = &hammer2_cluster_rdata(cparent)->ipdata;
1370         size = ripdata->size;
1371         hammer2_inode_unlock(ip, cparent);
1372         return (lf_advlock(ap, &ip->advlock, size));
1373 }
1374
1375
/*
 * hammer2_vop_close { vp, fflag }
 *
 * No hammer2-specific close-time state; defer to the stock handler.
 */
static
int
hammer2_vop_close(struct vop_close_args *ap)
{
	return (vop_stdclose(ap));
}
1382
/*
 * hammer2_vop_nlink { nch, dvp, vp, cred }
 *
 * Create a hardlink from (vp) to {dvp, nch}.
 *
 * Consolidates the hardlink target into the common parent directory of
 * the existing location and the new link, bumps nlinks, then connects a
 * new directory entry in tdip pointing at the target.
 */
static
int
hammer2_vop_nlink(struct vop_nlink_args *ap)
{
	hammer2_inode_t *fdip;	/* directory ip currently resides in (ip->pip) */
	hammer2_inode_t *tdip;	/* target directory to create link in */
	hammer2_inode_t *cdip;	/* common parent directory */
	hammer2_inode_t *ip;	/* inode we are hardlinking to */
	hammer2_cluster_t *cluster;
	hammer2_cluster_t *fdcluster;
	hammer2_cluster_t *tdcluster;
	hammer2_cluster_t *cdcluster;
	hammer2_trans_t trans;
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	int error;

	LOCKSTART;
	tdip = VTOI(ap->a_dvp);
	if (tdip->pmp->ronly) {
		/* Read-only mount: no namespace modifications */
		LOCKSTOP;
		return (EROFS);
	}

	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;

	/*
	 * ip represents the file being hardlinked.  The file could be a
	 * normal file or a hardlink target if it has already been hardlinked.
	 * If ip is a hardlinked target then ip->pip represents the location
	 * of the hardlinked target, NOT the location of the hardlink pointer.
	 *
	 * Bump nlinks and potentially also create or move the hardlink
	 * target in the parent directory common to (ip) and (tdip).  The
	 * consolidation code can modify ip->cluster and ip->pip.  The
	 * returned cluster is locked.
	 */
	ip = VTOI(ap->a_vp);
	hammer2_pfs_memory_wait(ip->pmp);
	hammer2_trans_init(&trans, ip->pmp, HAMMER2_TRANS_NEWINODE);

	/*
	 * The common parent directory must be locked first to avoid deadlocks.
	 * Also note that fdip and/or tdip might match cdip.
	 */
	fdip = ip->pip;
	cdip = hammer2_inode_common_parent(fdip, tdip);
	cdcluster = hammer2_inode_lock(cdip, HAMMER2_RESOLVE_ALWAYS);
	fdcluster = hammer2_inode_lock(fdip, HAMMER2_RESOLVE_ALWAYS);
	tdcluster = hammer2_inode_lock(tdip, HAMMER2_RESOLVE_ALWAYS);
	cluster = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS);
	error = hammer2_hardlink_consolidate(&trans, ip, &cluster,
					     cdip, cdcluster, 1);
	if (error)
		goto done;

	/*
	 * Create a directory entry connected to the specified cluster.
	 *
	 * WARNING! chain can get moved by the connect (indirectly due to
	 *          potential indirect block creation).
	 */
	error = hammer2_inode_connect(&trans, &cluster, 1,
				      tdip, tdcluster,
				      name, name_len, 0);
	if (error == 0) {
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, ap->a_vp);
	}
done:
	/* Unwind locks in the reverse of acquisition order */
	hammer2_inode_unlock(ip, cluster);
	hammer2_inode_unlock(tdip, tdcluster);
	hammer2_inode_unlock(fdip, fdcluster);
	hammer2_inode_unlock(cdip, cdcluster);
	hammer2_inode_drop(cdip);
	hammer2_trans_done(&trans);

	LOCKSTOP;
	return error;
}
1471
1472 /*
1473  * hammer2_vop_ncreate { nch, dvp, vpp, cred, vap }
1474  *
1475  * The operating system has already ensured that the directory entry
1476  * does not exist and done all appropriate namespace locking.
1477  */
1478 static
1479 int
1480 hammer2_vop_ncreate(struct vop_ncreate_args *ap)
1481 {
1482         hammer2_inode_t *dip;
1483         hammer2_inode_t *nip;
1484         hammer2_trans_t trans;
1485         hammer2_cluster_t *ncluster;
1486         struct namecache *ncp;
1487         const uint8_t *name;
1488         size_t name_len;
1489         int error;
1490
1491         LOCKSTART;
1492         dip = VTOI(ap->a_dvp);
1493         if (dip->pmp->ronly) {
1494                 LOCKSTOP;
1495                 return (EROFS);
1496         }
1497
1498         ncp = ap->a_nch->ncp;
1499         name = ncp->nc_name;
1500         name_len = ncp->nc_nlen;
1501         hammer2_pfs_memory_wait(dip->pmp);
1502         hammer2_trans_init(&trans, dip->pmp, HAMMER2_TRANS_NEWINODE);
1503         ncluster = NULL;
1504
1505         nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred,
1506                                    name, name_len,
1507                                    &ncluster, 0, &error);
1508         if (error) {
1509                 KKASSERT(nip == NULL);
1510                 *ap->a_vpp = NULL;
1511         } else {
1512                 *ap->a_vpp = hammer2_igetv(nip, ncluster, &error);
1513                 hammer2_inode_unlock(nip, ncluster);
1514         }
1515         hammer2_trans_done(&trans);
1516
1517         if (error == 0) {
1518                 cache_setunresolved(ap->a_nch);
1519                 cache_setvp(ap->a_nch, *ap->a_vpp);
1520         }
1521         LOCKSTOP;
1522         return error;
1523 }
1524
1525 /*
1526  * Make a device node (typically a fifo)
1527  */
1528 static
1529 int
1530 hammer2_vop_nmknod(struct vop_nmknod_args *ap)
1531 {
1532         hammer2_inode_t *dip;
1533         hammer2_inode_t *nip;
1534         hammer2_trans_t trans;
1535         hammer2_cluster_t *ncluster;
1536         struct namecache *ncp;
1537         const uint8_t *name;
1538         size_t name_len;
1539         int error;
1540
1541         LOCKSTART;
1542         dip = VTOI(ap->a_dvp);
1543         if (dip->pmp->ronly) {
1544                 LOCKSTOP;
1545                 return (EROFS);
1546         }
1547
1548         ncp = ap->a_nch->ncp;
1549         name = ncp->nc_name;
1550         name_len = ncp->nc_nlen;
1551         hammer2_pfs_memory_wait(dip->pmp);
1552         hammer2_trans_init(&trans, dip->pmp, HAMMER2_TRANS_NEWINODE);
1553         ncluster = NULL;
1554
1555         nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred,
1556                                    name, name_len,
1557                                    &ncluster, 0, &error);
1558         if (error) {
1559                 KKASSERT(nip == NULL);
1560                 *ap->a_vpp = NULL;
1561         } else {
1562                 *ap->a_vpp = hammer2_igetv(nip, ncluster, &error);
1563                 hammer2_inode_unlock(nip, ncluster);
1564         }
1565         hammer2_trans_done(&trans);
1566
1567         if (error == 0) {
1568                 cache_setunresolved(ap->a_nch);
1569                 cache_setvp(ap->a_nch, *ap->a_vpp);
1570         }
1571         LOCKSTOP;
1572         return error;
1573 }
1574
/*
 * hammer2_vop_nsymlink { nch, dvp, vpp, cred, vap, target }
 *
 * Create a symbolic link.  Short targets (<= HAMMER2_EMBEDDED_BYTES)
 * are stored directly in the inode's embedded data area; longer
 * targets are written out through the normal file write path.
 *
 * NOTE(review): unlike the other namespace ops in this file this
 * routine does not bracket itself with LOCKSTART/LOCKSTOP -- confirm
 * that is intentional.
 */
static
int
hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
{
	hammer2_inode_t *dip;
	hammer2_inode_t *nip;
	hammer2_cluster_t *ncparent;
	hammer2_trans_t trans;
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	int error;
	
	dip = VTOI(ap->a_dvp);
	if (dip->pmp->ronly)
		return (EROFS);

	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;
	hammer2_pfs_memory_wait(dip->pmp);
	hammer2_trans_init(&trans, dip->pmp, HAMMER2_TRANS_NEWINODE);
	ncparent = NULL;

	ap->a_vap->va_type = VLNK;	/* enforce type */

	nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred,
				   name, name_len,
				   &ncparent, 0, &error);
	if (error) {
		KKASSERT(nip == NULL);
		*ap->a_vpp = NULL;
		hammer2_trans_done(&trans);
		return error;
	}
	*ap->a_vpp = hammer2_igetv(nip, ncparent, &error);

	/*
	 * Build the softlink (~like file data) and finalize the namecache.
	 */
	if (error == 0) {
		size_t bytes;
		struct uio auio;
		struct iovec aiov;
		hammer2_inode_data_t *nipdata;

		nipdata = &hammer2_cluster_wdata(ncparent)->ipdata;
		/* nipdata = &nip->chain->data->ipdata;XXX */
		bytes = strlen(ap->a_target);

		if (bytes <= HAMMER2_EMBEDDED_BYTES) {
			/*
			 * Short target: store it in the inode's embedded
			 * direct-data area and sync the cluster.
			 */
			KKASSERT(nipdata->op_flags &
				 HAMMER2_OPFLAG_DIRECTDATA);
			bcopy(ap->a_target, nipdata->u.data, bytes);
			nipdata->size = bytes;
			nip->size = bytes;
			hammer2_cluster_modsync(ncparent);
			hammer2_inode_unlock(nip, ncparent);
			/* nipdata = NULL; not needed */
		} else {
			/*
			 * Long target: write it as ordinary file data.
			 * IO_APPEND makes hammer2_write_file() start at
			 * the (zero) EOF, so auio.uio_offset need not be
			 * initialized.
			 */
			hammer2_inode_unlock(nip, ncparent);
			/* nipdata = NULL; not needed */
			bzero(&auio, sizeof(auio));
			bzero(&aiov, sizeof(aiov));
			auio.uio_iov = &aiov;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_WRITE;
			auio.uio_resid = bytes;
			auio.uio_iovcnt = 1;
			auio.uio_td = curthread;
			aiov.iov_base = ap->a_target;
			aiov.iov_len = bytes;
			/* Deliberately best-effort: write errors ignored */
			error = hammer2_write_file(nip, &auio, IO_APPEND, 0);
			/* XXX handle error */
			error = 0;
		}
	} else {
		hammer2_inode_unlock(nip, ncparent);
	}
	hammer2_trans_done(&trans);

	/*
	 * Finalize namecache
	 */
	if (error == 0) {
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, *ap->a_vpp);
		/* hammer2_knote(ap->a_dvp, NOTE_WRITE); */
	}
	return error;
}
1669
1670 /*
1671  * hammer2_vop_nremove { nch, dvp, cred }
1672  */
1673 static
1674 int
1675 hammer2_vop_nremove(struct vop_nremove_args *ap)
1676 {
1677         hammer2_inode_t *dip;
1678         hammer2_trans_t trans;
1679         struct namecache *ncp;
1680         const uint8_t *name;
1681         size_t name_len;
1682         int error;
1683
1684         LOCKSTART;
1685         dip = VTOI(ap->a_dvp);
1686         if (dip->pmp->ronly) {
1687                 LOCKSTOP;
1688                 return(EROFS);
1689         }
1690
1691         ncp = ap->a_nch->ncp;
1692         name = ncp->nc_name;
1693         name_len = ncp->nc_nlen;
1694
1695         hammer2_pfs_memory_wait(dip->pmp);
1696         hammer2_trans_init(&trans, dip->pmp, 0);
1697         error = hammer2_unlink_file(&trans, dip, name, name_len,
1698                                     0, NULL, ap->a_nch, -1);
1699         hammer2_run_unlinkq(&trans, dip->pmp);
1700         hammer2_trans_done(&trans);
1701         if (error == 0)
1702                 cache_unlink(ap->a_nch);
1703         LOCKSTOP;
1704         return (error);
1705 }
1706
1707 /*
1708  * hammer2_vop_nrmdir { nch, dvp, cred }
1709  */
1710 static
1711 int
1712 hammer2_vop_nrmdir(struct vop_nrmdir_args *ap)
1713 {
1714         hammer2_inode_t *dip;
1715         hammer2_trans_t trans;
1716         struct namecache *ncp;
1717         const uint8_t *name;
1718         size_t name_len;
1719         int error;
1720
1721         LOCKSTART;
1722         dip = VTOI(ap->a_dvp);
1723         if (dip->pmp->ronly) {
1724                 LOCKSTOP;
1725                 return(EROFS);
1726         }
1727
1728         ncp = ap->a_nch->ncp;
1729         name = ncp->nc_name;
1730         name_len = ncp->nc_nlen;
1731
1732         hammer2_pfs_memory_wait(dip->pmp);
1733         hammer2_trans_init(&trans, dip->pmp, 0);
1734         hammer2_run_unlinkq(&trans, dip->pmp);
1735         error = hammer2_unlink_file(&trans, dip, name, name_len,
1736                                     1, NULL, ap->a_nch, -1);
1737         hammer2_trans_done(&trans);
1738         if (error == 0)
1739                 cache_unlink(ap->a_nch);
1740         LOCKSTOP;
1741         return (error);
1742 }
1743
/*
 * hammer2_vop_nrename { fnch, tnch, fdvp, tdvp, cred }
 *
 * Rename (fdvp, fname) to (tdvp, tname).  An existing target is removed
 * first.  Hardlinked sources rename the pointer object and may require
 * re-consolidating the hardlink target under a common parent directory.
 *
 * Returns 0 on success or an errno.  Lock-ordering and unwind sequence
 * below are deliberate; do not reorder.
 */
static
int
hammer2_vop_nrename(struct vop_nrename_args *ap)
{
	struct namecache *fncp;
	struct namecache *tncp;
	hammer2_inode_t *cdip;
	hammer2_inode_t *fdip;
	hammer2_inode_t *tdip;
	hammer2_inode_t *ip;
	hammer2_cluster_t *cluster;
	hammer2_cluster_t *fdcluster;
	hammer2_cluster_t *tdcluster;
	hammer2_cluster_t *cdcluster;
	hammer2_trans_t trans;
	const uint8_t *fname;
	size_t fname_len;
	const uint8_t *tname;
	size_t tname_len;
	int error;
	int tnch_error;
	int hlink;

	/* Cross-mount renames are not supported */
	if (ap->a_fdvp->v_mount != ap->a_tdvp->v_mount)
		return(EXDEV);
	if (ap->a_fdvp->v_mount != ap->a_fnch->ncp->nc_vp->v_mount)
		return(EXDEV);

	fdip = VTOI(ap->a_fdvp);	/* source directory */
	tdip = VTOI(ap->a_tdvp);	/* target directory */

	if (fdip->pmp->ronly)
		return(EROFS);

	LOCKSTART;
	fncp = ap->a_fnch->ncp;		/* entry name in source */
	fname = fncp->nc_name;
	fname_len = fncp->nc_nlen;

	tncp = ap->a_tnch->ncp;		/* entry name in target */
	tname = tncp->nc_name;
	tname_len = tncp->nc_nlen;

	hammer2_pfs_memory_wait(tdip->pmp);
	hammer2_trans_init(&trans, tdip->pmp, 0);

	/*
	 * ip is the inode being renamed.  If this is a hardlink then
	 * ip represents the actual file and not the hardlink marker.
	 */
	ip = VTOI(fncp->nc_vp);
	cluster = NULL;


	/*
	 * The common parent directory must be locked first to avoid deadlocks.
	 * Also note that fdip and/or tdip might match cdip.
	 *
	 * WARNING! fdip may not match ip->pip.  That is, if the source file
	 *          is already a hardlink then what we are renaming is the
	 *          hardlink pointer, not the hardlink itself.  The hardlink
	 *          directory (ip->pip) will already be at a common parent
	 *          of fdrip.
	 *
	 *          Be sure to use ip->pip when finding the common parent
	 *          against tdip or we might accidently move the hardlink
	 *          target into a subdirectory that makes it inaccessible to
	 *          other pointers.
	 */
	cdip = hammer2_inode_common_parent(ip->pip, tdip);
	cdcluster = hammer2_inode_lock(cdip, HAMMER2_RESOLVE_ALWAYS);
	fdcluster = hammer2_inode_lock(fdip, HAMMER2_RESOLVE_ALWAYS);
	tdcluster = hammer2_inode_lock(tdip, HAMMER2_RESOLVE_ALWAYS);

	/*
	 * Keep a tight grip on the inode so the temporary unlinking from
	 * the source location prior to linking to the target location
	 * does not cause the cluster to be destroyed.
	 *
	 * NOTE: To avoid deadlocks we cannot lock (ip) while we are
	 *       unlinking elements from their directories.  Locking
	 *       the nlinks field does not lock the whole inode.
	 */
	hammer2_inode_ref(ip);

	/*
	 * Remove target if it exists.  tnch_error is saved separately so
	 * the namecache update for the target can be issued at the end
	 * even if a later step fails.
	 */
	error = hammer2_unlink_file(&trans, tdip, tname, tname_len,
				    -1, NULL, ap->a_tnch, -1);
	tnch_error = error;
	if (error && error != ENOENT)
		goto done;

	/*
	 * When renaming a hardlinked file we may have to re-consolidate
	 * the location of the hardlink target.
	 *
	 * If ip represents a regular file the consolidation code essentially
	 * does nothing other than return the same locked cluster that was
	 * passed in.
	 *
	 * The returned cluster will be locked.
	 *
	 * WARNING!  We do not currently have a local copy of ipdata but
	 *           we do use one later remember that it must be reloaded
	 *           on any modification to the inode, including connects.
	 */
	cluster = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS);
	error = hammer2_hardlink_consolidate(&trans, ip, &cluster,
					     cdip, cdcluster, 0);
	if (error)
		goto done;

	/*
	 * Disconnect (fdip, fname) from the source directory.  This will
	 * disconnect (ip) if it represents a direct file.  If (ip) represents
	 * a hardlink the HARDLINK pointer object will be removed but the
	 * hardlink will stay intact.
	 *
	 * Always pass nch as NULL because we intend to reconnect the inode,
	 * so we don't want hammer2_unlink_file() to rename it to the hidden
	 * open-but-unlinked directory.
	 *
	 * The target cluster may be marked DELETED but will not be destroyed
	 * since we retain our hold on ip and cluster.
	 *
	 * NOTE: We pass nlinks as 0 (not -1) in order to retain the file's
	 *       link count.
	 */
	error = hammer2_unlink_file(&trans, fdip, fname, fname_len,
				    -1, &hlink, NULL, 0);
	KKASSERT(error != EAGAIN);
	if (error)
		goto done;

	/*
	 * Reconnect ip to target directory using cluster.  Chains cannot
	 * actually be moved, so this will duplicate the cluster in the new
	 * spot and assign it to the ip, replacing the old cluster.
	 *
	 * WARNING: Because recursive locks are allowed and we unlinked the
	 *          file that we have a cluster-in-hand for just above, the
	 *          cluster might have been delete-duplicated.  We must
	 *          refactor the cluster.
	 *
	 * WARNING: Chain locks can lock buffer cache buffers, to avoid
	 *          deadlocks we want to unlock before issuing a cache_*()
	 *          op (that might have to lock a vnode).
	 *
	 * NOTE:    Pass nlinks as 0 because we retained the link count from
	 *          the unlink, so we do not have to modify it.
	 */
	error = hammer2_inode_connect(&trans, &cluster, hlink,
				      tdip, tdcluster,
				      tname, tname_len, 0);
	if (error == 0) {
		KKASSERT(cluster != NULL);
		hammer2_inode_repoint(ip, (hlink ? ip->pip : tdip), cluster);
	}
done:
	/* Unwind in reverse of acquisition order (tdip, fdip, then cdip) */
	hammer2_inode_unlock(ip, cluster);
	hammer2_inode_unlock(tdip, tdcluster);
	hammer2_inode_unlock(fdip, fdcluster);
	hammer2_inode_unlock(cdip, cdcluster);
	hammer2_inode_drop(ip);
	hammer2_inode_drop(cdip);
	hammer2_run_unlinkq(&trans, fdip->pmp);
	hammer2_trans_done(&trans);

	/*
	 * Issue the namecache update after unlocking all the internal
	 * hammer structures, otherwise we might deadlock.
	 */
	if (tnch_error == 0) {
		cache_unlink(ap->a_tnch);
		cache_setunresolved(ap->a_tnch);
	}
	if (error == 0)
		cache_rename(ap->a_fnch, ap->a_tnch);

	LOCKSTOP;
	return (error);
}
1931
1932 /*
1933  * hammer2_vop_ioctl { vp, command, data, fflag, cred }
1934  */
1935 static
1936 int
1937 hammer2_vop_ioctl(struct vop_ioctl_args *ap)
1938 {
1939         hammer2_inode_t *ip;
1940         int error;
1941
1942         LOCKSTART;
1943         ip = VTOI(ap->a_vp);
1944
1945         error = hammer2_ioctl(ip, ap->a_command, (void *)ap->a_data,
1946                               ap->a_fflag, ap->a_cred);
1947         LOCKSTOP;
1948         return (error);
1949 }
1950
1951 static
1952 int 
1953 hammer2_vop_mountctl(struct vop_mountctl_args *ap)
1954 {
1955         struct mount *mp;
1956         hammer2_pfs_t *pmp;
1957         int rc;
1958
1959         LOCKSTART;
1960         switch (ap->a_op) {
1961         case (MOUNTCTL_SET_EXPORT):
1962                 mp = ap->a_head.a_ops->head.vv_mount;
1963                 pmp = MPTOPMP(mp);
1964
1965                 if (ap->a_ctllen != sizeof(struct export_args))
1966                         rc = (EINVAL);
1967                 else
1968                         rc = vfs_export(mp, &pmp->export,
1969                                         (const struct export_args *)ap->a_ctl);
1970                 break;
1971         default:
1972                 rc = vop_stdmountctl(ap);
1973                 break;
1974         }
1975         LOCKSTOP;
1976         return (rc);
1977 }
1978
/*
 * This handles unlinked open files after the vnode is finally dereferenced.
 * To avoid deadlocks it cannot be called from the normal vnode recycling
 * path, so we call it (1) after a unlink, rmdir, or rename, (2) on every
 * flush, and (3) on umount.
 *
 * Drains pmp->unlinkq, permanently deleting each queued inode's cluster
 * from its parent and dropping the queue's reference on the inode.
 */
void
hammer2_run_unlinkq(hammer2_trans_t *trans, hammer2_pfs_t *pmp)
{
	const hammer2_inode_data_t *ripdata;
	hammer2_inode_unlink_t *ipul;
	hammer2_inode_t *ip;
	hammer2_cluster_t *cluster;
	hammer2_cluster_t *cparent;

	/* Cheap unlocked early-out; the queue is usually empty */
	if (TAILQ_EMPTY(&pmp->unlinkq))
		return;

	LOCKSTART;
	hammer2_spin_ex(&pmp->list_spin);
	while ((ipul = TAILQ_FIRST(&pmp->unlinkq)) != NULL) {
		TAILQ_REMOVE(&pmp->unlinkq, ipul, entry);
		/*
		 * Drop the spinlock while doing blocking work (inode
		 * lock, cluster delete); it is reacquired at the bottom
		 * of the loop before re-testing the queue head.
		 */
		hammer2_spin_unex(&pmp->list_spin);
		ip = ipul->ip;
		kfree(ipul, pmp->minode);

		cluster = hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS);
		ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
		if (hammer2_debug & 0x400) {
			kprintf("hammer2: unlink on reclaim: %s refs=%d\n",
				ripdata->filename, ip->refs);
		}

		/*
		 * NOTE: Due to optimizations to avoid I/O on the inode for
		 *       the last unlink, ripdata->nlinks is not necessarily
		 *       0 here.
		 */
		/* KKASSERT(ripdata->nlinks == 0); (see NOTE) */
		cparent = hammer2_cluster_parent(cluster);
		hammer2_cluster_delete(trans, cparent, cluster,
				       HAMMER2_DELETE_PERMANENT);
		hammer2_cluster_unlock(cparent);
		hammer2_cluster_drop(cparent);
		hammer2_inode_unlock(ip, cluster);      /* inode lock */
		hammer2_inode_drop(ip);                 /* ipul ref */

		hammer2_spin_ex(&pmp->list_spin);
	}
	hammer2_spin_unex(&pmp->list_spin);
	LOCKSTOP;
}
2031
2032
2033 /*
2034  * KQFILTER
2035  */
2036 static void filt_hammer2detach(struct knote *kn);
2037 static int filt_hammer2read(struct knote *kn, long hint);
2038 static int filt_hammer2write(struct knote *kn, long hint);
2039 static int filt_hammer2vnode(struct knote *kn, long hint);
2040
/*
 * Knote filter operation tables shared by hammer2_vop_kqfilter().
 *
 * NOTE(review): positional initializers; field order assumed to be
 * { f_flags, f_attach, f_detach, f_event } with no attach hook (NULL) —
 * confirm against struct filterops in <sys/event.h>.
 */
static struct filterops hammer2read_filtops =
	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
	  NULL, filt_hammer2detach, filt_hammer2read };
static struct filterops hammer2write_filtops =
	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
	  NULL, filt_hammer2detach, filt_hammer2write };
static struct filterops hammer2vnode_filtops =
	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
	  NULL, filt_hammer2detach, filt_hammer2vnode };
2050
2051 static
2052 int
2053 hammer2_vop_kqfilter(struct vop_kqfilter_args *ap)
2054 {
2055         struct vnode *vp = ap->a_vp;
2056         struct knote *kn = ap->a_kn;
2057
2058         switch (kn->kn_filter) {
2059         case EVFILT_READ:
2060                 kn->kn_fop = &hammer2read_filtops;
2061                 break;
2062         case EVFILT_WRITE:
2063                 kn->kn_fop = &hammer2write_filtops;
2064                 break;
2065         case EVFILT_VNODE:
2066                 kn->kn_fop = &hammer2vnode_filtops;
2067                 break;
2068         default:
2069                 return (EOPNOTSUPP);
2070         }
2071
2072         kn->kn_hook = (caddr_t)vp;
2073
2074         knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
2075
2076         return(0);
2077 }
2078
2079 static void
2080 filt_hammer2detach(struct knote *kn)
2081 {
2082         struct vnode *vp = (void *)kn->kn_hook;
2083
2084         knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
2085 }
2086
2087 static int
2088 filt_hammer2read(struct knote *kn, long hint)
2089 {
2090         struct vnode *vp = (void *)kn->kn_hook;
2091         hammer2_inode_t *ip = VTOI(vp);
2092         off_t off;
2093
2094         if (hint == NOTE_REVOKE) {
2095                 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
2096                 return(1);
2097         }
2098         off = ip->size - kn->kn_fp->f_offset;
2099         kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
2100         if (kn->kn_sfflags & NOTE_OLDAPI)
2101                 return(1);
2102         return (kn->kn_data != 0);
2103 }
2104
2105
2106 static int
2107 filt_hammer2write(struct knote *kn, long hint)
2108 {
2109         if (hint == NOTE_REVOKE)
2110                 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
2111         kn->kn_data = 0;
2112         return (1);
2113 }
2114
2115 static int
2116 filt_hammer2vnode(struct knote *kn, long hint)
2117 {
2118         if (kn->kn_sfflags & hint)
2119                 kn->kn_fflags |= hint;
2120         if (hint == NOTE_REVOKE) {
2121                 kn->kn_flags |= (EV_EOF | EV_NODATA);
2122                 return (1);
2123         }
2124         return (kn->kn_fflags != 0);
2125 }
2126
2127 /*
2128  * FIFO VOPS
2129  */
2130 static
2131 int
2132 hammer2_vop_markatime(struct vop_markatime_args *ap)
2133 {
2134         hammer2_inode_t *ip;
2135         struct vnode *vp;
2136
2137         vp = ap->a_vp;
2138         ip = VTOI(vp);
2139
2140         if (ip->pmp->ronly)
2141                 return(EROFS);
2142         return(0);
2143 }
2144
2145 static
2146 int
2147 hammer2_vop_fifokqfilter(struct vop_kqfilter_args *ap)
2148 {
2149         int error;
2150
2151         error = VOCALL(&fifo_vnode_vops, &ap->a_head);
2152         if (error)
2153                 error = hammer2_vop_kqfilter(ap);
2154         return(error);
2155 }
2156
2157 /*
2158  * VOPS vector
2159  */
2160 struct vop_ops hammer2_vnode_vops = {
2161         .vop_default    = vop_defaultop,
2162         .vop_fsync      = hammer2_vop_fsync,
2163         .vop_getpages   = vop_stdgetpages,
2164         .vop_putpages   = vop_stdputpages,
2165         .vop_access     = hammer2_vop_access,
2166         .vop_advlock    = hammer2_vop_advlock,
2167         .vop_close      = hammer2_vop_close,
2168         .vop_nlink      = hammer2_vop_nlink,
2169         .vop_ncreate    = hammer2_vop_ncreate,
2170         .vop_nsymlink   = hammer2_vop_nsymlink,
2171         .vop_nremove    = hammer2_vop_nremove,
2172         .vop_nrmdir     = hammer2_vop_nrmdir,
2173         .vop_nrename    = hammer2_vop_nrename,
2174         .vop_getattr    = hammer2_vop_getattr,
2175         .vop_setattr    = hammer2_vop_setattr,
2176         .vop_readdir    = hammer2_vop_readdir,
2177         .vop_readlink   = hammer2_vop_readlink,
2178         .vop_getpages   = vop_stdgetpages,
2179         .vop_putpages   = vop_stdputpages,
2180         .vop_read       = hammer2_vop_read,
2181         .vop_write      = hammer2_vop_write,
2182         .vop_open       = hammer2_vop_open,
2183         .vop_inactive   = hammer2_vop_inactive,
2184         .vop_reclaim    = hammer2_vop_reclaim,
2185         .vop_nresolve   = hammer2_vop_nresolve,
2186         .vop_nlookupdotdot = hammer2_vop_nlookupdotdot,
2187         .vop_nmkdir     = hammer2_vop_nmkdir,
2188         .vop_nmknod     = hammer2_vop_nmknod,
2189         .vop_ioctl      = hammer2_vop_ioctl,
2190         .vop_mountctl   = hammer2_vop_mountctl,
2191         .vop_bmap       = hammer2_vop_bmap,
2192         .vop_strategy   = hammer2_vop_strategy,
2193         .vop_kqfilter   = hammer2_vop_kqfilter
2194 };
2195
2196 struct vop_ops hammer2_spec_vops = {
2197         .vop_default =          vop_defaultop,
2198         .vop_fsync =            hammer2_vop_fsync,
2199         .vop_read =             vop_stdnoread,
2200         .vop_write =            vop_stdnowrite,
2201         .vop_access =           hammer2_vop_access,
2202         .vop_close =            hammer2_vop_close,
2203         .vop_markatime =        hammer2_vop_markatime,
2204         .vop_getattr =          hammer2_vop_getattr,
2205         .vop_inactive =         hammer2_vop_inactive,
2206         .vop_reclaim =          hammer2_vop_reclaim,
2207         .vop_setattr =          hammer2_vop_setattr
2208 };
2209
/*
 * VOPS vector for fifos.  Most operations route through the generic
 * fifo handler (fifo_vnoperate); hammer2 overrides attribute,
 * lifecycle, and kqfilter paths.  The #if 0 entries reference
 * fifo-specific handlers that are not currently implemented.
 */
struct vop_ops hammer2_fifo_vops = {
	.vop_default =		fifo_vnoperate,
	.vop_fsync =		hammer2_vop_fsync,
#if 0
	.vop_read =		hammer2_vop_fiforead,
	.vop_write =		hammer2_vop_fifowrite,
#endif
	.vop_access =		hammer2_vop_access,
#if 0
	.vop_close =		hammer2_vop_fifoclose,
#endif
	.vop_markatime =	hammer2_vop_markatime,
	.vop_getattr =		hammer2_vop_getattr,
	.vop_inactive =		hammer2_vop_inactive,
	.vop_reclaim =		hammer2_vop_reclaim,
	.vop_setattr =		hammer2_vop_setattr,
	.vop_kqfilter =		hammer2_vop_fifokqfilter
};
2228