1a105525ea998ba9f7187c0bdea1cc2fe3c38222
[dragonfly.git] / sys / vfs / hammer2 / hammer2_vnops.c
1 /*
2  * Copyright (c) 2011-2015 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  * by Daniel Flores (GSOC 2013 - mentored by Matthew Dillon, compression) 
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in
17  *    the documentation and/or other materials provided with the
18  *    distribution.
19  * 3. Neither the name of The DragonFly Project nor the names of its
20  *    contributors may be used to endorse or promote products derived
21  *    from this software without specific, prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
27  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
29  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
31  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
33  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 /*
37  * Kernel Filesystem interface
38  *
39  * NOTE! local ipdata pointers must be reloaded on any modifying operation
40  *       to the inode as its underlying chain may have changed.
41  */
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/kernel.h>
46 #include <sys/fcntl.h>
47 #include <sys/buf.h>
48 #include <sys/proc.h>
49 #include <sys/namei.h>
50 #include <sys/mount.h>
51 #include <sys/vnode.h>
52 #include <sys/mountctl.h>
53 #include <sys/dirent.h>
54 #include <sys/uio.h>
55 #include <sys/objcache.h>
56 #include <sys/event.h>
57 #include <sys/file.h>
58 #include <vfs/fifofs/fifo.h>
59
60 #include "hammer2.h"
61
62 static int hammer2_read_file(hammer2_inode_t *ip, struct uio *uio,
63                                 int seqcount);
64 static int hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
65                                 int ioflag, int seqcount);
66 static void hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize);
67 static void hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize);
68
69 struct objcache *cache_xops;
70
71 static __inline
72 void
73 hammer2_knote(struct vnode *vp, int flags)
74 {
75         if (flags)
76                 KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags);
77 }
78
/*
 * hammer2_vop_inactive { vp }
 *
 * Last reference to a vnode is going away but it is still cached.
 */
static
int
hammer2_vop_inactive(struct vop_inactive_args *ap)
{
	hammer2_inode_t *ip;
	struct vnode *vp;

	LOCKSTART;
	vp = ap->a_vp;
	ip = VTOI(vp);

	/*
	 * Degenerate case: the vnode has already been disassociated from
	 * any hammer2 inode (v_data is NULL), nothing to do but recycle.
	 */
	if (ip == NULL) {
		vrecycle(vp);
		LOCKSTOP;
		return (0);
	}

	/*
	 * Check for deleted inodes and recycle immediately on the last
	 * release.  Be sure to destroy any left-over buffer cache buffers
	 * so we do not waste time trying to flush them.
	 *
	 * WARNING: nvtruncbuf() can only be safely called without the inode
	 *          lock held due to the way our write thread works.
	 */
	if (ip->flags & HAMMER2_INODE_ISUNLINKED) {
		hammer2_key_t lbase;
		int nblksize;

		/*
		 * Detect updates to the embedded data which may be
		 * synchronized by the strategy code.  Simply mark the
		 * inode modified so it gets picked up by our normal flush.
		 */
		nblksize = hammer2_calc_logical(ip, 0, &lbase, NULL);
		nvtruncbuf(vp, 0, nblksize, 0, 0);
		vrecycle(vp);
	}
	LOCKSTOP;
	return (0);
}
126
/*
 * hammer2_vop_reclaim { vp }
 *
 * Reclaim a vnode so that it can be reused; after the inode is
 * disassociated, the filesystem must manage it alone.
 */
static
int
hammer2_vop_reclaim(struct vop_reclaim_args *ap)
{
	hammer2_inode_t *ip;
	hammer2_pfs_t *pmp;
	struct vnode *vp;

	LOCKSTART;
	vp = ap->a_vp;
	ip = VTOI(vp);
	/* Degenerate case: vnode already disassociated from its inode */
	if (ip == NULL) {
		LOCKSTOP;
		return(0);
	}
	pmp = ip->pmp;

	/*
	 * The final close of a deleted file or directory marks it for
	 * destruction.  The DELETED flag allows the flusher to shortcut
	 * any modified blocks still unflushed (that is, just ignore them).
	 *
	 * HAMMER2 usually does not try to optimize the freemap by returning
	 * deleted blocks to it as it does not usually know how many snapshots
	 * might be referencing portions of the file/dir.
	 */
	/* Break the vp <-> ip association in both directions */
	vp->v_data = NULL;
	ip->vp = NULL;

	/*
	 * NOTE! We do not attempt to flush chains here, flushing is
	 *       really fragile and could also deadlock.
	 */
	vclrisdirty(vp);

	/*
	 * An unlinked inode may have been relinked to the ihidden directory.
	 * This occurs if the inode was unlinked while open.  Reclamation of
	 * these inodes requires processing we cannot safely do here so add
	 * the inode to the unlinkq in that situation.
	 *
	 * A reclaim can occur at any time so we cannot safely start a
	 * transaction to handle reclamation of unlinked files.  Instead,
	 * the ip is left with a reference and placed on a linked list and
	 * handled later on.
	 */
	if ((ip->flags & HAMMER2_INODE_ISUNLINKED) &&
	    (ip->flags & HAMMER2_INODE_ISDELETED) == 0) {
		hammer2_inode_unlink_t *ipul;

		ipul = kmalloc(sizeof(*ipul), pmp->minode, M_WAITOK | M_ZERO);
		ipul->ip = ip;

		/* list_spin protects the per-PFS unlinkq */
		hammer2_spin_ex(&pmp->list_spin);
		TAILQ_INSERT_TAIL(&pmp->unlinkq, ipul, entry);
		hammer2_spin_unex(&pmp->list_spin);
		/* retain ref from vp for ipul */
	} else {
		hammer2_inode_drop(ip);			/* vp ref */
	}

	/*
	 * XXX handle background sync when ip dirty, kernel will no longer
	 * notify us regarding this inode because there is no longer a
	 * vnode attached to it.
	 */

	LOCKSTOP;
	return (0);
}
201
/*
 * hammer2_vop_fsync { vp, waitfor }
 *
 * Flush dirty buffers for the vnode and sync the inode meta-data
 * within a (non-flush) transaction.
 */
static
int
hammer2_vop_fsync(struct vop_fsync_args *ap)
{
	hammer2_inode_t *ip;
	struct vnode *vp;

	LOCKSTART;
	vp = ap->a_vp;
	ip = VTOI(vp);

#if 0
	/* XXX can't do this yet */
	hammer2_trans_init(ip->pmp, HAMMER2_TRANS_ISFLUSH);
	vfsync(vp, ap->a_waitfor, 1, NULL, NULL);
#endif
	hammer2_trans_init(ip->pmp, 0);
	vfsync(vp, ap->a_waitfor, 1, NULL, NULL);

	/*
	 * Calling chain_flush here creates a lot of duplicative
	 * COW operations due to non-optimal vnode ordering.
	 *
	 * Only do it for an actual fsync() syscall.  The other forms
	 * which call this function will eventually call chain_flush
	 * on the volume root as a catch-all, which is far more optimal.
	 */
	/* inode lock taken after vfsync; MODIFIED test under the lock */
	hammer2_inode_lock(ip, 0);
	if (ip->flags & HAMMER2_INODE_MODIFIED)
		hammer2_inode_fsync(ip);
	hammer2_inode_unlock(ip);
	hammer2_trans_done(ip->pmp);

	LOCKSTOP;
	return (0);
}
238
239 static
240 int
241 hammer2_vop_access(struct vop_access_args *ap)
242 {
243         hammer2_inode_t *ip = VTOI(ap->a_vp);
244         uid_t uid;
245         gid_t gid;
246         int error;
247
248         LOCKSTART;
249         hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED);
250         uid = hammer2_to_unix_xid(&ip->meta.uid);
251         gid = hammer2_to_unix_xid(&ip->meta.gid);
252         error = vop_helper_access(ap, uid, gid, ip->meta.mode, ip->meta.uflags);
253         hammer2_inode_unlock(ip);
254
255         LOCKSTOP;
256         return (error);
257 }
258
/*
 * hammer2_vop_getattr { vp, vap }
 *
 * Fill in *vap from the inode meta-data under a shared inode lock.
 */
static
int
hammer2_vop_getattr(struct vop_getattr_args *ap)
{
	hammer2_pfs_t *pmp;
	hammer2_inode_t *ip;
	struct vnode *vp;
	struct vattr *vap;
	hammer2_chain_t *chain;
	int i;

	LOCKSTART;
	vp = ap->a_vp;
	vap = ap->a_vap;

	ip = VTOI(vp);
	pmp = ip->pmp;

	hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED);

	vap->va_fsid = pmp->mp->mnt_stat.f_fsid.val[0];
	vap->va_fileid = ip->meta.inum;
	vap->va_mode = ip->meta.mode;
	vap->va_nlink = ip->meta.nlinks;
	vap->va_uid = hammer2_to_unix_xid(&ip->meta.uid);
	vap->va_gid = hammer2_to_unix_xid(&ip->meta.gid);
	vap->va_rmajor = 0;
	vap->va_rminor = 0;
	vap->va_size = ip->meta.size;	/* protected by shared lock */
	vap->va_blocksize = HAMMER2_PBUFSIZE;
	vap->va_flags = ip->meta.uflags;
	hammer2_time_to_timespec(ip->meta.ctime, &vap->va_ctime);
	hammer2_time_to_timespec(ip->meta.mtime, &vap->va_mtime);
	/*
	 * atime is deliberately reported as mtime; atime is not
	 * maintained by this filesystem (see the disabled atime code
	 * in hammer2_vop_setattr()).
	 */
	hammer2_time_to_timespec(ip->meta.mtime, &vap->va_atime);
	vap->va_gen = 1;
	/*
	 * Report the maximum data_count over all cluster elements as the
	 * byte usage; elements may disagree during synchronization.
	 */
	vap->va_bytes = 0;
	for (i = 0; i < ip->cluster.nchains; ++i) {
		if ((chain = ip->cluster.array[i].chain) != NULL) {
			if (vap->va_bytes < chain->bref.data_count)
				vap->va_bytes = chain->bref.data_count;
		}
	}
	vap->va_type = hammer2_get_vtype(ip->meta.type);
	vap->va_filerev = 0;
	vap->va_uid_uuid = ip->meta.uid;
	vap->va_gid_uuid = ip->meta.gid;
	vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
			  VA_FSID_UUID_VALID;

	hammer2_inode_unlock(ip);

	LOCKSTOP;
	return (0);
}
313
/*
 * hammer2_vop_setattr { vp, vap, cred }
 *
 * Apply attribute changes (flags, ownership, size, mode, mtime) to the
 * inode within a transaction.  Each attribute group is handled
 * independently; any failure jumps to the common cleanup at done:.
 */
static
int
hammer2_vop_setattr(struct vop_setattr_args *ap)
{
	hammer2_inode_t *ip;
	struct vnode *vp;
	struct vattr *vap;
	int error;
	int kflags = 0;		/* kqueue NOTE_* flags accumulated here */
	uint64_t ctime;

	LOCKSTART;
	vp = ap->a_vp;
	vap = ap->a_vap;
	hammer2_update_time(&ctime);

	ip = VTOI(vp);

	if (ip->pmp->ronly) {
		LOCKSTOP;
		return(EROFS);
	}

	hammer2_pfs_memory_wait(ip->pmp);
	hammer2_trans_init(ip->pmp, 0);
	hammer2_inode_lock(ip, 0);
	error = 0;

	/*
	 * chflags-style update.  NOTE: a flags change is exclusive with
	 * all other attribute changes; both the inner branch and the
	 * fall-through below go straight to done:.
	 */
	if (vap->va_flags != VNOVAL) {
		u_int32_t flags;

		flags = ip->meta.uflags;
		error = vop_helper_setattr_flags(&flags, vap->va_flags,
				     hammer2_to_unix_xid(&ip->meta.uid),
				     ap->a_cred);
		if (error == 0) {
			if (ip->meta.uflags != flags) {
				hammer2_inode_modify(ip);
				ip->meta.uflags = flags;
				ip->meta.ctime = ctime;
				kflags |= NOTE_ATTRIB;
			}
			if (ip->meta.uflags & (IMMUTABLE | APPEND)) {
				error = 0;
				goto done;
			}
		}
		goto done;
	}
	/* IMMUTABLE/APPEND inodes reject all other attribute changes */
	if (ip->meta.uflags & (IMMUTABLE | APPEND)) {
		error = EPERM;
		goto done;
	}
	/*
	 * chown/chgrp.  uid/gid are stored as uuids in the inode
	 * meta-data, so convert in both directions around the helper.
	 */
	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
		mode_t cur_mode = ip->meta.mode;
		uid_t cur_uid = hammer2_to_unix_xid(&ip->meta.uid);
		gid_t cur_gid = hammer2_to_unix_xid(&ip->meta.gid);
		uuid_t uuid_uid;
		uuid_t uuid_gid;

		error = vop_helper_chown(ap->a_vp, vap->va_uid, vap->va_gid,
					 ap->a_cred,
					 &cur_uid, &cur_gid, &cur_mode);
		if (error == 0) {
			hammer2_guid_to_uuid(&uuid_uid, cur_uid);
			hammer2_guid_to_uuid(&uuid_gid, cur_gid);
			/* only dirty the inode if something actually changed */
			if (bcmp(&uuid_uid, &ip->meta.uid, sizeof(uuid_uid)) ||
			    bcmp(&uuid_gid, &ip->meta.gid, sizeof(uuid_gid)) ||
			    ip->meta.mode != cur_mode
			) {
				hammer2_inode_modify(ip);
				ip->meta.uid = uuid_uid;
				ip->meta.gid = uuid_gid;
				ip->meta.mode = cur_mode;
				ip->meta.ctime = ctime;
			}
			kflags |= NOTE_ATTRIB;
		}
	}

	/*
	 * Resize the file
	 */
	if (vap->va_size != VNOVAL && ip->meta.size != vap->va_size) {
		switch(vp->v_type) {
		case VREG:
			/* NOTE: redundant, outer if already excludes equality */
			if (vap->va_size == ip->meta.size)
				break;
			if (vap->va_size < ip->meta.size) {
				hammer2_truncate_file(ip, vap->va_size);
			} else {
				hammer2_extend_file(ip, vap->va_size);
			}
			hammer2_inode_modify(ip);
			ip->meta.mtime = ctime;
			break;
		default:
			/* only regular files can be resized */
			error = EINVAL;
			goto done;
		}
	}
#if 0
	/* atime not supported */
	if (vap->va_atime.tv_sec != VNOVAL) {
		hammer2_inode_modify(ip);
		ip->meta.atime = hammer2_timespec_to_time(&vap->va_atime);
		kflags |= NOTE_ATTRIB;
	}
#endif
	/* chmod */
	if (vap->va_mode != (mode_t)VNOVAL) {
		mode_t cur_mode = ip->meta.mode;
		uid_t cur_uid = hammer2_to_unix_xid(&ip->meta.uid);
		gid_t cur_gid = hammer2_to_unix_xid(&ip->meta.gid);

		error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred,
					 cur_uid, cur_gid, &cur_mode);
		if (error == 0 && ip->meta.mode != cur_mode) {
			hammer2_inode_modify(ip);
			ip->meta.mode = cur_mode;
			ip->meta.ctime = ctime;
			kflags |= NOTE_ATTRIB;
		}
	}

	/* utimes-style mtime update */
	if (vap->va_mtime.tv_sec != VNOVAL) {
		hammer2_inode_modify(ip);
		ip->meta.mtime = hammer2_timespec_to_time(&vap->va_mtime);
		kflags |= NOTE_ATTRIB;
	}

done:
	/*
	 * If a truncation occurred we must call inode_fsync() now in order
	 * to trim the related data chains, otherwise a later expansion can
	 * cause havoc.
	 *
	 * If an extend occurred that changed the DIRECTDATA state, we must
	 * call inode_fsync now in order to prepare the inode's indirect
	 * block table.
	 */
	if (ip->flags & HAMMER2_INODE_RESIZED)
		hammer2_inode_fsync(ip);

	/*
	 * Cleanup.
	 */
	hammer2_inode_unlock(ip);
	hammer2_trans_done(ip->pmp);
	hammer2_knote(ip->vp, kflags);

	LOCKSTOP;
	return (error);
}
467
/*
 * hammer2_vop_readdir { vp, uio, cred, eofflag, ncookies, cookies }
 *
 * Emit directory entries into the uio.  The artificial "." and ".."
 * entries occupy offsets 0 and 1; real entries are keyed by directory
 * hash and scanned via a cluster XOP.
 */
static
int
hammer2_vop_readdir(struct vop_readdir_args *ap)
{
	hammer2_xop_readdir_t *xop;
	hammer2_blockref_t bref;
	hammer2_inode_t *ip;
	hammer2_tid_t inum;
	hammer2_key_t lkey;
	struct uio *uio;
	off_t *cookies;
	off_t saveoff;
	int cookie_index;
	int ncookies;
	int error;
	int eofflag;
	int dtype;
	int r;

	LOCKSTART;
	ip = VTOI(ap->a_vp);
	uio = ap->a_uio;
	saveoff = uio->uio_offset;
	eofflag = 0;
	error = 0;

	/*
	 * Setup directory entry cookies if requested.  The count is an
	 * estimate based on uio space, capped at 1024.
	 */
	if (ap->a_ncookies) {
		ncookies = uio->uio_resid / 16 + 1;
		if (ncookies > 1024)
			ncookies = 1024;
		cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
	} else {
		ncookies = -1;
		cookies = NULL;
	}
	cookie_index = 0;

	hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED);

	/*
	 * Handle artificial entries.  To ensure that only positive 64 bit
	 * quantities are returned to userland we always strip off bit 63.
	 * The hash code is designed such that codes 0x0000-0x7FFF are not
	 * used, allowing us to use these codes for artificial entries.
	 *
	 * Entry 0 is used for '.' and entry 1 is used for '..'.  Do not
	 * allow '..' to cross the mount point into (e.g.) the super-root.
	 */
	if (saveoff == 0) {
		inum = ip->meta.inum & HAMMER2_DIRHASH_USERMSK;
		r = vop_write_dirent(&error, uio, inum, DT_DIR, 1, ".");
		if (r)
			goto done;
		if (cookies)
			cookies[cookie_index] = saveoff;
		++saveoff;
		++cookie_index;
		if (cookie_index == ncookies)
			goto done;
	}

	if (saveoff == 1) {
		/*
		 * Be careful with lockorder when accessing ".."
		 *
		 * (ip is the current dir. xip is the parent dir).
		 */
		inum = ip->meta.inum & HAMMER2_DIRHASH_USERMSK;
		if (ip->pip && ip != ip->pmp->iroot)
			inum = ip->pip->meta.inum & HAMMER2_DIRHASH_USERMSK;
		r = vop_write_dirent(&error, uio, inum, DT_DIR, 2, "..");
		if (r)
			goto done;
		if (cookies)
			cookies[cookie_index] = saveoff;
		++saveoff;
		++cookie_index;
		if (cookie_index == ncookies)
			goto done;
	}

	/* Resume real-entry scan at the hash key encoded in the offset */
	lkey = saveoff | HAMMER2_DIRHASH_VISIBLE;
	if (hammer2_debug & 0x0020)
		kprintf("readdir: lkey %016jx\n", lkey);
	if (error)
		goto done;

	/*
	 * Use XOP for cluster scan.
	 *
	 * parent is the inode cluster, already locked for us.  Don't
	 * double lock shared locks as this will screw up upgrades.
	 */
	xop = hammer2_xop_alloc(ip, 0);
	xop->lkey = lkey;
	hammer2_xop_start(&xop->head, hammer2_xop_readdir);

	for (;;) {
		const hammer2_inode_data_t *ripdata;

		/* collect returns ENOENT when the scan is exhausted */
		error = hammer2_xop_collect(&xop->head, 0);
		if (error)
			break;
		if (cookie_index == ncookies)
			break;
		if (hammer2_debug & 0x0020)
			kprintf("cluster chain %p %p\n",
				xop->head.cluster.focus,
				(xop->head.cluster.focus ?
				 xop->head.cluster.focus->data : (void *)-1));
		ripdata = &hammer2_cluster_rdata(&xop->head.cluster)->ipdata;
		hammer2_cluster_bref(&xop->head.cluster, &bref);
		if (bref.type == HAMMER2_BREF_TYPE_INODE) {
			dtype = hammer2_get_dtype(ripdata);
			saveoff = bref.key & HAMMER2_DIRHASH_USERMSK;
			r = vop_write_dirent(&error, uio,
					     ripdata->meta.inum &
					      HAMMER2_DIRHASH_USERMSK,
					     dtype,
					     ripdata->meta.name_len,
					     ripdata->filename);
			if (r)
				break;
			if (cookies)
				cookies[cookie_index] = saveoff;
			++cookie_index;
		} else {
			/* XXX chain error */
			kprintf("bad chain type readdir %d\n", bref.type);
		}
	}
	hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
	if (error == ENOENT) {
		/* normal end-of-directory termination */
		error = 0;
		eofflag = 1;
		saveoff = (hammer2_key_t)-1;
	} else {
		/* resume after the last entry emitted */
		saveoff = bref.key & HAMMER2_DIRHASH_USERMSK;
	}
done:
	hammer2_inode_unlock(ip);
	if (ap->a_eofflag)
		*ap->a_eofflag = eofflag;
	if (hammer2_debug & 0x0020)
		kprintf("readdir: done at %016jx\n", saveoff);
	uio->uio_offset = saveoff & ~HAMMER2_DIRHASH_VISIBLE;
	/* only report cookies if at least one entry was written */
	if (error && cookie_index == 0) {
		if (cookies) {
			kfree(cookies, M_TEMP);
			*ap->a_ncookies = 0;
			*ap->a_cookies = NULL;
		}
	} else {
		if (cookies) {
			*ap->a_ncookies = cookie_index;
			*ap->a_cookies = cookies;
		}
	}
	LOCKSTOP;
	return (error);
}
632
633 /*
634  * hammer2_vop_readlink { vp, uio, cred }
635  */
636 static
637 int
638 hammer2_vop_readlink(struct vop_readlink_args *ap)
639 {
640         struct vnode *vp;
641         hammer2_inode_t *ip;
642         int error;
643
644         vp = ap->a_vp;
645         if (vp->v_type != VLNK)
646                 return (EINVAL);
647         ip = VTOI(vp);
648
649         error = hammer2_read_file(ip, ap->a_uio, 0);
650         return (error);
651 }
652
653 static
654 int
655 hammer2_vop_read(struct vop_read_args *ap)
656 {
657         struct vnode *vp;
658         hammer2_inode_t *ip;
659         struct uio *uio;
660         int error;
661         int seqcount;
662         int bigread;
663
664         /*
665          * Read operations supported on this vnode?
666          */
667         vp = ap->a_vp;
668         if (vp->v_type != VREG)
669                 return (EINVAL);
670
671         /*
672          * Misc
673          */
674         ip = VTOI(vp);
675         uio = ap->a_uio;
676         error = 0;
677
678         seqcount = ap->a_ioflag >> 16;
679         bigread = (uio->uio_resid > 100 * 1024 * 1024);
680
681         error = hammer2_read_file(ip, uio, seqcount);
682         return (error);
683 }
684
/*
 * hammer2_vop_write { vp, uio, ioflag, cred }
 *
 * Write to a regular file within a transaction after enforcing the
 * process file-size resource limit.
 */
static
int
hammer2_vop_write(struct vop_write_args *ap)
{
	hammer2_inode_t *ip;
	thread_t td;
	struct vnode *vp;
	struct uio *uio;
	int error;
	int seqcount;

	/*
	 * Write operations supported on this vnode?
	 */
	vp = ap->a_vp;
	if (vp->v_type != VREG)
		return (EINVAL);

	/*
	 * Misc
	 */
	ip = VTOI(vp);
	uio = ap->a_uio;
	error = 0;
	if (ip->pmp->ronly) {
		return (EROFS);
	}

	seqcount = ap->a_ioflag >> 16;

	/*
	 * Check resource limit: deliver SIGXFSZ and fail with EFBIG if
	 * the write would exceed RLIMIT_FSIZE for the calling process.
	 */
	if (uio->uio_resid > 0 && (td = uio->uio_td) != NULL && td->td_proc &&
	    uio->uio_offset + uio->uio_resid >
	     td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		lwpsignal(td->td_proc, td->td_lwp, SIGXFSZ);
		return (EFBIG);
	}

	/*
	 * The transaction interlocks against flushes initiations
	 * (note: but will run concurrently with the actual flush).
	 */
	hammer2_trans_init(ip->pmp, 0);
	error = hammer2_write_file(ip, uio, ap->a_ioflag, seqcount);
	hammer2_trans_done(ip->pmp);

	return (error);
}
735
736 /*
737  * Perform read operations on a file or symlink given an UNLOCKED
738  * inode and uio.
739  *
740  * The passed ip is not locked.
741  */
742 static
743 int
744 hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, int seqcount)
745 {
746         hammer2_off_t size;
747         struct buf *bp;
748         int error;
749
750         error = 0;
751
752         /*
753          * UIO read loop.
754          *
755          * WARNING! Assumes that the kernel interlocks size changes at the
756          *          vnode level.
757          */
758         hammer2_mtx_sh(&ip->lock);
759         size = ip->meta.size;
760         hammer2_mtx_unlock(&ip->lock);
761
762         while (uio->uio_resid > 0 && uio->uio_offset < size) {
763                 hammer2_key_t lbase;
764                 hammer2_key_t leof;
765                 int lblksize;
766                 int loff;
767                 int n;
768
769                 lblksize = hammer2_calc_logical(ip, uio->uio_offset,
770                                                 &lbase, &leof);
771
772                 error = cluster_read(ip->vp, leof, lbase, lblksize,
773                                      uio->uio_resid, seqcount * BKVASIZE,
774                                      &bp);
775
776                 if (error)
777                         break;
778                 loff = (int)(uio->uio_offset - lbase);
779                 n = lblksize - loff;
780                 if (n > uio->uio_resid)
781                         n = uio->uio_resid;
782                 if (n > size - uio->uio_offset)
783                         n = (int)(size - uio->uio_offset);
784                 bp->b_flags |= B_AGE;
785                 uiomove((char *)bp->b_data + loff, n, uio);
786                 bqrelse(bp);
787         }
788         return (error);
789 }
790
791 /*
792  * Write to the file represented by the inode via the logical buffer cache.
793  * The inode may represent a regular file or a symlink.
794  *
795  * The inode must not be locked.
796  */
797 static
798 int
799 hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
800                    int ioflag, int seqcount)
801 {
802         hammer2_key_t old_eof;
803         hammer2_key_t new_eof;
804         struct buf *bp;
805         int kflags;
806         int error;
807         int modified;
808
809         /*
810          * Setup if append
811          *
812          * WARNING! Assumes that the kernel interlocks size changes at the
813          *          vnode level.
814          */
815         hammer2_mtx_ex(&ip->lock);
816         if (ioflag & IO_APPEND)
817                 uio->uio_offset = ip->meta.size;
818         old_eof = ip->meta.size;
819
820         /*
821          * Extend the file if necessary.  If the write fails at some point
822          * we will truncate it back down to cover as much as we were able
823          * to write.
824          *
825          * Doing this now makes it easier to calculate buffer sizes in
826          * the loop.
827          */
828         kflags = 0;
829         error = 0;
830         modified = 0;
831
832         if (uio->uio_offset + uio->uio_resid > old_eof) {
833                 new_eof = uio->uio_offset + uio->uio_resid;
834                 modified = 1;
835                 hammer2_extend_file(ip, new_eof);
836                 kflags |= NOTE_EXTEND;
837         } else {
838                 new_eof = old_eof;
839         }
840         hammer2_mtx_unlock(&ip->lock);
841         
842         /*
843          * UIO write loop
844          */
845         while (uio->uio_resid > 0) {
846                 hammer2_key_t lbase;
847                 int trivial;
848                 int endofblk;
849                 int lblksize;
850                 int loff;
851                 int n;
852
853                 /*
854                  * Don't allow the buffer build to blow out the buffer
855                  * cache.
856                  */
857                 if ((ioflag & IO_RECURSE) == 0)
858                         bwillwrite(HAMMER2_PBUFSIZE);
859
860                 /*
861                  * This nominally tells us how much we can cluster and
862                  * what the logical buffer size needs to be.  Currently
863                  * we don't try to cluster the write and just handle one
864                  * block at a time.
865                  */
866                 lblksize = hammer2_calc_logical(ip, uio->uio_offset,
867                                                 &lbase, NULL);
868                 loff = (int)(uio->uio_offset - lbase);
869                 
870                 KKASSERT(lblksize <= 65536);
871
872                 /*
873                  * Calculate bytes to copy this transfer and whether the
874                  * copy completely covers the buffer or not.
875                  */
876                 trivial = 0;
877                 n = lblksize - loff;
878                 if (n > uio->uio_resid) {
879                         n = uio->uio_resid;
880                         if (loff == lbase && uio->uio_offset + n == new_eof)
881                                 trivial = 1;
882                         endofblk = 0;
883                 } else {
884                         if (loff == 0)
885                                 trivial = 1;
886                         endofblk = 1;
887                 }
888
889                 /*
890                  * Get the buffer
891                  */
892                 if (uio->uio_segflg == UIO_NOCOPY) {
893                         /*
894                          * Issuing a write with the same data backing the
895                          * buffer.  Instantiate the buffer to collect the
896                          * backing vm pages, then read-in any missing bits.
897                          *
898                          * This case is used by vop_stdputpages().
899                          */
900                         bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0);
901                         if ((bp->b_flags & B_CACHE) == 0) {
902                                 bqrelse(bp);
903                                 error = bread(ip->vp, lbase, lblksize, &bp);
904                         }
905                 } else if (trivial) {
906                         /*
907                          * Even though we are entirely overwriting the buffer
908                          * we may still have to zero it out to avoid a
909                          * mmap/write visibility issue.
910                          */
911                         bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0);
912                         if ((bp->b_flags & B_CACHE) == 0)
913                                 vfs_bio_clrbuf(bp);
914                 } else {
915                         /*
916                          * Partial overwrite, read in any missing bits then
917                          * replace the portion being written.
918                          *
919                          * (The strategy code will detect zero-fill physical
920                          * blocks for this case).
921                          */
922                         error = bread(ip->vp, lbase, lblksize, &bp);
923                         if (error == 0)
924                                 bheavy(bp);
925                 }
926
927                 if (error) {
928                         brelse(bp);
929                         break;
930                 }
931
932                 /*
933                  * Ok, copy the data in
934                  */
935                 error = uiomove(bp->b_data + loff, n, uio);
936                 kflags |= NOTE_WRITE;
937                 modified = 1;
938                 if (error) {
939                         brelse(bp);
940                         break;
941                 }
942
943                 /*
944                  * WARNING: Pageout daemon will issue UIO_NOCOPY writes
945                  *          with IO_SYNC or IO_ASYNC set.  These writes
946                  *          must be handled as the pageout daemon expects.
947                  */
948                 if (ioflag & IO_SYNC) {
949                         bwrite(bp);
950                 } else if ((ioflag & IO_DIRECT) && endofblk) {
951                         bawrite(bp);
952                 } else if (ioflag & IO_ASYNC) {
953                         bawrite(bp);
954                 } else {
955                         bdwrite(bp);
956                 }
957         }
958
959         /*
960          * Cleanup.  If we extended the file EOF but failed to write through
961          * the entire write is a failure and we have to back-up.
962          */
963         if (error && new_eof != old_eof) {
964                 hammer2_mtx_ex(&ip->lock);
965                 hammer2_truncate_file(ip, old_eof);
966                 if (ip->flags & HAMMER2_INODE_MODIFIED)
967                         hammer2_inode_fsync(ip);
968                 hammer2_mtx_unlock(&ip->lock);
969         } else if (modified) {
970                 hammer2_mtx_ex(&ip->lock);
971                 hammer2_inode_modify(ip);
972                 hammer2_update_time(&ip->meta.mtime);
973                 if (ip->flags & HAMMER2_INODE_MODIFIED)
974                         hammer2_inode_fsync(ip);
975                 hammer2_mtx_unlock(&ip->lock);
976                 hammer2_knote(ip->vp, kflags);
977         }
978         hammer2_trans_assert_strategy(ip->pmp);
979
980         return error;
981 }
982
983 /*
984  * Truncate the size of a file.  The inode must not be locked.
985  *
986  * We must unconditionally set HAMMER2_INODE_RESIZED to properly
987  * ensure that any on-media data beyond the new file EOF has been destroyed.
988  *
989  * WARNING: nvtruncbuf() can only be safely called without the inode lock
990  *          held due to the way our write thread works.  If the truncation
991  *          occurs in the middle of a buffer, nvtruncbuf() is responsible
992  *          for dirtying that buffer and zeroing out trailing bytes.
993  *
994  * WARNING! Assumes that the kernel interlocks size changes at the
995  *          vnode level.
996  *
997  * WARNING! Caller assumes responsibility for removing dead blocks
998  *          if INODE_RESIZED is set.
999  */
static
void
hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize)
{
	hammer2_key_t lbase;
	int nblksize;

	LOCKSTART;
	/*
	 * Temporarily release the (exclusively held) inode lock; per the
	 * function header, nvtruncbuf() must not be called with ip->lock
	 * held.
	 */
	hammer2_mtx_unlock(&ip->lock);
	if (ip->vp) {
		/*
		 * Compute the logical block size at the new EOF and
		 * truncate the buffer cache.  The third argument is the
		 * byte offset of the new EOF within its logical block;
		 * nvtruncbuf() dirties that buffer and zeros the trailing
		 * bytes.
		 */
		nblksize = hammer2_calc_logical(ip, nsize, &lbase, NULL);
		nvtruncbuf(ip->vp, nsize,
			   nblksize, (int)nsize & (nblksize - 1),
			   0);
	}
	hammer2_mtx_ex(&ip->lock);
	/* Caller must not have left a prior resize pending */
	KKASSERT((ip->flags & HAMMER2_INODE_RESIZED) == 0);
	/*
	 * Record the old and new EOF and flag the inode.  RESIZED is set
	 * unconditionally; the caller is responsible for removing dead
	 * blocks beyond the new EOF (see function header).
	 */
	ip->osize = ip->meta.size;
	ip->meta.size = nsize;
	atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED |
				   HAMMER2_INODE_RESIZED);
	LOCKSTOP;
}
1023
1024 /*
1025  * Extend the size of a file.  The inode must not be locked.
1026  *
1027  * Even though the file size is changing, we do not have to set the
1028  * INODE_RESIZED bit unless the file size crosses the EMBEDDED_BYTES
1029  * boundary.  When this occurs a hammer2_inode_fsync() is required
1030  * to prepare the inode cluster's indirect block table.
1031  *
1032  * WARNING! Assumes that the kernel interlocks size changes at the
1033  *          vnode level.
1034  *
1035  * WARNING! Caller assumes responsibility for transitioning out
1036  *          of the inode DIRECTDATA mode if INODE_RESIZED is set.
1037  */
static
void
hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize)
{
	hammer2_key_t lbase;
	hammer2_key_t osize;
	int oblksize;
	int nblksize;

	LOCKSTART;

	/* Caller must not have left a prior resize pending */
	KKASSERT((ip->flags & HAMMER2_INODE_RESIZED) == 0);

	/* Record the old and new EOF and flag the inode as modified */
	osize = ip->meta.size;
	ip->osize = osize;
	ip->meta.size = nsize;
	atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED);

	/*
	 * RESIZED is only required when the size change crosses the
	 * EMBEDDED_BYTES boundary (see function header); the caller must
	 * then transition the inode out of DIRECTDATA mode.
	 */
	if (osize <= HAMMER2_EMBEDDED_BYTES && nsize > HAMMER2_EMBEDDED_BYTES)
		atomic_set_int(&ip->flags, HAMMER2_INODE_RESIZED);

	/*
	 * Extend the buffer cache with the inode lock released, mirroring
	 * the nvtruncbuf() handling in hammer2_truncate_file().
	 */
	hammer2_mtx_unlock(&ip->lock);
	if (ip->vp) {
		oblksize = hammer2_calc_logical(ip, osize, &lbase, NULL);
		nblksize = hammer2_calc_logical(ip, nsize, &lbase, NULL);
		nvextendbuf(ip->vp,
			    osize, nsize,
			    oblksize, nblksize,
			    -1, -1, 0);
	}
	hammer2_mtx_ex(&ip->lock);

	LOCKSTOP;
}
1071
/*
 * hammer2_vop_nresolve { nch, dvp, cred }
 *
 * Resolve a name in directory (dvp) via an XOP directory lookup and
 * set the namecache entry to the resulting vnode, or to NULL on ENOENT.
 */
static
int
hammer2_vop_nresolve(struct vop_nresolve_args *ap)
{
	hammer2_xop_nresolve_t *xop;
	hammer2_inode_t *ip;
	hammer2_inode_t *dip;
	struct namecache *ncp;
	struct vnode *vp;
	int error;

	LOCKSTART;
	dip = VTOI(ap->a_dvp);
	xop = hammer2_xop_alloc(dip, 0);

	ncp = ap->a_nch->ncp;
	hammer2_xop_setname(&xop->head, ncp->nc_name, ncp->nc_nlen);

	/*
	 * Note: In DragonFly the kernel handles '.' and '..'.
	 *
	 * A shared lock on dip suffices for a read-only lookup.
	 */
	hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);
	hammer2_xop_start(&xop->head, hammer2_xop_nresolve);

	/* Collect the backend result; on success instantiate the inode */
	error = hammer2_xop_collect(&xop->head, 0);
	if (error) {
		ip = NULL;
	} else {
		ip = hammer2_inode_get(dip->pmp, dip, &xop->head.cluster, -1);
	}
	hammer2_inode_unlock(dip);

	/*
	 * Acquire the related vnode
	 *
	 * NOTE: For error processing, only ENOENT resolves the namecache
	 *       entry to NULL, otherwise we just return the error and
	 *       leave the namecache unresolved.
	 *
	 * NOTE: multiple hammer2_inode structures can be aliased to the
	 *       same chain element, for example for hardlinks.  This
	 *       use case does not 'reattach' inode associations that
	 *       might already exist, but always allocates a new one.
	 *
	 * WARNING: inode structure is locked exclusively via inode_get
	 *          but chain was locked shared.  inode_unlock()
	 *          will handle it properly.
	 */
	if (ip) {
		vp = hammer2_igetv(ip, &error);
		if (error == 0) {
			vn_unlock(vp);
			cache_setvp(ap->a_nch, vp);
		} else if (error == ENOENT) {
			cache_setvp(ap->a_nch, NULL);
		}
		hammer2_inode_unlock(ip);

		/*
		 * The vp should not be released until after we've disposed
		 * of our locks, because it might cause vop_inactive() to
		 * be called.
		 */
		if (vp)
			vrele(vp);
	} else {
		error = ENOENT;
		cache_setvp(ap->a_nch, NULL);
	}
	hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
	/* A successful resolve must have attached a vnode to the ncp */
	KASSERT(error || ap->a_nch->ncp->nc_vp != NULL,
		("resolve error %d/%p ap %p\n",
		 error, ap->a_nch->ncp->nc_vp, ap));
	LOCKSTOP;

	return error;
}
1149
1150 static
1151 int
1152 hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
1153 {
1154         hammer2_inode_t *dip;
1155         hammer2_inode_t *ip;
1156         int error;
1157
1158         LOCKSTART;
1159         dip = VTOI(ap->a_dvp);
1160
1161         if ((ip = dip->pip) == NULL) {
1162                 *ap->a_vpp = NULL;
1163                 LOCKSTOP;
1164                 return ENOENT;
1165         }
1166         hammer2_inode_lock(ip, 0);
1167         *ap->a_vpp = hammer2_igetv(ip, &error);
1168         hammer2_inode_unlock(ip);
1169
1170         LOCKSTOP;
1171         return error;
1172 }
1173
1174 static
1175 int
1176 hammer2_vop_nmkdir(struct vop_nmkdir_args *ap)
1177 {
1178         hammer2_inode_t *dip;
1179         hammer2_inode_t *nip;
1180         struct namecache *ncp;
1181         const uint8_t *name;
1182         size_t name_len;
1183         int error;
1184
1185         LOCKSTART;
1186         dip = VTOI(ap->a_dvp);
1187         if (dip->pmp->ronly) {
1188                 LOCKSTOP;
1189                 return (EROFS);
1190         }
1191
1192         ncp = ap->a_nch->ncp;
1193         name = ncp->nc_name;
1194         name_len = ncp->nc_nlen;
1195
1196         hammer2_pfs_memory_wait(dip->pmp);
1197         hammer2_trans_init(dip->pmp, 0);
1198         nip = hammer2_inode_create(dip, ap->a_vap, ap->a_cred,
1199                                    name, name_len, 0,
1200                                    hammer2_trans_newinum(dip->pmp), 0, 0,
1201                                    0, &error);
1202         if (error) {
1203                 KKASSERT(nip == NULL);
1204                 *ap->a_vpp = NULL;
1205         } else {
1206                 *ap->a_vpp = hammer2_igetv(nip, &error);
1207                 hammer2_inode_unlock(nip);
1208         }
1209         hammer2_trans_done(dip->pmp);
1210
1211         if (error == 0) {
1212                 cache_setunresolved(ap->a_nch);
1213                 cache_setvp(ap->a_nch, *ap->a_vpp);
1214         }
1215         LOCKSTOP;
1216         return error;
1217 }
1218
/*
 * hammer2_vop_open { vp, mode, cred, fp }
 *
 * No hammer2-specific open-time work is needed; defer to the stock
 * handler.
 */
static
int
hammer2_vop_open(struct vop_open_args *ap)
{
	return (vop_stdopen(ap));
}
1225
1226 /*
1227  * hammer2_vop_advlock { vp, id, op, fl, flags }
1228  */
1229 static
1230 int
1231 hammer2_vop_advlock(struct vop_advlock_args *ap)
1232 {
1233         hammer2_inode_t *ip = VTOI(ap->a_vp);
1234         hammer2_off_t size;
1235
1236         size = ip->meta.size;
1237         return (lf_advlock(ap, &ip->advlock, size));
1238 }
1239
/*
 * hammer2_vop_close { vp, fflag }
 *
 * No hammer2-specific close-time work is needed; defer to the stock
 * handler.
 */
static
int
hammer2_vop_close(struct vop_close_args *ap)
{
	return (vop_stdclose(ap));
}
1246
1247 /*
1248  * hammer2_vop_nlink { nch, dvp, vp, cred }
1249  *
1250  * Create a hardlink from (vp) to {dvp, nch}.
1251  */
static
int
hammer2_vop_nlink(struct vop_nlink_args *ap)
{
	hammer2_xop_nlink_t *xop1;
	hammer2_inode_t *fdip;	/* source directory (parent of ip) */
	hammer2_inode_t *tdip;	/* target directory to create link in */
	hammer2_inode_t *cdip;	/* common parent directory */
	hammer2_inode_t *ip;	/* inode we are hardlinking to */
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	int error;

	LOCKSTART;
	tdip = VTOI(ap->a_dvp);
	/* No modifications on a read-only PFS */
	if (tdip->pmp->ronly) {
		LOCKSTOP;
		return (EROFS);
	}

	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;

	/*
	 * ip represents the file being hardlinked.  The file could be a
	 * normal file or a hardlink target if it has already been hardlinked.
	 * If ip is a hardlinked target then ip->pip represents the location
	 * of the hardlinked target, NOT the location of the hardlink pointer.
	 *
	 * Bump nlinks and potentially also create or move the hardlink
	 * target in the parent directory common to (ip) and (tdip).  The
	 * consolidation code can modify ip->cluster and ip->pip.  The
	 * returned cluster is locked.
	 */
	ip = VTOI(ap->a_vp);
	hammer2_pfs_memory_wait(ip->pmp);
	hammer2_trans_init(ip->pmp, 0);

	/*
	 * The common parent directory must be locked first to avoid deadlocks.
	 * Also note that fdip and/or tdip might match cdip.
	 *
	 * Lock order: cdip, fdip, tdip, then ip.
	 */
	fdip = ip->pip;
	cdip = hammer2_inode_common_parent(fdip, tdip);
	hammer2_inode_lock(cdip, 0);
	hammer2_inode_lock(fdip, 0);
	hammer2_inode_lock(tdip, 0);
	hammer2_inode_lock(ip, 0);
	error = 0;

	/*
	 * If ip is not a hardlink target we must convert it to a hardlink.
	 * If fdip != cdip we must shift the inode to cdip.
	 */
	if (fdip != cdip || (ip->meta.name_key & HAMMER2_DIRHASH_VISIBLE)) {
		xop1 = hammer2_xop_alloc(fdip, HAMMER2_XOP_MODIFYING);
		hammer2_xop_setip2(&xop1->head, ip);
		hammer2_xop_setip3(&xop1->head, cdip);

		hammer2_xop_start(&xop1->head, hammer2_xop_nlink);
		error = hammer2_xop_collect(&xop1->head, 0);
		hammer2_xop_retire(&xop1->head, HAMMER2_XOPMASK_VOP);
		/* ENOENT from the backend is not a failure here */
		if (error == ENOENT)
			error = 0;
	}

	/*
	 * Must synchronize original inode whose chains are now a hardlink
	 * target.  We must match what the backend XOP did to the
	 * chains.
	 */
	if (error == 0 && (ip->meta.name_key & HAMMER2_DIRHASH_VISIBLE)) {
		hammer2_inode_modify(ip);
		ip->meta.name_key = ip->meta.inum;
		ip->meta.name_len = 18;	/* "0x%016jx" */
	}

	/*
	 * Create the hardlink target and bump nlinks.
	 */
	if (error == 0) {
		hammer2_inode_create(tdip, NULL, NULL,
				     name, name_len, 0,
				     ip->meta.inum,
				     HAMMER2_OBJTYPE_HARDLINK, ip->meta.type,
				     0, &error);
		hammer2_inode_modify(ip);
		++ip->meta.nlinks;
	}
	if (error == 0) {
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, ap->a_vp);
	}
	/* Unlock in reverse of acquisition order; drop the cdip ref */
	hammer2_inode_unlock(ip);
	hammer2_inode_unlock(tdip);
	hammer2_inode_unlock(fdip);
	hammer2_inode_unlock(cdip);
	hammer2_inode_drop(cdip);
	hammer2_trans_done(ip->pmp);

	LOCKSTOP;
	return error;
}
1357
1358 /*
1359  * hammer2_vop_ncreate { nch, dvp, vpp, cred, vap }
1360  *
1361  * The operating system has already ensured that the directory entry
1362  * does not exist and done all appropriate namespace locking.
1363  */
1364 static
1365 int
1366 hammer2_vop_ncreate(struct vop_ncreate_args *ap)
1367 {
1368         hammer2_inode_t *dip;
1369         hammer2_inode_t *nip;
1370         struct namecache *ncp;
1371         const uint8_t *name;
1372         size_t name_len;
1373         int error;
1374
1375         LOCKSTART;
1376         dip = VTOI(ap->a_dvp);
1377         if (dip->pmp->ronly) {
1378                 LOCKSTOP;
1379                 return (EROFS);
1380         }
1381
1382         ncp = ap->a_nch->ncp;
1383         name = ncp->nc_name;
1384         name_len = ncp->nc_nlen;
1385         hammer2_pfs_memory_wait(dip->pmp);
1386         hammer2_trans_init(dip->pmp, 0);
1387
1388         nip = hammer2_inode_create(dip, ap->a_vap, ap->a_cred,
1389                                    name, name_len, 0,
1390                                    hammer2_trans_newinum(dip->pmp), 0, 0,
1391                                    0, &error);
1392         if (error) {
1393                 KKASSERT(nip == NULL);
1394                 *ap->a_vpp = NULL;
1395         } else {
1396                 *ap->a_vpp = hammer2_igetv(nip, &error);
1397                 hammer2_inode_unlock(nip);
1398         }
1399         hammer2_trans_done(dip->pmp);
1400
1401         if (error == 0) {
1402                 cache_setunresolved(ap->a_nch);
1403                 cache_setvp(ap->a_nch, *ap->a_vpp);
1404         }
1405         LOCKSTOP;
1406         return error;
1407 }
1408
1409 /*
1410  * Make a device node (typically a fifo)
1411  */
1412 static
1413 int
1414 hammer2_vop_nmknod(struct vop_nmknod_args *ap)
1415 {
1416         hammer2_inode_t *dip;
1417         hammer2_inode_t *nip;
1418         struct namecache *ncp;
1419         const uint8_t *name;
1420         size_t name_len;
1421         int error;
1422
1423         LOCKSTART;
1424         dip = VTOI(ap->a_dvp);
1425         if (dip->pmp->ronly) {
1426                 LOCKSTOP;
1427                 return (EROFS);
1428         }
1429
1430         ncp = ap->a_nch->ncp;
1431         name = ncp->nc_name;
1432         name_len = ncp->nc_nlen;
1433         hammer2_pfs_memory_wait(dip->pmp);
1434         hammer2_trans_init(dip->pmp, 0);
1435
1436         nip = hammer2_inode_create(dip, ap->a_vap, ap->a_cred,
1437                                    name, name_len, 0,
1438                                    hammer2_trans_newinum(dip->pmp), 0, 0,
1439                                    0, &error);
1440         if (error) {
1441                 KKASSERT(nip == NULL);
1442                 *ap->a_vpp = NULL;
1443         } else {
1444                 *ap->a_vpp = hammer2_igetv(nip, &error);
1445                 hammer2_inode_unlock(nip);
1446         }
1447         hammer2_trans_done(dip->pmp);
1448
1449         if (error == 0) {
1450                 cache_setunresolved(ap->a_nch);
1451                 cache_setvp(ap->a_nch, *ap->a_vpp);
1452         }
1453         LOCKSTOP;
1454         return error;
1455 }
1456
1457 /*
1458  * hammer2_vop_nsymlink { nch, dvp, vpp, cred, vap, target }
1459  */
static
int
hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
{
	hammer2_inode_t *dip;
	hammer2_inode_t *nip;
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	int error;

	dip = VTOI(ap->a_dvp);
	/* No modifications on a read-only PFS */
	if (dip->pmp->ronly)
		return (EROFS);

	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;
	hammer2_pfs_memory_wait(dip->pmp);
	hammer2_trans_init(dip->pmp, 0);

	ap->a_vap->va_type = VLNK;	/* enforce type */

	/* Create the symlink inode itself (target data written below) */
	nip = hammer2_inode_create(dip, ap->a_vap, ap->a_cred,
				   name, name_len, 0,
				   hammer2_trans_newinum(dip->pmp), 0, 0,
				   0, &error);
	if (error) {
		KKASSERT(nip == NULL);
		*ap->a_vpp = NULL;
		hammer2_trans_done(dip->pmp);
		return error;
	}
	*ap->a_vpp = hammer2_igetv(nip, &error);

	/*
	 * Build the softlink (~like file data) and finalize the namecache.
	 */
	if (error == 0) {
		size_t bytes;
		struct uio auio;
		struct iovec aiov;

		bytes = strlen(ap->a_target);

		hammer2_inode_unlock(nip);
		/* Construct a kernel-space uio covering the target string */
		bzero(&auio, sizeof(auio));
		bzero(&aiov, sizeof(aiov));
		auio.uio_iov = &aiov;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_WRITE;
		auio.uio_resid = bytes;
		auio.uio_iovcnt = 1;
		auio.uio_td = curthread;
		aiov.iov_base = ap->a_target;
		aiov.iov_len = bytes;
		error = hammer2_write_file(nip, &auio, IO_APPEND, 0);
		/*
		 * XXX handle error.  NOTE(review): a write failure is
		 * discarded here, leaving the symlink inode in place with
		 * its target data possibly missing — confirm intended.
		 */
		error = 0;
	} else {
		hammer2_inode_unlock(nip);
	}
	hammer2_trans_done(dip->pmp);

	/*
	 * Finalize namecache
	 */
	if (error == 0) {
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, *ap->a_vpp);
		/* hammer2_knote(ap->a_dvp, NOTE_WRITE); */
	}
	return error;
}
1534
1535 /*
1536  * hammer2_vop_nremove { nch, dvp, cred }
1537  */
static
int
hammer2_vop_nremove(struct vop_nremove_args *ap)
{
	hammer2_xop_unlink_t *xop;
	hammer2_inode_t *dip;
	hammer2_inode_t *ip;
	struct namecache *ncp;
	int error;
	int isopen;

	LOCKSTART;
	dip = VTOI(ap->a_dvp);
	/* No modifications on a read-only PFS */
	if (dip->pmp->ronly) {
		LOCKSTOP;
		return(EROFS);
	}

	ncp = ap->a_nch->ncp;

	hammer2_pfs_memory_wait(dip->pmp);
	hammer2_trans_init(dip->pmp, 0);
	hammer2_inode_lock(dip, 0);

	/*
	 * The unlink XOP unlinks the path from the directory and
	 * locates and returns the cluster associated with the real inode.
	 * We have to handle nlinks here on the frontend.
	 *
	 * isdir 0: the target must not be a directory (contrast with
	 * hammer2_vop_nrmdir).  An entry still open is not deleted
	 * permanently; final disposition is presumably handled via
	 * hammer2_inode_run_unlinkq() below — verify against backend.
	 */
	xop = hammer2_xop_alloc(dip, HAMMER2_XOP_MODIFYING);
	hammer2_xop_setname(&xop->head, ncp->nc_name, ncp->nc_nlen);
	isopen = cache_isopen(ap->a_nch);
	xop->isdir = 0;
	xop->dopermanent = isopen ?  0 : HAMMER2_DELETE_PERMANENT;
	hammer2_xop_start(&xop->head, hammer2_xop_unlink);

	/*
	 * Collect the real inode and adjust nlinks, destroy the real
	 * inode if nlinks transitions to 0 and it was the real inode
	 * (else it has already been removed).
	 */
	error = hammer2_xop_collect(&xop->head, 0);
	hammer2_inode_unlock(dip);

	if (error == 0) {
		ip = hammer2_inode_get(dip->pmp, dip, &xop->head.cluster, -1);
		hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
		if (ip) {
			hammer2_inode_unlink_finisher(ip, isopen);
			hammer2_inode_unlock(ip);
		}
	} else {
		hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
	}

	hammer2_inode_run_unlinkq(dip->pmp);
	hammer2_trans_done(dip->pmp);
	if (error == 0)
		cache_unlink(ap->a_nch);
	LOCKSTOP;
	return (error);
}
1600
1601 /*
1602  * hammer2_vop_nrmdir { nch, dvp, cred }
1603  */
static
int
hammer2_vop_nrmdir(struct vop_nrmdir_args *ap)
{
	hammer2_xop_unlink_t *xop;
	hammer2_inode_t *dip;
	hammer2_inode_t *ip;
	struct namecache *ncp;
	int isopen;
	int error;

	LOCKSTART;
	dip = VTOI(ap->a_dvp);
	/* No modifications on a read-only PFS */
	if (dip->pmp->ronly) {
		LOCKSTOP;
		return(EROFS);
	}

	hammer2_pfs_memory_wait(dip->pmp);
	hammer2_trans_init(dip->pmp, 0);
	hammer2_inode_lock(dip, 0);

	/*
	 * isdir 1: the target must be a directory (contrast with
	 * hammer2_vop_nremove).  An entry still open is not deleted
	 * permanently.
	 */
	xop = hammer2_xop_alloc(dip, HAMMER2_XOP_MODIFYING);

	ncp = ap->a_nch->ncp;
	hammer2_xop_setname(&xop->head, ncp->nc_name, ncp->nc_nlen);
	isopen = cache_isopen(ap->a_nch);
	xop->isdir = 1;
	xop->dopermanent = isopen ?  0 : HAMMER2_DELETE_PERMANENT;
	hammer2_xop_start(&xop->head, hammer2_xop_unlink);

	/*
	 * Collect the real inode and adjust nlinks, destroy the real
	 * inode if nlinks transitions to 0 and it was the real inode
	 * (else it has already been removed).
	 */
	error = hammer2_xop_collect(&xop->head, 0);
	hammer2_inode_unlock(dip);

	if (error == 0) {
		ip = hammer2_inode_get(dip->pmp, dip, &xop->head.cluster, -1);
		hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
		if (ip) {
			hammer2_inode_unlink_finisher(ip, isopen);
			hammer2_inode_unlock(ip);
		}
	} else {
		hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
	}
	hammer2_inode_run_unlinkq(dip->pmp);
	hammer2_trans_done(dip->pmp);
	if (error == 0)
		cache_unlink(ap->a_nch);
	LOCKSTOP;
	return (error);
}
1660
1661 /*
1662  * hammer2_vop_nrename { fnch, tnch, fdvp, tdvp, cred }
1663  */
1664 static
1665 int
1666 hammer2_vop_nrename(struct vop_nrename_args *ap)
1667 {
1668         struct namecache *fncp;
1669         struct namecache *tncp;
1670         hammer2_inode_t *cdip;
1671         hammer2_inode_t *fdip;
1672         hammer2_inode_t *tdip;
1673         hammer2_inode_t *ip;
1674         const uint8_t *fname;
1675         size_t fname_len;
1676         const uint8_t *tname;
1677         size_t tname_len;
1678         int error;
1679         int tnch_error;
1680         hammer2_key_t tlhc;
1681
1682         if (ap->a_fdvp->v_mount != ap->a_tdvp->v_mount)
1683                 return(EXDEV);
1684         if (ap->a_fdvp->v_mount != ap->a_fnch->ncp->nc_vp->v_mount)
1685                 return(EXDEV);
1686
1687         fdip = VTOI(ap->a_fdvp);        /* source directory */
1688         tdip = VTOI(ap->a_tdvp);        /* target directory */
1689
1690         if (fdip->pmp->ronly)
1691                 return(EROFS);
1692
1693         LOCKSTART;
1694         fncp = ap->a_fnch->ncp;         /* entry name in source */
1695         fname = fncp->nc_name;
1696         fname_len = fncp->nc_nlen;
1697
1698         tncp = ap->a_tnch->ncp;         /* entry name in target */
1699         tname = tncp->nc_name;
1700         tname_len = tncp->nc_nlen;
1701
1702         hammer2_pfs_memory_wait(tdip->pmp);
1703         hammer2_trans_init(tdip->pmp, 0);
1704
1705         /*
1706          * ip is the inode being renamed.  If this is a hardlink then
1707          * ip represents the actual file and not the hardlink marker.
1708          */
1709         ip = VTOI(fncp->nc_vp);
1710
1711         /*
1712          * The common parent directory must be locked first to avoid deadlocks.
1713          * Also note that fdip and/or tdip might match cdip.
1714          */
1715         cdip = hammer2_inode_common_parent(ip->pip, tdip);
1716         hammer2_inode_lock(cdip, 0);
1717         hammer2_inode_lock(fdip, 0);
1718         hammer2_inode_lock(tdip, 0);
1719         hammer2_inode_ref(ip);          /* extra ref */
1720         error = 0;
1721
1722         /*
1723          * If ip is a hardlink target and fdip != cdip we must shift the
1724          * inode to cdip.
1725          */
1726         if (fdip != cdip &&
1727             (ip->meta.name_key & HAMMER2_DIRHASH_VISIBLE) == 0) {
1728                 hammer2_xop_nlink_t *xop1;
1729
1730                 xop1 = hammer2_xop_alloc(fdip, HAMMER2_XOP_MODIFYING);
1731                 hammer2_xop_setip2(&xop1->head, ip);
1732                 hammer2_xop_setip3(&xop1->head, cdip);
1733
1734                 hammer2_xop_start(&xop1->head, hammer2_xop_nlink);
1735                 error = hammer2_xop_collect(&xop1->head, 0);
1736                 hammer2_xop_retire(&xop1->head, HAMMER2_XOPMASK_VOP);
1737         }
1738
1739         /*
1740          * Delete the target namespace.
1741          */
1742         {
1743                 hammer2_xop_unlink_t *xop2;
1744                 hammer2_inode_t *tip;
1745                 int isopen;
1746
1747                 /*
1748                  * The unlink XOP unlinks the path from the directory and
1749                  * locates and returns the cluster associated with the real
1750                  * inode.  We have to handle nlinks here on the frontend.
1751                  */
1752                 xop2 = hammer2_xop_alloc(tdip, HAMMER2_XOP_MODIFYING);
1753                 hammer2_xop_setname(&xop2->head, tname, tname_len);
1754                 isopen = cache_isopen(ap->a_tnch);
1755                 xop2->isdir = -1;
1756                 xop2->dopermanent = isopen ?  0 : HAMMER2_DELETE_PERMANENT;
1757                 hammer2_xop_start(&xop2->head, hammer2_xop_unlink);
1758
1759                 /*
1760                  * Collect the real inode and adjust nlinks, destroy the real
1761                  * inode if nlinks transitions to 0 and it was the real inode
1762                  * (else it has already been removed).
1763                  */
1764                 tnch_error = hammer2_xop_collect(&xop2->head, 0);
1765                 /* hammer2_inode_unlock(tdip); */
1766
1767                 if (tnch_error == 0) {
1768                         tip = hammer2_inode_get(tdip->pmp, NULL,
1769                                                 &xop2->head.cluster, -1);
1770                         hammer2_xop_retire(&xop2->head, HAMMER2_XOPMASK_VOP);
1771                         if (tip) {
1772                                 hammer2_inode_unlink_finisher(tip, isopen);
1773                                 hammer2_inode_unlock(tip);
1774                         }
1775                 } else {
1776                         hammer2_xop_retire(&xop2->head, HAMMER2_XOPMASK_VOP);
1777                 }
1778                 /* hammer2_inode_lock(tdip, 0); */
1779
1780                 if (tnch_error && tnch_error != ENOENT) {
1781                         error = tnch_error;
1782                         goto done2;
1783                 }
1784         }
1785
1786         /*
1787          * Resolve the collision space for (tdip, tname, tname_len)
1788          *
1789          * tdip must be held exclusively locked to prevent races.
1790          */
1791         {
1792                 hammer2_xop_scanlhc_t *sxop;
1793                 hammer2_tid_t lhcbase;
1794
1795                 tlhc = hammer2_dirhash(tname, tname_len);
1796                 lhcbase = tlhc;
1797                 sxop = hammer2_xop_alloc(tdip, HAMMER2_XOP_MODIFYING);
1798                 sxop->lhc = tlhc;
1799                 hammer2_xop_start(&sxop->head, hammer2_xop_scanlhc);
1800                 while ((error = hammer2_xop_collect(&sxop->head, 0)) == 0) {
1801                         if (tlhc != sxop->head.cluster.focus->bref.key)
1802                                 break;
1803                         ++tlhc;
1804                 }
1805                 hammer2_xop_retire(&sxop->head, HAMMER2_XOPMASK_VOP);
1806
1807                 if (error) {
1808                         if (error != ENOENT)
1809                                 goto done2;
1810                         ++tlhc;
1811                         error = 0;
1812                 }
1813                 if ((lhcbase ^ tlhc) & ~HAMMER2_DIRHASH_LOMASK) {
1814                         error = ENOSPC;
1815                         goto done2;
1816                 }
1817         }
1818
1819         /*
1820          * Everything is setup, do the rename.
1821          *
1822          * We have to synchronize ip->meta to the underlying operation.
1823          *
1824          * NOTE: To avoid deadlocks we cannot lock (ip) while we are
1825          *       unlinking elements from their directories.  Locking
1826          *       the nlinks field does not lock the whole inode.
1827          */
1828         hammer2_inode_lock(ip, 0);
1829         if (error == 0) {
1830                 hammer2_xop_nrename_t *xop4;
1831
1832                 xop4 = hammer2_xop_alloc(fdip, HAMMER2_XOP_MODIFYING);
1833                 xop4->lhc = tlhc;
1834                 xop4->ip_key = ip->meta.name_key;
1835                 hammer2_xop_setip2(&xop4->head, ip);
1836                 hammer2_xop_setip3(&xop4->head, tdip);
1837                 hammer2_xop_setname(&xop4->head, fname, fname_len);
1838                 hammer2_xop_setname2(&xop4->head, tname, tname_len);
1839                 hammer2_xop_start(&xop4->head, hammer2_xop_nrename);
1840
1841                 error = hammer2_xop_collect(&xop4->head, 0);
1842                 hammer2_xop_retire(&xop4->head, HAMMER2_XOPMASK_VOP);
1843
1844                 if (error == ENOENT)
1845                         error = 0;
1846                 if (error == 0 &&
1847                     (ip->meta.name_key & HAMMER2_DIRHASH_VISIBLE)) {
1848                         hammer2_inode_modify(ip);
1849                         ip->meta.name_len = tname_len;
1850                         ip->meta.name_key = tlhc;
1851
1852                 }
1853         }
1854
1855         /*
1856          * Fixup ip->pip if we were renaming the actual file and not a
1857          * hardlink pointer.
1858          */
1859         if (error == 0 && (ip->meta.name_key & HAMMER2_DIRHASH_VISIBLE)) {
1860                 hammer2_inode_t *opip;
1861
1862                 if (ip->pip != tdip) {
1863                         hammer2_inode_ref(tdip);
1864                         opip = ip->pip;
1865                         ip->pip = tdip;
1866                         if (opip)
1867                                 hammer2_inode_drop(opip);
1868                 }
1869         }
1870         hammer2_inode_unlock(ip);
1871 done2:
1872         hammer2_inode_unlock(tdip);
1873         hammer2_inode_unlock(fdip);
1874         hammer2_inode_unlock(cdip);
1875         hammer2_inode_drop(ip);
1876         hammer2_inode_drop(cdip);
1877         hammer2_inode_run_unlinkq(fdip->pmp);
1878         hammer2_trans_done(tdip->pmp);
1879
1880         /*
1881          * Issue the namecache update after unlocking all the internal
1882          * hammer structures, otherwise we might deadlock.
1883          */
1884         if (tnch_error == 0) {
1885                 cache_unlink(ap->a_tnch);
1886                 cache_setunresolved(ap->a_tnch);
1887         }
1888         if (error == 0)
1889                 cache_rename(ap->a_fnch, ap->a_tnch);
1890
1891         LOCKSTOP;
1892         return (error);
1893 }
1894
1895 /*
1896  * hammer2_vop_ioctl { vp, command, data, fflag, cred }
1897  */
1898 static
1899 int
1900 hammer2_vop_ioctl(struct vop_ioctl_args *ap)
1901 {
1902         hammer2_inode_t *ip;
1903         int error;
1904
1905         LOCKSTART;
1906         ip = VTOI(ap->a_vp);
1907
1908         error = hammer2_ioctl(ip, ap->a_command, (void *)ap->a_data,
1909                               ap->a_fflag, ap->a_cred);
1910         LOCKSTOP;
1911         return (error);
1912 }
1913
1914 static
1915 int 
1916 hammer2_vop_mountctl(struct vop_mountctl_args *ap)
1917 {
1918         struct mount *mp;
1919         hammer2_pfs_t *pmp;
1920         int rc;
1921
1922         LOCKSTART;
1923         switch (ap->a_op) {
1924         case (MOUNTCTL_SET_EXPORT):
1925                 mp = ap->a_head.a_ops->head.vv_mount;
1926                 pmp = MPTOPMP(mp);
1927
1928                 if (ap->a_ctllen != sizeof(struct export_args))
1929                         rc = (EINVAL);
1930                 else
1931                         rc = vfs_export(mp, &pmp->export,
1932                                         (const struct export_args *)ap->a_ctl);
1933                 break;
1934         default:
1935                 rc = vop_stdmountctl(ap);
1936                 break;
1937         }
1938         LOCKSTOP;
1939         return (rc);
1940 }
1941
1942 /*
1943  * KQFILTER
1944  */
1945 static void filt_hammer2detach(struct knote *kn);
1946 static int filt_hammer2read(struct knote *kn, long hint);
1947 static int filt_hammer2write(struct knote *kn, long hint);
1948 static int filt_hammer2vnode(struct knote *kn, long hint);
1949
1950 static struct filterops hammer2read_filtops =
1951         { FILTEROP_ISFD | FILTEROP_MPSAFE,
1952           NULL, filt_hammer2detach, filt_hammer2read };
1953 static struct filterops hammer2write_filtops =
1954         { FILTEROP_ISFD | FILTEROP_MPSAFE,
1955           NULL, filt_hammer2detach, filt_hammer2write };
1956 static struct filterops hammer2vnode_filtops =
1957         { FILTEROP_ISFD | FILTEROP_MPSAFE,
1958           NULL, filt_hammer2detach, filt_hammer2vnode };
1959
1960 static
1961 int
1962 hammer2_vop_kqfilter(struct vop_kqfilter_args *ap)
1963 {
1964         struct vnode *vp = ap->a_vp;
1965         struct knote *kn = ap->a_kn;
1966
1967         switch (kn->kn_filter) {
1968         case EVFILT_READ:
1969                 kn->kn_fop = &hammer2read_filtops;
1970                 break;
1971         case EVFILT_WRITE:
1972                 kn->kn_fop = &hammer2write_filtops;
1973                 break;
1974         case EVFILT_VNODE:
1975                 kn->kn_fop = &hammer2vnode_filtops;
1976                 break;
1977         default:
1978                 return (EOPNOTSUPP);
1979         }
1980
1981         kn->kn_hook = (caddr_t)vp;
1982
1983         knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
1984
1985         return(0);
1986 }
1987
1988 static void
1989 filt_hammer2detach(struct knote *kn)
1990 {
1991         struct vnode *vp = (void *)kn->kn_hook;
1992
1993         knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
1994 }
1995
1996 static int
1997 filt_hammer2read(struct knote *kn, long hint)
1998 {
1999         struct vnode *vp = (void *)kn->kn_hook;
2000         hammer2_inode_t *ip = VTOI(vp);
2001         off_t off;
2002
2003         if (hint == NOTE_REVOKE) {
2004                 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
2005                 return(1);
2006         }
2007         off = ip->meta.size - kn->kn_fp->f_offset;
2008         kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
2009         if (kn->kn_sfflags & NOTE_OLDAPI)
2010                 return(1);
2011         return (kn->kn_data != 0);
2012 }
2013
2014
2015 static int
2016 filt_hammer2write(struct knote *kn, long hint)
2017 {
2018         if (hint == NOTE_REVOKE)
2019                 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
2020         kn->kn_data = 0;
2021         return (1);
2022 }
2023
2024 static int
2025 filt_hammer2vnode(struct knote *kn, long hint)
2026 {
2027         if (kn->kn_sfflags & hint)
2028                 kn->kn_fflags |= hint;
2029         if (hint == NOTE_REVOKE) {
2030                 kn->kn_flags |= (EV_EOF | EV_NODATA);
2031                 return (1);
2032         }
2033         return (kn->kn_fflags != 0);
2034 }
2035
2036 /*
2037  * FIFO VOPS
2038  */
2039 static
2040 int
2041 hammer2_vop_markatime(struct vop_markatime_args *ap)
2042 {
2043         hammer2_inode_t *ip;
2044         struct vnode *vp;
2045
2046         vp = ap->a_vp;
2047         ip = VTOI(vp);
2048
2049         if (ip->pmp->ronly)
2050                 return(EROFS);
2051         return(0);
2052 }
2053
2054 static
2055 int
2056 hammer2_vop_fifokqfilter(struct vop_kqfilter_args *ap)
2057 {
2058         int error;
2059
2060         error = VOCALL(&fifo_vnode_vops, &ap->a_head);
2061         if (error)
2062                 error = hammer2_vop_kqfilter(ap);
2063         return(error);
2064 }
2065
2066 /*
2067  * VOPS vector
2068  */
2069 struct vop_ops hammer2_vnode_vops = {
2070         .vop_default    = vop_defaultop,
2071         .vop_fsync      = hammer2_vop_fsync,
2072         .vop_getpages   = vop_stdgetpages,
2073         .vop_putpages   = vop_stdputpages,
2074         .vop_access     = hammer2_vop_access,
2075         .vop_advlock    = hammer2_vop_advlock,
2076         .vop_close      = hammer2_vop_close,
2077         .vop_nlink      = hammer2_vop_nlink,
2078         .vop_ncreate    = hammer2_vop_ncreate,
2079         .vop_nsymlink   = hammer2_vop_nsymlink,
2080         .vop_nremove    = hammer2_vop_nremove,
2081         .vop_nrmdir     = hammer2_vop_nrmdir,
2082         .vop_nrename    = hammer2_vop_nrename,
2083         .vop_getattr    = hammer2_vop_getattr,
2084         .vop_setattr    = hammer2_vop_setattr,
2085         .vop_readdir    = hammer2_vop_readdir,
2086         .vop_readlink   = hammer2_vop_readlink,
2087         .vop_getpages   = vop_stdgetpages,
2088         .vop_putpages   = vop_stdputpages,
2089         .vop_read       = hammer2_vop_read,
2090         .vop_write      = hammer2_vop_write,
2091         .vop_open       = hammer2_vop_open,
2092         .vop_inactive   = hammer2_vop_inactive,
2093         .vop_reclaim    = hammer2_vop_reclaim,
2094         .vop_nresolve   = hammer2_vop_nresolve,
2095         .vop_nlookupdotdot = hammer2_vop_nlookupdotdot,
2096         .vop_nmkdir     = hammer2_vop_nmkdir,
2097         .vop_nmknod     = hammer2_vop_nmknod,
2098         .vop_ioctl      = hammer2_vop_ioctl,
2099         .vop_mountctl   = hammer2_vop_mountctl,
2100         .vop_bmap       = hammer2_vop_bmap,
2101         .vop_strategy   = hammer2_vop_strategy,
2102         .vop_kqfilter   = hammer2_vop_kqfilter
2103 };
2104
2105 struct vop_ops hammer2_spec_vops = {
2106         .vop_default =          vop_defaultop,
2107         .vop_fsync =            hammer2_vop_fsync,
2108         .vop_read =             vop_stdnoread,
2109         .vop_write =            vop_stdnowrite,
2110         .vop_access =           hammer2_vop_access,
2111         .vop_close =            hammer2_vop_close,
2112         .vop_markatime =        hammer2_vop_markatime,
2113         .vop_getattr =          hammer2_vop_getattr,
2114         .vop_inactive =         hammer2_vop_inactive,
2115         .vop_reclaim =          hammer2_vop_reclaim,
2116         .vop_setattr =          hammer2_vop_setattr
2117 };
2118
/*
 * Vnode operations vector for FIFO hammer2 vnodes.  Unlisted operations
 * default to the generic fifo handler (fifo_vnoperate); the entries
 * under "#if 0" are fifo-specific read/write/close hooks that are not
 * currently implemented.
 */
struct vop_ops hammer2_fifo_vops = {
	.vop_default =          fifo_vnoperate,
	.vop_fsync =            hammer2_vop_fsync,
#if 0
	.vop_read =             hammer2_vop_fiforead,
	.vop_write =            hammer2_vop_fifowrite,
#endif
	.vop_access =           hammer2_vop_access,
#if 0
	.vop_close =            hammer2_vop_fifoclose,
#endif
	.vop_markatime =        hammer2_vop_markatime,
	.vop_getattr =          hammer2_vop_getattr,
	.vop_inactive =         hammer2_vop_inactive,
	.vop_reclaim =          hammer2_vop_reclaim,
	.vop_setattr =          hammer2_vop_setattr,
	/* fifo fallback first, then the hammer2 vnode filter */
	.vop_kqfilter =         hammer2_vop_fifokqfilter
};
2137