hammer2 - Flesh out kqueue support
sys/vfs/hammer2/hammer2_vnops.c (dragonfly.git)
/*
 * Copyright (c) 2011-2015 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
 * by Daniel Flores (GSOC 2013 - mentored by Matthew Dillon, compression)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Kernel Filesystem interface
 *
 * NOTE! local ipdata pointers must be reloaded on any modifying operation
 *       to the inode as its underlying chain may have changed.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/mountctl.h>
#include <sys/dirent.h>
#include <sys/uio.h>
#include <sys/objcache.h>
#include <sys/event.h>
#include <sys/file.h>
#include <vfs/fifofs/fifo.h>

#include "hammer2.h"

static int hammer2_read_file(hammer2_inode_t *ip, struct uio *uio,
                                int seqcount);
static int hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
                                int ioflag, int seqcount);
static void hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize);
static void hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize);

struct objcache *cache_xops;

static __inline
void
hammer2_knote(struct vnode *vp, int flags)
{
        if (flags)
                KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags);
}
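
/*
 * Illustrative sketch (not part of the driver): the kqueue pattern the
 * VOPs below follow.  Callers accumulate NOTE_* bits in a local kflags
 * variable as they mutate the inode, then post a single KNOTE via
 * hammer2_knote() once all locks have been dropped.  The field and note
 * names are real; the function itself is hypothetical.
 */
#if 0
static void
example_touch_file(hammer2_inode_t *ip, struct vnode *vp, int extended)
{
        int kflags = 0;

        kflags |= NOTE_WRITE;           /* data was modified */
        if (extended)
                kflags |= NOTE_EXTEND;  /* file grew */
        /* ... perform the actual modification under the inode lock ... */
        hammer2_knote(vp, kflags);      /* no-op when kflags == 0 */
}
#endif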

/*
 * Last reference to a vnode is going away but it is still cached.
 */
static
int
hammer2_vop_inactive(struct vop_inactive_args *ap)
{
        hammer2_inode_t *ip;
        struct vnode *vp;

        LOCKSTART;
        vp = ap->a_vp;
        ip = VTOI(vp);

        /*
         * Degenerate case
         */
        if (ip == NULL) {
                vrecycle(vp);
                LOCKSTOP;
                return (0);
        }

        /*
         * Check for deleted inodes and recycle immediately on the last
         * release.  Be sure to destroy any left-over buffer cache buffers
         * so we do not waste time trying to flush them.
         *
         * Note that deleting the file block chains under the inode chain
         * would just be a waste of energy, so don't do it.
         *
         * WARNING: nvtruncbuf() can only be safely called without the inode
         *          lock held due to the way our write thread works.
         */
        if (ip->flags & HAMMER2_INODE_ISUNLINKED) {
                hammer2_key_t lbase;
                int nblksize;

                /*
                 * Detect updates to the embedded data which may be
                 * synchronized by the strategy code.  Simply mark the
                 * inode modified so it gets picked up by our normal flush.
                 */
                nblksize = hammer2_calc_logical(ip, 0, &lbase, NULL);
                nvtruncbuf(vp, 0, nblksize, 0, 0);
                vrecycle(vp);
        }
        LOCKSTOP;
        return (0);
}

/*
 * Reclaim a vnode so that it can be reused; after the inode is
 * disassociated, the filesystem must manage it alone.
 */
static
int
hammer2_vop_reclaim(struct vop_reclaim_args *ap)
{
        hammer2_inode_t *ip;
        hammer2_pfs_t *pmp;
        struct vnode *vp;

        LOCKSTART;
        vp = ap->a_vp;
        ip = VTOI(vp);
        if (ip == NULL) {
                LOCKSTOP;
                return(0);
        }
        pmp = ip->pmp;

        /*
         * The final close of a deleted file or directory marks it for
         * destruction.  The DELETED flag allows the flusher to shortcut
         * any modified blocks still unflushed (that is, just ignore them).
         *
         * HAMMER2 usually does not try to optimize the freemap by returning
         * deleted blocks to it as it does not usually know how many snapshots
         * might be referencing portions of the file/dir.
         */
        vp->v_data = NULL;
        ip->vp = NULL;

        /*
         * NOTE! We do not attempt to flush chains here, flushing is
         *       really fragile and could also deadlock.
         */
        vclrisdirty(vp);

        /*
         * This occurs if the inode was unlinked while open.  Reclamation of
         * these inodes requires processing we cannot safely do here so add
         * the inode to the sideq in that situation.
         *
         * A modified inode may require chain synchronization which will no
         * longer be driven by a sync or fsync without the vnode, also use
         * the sideq for that.
         *
         * A reclaim can occur at any time so we cannot safely start a
         * transaction to handle reclamation of unlinked files.  Instead,
         * the ip is left with a reference and placed on a linked list and
         * handled later on.
         */
        if ((ip->flags & (HAMMER2_INODE_ISUNLINKED |
                          HAMMER2_INODE_MODIFIED |
                          HAMMER2_INODE_RESIZED)) &&
            (ip->flags & HAMMER2_INODE_ISDELETED) == 0) {
                hammer2_inode_sideq_t *ipul;

                ipul = kmalloc(sizeof(*ipul), pmp->minode, M_WAITOK | M_ZERO);
                ipul->ip = ip;

                hammer2_spin_ex(&pmp->list_spin);
                if ((ip->flags & HAMMER2_INODE_ONSIDEQ) == 0) {
                        /* ref -> sideq */
                        atomic_set_int(&ip->flags, HAMMER2_INODE_ONSIDEQ);
                        TAILQ_INSERT_TAIL(&pmp->sideq, ipul, entry);
                        hammer2_spin_unex(&pmp->list_spin);
                } else {
                        hammer2_spin_unex(&pmp->list_spin);
                        kfree(ipul, pmp->minode);
                        hammer2_inode_drop(ip);         /* vp ref */
                }
                /* retain ref from vp for ipul */
        } else {
                hammer2_inode_drop(ip);                 /* vp ref */
        }

        /*
         * XXX handle background sync when ip dirty, kernel will no longer
         * notify us regarding this inode because there is no longer a
         * vnode attached to it.
         */

        LOCKSTOP;
        return (0);
}
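
/*
 * Illustrative sketch (hypothetical, not part of this file): how a sideq
 * built as above might be drained later from a transaction-safe context.
 * The pmp->sideq, ipul, and ONSIDEQ names come from the code above; the
 * drain function itself and its processing step are assumptions.
 */
#if 0
static void
example_drain_sideq(hammer2_pfs_t *pmp)
{
        hammer2_inode_sideq_t *ipul;
        hammer2_inode_t *ip;

        hammer2_spin_ex(&pmp->list_spin);
        while ((ipul = TAILQ_FIRST(&pmp->sideq)) != NULL) {
                TAILQ_REMOVE(&pmp->sideq, ipul, entry);
                ip = ipul->ip;
                atomic_clear_int(&ip->flags, HAMMER2_INODE_ONSIDEQ);
                hammer2_spin_unex(&pmp->list_spin);

                /* ... flush/destroy processing under a real transaction ... */

                hammer2_inode_drop(ip);         /* ref held by the sideq */
                kfree(ipul, pmp->minode);
                hammer2_spin_ex(&pmp->list_spin);
        }
        hammer2_spin_unex(&pmp->list_spin);
}
#endif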

static
int
hammer2_vop_fsync(struct vop_fsync_args *ap)
{
        hammer2_inode_t *ip;
        struct vnode *vp;

        LOCKSTART;
        vp = ap->a_vp;
        ip = VTOI(vp);

#if 0
        /* XXX can't do this yet */
        hammer2_trans_init(ip->pmp, HAMMER2_TRANS_ISFLUSH);
        vfsync(vp, ap->a_waitfor, 1, NULL, NULL);
#endif
        hammer2_trans_init(ip->pmp, 0);
        vfsync(vp, ap->a_waitfor, 1, NULL, NULL);

        /*
         * Calling chain_flush here creates a lot of duplicative
         * COW operations due to non-optimal vnode ordering.
         *
         * Only do it for an actual fsync() syscall.  The other forms
         * which call this function will eventually call chain_flush
         * on the volume root as a catch-all, which is far more optimal.
         */
        hammer2_inode_lock(ip, 0);
        if (ip->flags & HAMMER2_INODE_MODIFIED)
                hammer2_inode_chain_sync(ip);
        hammer2_inode_unlock(ip);
        hammer2_trans_done(ip->pmp);

        LOCKSTOP;
        return (0);
}

static
int
hammer2_vop_access(struct vop_access_args *ap)
{
        hammer2_inode_t *ip = VTOI(ap->a_vp);
        uid_t uid;
        gid_t gid;
        int error;

        LOCKSTART;
        hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED);
        uid = hammer2_to_unix_xid(&ip->meta.uid);
        gid = hammer2_to_unix_xid(&ip->meta.gid);
        error = vop_helper_access(ap, uid, gid, ip->meta.mode, ip->meta.uflags);
        hammer2_inode_unlock(ip);

        LOCKSTOP;
        return (error);
}

static
int
hammer2_vop_getattr(struct vop_getattr_args *ap)
{
        hammer2_pfs_t *pmp;
        hammer2_inode_t *ip;
        struct vnode *vp;
        struct vattr *vap;
        hammer2_chain_t *chain;
        int i;

        LOCKSTART;
        vp = ap->a_vp;
        vap = ap->a_vap;

        ip = VTOI(vp);
        pmp = ip->pmp;

        hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED);

        vap->va_fsid = pmp->mp->mnt_stat.f_fsid.val[0];
        vap->va_fileid = ip->meta.inum;
        vap->va_mode = ip->meta.mode;
        vap->va_nlink = ip->meta.nlinks;
        vap->va_uid = hammer2_to_unix_xid(&ip->meta.uid);
        vap->va_gid = hammer2_to_unix_xid(&ip->meta.gid);
        vap->va_rmajor = 0;
        vap->va_rminor = 0;
        vap->va_size = ip->meta.size;   /* protected by shared lock */
        vap->va_blocksize = HAMMER2_PBUFSIZE;
        vap->va_flags = ip->meta.uflags;
        hammer2_time_to_timespec(ip->meta.ctime, &vap->va_ctime);
        hammer2_time_to_timespec(ip->meta.mtime, &vap->va_mtime);
        /* atime is not maintained; report mtime (see setattr) */
        hammer2_time_to_timespec(ip->meta.mtime, &vap->va_atime);
        vap->va_gen = 1;
        vap->va_bytes = 0;
        if (ip->meta.type == HAMMER2_OBJTYPE_DIRECTORY) {
                /*
                 * Can't really calculate directory use sans the files under
                 * it, just assume one block for now.
                 */
                vap->va_bytes += HAMMER2_INODE_BYTES;
        } else {
                for (i = 0; i < ip->cluster.nchains; ++i) {
                        if ((chain = ip->cluster.array[i].chain) != NULL) {
                                if (vap->va_bytes <
                                    chain->bref.embed.stats.data_count) {
                                        vap->va_bytes =
                                            chain->bref.embed.stats.data_count;
                                }
                        }
                }
        }
        vap->va_type = hammer2_get_vtype(ip->meta.type);
        vap->va_filerev = 0;
        vap->va_uid_uuid = ip->meta.uid;
        vap->va_gid_uuid = ip->meta.gid;
        vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
                          VA_FSID_UUID_VALID;

        hammer2_inode_unlock(ip);

        LOCKSTOP;
        return (0);
}

static
int
hammer2_vop_setattr(struct vop_setattr_args *ap)
{
        hammer2_inode_t *ip;
        struct vnode *vp;
        struct vattr *vap;
        int error;
        int kflags = 0;
        uint64_t ctime;

        LOCKSTART;
        vp = ap->a_vp;
        vap = ap->a_vap;
        hammer2_update_time(&ctime);

        ip = VTOI(vp);

        if (ip->pmp->ronly) {
                LOCKSTOP;
                return(EROFS);
        }

        hammer2_pfs_memory_wait(ip->pmp);
        hammer2_trans_init(ip->pmp, 0);
        hammer2_inode_lock(ip, 0);
        error = 0;

        if (vap->va_flags != VNOVAL) {
                uint32_t flags;

                flags = ip->meta.uflags;
                error = vop_helper_setattr_flags(&flags, vap->va_flags,
                                     hammer2_to_unix_xid(&ip->meta.uid),
                                     ap->a_cred);
                if (error == 0) {
                        if (ip->meta.uflags != flags) {
                                hammer2_inode_modify(ip);
                                ip->meta.uflags = flags;
                                ip->meta.ctime = ctime;
                                kflags |= NOTE_ATTRIB;
                        }
                        if (ip->meta.uflags & (IMMUTABLE | APPEND)) {
                                error = 0;
                                goto done;
                        }
                }
                goto done;
        }
        if (ip->meta.uflags & (IMMUTABLE | APPEND)) {
                error = EPERM;
                goto done;
        }
        if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
                mode_t cur_mode = ip->meta.mode;
                uid_t cur_uid = hammer2_to_unix_xid(&ip->meta.uid);
                gid_t cur_gid = hammer2_to_unix_xid(&ip->meta.gid);
                uuid_t uuid_uid;
                uuid_t uuid_gid;

                error = vop_helper_chown(ap->a_vp, vap->va_uid, vap->va_gid,
                                         ap->a_cred,
                                         &cur_uid, &cur_gid, &cur_mode);
                if (error == 0) {
                        hammer2_guid_to_uuid(&uuid_uid, cur_uid);
                        hammer2_guid_to_uuid(&uuid_gid, cur_gid);
                        if (bcmp(&uuid_uid, &ip->meta.uid, sizeof(uuid_uid)) ||
                            bcmp(&uuid_gid, &ip->meta.gid, sizeof(uuid_gid)) ||
                            ip->meta.mode != cur_mode
                        ) {
                                hammer2_inode_modify(ip);
                                ip->meta.uid = uuid_uid;
                                ip->meta.gid = uuid_gid;
                                ip->meta.mode = cur_mode;
                                ip->meta.ctime = ctime;
                        }
                        kflags |= NOTE_ATTRIB;
                }
        }

        /*
         * Resize the file
         */
        if (vap->va_size != VNOVAL && ip->meta.size != vap->va_size) {
                switch(vp->v_type) {
                case VREG:
                        if (vap->va_size == ip->meta.size)
                                break;
                        if (vap->va_size < ip->meta.size) {
                                hammer2_mtx_ex(&ip->truncate_lock);
                                hammer2_truncate_file(ip, vap->va_size);
                                hammer2_mtx_unlock(&ip->truncate_lock);
                                kflags |= NOTE_WRITE;
                        } else {
                                hammer2_extend_file(ip, vap->va_size);
                                kflags |= NOTE_WRITE | NOTE_EXTEND;
                        }
                        hammer2_inode_modify(ip);
                        ip->meta.mtime = ctime;
                        break;
                default:
                        error = EINVAL;
                        goto done;
                }
        }
#if 0
        /* atime not supported */
        if (vap->va_atime.tv_sec != VNOVAL) {
                hammer2_inode_modify(ip);
                ip->meta.atime = hammer2_timespec_to_time(&vap->va_atime);
                kflags |= NOTE_ATTRIB;
        }
#endif
        if (vap->va_mode != (mode_t)VNOVAL) {
                mode_t cur_mode = ip->meta.mode;
                uid_t cur_uid = hammer2_to_unix_xid(&ip->meta.uid);
                gid_t cur_gid = hammer2_to_unix_xid(&ip->meta.gid);

                error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred,
                                         cur_uid, cur_gid, &cur_mode);
                if (error == 0 && ip->meta.mode != cur_mode) {
                        hammer2_inode_modify(ip);
                        ip->meta.mode = cur_mode;
                        ip->meta.ctime = ctime;
                        kflags |= NOTE_ATTRIB;
                }
        }

        if (vap->va_mtime.tv_sec != VNOVAL) {
                hammer2_inode_modify(ip);
                ip->meta.mtime = hammer2_timespec_to_time(&vap->va_mtime);
                kflags |= NOTE_ATTRIB;
        }

done:
        /*
         * If a truncation occurred we must call hammer2_inode_chain_sync()
         * now in order to trim the related data chains, otherwise a later
         * expansion can cause havoc.
         *
         * If an extension occurred that changed the DIRECTDATA state, we
         * must call hammer2_inode_chain_sync() now in order to prepare the
         * inode's indirect block table.
         */
        if (ip->flags & HAMMER2_INODE_RESIZED)
                hammer2_inode_chain_sync(ip);

        /*
         * Cleanup.
         */
        hammer2_inode_unlock(ip);
        hammer2_trans_done(ip->pmp);
        hammer2_knote(ip->vp, kflags);

        LOCKSTOP;
        return (error);
}
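
/*
 * Quick reference (derived from the code above): the kqueue notes
 * hammer2_vop_setattr() accumulates before posting a single knote:
 *
 *      chflags                 -> NOTE_ATTRIB
 *      chown / chmod           -> NOTE_ATTRIB
 *      truncation              -> NOTE_WRITE
 *      extension               -> NOTE_WRITE | NOTE_EXTEND
 *      mtime update            -> NOTE_ATTRIB
 *
 * The accumulated set is posted via hammer2_knote() only after the inode
 * is unlocked and the transaction is done.
 */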

static
int
hammer2_vop_readdir(struct vop_readdir_args *ap)
{
        hammer2_xop_readdir_t *xop;
        hammer2_blockref_t bref;
        hammer2_inode_t *ip;
        hammer2_tid_t inum;
        hammer2_key_t lkey;
        struct uio *uio;
        off_t *cookies;
        off_t saveoff;
        int cookie_index;
        int ncookies;
        int error;
        int eofflag;
        int r;

        LOCKSTART;
        ip = VTOI(ap->a_vp);
        uio = ap->a_uio;
        saveoff = uio->uio_offset;
        eofflag = 0;
        error = 0;

        /*
         * Set up directory entry cookies if requested
         */
        if (ap->a_ncookies) {
                ncookies = uio->uio_resid / 16 + 1;
                if (ncookies > 1024)
                        ncookies = 1024;
                cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
        } else {
                ncookies = -1;
                cookies = NULL;
        }
        cookie_index = 0;

        hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED);

        /*
         * Handle artificial entries.  To ensure that only positive 64 bit
         * quantities are returned to userland we always strip off bit 63.
         * The hash code is designed such that codes 0x0000-0x7FFF are not
         * used, allowing us to use these codes for artificial entries.
         *
         * Entry 0 is used for '.' and entry 1 is used for '..'.  Do not
         * allow '..' to cross the mount point into (e.g.) the super-root.
         */
        if (saveoff == 0) {
                inum = ip->meta.inum & HAMMER2_DIRHASH_USERMSK;
                r = vop_write_dirent(&error, uio, inum, DT_DIR, 1, ".");
                if (r)
                        goto done;
                if (cookies)
                        cookies[cookie_index] = saveoff;
                ++saveoff;
                ++cookie_index;
                if (cookie_index == ncookies)
                        goto done;
        }

        if (saveoff == 1) {
                /*
                 * Be careful with lockorder when accessing ".."
                 *
                 * (ip is the current dir. xip is the parent dir).
                 */
                inum = ip->meta.inum & HAMMER2_DIRHASH_USERMSK;
                if (ip != ip->pmp->iroot)
                        inum = ip->meta.iparent & HAMMER2_DIRHASH_USERMSK;
                r = vop_write_dirent(&error, uio, inum, DT_DIR, 2, "..");
                if (r)
                        goto done;
                if (cookies)
                        cookies[cookie_index] = saveoff;
                ++saveoff;
                ++cookie_index;
                if (cookie_index == ncookies)
                        goto done;
        }

        lkey = saveoff | HAMMER2_DIRHASH_VISIBLE;
        if (hammer2_debug & 0x0020)
                kprintf("readdir: lkey %016jx\n", lkey);
        if (error)
                goto done;

        /*
         * Use XOP for cluster scan.
         *
         * parent is the inode cluster, already locked for us.  Don't
         * double lock shared locks as this will screw up upgrades.
         */
        xop = hammer2_xop_alloc(ip, 0);
        xop->lkey = lkey;
        hammer2_xop_start(&xop->head, hammer2_xop_readdir);

        for (;;) {
                const hammer2_inode_data_t *ripdata;
                const char *dname;
                int dtype;

                error = hammer2_xop_collect(&xop->head, 0);
                if (error)
                        break;
                if (cookie_index == ncookies)
                        break;
                if (hammer2_debug & 0x0020)
                        kprintf("cluster chain %p %p\n",
                                xop->head.cluster.focus,
                                (xop->head.cluster.focus ?
                                 xop->head.cluster.focus->data : (void *)-1));
                hammer2_cluster_bref(&xop->head.cluster, &bref);

                if (bref.type == HAMMER2_BREF_TYPE_INODE) {
                        ripdata =
                            &hammer2_cluster_rdata(&xop->head.cluster)->ipdata;
                        dtype = hammer2_get_dtype(ripdata->meta.type);
                        saveoff = bref.key & HAMMER2_DIRHASH_USERMSK;
                        r = vop_write_dirent(&error, uio,
                                             ripdata->meta.inum &
                                              HAMMER2_DIRHASH_USERMSK,
                                             dtype,
                                             ripdata->meta.name_len,
                                             ripdata->filename);
                        if (r)
                                break;
                        if (cookies)
                                cookies[cookie_index] = saveoff;
                        ++cookie_index;
                } else if (bref.type == HAMMER2_BREF_TYPE_DIRENT) {
                        dtype = hammer2_get_dtype(bref.embed.dirent.type);
                        saveoff = bref.key & HAMMER2_DIRHASH_USERMSK;
                        if (bref.embed.dirent.namlen <=
                            sizeof(bref.check.buf)) {
                                dname = bref.check.buf;
                        } else {
                                dname =
                                 hammer2_cluster_rdata(&xop->head.cluster)->buf;
                        }
                        r = vop_write_dirent(&error, uio,
                                             bref.embed.dirent.inum,
                                             dtype,
                                             bref.embed.dirent.namlen,
                                             dname);
                        if (r)
                                break;
                        if (cookies)
                                cookies[cookie_index] = saveoff;
                        ++cookie_index;
                } else {
                        /* XXX chain error */
                        kprintf("bad chain type readdir %d\n", bref.type);
                }
        }
        hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
        if (error == ENOENT) {
                error = 0;
                eofflag = 1;
                saveoff = (hammer2_key_t)-1;
        } else {
                saveoff = bref.key & HAMMER2_DIRHASH_USERMSK;
        }
done:
        hammer2_inode_unlock(ip);
        if (ap->a_eofflag)
                *ap->a_eofflag = eofflag;
        if (hammer2_debug & 0x0020)
                kprintf("readdir: done at %016jx\n", saveoff);
        uio->uio_offset = saveoff & ~HAMMER2_DIRHASH_VISIBLE;
        if (error && cookie_index == 0) {
                if (cookies) {
                        kfree(cookies, M_TEMP);
                        *ap->a_ncookies = 0;
                        *ap->a_cookies = NULL;
                }
        } else {
                if (cookies) {
                        *ap->a_ncookies = cookie_index;
                        *ap->a_cookies = cookies;
                }
        }
        LOCKSTOP;
        return (error);
}
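
/*
 * Illustrative sketch (hypothetical helpers, not part of this file): the
 * offset round-trip readdir performs above.  Bit 63
 * (HAMMER2_DIRHASH_VISIBLE) is stripped before the offset reaches
 * userland and restored when a scan resumes, so userland only ever sees
 * positive 64-bit directory offsets.
 */
#if 0
static off_t
example_offset_to_user(hammer2_key_t saveoff)
{
        return ((off_t)(saveoff & ~HAMMER2_DIRHASH_VISIBLE));
}

static hammer2_key_t
example_offset_from_user(off_t uoff)
{
        return ((hammer2_key_t)uoff | HAMMER2_DIRHASH_VISIBLE);
}
#endif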

/*
 * hammer2_vop_readlink { vp, uio, cred }
 */
static
int
hammer2_vop_readlink(struct vop_readlink_args *ap)
{
        struct vnode *vp;
        hammer2_inode_t *ip;
        int error;

        vp = ap->a_vp;
        if (vp->v_type != VLNK)
                return (EINVAL);
        ip = VTOI(vp);

        error = hammer2_read_file(ip, ap->a_uio, 0);
        return (error);
}

static
int
hammer2_vop_read(struct vop_read_args *ap)
{
        struct vnode *vp;
        hammer2_inode_t *ip;
        struct uio *uio;
        int error;
        int seqcount;
        int bigread;

        /*
         * Read operations supported on this vnode?
         */
        vp = ap->a_vp;
        if (vp->v_type != VREG)
                return (EINVAL);

        /*
         * Misc
         */
        ip = VTOI(vp);
        uio = ap->a_uio;
        error = 0;

        seqcount = ap->a_ioflag >> 16;
        bigread = (uio->uio_resid > 100 * 1024 * 1024);

        error = hammer2_read_file(ip, uio, seqcount);
        return (error);
}

static
int
hammer2_vop_write(struct vop_write_args *ap)
{
        hammer2_inode_t *ip;
        thread_t td;
        struct vnode *vp;
        struct uio *uio;
        int error;
        int seqcount;

        /*
         * Write operations supported on this vnode?
         */
        vp = ap->a_vp;
        if (vp->v_type != VREG)
                return (EINVAL);

        /*
         * Misc
         */
        ip = VTOI(vp);
        uio = ap->a_uio;
        error = 0;
        if (ip->pmp->ronly) {
                return (EROFS);
        }

        seqcount = ap->a_ioflag >> 16;

        /*
         * Check resource limit
         */
        if (uio->uio_resid > 0 && (td = uio->uio_td) != NULL && td->td_proc &&
            uio->uio_offset + uio->uio_resid >
             td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
                lwpsignal(td->td_proc, td->td_lwp, SIGXFSZ);
                return (EFBIG);
        }

        /*
         * The transaction interlocks against flush initiations
         * (note: but will run concurrently with the actual flush).
         *
         * To avoid deadlocking against the VM system, we must flag any
         * transaction related to the buffer cache or other direct
         * VM page manipulation.
         */
        if (uio->uio_segflg == UIO_NOCOPY)
                hammer2_trans_init(ip->pmp, HAMMER2_TRANS_BUFCACHE);
        else
                hammer2_trans_init(ip->pmp, 0);
        error = hammer2_write_file(ip, uio, ap->a_ioflag, seqcount);
        hammer2_trans_done(ip->pmp);

        return (error);
}

/*
 * Perform read operations on a file or symlink given an UNLOCKED
 * inode and uio.
 */
static
int
hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, int seqcount)
{
        hammer2_off_t size;
        struct buf *bp;
        int error;

        error = 0;

        /*
         * UIO read loop.
         *
         * WARNING! Assumes that the kernel interlocks size changes at the
         *          vnode level.
         */
        hammer2_mtx_sh(&ip->lock);
        hammer2_mtx_sh(&ip->truncate_lock);
        size = ip->meta.size;
        hammer2_mtx_unlock(&ip->lock);

        while (uio->uio_resid > 0 && uio->uio_offset < size) {
                hammer2_key_t lbase;
                hammer2_key_t leof;
                int lblksize;
                int loff;
                int n;

                lblksize = hammer2_calc_logical(ip, uio->uio_offset,
                                                &lbase, &leof);

#if 1
                error = cluster_read(ip->vp, leof, lbase, lblksize,
                                     uio->uio_resid, seqcount * MAXBSIZE,
                                     &bp);
#else
                if (uio->uio_segflg == UIO_NOCOPY) {
                        bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0);
                        if (bp->b_flags & B_CACHE) {
                                int i;
                                int j = 0;
                                if (bp->b_xio.xio_npages != 16)
                                        kprintf("NPAGES BAD\n");
                                for (i = 0; i < bp->b_xio.xio_npages; ++i) {
                                        vm_page_t m;
                                        m = bp->b_xio.xio_pages[i];
                                        if (m == NULL || m->valid == 0) {
                                                kprintf("bp %016jx %016jx pg %d inv",
                                                        lbase, leof, i);
                                                if (m)
                                                        kprintf("m->object %p/%p", m->object, ip->vp->v_object);
                                                kprintf("\n");
                                                j = 1;
                                        }
                                }
                                if (j)
                                        kprintf("b_flags %08x, b_error %d\n", bp->b_flags, bp->b_error);
                        }
                        bqrelse(bp);
                }
                error = bread(ip->vp, lbase, lblksize, &bp);
#endif
                if (error) {
                        brelse(bp);
                        break;
                }
                loff = (int)(uio->uio_offset - lbase);
                n = lblksize - loff;
                if (n > uio->uio_resid)
                        n = uio->uio_resid;
                if (n > size - uio->uio_offset)
                        n = (int)(size - uio->uio_offset);
                bp->b_flags |= B_AGE;
                uiomovebp(bp, (char *)bp->b_data + loff, n, uio);
                bqrelse(bp);
        }
        hammer2_mtx_unlock(&ip->truncate_lock);

        return (error);
}
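
/*
 * Worked example for the clamp logic above (assuming a 64KB logical
 * block, HAMMER2_PBUFSIZE): a read at uio_offset 100000 maps to
 * lbase 65536, so loff = 34464 and at most n = 65536 - 34464 = 31072
 * bytes are copied out of this buffer before the loop advances.  n is
 * further clamped by uio_resid and by the file size.
 */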

/*
 * Write to the file represented by the inode via the logical buffer cache.
 * The inode may represent a regular file or a symlink.
 *
 * The inode must not be locked.
 */
static
int
hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
                   int ioflag, int seqcount)
{
        hammer2_key_t old_eof;
        hammer2_key_t new_eof;
        struct buf *bp;
        int kflags;
        int error;
        int modified;

        /*
         * Setup if append
         *
         * WARNING! Assumes that the kernel interlocks size changes at the
         *          vnode level.
         */
        hammer2_mtx_ex(&ip->lock);
        hammer2_mtx_sh(&ip->truncate_lock);
        if (ioflag & IO_APPEND)
                uio->uio_offset = ip->meta.size;
        old_eof = ip->meta.size;

        /*
         * Extend the file if necessary.  If the write fails at some point
         * we will truncate it back down to cover as much as we were able
         * to write.
         *
         * Doing this now makes it easier to calculate buffer sizes in
         * the loop.
         */
        kflags = 0;
        error = 0;
        modified = 0;

        if (uio->uio_offset + uio->uio_resid > old_eof) {
                new_eof = uio->uio_offset + uio->uio_resid;
                modified = 1;
                hammer2_extend_file(ip, new_eof);
                kflags |= NOTE_EXTEND;
        } else {
                new_eof = old_eof;
        }
        hammer2_mtx_unlock(&ip->lock);

        /*
         * UIO write loop
         */
        while (uio->uio_resid > 0) {
                hammer2_key_t lbase;
                int trivial;
                int endofblk;
                int lblksize;
                int loff;
                int n;

                /*
                 * Don't allow the buffer build to blow out the buffer
                 * cache.
                 */
                if ((ioflag & IO_RECURSE) == 0)
                        bwillwrite(HAMMER2_PBUFSIZE);

                /*
                 * This nominally tells us how much we can cluster and
                 * what the logical buffer size needs to be.  Currently
                 * we don't try to cluster the write and just handle one
                 * block at a time.
                 */
                lblksize = hammer2_calc_logical(ip, uio->uio_offset,
                                                &lbase, NULL);
                loff = (int)(uio->uio_offset - lbase);

                KKASSERT(lblksize <= 65536);

                /*
                 * Calculate bytes to copy this transfer and whether the
                 * copy completely covers the buffer or not.
                 */
                trivial = 0;
                n = lblksize - loff;
                if (n > uio->uio_resid) {
                        n = uio->uio_resid;
                        if (loff == lbase && uio->uio_offset + n == new_eof)
                                trivial = 1;
                        endofblk = 0;
                } else {
                        if (loff == 0)
                                trivial = 1;
                        endofblk = 1;
                }
                if (lbase >= new_eof)
                        trivial = 1;

                /*
                 * Get the buffer
                 */
                if (uio->uio_segflg == UIO_NOCOPY) {
                        /*
                         * Issuing a write with the same data backing the
                         * buffer.  Instantiate the buffer to collect the
                         * backing vm pages, then read-in any missing bits.
                         *
                         * This case is used by vop_stdputpages().
                         */
                        bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0);
                        if ((bp->b_flags & B_CACHE) == 0) {
                                bqrelse(bp);
                                error = bread(ip->vp, lbase, lblksize, &bp);
                        }
                } else if (trivial) {
                        /*
                         * Even though we are entirely overwriting the buffer
                         * we may still have to zero it out to avoid a
                         * mmap/write visibility issue.
                         */
                        bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0);
                        if ((bp->b_flags & B_CACHE) == 0)
                                vfs_bio_clrbuf(bp);
                } else {
                        /*
                         * Partial overwrite, read in any missing bits then
                         * replace the portion being written.
                         *
                         * (The strategy code will detect zero-fill physical
                         * blocks for this case).
                         */
                        error = bread(ip->vp, lbase, lblksize, &bp);
                        if (error == 0)
                                bheavy(bp);
                }

                if (error) {
                        brelse(bp);
                        break;
                }

                /*
                 * Ok, copy the data in
                 */
                error = uiomovebp(bp, bp->b_data + loff, n, uio);
                kflags |= NOTE_WRITE;
                modified = 1;
                if (error) {
                        brelse(bp);
                        break;
                }

                /*
                 * WARNING: Pageout daemon will issue UIO_NOCOPY writes
                 *          with IO_SYNC or IO_ASYNC set.  These writes
                 *          must be handled as the pageout daemon expects.
                 *
                 * NOTE!    H2 relies on cluster_write() here because it
                 *          cannot preallocate disk blocks at the logical
                 *          level due to not knowing what the compression
                 *          size will be at this time.
                 *
                 *          We must use cluster_write() here and we depend
                 *          on the write-behind feature to flush buffers
                 *          appropriately.  If we let the buffer daemons do
                 *          it the block allocations will be all over the
                 *          map.
                 */
                if (ioflag & IO_SYNC) {
                        bwrite(bp);
                } else if ((ioflag & IO_DIRECT) && endofblk) {
                        bawrite(bp);
                } else if (ioflag & IO_ASYNC) {
                        bawrite(bp);
                } else if (ip->vp->v_mount->mnt_flag & MNT_NOCLUSTERW) {
                        bdwrite(bp);
                } else {
#if 1
                        bp->b_flags |= B_CLUSTEROK;
                        cluster_write(bp, new_eof, lblksize, seqcount);
#else
                        bp->b_flags |= B_CLUSTEROK;
                        bdwrite(bp);
#endif
                }
        }

        /*
         * Cleanup.  If we extended the file EOF but failed to write through,
         * the entire write is a failure and we have to back up.
         */
        if (error && new_eof != old_eof) {
                hammer2_mtx_unlock(&ip->truncate_lock);
                hammer2_mtx_ex(&ip->lock);
                hammer2_mtx_ex(&ip->truncate_lock);
                hammer2_truncate_file(ip, old_eof);
                if (ip->flags & HAMMER2_INODE_MODIFIED)
                        hammer2_inode_chain_sync(ip);
                hammer2_mtx_unlock(&ip->lock);
        } else if (modified) {
                hammer2_mtx_ex(&ip->lock);
                hammer2_inode_modify(ip);
                hammer2_update_time(&ip->meta.mtime);
                if (ip->flags & HAMMER2_INODE_MODIFIED)
                        hammer2_inode_chain_sync(ip);
                hammer2_mtx_unlock(&ip->lock);
                hammer2_knote(ip->vp, kflags);
        }
        hammer2_trans_assert_strategy(ip->pmp);
        hammer2_mtx_unlock(&ip->truncate_lock);

        return error;
}

/*
 * Truncate the size of a file.  The inode must not be locked.
 *
 * We must unconditionally set HAMMER2_INODE_RESIZED to properly
 * ensure that any on-media data beyond the new file EOF has been destroyed.
 *
 * WARNING: nvtruncbuf() can only be safely called without the inode lock
 *          held due to the way our write thread works.  If the truncation
 *          occurs in the middle of a buffer, nvtruncbuf() is responsible
 *          for dirtying that buffer and zeroing out trailing bytes.
 *
 * WARNING! Assumes that the kernel interlocks size changes at the
 *          vnode level.
 *
 * WARNING! Caller assumes responsibility for removing dead blocks
 *          if INODE_RESIZED is set.
 */
static
void
hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize)
{
        hammer2_key_t lbase;
        int nblksize;

        LOCKSTART;
        hammer2_mtx_unlock(&ip->lock);
        if (ip->vp) {
                nblksize = hammer2_calc_logical(ip, nsize, &lbase, NULL);
                nvtruncbuf(ip->vp, nsize,
                           nblksize, (int)nsize & (nblksize - 1),
                           0);
        }
        hammer2_mtx_ex(&ip->lock);
        KKASSERT((ip->flags & HAMMER2_INODE_RESIZED) == 0);
        ip->osize = ip->meta.size;
        ip->meta.size = nsize;
        atomic_set_int(&ip->flags, HAMMER2_INODE_RESIZED);
        hammer2_inode_modify(ip);
        LOCKSTOP;
}

/*
 * Extend the size of a file.  The inode must not be locked.
 *
 * Even though the file size is changing, we do not have to set the
 * INODE_RESIZED bit unless the file size crosses the EMBEDDED_BYTES
 * boundary.  When this occurs a hammer2_inode_chain_sync() is required
 * to prepare the inode cluster's indirect block table, otherwise
 * async execution of the strategy code will implode on us.
 *
 * WARNING! Assumes that the kernel interlocks size changes at the
 *          vnode level.
 *
 * WARNING! Caller assumes responsibility for transitioning out
 *          of the inode DIRECTDATA mode if INODE_RESIZED is set.
 */
static
void
hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize)
{
        hammer2_key_t lbase;
        hammer2_key_t osize;
        int oblksize;
        int nblksize;

        LOCKSTART;

        KKASSERT((ip->flags & HAMMER2_INODE_RESIZED) == 0);
        hammer2_inode_modify(ip);
        osize = ip->meta.size;
        ip->osize = osize;
        ip->meta.size = nsize;

        if (osize <= HAMMER2_EMBEDDED_BYTES && nsize > HAMMER2_EMBEDDED_BYTES) {
                atomic_set_int(&ip->flags, HAMMER2_INODE_RESIZED);
                hammer2_inode_chain_sync(ip);
        }

        hammer2_mtx_unlock(&ip->lock);
        if (ip->vp) {
                oblksize = hammer2_calc_logical(ip, osize, &lbase, NULL);
                nblksize = hammer2_calc_logical(ip, nsize, &lbase, NULL);
                nvextendbuf(ip->vp,
                            osize, nsize,
                            oblksize, nblksize,
                            -1, -1, 0);
        }
        hammer2_mtx_ex(&ip->lock);

        LOCKSTOP;
}
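
/*
 * Illustrative sketch (hypothetical helper, not part of this file): the
 * extension rule implemented above.  An extension only sets INODE_RESIZED
 * and syncs the chain immediately when the size crosses the
 * HAMMER2_EMBEDDED_BYTES boundary and the inode must leave DIRECTDATA
 * mode before the strategy code runs asynchronously.  Truncation, by
 * contrast, sets INODE_RESIZED unconditionally and leaves chain trimming
 * to the caller.
 */
#if 0
static int
example_extend_crosses_embedded(hammer2_key_t osize, hammer2_key_t nsize)
{
        return (osize <= HAMMER2_EMBEDDED_BYTES &&
                nsize > HAMMER2_EMBEDDED_BYTES);
}
#endif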

static
int
hammer2_vop_nresolve(struct vop_nresolve_args *ap)
{
        hammer2_xop_nresolve_t *xop;
        hammer2_inode_t *ip;
        hammer2_inode_t *dip;
        struct namecache *ncp;
        struct vnode *vp;
        int error;

        LOCKSTART;
        dip = VTOI(ap->a_dvp);
        xop = hammer2_xop_alloc(dip, 0);

        ncp = ap->a_nch->ncp;
        hammer2_xop_setname(&xop->head, ncp->nc_name, ncp->nc_nlen);

        /*
         * Note: In DragonFly the kernel handles '.' and '..'.
         */
        hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);
        hammer2_xop_start(&xop->head, hammer2_xop_nresolve);

        error = hammer2_xop_collect(&xop->head, 0);
        if (error) {
                ip = NULL;
        } else {
                ip = hammer2_inode_get(dip->pmp, dip, &xop->head.cluster, -1);
        }
        hammer2_inode_unlock(dip);

        /*
         * Acquire the related vnode
         *
         * NOTE: For error processing, only ENOENT resolves the namecache
         *       entry to NULL, otherwise we just return the error and
         *       leave the namecache unresolved.
         *
         * NOTE: multiple hammer2_inode structures can be aliased to the
         *       same chain element, for example for hardlinks.  This
         *       use case does not 'reattach' inode associations that
         *       might already exist, but always allocates a new one.
         *
         * WARNING: inode structure is locked exclusively via inode_get
         *          but chain was locked shared.  inode_unlock()
         *          will handle it properly.
         */
        if (ip) {
                vp = hammer2_igetv(ip, &error);
                if (error == 0) {
                        vn_unlock(vp);
                        cache_setvp(ap->a_nch, vp);
                } else if (error == ENOENT) {
                        cache_setvp(ap->a_nch, NULL);
                }
                hammer2_inode_unlock(ip);

                /*
                 * The vp should not be released until after we've disposed
                 * of our locks, because it might cause vop_inactive() to
                 * be called.
                 */
                if (vp)
                        vrele(vp);
        } else {
                error = ENOENT;
                cache_setvp(ap->a_nch, NULL);
        }
        hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
        KASSERT(error || ap->a_nch->ncp->nc_vp != NULL,
                ("resolve error %d/%p ap %p\n",
                 error, ap->a_nch->ncp->nc_vp, ap));
        LOCKSTOP;

        return error;
}

static
int
hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
{
        hammer2_inode_t *dip;
        hammer2_tid_t inum;
        int error;

        LOCKSTART;
        dip = VTOI(ap->a_dvp);
        inum = dip->meta.iparent;
        *ap->a_vpp = NULL;

        if (inum) {
                error = hammer2_vfs_vget(ap->a_dvp->v_mount, NULL,
                                         inum, ap->a_vpp);
        } else {
                error = ENOENT;
        }
        LOCKSTOP;
        return error;
}

static
int
hammer2_vop_nmkdir(struct vop_nmkdir_args *ap)
{
        hammer2_inode_t *dip;
        hammer2_inode_t *nip;
        struct namecache *ncp;
        const uint8_t *name;
        size_t name_len;
        hammer2_tid_t inum;
        int error;

        LOCKSTART;
        dip = VTOI(ap->a_dvp);
        if (dip->pmp->ronly) {
                LOCKSTOP;
                return (EROFS);
        }

        ncp = ap->a_nch->ncp;
        name = ncp->nc_name;
        name_len = ncp->nc_nlen;

        hammer2_pfs_memory_wait(dip->pmp);
        hammer2_trans_init(dip->pmp, 0);

        inum = hammer2_trans_newinum(dip->pmp);

        /*
         * Create the actual inode as a hidden file in the iroot, then
         * create the directory entry.  The creation of the actual inode
         * sets its nlinks to 1 which is the value we desire.
         */
        nip = hammer2_inode_create(dip->pmp->iroot, dip, ap->a_vap, ap->a_cred,
                                   NULL, 0, inum,
                                   inum, 0, 0,
                                   0, &error);
        if (error == 0) {
                error = hammer2_dirent_create(dip, name, name_len,
                                              nip->meta.inum, nip->meta.type);
        }

        if (error) {
                KKASSERT(nip == NULL);
                *ap->a_vpp = NULL;
        } else {
                *ap->a_vpp = hammer2_igetv(nip, &error);
                hammer2_inode_unlock(nip);
        }

        /*
         * Update dip's mtime
         */
        if (error == 0) {
                uint64_t mtime;

                hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);
                hammer2_update_time(&mtime);
                hammer2_inode_modify(dip);
                dip->meta.mtime = mtime;
                hammer2_inode_unlock(dip);
        }

        hammer2_trans_done(dip->pmp);

        if (error == 0) {
                cache_setunresolved(ap->a_nch);
                cache_setvp(ap->a_nch, *ap->a_vpp);
                hammer2_knote(ap->a_dvp, NOTE_WRITE | NOTE_LINK);
        }
        LOCKSTOP;
        return error;
}

static
int
hammer2_vop_open(struct vop_open_args *ap)
{
        return vop_stdopen(ap);
}

/*
 * hammer2_vop_advlock { vp, id, op, fl, flags }
 */
static
int
hammer2_vop_advlock(struct vop_advlock_args *ap)
{
        hammer2_inode_t *ip = VTOI(ap->a_vp);
        hammer2_off_t size;

        size = ip->meta.size;
        return (lf_advlock(ap, &ip->advlock, size));
}

static
int
hammer2_vop_close(struct vop_close_args *ap)
{
        return vop_stdclose(ap);
}

/*
 * hammer2_vop_nlink { nch, dvp, vp, cred }
 *
 * Create a hardlink from (vp) to {dvp, nch}.
 */
static
int
hammer2_vop_nlink(struct vop_nlink_args *ap)
{
        hammer2_inode_t *tdip;  /* target directory to create link in */
        hammer2_inode_t *ip;    /* inode we are hardlinking to */
        struct namecache *ncp;
        const uint8_t *name;
        size_t name_len;
        int error;

        if (ap->a_dvp->v_mount != ap->a_vp->v_mount)
                return(EXDEV);

        LOCKSTART;
        tdip = VTOI(ap->a_dvp);
        if (tdip->pmp->ronly) {
                LOCKSTOP;
                return (EROFS);
        }

        ncp = ap->a_nch->ncp;
        name = ncp->nc_name;
        name_len = ncp->nc_nlen;

        /*
         * ip represents the file being hardlinked.  The file could be a
         * normal file or a hardlink target if it has already been hardlinked.
         * (with the new semantics, it will almost always be a hardlink
         * target).
         *
         * Bump nlinks and potentially also create or move the hardlink
         * target in the parent directory common to (ip) and (tdip).  The
         * consolidation code can modify ip->cluster.  The returned cluster
         * is locked.
         */
        ip = VTOI(ap->a_vp);
        KASSERT(ip->pmp, ("ip->pmp is NULL %p %p", ip, ip->pmp));
        hammer2_pfs_memory_wait(ip->pmp);
        hammer2_trans_init(ip->pmp, 0);

        /*
         * Target should be an indexed inode or there's no way we will ever
         * be able to find it!
         */
        KKASSERT((ip->meta.name_key & HAMMER2_DIRHASH_VISIBLE) == 0);

        error = 0;

        /*
         * Can return NULL and error == EXDEV if the common parent
         * crosses a directory with the xlink flag set.
         */
        hammer2_inode_lock(tdip, 0);
        hammer2_inode_lock(ip, 0);

        /*
         * Create the directory entry and bump nlinks.
         */
        if (error == 0) {
                error = hammer2_dirent_create(tdip, name, name_len,
                                              ip->meta.inum, ip->meta.type);
                hammer2_inode_modify(ip);
                ++ip->meta.nlinks;
        }
        if (error == 0) {
                /*
                 * Update dip's mtime
                 */
                uint64_t mtime;

                hammer2_update_time(&mtime);
                hammer2_inode_modify(tdip);
                tdip->meta.mtime = mtime;

                cache_setunresolved(ap->a_nch);
                cache_setvp(ap->a_nch, ap->a_vp);
        }
        hammer2_inode_unlock(ip);
        hammer2_inode_unlock(tdip);

        hammer2_trans_done(ip->pmp);
        hammer2_knote(ap->a_vp, NOTE_LINK);
        hammer2_knote(ap->a_dvp, NOTE_WRITE);

        LOCKSTOP;
        return error;
}
1490
1491 /*
1492  * hammer2_vop_ncreate { nch, dvp, vpp, cred, vap }
1493  *
1494  * The operating system has already ensured that the directory entry
1495  * does not exist and done all appropriate namespace locking.
1496  */
1497 static
1498 int
1499 hammer2_vop_ncreate(struct vop_ncreate_args *ap)
1500 {
1501         hammer2_inode_t *dip;
1502         hammer2_inode_t *nip;
1503         struct namecache *ncp;
1504         const uint8_t *name;
1505         size_t name_len;
1506         hammer2_tid_t inum;
1507         int error;
1508
1509         LOCKSTART;
1510         dip = VTOI(ap->a_dvp);
1511         if (dip->pmp->ronly) {
1512                 LOCKSTOP;
1513                 return (EROFS);
1514         }
1515
1516         ncp = ap->a_nch->ncp;
1517         name = ncp->nc_name;
1518         name_len = ncp->nc_nlen;
1519         hammer2_pfs_memory_wait(dip->pmp);
1520         hammer2_trans_init(dip->pmp, 0);
1521
1522         inum = hammer2_trans_newinum(dip->pmp);
1523
1524         /*
1525          * Create the actual inode as a hidden file in the iroot, then
1526          * create the directory entry.  The creation of the actual inode
1527          * sets its nlinks to 1 which is the value we desire.
1528          */
1529         nip = hammer2_inode_create(dip->pmp->iroot, dip, ap->a_vap, ap->a_cred,
1530                                    NULL, 0, inum,
1531                                    inum, 0, 0,
1532                                    0, &error);
1533
1534         if (error == 0) {
1535                 error = hammer2_dirent_create(dip, name, name_len,
1536                                               nip->meta.inum, nip->meta.type);
1537         }
1538         if (error) {
                     /*
                      * The inode may exist even though the directory entry
                      * could not be created; finish the unlink to clean it
                      * back up.
                      */
                     if (nip) {
                             hammer2_inode_unlink_finisher(nip, 0);
                             hammer2_inode_unlock(nip);
                     }
1540                 *ap->a_vpp = NULL;
1541         } else {
1542                 *ap->a_vpp = hammer2_igetv(nip, &error);
1543                 hammer2_inode_unlock(nip);
1544         }
1545
1546         /*
1547          * Update dip's mtime
1548          */
1549         if (error == 0) {
1550                 uint64_t mtime;
1551
1552                 hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);
1553                 hammer2_update_time(&mtime);
1554                 hammer2_inode_modify(dip);
1555                 dip->meta.mtime = mtime;
1556                 hammer2_inode_unlock(dip);
1557         }
1558
1559         hammer2_trans_done(dip->pmp);
1560
1561         if (error == 0) {
1562                 cache_setunresolved(ap->a_nch);
1563                 cache_setvp(ap->a_nch, *ap->a_vpp);
1564                 hammer2_knote(ap->a_dvp, NOTE_WRITE);
1565         }
1566         LOCKSTOP;
1567         return error;
1568 }
1569
1570 /*
1571  * Make a device node (typically a fifo)
1572  */
1573 static
1574 int
1575 hammer2_vop_nmknod(struct vop_nmknod_args *ap)
1576 {
1577         hammer2_inode_t *dip;
1578         hammer2_inode_t *nip;
1579         struct namecache *ncp;
1580         const uint8_t *name;
1581         size_t name_len;
1582         hammer2_tid_t inum;
1583         int error;
1584
1585         LOCKSTART;
1586         dip = VTOI(ap->a_dvp);
1587         if (dip->pmp->ronly) {
1588                 LOCKSTOP;
1589                 return (EROFS);
1590         }
1591
1592         ncp = ap->a_nch->ncp;
1593         name = ncp->nc_name;
1594         name_len = ncp->nc_nlen;
1595         hammer2_pfs_memory_wait(dip->pmp);
1596         hammer2_trans_init(dip->pmp, 0);
1597
1598         /*
1599          * Create the device inode and then create the directory entry.
1600          */
1601         inum = hammer2_trans_newinum(dip->pmp);
1602         nip = hammer2_inode_create(dip->pmp->iroot, dip, ap->a_vap, ap->a_cred,
1603                                    NULL, 0, inum,
1604                                    inum, 0, 0,
1605                                    0, &error);
1606         if (error == 0) {
1607                 error = hammer2_dirent_create(dip, name, name_len,
1608                                               nip->meta.inum, nip->meta.type);
1609         }
1610
1612         if (error) {
                     /* clean up nip if the directory entry was not created */
                     if (nip) {
                             hammer2_inode_unlink_finisher(nip, 0);
                             hammer2_inode_unlock(nip);
                     }
1614                 *ap->a_vpp = NULL;
1615         } else {
1616                 *ap->a_vpp = hammer2_igetv(nip, &error);
1617                 hammer2_inode_unlock(nip);
1618         }
1619
1620         /*
1621          * Update dip's mtime
1622          */
1623         if (error == 0) {
1624                 uint64_t mtime;
1625
1626                 hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);
1627                 hammer2_update_time(&mtime);
1628                 hammer2_inode_modify(dip);
1629                 dip->meta.mtime = mtime;
1630                 hammer2_inode_unlock(dip);
1631         }
1632
1633         hammer2_trans_done(dip->pmp);
1634
1635         if (error == 0) {
1636                 cache_setunresolved(ap->a_nch);
1637                 cache_setvp(ap->a_nch, *ap->a_vpp);
1638                 hammer2_knote(ap->a_dvp, NOTE_WRITE);
1639         }
1640         LOCKSTOP;
1641         return error;
1642 }
1643
1644 /*
1645  * hammer2_vop_nsymlink { nch, dvp, vpp, cred, vap, target }
1646  */
1647 static
1648 int
1649 hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
1650 {
1651         hammer2_inode_t *dip;
1652         hammer2_inode_t *nip;
1653         struct namecache *ncp;
1654         const uint8_t *name;
1655         size_t name_len;
1656         hammer2_tid_t inum;
1657         int error;
1658         
1659         dip = VTOI(ap->a_dvp);
1660         if (dip->pmp->ronly)
1661                 return (EROFS);
1662
1663         ncp = ap->a_nch->ncp;
1664         name = ncp->nc_name;
1665         name_len = ncp->nc_nlen;
1666         hammer2_pfs_memory_wait(dip->pmp);
1667         hammer2_trans_init(dip->pmp, 0);
1668
1669         ap->a_vap->va_type = VLNK;      /* enforce type */
1670
1671         /*
1672          * Create the softlink as an inode and then create the directory
1673          * entry.
1674          */
1675         inum = hammer2_trans_newinum(dip->pmp);
1676
1677         nip = hammer2_inode_create(dip->pmp->iroot, dip, ap->a_vap, ap->a_cred,
1678                                    NULL, 0, inum,
1679                                    inum, 0, 0,
1680                                    0, &error);
1681         if (error == 0) {
1682                 error = hammer2_dirent_create(dip, name, name_len,
1683                                               nip->meta.inum, nip->meta.type);
1684         }
1685
1687         if (error) {
                     /* clean up nip if the directory entry was not created */
                     if (nip) {
                             hammer2_inode_unlink_finisher(nip, 0);
                             hammer2_inode_unlock(nip);
                     }
1689                 *ap->a_vpp = NULL;
1690                 hammer2_trans_done(dip->pmp);
1691                 return error;
1692         }
1693         *ap->a_vpp = hammer2_igetv(nip, &error);
1694
1695         /*
1696          * Build the softlink (~like file data) and finalize the namecache.
1697          */
1698         if (error == 0) {
1699                 size_t bytes;
1700                 struct uio auio;
1701                 struct iovec aiov;
1702
1703                 bytes = strlen(ap->a_target);
1704
1705                 hammer2_inode_unlock(nip);
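                     /*
                      * Build a one-segment kernel-space (UIO_SYSSPACE) uio
                      * describing the target path; the symlink body is then
                      * written through the normal file write path.
                      */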
1706                 bzero(&auio, sizeof(auio));
1707                 bzero(&aiov, sizeof(aiov));
1708                 auio.uio_iov = &aiov;
1709                 auio.uio_segflg = UIO_SYSSPACE;
1710                 auio.uio_rw = UIO_WRITE;
1711                 auio.uio_resid = bytes;
1712                 auio.uio_iovcnt = 1;
1713                 auio.uio_td = curthread;
1714                 aiov.iov_base = ap->a_target;
1715                 aiov.iov_len = bytes;
1716                 error = hammer2_write_file(nip, &auio, IO_APPEND, 0);
1717                 /*
                      * XXX A failed write leaves the symlink with an empty
                      *     or partial body; the error is deliberately
                      *     discarded so the namecache is still finalized.
                      */
1718                 error = 0;
1719         } else {
1720                 hammer2_inode_unlock(nip);
1721         }
1722
1723         /*
1724          * Update dip's mtime
1725          */
1726         if (error == 0) {
1727                 uint64_t mtime;
1728
1729                 hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);
1730                 hammer2_update_time(&mtime);
1731                 hammer2_inode_modify(dip);
1732                 dip->meta.mtime = mtime;
1733                 hammer2_inode_unlock(dip);
1734         }
1735
1736         hammer2_trans_done(dip->pmp);
1737
1738         /*
1739          * Finalize namecache
1740          */
1741         if (error == 0) {
1742                 cache_setunresolved(ap->a_nch);
1743                 cache_setvp(ap->a_nch, *ap->a_vpp);
1744                 hammer2_knote(ap->a_dvp, NOTE_WRITE);
1745         }
1746         return error;
1747 }
1748
1749 /*
1750  * hammer2_vop_nremove { nch, dvp, cred }
1751  */
1752 static
1753 int
1754 hammer2_vop_nremove(struct vop_nremove_args *ap)
1755 {
1756         hammer2_xop_unlink_t *xop;
1757         hammer2_inode_t *dip;
1758         hammer2_inode_t *ip;
1759         struct namecache *ncp;
1760         int error;
1761         int isopen;
1762
1763         LOCKSTART;
1764         dip = VTOI(ap->a_dvp);
1765         if (dip->pmp->ronly) {
1766                 LOCKSTOP;
1767                 return(EROFS);
1768         }
1769
1770         ncp = ap->a_nch->ncp;
1771
1772         hammer2_pfs_memory_wait(dip->pmp);
1773         hammer2_trans_init(dip->pmp, 0);
1774         hammer2_inode_lock(dip, 0);
1775
1776         /*
1777          * The unlink XOP unlinks the path from the directory and
1778          * locates and returns the cluster associated with the real inode.
1779          * We have to handle nlinks here on the frontend.
1780          */
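             /*
              * Typical frontend XOP sequence: allocate, set parameters,
              * start the backend, collect the result, and retire the XOP
              * exactly once on every path.
              */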
1781         xop = hammer2_xop_alloc(dip, HAMMER2_XOP_MODIFYING);
1782         hammer2_xop_setname(&xop->head, ncp->nc_name, ncp->nc_nlen);
1783
1784         /*
1785          * The namecache entry is locked so nobody can use this namespace.
1786          * Calculate isopen to determine if this namespace has an open vp
1787          * associated with it and resolve the vp only if it does.
1788          *
1789          * We try to avoid resolving the vnode if nobody has it open, but
1790          * note that the test is via this namespace only.
1791          */
1792         isopen = cache_isopen(ap->a_nch);
1793         xop->isdir = 0;
1794         xop->dopermanent = 0;
1795         hammer2_xop_start(&xop->head, hammer2_xop_unlink);
1796
1797         /*
1798          * Collect the real inode and adjust nlinks, destroy the real
1799          * inode if nlinks transitions to 0 and it was the real inode
1800          * (else it has already been removed).
1801          */
1802         error = hammer2_xop_collect(&xop->head, 0);
1803         hammer2_inode_unlock(dip);
1804
1805         if (error == 0) {
1806                 ip = hammer2_inode_get(dip->pmp, dip, &xop->head.cluster, -1);
1807                 hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
1808                 if (ip) {
1809                         hammer2_inode_unlink_finisher(ip, isopen);
1810                         hammer2_inode_unlock(ip);
1811                 }
1812         } else {
1813                 hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
1814         }
1815
1816         /*
1817          * Update dip's mtime
1818          */
1819         if (error == 0) {
1820                 uint64_t mtime;
1821
1822                 hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);
1823                 hammer2_update_time(&mtime);
1824                 hammer2_inode_modify(dip);
1825                 dip->meta.mtime = mtime;
1826                 hammer2_inode_unlock(dip);
1827         }
1828
1829         hammer2_inode_run_sideq(dip->pmp);
1830         hammer2_trans_done(dip->pmp);
1831         if (error == 0) {
1832                 cache_unlink(ap->a_nch);
1833                 hammer2_knote(ap->a_dvp, NOTE_WRITE);
1834         }
1835         LOCKSTOP;
1836         return (error);
1837 }
1838
1839 /*
1840  * hammer2_vop_nrmdir { nch, dvp, cred }
1841  */
1842 static
1843 int
1844 hammer2_vop_nrmdir(struct vop_nrmdir_args *ap)
1845 {
1846         hammer2_xop_unlink_t *xop;
1847         hammer2_inode_t *dip;
1848         hammer2_inode_t *ip;
1849         struct namecache *ncp;
1850         int isopen;
1851         int error;
1852
1853         LOCKSTART;
1854         dip = VTOI(ap->a_dvp);
1855         if (dip->pmp->ronly) {
1856                 LOCKSTOP;
1857                 return(EROFS);
1858         }
1859
1860         hammer2_pfs_memory_wait(dip->pmp);
1861         hammer2_trans_init(dip->pmp, 0);
1862         hammer2_inode_lock(dip, 0);
1863
1864         xop = hammer2_xop_alloc(dip, HAMMER2_XOP_MODIFYING);
1865
1866         ncp = ap->a_nch->ncp;
1867         hammer2_xop_setname(&xop->head, ncp->nc_name, ncp->nc_nlen);
1868         isopen = cache_isopen(ap->a_nch);
1869         xop->isdir = 1;
1870         xop->dopermanent = 0;
1871         hammer2_xop_start(&xop->head, hammer2_xop_unlink);
1872
1873         /*
1874          * Collect the real inode and adjust nlinks, destroy the real
1875          * inode if nlinks transitions to 0 and it was the real inode
1876          * (else it has already been removed).
1877          */
1878         error = hammer2_xop_collect(&xop->head, 0);
1879         hammer2_inode_unlock(dip);
1880
1881         if (error == 0) {
1882                 ip = hammer2_inode_get(dip->pmp, dip, &xop->head.cluster, -1);
1883                 hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
1884                 if (ip) {
1885                         hammer2_inode_unlink_finisher(ip, isopen);
1886                         hammer2_inode_unlock(ip);
1887                 }
1888         } else {
1889                 hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
1890         }
1891
1892         /*
1893          * Update dip's mtime
1894          */
1895         if (error == 0) {
1896                 uint64_t mtime;
1897
1898                 hammer2_inode_lock(dip, HAMMER2_RESOLVE_SHARED);
1899                 hammer2_update_time(&mtime);
1900                 hammer2_inode_modify(dip);
1901                 dip->meta.mtime = mtime;
1902                 hammer2_inode_unlock(dip);
1903         }
1904
1905         hammer2_inode_run_sideq(dip->pmp);
1906         hammer2_trans_done(dip->pmp);
1907         if (error == 0) {
1908                 cache_unlink(ap->a_nch);
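                     /*
                      * NOTE_LINK is posted on the parent in addition to
                      * NOTE_WRITE, following the usual VFS convention that
                      * removing a subdirectory changes the parent
                      * directory's link count.
                      */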
1909                 hammer2_knote(ap->a_dvp, NOTE_WRITE | NOTE_LINK);
1910         }
1911         LOCKSTOP;
1912         return (error);
1913 }
1914
1915 /*
1916  * hammer2_vop_nrename { fnch, tnch, fdvp, tdvp, cred }
1917  */
1918 static
1919 int
1920 hammer2_vop_nrename(struct vop_nrename_args *ap)
1921 {
1922         struct namecache *fncp;
1923         struct namecache *tncp;
1924         hammer2_inode_t *fdip;  /* source directory */
1925         hammer2_inode_t *tdip;  /* target directory */
1926         hammer2_inode_t *ip;    /* file being renamed */
1927         hammer2_inode_t *tip;   /* replaced target during rename or NULL */
1928         const uint8_t *fname;
1929         size_t fname_len;
1930         const uint8_t *tname;
1931         size_t tname_len;
1932         int error;
1933         int update_tdip;
1934         int update_fdip;
1935         hammer2_key_t tlhc;
1936
1937         if (ap->a_fdvp->v_mount != ap->a_tdvp->v_mount)
1938                 return(EXDEV);
1939         if (ap->a_fdvp->v_mount != ap->a_fnch->ncp->nc_vp->v_mount)
1940                 return(EXDEV);
1941
1942         fdip = VTOI(ap->a_fdvp);        /* source directory */
1943         tdip = VTOI(ap->a_tdvp);        /* target directory */
1944
1945         if (fdip->pmp->ronly)
1946                 return(EROFS);
1947
1948         LOCKSTART;
1949         fncp = ap->a_fnch->ncp;         /* entry name in source */
1950         fname = fncp->nc_name;
1951         fname_len = fncp->nc_nlen;
1952
1953         tncp = ap->a_tnch->ncp;         /* entry name in target */
1954         tname = tncp->nc_name;
1955         tname_len = tncp->nc_nlen;
1956
1957         hammer2_pfs_memory_wait(tdip->pmp);
1958         hammer2_trans_init(tdip->pmp, 0);
1959
1960         update_tdip = 0;
1961         update_fdip = 0;
1962
1963         ip = VTOI(fncp->nc_vp);
1964         hammer2_inode_ref(ip);          /* extra ref */
1965
1966         /*
1967          * Lookup the target name to determine if a directory entry
1968          * is being overwritten.  We only hold related inode locks
1969          * temporarily, the operating system is expected to protect
1970          * against rename races.
1971          */
1972         tip = tncp->nc_vp ? VTOI(tncp->nc_vp) : NULL;
1973         if (tip)
1974                 hammer2_inode_ref(tip); /* extra ref */
1975
1976         /*
1977          * Note: the old common-parent consolidation (NULL return with
1978          * error == EXDEV crossing an xlink directory) is gone from here.
1979          *
1980          * For now avoid deadlocks with a simple pointer-address lock
1981          * ordering.  (tip) can be NULL.
1982          */
1983         error = 0;
1984         if (fdip <= tdip) {
1985                 hammer2_inode_lock(fdip, 0);
1986                 hammer2_inode_lock(tdip, 0);
1987         } else {
1988                 hammer2_inode_lock(tdip, 0);
1989                 hammer2_inode_lock(fdip, 0);
1990         }
1991         if (tip) {
1992                 if (ip <= tip) {
1993                         hammer2_inode_lock(ip, 0);
1994                         hammer2_inode_lock(tip, 0);
1995                 } else {
1996                         hammer2_inode_lock(tip, 0);
1997                         hammer2_inode_lock(ip, 0);
1998                 }
1999         } else {
2000                 hammer2_inode_lock(ip, 0);
2001         }
2002
2003 #if 0
2004         /*
2005          * Delete the target namespace.
2006          *
2007          * REMOVED - NOW FOLDED INTO XOP_NRENAME OPERATION
2008          */
2009         {
2010                 hammer2_xop_unlink_t *xop2;
2011                 hammer2_inode_t *tip;
2012                 int isopen;
2013
2014                 /*
2015                  * The unlink XOP unlinks the path from the directory and
2016                  * locates and returns the cluster associated with the real
2017                  * inode.  We have to handle nlinks here on the frontend.
2018                  */
2019                 xop2 = hammer2_xop_alloc(tdip, HAMMER2_XOP_MODIFYING);
2020                 hammer2_xop_setname(&xop2->head, tname, tname_len);
2021                 isopen = cache_isopen(ap->a_tnch);
2022                 xop2->isdir = -1;
2023                 xop2->dopermanent = 0;
2024                 hammer2_xop_start(&xop2->head, hammer2_xop_unlink);
2025
2026                 /*
2027                  * Collect the real inode and adjust nlinks, destroy the real
2028                  * inode if nlinks transitions to 0 and it was the real inode
2029                  * (else it has already been removed).
2030                  */
2031                 tnch_error = hammer2_xop_collect(&xop2->head, 0);
2032                 /* hammer2_inode_unlock(tdip); */
2033
2034                 if (tnch_error == 0) {
2035                         tip = hammer2_inode_get(tdip->pmp, NULL,
2036                                                 &xop2->head.cluster, -1);
2037                         hammer2_xop_retire(&xop2->head, HAMMER2_XOPMASK_VOP);
2038                         if (tip) {
2039                                 hammer2_inode_unlink_finisher(tip, isopen);
2040                                 hammer2_inode_unlock(tip);
2041                         }
2042                 } else {
2043                         hammer2_xop_retire(&xop2->head, HAMMER2_XOPMASK_VOP);
2044                 }
2045                 /* hammer2_inode_lock(tdip, 0); */
2046
2047                 if (tnch_error && tnch_error != ENOENT) {
2048                         error = tnch_error;
2049                         goto done2;
2050                 }
2051                 update_tdip = 1;
2052         }
2053 #endif
2054
2055         /*
2056          * Resolve the collision space for (tdip, tname, tname_len)
2057          *
2058          * tdip must be held exclusively locked to prevent races since
2059          * multiple filenames can end up in the same collision space.
2060          */
2061         {
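                     /*
                      * The directory hash occupies the upper bits of the
                      * key.  Colliding names are disambiguated by
                      * incrementing the low (HAMMER2_DIRHASH_LOMASK) bits
                      * until an unused key is found; exhausting that space
                      * returns ENOSPC.
                      */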
2062                 hammer2_xop_scanlhc_t *sxop;
2063                 hammer2_tid_t lhcbase;
2064
2065                 tlhc = hammer2_dirhash(tname, tname_len);
2066                 lhcbase = tlhc;
2067                 sxop = hammer2_xop_alloc(tdip, HAMMER2_XOP_MODIFYING);
2068                 sxop->lhc = tlhc;
2069                 hammer2_xop_start(&sxop->head, hammer2_xop_scanlhc);
2070                 while ((error = hammer2_xop_collect(&sxop->head, 0)) == 0) {
2071                         if (tlhc != sxop->head.cluster.focus->bref.key)
2072                                 break;
2073                         ++tlhc;
2074                 }
2075                 hammer2_xop_retire(&sxop->head, HAMMER2_XOPMASK_VOP);
2076
2077                 if (error) {
2078                         if (error != ENOENT)
2079                                 goto done2;
2080                         ++tlhc;
2081                         error = 0;
2082                 }
2083                 if ((lhcbase ^ tlhc) & ~HAMMER2_DIRHASH_LOMASK) {
2084                         error = ENOSPC;
2085                         goto done2;
2086                 }
2087         }
2088
2089         /*
2090          * Ready to go, issue the rename to the backend.  Note that meta-data
2091          * updates to the related inodes occur separately from the rename
2092          * operation.
2093          *
2094          * NOTE: While it is not necessary to update ip->meta.name*, doing
2095          *       so aids catastrophic recovery and debugging.
2096          */
2097         if (error == 0) {
2098                 hammer2_xop_nrename_t *xop4;
2099
2100                 xop4 = hammer2_xop_alloc(fdip, HAMMER2_XOP_MODIFYING);
2101                 xop4->lhc = tlhc;
2102                 xop4->ip_key = ip->meta.name_key;
2103                 hammer2_xop_setip2(&xop4->head, ip);
2104                 hammer2_xop_setip3(&xop4->head, tdip);
2105                 hammer2_xop_setname(&xop4->head, fname, fname_len);
2106                 hammer2_xop_setname2(&xop4->head, tname, tname_len);
2107                 hammer2_xop_start(&xop4->head, hammer2_xop_nrename);
2108
2109                 error = hammer2_xop_collect(&xop4->head, 0);
2110                 hammer2_xop_retire(&xop4->head, HAMMER2_XOPMASK_VOP);
2111
2112                 if (error == ENOENT)
2113                         error = 0;
2114
2115                 /*
2116                  * Update inode meta-data.
2117                  *
2118                  * WARNING!  The in-memory inode (ip) structure does not
2119                  *           maintain a copy of the inode's filename buffer.
2120                  */
2121                 if (error == 0 &&
2122                     (ip->meta.name_key & HAMMER2_DIRHASH_VISIBLE)) {
2123                         hammer2_inode_modify(ip);
2124                         ip->meta.name_len = tname_len;
2125                         ip->meta.name_key = tlhc;
2126                 }
2127                 if (error == 0) {
2128                         hammer2_inode_modify(ip);
2129                         ip->meta.iparent = tdip->meta.inum;
2130                 }
2131                 update_fdip = 1;
2132                 update_tdip = 1;
2133         }
2134
2135 done2:
2136         /*
2137          * If no error, the backend has replaced the target directory entry.
2138          * We must adjust nlinks on the replaced target inode, if one exists.
2139          */
2140         if (error == 0 && tip) {
2141                 int isopen;
2142
2143                 isopen = cache_isopen(ap->a_tnch);
2144                 hammer2_inode_unlink_finisher(tip, isopen);
2145         }
2146
2147         /*
2148          * Update directory mtimes to reflect that something changed.
2149          */
2150         if (update_fdip || update_tdip) {
2151                 uint64_t mtime;
2152
2153                 hammer2_update_time(&mtime);
2154                 if (update_fdip) {
2155                         hammer2_inode_modify(fdip);
2156                         fdip->meta.mtime = mtime;
2157                 }
2158                 if (update_tdip) {
2159                         hammer2_inode_modify(tdip);
2160                         tdip->meta.mtime = mtime;
2161                 }
2162         }
2163         if (tip) {
2164                 hammer2_inode_unlock(tip);
2165                 hammer2_inode_drop(tip);
2166         }
2167         hammer2_inode_unlock(ip);
2168         hammer2_inode_unlock(tdip);
2169         hammer2_inode_unlock(fdip);
2170         hammer2_inode_drop(ip);
2171         hammer2_inode_run_sideq(fdip->pmp);
2172
2173         hammer2_trans_done(tdip->pmp);
2174
2175         /*
2176          * Issue the namecache update after unlocking all the internal
2177          * hammer2 structures, otherwise we might deadlock.
2178          */
2179         if (error == 0 && tip) {
2180                 cache_unlink(ap->a_tnch);
2181                 cache_setunresolved(ap->a_tnch);
2182         }
2183         if (error == 0) {
2184                 cache_rename(ap->a_fnch, ap->a_tnch);
2185                 hammer2_knote(ap->a_fdvp, NOTE_WRITE);
2186                 hammer2_knote(ap->a_tdvp, NOTE_WRITE);
2187                 hammer2_knote(fncp->nc_vp, NOTE_RENAME);
2188         }
2189
2190         LOCKSTOP;
2191         return (error);
2192 }
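
     /*
      * Illustrative only: given the knotes above, a rename shows up in
      * userland as NOTE_RENAME on the moved file plus NOTE_WRITE on both
      * directories, registered via EVFILT_VNODE as in the sketch after
      * hammer2_vop_nlink().
      */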
2193
2194 /*
2195  * hammer2_vop_ioctl { vp, command, data, fflag, cred }
2196  */
2197 static
2198 int
2199 hammer2_vop_ioctl(struct vop_ioctl_args *ap)
2200 {
2201         hammer2_inode_t *ip;
2202         int error;
2203
2204         LOCKSTART;
2205         ip = VTOI(ap->a_vp);
2206
2207         error = hammer2_ioctl(ip, ap->a_command, (void *)ap->a_data,
2208                               ap->a_fflag, ap->a_cred);
2209         LOCKSTOP;
2210         return (error);
2211 }
2212
2213 static
2214 int 
2215 hammer2_vop_mountctl(struct vop_mountctl_args *ap)
2216 {
2217         struct mount *mp;
2218         hammer2_pfs_t *pmp;
2219         int rc;
2220
2221         LOCKSTART;
2222         switch (ap->a_op) {
2223         case (MOUNTCTL_SET_EXPORT):
2224                 mp = ap->a_head.a_ops->head.vv_mount;
2225                 pmp = MPTOPMP(mp);
2226
2227                 if (ap->a_ctllen != sizeof(struct export_args))
2228                         rc = (EINVAL);
2229                 else
2230                         rc = vfs_export(mp, &pmp->export,
2231                                         (const struct export_args *)ap->a_ctl);
2232                 break;
2233         default:
2234                 rc = vop_stdmountctl(ap);
2235                 break;
2236         }
2237         LOCKSTOP;
2238         return (rc);
2239 }
2240
2241 /*
2242  * KQFILTER
2243  */
2244 static void filt_hammer2detach(struct knote *kn);
2245 static int filt_hammer2read(struct knote *kn, long hint);
2246 static int filt_hammer2write(struct knote *kn, long hint);
2247 static int filt_hammer2vnode(struct knote *kn, long hint);
2248
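     /*
      * One filterops per supported filter type.  f_attach is NULL because
      * attachment is done directly in hammer2_vop_kqfilter(); all three
      * share a detach routine and differ only in their event predicate.
      * FILTEROP_MPSAFE runs the filters without the mplock and
      * FILTEROP_ISFD marks the ident as a file descriptor.
      */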
2249 static struct filterops hammer2read_filtops =
2250         { FILTEROP_ISFD | FILTEROP_MPSAFE,
2251           NULL, filt_hammer2detach, filt_hammer2read };
2252 static struct filterops hammer2write_filtops =
2253         { FILTEROP_ISFD | FILTEROP_MPSAFE,
2254           NULL, filt_hammer2detach, filt_hammer2write };
2255 static struct filterops hammer2vnode_filtops =
2256         { FILTEROP_ISFD | FILTEROP_MPSAFE,
2257           NULL, filt_hammer2detach, filt_hammer2vnode };
2258
2259 static
2260 int
2261 hammer2_vop_kqfilter(struct vop_kqfilter_args *ap)
2262 {
2263         struct vnode *vp = ap->a_vp;
2264         struct knote *kn = ap->a_kn;
2265
2266         switch (kn->kn_filter) {
2267         case EVFILT_READ:
2268                 kn->kn_fop = &hammer2read_filtops;
2269                 break;
2270         case EVFILT_WRITE:
2271                 kn->kn_fop = &hammer2write_filtops;
2272                 break;
2273         case EVFILT_VNODE:
2274                 kn->kn_fop = &hammer2vnode_filtops;
2275                 break;
2276         default:
2277                 return (EOPNOTSUPP);
2278         }
2279
2280         kn->kn_hook = (caddr_t)vp;
2281
2282         knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
2283
2284         return(0);
2285 }
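
     /*
      * Event delivery sketch: the VOPs above call hammer2_knote() with a
      * NOTE_* hint, which KNOTE()s the vnode's ki_note list.  Each knote
      * on that list was attached by hammer2_vop_kqfilter() and is tested
      * by the matching filt_hammer2*() predicate below.
      */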
2286
2287 static void
2288 filt_hammer2detach(struct knote *kn)
2289 {
2290         struct vnode *vp = (void *)kn->kn_hook;
2291
2292         knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
2293 }
2294
2295 static int
2296 filt_hammer2read(struct knote *kn, long hint)
2297 {
2298         struct vnode *vp = (void *)kn->kn_hook;
2299         hammer2_inode_t *ip = VTOI(vp);
2300         off_t off;
2301
2302         if (hint == NOTE_REVOKE) {
2303                 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
2304                 return(1);
2305         }
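             /*
              * Readable bytes = file size - current descriptor offset,
              * clamped to INTPTR_MAX.  NOTE_OLDAPI consumers only expect
              * a ready/not-ready indication.
              */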
2306         off = ip->meta.size - kn->kn_fp->f_offset;
2307         kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
2308         if (kn->kn_sfflags & NOTE_OLDAPI)
2309                 return(1);
2310         return (kn->kn_data != 0);
2311 }
2312
2313
2314 static int
2315 filt_hammer2write(struct knote *kn, long hint)
2316 {
2317         if (hint == NOTE_REVOKE)
2318                 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
2319         kn->kn_data = 0;
2320         return (1);
2321 }
2322
2323 static int
2324 filt_hammer2vnode(struct knote *kn, long hint)
2325 {
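             /*
              * Accumulate only the event bits the caller subscribed to
              * (kn_sfflags) into kn_fflags.  A revoke always fires and
              * marks the knote EOF/NODATA.
              */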
2326         if (kn->kn_sfflags & hint)
2327                 kn->kn_fflags |= hint;
2328         if (hint == NOTE_REVOKE) {
2329                 kn->kn_flags |= (EV_EOF | EV_NODATA);
2330                 return (1);
2331         }
2332         return (kn->kn_fflags != 0);
2333 }
2334
2335 /*
2336  * SPEC and FIFO support VOPS
2337  */
2338 static
2339 int
2340 hammer2_vop_markatime(struct vop_markatime_args *ap)
2341 {
2342         hammer2_inode_t *ip;
2343         struct vnode *vp;
2344
2345         vp = ap->a_vp;
2346         ip = VTOI(vp);
2347
2348         if (ip->pmp->ronly)
2349                 return(EROFS);
2350         return(0);
2351 }
2352
2353 static
2354 int
2355 hammer2_vop_fifokqfilter(struct vop_kqfilter_args *ap)
2356 {
2357         int error;
2358
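             /*
              * Give fifofs first crack at the filter; fall back to the
              * hammer2 vnode filters only if fifofs rejects it.
              */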
2359         error = VOCALL(&fifo_vnode_vops, &ap->a_head);
2360         if (error)
2361                 error = hammer2_vop_kqfilter(ap);
2362         return(error);
2363 }
2364
2365 /*
2366  * VOPS vector
2367  */
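     /*
      * Three tables: regular vnodes, special files, and fifos.  The spec
      * and fifo tables lean on the generic implementations (vop_defaultop,
      * fifo_vnoperate) and hook hammer2 only for fsync, access, attributes
      * and reclamation (plus kqueue for fifos).
      */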
2368 struct vop_ops hammer2_vnode_vops = {
2369         .vop_default    = vop_defaultop,
2370         .vop_fsync      = hammer2_vop_fsync,
2371         .vop_getpages   = vop_stdgetpages,
2372         .vop_putpages   = vop_stdputpages,
2373         .vop_access     = hammer2_vop_access,
2374         .vop_advlock    = hammer2_vop_advlock,
2375         .vop_close      = hammer2_vop_close,
2376         .vop_nlink      = hammer2_vop_nlink,
2377         .vop_ncreate    = hammer2_vop_ncreate,
2378         .vop_nsymlink   = hammer2_vop_nsymlink,
2379         .vop_nremove    = hammer2_vop_nremove,
2380         .vop_nrmdir     = hammer2_vop_nrmdir,
2381         .vop_nrename    = hammer2_vop_nrename,
2382         .vop_getattr    = hammer2_vop_getattr,
2383         .vop_setattr    = hammer2_vop_setattr,
2384         .vop_readdir    = hammer2_vop_readdir,
2385         .vop_readlink   = hammer2_vop_readlink,
2388         .vop_read       = hammer2_vop_read,
2389         .vop_write      = hammer2_vop_write,
2390         .vop_open       = hammer2_vop_open,
2391         .vop_inactive   = hammer2_vop_inactive,
2392         .vop_reclaim    = hammer2_vop_reclaim,
2393         .vop_nresolve   = hammer2_vop_nresolve,
2394         .vop_nlookupdotdot = hammer2_vop_nlookupdotdot,
2395         .vop_nmkdir     = hammer2_vop_nmkdir,
2396         .vop_nmknod     = hammer2_vop_nmknod,
2397         .vop_ioctl      = hammer2_vop_ioctl,
2398         .vop_mountctl   = hammer2_vop_mountctl,
2399         .vop_bmap       = hammer2_vop_bmap,
2400         .vop_strategy   = hammer2_vop_strategy,
2401         .vop_kqfilter   = hammer2_vop_kqfilter
2402 };
2403
2404 struct vop_ops hammer2_spec_vops = {
2405         .vop_default =          vop_defaultop,
2406         .vop_fsync =            hammer2_vop_fsync,
2407         .vop_read =             vop_stdnoread,
2408         .vop_write =            vop_stdnowrite,
2409         .vop_access =           hammer2_vop_access,
2410         .vop_close =            hammer2_vop_close,
2411         .vop_markatime =        hammer2_vop_markatime,
2412         .vop_getattr =          hammer2_vop_getattr,
2413         .vop_inactive =         hammer2_vop_inactive,
2414         .vop_reclaim =          hammer2_vop_reclaim,
2415         .vop_setattr =          hammer2_vop_setattr
2416 };
2417
2418 struct vop_ops hammer2_fifo_vops = {
2419         .vop_default =          fifo_vnoperate,
2420         .vop_fsync =            hammer2_vop_fsync,
2421 #if 0
2422         .vop_read =             hammer2_vop_fiforead,
2423         .vop_write =            hammer2_vop_fifowrite,
2424 #endif
2425         .vop_access =           hammer2_vop_access,
2426 #if 0
2427         .vop_close =            hammer2_vop_fifoclose,
2428 #endif
2429         .vop_markatime =        hammer2_vop_markatime,
2430         .vop_getattr =          hammer2_vop_getattr,
2431         .vop_inactive =         hammer2_vop_inactive,
2432         .vop_reclaim =          hammer2_vop_reclaim,
2433         .vop_setattr =          hammer2_vop_setattr,
2434         .vop_kqfilter =         hammer2_vop_fifokqfilter
2435 };
2436