hammer2 - Implement depth limit for stack recursion, embedded data fixes
[dragonfly.git] / sys / vfs / hammer2 / hammer2_vnops.c
1 /*
2  * Copyright (c) 2011-2012 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/fcntl.h>
39 #include <sys/buf.h>
40 #include <sys/proc.h>
41 #include <sys/namei.h>
42 #include <sys/mount.h>
43 #include <sys/vnode.h>
44 #include <sys/mountctl.h>
45 #include <sys/dirent.h>
46 #include <sys/uio.h>
47
48 #include "hammer2.h"
49
/*
 * NOTE(review): ZFOFFSET is not referenced in the visible part of this
 * file.  Presumably it is a sentinel bio offset (distinct from NOOFFSET)
 * used by the strategy code -- confirm before relying on it.
 */
#define ZFOFFSET        (-2LL)

/*
 * Forward declarations for the file-local helpers used by the VOP entry
 * points below.
 */
static int hammer2_read_file(hammer2_inode_t *ip, struct uio *uio,
                                int seqcount);
static int hammer2_write_file(hammer2_inode_t *ip, struct uio *uio, int ioflag,
                              int seqcount);
static hammer2_off_t hammer2_assign_physical(hammer2_inode_t *ip,
                                hammer2_key_t lbase, int lblksize, int *errorp);
static void hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize);
static void hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize);
static int hammer2_unlink_file(hammer2_inode_t *dip,
                                const uint8_t *name, size_t name_len,
                                int isdir, int adjlinks);
63
64 /*
65  * Last reference to a vnode is going away but it is still cached.
66  */
67 static
68 int
69 hammer2_vop_inactive(struct vop_inactive_args *ap)
70 {
71         struct vnode *vp;
72         struct hammer2_inode *ip;
73 #if 0
74         struct hammer2_mount *hmp;
75 #endif
76
77         vp = ap->a_vp;
78         ip = VTOI(vp);
79
80         /*
81          * Degenerate case
82          */
83         if (ip == NULL) {
84                 vrecycle(vp);
85                 return (0);
86         }
87
88         /*
89          * Detect updates to the embedded data which may be synchronized by
90          * the strategy code.  Simply mark the inode modified so it gets
91          * picked up by our normal flush.
92          */
93         if (ip->chain.flags & HAMMER2_CHAIN_DIRTYEMBED) {
94                 hammer2_inode_lock_ex(ip);
95                 atomic_clear_int(&ip->chain.flags, HAMMER2_CHAIN_DIRTYEMBED);
96                 hammer2_chain_modify(ip->hmp, &ip->chain, 0);
97                 hammer2_inode_unlock_ex(ip);
98         }
99
100         /*
101          * Check for deleted inodes and recycle immediately.
102          */
103         if (ip->chain.flags & HAMMER2_CHAIN_DELETED) {
104                 vrecycle(vp);
105         }
106         return (0);
107 }
108
109 /*
110  * Reclaim a vnode so that it can be reused; after the inode is
111  * disassociated, the filesystem must manage it alone.
112  */
113 static
114 int
115 hammer2_vop_reclaim(struct vop_reclaim_args *ap)
116 {
117         struct hammer2_inode *ip;
118         struct hammer2_mount *hmp;
119         struct vnode *vp;
120
121         vp = ap->a_vp;
122         ip = VTOI(vp);
123         if (ip == NULL)
124                 return(0);
125         hmp = ip->hmp;
126
127         /*
128          * Set SUBMODIFIED so we can detect and propagate the DESTROYED
129          * bit in the flush code.
130          */
131         hammer2_inode_lock_ex(ip);
132         vp->v_data = NULL;
133         ip->vp = NULL;
134         if (ip->chain.flags & HAMMER2_CHAIN_DELETED) {
135                 atomic_set_int(&ip->chain.flags, HAMMER2_CHAIN_DESTROYED |
136                                                  HAMMER2_CHAIN_SUBMODIFIED);
137         }
138         hammer2_chain_flush(hmp, &ip->chain);
139         hammer2_inode_unlock_ex(ip);
140         hammer2_chain_drop(hmp, &ip->chain);    /* vp ref */
141
142         /*
143          * XXX handle background sync when ip dirty, kernel will no longer
144          * notify us regarding this inode because there is no longer a
145          * vnode attached to it.
146          */
147
148         return (0);
149 }
150
151 static
152 int
153 hammer2_vop_fsync(struct vop_fsync_args *ap)
154 {
155         struct hammer2_inode *ip;
156         struct hammer2_mount *hmp;
157         struct vnode *vp;
158
159         vp = ap->a_vp;
160         ip = VTOI(vp);
161         hmp = ip->hmp;
162
163         hammer2_inode_lock_ex(ip);
164         vfsync(vp, ap->a_waitfor, 1, NULL, NULL);
165
166         /*
167          * Detect updates to the embedded data which may be synchronized by
168          * the strategy code.  Simply mark the inode modified so it gets
169          * picked up by our normal flush.
170          */
171         if (ip->chain.flags & HAMMER2_CHAIN_DIRTYEMBED) {
172                 atomic_clear_int(&ip->chain.flags, HAMMER2_CHAIN_DIRTYEMBED);
173                 hammer2_chain_modify(hmp, &ip->chain, 0);
174         }
175
176         /*
177          * Calling chain_flush here creates a lot of duplicative
178          * COW operations due to non-optimal vnode ordering.
179          *
180          * Only do it for an actual fsync() syscall.  The other forms
181          * which call this function will eventually call chain_flush
182          * on the volume root as a catch-all, which is far more optimal.
183          */
184         if (ap->a_flags & VOP_FSYNC_SYSCALL)
185                 hammer2_chain_flush(hmp, &ip->chain);
186         hammer2_inode_unlock_ex(ip);
187         return (0);
188 }
189
190 static
191 int
192 hammer2_vop_access(struct vop_access_args *ap)
193 {
194         hammer2_inode_t *ip = VTOI(ap->a_vp);
195         uid_t uid;
196         gid_t gid;
197         int error;
198
199         uid = hammer2_to_unix_xid(&ip->ip_data.uid);
200         gid = hammer2_to_unix_xid(&ip->ip_data.gid);
201
202         error = vop_helper_access(ap, uid, gid, ip->ip_data.mode,
203                                   ip->ip_data.uflags);
204         return (error);
205 }
206
207 static
208 int
209 hammer2_vop_getattr(struct vop_getattr_args *ap)
210 {
211         hammer2_mount_t *hmp;
212         hammer2_inode_t *ip;
213         struct vnode *vp;
214         struct vattr *vap;
215
216         vp = ap->a_vp;
217         vap = ap->a_vap;
218
219         ip = VTOI(vp);
220         hmp = ip->hmp;
221
222         hammer2_inode_lock_sh(ip);
223
224         vap->va_fsid = hmp->mp->mnt_stat.f_fsid.val[0];
225         vap->va_fileid = ip->ip_data.inum;
226         vap->va_mode = ip->ip_data.mode;
227         vap->va_nlink = ip->ip_data.nlinks;
228         vap->va_uid = 0;
229         vap->va_gid = 0;
230         vap->va_rmajor = 0;
231         vap->va_rminor = 0;
232         vap->va_size = ip->ip_data.size;
233         vap->va_blocksize = HAMMER2_PBUFSIZE;
234         vap->va_flags = ip->ip_data.uflags;
235         hammer2_time_to_timespec(ip->ip_data.ctime, &vap->va_ctime);
236         hammer2_time_to_timespec(ip->ip_data.mtime, &vap->va_mtime);
237         hammer2_time_to_timespec(ip->ip_data.mtime, &vap->va_atime);
238         vap->va_gen = 1;
239         vap->va_bytes = vap->va_size;   /* XXX */
240         vap->va_type = hammer2_get_vtype(ip);
241         vap->va_filerev = 0;
242         vap->va_uid_uuid = ip->ip_data.uid;
243         vap->va_gid_uuid = ip->ip_data.gid;
244         vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
245                           VA_FSID_UUID_VALID;
246
247         hammer2_inode_unlock_sh(ip);
248
249         return (0);
250 }
251
252 static
253 int
254 hammer2_vop_setattr(struct vop_setattr_args *ap)
255 {
256         hammer2_mount_t *hmp;
257         hammer2_inode_t *ip;
258         struct vnode *vp;
259         struct vattr *vap;
260         int error;
261         int kflags = 0;
262         int doctime = 0;
263         int domtime = 0;
264
265         vp = ap->a_vp;
266         vap = ap->a_vap;
267
268         ip = VTOI(vp);
269         hmp = ip->hmp;
270
271         if (hmp->ronly)
272                 return(EROFS);
273
274         hammer2_inode_lock_ex(ip);
275         error = 0;
276
277         if (vap->va_flags != VNOVAL) {
278                 u_int32_t flags;
279
280                 flags = ip->ip_data.uflags;
281                 error = vop_helper_setattr_flags(&flags, vap->va_flags,
282                                          hammer2_to_unix_xid(&ip->ip_data.uid),
283                                          ap->a_cred);
284                 if (error == 0) {
285                         if (ip->ip_data.uflags != flags) {
286                                 hammer2_chain_modify(hmp, &ip->chain, 0);
287                                 ip->ip_data.uflags = flags;
288                                 doctime = 1;
289                                 kflags |= NOTE_ATTRIB;
290                         }
291                         if (ip->ip_data.uflags & (IMMUTABLE | APPEND)) {
292                                 error = 0;
293                                 goto done;
294                         }
295                 }
296         }
297
298         if (ip->ip_data.uflags & (IMMUTABLE | APPEND)) {
299                 error = EPERM;
300                 goto done;
301         }
302         /* uid, gid */
303
304         /*
305          * Resize the file
306          */
307         if (vap->va_size != VNOVAL && ip->ip_data.size != vap->va_size) {
308                 switch(vp->v_type) {
309                 case VREG:
310                         if (vap->va_size == ip->ip_data.size)
311                                 break;
312                         if (vap->va_size < ip->ip_data.size) {
313                                 hammer2_truncate_file(ip, vap->va_size);
314                         } else {
315                                 hammer2_extend_file(ip, vap->va_size);
316                         }
317                         domtime = 1;
318                         break;
319                 default:
320                         error = EINVAL;
321                         goto done;
322                 }
323         }
324 done:
325         hammer2_inode_unlock_ex(ip);
326         return (error);
327 }
328
/*
 * hammer2_vop_readdir { vp, uio, eofflag, ncookies, cookies }
 *
 * Copy directory entries into ap->a_uio starting at uio->uio_offset.
 * Offsets 0 and 1 are the artificial "." and ".." entries; all other
 * offsets are directory hash keys with the HAMMER2_DIRHASH_VISIBLE bit
 * stripped.
 *
 * When ap->a_ncookies is non-NULL an array of seek cookies (at most 1024
 * entries) is kmalloc'd from M_TEMP and handed back through
 * *ap->a_cookies / *ap->a_ncookies.  If an error occurs before any entry
 * was produced the array is freed here and NULL/0 is handed back instead.
 *
 * *ap->a_eofflag (when supplied) is set non-zero only if the scan ran out
 * of chains.  On return uio->uio_offset holds the offset to resume from.
 *
 * Returns 0 or the error reported by vop_write_dirent() / the chain lock.
 */
static
int
hammer2_vop_readdir(struct vop_readdir_args *ap)
{
        hammer2_mount_t *hmp;
        hammer2_inode_t *ip;
        hammer2_inode_t *xip;
        hammer2_chain_t *parent;
        hammer2_chain_t *chain;
        hammer2_key_t lkey;
        struct uio *uio;
        off_t *cookies;
        off_t saveoff;
        int cookie_index;
        int ncookies;
        int error;
        int dtype;
        int r;

        ip = VTOI(ap->a_vp);
        hmp = ip->hmp;
        uio = ap->a_uio;
        saveoff = uio->uio_offset;

        /*
         * Set up directory entry cookies if requested.
         *
         * ncookies == -1 means "no cookies": cookie_index still counts
         * entries but can never match the limit.
         */
        if (ap->a_ncookies) {
                ncookies = uio->uio_resid / 16 + 1;
                if (ncookies > 1024)
                        ncookies = 1024;
                cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
        } else {
                ncookies = -1;
                cookies = NULL;
        }
        cookie_index = 0;

        /*
         * Handle artificial entries.  To ensure that only positive 64 bit
         * quantities are returned to userland we always strip off bit 63.
         * The hash code is designed such that codes 0x0000-0x7FFF are not
         * used, allowing us to use these codes for artificial entries.
         *
         * Entry 0 is used for '.' and entry 1 is used for '..'.  Do not
         * allow '..' to cross the mount point into (e.g.) the super-root.
         */
        error = 0;
        chain = (void *)(intptr_t)-1;   /* non-NULL for early goto done case */

        if (saveoff == 0) {
                r = vop_write_dirent(&error, uio,
                                     ip->ip_data.inum &
                                        HAMMER2_DIRHASH_USERMSK,
                                     DT_DIR, 1, ".");
                if (r)
                        goto done;
                if (cookies)
                        cookies[cookie_index] = saveoff;
                ++saveoff;
                ++cookie_index;
                if (cookie_index == ncookies)
                        goto done;
        }
        if (saveoff == 1) {
                /*
                 * '..' refers to the directory itself when there is no
                 * parent or when this is the mount's root inode.
                 */
                if (ip->pip == NULL || ip == hmp->iroot)
                        xip = ip;
                else
                        xip = ip->pip;

                r = vop_write_dirent(&error, uio,
                                     xip->ip_data.inum &
                                      HAMMER2_DIRHASH_USERMSK,
                                     DT_DIR, 2, "..");
                if (r)
                        goto done;
                if (cookies)
                        cookies[cookie_index] = saveoff;
                ++saveoff;
                ++cookie_index;
                if (cookie_index == ncookies)
                        goto done;
        }

        /*
         * Real entries: convert the resume offset back into a directory
         * hash key and scan the directory's chain from there.
         */
        lkey = saveoff | HAMMER2_DIRHASH_VISIBLE;

        parent = &ip->chain;
        error = hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
        if (error) {
                /*
                 * NOTE(review): the chain is unlocked even though the
                 * lock call reported an error -- presumably the lock is
                 * still held on failure; confirm against
                 * hammer2_chain_lock().
                 */
                hammer2_chain_unlock(hmp, parent);
                goto done;
        }

        /*
         * Try an exact match on the resume key first, then fall back to
         * the first entry at or beyond it.
         */
        chain = hammer2_chain_lookup(hmp, &parent, lkey, lkey, 0);
        if (chain == NULL) {
                chain = hammer2_chain_lookup(hmp, &parent,
                                             lkey, (hammer2_key_t)-1, 0);
        }
        while (chain) {
                if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
                        dtype = hammer2_get_dtype(chain->u.ip);
                        saveoff = chain->bref.key & HAMMER2_DIRHASH_USERMSK;
                        r = vop_write_dirent(&error, uio,
                                             chain->u.ip->ip_data.inum &
                                              HAMMER2_DIRHASH_USERMSK,
                                             dtype, chain->u.ip->ip_data.name_len,
                                             chain->u.ip->ip_data.filename);
                        /*
                         * Entry did not fit (or failed): stop with saveoff
                         * still naming this entry so it is retried.
                         */
                        if (r)
                                break;
                        if (cookies)
                                cookies[cookie_index] = saveoff;
                        ++cookie_index;
                } else {
                        /* XXX chain error */
                        kprintf("bad chain type readdir %d\n",
                                chain->bref.type);
                }

                /*
                 * Keys may not be returned in order so once we have a
                 * placemarker (chain) the scan must allow the full range
                 * or some entries will be missed.
                 */
                chain = hammer2_chain_next(hmp, &parent, chain,
                                           0, (hammer2_key_t)-1, 0);
                /*
                 * NOTE(review): when the loop exits on the cookie limit
                 * below, saveoff is the NEXT chain's key + 1, so a resumed
                 * scan appears to start beyond that not-yet-returned
                 * entry -- verify.
                 */
                if (chain) {
                        saveoff = (chain->bref.key &
                                   HAMMER2_DIRHASH_USERMSK) + 1;
                } else {
                        saveoff = (hammer2_key_t)-1;
                }
                if (cookie_index == ncookies)
                        break;
        }
        if (chain)
                hammer2_chain_unlock(hmp, chain);
        hammer2_chain_unlock(hmp, parent);
done:
        /*
         * EOF is reported only when the scan ran out of chains; the early
         * exits leave chain at its non-NULL placeholder.
         */
        if (ap->a_eofflag)
                *ap->a_eofflag = (chain == NULL);
        uio->uio_offset = saveoff & ~HAMMER2_DIRHASH_VISIBLE;
        if (error && cookie_index == 0) {
                /* Nothing was returned: do not hand back a cookie array. */
                if (cookies) {
                        kfree(cookies, M_TEMP);
                        *ap->a_ncookies = 0;
                        *ap->a_cookies = NULL;
                }
        } else {
                /* Ownership of the cookie array passes to the caller. */
                if (cookies) {
                        *ap->a_ncookies = cookie_index;
                        *ap->a_cookies = cookies;
                }
        }
        return (error);
}
483
484 /*
485  * hammer2_vop_readlink { vp, uio, cred }
486  */
487 static
488 int
489 hammer2_vop_readlink(struct vop_readlink_args *ap)
490 {
491         struct vnode *vp;
492         hammer2_mount_t *hmp;
493         hammer2_inode_t *ip;
494         int error;
495
496         vp = ap->a_vp;
497         if (vp->v_type != VLNK)
498                 return (EINVAL);
499         ip = VTOI(vp);
500         hmp = ip->hmp;
501
502         error = hammer2_read_file(ip, ap->a_uio, 0);
503         return (error);
504 }
505
506 static
507 int
508 hammer2_vop_read(struct vop_read_args *ap)
509 {
510         struct vnode *vp;
511         hammer2_mount_t *hmp;
512         hammer2_inode_t *ip;
513         struct uio *uio;
514         int error;
515         int seqcount;
516         int bigread;
517
518         /*
519          * Read operations supported on this vnode?
520          */
521         vp = ap->a_vp;
522         if (vp->v_type != VREG)
523                 return (EINVAL);
524
525         /*
526          * Misc
527          */
528         ip = VTOI(vp);
529         hmp = ip->hmp;
530         uio = ap->a_uio;
531         error = 0;
532
533         seqcount = ap->a_ioflag >> 16;
534         bigread = (uio->uio_resid > 100 * 1024 * 1024);
535
536         error = hammer2_read_file(ip, uio, seqcount);
537         return (error);
538 }
539
540 static
541 int
542 hammer2_vop_write(struct vop_write_args *ap)
543 {
544         thread_t td;
545         struct vnode *vp;
546         hammer2_mount_t *hmp;
547         hammer2_inode_t *ip;
548         struct uio *uio;
549         int error;
550         int seqcount;
551         int bigwrite;
552
553         /*
554          * Read operations supported on this vnode?
555          */
556         vp = ap->a_vp;
557         if (vp->v_type != VREG)
558                 return (EINVAL);
559
560         /*
561          * Misc
562          */
563         ip = VTOI(vp);
564         hmp = ip->hmp;
565         uio = ap->a_uio;
566         error = 0;
567         if (hmp->ronly)
568                 return (EROFS);
569
570         seqcount = ap->a_ioflag >> 16;
571         bigwrite = (uio->uio_resid > 100 * 1024 * 1024);
572
573         /*
574          * Check resource limit
575          */
576         if (uio->uio_resid > 0 && (td = uio->uio_td) != NULL && td->td_proc &&
577             uio->uio_offset + uio->uio_resid >
578              td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
579                 lwpsignal(td->td_proc, td->td_lwp, SIGXFSZ);
580                 return (EFBIG);
581         }
582
583         bigwrite = (uio->uio_resid > 100 * 1024 * 1024);
584
585         /*
586          * ip must be locked if extending the file.
587          * ip must be locked to avoid racing a truncation.
588          *
589          * ip must be marked modified, particularly because the write
590          * might wind up being copied into the embedded data area.
591          */
592         hammer2_inode_lock_ex(ip);
593         hammer2_chain_modify(hmp, &ip->chain, 0);
594         error = hammer2_write_file(ip, uio, ap->a_ioflag, seqcount);
595
596         hammer2_inode_unlock_ex(ip);
597         return (error);
598 }
599
600 /*
601  * Perform read operations on a file or symlink given an UNLOCKED
602  * inode and uio.
603  */
604 static
605 int
606 hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, int seqcount)
607 {
608         struct buf *bp;
609         int error;
610
611         error = 0;
612
613         /*
614          * UIO read loop
615          */
616         while (uio->uio_resid > 0 && uio->uio_offset < ip->ip_data.size) {
617                 hammer2_key_t lbase;
618                 hammer2_key_t leof;
619                 int lblksize;
620                 int loff;
621                 int n;
622
623                 lblksize = hammer2_calc_logical(ip, uio->uio_offset,
624                                                 &lbase, &leof);
625
626                 error = cluster_read(ip->vp, leof, lbase, lblksize,
627                                      uio->uio_resid, seqcount * BKVASIZE,
628                                      &bp);
629
630                 if (error)
631                         break;
632                 loff = (int)(uio->uio_offset - lbase);
633                 n = lblksize - loff;
634                 if (n > uio->uio_resid)
635                         n = uio->uio_resid;
636                 if (n > ip->ip_data.size - uio->uio_offset)
637                         n = (int)(ip->ip_data.size - uio->uio_offset);
638                 bp->b_flags |= B_AGE;
639                 uiomove((char *)bp->b_data + loff, n, uio);
640                 bqrelse(bp);
641         }
642         return (error);
643 }
644
/*
 * Called with a locked (ip) to do the underlying write to a file or
 * to build the symlink target.
 *
 * ip       - inode being written
 * uio      - source data; uio_offset is forced to EOF for IO_APPEND
 * ioflag   - IO_* flags (IO_APPEND, IO_RECURSE, IO_SYNC, IO_DIRECT and
 *            IO_ASYNC are examined here)
 * seqcount - sequential-access hint handed to cluster_write()
 *
 * The file is extended up front to cover the whole write and then filled
 * one logical block at a time.  If any block fails, the file is truncated
 * back to its original EOF and the error is returned.
 *
 * The inode's chain is temporarily unlocked around bwillwrite() and
 * around the uiomove() data copy, and relocked afterwards.
 *
 * Returns 0 on success or the first error encountered.
 */
static
int
hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
                   int ioflag, int seqcount)
{
        hammer2_key_t old_eof;
        struct buf *bp;
        int kflags;     /* accumulated NOTE_* events; see knote call below */
        int error;

        /*
         * Setup if append
         */
        if (ioflag & IO_APPEND)
                uio->uio_offset = ip->ip_data.size;
        kflags = 0;
        error = 0;

        /*
         * Extend the file if necessary.  If the write fails at some point
         * we will truncate it back down to cover as much as we were able
         * to write.
         *
         * Doing this now makes it easier to calculate buffer sizes in
         * the loop.
         */
        old_eof = ip->ip_data.size;
        if (uio->uio_offset + uio->uio_resid > ip->ip_data.size) {
                hammer2_extend_file(ip, uio->uio_offset + uio->uio_resid);
                kflags |= NOTE_EXTEND;
        }

        /*
         * UIO write loop
         */
        while (uio->uio_resid > 0) {
                hammer2_key_t lbase;
                hammer2_key_t leof;
                int trivial;
                int lblksize;
                int loff;
                int n;

                /*
                 * Don't allow the buffer build to blow out the buffer
                 * cache.
                 */
                if ((ioflag & IO_RECURSE) == 0) {
                        /*
                         * XXX should try to leave this unlocked through
                         *      the whole loop
                         */
                        hammer2_chain_unlock(ip->hmp, &ip->chain);
                        bwillwrite(HAMMER2_PBUFSIZE);
                        hammer2_chain_lock(ip->hmp, &ip->chain,
                                           HAMMER2_RESOLVE_ALWAYS);
                }

                /* XXX bigwrite & signal check test */

                /*
                 * This nominally tells us how much we can cluster and
                 * what the logical buffer size needs to be.  Currently
                 * we don't try to cluster the write and just handle one
                 * block at a time.
                 */
                lblksize = hammer2_calc_logical(ip, uio->uio_offset,
                                                &lbase, &leof);
                loff = (int)(uio->uio_offset - lbase);

                /*
                 * Calculate bytes to copy this transfer and whether the
                 * copy completely covers the buffer or not.
                 *
                 * "trivial" means no existing block contents need to be
                 * read in first: either the copy starts at the block base
                 * and reaches the end of the block, or it is a short
                 * final copy that ends exactly at the file EOF.
                 */
                trivial = 0;
                n = lblksize - loff;
                if (n > uio->uio_resid) {
                        n = uio->uio_resid;
                        if (uio->uio_offset + n == ip->ip_data.size)
                                trivial = 1;
                } else if (loff == 0) {
                        trivial = 1;
                }

                /*
                 * Get the buffer
                 */
                if (uio->uio_segflg == UIO_NOCOPY) {
                        /*
                         * Issuing a write with the same data backing the
                         * buffer.  Instantiate the buffer to collect the
                         * backing vm pages, then read-in any missing bits.
                         *
                         * This case is used by vop_stdputpages().
                         */
                        bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0);
                        if ((bp->b_flags & B_CACHE) == 0) {
                                bqrelse(bp);
                                error = bread(ip->vp, lbase, lblksize, &bp);
                        }
                } else if (trivial) {
                        /*
                         * Even though we are entirely overwriting the buffer
                         * we may still have to zero it out to avoid a
                         * mmap/write visibility issue.
                         */
                        bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0);
                        if ((bp->b_flags & B_CACHE) == 0)
                                vfs_bio_clrbuf(bp);
                } else {
                        /*
                         * Partial overwrite, read in any missing bits then
                         * replace the portion being written.
                         *
                         * (The strategy code will detect zero-fill physical
                         * blocks for this case).
                         */
                        error = bread(ip->vp, lbase, lblksize, &bp);
                        if (error == 0)
                                bheavy(bp);
                }

                /* A failed bread() still hands back a buffer to release. */
                if (error) {
                        brelse(bp);
                        break;
                }

                /*
                 * We have to assign physical storage to the buffer we intend
                 * to dirty or write now to avoid deadlocks in the strategy
                 * code later.
                 *
                 * This can return NOOFFSET for inode-embedded data.  The
                 * strategy code will take care of it in that case.
                 */
                bp->b_bio2.bio_offset =
                        hammer2_assign_physical(ip, lbase, lblksize, &error);
                if (error) {
                        brelse(bp);
                        break;
                }

                /*
                 * Ok, copy the data in
                 *
                 * The chain is unlocked for the duration of the copy.
                 */
                hammer2_chain_unlock(ip->hmp, &ip->chain);
                error = uiomove(bp->b_data + loff, n, uio);
                hammer2_chain_lock(ip->hmp, &ip->chain, HAMMER2_RESOLVE_ALWAYS);
                kflags |= NOTE_WRITE;

                if (error) {
                        brelse(bp);
                        break;
                }

                /* XXX update ino_data.mtime */

                /*
                 * Once we dirty a buffer any cached offset becomes invalid.
                 *
                 * NOTE: For cluster_write() always use the trailing block
                 *       size, which is HAMMER2_PBUFSIZE.  lblksize is the
                 *       eof-straddling blocksize and is incorrect.
                 */
                bp->b_flags |= B_AGE;
                if (ioflag & IO_SYNC) {
                        /* synchronous write requested */
                        bwrite(bp);
                } else if ((ioflag & IO_DIRECT) && loff + n == lblksize) {
                        /* direct I/O that filled the block to its end */
                        bp->b_flags |= B_CLUSTEROK;
                        bdwrite(bp);
                } else if (ioflag & IO_ASYNC) {
                        bawrite(bp);
                } else if (hammer2_cluster_enable) {
                        bp->b_flags |= B_CLUSTEROK;
                        cluster_write(bp, leof, HAMMER2_PBUFSIZE, seqcount);
                } else {
                        bp->b_flags |= B_CLUSTEROK;
                        bdwrite(bp);
                }
        }

        /*
         * Cleanup.  If we extended the file EOF but failed to write through
         * the entire write is a failure and we have to back-up.
         */
        if (error && ip->ip_data.size != old_eof)
                hammer2_truncate_file(ip, old_eof);
        /* kflags is not consumed yet: the knote call is disabled. */
        /* hammer2_knote(ip->vp, kflags); */
        return error;
}
839
840 /*
841  * Assign physical storage to a logical block.
842  *
843  * NOOFFSET is returned if the data is inode-embedded.  In this case the
844  * strategy code will simply bcopy() the data into the inode.
845  */
846 static
847 hammer2_off_t
848 hammer2_assign_physical(hammer2_inode_t *ip, hammer2_key_t lbase,
849                         int lblksize, int *errorp)
850 {
851         hammer2_mount_t *hmp;
852         hammer2_chain_t *parent;
853         hammer2_chain_t *chain;
854         hammer2_off_t pbase;
855
856         *errorp = 0;
857         hmp = ip->hmp;
858
859         /*
860          * Locate the chain associated with lbase, return a locked chain.
861          * However, do not instantiate any data reference (which utilizes a
862          * device buffer) because we will be using direct IO via the
863          * logical buffer cache buffer.
864          */
865         parent = &ip->chain;
866         hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
867
868         chain = hammer2_chain_lookup(hmp, &parent,
869                                      lbase, lbase,
870                                      HAMMER2_LOOKUP_NODATA);
871
872         if (chain == NULL) {
873                 /*
874                  * We found a hole, create a new chain entry.
875                  *
876                  * NOTE: DATA chains are created without device backing
877                  *       store (nor do we want any).
878                  */
879                 chain = hammer2_chain_create(hmp, parent, NULL,
880                                              lbase, HAMMER2_PBUFRADIX,
881                                              HAMMER2_BREF_TYPE_DATA,
882                                              lblksize);
883                 pbase = chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX;
884         } else {
885                 switch (chain->bref.type) {
886                 case HAMMER2_BREF_TYPE_INODE:
887                         /*
888                          * The data is embedded in the inode.  The
889                          * caller is responsible for marking the inode
890                          * modified and copying the data to the embedded
891                          * area.
892                          */
893                         pbase = NOOFFSET;
894                         break;
895                 case HAMMER2_BREF_TYPE_DATA:
896                         if (chain->bytes != lblksize) {
897                                 panic("hammer2_assign_physical: "
898                                       "size mismatch %d/%d\n",
899                                       lblksize, chain->bytes);
900                         }
901                         hammer2_chain_modify(hmp, chain,
902                                              HAMMER2_MODIFY_OPTDATA);
903                         pbase = chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX;
904                         break;
905                 default:
906                         panic("hammer2_assign_physical: bad type");
907                         /* NOT REACHED */
908                         pbase = NOOFFSET;
909                         break;
910                 }
911         }
912
913         if (chain)
914                 hammer2_chain_unlock(hmp, chain);
915         hammer2_chain_unlock(hmp, parent);
916
917         return (pbase);
918 }
919
920 /*
921  * Truncate the size of a file.
922  *
923  * This routine adjusts ip->ip_data.size smaller, destroying any related
924  * data beyond the new EOF and potentially resizing the block straddling
925  * the EOF.
926  *
927  * The inode must be locked.
928  */
929 static
930 void
931 hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize)
932 {
933         hammer2_chain_t *parent;
934         hammer2_chain_t *chain;
935         hammer2_mount_t *hmp = ip->hmp;
936         hammer2_key_t lbase;
937         hammer2_key_t leof;
938         struct buf *bp;
939         int loff;
940         int error;
941         int oblksize;
942         int nblksize;
943
944         hammer2_chain_modify(hmp, &ip->chain, 0);
945         bp = NULL;
946
947         /*
948          * Destroy any logical buffer cache buffers beyond the file EOF.
949          *
950          * We call nvtruncbuf() w/ trivial == 1 to prevent it from messing
951          * around with the buffer straddling EOF, because we need to assign
952          * a new physical offset to it.
953          */
954         if (ip->vp) {
955                 nvtruncbuf(ip->vp, nsize,
956                            HAMMER2_PBUFSIZE, (int)nsize & HAMMER2_PBUFMASK,
957                            1);
958         }
959
960         /*
961          * Setup for lookup/search
962          */
963         parent = &ip->chain;
964         error = hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
965         if (error) {
966                 hammer2_chain_unlock(hmp, parent);
967                 /* XXX error reporting */
968                 return;
969         }
970
971         /*
972          * Handle the case where a chain/logical-buffer straddles the new
973          * EOF.  We told nvtruncbuf() above not to mess with the logical
974          * buffer straddling the EOF because we need to reassign its storage
975          * and can't let the strategy code do it for us.
976          */
977         loff = (int)nsize & HAMMER2_PBUFMASK;
978         if (loff && ip->vp) {
979                 oblksize = hammer2_calc_logical(ip, nsize, &lbase, &leof);
980                 error = bread(ip->vp, lbase, oblksize, &bp);
981                 KKASSERT(error == 0);
982         }
983         ip->ip_data.size = nsize;
984         nblksize = hammer2_calc_logical(ip, nsize, &lbase, &leof);
985
986         /*
987          * Fixup the chain element.  If we have a logical buffer in-hand
988          * we don't want to create a conflicting device buffer.
989          */
990         if (loff && bp) {
991                 chain = hammer2_chain_lookup(hmp, &parent, lbase, lbase,
992                                              HAMMER2_LOOKUP_NODATA);
993                 if (chain) {
994                         allocbuf(bp, nblksize);
995                         switch(chain->bref.type) {
996                         case HAMMER2_BREF_TYPE_DATA:
997                                 hammer2_chain_resize(hmp, chain,
998                                              hammer2_bytes_to_radix(nblksize),
999                                              HAMMER2_MODIFY_OPTDATA);
1000                                 bzero(bp->b_data + loff, nblksize - loff);
1001                                 bp->b_bio2.bio_offset = chain->bref.data_off &
1002                                                         HAMMER2_OFF_MASK;
1003                                 break;
1004                         case HAMMER2_BREF_TYPE_INODE:
1005                                 bzero(bp->b_data + loff, nblksize - loff);
1006                                 bp->b_bio2.bio_offset = NOOFFSET;
1007                                 break;
1008                         default:
1009                                 panic("hammer2_truncate_file: bad type");
1010                                 break;
1011                         }
1012                         hammer2_chain_unlock(hmp, chain);
1013                         bp->b_flags |= B_CLUSTEROK;
1014                         bdwrite(bp);
1015                 } else {
1016                         /*
1017                          * Destroy clean buffer w/ wrong buffer size.  Retain
1018                          * backing store.
1019                          */
1020                         bp->b_flags |= B_RELBUF;
1021                         KKASSERT(bp->b_bio2.bio_offset == NOOFFSET);
1022                         KKASSERT((bp->b_flags & B_DIRTY) == 0);
1023                         bqrelse(bp);
1024                 }
1025         } else if (loff) {
1026                 /*
1027                  * WARNING: This utilizes a device buffer for the data.
1028                  *
1029                  * XXX case should not occur
1030                  */
1031                 panic("hammer2_truncate_file: non-zero truncation, no-vnode");
1032                 chain = hammer2_chain_lookup(hmp, &parent, lbase, lbase, 0);
1033                 if (chain) {
1034                         switch(chain->bref.type) {
1035                         case HAMMER2_BREF_TYPE_DATA:
1036                                 hammer2_chain_resize(hmp, chain,
1037                                              hammer2_bytes_to_radix(nblksize),
1038                                              0);
1039                                 hammer2_chain_modify(hmp, chain, 0);
1040                                 bzero(chain->data->buf + loff, nblksize - loff);
1041                                 break;
1042                         case HAMMER2_BREF_TYPE_INODE:
1043                                 if (loff < HAMMER2_EMBEDDED_BYTES) {
1044                                         hammer2_chain_modify(hmp, chain, 0);
1045                                         bzero(chain->data->ipdata.u.data + loff,
1046                                               HAMMER2_EMBEDDED_BYTES - loff);
1047                                 }
1048                                 break;
1049                         }
1050                         hammer2_chain_unlock(hmp, chain);
1051                 }
1052         }
1053
1054         /*
1055          * Clean up any fragmentory VM pages now that we have properly
1056          * resized the straddling buffer.  These pages are no longer
1057          * part of the buffer.
1058          */
1059         if (ip->vp) {
1060                 nvtruncbuf(ip->vp, nsize,
1061                            nblksize, (int)nsize & (nblksize - 1),
1062                            1);
1063         }
1064
1065         /*
1066          * Destroy any physical blocks after the new EOF point.
1067          */
1068         lbase = (nsize + HAMMER2_PBUFMASK64) & ~HAMMER2_PBUFMASK64;
1069         chain = hammer2_chain_lookup(hmp, &parent,
1070                                      lbase, (hammer2_key_t)-1,
1071                                      HAMMER2_LOOKUP_NODATA);
1072         while (chain) {
1073                 /*
1074                  * Degenerate embedded data case, nothing to loop on.
1075                  */
1076                 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
1077                         hammer2_chain_unlock(hmp, chain);
1078                         break;
1079                 }
1080
1081                 /*
1082                  * Delete physical data blocks past the file EOF.
1083                  */
1084                 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
1085                         hammer2_chain_delete(hmp, parent, chain);
1086                 }
1087                 /* XXX check parent if empty indirect block & delete */
1088                 chain = hammer2_chain_next(hmp, &parent, chain,
1089                                            lbase, (hammer2_key_t)-1,
1090                                            HAMMER2_LOOKUP_NODATA);
1091         }
1092         hammer2_chain_unlock(hmp, parent);
1093 }
1094
1095 /*
1096  * Extend the size of a file.  The inode must be locked.
1097  *
1098  * We may have to resize the block straddling the old EOF.
1099  */
1100 static
1101 void
1102 hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize)
1103 {
1104         hammer2_mount_t *hmp;
1105         hammer2_chain_t *parent;
1106         hammer2_chain_t *chain;
1107         struct buf *bp;
1108         hammer2_key_t osize;
1109         hammer2_key_t obase;
1110         hammer2_key_t nbase;
1111         hammer2_key_t leof;
1112         int oblksize;
1113         int nblksize;
1114         int nradix;
1115         int error;
1116
1117         KKASSERT(ip->vp);
1118         hmp = ip->hmp;
1119
1120         hammer2_chain_modify(hmp, &ip->chain, 0);
1121
1122         /*
1123          * Nothing to do if the direct-data case is still intact
1124          */
1125         if ((ip->ip_data.op_flags & HAMMER2_OPFLAG_DIRECTDATA) &&
1126             nsize <= HAMMER2_EMBEDDED_BYTES) {
1127                 ip->ip_data.size = nsize;
1128                 return;
1129         }
1130
1131         /*
1132          * Calculate the blocksize at the original EOF and resize the block
1133          * if necessary.  Adjust the file size in the inode.
1134          */
1135         osize = ip->ip_data.size;
1136         oblksize = hammer2_calc_logical(ip, osize, &obase, &leof);
1137         ip->ip_data.size = nsize;
1138         nblksize = hammer2_calc_logical(ip, osize, &nbase, &leof);
1139
1140         /*
1141          * Do all required vnode operations, but do not mess with the
1142          * buffer straddling the orignal EOF.
1143          */
1144         nvextendbuf(ip->vp,
1145                     ip->ip_data.size, nsize,
1146                     0, nblksize,
1147                     0, (int)nsize & HAMMER2_PBUFMASK,
1148                     1);
1149
1150         /*
1151          * Early return if we have no more work to do.
1152          */
1153         if (obase == nbase && oblksize == nblksize &&
1154             (ip->ip_data.op_flags & HAMMER2_OPFLAG_DIRECTDATA) == 0) {
1155                 return;
1156         }
1157
1158         /*
1159          * We have work to do, including possibly resizing the buffer
1160          * at the EOF point and turning off DIRECTDATA mode.
1161          */
1162         bp = NULL;
1163         if (((int)osize & HAMMER2_PBUFMASK)) {
1164                 error = bread(ip->vp, obase, oblksize, &bp);
1165                 KKASSERT(error == 0);
1166
1167                 if (obase != nbase) {
1168                         allocbuf(bp, HAMMER2_PBUFSIZE);
1169                 } else {
1170                         allocbuf(bp, nblksize);
1171                 }
1172                 vfs_bio_clrbuf(bp);
1173         }
1174
1175         /*
1176          * Disable direct-data mode by loading up a buffer cache buffer
1177          * with the data, then converting the inode data area into the
1178          * inode indirect block array area.
1179          */
1180         if (ip->ip_data.op_flags & HAMMER2_OPFLAG_DIRECTDATA) {
1181                 ip->ip_data.op_flags &= ~HAMMER2_OPFLAG_DIRECTDATA;
1182                 bzero(&ip->ip_data.u.blockset, sizeof(ip->ip_data.u.blockset));
1183         }
1184
1185         /*
1186          * Resize the chain element at the old EOF.
1187          */
1188         if (((int)osize & HAMMER2_PBUFMASK)) {
1189                 parent = &ip->chain;
1190                 error = hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
1191                 KKASSERT(error == 0);
1192
1193                 nradix = hammer2_bytes_to_radix(nblksize);
1194
1195                 chain = hammer2_chain_lookup(hmp, &parent,
1196                                              obase, obase,
1197                                              HAMMER2_LOOKUP_NODATA);
1198                 if (chain == NULL) {
1199                         chain = hammer2_chain_create(hmp, parent, NULL,
1200                                                      obase, nblksize,
1201                                                      HAMMER2_BREF_TYPE_DATA,
1202                                                      nblksize);
1203                 } else {
1204                         KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA);
1205                         hammer2_chain_resize(hmp, chain, nradix,
1206                                              HAMMER2_MODIFY_OPTDATA);
1207                 }
1208                 bp->b_bio2.bio_offset = chain->bref.data_off &
1209                                         HAMMER2_OFF_MASK;
1210                 hammer2_chain_unlock(hmp, chain);
1211                 bp->b_flags |= B_CLUSTEROK;
1212                 bdwrite(bp);
1213                 hammer2_chain_unlock(hmp, parent);
1214         }
1215 }
1216
1217 static
1218 int
1219 hammer2_vop_nresolve(struct vop_nresolve_args *ap)
1220 {
1221         hammer2_inode_t *dip;
1222         hammer2_mount_t *hmp;
1223         hammer2_chain_t *parent;
1224         hammer2_chain_t *chain;
1225         struct namecache *ncp;
1226         const uint8_t *name;
1227         size_t name_len;
1228         hammer2_key_t lhc;
1229         int error = 0;
1230         struct vnode *vp;
1231
1232         dip = VTOI(ap->a_dvp);
1233         hmp = dip->hmp;
1234         ncp = ap->a_nch->ncp;
1235         name = ncp->nc_name;
1236         name_len = ncp->nc_nlen;
1237         lhc = hammer2_dirhash(name, name_len);
1238
1239         /*
1240          * Note: In DragonFly the kernel handles '.' and '..'.
1241          */
1242         parent = &dip->chain;
1243         hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
1244         chain = hammer2_chain_lookup(hmp, &parent,
1245                                      lhc, lhc + HAMMER2_DIRHASH_LOMASK,
1246                                      0);
1247         while (chain) {
1248                 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
1249                     chain->u.ip &&
1250                     name_len == chain->data->ipdata.name_len &&
1251                     bcmp(name, chain->data->ipdata.filename, name_len) == 0) {
1252                         break;
1253                 }
1254                 chain = hammer2_chain_next(hmp, &parent, chain,
1255                                            lhc, lhc + HAMMER2_DIRHASH_LOMASK,
1256                                            0);
1257         }
1258         hammer2_chain_unlock(hmp, parent);
1259
1260         if (chain) {
1261                 vp = hammer2_igetv(chain->u.ip, &error);
1262                 if (error == 0) {
1263                         vn_unlock(vp);
1264                         cache_setvp(ap->a_nch, vp);
1265                         vrele(vp);
1266                 }
1267                 hammer2_chain_unlock(hmp, chain);
1268         } else {
1269                 error = ENOENT;
1270                 cache_setvp(ap->a_nch, NULL);
1271         }
1272         return error;
1273 }
1274
1275 static
1276 int
1277 hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
1278 {
1279         hammer2_inode_t *dip;
1280         hammer2_inode_t *ip;
1281         hammer2_mount_t *hmp;
1282         int error;
1283
1284         dip = VTOI(ap->a_dvp);
1285         hmp = dip->hmp;
1286
1287         if ((ip = dip->pip) == NULL) {
1288                 *ap->a_vpp = NULL;
1289                 return ENOENT;
1290         }
1291         hammer2_chain_lock(hmp, &ip->chain, HAMMER2_RESOLVE_ALWAYS);
1292         *ap->a_vpp = hammer2_igetv(ip, &error);
1293         hammer2_chain_unlock(hmp, &ip->chain);
1294
1295         return error;
1296 }
1297
1298 static
1299 int
1300 hammer2_vop_nmkdir(struct vop_nmkdir_args *ap)
1301 {
1302         hammer2_mount_t *hmp;
1303         hammer2_inode_t *dip;
1304         hammer2_inode_t *nip;
1305         struct namecache *ncp;
1306         const uint8_t *name;
1307         size_t name_len;
1308         int error;
1309
1310         dip = VTOI(ap->a_dvp);
1311         hmp = dip->hmp;
1312         if (hmp->ronly)
1313                 return (EROFS);
1314
1315         ncp = ap->a_nch->ncp;
1316         name = ncp->nc_name;
1317         name_len = ncp->nc_nlen;
1318
1319         error = hammer2_inode_create(hmp, ap->a_vap, ap->a_cred,
1320                                      dip, name, name_len, &nip);
1321         if (error) {
1322                 KKASSERT(nip == NULL);
1323                 *ap->a_vpp = NULL;
1324                 return error;
1325         }
1326         *ap->a_vpp = hammer2_igetv(nip, &error);
1327         hammer2_chain_unlock(hmp, &nip->chain);
1328
1329         if (error == 0) {
1330                 cache_setunresolved(ap->a_nch);
1331                 cache_setvp(ap->a_nch, *ap->a_vpp);
1332         }
1333         return error;
1334 }
1335
1336 /*
1337  * Return the largest contiguous physical disk range for the logical
1338  * request.
1339  *
1340  * (struct vnode *vp, off_t loffset, off_t *doffsetp, int *runp, int *runb)
1341  */
1342 static
1343 int
1344 hammer2_vop_bmap(struct vop_bmap_args *ap)
1345 {
1346         struct vnode *vp;
1347         hammer2_mount_t *hmp;
1348         hammer2_inode_t *ip;
1349         hammer2_chain_t *parent;
1350         hammer2_chain_t *chain;
1351         hammer2_key_t lbeg;
1352         hammer2_key_t lend;
1353         hammer2_off_t pbeg;
1354         hammer2_off_t pbytes;
1355         hammer2_off_t array[HAMMER2_BMAP_COUNT][2];
1356         int loff;
1357         int ai;
1358
1359         /*
1360          * Only supported on regular files
1361          *
1362          * Only supported for read operations (required for cluster_read).
1363          * The block allocation is delayed for write operations.
1364          */
1365         vp = ap->a_vp;
1366         if (vp->v_type != VREG)
1367                 return (EOPNOTSUPP);
1368         if (ap->a_cmd != BUF_CMD_READ)
1369                 return (EOPNOTSUPP);
1370
1371         ip = VTOI(vp);
1372         hmp = ip->hmp;
1373         bzero(array, sizeof(array));
1374
1375         /*
1376          * Calculate logical range
1377          */
1378         KKASSERT((ap->a_loffset & HAMMER2_LBUFMASK64) == 0);
1379         lbeg = ap->a_loffset & HAMMER2_OFF_MASK_HI;
1380         lend = lbeg + HAMMER2_BMAP_COUNT * HAMMER2_PBUFSIZE - 1;
1381         if (lend < lbeg)
1382                 lend = lbeg;
1383         loff = ap->a_loffset & HAMMER2_OFF_MASK_LO;
1384
1385         parent = &ip->chain;
1386         hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
1387         chain = hammer2_chain_lookup(hmp, &parent,
1388                                      lbeg, lend,
1389                                      HAMMER2_LOOKUP_NODATA);
1390         if (chain == NULL) {
1391                 *ap->a_doffsetp = ZFOFFSET;
1392                 hammer2_chain_unlock(hmp, parent);
1393                 return (0);
1394         }
1395
1396         while (chain) {
1397                 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
1398                         ai = (chain->bref.key - lbeg) / HAMMER2_PBUFSIZE;
1399                         KKASSERT(ai >= 0 && ai < HAMMER2_BMAP_COUNT);
1400                         array[ai][0] = chain->bref.data_off & HAMMER2_OFF_MASK;
1401                         array[ai][1] = chain->bytes;
1402                 }
1403                 chain = hammer2_chain_next(hmp, &parent, chain,
1404                                            lbeg, lend,
1405                                            HAMMER2_LOOKUP_NODATA);
1406         }
1407         hammer2_chain_unlock(hmp, parent);
1408
1409         /*
1410          * If the requested loffset is not mappable physically we can't
1411          * bmap.  The caller will have to access the file data via a
1412          * device buffer.
1413          */
1414         if (array[0][0] == 0 || array[0][1] < loff + HAMMER2_LBUFSIZE) {
1415                 *ap->a_doffsetp = NOOFFSET;
1416                 return (0);
1417         }
1418
1419         /*
1420          * Calculate the physical disk offset range for array[0]
1421          */
1422         pbeg = array[0][0] + loff;
1423         pbytes = array[0][1] - loff;
1424
1425         for (ai = 1; ai < HAMMER2_BMAP_COUNT; ++ai) {
1426                 if (array[ai][0] != pbeg + pbytes)
1427                         break;
1428                 pbytes += array[ai][1];
1429         }
1430
1431         *ap->a_doffsetp = pbeg;
1432         if (ap->a_runp)
1433                 *ap->a_runp = pbytes;
1434         return (0);
1435 }
1436
/*
 * Open a vnode; handled entirely by vop_stdopen().
 */
static
int
hammer2_vop_open(struct vop_open_args *ap)
{
	int error;

	error = vop_stdopen(ap);
	return (error);
}
1443
1444 /*
1445  * hammer2_vop_advlock { vp, id, op, fl, flags }
1446  */
1447 static
1448 int
1449 hammer2_vop_advlock(struct vop_advlock_args *ap)
1450 {
1451         hammer2_inode_t *ip = VTOI(ap->a_vp);
1452
1453         return (lf_advlock(ap, &ip->advlock, ip->ip_data.size));
1454 }
1455
1456
/*
 * Close a vnode; handled entirely by vop_stdclose().
 */
static
int
hammer2_vop_close(struct vop_close_args *ap)
{
	int error;

	error = vop_stdclose(ap);
	return (error);
}
1463
1464 /*
1465  * hammer2_vop_nlink { nch, dvp, vp, cred }
1466  *
1467  * Create a hardlink to vp.
1468  */
1469 static
1470 int
1471 hammer2_vop_nlink(struct vop_nlink_args *ap)
1472 {
1473         hammer2_inode_t *dip;
1474         hammer2_inode_t *ip;    /* inode we are hardlinking to */
1475         hammer2_mount_t *hmp;
1476         struct namecache *ncp;
1477         const uint8_t *name;
1478         size_t name_len;
1479         int error;
1480
1481         dip = VTOI(ap->a_dvp);
1482         hmp = dip->hmp;
1483         if (hmp->ronly)
1484                 return (EROFS);
1485
1486         ip = VTOI(ap->a_vp);
1487
1488         ncp = ap->a_nch->ncp;
1489         name = ncp->nc_name;
1490         name_len = ncp->nc_nlen;
1491
1492         error = hammer2_hardlink_create(ip, dip, name, name_len);
1493         if (error == 0) {
1494                 cache_setunresolved(ap->a_nch);
1495                 cache_setvp(ap->a_nch, ap->a_vp);
1496         }
1497         return error;
1498 }
1499
1500 /*
1501  * hammer2_vop_ncreate { nch, dvp, vpp, cred, vap }
1502  *
1503  * The operating system has already ensured that the directory entry
1504  * does not exist and done all appropriate namespace locking.
1505  */
1506 static
1507 int
1508 hammer2_vop_ncreate(struct vop_ncreate_args *ap)
1509 {
1510         hammer2_mount_t *hmp;
1511         hammer2_inode_t *dip;
1512         hammer2_inode_t *nip;
1513         struct namecache *ncp;
1514         const uint8_t *name;
1515         size_t name_len;
1516         int error;
1517
1518         dip = VTOI(ap->a_dvp);
1519         hmp = dip->hmp;
1520         if (hmp->ronly)
1521                 return (EROFS);
1522
1523         ncp = ap->a_nch->ncp;
1524         name = ncp->nc_name;
1525         name_len = ncp->nc_nlen;
1526
1527         error = hammer2_inode_create(hmp, ap->a_vap, ap->a_cred,
1528                                      dip, name, name_len, &nip);
1529         if (error) {
1530                 KKASSERT(nip == NULL);
1531                 *ap->a_vpp = NULL;
1532                 return error;
1533         }
1534         *ap->a_vpp = hammer2_igetv(nip, &error);
1535         hammer2_chain_unlock(hmp, &nip->chain);
1536
1537         if (error == 0) {
1538                 cache_setunresolved(ap->a_nch);
1539                 cache_setvp(ap->a_nch, *ap->a_vpp);
1540         }
1541         return error;
1542 }
1543
1544 /*
1545  * hammer2_vop_nsymlink { nch, dvp, vpp, cred, vap, target }
1546  */
1547 static
1548 int
1549 hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
1550 {
1551         hammer2_mount_t *hmp;
1552         hammer2_inode_t *dip;
1553         hammer2_inode_t *nip;
1554         struct namecache *ncp;
1555         const uint8_t *name;
1556         size_t name_len;
1557         int error;
1558
1559         dip = VTOI(ap->a_dvp);
1560         hmp = dip->hmp;
1561         if (hmp->ronly)
1562                 return (EROFS);
1563
1564         ncp = ap->a_nch->ncp;
1565         name = ncp->nc_name;
1566         name_len = ncp->nc_nlen;
1567
1568         ap->a_vap->va_type = VLNK;      /* enforce type */
1569
1570         error = hammer2_inode_create(hmp, ap->a_vap, ap->a_cred,
1571                                      dip, name, name_len, &nip);
1572         if (error) {
1573                 KKASSERT(nip == NULL);
1574                 *ap->a_vpp = NULL;
1575                 return error;
1576         }
1577         *ap->a_vpp = hammer2_igetv(nip, &error);
1578
1579         /*
1580          * Build the softlink (~like file data) and finalize the namecache.
1581          */
1582         if (error == 0) {
1583                 size_t bytes;
1584                 struct uio auio;
1585                 struct iovec aiov;
1586
1587                 bytes = strlen(ap->a_target);
1588
1589                 if (bytes <= HAMMER2_EMBEDDED_BYTES) {
1590                         KKASSERT(nip->ip_data.op_flags &
1591                                  HAMMER2_OPFLAG_DIRECTDATA);
1592                         bcopy(ap->a_target, nip->ip_data.u.data, bytes);
1593                         nip->ip_data.size = bytes;
1594                 } else {
1595                         bzero(&auio, sizeof(auio));
1596                         bzero(&aiov, sizeof(aiov));
1597                         auio.uio_iov = &aiov;
1598                         auio.uio_segflg = UIO_SYSSPACE;
1599                         auio.uio_rw = UIO_WRITE;
1600                         auio.uio_resid = bytes;
1601                         auio.uio_iovcnt = 1;
1602                         auio.uio_td = curthread;
1603                         aiov.iov_base = ap->a_target;
1604                         aiov.iov_len = bytes;
1605                         error = hammer2_write_file(nip, &auio, IO_APPEND, 0);
1606                         /* XXX handle error */
1607                         error = 0;
1608                 }
1609         }
1610         hammer2_chain_unlock(hmp, &nip->chain);
1611
1612         /*
1613          * Finalize namecache
1614          */
1615         if (error == 0) {
1616                 cache_setunresolved(ap->a_nch);
1617                 cache_setvp(ap->a_nch, *ap->a_vpp);
1618                 /* hammer2_knote(ap->a_dvp, NOTE_WRITE); */
1619         }
1620         return error;
1621 }
1622
1623 /*
1624  * hammer2_vop_nremove { nch, dvp, cred }
1625  */
1626 static
1627 int
1628 hammer2_vop_nremove(struct vop_nremove_args *ap)
1629 {
1630         hammer2_inode_t *dip;
1631         hammer2_mount_t *hmp;
1632         struct namecache *ncp;
1633         const uint8_t *name;
1634         size_t name_len;
1635         int error;
1636
1637         dip = VTOI(ap->a_dvp);
1638         hmp = dip->hmp;
1639         if (hmp->ronly)
1640                 return(EROFS);
1641
1642         ncp = ap->a_nch->ncp;
1643         name = ncp->nc_name;
1644         name_len = ncp->nc_nlen;
1645
1646         error = hammer2_unlink_file(dip, name, name_len, 0, 1);
1647
1648         if (error == 0) {
1649                 cache_setunresolved(ap->a_nch);
1650                 cache_setvp(ap->a_nch, NULL);
1651         }
1652         return (error);
1653 }
1654
1655 /*
1656  * hammer2_vop_nrmdir { nch, dvp, cred }
1657  */
1658 static
1659 int
1660 hammer2_vop_nrmdir(struct vop_nrmdir_args *ap)
1661 {
1662         hammer2_inode_t *dip;
1663         hammer2_mount_t *hmp;
1664         struct namecache *ncp;
1665         const uint8_t *name;
1666         size_t name_len;
1667         int error;
1668
1669         dip = VTOI(ap->a_dvp);
1670         hmp = dip->hmp;
1671         if (hmp->ronly)
1672                 return(EROFS);
1673
1674         ncp = ap->a_nch->ncp;
1675         name = ncp->nc_name;
1676         name_len = ncp->nc_nlen;
1677
1678         error = hammer2_unlink_file(dip, name, name_len, 1, 1);
1679
1680         if (error == 0) {
1681                 cache_setunresolved(ap->a_nch);
1682                 cache_setvp(ap->a_nch, NULL);
1683         }
1684         return (error);
1685 }
1686
1687 /*
1688  * hammer2_vop_nrename { fnch, tnch, fdvp, tdvp, cred }
1689  */
1690 static
1691 int
1692 hammer2_vop_nrename(struct vop_nrename_args *ap)
1693 {
1694         struct namecache *fncp;
1695         struct namecache *tncp;
1696         hammer2_inode_t *fdip;
1697         hammer2_inode_t *tdip;
1698         hammer2_inode_t *ip;
1699         hammer2_mount_t *hmp;
1700         const uint8_t *fname;
1701         size_t fname_len;
1702         const uint8_t *tname;
1703         size_t tname_len;
1704         int error;
1705
1706         if (ap->a_fdvp->v_mount != ap->a_tdvp->v_mount)
1707                 return(EXDEV);
1708         if (ap->a_fdvp->v_mount != ap->a_fnch->ncp->nc_vp->v_mount)
1709                 return(EXDEV);
1710
1711         fdip = VTOI(ap->a_fdvp);        /* source directory */
1712         tdip = VTOI(ap->a_tdvp);        /* target directory */
1713
1714         hmp = fdip->hmp;                /* check read-only filesystem */
1715         if (hmp->ronly)
1716                 return(EROFS);
1717
1718         fncp = ap->a_fnch->ncp;         /* entry name in source */
1719         fname = fncp->nc_name;
1720         fname_len = fncp->nc_nlen;
1721
1722         tncp = ap->a_tnch->ncp;         /* entry name in target */
1723         tname = tncp->nc_name;
1724         tname_len = tncp->nc_nlen;
1725
1726         ip = VTOI(fncp->nc_vp);         /* inode being moved */
1727
1728         /*
1729          * Keep a tight grip on the inode as removing it should disconnect
1730          * it and we don't want to destroy it.
1731          *
1732          * NOTE: To avoid deadlocks we cannot lock (ip) while we are
1733          *       unlinking elements from their directories.
1734          */
1735         hammer2_chain_ref(hmp, &ip->chain);
1736
1737         /*
1738          * Remove target if it exists
1739          */
1740         error = hammer2_unlink_file(tdip, tname, tname_len, -1, 1);
1741         if (error && error != ENOENT)
1742                 goto done;
1743         cache_setunresolved(ap->a_tnch);
1744         cache_setvp(ap->a_tnch, NULL);
1745
1746         /*
1747          * Disconnect ip from the source directory, do not adjust
1748          * the link count.  Note that rename doesn't need to understand
1749          * whether this is a hardlink or not, we can just rename the
1750          * forwarding entry and don't even have to adjust the related
1751          * hardlink's link count.
1752          */
1753         error = hammer2_unlink_file(fdip, fname, fname_len, -1, 0);
1754         if (error)
1755                 goto done;
1756
1757         if (ip->chain.parent != NULL)
1758                 panic("hammer2_vop_nrename(): rename source != ip!");
1759
1760         /*
1761          * Reconnect ip to target directory.
1762          *
1763          * WARNING: chain locks can lock buffer cache buffers, to avoid
1764          *          deadlocks we want to unlock before issuing a cache_*()
1765          *          op (that might have to lock a vnode).
1766          */
1767         hammer2_chain_lock(hmp, &ip->chain, HAMMER2_RESOLVE_ALWAYS);
1768         error = hammer2_inode_connect(tdip, ip, tname, tname_len);
1769         hammer2_chain_unlock(hmp, &ip->chain);
1770
1771         if (error == 0) {
1772                 cache_rename(ap->a_fnch, ap->a_tnch);
1773         }
1774 done:
1775         hammer2_chain_drop(hmp, &ip->chain);    /* from ref up top */
1776
1777         return (error);
1778 }
1779
1780 /*
1781  * Unlink the file from the specified directory inode.  The directory inode
1782  * does not need to be locked.
1783  *
1784  * isdir determines whether a directory/non-directory check should be made.
1785  * No check is made if isdir is set to -1.
1786  *
1787  * adjlinks tells unlink that we want to adjust the nlinks count of the
1788  * inode.  When removing the last link for a NON forwarding entry we can
1789  * just ignore the link count... no point updating the inode that we are
1790  * about to dereference, it would just result in a lot of wasted I/O.
1791  *
1792  * However, if the entry is a forwarding entry (aka a hardlink), and adjlinks
1793  * is non-zero, we have to locate the hardlink and adjust its nlinks field.
1794  */
1795 static
1796 int
1797 hammer2_unlink_file(hammer2_inode_t *dip, const uint8_t *name, size_t name_len,
1798                     int isdir, int adjlinks)
1799 {
1800         hammer2_mount_t *hmp;
1801         hammer2_chain_t *parent;
1802         hammer2_chain_t *chain;
1803         hammer2_chain_t *dparent;
1804         hammer2_chain_t *dchain;
1805         hammer2_key_t lhc;
1806         int error;
1807
1808         error = 0;
1809
1810         hmp = dip->hmp;
1811         lhc = hammer2_dirhash(name, name_len);
1812
1813         /*
1814          * Search for the filename in the directory
1815          */
1816         parent = &dip->chain;
1817         hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
1818         chain = hammer2_chain_lookup(hmp, &parent,
1819                                      lhc, lhc + HAMMER2_DIRHASH_LOMASK,
1820                                      0);
1821         while (chain) {
1822                 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
1823                     chain->u.ip &&
1824                     name_len == chain->data->ipdata.name_len &&
1825                     bcmp(name, chain->data->ipdata.filename, name_len) == 0) {
1826                         break;
1827                 }
1828                 chain = hammer2_chain_next(hmp, &parent, chain,
1829                                            lhc, lhc + HAMMER2_DIRHASH_LOMASK,
1830                                            0);
1831         }
1832
1833         /*
1834          * Not found or wrong type (isdir < 0 disables the type check).
1835          */
1836         if (chain == NULL) {
1837                 hammer2_chain_unlock(hmp, parent);
1838                 return ENOENT;
1839         }
1840         if (chain->data->ipdata.type == HAMMER2_OBJTYPE_DIRECTORY &&
1841             isdir == 0) {
1842                 error = ENOTDIR;
1843                 goto done;
1844         }
1845         if (chain->data->ipdata.type != HAMMER2_OBJTYPE_DIRECTORY &&
1846             isdir == 1) {
1847                 error = EISDIR;
1848                 goto done;
1849         }
1850
1851         /*
1852          * If this is a directory the directory must be empty.  However, if
1853          * isdir < 0 we are doing a rename and the directory does not have
1854          * to be empty.
1855          */
1856         if (chain->data->ipdata.type == HAMMER2_OBJTYPE_DIRECTORY &&
1857             isdir >= 0) {
1858                 dparent = chain;
1859                 hammer2_chain_lock(hmp, dparent, HAMMER2_RESOLVE_ALWAYS);
1860                 dchain = hammer2_chain_lookup(hmp, &dparent,
1861                                               0, (hammer2_key_t)-1,
1862                                               HAMMER2_LOOKUP_NODATA);
1863                 if (dchain) {
1864                         hammer2_chain_unlock(hmp, dchain);
1865                         hammer2_chain_unlock(hmp, dparent);
1866                         error = ENOTEMPTY;
1867                         goto done;
1868                 }
1869                 hammer2_chain_unlock(hmp, dparent);
1870                 dparent = NULL;
1871                 /* dchain NULL */
1872         }
1873
1874 #if 0
1875         /*
1876          * If adjlinks is non-zero this is a real deletion (otherwise it is
1877          * probably a rename).  XXX
1878          */
1879         if (adjlinks) {
1880                 if (chain->data->ipdata.type == HAMMER2_OBJTYPE_HARDLINK) {
1881                         /*hammer2_adjust_hardlink(chain->u.ip, -1);*/
1882                         /* error handling */
1883                 } else {
1884                         waslastlink = 1;
1885                 }
1886         } else {
1887                 waslastlink = 0;
1888         }
1889 #endif
1890
1891         /*
1892          * Found, the chain represents the inode.  Remove the parent reference
1893          * to the chain.  The chain itself is no longer referenced and will
1894          * be marked unmodified by hammer2_chain_delete(), avoiding unnecessary
1895          * I/O.
1896          */
1897         hammer2_chain_delete(hmp, parent, chain);
1898         /* XXX nlinks (hardlink special case) */
1899         /* XXX nlinks (parent directory) */
1900
1901 #if 0
1902         /*
1903          * Destroy any associated vnode, but only if this was the last
1904          * link.  XXX this might not be needed.
1905          */
1906         if (chain->u.ip->vp) {
1907                 struct vnode *vp;
1908                 vp = hammer2_igetv(chain->u.ip, &error);
1909                 if (error == 0) {
1910                         vn_unlock(vp);
1911                         /* hammer2_knote(vp, NOTE_DELETE); */
1912                         cache_inval_vp(vp, CINV_DESTROY);
1913                         vrele(vp);
1914                 }
1915         }
1916 #endif
1917         error = 0;
1918
1919 done:
1920         hammer2_chain_unlock(hmp, chain);
1921         hammer2_chain_unlock(hmp, parent);
1922
1923         return error;
1924 }
1925
1926
1927 static int hammer2_strategy_read(struct vop_strategy_args *ap);
1928 static int hammer2_strategy_write(struct vop_strategy_args *ap);
1929
1930 static
1931 int
1932 hammer2_vop_strategy(struct vop_strategy_args *ap)
1933 {
1934         struct bio *biop;
1935         struct buf *bp;
1936         int error;
1937
1938         biop = ap->a_bio;
1939         bp = biop->bio_buf;
1940
1941         switch(bp->b_cmd) {
1942         case BUF_CMD_READ:
1943                 error = hammer2_strategy_read(ap);
1944                 ++hammer2_iod_file_read;
1945                 break;
1946         case BUF_CMD_WRITE:
1947                 error = hammer2_strategy_write(ap);
1948                 ++hammer2_iod_file_write;
1949                 break;
1950         default:
1951                 bp->b_error = error = EINVAL;
1952                 bp->b_flags |= B_ERROR;
1953                 biodone(biop);
1954                 break;
1955         }
1956
1957         return (error);
1958 }
1959
1960 static
1961 int
1962 hammer2_strategy_read(struct vop_strategy_args *ap)
1963 {
1964         struct buf *bp;
1965         struct bio *bio;
1966         struct bio *nbio;
1967         hammer2_mount_t *hmp;
1968         hammer2_inode_t *ip;
1969         hammer2_chain_t *parent;
1970         hammer2_chain_t *chain;
1971         hammer2_key_t lbase;
1972
1973         bio = ap->a_bio;
1974         bp = bio->bio_buf;
1975         ip = VTOI(ap->a_vp);
1976         hmp = ip->hmp;
1977         nbio = push_bio(bio);
1978
1979         lbase = bio->bio_offset;
1980         chain = NULL;
1981         KKASSERT(((int)lbase & HAMMER2_PBUFMASK) == 0);
1982
1983         /*
1984          * We must characterize the logical->physical translation if it
1985          * has not already been cached.
1986          *
1987          * Physical data references < LBUFSIZE are never cached.  This
1988          * includes both small-block allocations and inode-embedded data.
1989          */
1990         if (nbio->bio_offset == NOOFFSET) {
1991                 parent = &ip->chain;
1992                 hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
1993
1994                 chain = hammer2_chain_lookup(hmp, &parent, lbase, lbase,
1995                                              HAMMER2_LOOKUP_NODATA);
1996                 if (chain == NULL) {
1997                         /*
1998                          * Data is zero-fill
1999                          */
2000                         nbio->bio_offset = ZFOFFSET;
2001                 } else if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
2002                         /*
2003                          * Data is embedded in the inode (do nothing)
2004                          */
2005                         KKASSERT(chain == parent);
2006                         hammer2_chain_unlock(hmp, chain);
2007                 } else if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
2008                         /*
2009                          * Data is on-media
2010                          */
2011                         KKASSERT(bp->b_bcount == chain->bytes);
2012                         nbio->bio_offset = chain->bref.data_off &
2013                                            HAMMER2_OFF_MASK;
2014                         hammer2_chain_unlock(hmp, chain);
2015                         KKASSERT(nbio->bio_offset != 0);
2016                 } else {
2017                         panic("hammer2_strategy_read: unknown bref type");
2018                 }
2019                 hammer2_chain_unlock(hmp, parent);
2020         }
2021
2022         if (hammer2_debug & 0x0020) {
2023                 kprintf("read %016jx %016jx\n",
2024                         bio->bio_offset, nbio->bio_offset);
2025         }
2026
2027         if (nbio->bio_offset == ZFOFFSET) {
2028                 /*
2029                  * Data is zero-fill
2030                  */
2031                 bp->b_resid = 0;
2032                 bp->b_error = 0;
2033                 bzero(bp->b_data, bp->b_bcount);
2034                 biodone(nbio);
2035         } else if (nbio->bio_offset != NOOFFSET) {
2036                 /*
2037                  * Forward direct IO to the device
2038                  */
2039                 vn_strategy(hmp->devvp, nbio);
2040         } else {
2041                 /*
2042                  * Data is embedded in inode.
2043                  */
2044                 bcopy(chain->data->ipdata.u.data, bp->b_data,
2045                       HAMMER2_EMBEDDED_BYTES);
2046                 bzero(bp->b_data + HAMMER2_EMBEDDED_BYTES,
2047                       bp->b_bcount - HAMMER2_EMBEDDED_BYTES);
2048                 bp->b_resid = 0;
2049                 bp->b_error = 0;
2050                 biodone(nbio);
2051         }
2052         return (0);
2053 }
2054
2055 static
2056 int
2057 hammer2_strategy_write(struct vop_strategy_args *ap)
2058 {
2059         struct buf *bp;
2060         struct bio *bio;
2061         struct bio *nbio;
2062         hammer2_mount_t *hmp;
2063         hammer2_inode_t *ip;
2064
2065         bio = ap->a_bio;
2066         bp = bio->bio_buf;
2067         ip = VTOI(ap->a_vp);
2068         hmp = ip->hmp;
2069         nbio = push_bio(bio);
2070
2071         KKASSERT((bio->bio_offset & HAMMER2_PBUFMASK64) == 0);
2072         KKASSERT(nbio->bio_offset != 0 && nbio->bio_offset != ZFOFFSET);
2073
2074         if (nbio->bio_offset == NOOFFSET) {
2075                 /*
2076                  * Must be embedded in the inode.
2077                  */
2078                 KKASSERT(bio->bio_offset == 0);
2079                 bcopy(bp->b_data, ip->ip_data.u.data, HAMMER2_EMBEDDED_BYTES);
2080                 bp->b_resid = 0;
2081                 bp->b_error = 0;
2082                 biodone(nbio);
2083
2084                 /*
2085                  * This special flag does not follow the normal MODIFY1 rules
2086                  * because we might deadlock on ip.  Instead we depend on
2087                  * VOP_FSYNC() to detect the case.
2088                  */
2089                 atomic_set_int(&ip->chain.flags, HAMMER2_CHAIN_DIRTYEMBED);
2090         } else {
2091                 /*
2092                  * Forward direct IO to the device
2093                  */
2094                 vn_strategy(hmp->devvp, nbio);
2095         }
2096         return (0);
2097 }
2098
2099 static
2100 int 
2101 hammer2_vop_mountctl(struct vop_mountctl_args *ap)
2102 {
2103         struct mount *mp;
2104         struct hammer2_mount *hmp;
2105         int rc;
2106
2107         switch (ap->a_op) {
2108         case (MOUNTCTL_SET_EXPORT):
2109                 mp = ap->a_head.a_ops->head.vv_mount;
2110                 hmp = MPTOH2(mp);
2111
2112                 if (ap->a_ctllen != sizeof(struct export_args))
2113                         rc = (EINVAL);
2114                 else
2115                         rc = vfs_export(mp, &hmp->export,
2116                                         (const struct export_args *)ap->a_ctl);
2117                 break;
2118         default:
2119                 rc = vop_stdmountctl(ap);
2120                 break;
2121         }
2122         return (rc);
2123 }
2124
2125 struct vop_ops hammer2_vnode_vops = {
2126         .vop_default    = vop_defaultop,
2127         .vop_fsync      = hammer2_vop_fsync,
2128         .vop_getpages   = vop_stdgetpages,
2129         .vop_putpages   = vop_stdputpages,
2130         .vop_access     = hammer2_vop_access,
2131         .vop_advlock    = hammer2_vop_advlock,
2132         .vop_close      = hammer2_vop_close,
2133         .vop_nlink      = hammer2_vop_nlink,
2134         .vop_ncreate    = hammer2_vop_ncreate,
2135         .vop_nsymlink   = hammer2_vop_nsymlink,
2136         .vop_nremove    = hammer2_vop_nremove,
2137         .vop_nrmdir     = hammer2_vop_nrmdir,
2138         .vop_nrename    = hammer2_vop_nrename,
2139         .vop_getattr    = hammer2_vop_getattr,
2140         .vop_setattr    = hammer2_vop_setattr,
2141         .vop_readdir    = hammer2_vop_readdir,
2142         .vop_readlink   = hammer2_vop_readlink,
2143         .vop_getpages   = vop_stdgetpages,
2144         .vop_putpages   = vop_stdputpages,
2145         .vop_read       = hammer2_vop_read,
2146         .vop_write      = hammer2_vop_write,
2147         .vop_open       = hammer2_vop_open,
2148         .vop_inactive   = hammer2_vop_inactive,
2149         .vop_reclaim    = hammer2_vop_reclaim,
2150         .vop_nresolve   = hammer2_vop_nresolve,
2151         .vop_nlookupdotdot = hammer2_vop_nlookupdotdot,
2152         .vop_nmkdir     = hammer2_vop_nmkdir,
2153         .vop_mountctl   = hammer2_vop_mountctl,
2154         .vop_bmap       = hammer2_vop_bmap,
2155         .vop_strategy   = hammer2_vop_strategy,
2156 };
2157
2158 struct vop_ops hammer2_spec_vops = {
2159
2160 };
2161
2162 struct vop_ops hammer2_fifo_vops = {
2163
2164 };