/*
 * Merge branches 'hammer2' and 'master' of ssh://crater.dragonflybsd.org/repository
 * [dragonfly.git] / sys / vfs / hammer2 / hammer2_vnops.c
 */
1 /*
2  * Copyright (c) 2011-2012 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/fcntl.h>
39 #include <sys/buf.h>
40 #include <sys/proc.h>
41 #include <sys/namei.h>
42 #include <sys/mount.h>
43 #include <sys/vnode.h>
44 #include <sys/mountctl.h>
45 #include <sys/dirent.h>
46 #include <sys/uio.h>
47
48 #include "hammer2.h"
49
/*
 * Sentinel bio offset; presumably identifies a zero-fill (holey) logical
 * block with no physical backing store — TODO confirm against the strategy
 * code which consumes bio_offset.
 */
#define ZFOFFSET        (-2LL)

/*
 * Forward declarations for the file I/O helpers implemented later in
 * this file.
 */
static int hammer2_read_file(hammer2_inode_t *ip, struct uio *uio,
                                int seqcount);
static int hammer2_write_file(hammer2_inode_t *ip, struct uio *uio, int ioflag,
                              int seqcount);
static hammer2_off_t hammer2_assign_physical(hammer2_inode_t *ip,
                                hammer2_key_t lbase, int lblksize, int *errorp);
static void hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize);
static void hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize);
60
61 static __inline
62 void
63 hammer2_knote(struct vnode *vp, int flags)
64 {
65         if (flags)
66                 KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags);
67 }
68
/*
 * Last reference to a vnode is going away but it is still cached.
 *
 * Propagates any strategy-code updates to the embedded inode data into a
 * normal chain modification, and recycles the vnode immediately when the
 * inode has been deleted.
 */
static
int
hammer2_vop_inactive(struct vop_inactive_args *ap)
{
        struct vnode *vp;
        struct hammer2_inode *ip;
#if 0
        struct hammer2_mount *hmp;
#endif

        vp = ap->a_vp;
        ip = VTOI(vp);

        /*
         * Degenerate case: the inode was already disassociated from the
         * vnode (e.g. by reclaim); just let the vnode be recycled.
         */
        if (ip == NULL) {
                vrecycle(vp);
                return (0);
        }

        /*
         * Detect updates to the embedded data which may be synchronized by
         * the strategy code.  Simply mark the inode modified so it gets
         * picked up by our normal flush.  The exclusive inode lock covers
         * the flag transition and the chain_modify call.
         */
        if (ip->chain.flags & HAMMER2_CHAIN_DIRTYEMBED) {
                hammer2_inode_lock_ex(ip);
                atomic_clear_int(&ip->chain.flags, HAMMER2_CHAIN_DIRTYEMBED);
                hammer2_chain_modify(ip->hmp, &ip->chain, 0);
                hammer2_inode_unlock_ex(ip);
        }

        /*
         * Check for deleted inodes and recycle immediately rather than
         * leaving them in the vnode cache.
         */
        if (ip->chain.flags & HAMMER2_CHAIN_DELETED) {
                vrecycle(vp);
        }
        return (0);
}
113
/*
 * Reclaim a vnode so that it can be reused; after the inode is
 * disassociated, the filesystem must manage it alone.
 *
 * Severs the vp<->ip association under the exclusive inode lock, flags
 * deleted inodes for destruction, flushes the chain, and finally drops
 * the chain reference that the vnode held.
 */
static
int
hammer2_vop_reclaim(struct vop_reclaim_args *ap)
{
        struct hammer2_inode *ip;
        struct hammer2_mount *hmp;
        struct vnode *vp;

        vp = ap->a_vp;
        ip = VTOI(vp);
        if (ip == NULL)
                return(0);
        hmp = ip->hmp;

        /*
         * Set SUBMODIFIED so we can detect and propagate the DESTROYED
         * bit in the flush code.
         *
         * The association must be severed while the lock is held so no
         * other path can find the inode through the vnode mid-teardown.
         */
        hammer2_inode_lock_ex(ip);
        vp->v_data = NULL;
        ip->vp = NULL;
        if (ip->chain.flags & HAMMER2_CHAIN_DELETED) {
                atomic_set_int(&ip->chain.flags, HAMMER2_CHAIN_DESTROYED |
                                                 HAMMER2_CHAIN_SUBMODIFIED);
        }
        hammer2_chain_flush(hmp, &ip->chain, 0);
        hammer2_inode_unlock_ex(ip);
        hammer2_chain_drop(hmp, &ip->chain);    /* vp ref */

        /*
         * XXX handle background sync when ip dirty, kernel will no longer
         * notify us regarding this inode because there is no longer a
         * vnode attached to it.
         */

        return (0);
}
155
/*
 * hammer2_vop_fsync { vp, waitfor, flags }
 *
 * Synchronize the vnode's dirty buffers via vfsync(), fold any
 * strategy-code updates to the embedded inode data into a chain
 * modification, and — for an actual fsync() syscall only — flush the
 * inode's chain to media.
 */
static
int
hammer2_vop_fsync(struct vop_fsync_args *ap)
{
        struct hammer2_inode *ip;
        struct hammer2_mount *hmp;
        struct vnode *vp;

        vp = ap->a_vp;
        ip = VTOI(vp);
        hmp = ip->hmp;

        /* Buffer-cache flush first, under the exclusive inode lock */
        hammer2_inode_lock_ex(ip);
        vfsync(vp, ap->a_waitfor, 1, NULL, NULL);

        /*
         * Detect updates to the embedded data which may be synchronized by
         * the strategy code.  Simply mark the inode modified so it gets
         * picked up by our normal flush.
         */
        if (ip->chain.flags & HAMMER2_CHAIN_DIRTYEMBED) {
                atomic_clear_int(&ip->chain.flags, HAMMER2_CHAIN_DIRTYEMBED);
                hammer2_chain_modify(hmp, &ip->chain, 0);
        }

        /*
         * Calling chain_flush here creates a lot of duplicative
         * COW operations due to non-optimal vnode ordering.
         *
         * Only do it for an actual fsync() syscall.  The other forms
         * which call this function will eventually call chain_flush
         * on the volume root as a catch-all, which is far more optimal.
         */
        if (ap->a_flags & VOP_FSYNC_SYSCALL)
                hammer2_chain_flush(hmp, &ip->chain, 0);
        hammer2_inode_unlock_ex(ip);
        return (0);
}
194
195 static
196 int
197 hammer2_vop_access(struct vop_access_args *ap)
198 {
199         hammer2_inode_t *ip = VTOI(ap->a_vp);
200         uid_t uid;
201         gid_t gid;
202         int error;
203
204         uid = hammer2_to_unix_xid(&ip->ip_data.uid);
205         gid = hammer2_to_unix_xid(&ip->ip_data.gid);
206
207         error = vop_helper_access(ap, uid, gid, ip->ip_data.mode,
208                                   ip->ip_data.uflags);
209         return (error);
210 }
211
/*
 * hammer2_vop_getattr { vp, vap }
 *
 * Fill in *vap from the inode's embedded media data while holding the
 * inode lock shared.
 */
static
int
hammer2_vop_getattr(struct vop_getattr_args *ap)
{
        hammer2_pfsmount_t *pmp;
        hammer2_inode_t *ip;
        struct vnode *vp;
        struct vattr *vap;

        vp = ap->a_vp;
        vap = ap->a_vap;

        ip = VTOI(vp);
        pmp = ip->pmp;

        /* Shared lock is sufficient; we only read the embedded data */
        hammer2_inode_lock_sh(ip);

        vap->va_fsid = pmp->mp->mnt_stat.f_fsid.val[0];
        vap->va_fileid = ip->ip_data.inum;
        vap->va_mode = ip->ip_data.mode;
        vap->va_nlink = ip->ip_data.nlinks;
        vap->va_uid = hammer2_to_unix_xid(&ip->ip_data.uid);
        vap->va_gid = hammer2_to_unix_xid(&ip->ip_data.gid);
        vap->va_rmajor = 0;
        vap->va_rminor = 0;
        vap->va_size = ip->ip_data.size;
        vap->va_blocksize = HAMMER2_PBUFSIZE;
        vap->va_flags = ip->ip_data.uflags;
        hammer2_time_to_timespec(ip->ip_data.ctime, &vap->va_ctime);
        hammer2_time_to_timespec(ip->ip_data.mtime, &vap->va_mtime);
        /*
         * atime appears not to be separately maintained (setattr has the
         * atime path disabled); mtime is reported for va_atime as well.
         */
        hammer2_time_to_timespec(ip->ip_data.mtime, &vap->va_atime);
        vap->va_gen = 1;
        vap->va_bytes = vap->va_size;   /* XXX actual allocated bytes */
        vap->va_type = hammer2_get_vtype(ip);
        vap->va_filerev = 0;
        vap->va_uid_uuid = ip->ip_data.uid;
        vap->va_gid_uuid = ip->ip_data.gid;
        vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
                          VA_FSID_UUID_VALID;

        hammer2_inode_unlock_sh(ip);

        return (0);
}
256
257 static
258 int
259 hammer2_vop_setattr(struct vop_setattr_args *ap)
260 {
261         hammer2_mount_t *hmp;
262         hammer2_inode_t *ip;
263         struct vnode *vp;
264         struct vattr *vap;
265         int error;
266         int kflags = 0;
267         int domtime = 0;
268         uint64_t ctime;
269
270         vp = ap->a_vp;
271         vap = ap->a_vap;
272         hammer2_update_time(&ctime);
273
274         ip = VTOI(vp);
275         hmp = ip->hmp;
276
277         if (hmp->ronly)
278                 return(EROFS);
279
280         hammer2_inode_lock_ex(ip);
281         error = 0;
282
283         if (vap->va_flags != VNOVAL) {
284                 u_int32_t flags;
285
286                 flags = ip->ip_data.uflags;
287                 error = vop_helper_setattr_flags(&flags, vap->va_flags,
288                                          hammer2_to_unix_xid(&ip->ip_data.uid),
289                                          ap->a_cred);
290                 if (error == 0) {
291                         if (ip->ip_data.uflags != flags) {
292                                 hammer2_chain_modify(hmp, &ip->chain, 0);
293                                 ip->ip_data.uflags = flags;
294                                 ip->ip_data.ctime = ctime;
295                                 kflags |= NOTE_ATTRIB;
296                         }
297                         if (ip->ip_data.uflags & (IMMUTABLE | APPEND)) {
298                                 error = 0;
299                                 goto done;
300                         }
301                 }
302                 goto done;
303         }
304         if (ip->ip_data.uflags & (IMMUTABLE | APPEND)) {
305                 error = EPERM;
306                 goto done;
307         }
308         if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
309                 mode_t cur_mode = ip->ip_data.mode;
310                 uid_t cur_uid = hammer2_to_unix_xid(&ip->ip_data.uid);
311                 gid_t cur_gid = hammer2_to_unix_xid(&ip->ip_data.gid);
312                 uuid_t uuid_uid;
313                 uuid_t uuid_gid;
314
315                 error = vop_helper_chown(ap->a_vp, vap->va_uid, vap->va_gid,
316                                          ap->a_cred,
317                                          &cur_uid, &cur_gid, &cur_mode);
318                 if (error == 0) {
319                         hammer2_guid_to_uuid(&uuid_uid, cur_uid);
320                         hammer2_guid_to_uuid(&uuid_gid, cur_gid);
321                         if (bcmp(&uuid_uid, &ip->ip_data.uid,
322                                  sizeof(uuid_uid)) ||
323                             bcmp(&uuid_gid, &ip->ip_data.gid,
324                                  sizeof(uuid_gid)) ||
325                             ip->ip_data.mode != cur_mode
326                         ) {
327                                 hammer2_chain_modify(hmp, &ip->chain, 0);
328                                 ip->ip_data.uid = uuid_uid;
329                                 ip->ip_data.gid = uuid_gid;
330                                 ip->ip_data.mode = cur_mode;
331                                 ip->ip_data.ctime = ctime;
332                         }
333                         kflags |= NOTE_ATTRIB;
334                 }
335         }
336
337         /*
338          * Resize the file
339          */
340         if (vap->va_size != VNOVAL && ip->ip_data.size != vap->va_size) {
341                 switch(vp->v_type) {
342                 case VREG:
343                         if (vap->va_size == ip->ip_data.size)
344                                 break;
345                         if (vap->va_size < ip->ip_data.size) {
346                                 hammer2_truncate_file(ip, vap->va_size);
347                         } else {
348                                 hammer2_extend_file(ip, vap->va_size);
349                         }
350                         domtime = 1;
351                         break;
352                 default:
353                         error = EINVAL;
354                         goto done;
355                 }
356         }
357 #if 0
358         /* atime not supported */
359         if (vap->va_atime.tv_sec != VNOVAL) {
360                 hammer2_chain_modify(hmp, &ip->chain, 0);
361                 ip->ip_data.atime = hammer2_timespec_to_time(&vap->va_atime);
362                 kflags |= NOTE_ATTRIB;
363         }
364 #endif
365         if (vap->va_mtime.tv_sec != VNOVAL) {
366                 hammer2_chain_modify(hmp, &ip->chain, 0);
367                 ip->ip_data.mtime = hammer2_timespec_to_time(&vap->va_mtime);
368                 kflags |= NOTE_ATTRIB;
369         }
370         if (vap->va_mode != (mode_t)VNOVAL) {
371                 mode_t cur_mode = ip->ip_data.mode;
372                 uid_t cur_uid = hammer2_to_unix_xid(&ip->ip_data.uid);
373                 gid_t cur_gid = hammer2_to_unix_xid(&ip->ip_data.gid);
374
375                 error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred,
376                                          cur_uid, cur_gid, &cur_mode);
377                 if (error == 0 && ip->ip_data.mode != cur_mode) {
378                         ip->ip_data.mode = cur_mode;
379                         ip->ip_data.ctime = ctime;
380                         kflags |= NOTE_ATTRIB;
381                 }
382         }
383 done:
384         hammer2_inode_unlock_ex(ip);
385         return (error);
386 }
387
/*
 * hammer2_vop_readdir { vp, uio, cred, eofflag, ncookies, cookies }
 *
 * Emit directory entries starting at uio_offset.  The artificial "." and
 * ".." entries occupy offsets 0 and 1; real entries are keyed by their
 * directory hash.  Cookies (seek offsets) are returned when requested.
 */
static
int
hammer2_vop_readdir(struct vop_readdir_args *ap)
{
        hammer2_mount_t *hmp;
        hammer2_inode_t *ip;
        hammer2_inode_t *xip;
        hammer2_chain_t *parent;
        hammer2_chain_t *chain;
        hammer2_key_t lkey;
        struct uio *uio;
        off_t *cookies;
        off_t saveoff;
        int cookie_index;
        int ncookies;
        int error;
        int dtype;
        int r;

        ip = VTOI(ap->a_vp);
        hmp = ip->hmp;
        uio = ap->a_uio;
        saveoff = uio->uio_offset;

        /*
         * Setup directory entry cookies if requested.  The count is an
         * estimate based on uio_resid, capped at 1024.
         */
        if (ap->a_ncookies) {
                ncookies = uio->uio_resid / 16 + 1;
                if (ncookies > 1024)
                        ncookies = 1024;
                cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
        } else {
                ncookies = -1;
                cookies = NULL;
        }
        cookie_index = 0;

        /*
         * Handle artificial entries.  To ensure that only positive 64 bit
         * quantities are returned to userland we always strip off bit 63.
         * The hash code is designed such that codes 0x0000-0x7FFF are not
         * used, allowing us to use these codes for artificial entries.
         *
         * Entry 0 is used for '.' and entry 1 is used for '..'.  Do not
         * allow '..' to cross the mount point into (e.g.) the super-root.
         */
        error = 0;
        /*
         * Sentinel: non-NULL so the early goto-done paths do not report
         * EOF via *a_eofflag below.
         */
        chain = (void *)(intptr_t)-1;   /* non-NULL for early goto done case */

        if (saveoff == 0) {
                r = vop_write_dirent(&error, uio,
                                     ip->ip_data.inum &
                                        HAMMER2_DIRHASH_USERMSK,
                                     DT_DIR, 1, ".");
                if (r)
                        goto done;
                if (cookies)
                        cookies[cookie_index] = saveoff;
                ++saveoff;
                ++cookie_index;
                if (cookie_index == ncookies)
                        goto done;
        }
        if (saveoff == 1) {
                /* '..' stops at the PFS root rather than crossing it */
                if (ip->pip == NULL || ip == ip->pmp->iroot)
                        xip = ip;
                else
                        xip = ip->pip;

                r = vop_write_dirent(&error, uio,
                                     xip->ip_data.inum &
                                      HAMMER2_DIRHASH_USERMSK,
                                     DT_DIR, 2, "..");
                if (r)
                        goto done;
                if (cookies)
                        cookies[cookie_index] = saveoff;
                ++saveoff;
                ++cookie_index;
                if (cookie_index == ncookies)
                        goto done;
        }

        lkey = saveoff | HAMMER2_DIRHASH_VISIBLE;

        /*
         * Lock the directory chain shared and look up the first entry at
         * or after lkey.  An exact-match miss falls back to a ranged scan.
         */
        parent = &ip->chain;
        error = hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS |
                                                HAMMER2_RESOLVE_SHARED);
        if (error) {
                hammer2_chain_unlock(hmp, parent);
                goto done;
        }
        chain = hammer2_chain_lookup(hmp, &parent, lkey, lkey,
                                     HAMMER2_LOOKUP_SHARED);
        if (chain == NULL) {
                chain = hammer2_chain_lookup(hmp, &parent,
                                             lkey, (hammer2_key_t)-1,
                                             HAMMER2_LOOKUP_SHARED);
        }
        while (chain) {
                if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
                        dtype = hammer2_get_dtype(chain->u.ip);
                        saveoff = chain->bref.key & HAMMER2_DIRHASH_USERMSK;
                        r = vop_write_dirent(&error, uio,
                                             chain->u.ip->ip_data.inum &
                                              HAMMER2_DIRHASH_USERMSK,
                                             dtype, chain->u.ip->ip_data.name_len,
                                             chain->u.ip->ip_data.filename);
                        if (r)
                                break;
                        if (cookies)
                                cookies[cookie_index] = saveoff;
                        ++cookie_index;
                } else {
                        /* XXX chain error */
                        kprintf("bad chain type readdir %d\n",
                                chain->bref.type);
                }

                /*
                 * Keys may not be returned in order so once we have a
                 * placemarker (chain) the scan must allow the full range
                 * or some entries will be missed.
                 */
                chain = hammer2_chain_next(hmp, &parent, chain,
                                           HAMMER2_DIRHASH_VISIBLE,
                                           (hammer2_key_t)-1,
                                           HAMMER2_LOOKUP_SHARED);
                if (chain) {
                        saveoff = (chain->bref.key &
                                   HAMMER2_DIRHASH_USERMSK) + 1;
                } else {
                        saveoff = (hammer2_key_t)-1;
                }
                if (cookie_index == ncookies)
                        break;
        }
        if (chain)
                hammer2_chain_unlock(hmp, chain);
        hammer2_chain_unlock(hmp, parent);
done:
        /* chain == NULL only when the scan genuinely ran off the end */
        if (ap->a_eofflag)
                *ap->a_eofflag = (chain == NULL);
        uio->uio_offset = saveoff & ~HAMMER2_DIRHASH_VISIBLE;
        if (error && cookie_index == 0) {
                if (cookies) {
                        kfree(cookies, M_TEMP);
                        *ap->a_ncookies = 0;
                        *ap->a_cookies = NULL;
                }
        } else {
                if (cookies) {
                        *ap->a_ncookies = cookie_index;
                        *ap->a_cookies = cookies;
                }
        }
        return (error);
}
547
548 /*
549  * hammer2_vop_readlink { vp, uio, cred }
550  */
551 static
552 int
553 hammer2_vop_readlink(struct vop_readlink_args *ap)
554 {
555         struct vnode *vp;
556         hammer2_mount_t *hmp;
557         hammer2_inode_t *ip;
558         int error;
559
560         vp = ap->a_vp;
561         if (vp->v_type != VLNK)
562                 return (EINVAL);
563         ip = VTOI(vp);
564         hmp = ip->hmp;
565
566         error = hammer2_read_file(ip, ap->a_uio, 0);
567         return (error);
568 }
569
570 static
571 int
572 hammer2_vop_read(struct vop_read_args *ap)
573 {
574         struct vnode *vp;
575         hammer2_mount_t *hmp;
576         hammer2_inode_t *ip;
577         struct uio *uio;
578         int error;
579         int seqcount;
580         int bigread;
581
582         /*
583          * Read operations supported on this vnode?
584          */
585         vp = ap->a_vp;
586         if (vp->v_type != VREG)
587                 return (EINVAL);
588
589         /*
590          * Misc
591          */
592         ip = VTOI(vp);
593         hmp = ip->hmp;
594         uio = ap->a_uio;
595         error = 0;
596
597         seqcount = ap->a_ioflag >> 16;
598         bigread = (uio->uio_resid > 100 * 1024 * 1024);
599
600         error = hammer2_read_file(ip, uio, seqcount);
601         return (error);
602 }
603
604 static
605 int
606 hammer2_vop_write(struct vop_write_args *ap)
607 {
608         thread_t td;
609         struct vnode *vp;
610         hammer2_mount_t *hmp;
611         hammer2_inode_t *ip;
612         struct uio *uio;
613         int error;
614         int seqcount;
615         int bigwrite;
616
617         /*
618          * Read operations supported on this vnode?
619          */
620         vp = ap->a_vp;
621         if (vp->v_type != VREG)
622                 return (EINVAL);
623
624         /*
625          * Misc
626          */
627         ip = VTOI(vp);
628         hmp = ip->hmp;
629         uio = ap->a_uio;
630         error = 0;
631         if (hmp->ronly)
632                 return (EROFS);
633
634         seqcount = ap->a_ioflag >> 16;
635         bigwrite = (uio->uio_resid > 100 * 1024 * 1024);
636
637         /*
638          * Check resource limit
639          */
640         if (uio->uio_resid > 0 && (td = uio->uio_td) != NULL && td->td_proc &&
641             uio->uio_offset + uio->uio_resid >
642              td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
643                 lwpsignal(td->td_proc, td->td_lwp, SIGXFSZ);
644                 return (EFBIG);
645         }
646
647         bigwrite = (uio->uio_resid > 100 * 1024 * 1024);
648
649         /*
650          * ip must be locked if extending the file.
651          * ip must be locked to avoid racing a truncation.
652          *
653          * ip must be marked modified, particularly because the write
654          * might wind up being copied into the embedded data area.
655          */
656         hammer2_inode_lock_ex(ip);
657         error = hammer2_write_file(ip, uio, ap->a_ioflag, seqcount);
658         hammer2_inode_unlock_ex(ip);
659         return (error);
660 }
661
662 /*
663  * Perform read operations on a file or symlink given an UNLOCKED
664  * inode and uio.
665  */
666 static
667 int
668 hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, int seqcount)
669 {
670         struct buf *bp;
671         int error;
672
673         error = 0;
674
675         /*
676          * UIO read loop
677          */
678         while (uio->uio_resid > 0 && uio->uio_offset < ip->ip_data.size) {
679                 hammer2_key_t lbase;
680                 hammer2_key_t leof;
681                 int lblksize;
682                 int loff;
683                 int n;
684
685                 lblksize = hammer2_calc_logical(ip, uio->uio_offset,
686                                                 &lbase, &leof);
687
688                 error = cluster_read(ip->vp, leof, lbase, lblksize,
689                                      uio->uio_resid, seqcount * BKVASIZE,
690                                      &bp);
691
692                 if (error)
693                         break;
694                 loff = (int)(uio->uio_offset - lbase);
695                 n = lblksize - loff;
696                 if (n > uio->uio_resid)
697                         n = uio->uio_resid;
698                 if (n > ip->ip_data.size - uio->uio_offset)
699                         n = (int)(ip->ip_data.size - uio->uio_offset);
700                 bp->b_flags |= B_AGE;
701                 uiomove((char *)bp->b_data + loff, n, uio);
702                 bqrelse(bp);
703         }
704         return (error);
705 }
706
707 /*
708  * Called with a locked (ip) to do the underlying write to a file or
709  * to build the symlink target.
710  */
711 static
712 int
713 hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
714                    int ioflag, int seqcount)
715 {
716         hammer2_key_t old_eof;
717         struct buf *bp;
718         int kflags;
719         int error;
720         int modified = 0;
721
722         /*
723          * Setup if append
724          */
725         if (ioflag & IO_APPEND)
726                 uio->uio_offset = ip->ip_data.size;
727         kflags = 0;
728         error = 0;
729
730         /*
731          * Extend the file if necessary.  If the write fails at some point
732          * we will truncate it back down to cover as much as we were able
733          * to write.
734          *
735          * Doing this now makes it easier to calculate buffer sizes in
736          * the loop.
737          */
738         old_eof = ip->ip_data.size;
739         if (uio->uio_offset + uio->uio_resid > ip->ip_data.size) {
740                 modified = 1;
741                 hammer2_extend_file(ip, uio->uio_offset + uio->uio_resid);
742                 kflags |= NOTE_EXTEND;
743         }
744
745         /*
746          * UIO write loop
747          */
748         while (uio->uio_resid > 0) {
749                 hammer2_key_t lbase;
750                 hammer2_key_t leof;
751                 int trivial;
752                 int lblksize;
753                 int loff;
754                 int n;
755
756                 /*
757                  * Don't allow the buffer build to blow out the buffer
758                  * cache.
759                  */
760                 if ((ioflag & IO_RECURSE) == 0) {
761                         /*
762                          * XXX should try to leave this unlocked through
763                          *      the whole loop
764                          */
765                         hammer2_chain_unlock(ip->hmp, &ip->chain);
766                         bwillwrite(HAMMER2_PBUFSIZE);
767                         hammer2_chain_lock(ip->hmp, &ip->chain,
768                                            HAMMER2_RESOLVE_ALWAYS);
769                 }
770
771                 /* XXX bigwrite & signal check test */
772
773                 /*
774                  * This nominally tells us how much we can cluster and
775                  * what the logical buffer size needs to be.  Currently
776                  * we don't try to cluster the write and just handle one
777                  * block at a time.
778                  */
779                 lblksize = hammer2_calc_logical(ip, uio->uio_offset,
780                                                 &lbase, &leof);
781                 loff = (int)(uio->uio_offset - lbase);
782
783                 /*
784                  * Calculate bytes to copy this transfer and whether the
785                  * copy completely covers the buffer or not.
786                  */
787                 trivial = 0;
788                 n = lblksize - loff;
789                 if (n > uio->uio_resid) {
790                         n = uio->uio_resid;
791                         if (uio->uio_offset + n == ip->ip_data.size)
792                                 trivial = 1;
793                 } else if (loff == 0) {
794                         trivial = 1;
795                 }
796
797                 /*
798                  * Get the buffer
799                  */
800                 if (uio->uio_segflg == UIO_NOCOPY) {
801                         /*
802                          * Issuing a write with the same data backing the
803                          * buffer.  Instantiate the buffer to collect the
804                          * backing vm pages, then read-in any missing bits.
805                          *
806                          * This case is used by vop_stdputpages().
807                          */
808                         bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0);
809                         if ((bp->b_flags & B_CACHE) == 0) {
810                                 bqrelse(bp);
811                                 error = bread(ip->vp, lbase, lblksize, &bp);
812                         }
813                 } else if (trivial) {
814                         /*
815                          * Even though we are entirely overwriting the buffer
816                          * we may still have to zero it out to avoid a
817                          * mmap/write visibility issue.
818                          */
819                         bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0);
820                         if ((bp->b_flags & B_CACHE) == 0)
821                                 vfs_bio_clrbuf(bp);
822                 } else {
823                         /*
824                          * Partial overwrite, read in any missing bits then
825                          * replace the portion being written.
826                          *
827                          * (The strategy code will detect zero-fill physical
828                          * blocks for this case).
829                          */
830                         error = bread(ip->vp, lbase, lblksize, &bp);
831                         if (error == 0)
832                                 bheavy(bp);
833                 }
834
835                 if (error) {
836                         brelse(bp);
837                         break;
838                 }
839
840                 /*
841                  * We have to assign physical storage to the buffer we intend
842                  * to dirty or write now to avoid deadlocks in the strategy
843                  * code later.
844                  *
845                  * This can return NOOFFSET for inode-embedded data.  The
846                  * strategy code will take care of it in that case.
847                  */
848                 bp->b_bio2.bio_offset =
849                         hammer2_assign_physical(ip, lbase, lblksize, &error);
850                 if (error) {
851                         brelse(bp);
852                         break;
853                 }
854
855                 /*
856                  * Ok, copy the data in
857                  */
858                 hammer2_chain_unlock(ip->hmp, &ip->chain);
859                 error = uiomove(bp->b_data + loff, n, uio);
860                 hammer2_chain_lock(ip->hmp, &ip->chain, HAMMER2_RESOLVE_ALWAYS);
861                 kflags |= NOTE_WRITE;
862                 modified = 1;
863
864                 if (error) {
865                         brelse(bp);
866                         break;
867                 }
868
869                 /* XXX update ip_data.mtime */
870
871                 /*
872                  * Once we dirty a buffer any cached offset becomes invalid.
873                  *
874                  * NOTE: For cluster_write() always use the trailing block
875                  *       size, which is HAMMER2_PBUFSIZE.  lblksize is the
876                  *       eof-straddling blocksize and is incorrect.
877                  */
878                 bp->b_flags |= B_AGE;
879                 if (ioflag & IO_SYNC) {
880                         bwrite(bp);
881                 } else if ((ioflag & IO_DIRECT) && loff + n == lblksize) {
882                         if (bp->b_bcount == HAMMER2_PBUFSIZE)
883                                 bp->b_flags |= B_CLUSTEROK;
884                         bdwrite(bp);
885                 } else if (ioflag & IO_ASYNC) {
886                         bawrite(bp);
887                 } else if (hammer2_cluster_enable) {
888                         if (bp->b_bcount == HAMMER2_PBUFSIZE)
889                                 bp->b_flags |= B_CLUSTEROK;
890                         cluster_write(bp, leof, HAMMER2_PBUFSIZE, seqcount);
891                 } else {
892                         if (bp->b_bcount == HAMMER2_PBUFSIZE)
893                                 bp->b_flags |= B_CLUSTEROK;
894                         bdwrite(bp);
895                 }
896         }
897
898         /*
899          * Cleanup.  If we extended the file EOF but failed to write through
900          * the entire write is a failure and we have to back-up.
901          */
902         if (error && ip->ip_data.size != old_eof) {
903                 hammer2_truncate_file(ip, old_eof);
904         } else if (modified) {
905                 hammer2_chain_modify(ip->hmp, &ip->chain, 0);
906                 hammer2_update_time(&ip->ip_data.mtime);
907         }
908         hammer2_knote(ip->vp, kflags);
909         return error;
910 }
911
/*
 * Assign physical storage to a logical block.
 *
 * NOOFFSET is returned if the data is inode-embedded.  In this case the
 * strategy code will simply bcopy() the data into the inode.
 *
 * The inode's delta_dcount is adjusted.
 *
 * NOTE(review): *errorp is initialized to 0 but no path in this function
 *		 currently sets it non-zero; failures surface via panic()
 *		 instead.  Confirm whether callers rely on *errorp.
 */
static
hammer2_off_t
hammer2_assign_physical(hammer2_inode_t *ip, hammer2_key_t lbase,
			int lblksize, int *errorp)
{
	hammer2_mount_t *hmp;
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	hammer2_off_t pbase;

	*errorp = 0;
	hmp = ip->hmp;

	/*
	 * Locate the chain associated with lbase, return a locked chain.
	 * However, do not instantiate any data reference (which utilizes a
	 * device buffer) because we will be using direct IO via the
	 * logical buffer cache buffer.
	 */
	parent = &ip->chain;
	hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);

	chain = hammer2_chain_lookup(hmp, &parent,
				     lbase, lbase,
				     HAMMER2_LOOKUP_NODATA);

	if (chain == NULL) {
		/*
		 * We found a hole, create a new chain entry.
		 *
		 * NOTE: DATA chains are created without device backing
		 *	 store (nor do we want any).
		 */
		chain = hammer2_chain_create(hmp, parent, NULL,
					     lbase, HAMMER2_PBUFRADIX,
					     HAMMER2_BREF_TYPE_DATA,
					     lblksize);
		/* strip the radix (size) bits to get the media offset */
		pbase = chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX;
		/* account for the newly allocated data storage */
		ip->delta_dcount += lblksize;
	} else {
		switch (chain->bref.type) {
		case HAMMER2_BREF_TYPE_INODE:
			/*
			 * The data is embedded in the inode.  The
			 * caller is responsible for marking the inode
			 * modified and copying the data to the embedded
			 * area.
			 */
			pbase = NOOFFSET;
			break;
		case HAMMER2_BREF_TYPE_DATA:
			if (chain->bytes != lblksize) {
				panic("hammer2_assign_physical: "
				      "size mismatch %d/%d\n",
				      lblksize, chain->bytes);
			}
			/*
			 * Mark the chain modified without instantiating
			 * its data (OPTDATA); the logical buffer cache
			 * buffer supplies the data at strategy time.
			 */
			hammer2_chain_modify(hmp, chain,
					     HAMMER2_MODIFY_OPTDATA);
			pbase = chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX;
			break;
		default:
			panic("hammer2_assign_physical: bad type");
			/* NOT REACHED */
			pbase = NOOFFSET;
			break;
		}
	}

	/* release the locks acquired by the lookup/create above */
	if (chain)
		hammer2_chain_unlock(hmp, chain);
	hammer2_chain_unlock(hmp, parent);

	return (pbase);
}
994
/*
 * Truncate the size of a file.
 *
 * This routine adjusts ip->ip_data.size smaller, destroying any related
 * data beyond the new EOF and potentially resizing the block straddling
 * the EOF.
 *
 * The inode must be locked.
 */
static
void
hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize)
{
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	hammer2_mount_t *hmp = ip->hmp;
	hammer2_key_t lbase;
	hammer2_key_t leof;
	struct buf *bp;
	int loff;
	int error;
	int oblksize;
	int nblksize;

	hammer2_chain_modify(hmp, &ip->chain, 0);
	bp = NULL;

	/*
	 * Destroy any logical buffer cache buffers beyond the file EOF.
	 *
	 * We call nvtruncbuf() w/ trivial == 1 to prevent it from messing
	 * around with the buffer straddling EOF, because we need to assign
	 * a new physical offset to it.
	 */
	if (ip->vp) {
		nvtruncbuf(ip->vp, nsize,
			   HAMMER2_PBUFSIZE, (int)nsize & HAMMER2_PBUFMASK,
			   1);
	}

	/*
	 * Setup for lookup/search
	 */
	parent = &ip->chain;
	error = hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
	if (error) {
		hammer2_chain_unlock(hmp, parent);
		/* XXX error reporting */
		return;
	}

	/*
	 * Handle the case where a chain/logical-buffer straddles the new
	 * EOF.  We told nvtruncbuf() above not to mess with the logical
	 * buffer straddling the EOF because we need to reassign its storage
	 * and can't let the strategy code do it for us.
	 */
	loff = (int)nsize & HAMMER2_PBUFMASK;
	if (loff && ip->vp) {
		/* bring in the (old-sized) buffer straddling the new EOF */
		oblksize = hammer2_calc_logical(ip, nsize, &lbase, &leof);
		error = bread(ip->vp, lbase, oblksize, &bp);
		KKASSERT(error == 0);
	}
	/* commit the new size, then recalculate the straddling blocksize */
	ip->ip_data.size = nsize;
	nblksize = hammer2_calc_logical(ip, nsize, &lbase, &leof);

	/*
	 * Fixup the chain element.  If we have a logical buffer in-hand
	 * we don't want to create a conflicting device buffer.
	 */
	if (loff && bp) {
		chain = hammer2_chain_lookup(hmp, &parent, lbase, lbase,
					     HAMMER2_LOOKUP_NODATA);
		if (chain) {
			/* resize the logical buffer to the new blocksize */
			allocbuf(bp, nblksize);
			switch(chain->bref.type) {
			case HAMMER2_BREF_TYPE_DATA:
				/*
				 * Shrink the backing store to the new
				 * blocksize and zero the buffer tail past
				 * the new EOF, then point the buffer's
				 * bio2 at the new media offset.
				 */
				hammer2_chain_resize(ip, chain,
					     hammer2_bytes_to_radix(nblksize),
					     HAMMER2_MODIFY_OPTDATA);
				bzero(bp->b_data + loff, nblksize - loff);
				bp->b_bio2.bio_offset = chain->bref.data_off &
							HAMMER2_OFF_MASK;
				break;
			case HAMMER2_BREF_TYPE_INODE:
				/* inode-embedded data, no media offset */
				bzero(bp->b_data + loff, nblksize - loff);
				bp->b_bio2.bio_offset = NOOFFSET;
				break;
			default:
				panic("hammer2_truncate_file: bad type");
				break;
			}
			hammer2_chain_unlock(hmp, chain);
			/* delayed-write the fixed-up straddling buffer */
			if (bp->b_bcount == HAMMER2_PBUFSIZE)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		} else {
			/*
			 * Destroy clean buffer w/ wrong buffer size.  Retain
			 * backing store.
			 */
			bp->b_flags |= B_RELBUF;
			KKASSERT(bp->b_bio2.bio_offset == NOOFFSET);
			KKASSERT((bp->b_flags & B_DIRTY) == 0);
			bqrelse(bp);
		}
	} else if (loff) {
		/*
		 * WARNING: This utilizes a device buffer for the data.
		 *
		 * This case should not occur because file truncations without
		 * a vnode (and hence no logical buffer cache) should only
		 * always truncate to 0-length.
		 */
		panic("hammer2_truncate_file: non-zero truncation, no-vnode");
#if 0
		chain = hammer2_chain_lookup(hmp, &parent, lbase, lbase, 0);
		if (chain) {
			switch(chain->bref.type) {
			case HAMMER2_BREF_TYPE_DATA:
				hammer2_chain_resize(ip, chain,
					     hammer2_bytes_to_radix(nblksize),
					     0);
				hammer2_chain_modify(hmp, chain, 0);
				bzero(chain->data->buf + loff, nblksize - loff);
				break;
			case HAMMER2_BREF_TYPE_INODE:
				if (loff < HAMMER2_EMBEDDED_BYTES) {
					hammer2_chain_modify(hmp, chain, 0);
					bzero(chain->data->ipdata.u.data + loff,
					      HAMMER2_EMBEDDED_BYTES - loff);
				}
				break;
			}
			hammer2_chain_unlock(hmp, chain);
		}
#endif
	}

	/*
	 * Clean up any fragmentory VM pages now that we have properly
	 * resized the straddling buffer.  These pages are no longer
	 * part of the buffer.
	 */
	if (ip->vp) {
		nvtruncbuf(ip->vp, nsize,
			   nblksize, (int)nsize & (nblksize - 1),
			   1);
	}

	/*
	 * Destroy any physical blocks after the new EOF point.
	 */
	lbase = (nsize + HAMMER2_PBUFMASK64) & ~HAMMER2_PBUFMASK64;
	chain = hammer2_chain_lookup(hmp, &parent,
				     lbase, (hammer2_key_t)-1,
				     HAMMER2_LOOKUP_NODATA);
	while (chain) {
		/*
		 * Degenerate embedded data case, nothing to loop on.
		 */
		if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
			hammer2_chain_unlock(hmp, chain);
			break;
		}

		/*
		 * Delete physical data blocks past the file EOF.
		 */
		if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
			/* de-account the storage being released */
			ip->delta_dcount -= chain->bytes;
			hammer2_chain_delete(hmp, parent, chain, 0);
		}
		/* XXX check parent if empty indirect block & delete */
		chain = hammer2_chain_next(hmp, &parent, chain,
					   lbase, (hammer2_key_t)-1,
					   HAMMER2_LOOKUP_NODATA);
	}
	hammer2_chain_unlock(hmp, parent);
}
1175
1176 /*
1177  * Extend the size of a file.  The inode must be locked.
1178  *
1179  * We may have to resize the block straddling the old EOF.
1180  */
1181 static
1182 void
1183 hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize)
1184 {
1185         hammer2_mount_t *hmp;
1186         hammer2_chain_t *parent;
1187         hammer2_chain_t *chain;
1188         struct buf *bp;
1189         hammer2_key_t osize;
1190         hammer2_key_t obase;
1191         hammer2_key_t nbase;
1192         hammer2_key_t leof;
1193         int oblksize;
1194         int nblksize;
1195         int nradix;
1196         int error;
1197
1198         KKASSERT(ip->vp);
1199         hmp = ip->hmp;
1200
1201         hammer2_chain_modify(hmp, &ip->chain, 0);
1202
1203         /*
1204          * Nothing to do if the direct-data case is still intact
1205          */
1206         if ((ip->ip_data.op_flags & HAMMER2_OPFLAG_DIRECTDATA) &&
1207             nsize <= HAMMER2_EMBEDDED_BYTES) {
1208                 ip->ip_data.size = nsize;
1209                 nvextendbuf(ip->vp,
1210                             ip->ip_data.size, nsize,
1211                             0, HAMMER2_EMBEDDED_BYTES,
1212                             0, (int)nsize,
1213                             1);
1214                 return;
1215         }
1216
1217         /*
1218          * Calculate the blocksize at the original EOF and resize the block
1219          * if necessary.  Adjust the file size in the inode.
1220          */
1221         osize = ip->ip_data.size;
1222         oblksize = hammer2_calc_logical(ip, osize, &obase, &leof);
1223         ip->ip_data.size = nsize;
1224         nblksize = hammer2_calc_logical(ip, osize, &nbase, &leof);
1225
1226         /*
1227          * Do all required vnode operations, but do not mess with the
1228          * buffer straddling the orignal EOF.
1229          */
1230         nvextendbuf(ip->vp,
1231                     ip->ip_data.size, nsize,
1232                     0, nblksize,
1233                     0, (int)nsize & HAMMER2_PBUFMASK,
1234                     1);
1235
1236         /*
1237          * Early return if we have no more work to do.
1238          */
1239         if (obase == nbase && oblksize == nblksize &&
1240             (ip->ip_data.op_flags & HAMMER2_OPFLAG_DIRECTDATA) == 0) {
1241                 return;
1242         }
1243
1244         /*
1245          * We have work to do, including possibly resizing the buffer
1246          * at the previous EOF point and turning off DIRECTDATA mode.
1247          */
1248         bp = NULL;
1249         if (((int)osize & HAMMER2_PBUFMASK)) {
1250                 error = bread(ip->vp, obase, oblksize, &bp);
1251                 KKASSERT(error == 0);
1252
1253                 if (obase != nbase) {
1254                         if (oblksize != HAMMER2_PBUFSIZE)
1255                                 allocbuf(bp, HAMMER2_PBUFSIZE);
1256                 } else {
1257                         if (oblksize != nblksize)
1258                                 allocbuf(bp, nblksize);
1259                 }
1260         }
1261
1262         /*
1263          * Disable direct-data mode by loading up a buffer cache buffer
1264          * with the data, then converting the inode data area into the
1265          * inode indirect block array area.
1266          */
1267         if (ip->ip_data.op_flags & HAMMER2_OPFLAG_DIRECTDATA) {
1268                 ip->ip_data.op_flags &= ~HAMMER2_OPFLAG_DIRECTDATA;
1269                 bzero(&ip->ip_data.u.blockset, sizeof(ip->ip_data.u.blockset));
1270         }
1271
1272         /*
1273          * Resize the chain element at the old EOF.
1274          */
1275         if (((int)osize & HAMMER2_PBUFMASK)) {
1276                 parent = &ip->chain;
1277                 error = hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
1278                 KKASSERT(error == 0);
1279
1280                 nradix = hammer2_bytes_to_radix(nblksize);
1281
1282                 chain = hammer2_chain_lookup(hmp, &parent,
1283                                              obase, obase,
1284                                              HAMMER2_LOOKUP_NODATA);
1285                 if (chain == NULL) {
1286                         chain = hammer2_chain_create(hmp, parent, NULL,
1287                                                      obase, nblksize,
1288                                                      HAMMER2_BREF_TYPE_DATA,
1289                                                      nblksize);
1290                         ip->delta_dcount += nblksize;
1291                 } else {
1292                         KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA);
1293                         hammer2_chain_resize(ip, chain, nradix,
1294                                              HAMMER2_MODIFY_OPTDATA);
1295                 }
1296                 bp->b_bio2.bio_offset = chain->bref.data_off &
1297                                         HAMMER2_OFF_MASK;
1298                 hammer2_chain_unlock(hmp, chain);
1299                 if (bp->b_bcount == HAMMER2_PBUFSIZE)
1300                         bp->b_flags |= B_CLUSTEROK;
1301                 bdwrite(bp);
1302                 hammer2_chain_unlock(hmp, parent);
1303         }
1304 }
1305
/*
 * hammer2_vop_nresolve { nch, dvp, cred }
 *
 * Resolve a namecache entry in directory (dvp).  Scans the directory for
 * a matching filename, follows hardlink forwarding entries to the real
 * inode, and sets the resolved vnode (or a negative entry) in the
 * namecache.
 */
static
int
hammer2_vop_nresolve(struct vop_nresolve_args *ap)
{
	hammer2_inode_t *dip;
	hammer2_inode_t *ip;
	hammer2_mount_t *hmp;
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	hammer2_key_t lhc;
	int error = 0;
	struct vnode *vp;

	dip = VTOI(ap->a_dvp);
	hmp = dip->hmp;
	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;
	lhc = hammer2_dirhash(name, name_len);

	/*
	 * Note: In DragonFly the kernel handles '.' and '..'.
	 */
	/*
	 * Scan the directory (shared lock) for an inode whose name matches.
	 * Multiple entries can share the same dirhash so iterate the full
	 * collision range and compare names exactly.
	 */
	parent = &dip->chain;
	hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS |
					HAMMER2_RESOLVE_SHARED);
	chain = hammer2_chain_lookup(hmp, &parent,
				     lhc, lhc + HAMMER2_DIRHASH_LOMASK,
				     HAMMER2_LOOKUP_SHARED);
	while (chain) {
		if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
		    chain->u.ip &&
		    name_len == chain->data->ipdata.name_len &&
		    bcmp(name, chain->data->ipdata.filename, name_len) == 0) {
			break;
		}
		chain = hammer2_chain_next(hmp, &parent, chain,
					   lhc, lhc + HAMMER2_DIRHASH_LOMASK,
					   HAMMER2_LOOKUP_SHARED);
	}
	hammer2_chain_unlock(hmp, parent);

	/*
	 * If the inode represents a forwarding entry for a hardlink we have
	 * to locate the actual inode.  The original ip is saved for possible
	 * deconsolidation.  (ip) will only be set to non-NULL when we have
	 * to locate the real file via a hardlink.  ip will be referenced but
	 * not locked in that situation.  chain is passed in locked and
	 * returned locked.
	 *
	 * XXX what kind of chain lock?
	 */
	ip = NULL;
	if (chain && chain->u.ip->ip_data.type == HAMMER2_OBJTYPE_HARDLINK) {
		error = hammer2_hardlink_find(dip, &chain, &ip);
		if (error) {
			kprintf("hammer2: unable to find hardlink\n");
			if (chain) {
				hammer2_chain_unlock(hmp, chain);
				chain = NULL;
			}
			goto failed;
		}
	}

	/*
	 * Deconsolidate any hardlink whos nlinks == 1.  Ignore errors.
	 * If an error occurs chain and ip are left alone.
	 *
	 * XXX upgrade shared lock?
	 */
	if (ip && chain && chain->u.ip->ip_data.nlinks == 1 && !hmp->ronly) {
		kprintf("hammer2: need to unconsolidate hardlink for %s\n",
			chain->u.ip->ip_data.filename);
		hammer2_hardlink_deconsolidate(dip, &chain, &ip);
	}

	/*
	 * Acquire the related vnode
	 *
	 * NOTE: The 'failed' label below is jumped to from the hardlink
	 *	 error path above; it shares the negative-cache handling
	 *	 with the not-found case.
	 */
	if (chain) {
		vp = hammer2_igetv(chain->u.ip, &error);
		if (error == 0) {
			/* igetv returns vp locked+referenced; keep the ref */
			vn_unlock(vp);
			cache_setvp(ap->a_nch, vp);
			vrele(vp);
		}
		hammer2_chain_unlock(hmp, chain);
	} else {
		error = ENOENT;
failed:
		cache_setvp(ap->a_nch, NULL);
	}
	/* drop the extra inode ref acquired by hardlink resolution */
	if (ip)
		hammer2_inode_drop(ip);
	return error;
}
1406
1407 static
1408 int
1409 hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
1410 {
1411         hammer2_inode_t *dip;
1412         hammer2_inode_t *ip;
1413         hammer2_mount_t *hmp;
1414         int error;
1415
1416         dip = VTOI(ap->a_dvp);
1417         hmp = dip->hmp;
1418
1419         if ((ip = dip->pip) == NULL) {
1420                 *ap->a_vpp = NULL;
1421                 return ENOENT;
1422         }
1423         hammer2_chain_lock(hmp, &ip->chain, HAMMER2_RESOLVE_ALWAYS);
1424         *ap->a_vpp = hammer2_igetv(ip, &error);
1425         hammer2_chain_unlock(hmp, &ip->chain);
1426
1427         return error;
1428 }
1429
1430 static
1431 int
1432 hammer2_vop_nmkdir(struct vop_nmkdir_args *ap)
1433 {
1434         hammer2_mount_t *hmp;
1435         hammer2_inode_t *dip;
1436         hammer2_inode_t *nip;
1437         struct namecache *ncp;
1438         const uint8_t *name;
1439         size_t name_len;
1440         int error;
1441
1442         dip = VTOI(ap->a_dvp);
1443         hmp = dip->hmp;
1444         if (hmp->ronly)
1445                 return (EROFS);
1446
1447         ncp = ap->a_nch->ncp;
1448         name = ncp->nc_name;
1449         name_len = ncp->nc_nlen;
1450
1451         error = hammer2_inode_create(dip, ap->a_vap, ap->a_cred,
1452                                      name, name_len, &nip);
1453         if (error) {
1454                 KKASSERT(nip == NULL);
1455                 *ap->a_vpp = NULL;
1456                 return error;
1457         }
1458         *ap->a_vpp = hammer2_igetv(nip, &error);
1459         hammer2_chain_unlock(hmp, &nip->chain);
1460
1461         if (error == 0) {
1462                 cache_setunresolved(ap->a_nch);
1463                 cache_setvp(ap->a_nch, *ap->a_vpp);
1464         }
1465         return error;
1466 }
1467
/*
 * Return the largest contiguous physical disk range for the logical
 * request.
 *
 * (struct vnode *vp, off_t loffset, off_t *doffsetp, int *runp, int *runb)
 *
 * NOTE(review): ap->a_runb is never written by this implementation —
 *		 presumably callers initialize it; confirm against the
 *		 vop_bmap callers.
 */
static
int
hammer2_vop_bmap(struct vop_bmap_args *ap)
{
	struct vnode *vp;
	hammer2_mount_t *hmp;
	hammer2_inode_t *ip;
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	hammer2_key_t lbeg;
	hammer2_key_t lend;
	hammer2_off_t pbeg;
	hammer2_off_t pbytes;
	hammer2_off_t array[HAMMER2_BMAP_COUNT][2];
	int loff;
	int ai;

	/*
	 * Only supported on regular files
	 *
	 * Only supported for read operations (required for cluster_read).
	 * The block allocation is delayed for write operations.
	 */
	vp = ap->a_vp;
	if (vp->v_type != VREG)
		return (EOPNOTSUPP);
	if (ap->a_cmd != BUF_CMD_READ)
		return (EOPNOTSUPP);

	ip = VTOI(vp);
	hmp = ip->hmp;
	/* array[ai] = { physical offset, byte count } per logical block */
	bzero(array, sizeof(array));

	/*
	 * Calculate logical range
	 */
	KKASSERT((ap->a_loffset & HAMMER2_LBUFMASK64) == 0);
	lbeg = ap->a_loffset & HAMMER2_OFF_MASK_HI;
	lend = lbeg + HAMMER2_BMAP_COUNT * HAMMER2_PBUFSIZE - 1;
	if (lend < lbeg)
		lend = lbeg;
	loff = ap->a_loffset & HAMMER2_OFF_MASK_LO;

	/*
	 * Scan the chains covering the logical range (shared lock) and
	 * record each DATA chain's physical offset and size into array[].
	 */
	parent = &ip->chain;
	hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS |
					HAMMER2_RESOLVE_SHARED);
	chain = hammer2_chain_lookup(hmp, &parent,
				     lbeg, lend,
				     HAMMER2_LOOKUP_NODATA |
				     HAMMER2_LOOKUP_SHARED);
	if (chain == NULL) {
		/* entirely sparse, report zero-fill */
		*ap->a_doffsetp = ZFOFFSET;
		hammer2_chain_unlock(hmp, parent);
		return (0);
	}

	while (chain) {
		if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
			ai = (chain->bref.key - lbeg) / HAMMER2_PBUFSIZE;
			KKASSERT(ai >= 0 && ai < HAMMER2_BMAP_COUNT);
			array[ai][0] = chain->bref.data_off & HAMMER2_OFF_MASK;
			array[ai][1] = chain->bytes;
		}
		chain = hammer2_chain_next(hmp, &parent, chain,
					   lbeg, lend,
					   HAMMER2_LOOKUP_NODATA |
					   HAMMER2_LOOKUP_SHARED);
	}
	hammer2_chain_unlock(hmp, parent);

	/*
	 * If the requested loffset is not mappable physically we can't
	 * bmap.  The caller will have to access the file data via a
	 * device buffer.
	 */
	if (array[0][0] == 0 || array[0][1] < loff + HAMMER2_LBUFSIZE) {
		*ap->a_doffsetp = NOOFFSET;
		return (0);
	}

	/*
	 * Calculate the physical disk offset range for array[0]
	 */
	pbeg = array[0][0] + loff;
	pbytes = array[0][1] - loff;

	/* extend the run across physically contiguous successors */
	for (ai = 1; ai < HAMMER2_BMAP_COUNT; ++ai) {
		if (array[ai][0] != pbeg + pbytes)
			break;
		pbytes += array[ai][1];
	}

	*ap->a_doffsetp = pbeg;
	if (ap->a_runp)
		*ap->a_runp = pbytes;
	return (0);
}
1571
/*
 * hammer2_vop_open { vp, mode, cred, fp }
 */
static
int
hammer2_vop_open(struct vop_open_args *ap)
{
	/*
	 * No filesystem-specific open handling; use the stock code.
	 */
	return (vop_stdopen(ap));
}
1578
1579 /*
1580  * hammer2_vop_advlock { vp, id, op, fl, flags }
1581  */
1582 static
1583 int
1584 hammer2_vop_advlock(struct vop_advlock_args *ap)
1585 {
1586         hammer2_inode_t *ip = VTOI(ap->a_vp);
1587
1588         return (lf_advlock(ap, &ip->advlock, ip->ip_data.size));
1589 }
1590
1591
/*
 * hammer2_vop_close { vp, fflag }
 */
static
int
hammer2_vop_close(struct vop_close_args *ap)
{
	/*
	 * No filesystem-specific close handling; use the stock code.
	 */
	return (vop_stdclose(ap));
}
1598
/*
 * hammer2_vop_nlink { nch, dvp, vp, cred }
 *
 * Create a hardlink from (vp) to {dvp, nch}.
 */
static
int
hammer2_vop_nlink(struct vop_nlink_args *ap)
{
	hammer2_inode_t *dip;	/* target directory to create link in */
	hammer2_inode_t *ip;	/* inode we are hardlinking to */
	hammer2_inode_t *oip;	/* original inode, for vnode re-pointing */
	hammer2_mount_t *hmp;
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	int error;

	dip = VTOI(ap->a_dvp);
	hmp = dip->hmp;
	if (hmp->ronly)
		return (EROFS);

	/*
	 * (ip) is the inode we are linking to.
	 */
	ip = oip = VTOI(ap->a_vp);
	/* hold the nlinks count stable across consolidation */
	hammer2_inode_lock_nlinks(ip);

	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;

	/*
	 * Create a consolidated real file for the hardlink, adjust (ip),
	 * and move the nlinks lock if necessary.  Tell the function to
	 * bump the hardlink count on the consolidated file.
	 */
	error = hammer2_hardlink_consolidate(&ip, dip);
	if (error)
		goto done;

	/*
	 * If the consolidation changed ip to a HARDLINK pointer we have
	 * to adjust the vnode to point to the actual ip.
	 *
	 * XXX this can race against concurrent vnode ops.
	 */
	if (oip != ip) {
		/* swap the vnode's backing inode from oip to ip */
		hammer2_chain_ref(hmp, &ip->chain);
		hammer2_inode_lock_ex(ip);
		hammer2_inode_lock_ex(oip);
		ip->vp = ap->a_vp;
		ap->a_vp->v_data = ip;
		oip->vp = NULL;
		hammer2_inode_unlock_ex(oip);
		hammer2_inode_unlock_ex(ip);
		hammer2_chain_drop(hmp, &oip->chain);
	}

	/*
	 * The act of connecting the existing (ip) will properly bump the
	 * nlinks count.  However, vp will incorrectly point at the old
	 * inode which has now been turned into a OBJTYPE_HARDLINK pointer.
	 *
	 * We must reconnect the vp.
	 */
	hammer2_chain_lock(hmp, &ip->chain, HAMMER2_RESOLVE_ALWAYS);
	error = hammer2_inode_connect(dip, ip, name, name_len);
	hammer2_chain_unlock(hmp, &ip->chain);
	if (error == 0) {
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, ap->a_vp);
	}
done:
	/* release the nlinks hold taken above (possibly moved to new ip) */
	hammer2_inode_unlock_nlinks(ip);
	return error;
}
1677
1678 /*
1679  * hammer2_vop_ncreate { nch, dvp, vpp, cred, vap }
1680  *
1681  * The operating system has already ensured that the directory entry
1682  * does not exist and done all appropriate namespace locking.
1683  */
1684 static
1685 int
1686 hammer2_vop_ncreate(struct vop_ncreate_args *ap)
1687 {
1688         hammer2_mount_t *hmp;
1689         hammer2_inode_t *dip;
1690         hammer2_inode_t *nip;
1691         struct namecache *ncp;
1692         const uint8_t *name;
1693         size_t name_len;
1694         int error;
1695
1696         dip = VTOI(ap->a_dvp);
1697         hmp = dip->hmp;
1698         if (hmp->ronly)
1699                 return (EROFS);
1700
1701         ncp = ap->a_nch->ncp;
1702         name = ncp->nc_name;
1703         name_len = ncp->nc_nlen;
1704
1705         error = hammer2_inode_create(dip, ap->a_vap, ap->a_cred,
1706                                      name, name_len, &nip);
1707         if (error) {
1708                 KKASSERT(nip == NULL);
1709                 *ap->a_vpp = NULL;
1710                 return error;
1711         }
1712         *ap->a_vpp = hammer2_igetv(nip, &error);
1713         hammer2_chain_unlock(hmp, &nip->chain);
1714
1715         if (error == 0) {
1716                 cache_setunresolved(ap->a_nch);
1717                 cache_setvp(ap->a_nch, *ap->a_vpp);
1718         }
1719         return error;
1720 }
1721
/*
 * hammer2_vop_nsymlink { nch, dvp, vpp, cred, vap, target }
 *
 * Create a symbolic link named by the namecache entry in directory
 * (dvp) whose contents are the string ap->a_target.  Short targets are
 * stored directly in the inode's embedded data area; longer targets are
 * written out through the normal file-write path.
 */
static
int
hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
{
	hammer2_mount_t *hmp;
	hammer2_inode_t *dip;	/* directory the symlink is created in */
	hammer2_inode_t *nip;	/* newly created symlink inode */
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	int error;

	dip = VTOI(ap->a_dvp);
	hmp = dip->hmp;
	/* symlink creation is a modifying operation */
	if (hmp->ronly)
		return (EROFS);

	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;

	ap->a_vap->va_type = VLNK;	/* enforce type */

	error = hammer2_inode_create(dip, ap->a_vap, ap->a_cred,
				     name, name_len, &nip);
	if (error) {
		KKASSERT(nip == NULL);
		*ap->a_vpp = NULL;
		return error;
	}
	*ap->a_vpp = hammer2_igetv(nip, &error);

	/*
	 * Build the softlink (~like file data) and finalize the namecache.
	 */
	if (error == 0) {
		size_t bytes;
		struct uio auio;
		struct iovec aiov;

		bytes = strlen(ap->a_target);

		if (bytes <= HAMMER2_EMBEDDED_BYTES) {
			/*
			 * Target fits in the inode's embedded data area;
			 * copy it in directly.
			 */
			KKASSERT(nip->ip_data.op_flags &
				 HAMMER2_OPFLAG_DIRECTDATA);
			bcopy(ap->a_target, nip->ip_data.u.data, bytes);
			nip->ip_data.size = bytes;
		} else {
			/*
			 * Longer target: write it out as regular file
			 * data using a kernel-space uio.
			 */
			bzero(&auio, sizeof(auio));
			bzero(&aiov, sizeof(aiov));
			auio.uio_iov = &aiov;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_WRITE;
			auio.uio_resid = bytes;
			auio.uio_iovcnt = 1;
			auio.uio_td = curthread;
			aiov.iov_base = ap->a_target;
			aiov.iov_len = bytes;
			error = hammer2_write_file(nip, &auio, IO_APPEND, 0);
			/*
			 * XXX handle error -- currently swallowed, so a
			 * failed data write still leaves the symlink
			 * connected.  NOTE(review): consider propagating
			 * the error and unwinding the create.
			 */
			error = 0;
		}
	}
	hammer2_chain_unlock(hmp, &nip->chain);

	/*
	 * Finalize namecache
	 */
	if (error == 0) {
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, *ap->a_vpp);
		/* hammer2_knote(ap->a_dvp, NOTE_WRITE); */
	}
	return error;
}
1800
1801 /*
1802  * hammer2_vop_nremove { nch, dvp, cred }
1803  */
1804 static
1805 int
1806 hammer2_vop_nremove(struct vop_nremove_args *ap)
1807 {
1808         hammer2_inode_t *dip;
1809         hammer2_mount_t *hmp;
1810         struct namecache *ncp;
1811         const uint8_t *name;
1812         size_t name_len;
1813         int error;
1814
1815         dip = VTOI(ap->a_dvp);
1816         hmp = dip->hmp;
1817         if (hmp->ronly)
1818                 return(EROFS);
1819
1820         ncp = ap->a_nch->ncp;
1821         name = ncp->nc_name;
1822         name_len = ncp->nc_nlen;
1823
1824         error = hammer2_unlink_file(dip, name, name_len, 0, NULL);
1825
1826         if (error == 0) {
1827                 cache_setunresolved(ap->a_nch);
1828                 cache_setvp(ap->a_nch, NULL);
1829         }
1830         return (error);
1831 }
1832
1833 /*
1834  * hammer2_vop_nrmdir { nch, dvp, cred }
1835  */
1836 static
1837 int
1838 hammer2_vop_nrmdir(struct vop_nrmdir_args *ap)
1839 {
1840         hammer2_inode_t *dip;
1841         hammer2_mount_t *hmp;
1842         struct namecache *ncp;
1843         const uint8_t *name;
1844         size_t name_len;
1845         int error;
1846
1847         dip = VTOI(ap->a_dvp);
1848         hmp = dip->hmp;
1849         if (hmp->ronly)
1850                 return(EROFS);
1851
1852         ncp = ap->a_nch->ncp;
1853         name = ncp->nc_name;
1854         name_len = ncp->nc_nlen;
1855
1856         error = hammer2_unlink_file(dip, name, name_len, 1, NULL);
1857
1858         if (error == 0) {
1859                 cache_setunresolved(ap->a_nch);
1860                 cache_setvp(ap->a_nch, NULL);
1861         }
1862         return (error);
1863 }
1864
/*
 * hammer2_vop_nrename { fnch, tnch, fdvp, tdvp, cred }
 *
 * Rename the entry named by (fnch) in directory (fdvp) to the name
 * (tnch) in directory (tdvp), replacing any existing target entry.
 * Cross-mount renames are rejected with EXDEV.
 */
static
int
hammer2_vop_nrename(struct vop_nrename_args *ap)
{
	struct namecache *fncp;	/* source entry */
	struct namecache *tncp;	/* target entry */
	hammer2_inode_t *fdip;	/* source directory */
	hammer2_inode_t *tdip;	/* target directory */
	hammer2_inode_t *ip;	/* inode being renamed */
	hammer2_mount_t *hmp;
	const uint8_t *fname;
	size_t fname_len;
	const uint8_t *tname;
	size_t tname_len;
	int error;

	/* renames never cross mount points */
	if (ap->a_fdvp->v_mount != ap->a_tdvp->v_mount)
		return(EXDEV);
	if (ap->a_fdvp->v_mount != ap->a_fnch->ncp->nc_vp->v_mount)
		return(EXDEV);

	fdip = VTOI(ap->a_fdvp);	/* source directory */
	tdip = VTOI(ap->a_tdvp);	/* target directory */

	hmp = fdip->hmp;		/* check read-only filesystem */
	if (hmp->ronly)
		return(EROFS);

	fncp = ap->a_fnch->ncp;		/* entry name in source */
	fname = fncp->nc_name;
	fname_len = fncp->nc_nlen;

	tncp = ap->a_tnch->ncp;		/* entry name in target */
	tname = tncp->nc_name;
	tname_len = tncp->nc_nlen;

	/*
	 * ip is the inode being removed.  If this is a hardlink then
	 * ip represents the actual file and not the hardlink marker.
	 */
	ip = VTOI(fncp->nc_vp);

	/*
	 * Keep a tight grip on the inode as removing it should disconnect
	 * it and we don't want to destroy it.
	 *
	 * NOTE: To avoid deadlocks we cannot lock (ip) while we are
	 *       unlinking elements from their directories.  Locking
	 *       the nlinks field does not lock the whole inode.
	 */
	hammer2_inode_lock_nlinks(ip);

	/*
	 * Remove target if it exists (ENOENT simply means there was
	 * nothing to replace).
	 */
	error = hammer2_unlink_file(tdip, tname, tname_len, -1, NULL);
	if (error && error != ENOENT)
		goto done;
	cache_setunresolved(ap->a_tnch);
	cache_setvp(ap->a_tnch, NULL);

	/*
	 * Disconnect (fdip, fname) from the source directory.  This will
	 * disconnect (ip) if it represents a direct file.  If (ip) represents
	 * a hardlink the HARDLINK pointer object will be removed but the
	 * hardlink will stay intact.
	 *
	 * If (ip) is already hardlinked we have to resolve to a consolidated
	 * file but we do not bump the nlinks count.  (ip) must hold the nlinks
	 * lock & ref for the operation.  If the consolidated file has been
	 * relocated (ip) will be adjusted and the related nlinks lock moved
	 * along with it.
	 *
	 * If (ip) does not have multiple links we can just copy the physical
	 * contents of the inode.
	 */
	if (ip->ip_data.nlinks > 1) {
		error = hammer2_hardlink_consolidate(&ip, tdip);
		if (error)
			goto done;
	}
	error = hammer2_unlink_file(fdip, fname, fname_len, -1, ip);
	if (error)
		goto done;

	/*
	 * Reconnect ip to target directory.
	 *
	 * WARNING: chain locks can lock buffer cache buffers, to avoid
	 *          deadlocks we want to unlock before issuing a cache_*()
	 *          op (that might have to lock a vnode).
	 */
	hammer2_chain_lock(hmp, &ip->chain, HAMMER2_RESOLVE_ALWAYS);
	error = hammer2_inode_connect(tdip, ip, tname, tname_len);
	hammer2_chain_unlock(hmp, &ip->chain);

	if (error == 0) {
		cache_rename(ap->a_fnch, ap->a_tnch);
	}
done:
	hammer2_inode_unlock_nlinks(ip);

	return (error);
}
1972
1973 static int hammer2_strategy_read(struct vop_strategy_args *ap);
1974 static int hammer2_strategy_write(struct vop_strategy_args *ap);
1975
1976 static
1977 int
1978 hammer2_vop_strategy(struct vop_strategy_args *ap)
1979 {
1980         struct bio *biop;
1981         struct buf *bp;
1982         int error;
1983
1984         biop = ap->a_bio;
1985         bp = biop->bio_buf;
1986
1987         switch(bp->b_cmd) {
1988         case BUF_CMD_READ:
1989                 error = hammer2_strategy_read(ap);
1990                 ++hammer2_iod_file_read;
1991                 break;
1992         case BUF_CMD_WRITE:
1993                 error = hammer2_strategy_write(ap);
1994                 ++hammer2_iod_file_write;
1995                 break;
1996         default:
1997                 bp->b_error = error = EINVAL;
1998                 bp->b_flags |= B_ERROR;
1999                 biodone(biop);
2000                 break;
2001         }
2002
2003         return (error);
2004 }
2005
/*
 * Read strategy: resolve the logical offset of the bio to a device
 * offset (caching the translation in the pushed bio), then either
 * zero-fill, copy embedded inode data, or forward the IO to the device.
 */
static
int
hammer2_strategy_read(struct vop_strategy_args *ap)
{
	struct buf *bp;
	struct bio *bio;	/* logical bio from the caller */
	struct bio *nbio;	/* translated (device-level) bio */
	hammer2_mount_t *hmp;
	hammer2_inode_t *ip;
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	hammer2_key_t lbase;	/* logical base offset of the request */

	bio = ap->a_bio;
	bp = bio->bio_buf;
	ip = VTOI(ap->a_vp);
	hmp = ip->hmp;
	nbio = push_bio(bio);

	lbase = bio->bio_offset;
	chain = NULL;
	/* requests must be aligned to the physical buffer size */
	KKASSERT(((int)lbase & HAMMER2_PBUFMASK) == 0);

	/*
	 * We must characterize the logical->physical translation if it
	 * has not already been cached.
	 *
	 * Physical data references < LBUFSIZE are never cached.  This
	 * includes both small-block allocations and inode-embedded data.
	 */
	if (nbio->bio_offset == NOOFFSET) {
		/* shared chain locks suffice; this is a read-only lookup */
		parent = &ip->chain;
		hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS |
						HAMMER2_RESOLVE_SHARED);

		chain = hammer2_chain_lookup(hmp, &parent, lbase, lbase,
					     HAMMER2_LOOKUP_NODATA |
					     HAMMER2_LOOKUP_SHARED);
		if (chain == NULL) {
			/*
			 * Data is zero-fill
			 */
			nbio->bio_offset = ZFOFFSET;
		} else if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
			/*
			 * Data is embedded in the inode (do nothing).
			 * bio_offset stays NOOFFSET so the copy happens
			 * in the final else clause below.
			 */
			KKASSERT(chain == parent);
			hammer2_chain_unlock(hmp, chain);
		} else if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
			/*
			 * Data is on-media; cache the device offset in
			 * the pushed bio.
			 */
			KKASSERT(bp->b_bcount == chain->bytes);
			nbio->bio_offset = chain->bref.data_off &
					   HAMMER2_OFF_MASK;
			hammer2_chain_unlock(hmp, chain);
			KKASSERT(nbio->bio_offset != 0);
		} else {
			panic("hammer2_strategy_read: unknown bref type");
		}
		hammer2_chain_unlock(hmp, parent);
	}

	if (hammer2_debug & 0x0020) {
		kprintf("read %016jx %016jx\n",
			bio->bio_offset, nbio->bio_offset);
	}

	if (nbio->bio_offset == ZFOFFSET) {
		/*
		 * Data is zero-fill
		 */
		bp->b_resid = 0;
		bp->b_error = 0;
		bzero(bp->b_data, bp->b_bcount);
		biodone(nbio);
	} else if (nbio->bio_offset != NOOFFSET) {
		/*
		 * Forward direct IO to the device
		 */
		vn_strategy(hmp->devvp, nbio);
	} else {
		/*
		 * Data is embedded in inode.
		 *
		 * NOTE(review): chain was unlocked in the lookup section
		 * above before this bcopy dereferences chain->data --
		 * verify the chain data remains stable here (looks racy).
		 */
		bcopy(chain->data->ipdata.u.data, bp->b_data,
		      HAMMER2_EMBEDDED_BYTES);
		bzero(bp->b_data + HAMMER2_EMBEDDED_BYTES,
		      bp->b_bcount - HAMMER2_EMBEDDED_BYTES);
		bp->b_resid = 0;
		bp->b_error = 0;
		biodone(nbio);
	}
	return (0);
}
2102
2103 static
2104 int
2105 hammer2_strategy_write(struct vop_strategy_args *ap)
2106 {
2107         struct buf *bp;
2108         struct bio *bio;
2109         struct bio *nbio;
2110         hammer2_mount_t *hmp;
2111         hammer2_inode_t *ip;
2112
2113         bio = ap->a_bio;
2114         bp = bio->bio_buf;
2115         ip = VTOI(ap->a_vp);
2116         hmp = ip->hmp;
2117         nbio = push_bio(bio);
2118
2119         KKASSERT((bio->bio_offset & HAMMER2_PBUFMASK64) == 0);
2120         KKASSERT(nbio->bio_offset != 0 && nbio->bio_offset != ZFOFFSET);
2121
2122         if (nbio->bio_offset == NOOFFSET) {
2123                 /*
2124                  * Must be embedded in the inode.
2125                  */
2126                 KKASSERT(bio->bio_offset == 0);
2127                 bcopy(bp->b_data, ip->ip_data.u.data, HAMMER2_EMBEDDED_BYTES);
2128                 bp->b_resid = 0;
2129                 bp->b_error = 0;
2130                 biodone(nbio);
2131
2132                 /*
2133                  * This special flag does not follow the normal MODIFY rules
2134                  * because we might deadlock on ip.  Instead we depend on
2135                  * VOP_FSYNC() to detect the case.
2136                  */
2137                 atomic_set_int(&ip->chain.flags, HAMMER2_CHAIN_DIRTYEMBED);
2138         } else {
2139                 /*
2140                  * Forward direct IO to the device
2141                  */
2142                 vn_strategy(hmp->devvp, nbio);
2143         }
2144         return (0);
2145 }
2146
2147 /*
2148  * hammer2_vop_ioctl { vp, command, data, fflag, cred }
2149  */
2150 static
2151 int
2152 hammer2_vop_ioctl(struct vop_ioctl_args *ap)
2153 {
2154         hammer2_mount_t *hmp;
2155         hammer2_inode_t *ip;
2156         int error;
2157
2158         ip = VTOI(ap->a_vp);
2159         hmp = ip->hmp;
2160
2161         error = hammer2_ioctl(ip, ap->a_command, (void *)ap->a_data,
2162                               ap->a_fflag, ap->a_cred);
2163         return (error);
2164 }
2165
2166 static
2167 int 
2168 hammer2_vop_mountctl(struct vop_mountctl_args *ap)
2169 {
2170         struct mount *mp;
2171         hammer2_pfsmount_t *pmp;
2172         int rc;
2173
2174         switch (ap->a_op) {
2175         case (MOUNTCTL_SET_EXPORT):
2176                 mp = ap->a_head.a_ops->head.vv_mount;
2177                 pmp = MPTOPMP(mp);
2178
2179                 if (ap->a_ctllen != sizeof(struct export_args))
2180                         rc = (EINVAL);
2181                 else
2182                         rc = vfs_export(mp, &pmp->export,
2183                                         (const struct export_args *)ap->a_ctl);
2184                 break;
2185         default:
2186                 rc = vop_stdmountctl(ap);
2187                 break;
2188         }
2189         return (rc);
2190 }
2191
2192 struct vop_ops hammer2_vnode_vops = {
2193         .vop_default    = vop_defaultop,
2194         .vop_fsync      = hammer2_vop_fsync,
2195         .vop_getpages   = vop_stdgetpages,
2196         .vop_putpages   = vop_stdputpages,
2197         .vop_access     = hammer2_vop_access,
2198         .vop_advlock    = hammer2_vop_advlock,
2199         .vop_close      = hammer2_vop_close,
2200         .vop_nlink      = hammer2_vop_nlink,
2201         .vop_ncreate    = hammer2_vop_ncreate,
2202         .vop_nsymlink   = hammer2_vop_nsymlink,
2203         .vop_nremove    = hammer2_vop_nremove,
2204         .vop_nrmdir     = hammer2_vop_nrmdir,
2205         .vop_nrename    = hammer2_vop_nrename,
2206         .vop_getattr    = hammer2_vop_getattr,
2207         .vop_setattr    = hammer2_vop_setattr,
2208         .vop_readdir    = hammer2_vop_readdir,
2209         .vop_readlink   = hammer2_vop_readlink,
2210         .vop_getpages   = vop_stdgetpages,
2211         .vop_putpages   = vop_stdputpages,
2212         .vop_read       = hammer2_vop_read,
2213         .vop_write      = hammer2_vop_write,
2214         .vop_open       = hammer2_vop_open,
2215         .vop_inactive   = hammer2_vop_inactive,
2216         .vop_reclaim    = hammer2_vop_reclaim,
2217         .vop_nresolve   = hammer2_vop_nresolve,
2218         .vop_nlookupdotdot = hammer2_vop_nlookupdotdot,
2219         .vop_nmkdir     = hammer2_vop_nmkdir,
2220         .vop_ioctl      = hammer2_vop_ioctl,
2221         .vop_mountctl   = hammer2_vop_mountctl,
2222         .vop_bmap       = hammer2_vop_bmap,
2223         .vop_strategy   = hammer2_vop_strategy,
2224 };
2225
/*
 * Vnode ops vector for special (device) files.  Currently empty --
 * apparently a placeholder; no spec-file operations are wired in yet.
 */
struct vop_ops hammer2_spec_vops = {

};
2229
/*
 * Vnode ops vector for FIFOs.  Currently empty -- apparently a
 * placeholder; no fifo operations are wired in yet.
 */
struct vop_ops hammer2_fifo_vops = {

};