/*
 * Copyright (c) 2011-2012 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/mountctl.h>
#include <sys/dirent.h>
#include <sys/uio.h>

#include "hammer2.h"

#define ZFOFFSET        (-2LL)

static int hammer2_read_file(hammer2_inode_t *ip, struct uio *uio,
                                int seqcount);
static int hammer2_write_file(hammer2_inode_t *ip, struct uio *uio, int ioflag,
                              int seqcount);
static hammer2_off_t hammer2_assign_physical(hammer2_inode_t *ip,
                                hammer2_key_t lbase, int lblksize, int *errorp);
static void hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize);
static void hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize);

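/*
 * Post a kqueue event on the vnode if any event flags are pending.
 */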
static __inline
void
hammer2_knote(struct vnode *vp, int flags)
{
        if (flags)
                KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags);
}

/*
 * Last reference to a vnode is going away but it is still cached.
 */
static
int
hammer2_vop_inactive(struct vop_inactive_args *ap)
{
        struct vnode *vp;
        struct hammer2_inode *ip;
#if 0
        struct hammer2_mount *hmp;
#endif

        vp = ap->a_vp;
        ip = VTOI(vp);

        /*
         * Degenerate case
         */
        if (ip == NULL) {
                vrecycle(vp);
                return (0);
        }

        /*
         * Detect updates to the embedded data which may be synchronized by
         * the strategy code.  Simply mark the inode modified so it gets
         * picked up by our normal flush.
         */
        if (ip->chain.flags & HAMMER2_CHAIN_DIRTYEMBED) {
                hammer2_inode_lock_ex(ip);
                atomic_clear_int(&ip->chain.flags, HAMMER2_CHAIN_DIRTYEMBED);
                hammer2_chain_modify(ip->hmp, &ip->chain, 0);
                hammer2_inode_unlock_ex(ip);
        }

        /*
         * Check for deleted inodes and recycle immediately.
         */
        if (ip->chain.flags & HAMMER2_CHAIN_DELETED) {
                vrecycle(vp);
        }
        return (0);
}

/*
 * Reclaim a vnode so that it can be reused; after the inode is
 * disassociated, the filesystem must manage it alone.
 */
static
int
hammer2_vop_reclaim(struct vop_reclaim_args *ap)
{
        struct hammer2_inode *ip;
        struct hammer2_mount *hmp;
        struct vnode *vp;

        vp = ap->a_vp;
        ip = VTOI(vp);
        if (ip == NULL)
                return(0);
        hmp = ip->hmp;

        /*
         * Set SUBMODIFIED so we can detect and propagate the DESTROYED
         * bit in the flush code.
         */
        hammer2_inode_lock_ex(ip);
        vp->v_data = NULL;
        ip->vp = NULL;
        if (ip->chain.flags & HAMMER2_CHAIN_DELETED) {
                atomic_set_int(&ip->chain.flags, HAMMER2_CHAIN_DESTROYED |
                                                 HAMMER2_CHAIN_SUBMODIFIED);
        }
        hammer2_chain_flush(hmp, &ip->chain, 0);
        hammer2_inode_unlock_ex(ip);
        hammer2_chain_drop(hmp, &ip->chain);    /* vp ref */

        /*
         * XXX handle background sync when ip dirty, kernel will no longer
         * notify us regarding this inode because there is no longer a
         * vnode attached to it.
         */

        return (0);
}

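/*
 * hammer2_vop_fsync { vp, waitfor, flags }
 *
 * Sync the vnode's dirty buffers, then flush the inode's chain when
 * this is an actual fsync() syscall.
 */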
static
int
hammer2_vop_fsync(struct vop_fsync_args *ap)
{
        struct hammer2_inode *ip;
        struct hammer2_mount *hmp;
        struct vnode *vp;

        vp = ap->a_vp;
        ip = VTOI(vp);
        hmp = ip->hmp;

        hammer2_inode_lock_ex(ip);
        vfsync(vp, ap->a_waitfor, 1, NULL, NULL);

        /*
         * Detect updates to the embedded data which may be synchronized by
         * the strategy code.  Simply mark the inode modified so it gets
         * picked up by our normal flush.
         */
        if (ip->chain.flags & HAMMER2_CHAIN_DIRTYEMBED) {
                atomic_clear_int(&ip->chain.flags, HAMMER2_CHAIN_DIRTYEMBED);
                hammer2_chain_modify(hmp, &ip->chain, 0);
        }

        /*
         * Calling chain_flush here creates a lot of duplicative
         * COW operations due to non-optimal vnode ordering.
         *
         * Only do it for an actual fsync() syscall.  The other forms
         * which call this function will eventually call chain_flush
         * on the volume root as a catch-all, which is far more optimal.
         */
        if (ap->a_flags & VOP_FSYNC_SYSCALL)
                hammer2_chain_flush(hmp, &ip->chain, 0);
        hammer2_inode_unlock_ex(ip);
        return (0);
}

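/*
 * Check access permissions against the inode's uid, gid, mode and
 * user flags.
 */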
static
int
hammer2_vop_access(struct vop_access_args *ap)
{
        hammer2_inode_t *ip = VTOI(ap->a_vp);
        uid_t uid;
        gid_t gid;
        int error;

        uid = hammer2_to_unix_xid(&ip->ip_data.uid);
        gid = hammer2_to_unix_xid(&ip->ip_data.gid);

        error = vop_helper_access(ap, uid, gid, ip->ip_data.mode,
                                  ip->ip_data.uflags);
        return (error);
}

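/*
 * Retrieve inode attributes.  A shared inode lock is sufficient to
 * snapshot the media attributes into the vattr.
 */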
static
int
hammer2_vop_getattr(struct vop_getattr_args *ap)
{
        hammer2_pfsmount_t *pmp;
        hammer2_inode_t *ip;
        struct vnode *vp;
        struct vattr *vap;

        vp = ap->a_vp;
        vap = ap->a_vap;

        ip = VTOI(vp);
        pmp = ip->pmp;

        hammer2_inode_lock_sh(ip);

        vap->va_fsid = pmp->mp->mnt_stat.f_fsid.val[0];
        vap->va_fileid = ip->ip_data.inum;
        vap->va_mode = ip->ip_data.mode;
        vap->va_nlink = ip->ip_data.nlinks;
        vap->va_uid = hammer2_to_unix_xid(&ip->ip_data.uid);
        vap->va_gid = hammer2_to_unix_xid(&ip->ip_data.gid);
        vap->va_rmajor = 0;
        vap->va_rminor = 0;
        vap->va_size = ip->ip_data.size;
        vap->va_blocksize = HAMMER2_PBUFSIZE;
        vap->va_flags = ip->ip_data.uflags;
        hammer2_time_to_timespec(ip->ip_data.ctime, &vap->va_ctime);
        hammer2_time_to_timespec(ip->ip_data.mtime, &vap->va_mtime);
        hammer2_time_to_timespec(ip->ip_data.mtime, &vap->va_atime);
                                                /* atime not supported */
        vap->va_gen = 1;
        vap->va_bytes = vap->va_size;   /* XXX */
        vap->va_type = hammer2_get_vtype(ip);
        vap->va_filerev = 0;
        vap->va_uid_uuid = ip->ip_data.uid;
        vap->va_gid_uuid = ip->ip_data.gid;
        vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
                          VA_FSID_UUID_VALID;

        hammer2_inode_unlock_sh(ip);

        return (0);
}

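/*
 * Set inode attributes: user flags, ownership, size, times and mode.
 * The chain is marked modified before any media data is changed.
 */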
static
int
hammer2_vop_setattr(struct vop_setattr_args *ap)
{
        hammer2_mount_t *hmp;
        hammer2_inode_t *ip;
        struct vnode *vp;
        struct vattr *vap;
        int error;
        int kflags = 0;
        int domtime = 0;
        uint64_t ctime;

        vp = ap->a_vp;
        vap = ap->a_vap;
        hammer2_update_time(&ctime);

        ip = VTOI(vp);
        hmp = ip->hmp;

        if (hmp->ronly)
                return(EROFS);

        hammer2_inode_lock_ex(ip);
        error = 0;

        if (vap->va_flags != VNOVAL) {
                u_int32_t flags;

                flags = ip->ip_data.uflags;
                error = vop_helper_setattr_flags(&flags, vap->va_flags,
                                         hammer2_to_unix_xid(&ip->ip_data.uid),
                                         ap->a_cred);
                if (error == 0) {
                        if (ip->ip_data.uflags != flags) {
                                hammer2_chain_modify(hmp, &ip->chain, 0);
                                ip->ip_data.uflags = flags;
                                ip->ip_data.ctime = ctime;
                                kflags |= NOTE_ATTRIB;
                        }
                        if (ip->ip_data.uflags & (IMMUTABLE | APPEND)) {
                                error = 0;
                                goto done;
                        }
                }
                goto done;
        }
        if (ip->ip_data.uflags & (IMMUTABLE | APPEND)) {
                error = EPERM;
                goto done;
        }
        if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
                mode_t cur_mode = ip->ip_data.mode;
                uid_t cur_uid = hammer2_to_unix_xid(&ip->ip_data.uid);
                gid_t cur_gid = hammer2_to_unix_xid(&ip->ip_data.gid);
                uuid_t uuid_uid;
                uuid_t uuid_gid;

                error = vop_helper_chown(ap->a_vp, vap->va_uid, vap->va_gid,
                                         ap->a_cred,
                                         &cur_uid, &cur_gid, &cur_mode);
                if (error == 0) {
                        hammer2_guid_to_uuid(&uuid_uid, cur_uid);
                        hammer2_guid_to_uuid(&uuid_gid, cur_gid);
                        if (bcmp(&uuid_uid, &ip->ip_data.uid,
                                 sizeof(uuid_uid)) ||
                            bcmp(&uuid_gid, &ip->ip_data.gid,
                                 sizeof(uuid_gid)) ||
                            ip->ip_data.mode != cur_mode
                        ) {
                                hammer2_chain_modify(hmp, &ip->chain, 0);
                                ip->ip_data.uid = uuid_uid;
                                ip->ip_data.gid = uuid_gid;
                                ip->ip_data.mode = cur_mode;
                                ip->ip_data.ctime = ctime;
                        }
                        kflags |= NOTE_ATTRIB;
                }
        }

        /*
         * Resize the file
         */
        if (vap->va_size != VNOVAL && ip->ip_data.size != vap->va_size) {
                switch(vp->v_type) {
                case VREG:
                        if (vap->va_size == ip->ip_data.size)
                                break;
                        if (vap->va_size < ip->ip_data.size) {
                                hammer2_truncate_file(ip, vap->va_size);
                        } else {
                                hammer2_extend_file(ip, vap->va_size);
                        }
                        domtime = 1;
                        break;
                default:
                        error = EINVAL;
                        goto done;
                }
        }
#if 0
        /* atime not supported */
        if (vap->va_atime.tv_sec != VNOVAL) {
                hammer2_chain_modify(hmp, &ip->chain, 0);
                ip->ip_data.atime = hammer2_timespec_to_time(&vap->va_atime);
                kflags |= NOTE_ATTRIB;
        }
#endif
        if (vap->va_mtime.tv_sec != VNOVAL) {
                hammer2_chain_modify(hmp, &ip->chain, 0);
                ip->ip_data.mtime = hammer2_timespec_to_time(&vap->va_mtime);
                kflags |= NOTE_ATTRIB;
        }
        if (vap->va_mode != (mode_t)VNOVAL) {
                mode_t cur_mode = ip->ip_data.mode;
                uid_t cur_uid = hammer2_to_unix_xid(&ip->ip_data.uid);
                gid_t cur_gid = hammer2_to_unix_xid(&ip->ip_data.gid);

                error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred,
                                         cur_uid, cur_gid, &cur_mode);
                if (error == 0 && ip->ip_data.mode != cur_mode) {
                        hammer2_chain_modify(hmp, &ip->chain, 0);
                        ip->ip_data.mode = cur_mode;
                        ip->ip_data.ctime = ctime;
                        kflags |= NOTE_ATTRIB;
                }
        }
done:
        hammer2_inode_unlock_ex(ip);
        hammer2_knote(vp, kflags);
        return (error);
}

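/*
 * Read directory entries.  "." and ".." are synthesized as artificial
 * entries 0 and 1 before the directory hash chains are scanned.
 */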
static
int
hammer2_vop_readdir(struct vop_readdir_args *ap)
{
        hammer2_mount_t *hmp;
        hammer2_inode_t *ip;
        hammer2_inode_t *xip;
        hammer2_chain_t *parent;
        hammer2_chain_t *chain;
        hammer2_key_t lkey;
        struct uio *uio;
        off_t *cookies;
        off_t saveoff;
        int cookie_index;
        int ncookies;
        int error;
        int dtype;
        int r;

        ip = VTOI(ap->a_vp);
        hmp = ip->hmp;
        uio = ap->a_uio;
        saveoff = uio->uio_offset;

        /*
         * Setup directory entry cookies if requested
         */
        if (ap->a_ncookies) {
                ncookies = uio->uio_resid / 16 + 1;
                if (ncookies > 1024)
                        ncookies = 1024;
                cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
        } else {
                ncookies = -1;
                cookies = NULL;
        }
        cookie_index = 0;

        /*
         * Handle artificial entries.  To ensure that only positive 64 bit
         * quantities are returned to userland we always strip off bit 63.
         * The hash code is designed such that codes 0x0000-0x7FFF are not
         * used, allowing us to use these codes for artificial entries.
         *
         * Entry 0 is used for '.' and entry 1 is used for '..'.  Do not
         * allow '..' to cross the mount point into (e.g.) the super-root.
         */
        error = 0;
        chain = (void *)(intptr_t)-1;   /* non-NULL for early goto done case */

        if (saveoff == 0) {
                r = vop_write_dirent(&error, uio,
                                     ip->ip_data.inum &
                                        HAMMER2_DIRHASH_USERMSK,
                                     DT_DIR, 1, ".");
                if (r)
                        goto done;
                if (cookies)
                        cookies[cookie_index] = saveoff;
                ++saveoff;
                ++cookie_index;
                if (cookie_index == ncookies)
                        goto done;
        }
        if (saveoff == 1) {
                if (ip->pip == NULL || ip == ip->pmp->iroot)
                        xip = ip;
                else
                        xip = ip->pip;

                r = vop_write_dirent(&error, uio,
                                     xip->ip_data.inum &
                                      HAMMER2_DIRHASH_USERMSK,
                                     DT_DIR, 2, "..");
                if (r)
                        goto done;
                if (cookies)
                        cookies[cookie_index] = saveoff;
                ++saveoff;
                ++cookie_index;
                if (cookie_index == ncookies)
                        goto done;
        }

        lkey = saveoff | HAMMER2_DIRHASH_VISIBLE;

        parent = &ip->chain;
        error = hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
        if (error) {
                hammer2_chain_unlock(hmp, parent);
                goto done;
        }
        chain = hammer2_chain_lookup(hmp, &parent, lkey, lkey, 0);
        if (chain == NULL) {
                chain = hammer2_chain_lookup(hmp, &parent,
                                             lkey, (hammer2_key_t)-1, 0);
        }
        while (chain) {
                if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
                        dtype = hammer2_get_dtype(chain->u.ip);
                        saveoff = chain->bref.key & HAMMER2_DIRHASH_USERMSK;
                        r = vop_write_dirent(&error, uio,
                                             chain->u.ip->ip_data.inum &
                                              HAMMER2_DIRHASH_USERMSK,
                                             dtype, chain->u.ip->ip_data.name_len,
                                             chain->u.ip->ip_data.filename);
                        if (r)
                                break;
                        if (cookies)
                                cookies[cookie_index] = saveoff;
                        ++cookie_index;
                } else {
                        /* XXX chain error */
                        kprintf("bad chain type readdir %d\n",
                                chain->bref.type);
                }

                /*
                 * Keys may not be returned in order so once we have a
                 * placemarker (chain) the scan must allow the full range
                 * or some entries will be missed.
                 */
                chain = hammer2_chain_next(hmp, &parent, chain,
                                           HAMMER2_DIRHASH_VISIBLE,
                                           (hammer2_key_t)-1, 0);
                if (chain) {
                        saveoff = (chain->bref.key &
                                   HAMMER2_DIRHASH_USERMSK) + 1;
                } else {
                        saveoff = (hammer2_key_t)-1;
                }
                if (cookie_index == ncookies)
                        break;
        }
        if (chain)
                hammer2_chain_unlock(hmp, chain);
        hammer2_chain_unlock(hmp, parent);
done:
        if (ap->a_eofflag)
                *ap->a_eofflag = (chain == NULL);
        uio->uio_offset = saveoff & ~HAMMER2_DIRHASH_VISIBLE;
        if (error && cookie_index == 0) {
                if (cookies) {
                        kfree(cookies, M_TEMP);
                        *ap->a_ncookies = 0;
                        *ap->a_cookies = NULL;
                }
        } else {
                if (cookies) {
                        *ap->a_ncookies = cookie_index;
                        *ap->a_cookies = cookies;
                }
        }
        return (error);
}

/*
 * hammer2_vop_readlink { vp, uio, cred }
 */
static
int
hammer2_vop_readlink(struct vop_readlink_args *ap)
{
        struct vnode *vp;
        hammer2_mount_t *hmp;
        hammer2_inode_t *ip;
        int error;

        vp = ap->a_vp;
        if (vp->v_type != VLNK)
                return (EINVAL);
        ip = VTOI(vp);
        hmp = ip->hmp;

        error = hammer2_read_file(ip, ap->a_uio, 0);
        return (error);
}

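/*
 * hammer2_vop_read { vp, uio, ioflag, cred }
 */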
static
int
hammer2_vop_read(struct vop_read_args *ap)
{
        struct vnode *vp;
        hammer2_mount_t *hmp;
        hammer2_inode_t *ip;
        struct uio *uio;
        int error;
        int seqcount;
        int bigread;

        /*
         * Read operations supported on this vnode?
         */
        vp = ap->a_vp;
        if (vp->v_type != VREG)
                return (EINVAL);

        /*
         * Misc
         */
        ip = VTOI(vp);
        hmp = ip->hmp;
        uio = ap->a_uio;
        error = 0;

        seqcount = ap->a_ioflag >> 16;
        bigread = (uio->uio_resid > 100 * 1024 * 1024);

        error = hammer2_read_file(ip, uio, seqcount);
        return (error);
}

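/*
 * hammer2_vop_write { vp, uio, ioflag, cred }
 */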
static
int
hammer2_vop_write(struct vop_write_args *ap)
{
        thread_t td;
        struct vnode *vp;
        hammer2_mount_t *hmp;
        hammer2_inode_t *ip;
        struct uio *uio;
        int error;
        int seqcount;
        int bigwrite;

        /*
         * Write operations supported on this vnode?
         */
        vp = ap->a_vp;
        if (vp->v_type != VREG)
                return (EINVAL);

        /*
         * Misc
         */
        ip = VTOI(vp);
        hmp = ip->hmp;
        uio = ap->a_uio;
        error = 0;
        if (hmp->ronly)
                return (EROFS);

        seqcount = ap->a_ioflag >> 16;
        bigwrite = (uio->uio_resid > 100 * 1024 * 1024);

        /*
         * Check resource limit
         */
        if (uio->uio_resid > 0 && (td = uio->uio_td) != NULL && td->td_proc &&
            uio->uio_offset + uio->uio_resid >
             td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
                lwpsignal(td->td_proc, td->td_lwp, SIGXFSZ);
                return (EFBIG);
        }

        /*
         * ip must be locked if extending the file.
         * ip must be locked to avoid racing a truncation.
         *
         * ip must be marked modified, particularly because the write
         * might wind up being copied into the embedded data area.
         */
        hammer2_inode_lock_ex(ip);
        error = hammer2_write_file(ip, uio, ap->a_ioflag, seqcount);

        hammer2_inode_unlock_ex(ip);
        return (error);
}

/*
 * Perform read operations on a file or symlink given an UNLOCKED
 * inode and uio.
 */
static
int
hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, int seqcount)
{
        struct buf *bp;
        int error;

        error = 0;

        /*
         * UIO read loop
         */
        while (uio->uio_resid > 0 && uio->uio_offset < ip->ip_data.size) {
                hammer2_key_t lbase;
                hammer2_key_t leof;
                int lblksize;
                int loff;
                int n;

                lblksize = hammer2_calc_logical(ip, uio->uio_offset,
                                                &lbase, &leof);

                error = cluster_read(ip->vp, leof, lbase, lblksize,
                                     uio->uio_resid, seqcount * BKVASIZE,
                                     &bp);

                if (error)
                        break;
                loff = (int)(uio->uio_offset - lbase);
                n = lblksize - loff;
                if (n > uio->uio_resid)
                        n = uio->uio_resid;
                if (n > ip->ip_data.size - uio->uio_offset)
                        n = (int)(ip->ip_data.size - uio->uio_offset);
                bp->b_flags |= B_AGE;
                uiomove((char *)bp->b_data + loff, n, uio);
                bqrelse(bp);
        }
        return (error);
}

/*
 * Called with a locked (ip) to do the underlying write to a file or
 * to build the symlink target.
 */
static
int
hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
                   int ioflag, int seqcount)
{
        hammer2_key_t old_eof;
        struct buf *bp;
        int kflags;
        int error;
        int modified = 0;

        /*
         * Setup if append
         */
        if (ioflag & IO_APPEND)
                uio->uio_offset = ip->ip_data.size;
        kflags = 0;
        error = 0;

        /*
         * Extend the file if necessary.  If the write fails at some point
         * we will truncate it back down to cover as much as we were able
         * to write.
         *
         * Doing this now makes it easier to calculate buffer sizes in
         * the loop.
         */
        old_eof = ip->ip_data.size;
        if (uio->uio_offset + uio->uio_resid > ip->ip_data.size) {
                modified = 1;
                hammer2_extend_file(ip, uio->uio_offset + uio->uio_resid);
                kflags |= NOTE_EXTEND;
        }

        /*
         * UIO write loop
         */
        while (uio->uio_resid > 0) {
                hammer2_key_t lbase;
                hammer2_key_t leof;
                int trivial;
                int lblksize;
                int loff;
                int n;

                /*
                 * Don't allow the buffer build to blow out the buffer
                 * cache.
                 */
                if ((ioflag & IO_RECURSE) == 0) {
                        /*
                         * XXX should try to leave this unlocked through
                         *      the whole loop
                         */
                        hammer2_chain_unlock(ip->hmp, &ip->chain);
                        bwillwrite(HAMMER2_PBUFSIZE);
                        hammer2_chain_lock(ip->hmp, &ip->chain,
                                           HAMMER2_RESOLVE_ALWAYS);
                }

                /* XXX bigwrite & signal check test */

                /*
                 * This nominally tells us how much we can cluster and
                 * what the logical buffer size needs to be.  Currently
                 * we don't try to cluster the write and just handle one
                 * block at a time.
                 */
                lblksize = hammer2_calc_logical(ip, uio->uio_offset,
                                                &lbase, &leof);
                loff = (int)(uio->uio_offset - lbase);

                /*
                 * Calculate bytes to copy this transfer and whether the
                 * copy completely covers the buffer or not.
                 */
                trivial = 0;
                n = lblksize - loff;
                if (n > uio->uio_resid) {
                        n = uio->uio_resid;
                        if (uio->uio_offset + n == ip->ip_data.size)
                                trivial = 1;
                } else if (loff == 0) {
                        trivial = 1;
                }

                /*
                 * Get the buffer
                 */
                if (uio->uio_segflg == UIO_NOCOPY) {
                        /*
                         * Issuing a write with the same data backing the
                         * buffer.  Instantiate the buffer to collect the
                         * backing vm pages, then read-in any missing bits.
                         *
                         * This case is used by vop_stdputpages().
                         */
                        bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0);
                        if ((bp->b_flags & B_CACHE) == 0) {
                                bqrelse(bp);
                                error = bread(ip->vp, lbase, lblksize, &bp);
                        }
                } else if (trivial) {
                        /*
                         * Even though we are entirely overwriting the buffer
                         * we may still have to zero it out to avoid a
                         * mmap/write visibility issue.
                         */
                        bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0);
                        if ((bp->b_flags & B_CACHE) == 0)
                                vfs_bio_clrbuf(bp);
                } else {
                        /*
                         * Partial overwrite, read in any missing bits then
                         * replace the portion being written.
                         *
                         * (The strategy code will detect zero-fill physical
                         * blocks for this case).
                         */
                        error = bread(ip->vp, lbase, lblksize, &bp);
                        if (error == 0)
                                bheavy(bp);
                }

                if (error) {
                        brelse(bp);
                        break;
                }

                /*
                 * We have to assign physical storage to the buffer we intend
                 * to dirty or write now to avoid deadlocks in the strategy
                 * code later.
                 *
                 * This can return NOOFFSET for inode-embedded data.  The
                 * strategy code will take care of it in that case.
                 */
                bp->b_bio2.bio_offset =
                        hammer2_assign_physical(ip, lbase, lblksize, &error);
                if (error) {
                        brelse(bp);
                        break;
                }

                /*
                 * Ok, copy the data in
                 */
                hammer2_chain_unlock(ip->hmp, &ip->chain);
                error = uiomove(bp->b_data + loff, n, uio);
                hammer2_chain_lock(ip->hmp, &ip->chain, HAMMER2_RESOLVE_ALWAYS);
                kflags |= NOTE_WRITE;
                modified = 1;

                if (error) {
                        brelse(bp);
                        break;
                }

                /* XXX update ip_data.mtime */

                /*
                 * Once we dirty a buffer any cached offset becomes invalid.
                 *
                 * NOTE: For cluster_write() always use the trailing block
                 *       size, which is HAMMER2_PBUFSIZE.  lblksize is the
                 *       eof-straddling blocksize and is incorrect.
                 */
                bp->b_flags |= B_AGE;
                if (ioflag & IO_SYNC) {
                        bwrite(bp);
                } else if ((ioflag & IO_DIRECT) && loff + n == lblksize) {
                        bp->b_flags |= B_CLUSTEROK;
                        bdwrite(bp);
                } else if (ioflag & IO_ASYNC) {
                        bawrite(bp);
                } else if (hammer2_cluster_enable) {
                        bp->b_flags |= B_CLUSTEROK;
                        cluster_write(bp, leof, HAMMER2_PBUFSIZE, seqcount);
                } else {
                        bp->b_flags |= B_CLUSTEROK;
                        bdwrite(bp);
                }
        }

        /*
         * Cleanup.  If we extended the file EOF but failed to write through,
         * the entire write is considered a failure and we have to back-up.
         */
        if (error && ip->ip_data.size != old_eof) {
                hammer2_truncate_file(ip, old_eof);
        } else if (modified) {
                hammer2_chain_modify(ip->hmp, &ip->chain, 0);
                hammer2_update_time(&ip->ip_data.mtime);
        }
        hammer2_knote(ip->vp, kflags);
        return error;
}

/*
 * Assign physical storage to a logical block.
 *
 * NOOFFSET is returned if the data is inode-embedded.  In this case the
 * strategy code will simply bcopy() the data into the inode.
 *
 * The inode's delta_dcount is adjusted.
 */
static
hammer2_off_t
hammer2_assign_physical(hammer2_inode_t *ip, hammer2_key_t lbase,
                        int lblksize, int *errorp)
{
        hammer2_mount_t *hmp;
        hammer2_chain_t *parent;
        hammer2_chain_t *chain;
        hammer2_off_t pbase;

        *errorp = 0;
        hmp = ip->hmp;

        /*
         * Locate the chain associated with lbase, return a locked chain.
         * However, do not instantiate any data reference (which utilizes a
         * device buffer) because we will be using direct IO via the
         * logical buffer cache buffer.
         */
        parent = &ip->chain;
        hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);

        chain = hammer2_chain_lookup(hmp, &parent,
                                     lbase, lbase,
                                     HAMMER2_LOOKUP_NODATA);

        if (chain == NULL) {
                /*
                 * We found a hole, create a new chain entry.
                 *
                 * NOTE: DATA chains are created without device backing
                 *       store (nor do we want any).
                 */
                chain = hammer2_chain_create(hmp, parent, NULL,
                                             lbase, HAMMER2_PBUFRADIX,
                                             HAMMER2_BREF_TYPE_DATA,
                                             lblksize);
                pbase = chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX;
                ip->delta_dcount += lblksize;
        } else {
                switch (chain->bref.type) {
                case HAMMER2_BREF_TYPE_INODE:
                        /*
                         * The data is embedded in the inode.  The
                         * caller is responsible for marking the inode
                         * modified and copying the data to the embedded
                         * area.
                         */
                        pbase = NOOFFSET;
                        break;
                case HAMMER2_BREF_TYPE_DATA:
                        if (chain->bytes != lblksize) {
                                panic("hammer2_assign_physical: "
                                      "size mismatch %d/%d\n",
                                      lblksize, chain->bytes);
                        }
                        hammer2_chain_modify(hmp, chain,
                                             HAMMER2_MODIFY_OPTDATA);
                        pbase = chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX;
                        break;
                default:
                        panic("hammer2_assign_physical: bad type");
                        /* NOT REACHED */
                        pbase = NOOFFSET;
                        break;
                }
        }

        if (chain)
                hammer2_chain_unlock(hmp, chain);
        hammer2_chain_unlock(hmp, parent);

        return (pbase);
}

/*
 * Truncate the size of a file.
 *
 * This routine adjusts ip->ip_data.size smaller, destroying any related
 * data beyond the new EOF and potentially resizing the block straddling
 * the EOF.
 *
 * The inode must be locked.
 */
static
void
hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize)
{
        hammer2_chain_t *parent;
        hammer2_chain_t *chain;
        hammer2_mount_t *hmp = ip->hmp;
        hammer2_key_t lbase;
        hammer2_key_t leof;
        struct buf *bp;
        int loff;
        int error;
        int oblksize;
        int nblksize;

        hammer2_chain_modify(hmp, &ip->chain, 0);
        bp = NULL;

        /*
         * Destroy any logical buffer cache buffers beyond the file EOF.
         *
         * We call nvtruncbuf() w/ trivial == 1 to prevent it from messing
         * around with the buffer straddling EOF, because we need to assign
         * a new physical offset to it.
         */
        if (ip->vp) {
                nvtruncbuf(ip->vp, nsize,
                           HAMMER2_PBUFSIZE, (int)nsize & HAMMER2_PBUFMASK,
                           1);
        }

        /*
         * Setup for lookup/search
         */
        parent = &ip->chain;
        error = hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
        if (error) {
                hammer2_chain_unlock(hmp, parent);
                /* XXX error reporting */
                return;
        }

        /*
         * Handle the case where a chain/logical-buffer straddles the new
         * EOF.  We told nvtruncbuf() above not to mess with the logical
         * buffer straddling the EOF because we need to reassign its storage
         * and can't let the strategy code do it for us.
         */
        loff = (int)nsize & HAMMER2_PBUFMASK;
        if (loff && ip->vp) {
                oblksize = hammer2_calc_logical(ip, nsize, &lbase, &leof);
                error = bread(ip->vp, lbase, oblksize, &bp);
                KKASSERT(error == 0);
        }
        ip->ip_data.size = nsize;
        nblksize = hammer2_calc_logical(ip, nsize, &lbase, &leof);

        /*
         * Fixup the chain element.  If we have a logical buffer in-hand
         * we don't want to create a conflicting device buffer.
         */
        if (loff && bp) {
                chain = hammer2_chain_lookup(hmp, &parent, lbase, lbase,
                                             HAMMER2_LOOKUP_NODATA);
                if (chain) {
                        allocbuf(bp, nblksize);
                        switch(chain->bref.type) {
                        case HAMMER2_BREF_TYPE_DATA:
                                hammer2_chain_resize(ip, chain,
                                             hammer2_bytes_to_radix(nblksize),
                                             HAMMER2_MODIFY_OPTDATA);
                                bzero(bp->b_data + loff, nblksize - loff);
                                bp->b_bio2.bio_offset = chain->bref.data_off &
                                                        HAMMER2_OFF_MASK;
                                break;
                        case HAMMER2_BREF_TYPE_INODE:
                                bzero(bp->b_data + loff, nblksize - loff);
                                bp->b_bio2.bio_offset = NOOFFSET;
                                break;
                        default:
                                panic("hammer2_truncate_file: bad type");
                                break;
                        }
                        hammer2_chain_unlock(hmp, chain);
                        bp->b_flags |= B_CLUSTEROK;
                        bdwrite(bp);
                } else {
                        /*
                         * Destroy clean buffer w/ wrong buffer size.  Retain
                         * backing store.
                         */
                        bp->b_flags |= B_RELBUF;
                        KKASSERT(bp->b_bio2.bio_offset == NOOFFSET);
                        KKASSERT((bp->b_flags & B_DIRTY) == 0);
                        bqrelse(bp);
                }
        } else if (loff) {
                /*
                 * WARNING: This utilizes a device buffer for the data.
                 *
                 * XXX case should not occur
                 */
                panic("hammer2_truncate_file: non-zero truncation, no-vnode");
                chain = hammer2_chain_lookup(hmp, &parent, lbase, lbase, 0);
                if (chain) {
                        switch(chain->bref.type) {
                        case HAMMER2_BREF_TYPE_DATA:
                                hammer2_chain_resize(ip, chain,
                                             hammer2_bytes_to_radix(nblksize),
                                             0);
                                hammer2_chain_modify(hmp, chain, 0);
                                bzero(chain->data->buf + loff, nblksize - loff);
                                break;
                        case HAMMER2_BREF_TYPE_INODE:
                                if (loff < HAMMER2_EMBEDDED_BYTES) {
                                        hammer2_chain_modify(hmp, chain, 0);
                                        bzero(chain->data->ipdata.u.data + loff,
                                              HAMMER2_EMBEDDED_BYTES - loff);
                                }
                                break;
                        }
                        hammer2_chain_unlock(hmp, chain);
                }
        }

        /*
         * Clean up any fragmentary VM pages now that we have properly
         * resized the straddling buffer.  These pages are no longer
         * part of the buffer.
         */
        if (ip->vp) {
                nvtruncbuf(ip->vp, nsize,
                           nblksize, (int)nsize & (nblksize - 1),
                           1);
        }

        /*
         * Destroy any physical blocks after the new EOF point.
         */
        lbase = (nsize + HAMMER2_PBUFMASK64) & ~HAMMER2_PBUFMASK64;
        chain = hammer2_chain_lookup(hmp, &parent,
                                     lbase, (hammer2_key_t)-1,
                                     HAMMER2_LOOKUP_NODATA);
        while (chain) {
                /*
                 * Degenerate embedded data case, nothing to loop on.
                 */
                if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
                        hammer2_chain_unlock(hmp, chain);
                        break;
                }

                /*
                 * Delete physical data blocks past the file EOF.
                 */
                if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
                        ip->delta_dcount -= chain->bytes;
                        hammer2_chain_delete(hmp, parent, chain);
                }
                /* XXX check parent if empty indirect block & delete */
                chain = hammer2_chain_next(hmp, &parent, chain,
                                           lbase, (hammer2_key_t)-1,
                                           HAMMER2_LOOKUP_NODATA);
        }
        hammer2_chain_unlock(hmp, parent);
}

/*
 * Extend the size of a file.  The inode must be locked.
 *
 * We may have to resize the block straddling the old EOF.
 */
static
void
hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize)
{
        hammer2_mount_t *hmp;
        hammer2_chain_t *parent;
        hammer2_chain_t *chain;
        struct buf *bp;
        hammer2_key_t osize;
        hammer2_key_t obase;
        hammer2_key_t nbase;
        hammer2_key_t leof;
        int oblksize;
        int nblksize;
        int nradix;
        int error;

        KKASSERT(ip->vp);
        hmp = ip->hmp;

        hammer2_chain_modify(hmp, &ip->chain, 0);

        /*
         * Nothing to do if the direct-data case is still intact
         */
        if ((ip->ip_data.op_flags & HAMMER2_OPFLAG_DIRECTDATA) &&
            nsize <= HAMMER2_EMBEDDED_BYTES) {
                ip->ip_data.size = nsize;
                return;
        }

        /*
         * Calculate the blocksize at the original EOF and resize the block
         * if necessary.  Adjust the file size in the inode.
         */
        osize = ip->ip_data.size;
        oblksize = hammer2_calc_logical(ip, osize, &obase, &leof);
        ip->ip_data.size = nsize;
        nblksize = hammer2_calc_logical(ip, osize, &nbase, &leof);

        /*
         * Do all required vnode operations, but do not mess with the
         * buffer straddling the original EOF.
         */
        nvextendbuf(ip->vp,
                    ip->ip_data.size, nsize,
                    0, nblksize,
                    0, (int)nsize & HAMMER2_PBUFMASK,
                    1);

        /*
         * Early return if we have no more work to do.
         */
        if (obase == nbase && oblksize == nblksize &&
            (ip->ip_data.op_flags & HAMMER2_OPFLAG_DIRECTDATA) == 0) {
                return;
        }

        /*
         * We have work to do, including possibly resizing the buffer
         * at the EOF point and turning off DIRECTDATA mode.
         */
        bp = NULL;
        if (((int)osize & HAMMER2_PBUFMASK)) {
                error = bread(ip->vp, obase, oblksize, &bp);
                KKASSERT(error == 0);

                if (obase != nbase) {
                        allocbuf(bp, HAMMER2_PBUFSIZE);
                } else {
                        allocbuf(bp, nblksize);
                }
                vfs_bio_clrbuf(bp);
        }

        /*
         * Disable direct-data mode by loading up a buffer cache buffer
         * with the data, then converting the inode data area into the
         * inode indirect block array area.
         */
        if (ip->ip_data.op_flags & HAMMER2_OPFLAG_DIRECTDATA) {
                ip->ip_data.op_flags &= ~HAMMER2_OPFLAG_DIRECTDATA;
                bzero(&ip->ip_data.u.blockset, sizeof(ip->ip_data.u.blockset));
        }

        /*
         * Resize the chain element at the old EOF.
         */
        if (((int)osize & HAMMER2_PBUFMASK)) {
                parent = &ip->chain;
                error = hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
                KKASSERT(error == 0);

                nradix = hammer2_bytes_to_radix(nblksize);

                chain = hammer2_chain_lookup(hmp, &parent,
                                             obase, obase,
                                             HAMMER2_LOOKUP_NODATA);
                if (chain == NULL) {
                        chain = hammer2_chain_create(hmp, parent, NULL,
                                                     obase, nblksize,
                                                     HAMMER2_BREF_TYPE_DATA,
                                                     nblksize);
                        ip->delta_dcount += nblksize;
                } else {
                        KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA);
                        hammer2_chain_resize(ip, chain, nradix,
                                             HAMMER2_MODIFY_OPTDATA);
                }
                bp->b_bio2.bio_offset = chain->bref.data_off &
                                        HAMMER2_OFF_MASK;
                hammer2_chain_unlock(hmp, chain);
                bp->b_flags |= B_CLUSTEROK;
                bdwrite(bp);
                hammer2_chain_unlock(hmp, parent);
        }
}

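/*
 * hammer2_vop_nresolve { nch, dvp, cred }
 *
 * Resolve a directory entry by name, following hardlink forwarding
 * entries to the real inode when necessary.
 */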
static
int
hammer2_vop_nresolve(struct vop_nresolve_args *ap)
{
        hammer2_inode_t *dip;
        hammer2_inode_t *ip;
        hammer2_mount_t *hmp;
        hammer2_chain_t *parent;
        hammer2_chain_t *chain;
        struct namecache *ncp;
        const uint8_t *name;
        size_t name_len;
        hammer2_key_t lhc;
        int error = 0;
        struct vnode *vp;

        dip = VTOI(ap->a_dvp);
        hmp = dip->hmp;
        ncp = ap->a_nch->ncp;
        name = ncp->nc_name;
        name_len = ncp->nc_nlen;
        lhc = hammer2_dirhash(name, name_len);

        /*
         * Note: In DragonFly the kernel handles '.' and '..'.
         */
        parent = &dip->chain;
        hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
        chain = hammer2_chain_lookup(hmp, &parent,
                                     lhc, lhc + HAMMER2_DIRHASH_LOMASK,
                                     0);
        while (chain) {
                if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
                    chain->u.ip &&
                    name_len == chain->data->ipdata.name_len &&
                    bcmp(name, chain->data->ipdata.filename, name_len) == 0) {
                        break;
                }
                chain = hammer2_chain_next(hmp, &parent, chain,
                                           lhc, lhc + HAMMER2_DIRHASH_LOMASK,
                                           0);
        }
        hammer2_chain_unlock(hmp, parent);

        /*
         * If the inode represents a forwarding entry for a hardlink we have
         * to locate the actual inode.  The original ip is saved for possible
         * deconsolidation.  (ip) will only be set to non-NULL when we have
         * to locate the real file via a hardlink.  ip will be referenced but
         * not locked in that situation.  chain is passed in locked and
         * returned locked.
         */
        ip = NULL;
        if (chain && chain->u.ip->ip_data.type == HAMMER2_OBJTYPE_HARDLINK) {
                kprintf("hammer2: need to find hardlink for %s\n",
                        chain->u.ip->ip_data.filename);
                error = hammer2_hardlink_find(dip, &chain, &ip);
                if (error) {
                        if (chain) {
                                hammer2_chain_unlock(hmp, chain);
                                chain = NULL;
                        }
                        goto failed;
                }
        }

        /*
         * Deconsolidate any hardlink whose nlinks == 1.  Ignore errors.
         * If an error occurs chain and ip are left alone.
         */
        if (ip && chain && chain->u.ip->ip_data.nlinks == 1 && !hmp->ronly) {
                kprintf("hammer2: need to unconsolidate hardlink for %s\n",
                        chain->u.ip->ip_data.filename);
                hammer2_hardlink_deconsolidate(dip, &chain, &ip);
        }

        /*
         * Acquire the related vnode
         */
        if (chain) {
                vp = hammer2_igetv(chain->u.ip, &error);
                if (error == 0) {
                        vn_unlock(vp);
                        cache_setvp(ap->a_nch, vp);
                        vrele(vp);
                }
                hammer2_chain_unlock(hmp, chain);
        } else {
                error = ENOENT;
failed:
                cache_setvp(ap->a_nch, NULL);
        }
        if (ip)
                hammer2_inode_drop(ip);
        return error;
}

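/*
 * hammer2_vop_nlookupdotdot { dvp, vpp, cred }
 *
 * Return a vnode for the parent directory.
 */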
static
int
hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
{
        hammer2_inode_t *dip;
        hammer2_inode_t *ip;
        hammer2_mount_t *hmp;
        int error;

        dip = VTOI(ap->a_dvp);
        hmp = dip->hmp;

        if ((ip = dip->pip) == NULL) {
                *ap->a_vpp = NULL;
                return ENOENT;
        }
        hammer2_chain_lock(hmp, &ip->chain, HAMMER2_RESOLVE_ALWAYS);
        *ap->a_vpp = hammer2_igetv(ip, &error);
        hammer2_chain_unlock(hmp, &ip->chain);

        return error;
}

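/*
 * hammer2_vop_nmkdir { nch, dvp, vpp, cred, vap }
 */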
static
int
hammer2_vop_nmkdir(struct vop_nmkdir_args *ap)
{
        hammer2_mount_t *hmp;
        hammer2_inode_t *dip;
        hammer2_inode_t *nip;
        struct namecache *ncp;
        const uint8_t *name;
        size_t name_len;
        int error;

        dip = VTOI(ap->a_dvp);
        hmp = dip->hmp;
        if (hmp->ronly)
                return (EROFS);

        ncp = ap->a_nch->ncp;
        name = ncp->nc_name;
        name_len = ncp->nc_nlen;

        error = hammer2_inode_create(dip, ap->a_vap, ap->a_cred,
                                     name, name_len, &nip);
        if (error) {
                KKASSERT(nip == NULL);
                *ap->a_vpp = NULL;
                return error;
        }
        *ap->a_vpp = hammer2_igetv(nip, &error);
        hammer2_chain_unlock(hmp, &nip->chain);

        if (error == 0) {
                cache_setunresolved(ap->a_nch);
                cache_setvp(ap->a_nch, *ap->a_vpp);
        }
        return error;
}

1446 /*
1447  * Return the largest contiguous physical disk range for the logical
1448  * request.
1449  *
1450  * (struct vnode *vp, off_t loffset, off_t *doffsetp, int *runp, int *runb)
1451  */
1452 static
1453 int
1454 hammer2_vop_bmap(struct vop_bmap_args *ap)
1455 {
1456         struct vnode *vp;
1457         hammer2_mount_t *hmp;
1458         hammer2_inode_t *ip;
1459         hammer2_chain_t *parent;
1460         hammer2_chain_t *chain;
1461         hammer2_key_t lbeg;
1462         hammer2_key_t lend;
1463         hammer2_off_t pbeg;
1464         hammer2_off_t pbytes;
1465         hammer2_off_t array[HAMMER2_BMAP_COUNT][2];
1466         int loff;
1467         int ai;
1468
1469         /*
1470          * Only supported on regular files
1471          *
1472          * Only supported for read operations (required for cluster_read).
1473          * The block allocation is delayed for write operations.
1474          */
1475         vp = ap->a_vp;
1476         if (vp->v_type != VREG)
1477                 return (EOPNOTSUPP);
1478         if (ap->a_cmd != BUF_CMD_READ)
1479                 return (EOPNOTSUPP);
1480
1481         ip = VTOI(vp);
1482         hmp = ip->hmp;
1483         bzero(array, sizeof(array));
1484
1485         /*
1486          * Calculate logical range
1487          */
1488         KKASSERT((ap->a_loffset & HAMMER2_LBUFMASK64) == 0);
1489         lbeg = ap->a_loffset & HAMMER2_OFF_MASK_HI;
1490         lend = lbeg + HAMMER2_BMAP_COUNT * HAMMER2_PBUFSIZE - 1;
1491         if (lend < lbeg)
1492                 lend = lbeg;
1493         loff = ap->a_loffset & HAMMER2_OFF_MASK_LO;
1494
1495         parent = &ip->chain;
1496         hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
1497         chain = hammer2_chain_lookup(hmp, &parent,
1498                                      lbeg, lend,
1499                                      HAMMER2_LOOKUP_NODATA);
1500         if (chain == NULL) {
1501                 *ap->a_doffsetp = ZFOFFSET;
1502                 hammer2_chain_unlock(hmp, parent);
1503                 return (0);
1504         }
1505
1506         while (chain) {
1507                 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
1508                         ai = (chain->bref.key - lbeg) / HAMMER2_PBUFSIZE;
1509                         KKASSERT(ai >= 0 && ai < HAMMER2_BMAP_COUNT);
1510                         array[ai][0] = chain->bref.data_off & HAMMER2_OFF_MASK;
1511                         array[ai][1] = chain->bytes;
1512                 }
1513                 chain = hammer2_chain_next(hmp, &parent, chain,
1514                                            lbeg, lend,
1515                                            HAMMER2_LOOKUP_NODATA);
1516         }
1517         hammer2_chain_unlock(hmp, parent);
1518
1519         /*
1520          * If the requested loffset is not physically mappable we can't
1521          * bmap.  The caller will have to access the file data via a
1522          * device buffer.
1523          */
1524         if (array[0][0] == 0 || array[0][1] < loff + HAMMER2_LBUFSIZE) {
1525                 *ap->a_doffsetp = NOOFFSET;
1526                 return (0);
1527         }
1528
1529         /*
1530          * Calculate the physical disk offset range for array[0]
1531          */
1532         pbeg = array[0][0] + loff;
1533         pbytes = array[0][1] - loff;
1534
1535         for (ai = 1; ai < HAMMER2_BMAP_COUNT; ++ai) {
1536                 if (array[ai][0] != pbeg + pbytes)
1537                         break;
1538                 pbytes += array[ai][1];
1539         }
1540
1541         *ap->a_doffsetp = pbeg;
1542         if (ap->a_runp)
1543                 *ap->a_runp = pbytes;
        if (ap->a_runb)
                *ap->a_runb = 0;        /* no backward run calculated */
1544         return (0);
1545 }
1546
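/*
 * Standard open.  No hammer2-specific open state is required, so
 * defer to vop_stdopen().
 */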
1547 static
1548 int
1549 hammer2_vop_open(struct vop_open_args *ap)
1550 {
1551         return vop_stdopen(ap);
1552 }
1553
1554 /*
1555  * hammer2_vop_advlock { vp, id, op, fl, flags }
1556  */
1557 static
1558 int
1559 hammer2_vop_advlock(struct vop_advlock_args *ap)
1560 {
1561         hammer2_inode_t *ip = VTOI(ap->a_vp);
1562
1563         return (lf_advlock(ap, &ip->advlock, ip->ip_data.size));
1564 }
1565
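/*
 * Standard close, deferred to vop_stdclose().
 */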
1567 static
1568 int
1569 hammer2_vop_close(struct vop_close_args *ap)
1570 {
1571         return vop_stdclose(ap);
1572 }
1573
1574 /*
1575  * hammer2_vop_nlink { nch, dvp, vp, cred }
1576  *
1577  * Create a hardlink from (vp) to {dvp, nch}.
1578  */
1579 static
1580 int
1581 hammer2_vop_nlink(struct vop_nlink_args *ap)
1582 {
1583         hammer2_inode_t *dip;   /* target directory to create link in */
1584         hammer2_inode_t *ip;    /* inode we are hardlinking to */
1585         hammer2_inode_t *oip;
1586         hammer2_mount_t *hmp;
1587         struct namecache *ncp;
1588         const uint8_t *name;
1589         size_t name_len;
1590         int error;
1591
1592         dip = VTOI(ap->a_dvp);
1593         hmp = dip->hmp;
1594         if (hmp->ronly)
1595                 return (EROFS);
1596
1597         /*
1598          * (ip) is the inode we are linking to.
1599          */
1600         ip = oip = VTOI(ap->a_vp);
1601         hammer2_inode_lock_nlinks(ip);
1602
1603         ncp = ap->a_nch->ncp;
1604         name = ncp->nc_name;
1605         name_len = ncp->nc_nlen;
1606
1607         /*
1608          * Create a consolidated real file for the hardlink, adjust (ip),
1609          * and move the nlinks lock if necessary.  Tell the function to
1610          * bump the hardlink count on the consolidated file.
1611          */
1612         error = hammer2_hardlink_consolidate(&ip, dip);
1613         if (error)
1614                 goto done;
1615
1616         /*
1617          * If the consolidation changed ip to a HARDLINK pointer we have
1618          * If the consolidation turned the original inode into a HARDLINK
1619          * pointer we have to adjust the vnode to point to the actual ip.
1620          * XXX this can race against concurrent vnode ops.
1621          */
1622         if (oip != ip) {
1623                 hammer2_chain_ref(hmp, &ip->chain);
1624                 hammer2_inode_lock_ex(ip);
1625                 hammer2_inode_lock_ex(oip);
1626                 ip->vp = ap->a_vp;
1627                 ap->a_vp->v_data = ip;
1628                 oip->vp = NULL;
1629                 hammer2_inode_unlock_ex(oip);
1630                 hammer2_inode_unlock_ex(ip);
1631                 hammer2_chain_drop(hmp, &oip->chain);
1632         }
1633
1634         /*
1635          * The act of connecting the existing (ip) will properly bump the
1636          * nlinks count.  However, vp will incorrectly point at the old
1637          * inode which has now been turned into an OBJTYPE_HARDLINK pointer.
1638          *
1639          * We must reconnect the vp.
1640          */
1641         hammer2_chain_lock(hmp, &ip->chain, HAMMER2_RESOLVE_ALWAYS);
1642         error = hammer2_inode_connect(dip, ip, name, name_len);
1643         hammer2_chain_unlock(hmp, &ip->chain);
1644         if (error == 0) {
1645                 cache_setunresolved(ap->a_nch);
1646                 cache_setvp(ap->a_nch, ap->a_vp);
1647         }
1648 done:
1649         hammer2_inode_unlock_nlinks(ip);
1650         return error;
1651 }
1652
1653 /*
1654  * hammer2_vop_ncreate { nch, dvp, vpp, cred, vap }
1655  *
1656  * The operating system has already ensured that the directory entry
1657  * does not exist and done all appropriate namespace locking.
1658  */
1659 static
1660 int
1661 hammer2_vop_ncreate(struct vop_ncreate_args *ap)
1662 {
1663         hammer2_mount_t *hmp;
1664         hammer2_inode_t *dip;
1665         hammer2_inode_t *nip;
1666         struct namecache *ncp;
1667         const uint8_t *name;
1668         size_t name_len;
1669         int error;
1670
1671         dip = VTOI(ap->a_dvp);
1672         hmp = dip->hmp;
1673         if (hmp->ronly)
1674                 return (EROFS);
1675
1676         ncp = ap->a_nch->ncp;
1677         name = ncp->nc_name;
1678         name_len = ncp->nc_nlen;
1679
1680         error = hammer2_inode_create(dip, ap->a_vap, ap->a_cred,
1681                                      name, name_len, &nip);
1682         if (error) {
1683                 KKASSERT(nip == NULL);
1684                 *ap->a_vpp = NULL;
1685                 return error;
1686         }
1687         *ap->a_vpp = hammer2_igetv(nip, &error);
1688         hammer2_chain_unlock(hmp, &nip->chain);
1689
1690         if (error == 0) {
1691                 cache_setunresolved(ap->a_nch);
1692                 cache_setvp(ap->a_nch, *ap->a_vpp);
1693         }
1694         return error;
1695 }
1696
1697 /*
1698  * hammer2_vop_nsymlink { nch, dvp, vpp, cred, vap, target }
1699  */
1700 static
1701 int
1702 hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
1703 {
1704         hammer2_mount_t *hmp;
1705         hammer2_inode_t *dip;
1706         hammer2_inode_t *nip;
1707         struct namecache *ncp;
1708         const uint8_t *name;
1709         size_t name_len;
1710         int error;
1711
1712         dip = VTOI(ap->a_dvp);
1713         hmp = dip->hmp;
1714         if (hmp->ronly)
1715                 return (EROFS);
1716
1717         ncp = ap->a_nch->ncp;
1718         name = ncp->nc_name;
1719         name_len = ncp->nc_nlen;
1720
1721         ap->a_vap->va_type = VLNK;      /* enforce type */
1722
1723         error = hammer2_inode_create(dip, ap->a_vap, ap->a_cred,
1724                                      name, name_len, &nip);
1725         if (error) {
1726                 KKASSERT(nip == NULL);
1727                 *ap->a_vpp = NULL;
1728                 return error;
1729         }
1730         *ap->a_vpp = hammer2_igetv(nip, &error);
1731
1732         /*
1733          * Build the softlink (much like file data) and finalize the namecache.
1734          */
1735         if (error == 0) {
1736                 size_t bytes;
1737                 struct uio auio;
1738                 struct iovec aiov;
1739
1740                 bytes = strlen(ap->a_target);
1741
1742                 if (bytes <= HAMMER2_EMBEDDED_BYTES) {
1743                         KKASSERT(nip->ip_data.op_flags &
1744                                  HAMMER2_OPFLAG_DIRECTDATA);
1745                         bcopy(ap->a_target, nip->ip_data.u.data, bytes);
1746                         nip->ip_data.size = bytes;
1747                 } else {
1748                         bzero(&auio, sizeof(auio));
1749                         bzero(&aiov, sizeof(aiov));
1750                         auio.uio_iov = &aiov;
1751                         auio.uio_segflg = UIO_SYSSPACE;
1752                         auio.uio_rw = UIO_WRITE;
1753                         auio.uio_resid = bytes;
1754                         auio.uio_iovcnt = 1;
1755                         auio.uio_td = curthread;
1756                         aiov.iov_base = ap->a_target;
1757                         aiov.iov_len = bytes;
1758                         error = hammer2_write_file(nip, &auio, IO_APPEND, 0);
1759                         /* XXX handle error */
1760                         error = 0;
1761                 }
1762         }
1763         hammer2_chain_unlock(hmp, &nip->chain);
1764
1765         /*
1766          * Finalize namecache
1767          */
1768         if (error == 0) {
1769                 cache_setunresolved(ap->a_nch);
1770                 cache_setvp(ap->a_nch, *ap->a_vpp);
1771                 /* hammer2_knote(ap->a_dvp, NOTE_WRITE); */
1772         }
1773         return error;
1774 }
1775
1776 /*
1777  * hammer2_vop_nremove { nch, dvp, cred }
1778  */
1779 static
1780 int
1781 hammer2_vop_nremove(struct vop_nremove_args *ap)
1782 {
1783         hammer2_inode_t *dip;
1784         hammer2_mount_t *hmp;
1785         struct namecache *ncp;
1786         const uint8_t *name;
1787         size_t name_len;
1788         int error;
1789
1790         dip = VTOI(ap->a_dvp);
1791         hmp = dip->hmp;
1792         if (hmp->ronly)
1793                 return(EROFS);
1794
1795         ncp = ap->a_nch->ncp;
1796         name = ncp->nc_name;
1797         name_len = ncp->nc_nlen;
1798
1799         error = hammer2_unlink_file(dip, name, name_len, 0);
1800
1801         if (error == 0) {
1802                 cache_setunresolved(ap->a_nch);
1803                 cache_setvp(ap->a_nch, NULL);
1804         }
1805         return (error);
1806 }
1807
1808 /*
1809  * hammer2_vop_nrmdir { nch, dvp, cred }
1810  */
1811 static
1812 int
1813 hammer2_vop_nrmdir(struct vop_nrmdir_args *ap)
1814 {
1815         hammer2_inode_t *dip;
1816         hammer2_mount_t *hmp;
1817         struct namecache *ncp;
1818         const uint8_t *name;
1819         size_t name_len;
1820         int error;
1821
1822         dip = VTOI(ap->a_dvp);
1823         hmp = dip->hmp;
1824         if (hmp->ronly)
1825                 return(EROFS);
1826
1827         ncp = ap->a_nch->ncp;
1828         name = ncp->nc_name;
1829         name_len = ncp->nc_nlen;
1830
1831         error = hammer2_unlink_file(dip, name, name_len, 1);
1832
1833         if (error == 0) {
1834                 cache_setunresolved(ap->a_nch);
1835                 cache_setvp(ap->a_nch, NULL);
1836         }
1837         return (error);
1838 }
1839
1840 /*
1841  * hammer2_vop_nrename { fnch, tnch, fdvp, tdvp, cred }
1842  */
1843 static
1844 int
1845 hammer2_vop_nrename(struct vop_nrename_args *ap)
1846 {
1847         struct namecache *fncp;
1848         struct namecache *tncp;
1849         hammer2_inode_t *fdip;
1850         hammer2_inode_t *tdip;
1851         hammer2_inode_t *ip;
1852         hammer2_mount_t *hmp;
1853         const uint8_t *fname;
1854         size_t fname_len;
1855         const uint8_t *tname;
1856         size_t tname_len;
1857         int error;
1858
1859         if (ap->a_fdvp->v_mount != ap->a_tdvp->v_mount)
1860                 return(EXDEV);
1861         if (ap->a_fdvp->v_mount != ap->a_fnch->ncp->nc_vp->v_mount)
1862                 return(EXDEV);
1863
1864         fdip = VTOI(ap->a_fdvp);        /* source directory */
1865         tdip = VTOI(ap->a_tdvp);        /* target directory */
1866
1867         hmp = fdip->hmp;                /* check read-only filesystem */
1868         if (hmp->ronly)
1869                 return(EROFS);
1870
1871         fncp = ap->a_fnch->ncp;         /* entry name in source */
1872         fname = fncp->nc_name;
1873         fname_len = fncp->nc_nlen;
1874
1875         tncp = ap->a_tnch->ncp;         /* entry name in target */
1876         tname = tncp->nc_name;
1877         tname_len = tncp->nc_nlen;
1878
1879         /*
1880          * ip is the inode being renamed.  If this is a hardlink then
1881          * ip represents the actual file and not the hardlink marker.
1882          */
1883         ip = VTOI(fncp->nc_vp);
1884
1885         /*
1886          * Keep a tight grip on the inode: removing it only disconnects it
1887          * from its directory, and we do not want it destroyed.
1888          *
1889          * NOTE: To avoid deadlocks we cannot lock (ip) while we are
1890          *       unlinking elements from their directories.  Locking
1891          *       the nlinks field does not lock the whole inode.
1892          */
1893         hammer2_inode_lock_nlinks(ip);
1894
1895         /*
1896          * Remove target if it exists
1897          */
1898         error = hammer2_unlink_file(tdip, tname, tname_len, -1);
1899         if (error && error != ENOENT)
1900                 goto done;
1901         cache_setunresolved(ap->a_tnch);
1902         cache_setvp(ap->a_tnch, NULL);
1903
1904         /*
1905          * Disconnect (fdip, fname) from the source directory.  This will
1906          * disconnect (ip) if it represents a direct file.  If (ip) represents
1907          * a hardlink the HARDLINK pointer object will be removed but the
1908          * hardlink will stay intact.
1909          *
1910          * If (ip) is already hardlinked we have to resolve to a consolidated
1911          * file but we do not bump the nlinks count.  (ip) must hold the nlinks
1912          * lock & ref for the operation.  If the consolidated file has been
1913          * relocated (ip) will be adjusted and the related nlinks lock moved
1914          * along with it.
1915          *
1916          * If (ip) does not have multiple links we can just copy the physical
1917          * contents of the inode.
1918          */
1919         if (ip->ip_data.nlinks > 1) {
1920                 error = hammer2_hardlink_consolidate(&ip, tdip);
1921                 if (error)
1922                         goto done;
1923         }
1924         error = hammer2_unlink_file(fdip, fname, fname_len, -1);
1925         if (error)
1926                 goto done;
1927
1928         /*
1929          * Reconnect ip to target directory.
1930          *
1931          * WARNING: chain locks can lock buffer cache buffers, to avoid
1932          *          deadlocks we want to unlock before issuing a cache_*()
1933          *          op (that might have to lock a vnode).
1934          */
1935         hammer2_chain_lock(hmp, &ip->chain, HAMMER2_RESOLVE_ALWAYS);
1936         error = hammer2_inode_connect(tdip, ip, tname, tname_len);
1937         hammer2_chain_unlock(hmp, &ip->chain);
1938
1939         if (error == 0) {
1940                 cache_rename(ap->a_fnch, ap->a_tnch);
1941         }
1942 done:
1943         hammer2_inode_unlock_nlinks(ip);
1944
1945         return (error);
1946 }
1947
1948 static int hammer2_strategy_read(struct vop_strategy_args *ap);
1949 static int hammer2_strategy_write(struct vop_strategy_args *ap);
1950
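/*
 * hammer2_vop_strategy { vp, bio }
 *
 * Dispatch the logical buffer to the read or write strategy code and
 * account for the frontend I/O statistics.  Any other buffer command
 * is rejected with EINVAL.
 */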
1951 static
1952 int
1953 hammer2_vop_strategy(struct vop_strategy_args *ap)
1954 {
1955         struct bio *biop;
1956         struct buf *bp;
1957         int error;
1958
1959         biop = ap->a_bio;
1960         bp = biop->bio_buf;
1961
1962         switch (bp->b_cmd) {
1963         case BUF_CMD_READ:
1964                 error = hammer2_strategy_read(ap);
1965                 ++hammer2_iod_file_read;
1966                 break;
1967         case BUF_CMD_WRITE:
1968                 error = hammer2_strategy_write(ap);
1969                 ++hammer2_iod_file_write;
1970                 break;
1971         default:
1972                 bp->b_error = error = EINVAL;
1973                 bp->b_flags |= B_ERROR;
1974                 biodone(biop);
1975                 break;
1976         }
1977
1978         return (error);
1979 }
1980
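/*
 * Read strategy.  Resolve the logical offset to one of three cases:
 * zero-fill (no backing chain), data embedded directly in the inode,
 * or on-media data, and complete or forward the BIO accordingly.
 */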
1981 static
1982 int
1983 hammer2_strategy_read(struct vop_strategy_args *ap)
1984 {
1985         struct buf *bp;
1986         struct bio *bio;
1987         struct bio *nbio;
1988         hammer2_mount_t *hmp;
1989         hammer2_inode_t *ip;
1990         hammer2_chain_t *parent;
1991         hammer2_chain_t *chain;
1992         hammer2_key_t lbase;
1993
1994         bio = ap->a_bio;
1995         bp = bio->bio_buf;
1996         ip = VTOI(ap->a_vp);
1997         hmp = ip->hmp;
1998         nbio = push_bio(bio);
1999
2000         lbase = bio->bio_offset;
2001         chain = NULL;
2002         KKASSERT(((int)lbase & HAMMER2_PBUFMASK) == 0);
2003
2004         /*
2005          * We must characterize the logical->physical translation if it
2006          * has not already been cached.
2007          *
2008          * Physical data references < LBUFSIZE are never cached.  This
2009          * includes both small-block allocations and inode-embedded data.
2010          */
2011         if (nbio->bio_offset == NOOFFSET) {
2012                 parent = &ip->chain;
2013                 hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
2014
2015                 chain = hammer2_chain_lookup(hmp, &parent, lbase, lbase,
2016                                              HAMMER2_LOOKUP_NODATA);
2017                 if (chain == NULL) {
2018                         /*
2019                          * Data is zero-fill
2020                          */
2021                         nbio->bio_offset = ZFOFFSET;
2022                 } else if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
2023                         /*
2024                          * Data is embedded in the inode (do nothing)
2025                          */
2026                         KKASSERT(chain == parent);
2027                         hammer2_chain_unlock(hmp, chain);
2028                 } else if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
2029                         /*
2030                          * Data is on-media
2031                          */
2032                         KKASSERT(bp->b_bcount == chain->bytes);
2033                         nbio->bio_offset = chain->bref.data_off &
2034                                            HAMMER2_OFF_MASK;
2035                         hammer2_chain_unlock(hmp, chain);
2036                         KKASSERT(nbio->bio_offset != 0);
2037                 } else {
2038                         panic("hammer2_strategy_read: unknown bref type");
2039                 }
2040                 hammer2_chain_unlock(hmp, parent);
2041         }
2042
2043         if (hammer2_debug & 0x0020) {
2044                 kprintf("read %016jx %016jx\n",
2045                         bio->bio_offset, nbio->bio_offset);
2046         }
2047
2048         if (nbio->bio_offset == ZFOFFSET) {
2049                 /*
2050                  * Data is zero-fill
2051                  */
2052                 bp->b_resid = 0;
2053                 bp->b_error = 0;
2054                 bzero(bp->b_data, bp->b_bcount);
2055                 biodone(nbio);
2056         } else if (nbio->bio_offset != NOOFFSET) {
2057                 /*
2058                  * Forward direct IO to the device
2059                  */
2060                 vn_strategy(hmp->devvp, nbio);
2061         } else {
2062                 /*
2063                  * Data is embedded in inode.
2064                  */
2065                 bcopy(chain->data->ipdata.u.data, bp->b_data,
2066                       HAMMER2_EMBEDDED_BYTES);
2067                 bzero(bp->b_data + HAMMER2_EMBEDDED_BYTES,
2068                       bp->b_bcount - HAMMER2_EMBEDDED_BYTES);
2069                 bp->b_resid = 0;
2070                 bp->b_error = 0;
2071                 biodone(nbio);
2072         }
2073         return (0);
2074 }
2075
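/*
 * Write strategy.  The logical write path has already assigned the
 * physical block, so the only untranslated offset allowed here is
 * data embedded directly in the inode, which is copied in place and
 * flagged for the next fsync via HAMMER2_CHAIN_DIRTYEMBED.
 */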
2076 static
2077 int
2078 hammer2_strategy_write(struct vop_strategy_args *ap)
2079 {
2080         struct buf *bp;
2081         struct bio *bio;
2082         struct bio *nbio;
2083         hammer2_mount_t *hmp;
2084         hammer2_inode_t *ip;
2085
2086         bio = ap->a_bio;
2087         bp = bio->bio_buf;
2088         ip = VTOI(ap->a_vp);
2089         hmp = ip->hmp;
2090         nbio = push_bio(bio);
2091
2092         KKASSERT((bio->bio_offset & HAMMER2_PBUFMASK64) == 0);
2093         KKASSERT(nbio->bio_offset != 0 && nbio->bio_offset != ZFOFFSET);
2094
2095         if (nbio->bio_offset == NOOFFSET) {
2096                 /*
2097                  * Must be embedded in the inode.
2098                  */
2099                 KKASSERT(bio->bio_offset == 0);
2100                 bcopy(bp->b_data, ip->ip_data.u.data, HAMMER2_EMBEDDED_BYTES);
2101                 bp->b_resid = 0;
2102                 bp->b_error = 0;
2103                 biodone(nbio);
2104
2105                 /*
2106                  * This special flag does not follow the normal MODIFY rules
2107                  * because we might deadlock on ip.  Instead we depend on
2108                  * VOP_FSYNC() to detect the case.
2109                  */
2110                 atomic_set_int(&ip->chain.flags, HAMMER2_CHAIN_DIRTYEMBED);
2111         } else {
2112                 /*
2113                  * Forward direct IO to the device
2114                  */
2115                 vn_strategy(hmp->devvp, nbio);
2116         }
2117         return (0);
2118 }
2119
2120 /*
2121  * hammer2_vop_ioctl { vp, command, data, fflag, cred }
2122  */
2123 static
2124 int
2125 hammer2_vop_ioctl(struct vop_ioctl_args *ap)
2126 {
2127         hammer2_mount_t *hmp;
2128         hammer2_inode_t *ip;
2129         int error;
2130
2131         ip = VTOI(ap->a_vp);
2132         hmp = ip->hmp;
2133
2134         error = hammer2_ioctl(ip, ap->a_command, (void *)ap->a_data,
2135                               ap->a_fflag, ap->a_cred);
2136         return (error);
2137 }
2138
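/*
 * Handle MOUNTCTL_SET_EXPORT to set up the NFS export list; all other
 * mount control operations fall through to vop_stdmountctl().
 */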
2139 static
2140 int
2141 hammer2_vop_mountctl(struct vop_mountctl_args *ap)
2142 {
2143         struct mount *mp;
2144         hammer2_pfsmount_t *pmp;
2145         int rc;
2146
2147         switch (ap->a_op) {
2148         case (MOUNTCTL_SET_EXPORT):
2149                 mp = ap->a_head.a_ops->head.vv_mount;
2150                 pmp = MPTOPMP(mp);
2151
2152                 if (ap->a_ctllen != sizeof(struct export_args))
2153                         rc = (EINVAL);
2154                 else
2155                         rc = vfs_export(mp, &pmp->export,
2156                                         (const struct export_args *)ap->a_ctl);
2157                 break;
2158         default:
2159                 rc = vop_stdmountctl(ap);
2160                 break;
2161         }
2162         return (rc);
2163 }
2164
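/*
 * Vnode operations vector for regular hammer2 vnodes.
 */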
2165 struct vop_ops hammer2_vnode_vops = {
2166         .vop_default    = vop_defaultop,
2167         .vop_fsync      = hammer2_vop_fsync,
2168         .vop_getpages   = vop_stdgetpages,
2169         .vop_putpages   = vop_stdputpages,
2170         .vop_access     = hammer2_vop_access,
2171         .vop_advlock    = hammer2_vop_advlock,
2172         .vop_close      = hammer2_vop_close,
2173         .vop_nlink      = hammer2_vop_nlink,
2174         .vop_ncreate    = hammer2_vop_ncreate,
2175         .vop_nsymlink   = hammer2_vop_nsymlink,
2176         .vop_nremove    = hammer2_vop_nremove,
2177         .vop_nrmdir     = hammer2_vop_nrmdir,
2178         .vop_nrename    = hammer2_vop_nrename,
2179         .vop_getattr    = hammer2_vop_getattr,
2180         .vop_setattr    = hammer2_vop_setattr,
2181         .vop_readdir    = hammer2_vop_readdir,
2182         .vop_readlink   = hammer2_vop_readlink,
2185         .vop_read       = hammer2_vop_read,
2186         .vop_write      = hammer2_vop_write,
2187         .vop_open       = hammer2_vop_open,
2188         .vop_inactive   = hammer2_vop_inactive,
2189         .vop_reclaim    = hammer2_vop_reclaim,
2190         .vop_nresolve   = hammer2_vop_nresolve,
2191         .vop_nlookupdotdot = hammer2_vop_nlookupdotdot,
2192         .vop_nmkdir     = hammer2_vop_nmkdir,
2193         .vop_ioctl      = hammer2_vop_ioctl,
2194         .vop_mountctl   = hammer2_vop_mountctl,
2195         .vop_bmap       = hammer2_vop_bmap,
2196         .vop_strategy   = hammer2_vop_strategy,
2197 };
2198
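/*
 * Vnode operations vectors for special files and fifos (currently
 * empty placeholders).
 */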
2199 struct vop_ops hammer2_spec_vops = {
2200
2201 };
2202
2203 struct vop_ops hammer2_fifo_vops = {
2204
2205 };