05cad2ef86f6531dcf31e6a4628308dd11238c9c
[dragonfly.git] / sys / vfs / hammer2 / hammer2_vnops.c
1 /*
2  * Copyright (c) 2011-2012 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/fcntl.h>
39 #include <sys/buf.h>
40 #include <sys/proc.h>
41 #include <sys/namei.h>
42 #include <sys/mount.h>
43 #include <sys/vnode.h>
44 #include <sys/mountctl.h>
45 #include <sys/dirent.h>
46 #include <sys/uio.h>
47
48 #include "hammer2.h"
49
/*
 * NOTE(review): ZFOFFSET is not referenced in this part of the file;
 * presumably a sentinel offset value -- confirm against its users.
 */
#define ZFOFFSET        (-2LL)

/*
 * Forward declarations for the file-local helpers used by the VOPs below.
 *
 * hammer2_read_file()       - uio read loop for a file or symlink.
 * hammer2_write_file()      - uio write loop, called with (ip) locked.
 * hammer2_assign_physical() - returns the offset the write loop stores in
 *                             bp->b_bio2.bio_offset; reports failure
 *                             through *errorp.
 * hammer2_extend_file() /
 * hammer2_truncate_file()   - resize the file to (nsize).
 */
static int hammer2_read_file(hammer2_inode_t *ip, struct uio *uio,
                                int seqcount);
static int hammer2_write_file(hammer2_inode_t *ip, struct uio *uio, int ioflag,
                              int seqcount);
static hammer2_off_t hammer2_assign_physical(hammer2_inode_t *ip,
                                hammer2_key_t lbase, int lblksize, int *errorp);
static void hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize);
static void hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize);
60
61 static __inline
62 void
63 hammer2_knote(struct vnode *vp, int flags)
64 {
65         if (flags)
66                 KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags);
67 }
68
69 /*
70  * Last reference to a vnode is going away but it is still cached.
71  */
72 static
73 int
74 hammer2_vop_inactive(struct vop_inactive_args *ap)
75 {
76         struct vnode *vp;
77         struct hammer2_inode *ip;
78 #if 0
79         struct hammer2_mount *hmp;
80 #endif
81
82         vp = ap->a_vp;
83         ip = VTOI(vp);
84
85         /*
86          * Degenerate case
87          */
88         if (ip == NULL) {
89                 vrecycle(vp);
90                 return (0);
91         }
92
93         /*
94          * Detect updates to the embedded data which may be synchronized by
95          * the strategy code.  Simply mark the inode modified so it gets
96          * picked up by our normal flush.
97          */
98         if (ip->chain.flags & HAMMER2_CHAIN_DIRTYEMBED) {
99                 hammer2_inode_lock_ex(ip);
100                 atomic_clear_int(&ip->chain.flags, HAMMER2_CHAIN_DIRTYEMBED);
101                 hammer2_chain_modify(ip->hmp, &ip->chain, 0);
102                 hammer2_inode_unlock_ex(ip);
103         }
104
105         /*
106          * Check for deleted inodes and recycle immediately.
107          */
108         if (ip->chain.flags & HAMMER2_CHAIN_DELETED) {
109                 vrecycle(vp);
110         }
111         return (0);
112 }
113
114 /*
115  * Reclaim a vnode so that it can be reused; after the inode is
116  * disassociated, the filesystem must manage it alone.
117  */
118 static
119 int
120 hammer2_vop_reclaim(struct vop_reclaim_args *ap)
121 {
122         struct hammer2_inode *ip;
123         struct hammer2_mount *hmp;
124         struct vnode *vp;
125
126         vp = ap->a_vp;
127         ip = VTOI(vp);
128         if (ip == NULL)
129                 return(0);
130         hmp = ip->hmp;
131
132         /*
133          * Set SUBMODIFIED so we can detect and propagate the DESTROYED
134          * bit in the flush code.
135          */
136         hammer2_inode_lock_ex(ip);
137         vp->v_data = NULL;
138         ip->vp = NULL;
139         if (ip->chain.flags & HAMMER2_CHAIN_DELETED) {
140                 atomic_set_int(&ip->chain.flags, HAMMER2_CHAIN_DESTROYED |
141                                                  HAMMER2_CHAIN_SUBMODIFIED);
142         }
143         hammer2_chain_flush(hmp, &ip->chain, 0);
144         hammer2_inode_unlock_ex(ip);
145         hammer2_chain_drop(hmp, &ip->chain);    /* vp ref */
146
147         /*
148          * XXX handle background sync when ip dirty, kernel will no longer
149          * notify us regarding this inode because there is no longer a
150          * vnode attached to it.
151          */
152
153         return (0);
154 }
155
156 static
157 int
158 hammer2_vop_fsync(struct vop_fsync_args *ap)
159 {
160         struct hammer2_inode *ip;
161         struct hammer2_mount *hmp;
162         struct vnode *vp;
163
164         vp = ap->a_vp;
165         ip = VTOI(vp);
166         hmp = ip->hmp;
167
168         hammer2_inode_lock_ex(ip);
169         vfsync(vp, ap->a_waitfor, 1, NULL, NULL);
170
171         /*
172          * Detect updates to the embedded data which may be synchronized by
173          * the strategy code.  Simply mark the inode modified so it gets
174          * picked up by our normal flush.
175          */
176         if (ip->chain.flags & HAMMER2_CHAIN_DIRTYEMBED) {
177                 atomic_clear_int(&ip->chain.flags, HAMMER2_CHAIN_DIRTYEMBED);
178                 hammer2_chain_modify(hmp, &ip->chain, 0);
179         }
180
181         /*
182          * Calling chain_flush here creates a lot of duplicative
183          * COW operations due to non-optimal vnode ordering.
184          *
185          * Only do it for an actual fsync() syscall.  The other forms
186          * which call this function will eventually call chain_flush
187          * on the volume root as a catch-all, which is far more optimal.
188          */
189         if (ap->a_flags & VOP_FSYNC_SYSCALL)
190                 hammer2_chain_flush(hmp, &ip->chain, 0);
191         hammer2_inode_unlock_ex(ip);
192         return (0);
193 }
194
195 static
196 int
197 hammer2_vop_access(struct vop_access_args *ap)
198 {
199         hammer2_inode_t *ip = VTOI(ap->a_vp);
200         uid_t uid;
201         gid_t gid;
202         int error;
203
204         uid = hammer2_to_unix_xid(&ip->ip_data.uid);
205         gid = hammer2_to_unix_xid(&ip->ip_data.gid);
206
207         error = vop_helper_access(ap, uid, gid, ip->ip_data.mode,
208                                   ip->ip_data.uflags);
209         return (error);
210 }
211
212 static
213 int
214 hammer2_vop_getattr(struct vop_getattr_args *ap)
215 {
216         hammer2_pfsmount_t *pmp;
217         hammer2_inode_t *ip;
218         struct vnode *vp;
219         struct vattr *vap;
220
221         vp = ap->a_vp;
222         vap = ap->a_vap;
223
224         ip = VTOI(vp);
225         pmp = ip->pmp;
226
227         hammer2_inode_lock_sh(ip);
228
229         vap->va_fsid = pmp->mp->mnt_stat.f_fsid.val[0];
230         vap->va_fileid = ip->ip_data.inum;
231         vap->va_mode = ip->ip_data.mode;
232         vap->va_nlink = ip->ip_data.nlinks;
233         vap->va_uid = hammer2_to_unix_xid(&ip->ip_data.uid);
234         vap->va_gid = hammer2_to_unix_xid(&ip->ip_data.gid);
235         vap->va_rmajor = 0;
236         vap->va_rminor = 0;
237         vap->va_size = ip->ip_data.size;
238         vap->va_blocksize = HAMMER2_PBUFSIZE;
239         vap->va_flags = ip->ip_data.uflags;
240         hammer2_time_to_timespec(ip->ip_data.ctime, &vap->va_ctime);
241         hammer2_time_to_timespec(ip->ip_data.mtime, &vap->va_mtime);
242         hammer2_time_to_timespec(ip->ip_data.mtime, &vap->va_atime);
243         vap->va_gen = 1;
244         vap->va_bytes = vap->va_size;   /* XXX */
245         vap->va_type = hammer2_get_vtype(ip);
246         vap->va_filerev = 0;
247         vap->va_uid_uuid = ip->ip_data.uid;
248         vap->va_gid_uuid = ip->ip_data.gid;
249         vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
250                           VA_FSID_UUID_VALID;
251
252         hammer2_inode_unlock_sh(ip);
253
254         return (0);
255 }
256
257 static
258 int
259 hammer2_vop_setattr(struct vop_setattr_args *ap)
260 {
261         hammer2_mount_t *hmp;
262         hammer2_inode_t *ip;
263         struct vnode *vp;
264         struct vattr *vap;
265         int error;
266         int kflags = 0;
267         int domtime = 0;
268         uint64_t ctime;
269
270         vp = ap->a_vp;
271         vap = ap->a_vap;
272         hammer2_update_time(&ctime);
273
274         ip = VTOI(vp);
275         hmp = ip->hmp;
276
277         if (hmp->ronly)
278                 return(EROFS);
279
280         hammer2_inode_lock_ex(ip);
281         error = 0;
282
283         if (vap->va_flags != VNOVAL) {
284                 u_int32_t flags;
285
286                 flags = ip->ip_data.uflags;
287                 error = vop_helper_setattr_flags(&flags, vap->va_flags,
288                                          hammer2_to_unix_xid(&ip->ip_data.uid),
289                                          ap->a_cred);
290                 if (error == 0) {
291                         if (ip->ip_data.uflags != flags) {
292                                 hammer2_chain_modify(hmp, &ip->chain, 0);
293                                 ip->ip_data.uflags = flags;
294                                 ip->ip_data.ctime = ctime;
295                                 kflags |= NOTE_ATTRIB;
296                         }
297                         if (ip->ip_data.uflags & (IMMUTABLE | APPEND)) {
298                                 error = 0;
299                                 goto done;
300                         }
301                 }
302                 goto done;
303         }
304         if (ip->ip_data.uflags & (IMMUTABLE | APPEND)) {
305                 error = EPERM;
306                 goto done;
307         }
308         if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
309                 mode_t cur_mode = ip->ip_data.mode;
310                 uid_t cur_uid = hammer2_to_unix_xid(&ip->ip_data.uid);
311                 gid_t cur_gid = hammer2_to_unix_xid(&ip->ip_data.gid);
312                 uuid_t uuid_uid;
313                 uuid_t uuid_gid;
314
315                 error = vop_helper_chown(ap->a_vp, vap->va_uid, vap->va_gid,
316                                          ap->a_cred,
317                                          &cur_uid, &cur_gid, &cur_mode);
318                 if (error == 0) {
319                         hammer2_guid_to_uuid(&uuid_uid, cur_uid);
320                         hammer2_guid_to_uuid(&uuid_gid, cur_gid);
321                         if (bcmp(&uuid_uid, &ip->ip_data.uid,
322                                  sizeof(uuid_uid)) ||
323                             bcmp(&uuid_gid, &ip->ip_data.gid,
324                                  sizeof(uuid_gid)) ||
325                             ip->ip_data.mode != cur_mode
326                         ) {
327                                 hammer2_chain_modify(hmp, &ip->chain, 0);
328                                 ip->ip_data.uid = uuid_uid;
329                                 ip->ip_data.gid = uuid_gid;
330                                 ip->ip_data.mode = cur_mode;
331                                 ip->ip_data.ctime = ctime;
332                         }
333                         kflags |= NOTE_ATTRIB;
334                 }
335         }
336
337         /*
338          * Resize the file
339          */
340         if (vap->va_size != VNOVAL && ip->ip_data.size != vap->va_size) {
341                 switch(vp->v_type) {
342                 case VREG:
343                         if (vap->va_size == ip->ip_data.size)
344                                 break;
345                         if (vap->va_size < ip->ip_data.size) {
346                                 hammer2_truncate_file(ip, vap->va_size);
347                         } else {
348                                 hammer2_extend_file(ip, vap->va_size);
349                         }
350                         domtime = 1;
351                         break;
352                 default:
353                         error = EINVAL;
354                         goto done;
355                 }
356         }
357 #if 0
358         /* atime not supported */
359         if (vap->va_atime.tv_sec != VNOVAL) {
360                 hammer2_chain_modify(hmp, &ip->chain, 0);
361                 ip->ip_data.atime = hammer2_timespec_to_time(&vap->va_atime);
362                 kflags |= NOTE_ATTRIB;
363         }
364 #endif
365         if (vap->va_mtime.tv_sec != VNOVAL) {
366                 hammer2_chain_modify(hmp, &ip->chain, 0);
367                 ip->ip_data.mtime = hammer2_timespec_to_time(&vap->va_mtime);
368                 kflags |= NOTE_ATTRIB;
369         }
370         if (vap->va_mode != (mode_t)VNOVAL) {
371                 mode_t cur_mode = ip->ip_data.mode;
372                 uid_t cur_uid = hammer2_to_unix_xid(&ip->ip_data.uid);
373                 gid_t cur_gid = hammer2_to_unix_xid(&ip->ip_data.gid);
374
375                 error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred,
376                                          cur_uid, cur_gid, &cur_mode);
377                 if (error == 0 && ip->ip_data.mode != cur_mode) {
378                         ip->ip_data.mode = cur_mode;
379                         ip->ip_data.ctime = ctime;
380                         kflags |= NOTE_ATTRIB;
381                 }
382         }
383 done:
384         hammer2_inode_unlock_ex(ip);
385         return (error);
386 }
387
388 static
389 int
390 hammer2_vop_readdir(struct vop_readdir_args *ap)
391 {
392         hammer2_mount_t *hmp;
393         hammer2_inode_t *ip;
394         hammer2_inode_t *xip;
395         hammer2_chain_t *parent;
396         hammer2_chain_t *chain;
397         hammer2_key_t lkey;
398         struct uio *uio;
399         off_t *cookies;
400         off_t saveoff;
401         int cookie_index;
402         int ncookies;
403         int error;
404         int dtype;
405         int r;
406
407         ip = VTOI(ap->a_vp);
408         hmp = ip->hmp;
409         uio = ap->a_uio;
410         saveoff = uio->uio_offset;
411
412         /*
413          * Setup cookies directory entry cookies if requested
414          */
415         if (ap->a_ncookies) {
416                 ncookies = uio->uio_resid / 16 + 1;
417                 if (ncookies > 1024)
418                         ncookies = 1024;
419                 cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
420         } else {
421                 ncookies = -1;
422                 cookies = NULL;
423         }
424         cookie_index = 0;
425
426         /*
427          * Handle artificial entries.  To ensure that only positive 64 bit
428          * quantities are returned to userland we always strip off bit 63.
429          * The hash code is designed such that codes 0x0000-0x7FFF are not
430          * used, allowing us to use these codes for articial entries.
431          *
432          * Entry 0 is used for '.' and entry 1 is used for '..'.  Do not
433          * allow '..' to cross the mount point into (e.g.) the super-root.
434          */
435         error = 0;
436         chain = (void *)(intptr_t)-1;   /* non-NULL for early goto done case */
437
438         if (saveoff == 0) {
439                 r = vop_write_dirent(&error, uio,
440                                      ip->ip_data.inum &
441                                         HAMMER2_DIRHASH_USERMSK,
442                                      DT_DIR, 1, ".");
443                 if (r)
444                         goto done;
445                 if (cookies)
446                         cookies[cookie_index] = saveoff;
447                 ++saveoff;
448                 ++cookie_index;
449                 if (cookie_index == ncookies)
450                         goto done;
451         }
452         if (saveoff == 1) {
453                 if (ip->pip == NULL || ip == ip->pmp->iroot)
454                         xip = ip;
455                 else
456                         xip = ip->pip;
457
458                 r = vop_write_dirent(&error, uio,
459                                      xip->ip_data.inum &
460                                       HAMMER2_DIRHASH_USERMSK,
461                                      DT_DIR, 2, "..");
462                 if (r)
463                         goto done;
464                 if (cookies)
465                         cookies[cookie_index] = saveoff;
466                 ++saveoff;
467                 ++cookie_index;
468                 if (cookie_index == ncookies)
469                         goto done;
470         }
471
472         lkey = saveoff | HAMMER2_DIRHASH_VISIBLE;
473
474         parent = &ip->chain;
475         error = hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS |
476                                                 HAMMER2_RESOLVE_SHARED);
477         if (error) {
478                 hammer2_chain_unlock(hmp, parent);
479                 goto done;
480         }
481         chain = hammer2_chain_lookup(hmp, &parent, lkey, lkey,
482                                      HAMMER2_LOOKUP_SHARED);
483         if (chain == NULL) {
484                 chain = hammer2_chain_lookup(hmp, &parent,
485                                              lkey, (hammer2_key_t)-1,
486                                              HAMMER2_LOOKUP_SHARED);
487         }
488         while (chain) {
489                 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
490                         dtype = hammer2_get_dtype(chain->u.ip);
491                         saveoff = chain->bref.key & HAMMER2_DIRHASH_USERMSK;
492                         r = vop_write_dirent(&error, uio,
493                                              chain->u.ip->ip_data.inum &
494                                               HAMMER2_DIRHASH_USERMSK,
495                                              dtype, chain->u.ip->ip_data.name_len,
496                                              chain->u.ip->ip_data.filename);
497                         if (r)
498                                 break;
499                         if (cookies)
500                                 cookies[cookie_index] = saveoff;
501                         ++cookie_index;
502                 } else {
503                         /* XXX chain error */
504                         kprintf("bad chain type readdir %d\n",
505                                 chain->bref.type);
506                 }
507
508                 /*
509                  * Keys may not be returned in order so once we have a
510                  * placemarker (chain) the scan must allow the full range
511                  * or some entries will be missed.
512                  */
513                 chain = hammer2_chain_next(hmp, &parent, chain,
514                                            HAMMER2_DIRHASH_VISIBLE,
515                                            (hammer2_key_t)-1,
516                                            HAMMER2_LOOKUP_SHARED);
517                 if (chain) {
518                         saveoff = (chain->bref.key &
519                                    HAMMER2_DIRHASH_USERMSK) + 1;
520                 } else {
521                         saveoff = (hammer2_key_t)-1;
522                 }
523                 if (cookie_index == ncookies)
524                         break;
525         }
526         if (chain)
527                 hammer2_chain_unlock(hmp, chain);
528         hammer2_chain_unlock(hmp, parent);
529 done:
530         if (ap->a_eofflag)
531                 *ap->a_eofflag = (chain == NULL);
532         uio->uio_offset = saveoff & ~HAMMER2_DIRHASH_VISIBLE;
533         if (error && cookie_index == 0) {
534                 if (cookies) {
535                         kfree(cookies, M_TEMP);
536                         *ap->a_ncookies = 0;
537                         *ap->a_cookies = NULL;
538                 }
539         } else {
540                 if (cookies) {
541                         *ap->a_ncookies = cookie_index;
542                         *ap->a_cookies = cookies;
543                 }
544         }
545         return (error);
546 }
547
548 /*
549  * hammer2_vop_readlink { vp, uio, cred }
550  */
551 static
552 int
553 hammer2_vop_readlink(struct vop_readlink_args *ap)
554 {
555         struct vnode *vp;
556         hammer2_mount_t *hmp;
557         hammer2_inode_t *ip;
558         int error;
559
560         vp = ap->a_vp;
561         if (vp->v_type != VLNK)
562                 return (EINVAL);
563         ip = VTOI(vp);
564         hmp = ip->hmp;
565
566         error = hammer2_read_file(ip, ap->a_uio, 0);
567         return (error);
568 }
569
570 static
571 int
572 hammer2_vop_read(struct vop_read_args *ap)
573 {
574         struct vnode *vp;
575         hammer2_mount_t *hmp;
576         hammer2_inode_t *ip;
577         struct uio *uio;
578         int error;
579         int seqcount;
580         int bigread;
581
582         /*
583          * Read operations supported on this vnode?
584          */
585         vp = ap->a_vp;
586         if (vp->v_type != VREG)
587                 return (EINVAL);
588
589         /*
590          * Misc
591          */
592         ip = VTOI(vp);
593         hmp = ip->hmp;
594         uio = ap->a_uio;
595         error = 0;
596
597         seqcount = ap->a_ioflag >> 16;
598         bigread = (uio->uio_resid > 100 * 1024 * 1024);
599
600         error = hammer2_read_file(ip, uio, seqcount);
601         return (error);
602 }
603
604 static
605 int
606 hammer2_vop_write(struct vop_write_args *ap)
607 {
608         thread_t td;
609         struct vnode *vp;
610         hammer2_mount_t *hmp;
611         hammer2_inode_t *ip;
612         struct uio *uio;
613         int error;
614         int seqcount;
615         int bigwrite;
616
617         /*
618          * Read operations supported on this vnode?
619          */
620         vp = ap->a_vp;
621         if (vp->v_type != VREG)
622                 return (EINVAL);
623
624         /*
625          * Misc
626          */
627         ip = VTOI(vp);
628         hmp = ip->hmp;
629         uio = ap->a_uio;
630         error = 0;
631         if (hmp->ronly)
632                 return (EROFS);
633
634         seqcount = ap->a_ioflag >> 16;
635         bigwrite = (uio->uio_resid > 100 * 1024 * 1024);
636
637         /*
638          * Check resource limit
639          */
640         if (uio->uio_resid > 0 && (td = uio->uio_td) != NULL && td->td_proc &&
641             uio->uio_offset + uio->uio_resid >
642              td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
643                 lwpsignal(td->td_proc, td->td_lwp, SIGXFSZ);
644                 return (EFBIG);
645         }
646
647         bigwrite = (uio->uio_resid > 100 * 1024 * 1024);
648
649         /*
650          * ip must be locked if extending the file.
651          * ip must be locked to avoid racing a truncation.
652          *
653          * ip must be marked modified, particularly because the write
654          * might wind up being copied into the embedded data area.
655          */
656         hammer2_inode_lock_ex(ip);
657         error = hammer2_write_file(ip, uio, ap->a_ioflag, seqcount);
658         hammer2_inode_unlock_ex(ip);
659         return (error);
660 }
661
662 /*
663  * Perform read operations on a file or symlink given an UNLOCKED
664  * inode and uio.
665  */
666 static
667 int
668 hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, int seqcount)
669 {
670         struct buf *bp;
671         int error;
672
673         error = 0;
674
675         /*
676          * UIO read loop
677          */
678         while (uio->uio_resid > 0 && uio->uio_offset < ip->ip_data.size) {
679                 hammer2_key_t lbase;
680                 hammer2_key_t leof;
681                 int lblksize;
682                 int loff;
683                 int n;
684
685                 lblksize = hammer2_calc_logical(ip, uio->uio_offset,
686                                                 &lbase, &leof);
687
688                 error = cluster_read(ip->vp, leof, lbase, lblksize,
689                                      uio->uio_resid, seqcount * BKVASIZE,
690                                      &bp);
691
692                 if (error)
693                         break;
694                 loff = (int)(uio->uio_offset - lbase);
695                 n = lblksize - loff;
696                 if (n > uio->uio_resid)
697                         n = uio->uio_resid;
698                 if (n > ip->ip_data.size - uio->uio_offset)
699                         n = (int)(ip->ip_data.size - uio->uio_offset);
700                 bp->b_flags |= B_AGE;
701                 uiomove((char *)bp->b_data + loff, n, uio);
702                 bqrelse(bp);
703         }
704         return (error);
705 }
706
707 /*
708  * Called with a locked (ip) to do the underlying write to a file or
709  * to build the symlink target.
710  */
711 static
712 int
713 hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
714                    int ioflag, int seqcount)
715 {
716         hammer2_key_t old_eof;
717         struct buf *bp;
718         int kflags;
719         int error;
720         int modified = 0;
721
722         /*
723          * Setup if append
724          */
725         if (ioflag & IO_APPEND)
726                 uio->uio_offset = ip->ip_data.size;
727         kflags = 0;
728         error = 0;
729
730         /*
731          * Extend the file if necessary.  If the write fails at some point
732          * we will truncate it back down to cover as much as we were able
733          * to write.
734          *
735          * Doing this now makes it easier to calculate buffer sizes in
736          * the loop.
737          */
738         old_eof = ip->ip_data.size;
739         if (uio->uio_offset + uio->uio_resid > ip->ip_data.size) {
740                 modified = 1;
741                 hammer2_extend_file(ip, uio->uio_offset + uio->uio_resid);
742                 kflags |= NOTE_EXTEND;
743         }
744
745         /*
746          * UIO write loop
747          */
748         while (uio->uio_resid > 0) {
749                 hammer2_key_t lbase;
750                 hammer2_key_t leof;
751                 int trivial;
752                 int lblksize;
753                 int loff;
754                 int n;
755
756                 /*
757                  * Don't allow the buffer build to blow out the buffer
758                  * cache.
759                  */
760                 if ((ioflag & IO_RECURSE) == 0) {
761                         /*
762                          * XXX should try to leave this unlocked through
763                          *      the whole loop
764                          */
765                         hammer2_chain_unlock(ip->hmp, &ip->chain);
766                         bwillwrite(HAMMER2_PBUFSIZE);
767                         hammer2_chain_lock(ip->hmp, &ip->chain,
768                                            HAMMER2_RESOLVE_ALWAYS);
769                 }
770
771                 /* XXX bigwrite & signal check test */
772
773                 /*
774                  * This nominally tells us how much we can cluster and
775                  * what the logical buffer size needs to be.  Currently
776                  * we don't try to cluster the write and just handle one
777                  * block at a time.
778                  */
779                 lblksize = hammer2_calc_logical(ip, uio->uio_offset,
780                                                 &lbase, &leof);
781                 loff = (int)(uio->uio_offset - lbase);
782
783                 /*
784                  * Calculate bytes to copy this transfer and whether the
785                  * copy completely covers the buffer or not.
786                  */
787                 trivial = 0;
788                 n = lblksize - loff;
789                 if (n > uio->uio_resid) {
790                         n = uio->uio_resid;
791                         if (uio->uio_offset + n == ip->ip_data.size)
792                                 trivial = 1;
793                 } else if (loff == 0) {
794                         trivial = 1;
795                 }
796
797                 /*
798                  * Get the buffer
799                  */
800                 if (uio->uio_segflg == UIO_NOCOPY) {
801                         /*
802                          * Issuing a write with the same data backing the
803                          * buffer.  Instantiate the buffer to collect the
804                          * backing vm pages, then read-in any missing bits.
805                          *
806                          * This case is used by vop_stdputpages().
807                          */
808                         bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0);
809                         if ((bp->b_flags & B_CACHE) == 0) {
810                                 bqrelse(bp);
811                                 error = bread(ip->vp, lbase, lblksize, &bp);
812                         }
813                 } else if (trivial) {
814                         /*
815                          * Even though we are entirely overwriting the buffer
816                          * we may still have to zero it out to avoid a
817                          * mmap/write visibility issue.
818                          */
819                         bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0);
820                         if ((bp->b_flags & B_CACHE) == 0)
821                                 vfs_bio_clrbuf(bp);
822                 } else {
823                         /*
824                          * Partial overwrite, read in any missing bits then
825                          * replace the portion being written.
826                          *
827                          * (The strategy code will detect zero-fill physical
828                          * blocks for this case).
829                          */
830                         error = bread(ip->vp, lbase, lblksize, &bp);
831                         if (error == 0)
832                                 bheavy(bp);
833                 }
834
835                 if (error) {
836                         brelse(bp);
837                         break;
838                 }
839
840                 /*
841                  * We have to assign physical storage to the buffer we intend
842                  * to dirty or write now to avoid deadlocks in the strategy
843                  * code later.
844                  *
845                  * This can return NOOFFSET for inode-embedded data.  The
846                  * strategy code will take care of it in that case.
847                  */
848                 bp->b_bio2.bio_offset =
849                         hammer2_assign_physical(ip, lbase, lblksize, &error);
850                 if (error) {
851                         brelse(bp);
852                         break;
853                 }
854
855                 /*
856                  * Ok, copy the data in
857                  */
858                 hammer2_chain_unlock(ip->hmp, &ip->chain);
859                 error = uiomove(bp->b_data + loff, n, uio);
860                 hammer2_chain_lock(ip->hmp, &ip->chain, HAMMER2_RESOLVE_ALWAYS);
861                 kflags |= NOTE_WRITE;
862                 modified = 1;
863
864                 if (error) {
865                         brelse(bp);
866                         break;
867                 }
868
869                 /* XXX update ip_data.mtime */
870
871                 /*
872                  * Once we dirty a buffer any cached offset becomes invalid.
873                  *
874                  * NOTE: For cluster_write() always use the trailing block
875                  *       size, which is HAMMER2_PBUFSIZE.  lblksize is the
876                  *       eof-straddling blocksize and is incorrect.
877                  */
878                 bp->b_flags |= B_AGE;
879                 if (ioflag & IO_SYNC) {
880                         bwrite(bp);
881                 } else if ((ioflag & IO_DIRECT) && loff + n == lblksize) {
882                         if (bp->b_bcount == HAMMER2_PBUFSIZE)
883                                 bp->b_flags |= B_CLUSTEROK;
884                         bdwrite(bp);
885                 } else if (ioflag & IO_ASYNC) {
886                         bawrite(bp);
887                 } else if (hammer2_cluster_enable) {
888                         if (bp->b_bcount == HAMMER2_PBUFSIZE)
889                                 bp->b_flags |= B_CLUSTEROK;
890                         cluster_write(bp, leof, HAMMER2_PBUFSIZE, seqcount);
891                 } else {
892                         if (bp->b_bcount == HAMMER2_PBUFSIZE)
893                                 bp->b_flags |= B_CLUSTEROK;
894                         bdwrite(bp);
895                 }
896         }
897
898         /*
899          * Cleanup.  If we extended the file EOF but failed to write through
900          * the entire write is a failure and we have to back-up.
901          */
902         if (error && ip->ip_data.size != old_eof) {
903                 hammer2_truncate_file(ip, old_eof);
904         } else if (modified) {
905                 hammer2_chain_modify(ip->hmp, &ip->chain, 0);
906                 hammer2_update_time(&ip->ip_data.mtime);
907         }
908         hammer2_knote(ip->vp, kflags);
909         return error;
910 }
911
912 /*
913  * Assign physical storage to a logical block.
914  *
915  * NOOFFSET is returned if the data is inode-embedded.  In this case the
916  * strategy code will simply bcopy() the data into the inode.
917  *
918  * The inode's delta_dcount is adjusted.
919  */
920 static
921 hammer2_off_t
922 hammer2_assign_physical(hammer2_inode_t *ip, hammer2_key_t lbase,
923                         int lblksize, int *errorp)
924 {
925         hammer2_mount_t *hmp;
926         hammer2_chain_t *parent;
927         hammer2_chain_t *chain;
928         hammer2_off_t pbase;
929
930         /*
931          * Locate the chain associated with lbase, return a locked chain.
932          * However, do not instantiate any data reference (which utilizes a
933          * device buffer) because we will be using direct IO via the
934          * logical buffer cache buffer.
935          */
936         hmp = ip->hmp;
937 retry:
938         *errorp = 0;
939         parent = &ip->chain;
940         hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
941
942         chain = hammer2_chain_lookup(hmp, &parent,
943                                      lbase, lbase,
944                                      HAMMER2_LOOKUP_NODATA);
945
946         if (chain == NULL) {
947                 /*
948                  * We found a hole, create a new chain entry.
949                  *
950                  * NOTE: DATA chains are created without device backing
951                  *       store (nor do we want any).
952                  */
953                 chain = hammer2_chain_create(hmp, parent, NULL,
954                                              lbase, HAMMER2_PBUFRADIX,
955                                              HAMMER2_BREF_TYPE_DATA,
956                                              lblksize, errorp);
957                 if (chain == NULL) {
958                         KKASSERT(*errorp == EAGAIN); /* XXX */
959                         hammer2_chain_unlock(hmp, parent);
960                         goto retry;
961                 }
962
963                 pbase = chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX;
964                 ip->delta_dcount += lblksize;
965         } else {
966                 switch (chain->bref.type) {
967                 case HAMMER2_BREF_TYPE_INODE:
968                         /*
969                          * The data is embedded in the inode.  The
970                          * caller is responsible for marking the inode
971                          * modified and copying the data to the embedded
972                          * area.
973                          */
974                         pbase = NOOFFSET;
975                         break;
976                 case HAMMER2_BREF_TYPE_DATA:
977                         if (chain->bytes != lblksize) {
978                                 panic("hammer2_assign_physical: "
979                                       "size mismatch %d/%d\n",
980                                       lblksize, chain->bytes);
981                         }
982                         hammer2_chain_modify(hmp, chain,
983                                              HAMMER2_MODIFY_OPTDATA);
984                         pbase = chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX;
985                         break;
986                 default:
987                         panic("hammer2_assign_physical: bad type");
988                         /* NOT REACHED */
989                         pbase = NOOFFSET;
990                         break;
991                 }
992         }
993
994         if (chain)
995                 hammer2_chain_unlock(hmp, chain);
996         hammer2_chain_unlock(hmp, parent);
997
998         return (pbase);
999 }
1000
1001 /*
1002  * Truncate the size of a file.
1003  *
1004  * This routine adjusts ip->ip_data.size smaller, destroying any related
1005  * data beyond the new EOF and potentially resizing the block straddling
1006  * the EOF.
1007  *
1008  * The inode must be locked.
1009  */
1010 static
1011 void
1012 hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize)
1013 {
1014         hammer2_chain_t *parent;
1015         hammer2_chain_t *chain;
1016         hammer2_mount_t *hmp = ip->hmp;
1017         hammer2_key_t lbase;
1018         hammer2_key_t leof;
1019         struct buf *bp;
1020         int loff;
1021         int error;
1022         int oblksize;
1023         int nblksize;
1024
1025         hammer2_chain_modify(hmp, &ip->chain, 0);
1026         bp = NULL;
1027
1028         /*
1029          * Destroy any logical buffer cache buffers beyond the file EOF.
1030          *
1031          * We call nvtruncbuf() w/ trivial == 1 to prevent it from messing
1032          * around with the buffer straddling EOF, because we need to assign
1033          * a new physical offset to it.
1034          */
1035         if (ip->vp) {
1036                 nvtruncbuf(ip->vp, nsize,
1037                            HAMMER2_PBUFSIZE, (int)nsize & HAMMER2_PBUFMASK,
1038                            1);
1039         }
1040
1041         /*
1042          * Setup for lookup/search
1043          */
1044         parent = &ip->chain;
1045         error = hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
1046         if (error) {
1047                 hammer2_chain_unlock(hmp, parent);
1048                 /* XXX error reporting */
1049                 return;
1050         }
1051
1052         /*
1053          * Handle the case where a chain/logical-buffer straddles the new
1054          * EOF.  We told nvtruncbuf() above not to mess with the logical
1055          * buffer straddling the EOF because we need to reassign its storage
1056          * and can't let the strategy code do it for us.
1057          */
1058         loff = (int)nsize & HAMMER2_PBUFMASK;
1059         if (loff && ip->vp) {
1060                 oblksize = hammer2_calc_logical(ip, nsize, &lbase, &leof);
1061                 error = bread(ip->vp, lbase, oblksize, &bp);
1062                 KKASSERT(error == 0);
1063         }
1064         ip->ip_data.size = nsize;
1065         nblksize = hammer2_calc_logical(ip, nsize, &lbase, &leof);
1066
1067         /*
1068          * Fixup the chain element.  If we have a logical buffer in-hand
1069          * we don't want to create a conflicting device buffer.
1070          */
1071         if (loff && bp) {
1072                 chain = hammer2_chain_lookup(hmp, &parent, lbase, lbase,
1073                                              HAMMER2_LOOKUP_NODATA);
1074                 if (chain) {
1075                         allocbuf(bp, nblksize);
1076                         switch(chain->bref.type) {
1077                         case HAMMER2_BREF_TYPE_DATA:
1078                                 hammer2_chain_resize(ip, chain,
1079                                              hammer2_bytes_to_radix(nblksize),
1080                                              HAMMER2_MODIFY_OPTDATA);
1081                                 bzero(bp->b_data + loff, nblksize - loff);
1082                                 bp->b_bio2.bio_offset = chain->bref.data_off &
1083                                                         HAMMER2_OFF_MASK;
1084                                 break;
1085                         case HAMMER2_BREF_TYPE_INODE:
1086                                 bzero(bp->b_data + loff, nblksize - loff);
1087                                 bp->b_bio2.bio_offset = NOOFFSET;
1088                                 break;
1089                         default:
1090                                 panic("hammer2_truncate_file: bad type");
1091                                 break;
1092                         }
1093                         hammer2_chain_unlock(hmp, chain);
1094                         if (bp->b_bcount == HAMMER2_PBUFSIZE)
1095                                 bp->b_flags |= B_CLUSTEROK;
1096                         bdwrite(bp);
1097                 } else {
1098                         /*
1099                          * Destroy clean buffer w/ wrong buffer size.  Retain
1100                          * backing store.
1101                          */
1102                         bp->b_flags |= B_RELBUF;
1103                         KKASSERT(bp->b_bio2.bio_offset == NOOFFSET);
1104                         KKASSERT((bp->b_flags & B_DIRTY) == 0);
1105                         bqrelse(bp);
1106                 }
1107         } else if (loff) {
1108                 /*
1109                  * WARNING: This utilizes a device buffer for the data.
1110                  *
1111                  * This case should not occur because file truncations without
1112                  * a vnode (and hence no logical buffer cache) should only
1113                  * always truncate to 0-length.
1114                  */
1115                 panic("hammer2_truncate_file: non-zero truncation, no-vnode");
1116 #if 0
1117                 chain = hammer2_chain_lookup(hmp, &parent, lbase, lbase, 0);
1118                 if (chain) {
1119                         switch(chain->bref.type) {
1120                         case HAMMER2_BREF_TYPE_DATA:
1121                                 hammer2_chain_resize(ip, chain,
1122                                              hammer2_bytes_to_radix(nblksize),
1123                                              0);
1124                                 hammer2_chain_modify(hmp, chain, 0);
1125                                 bzero(chain->data->buf + loff, nblksize - loff);
1126                                 break;
1127                         case HAMMER2_BREF_TYPE_INODE:
1128                                 if (loff < HAMMER2_EMBEDDED_BYTES) {
1129                                         hammer2_chain_modify(hmp, chain, 0);
1130                                         bzero(chain->data->ipdata.u.data + loff,
1131                                               HAMMER2_EMBEDDED_BYTES - loff);
1132                                 }
1133                                 break;
1134                         }
1135                         hammer2_chain_unlock(hmp, chain);
1136                 }
1137 #endif
1138         }
1139
1140         /*
1141          * Clean up any fragmentory VM pages now that we have properly
1142          * resized the straddling buffer.  These pages are no longer
1143          * part of the buffer.
1144          */
1145         if (ip->vp) {
1146                 nvtruncbuf(ip->vp, nsize,
1147                            nblksize, (int)nsize & (nblksize - 1),
1148                            1);
1149         }
1150
1151         /*
1152          * Destroy any physical blocks after the new EOF point.
1153          */
1154         lbase = (nsize + HAMMER2_PBUFMASK64) & ~HAMMER2_PBUFMASK64;
1155         chain = hammer2_chain_lookup(hmp, &parent,
1156                                      lbase, (hammer2_key_t)-1,
1157                                      HAMMER2_LOOKUP_NODATA);
1158         while (chain) {
1159                 /*
1160                  * Degenerate embedded data case, nothing to loop on.
1161                  */
1162                 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
1163                         hammer2_chain_unlock(hmp, chain);
1164                         break;
1165                 }
1166
1167                 /*
1168                  * Delete physical data blocks past the file EOF.
1169                  */
1170                 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
1171                         ip->delta_dcount -= chain->bytes;
1172                         hammer2_chain_delete(hmp, parent, chain, 0);
1173                 }
1174                 /* XXX check parent if empty indirect block & delete */
1175                 chain = hammer2_chain_next(hmp, &parent, chain,
1176                                            lbase, (hammer2_key_t)-1,
1177                                            HAMMER2_LOOKUP_NODATA);
1178         }
1179         hammer2_chain_unlock(hmp, parent);
1180 }
1181
1182 /*
1183  * Extend the size of a file.  The inode must be locked.
1184  *
1185  * We may have to resize the block straddling the old EOF.
1186  */
1187 static
1188 void
1189 hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize)
1190 {
1191         hammer2_mount_t *hmp;
1192         hammer2_chain_t *parent;
1193         hammer2_chain_t *chain;
1194         struct buf *bp;
1195         hammer2_key_t osize;
1196         hammer2_key_t obase;
1197         hammer2_key_t nbase;
1198         hammer2_key_t leof;
1199         int oblksize;
1200         int nblksize;
1201         int nradix;
1202         int error;
1203
1204         KKASSERT(ip->vp);
1205         hmp = ip->hmp;
1206
1207         hammer2_chain_modify(hmp, &ip->chain, 0);
1208
1209         /*
1210          * Nothing to do if the direct-data case is still intact
1211          */
1212         if ((ip->ip_data.op_flags & HAMMER2_OPFLAG_DIRECTDATA) &&
1213             nsize <= HAMMER2_EMBEDDED_BYTES) {
1214                 ip->ip_data.size = nsize;
1215                 nvextendbuf(ip->vp,
1216                             ip->ip_data.size, nsize,
1217                             0, HAMMER2_EMBEDDED_BYTES,
1218                             0, (int)nsize,
1219                             1);
1220                 return;
1221         }
1222
1223         /*
1224          * Calculate the blocksize at the original EOF and resize the block
1225          * if necessary.  Adjust the file size in the inode.
1226          */
1227         osize = ip->ip_data.size;
1228         oblksize = hammer2_calc_logical(ip, osize, &obase, &leof);
1229         ip->ip_data.size = nsize;
1230         nblksize = hammer2_calc_logical(ip, osize, &nbase, &leof);
1231
1232         /*
1233          * Do all required vnode operations, but do not mess with the
1234          * buffer straddling the orignal EOF.
1235          */
1236         nvextendbuf(ip->vp,
1237                     ip->ip_data.size, nsize,
1238                     0, nblksize,
1239                     0, (int)nsize & HAMMER2_PBUFMASK,
1240                     1);
1241
1242         /*
1243          * Early return if we have no more work to do.
1244          */
1245         if (obase == nbase && oblksize == nblksize &&
1246             (ip->ip_data.op_flags & HAMMER2_OPFLAG_DIRECTDATA) == 0) {
1247                 return;
1248         }
1249
1250         /*
1251          * We have work to do, including possibly resizing the buffer
1252          * at the previous EOF point and turning off DIRECTDATA mode.
1253          */
1254         bp = NULL;
1255         if (((int)osize & HAMMER2_PBUFMASK)) {
1256                 error = bread(ip->vp, obase, oblksize, &bp);
1257                 KKASSERT(error == 0);
1258
1259                 if (obase != nbase) {
1260                         if (oblksize != HAMMER2_PBUFSIZE)
1261                                 allocbuf(bp, HAMMER2_PBUFSIZE);
1262                 } else {
1263                         if (oblksize != nblksize)
1264                                 allocbuf(bp, nblksize);
1265                 }
1266         }
1267
1268         /*
1269          * Disable direct-data mode by loading up a buffer cache buffer
1270          * with the data, then converting the inode data area into the
1271          * inode indirect block array area.
1272          */
1273         if (ip->ip_data.op_flags & HAMMER2_OPFLAG_DIRECTDATA) {
1274                 ip->ip_data.op_flags &= ~HAMMER2_OPFLAG_DIRECTDATA;
1275                 bzero(&ip->ip_data.u.blockset, sizeof(ip->ip_data.u.blockset));
1276         }
1277
1278         /*
1279          * Resize the chain element at the old EOF.
1280          */
1281         if (((int)osize & HAMMER2_PBUFMASK)) {
1282 retry:
1283                 parent = &ip->chain;
1284                 error = hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS);
1285                 KKASSERT(error == 0);
1286
1287                 nradix = hammer2_bytes_to_radix(nblksize);
1288
1289                 chain = hammer2_chain_lookup(hmp, &parent,
1290                                              obase, obase,
1291                                              HAMMER2_LOOKUP_NODATA);
1292                 if (chain == NULL) {
1293                         chain = hammer2_chain_create(hmp, parent, NULL,
1294                                                      obase, nblksize,
1295                                                      HAMMER2_BREF_TYPE_DATA,
1296                                                      nblksize, &error);
1297                         if (chain == NULL) {
1298                                 KKASSERT(error == EAGAIN);
1299                                 hammer2_chain_unlock(hmp, parent);
1300                                 goto retry;
1301                         }
1302                         ip->delta_dcount += nblksize;
1303                 } else {
1304                         KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA);
1305                         hammer2_chain_resize(ip, chain, nradix,
1306                                              HAMMER2_MODIFY_OPTDATA);
1307                 }
1308                 bp->b_bio2.bio_offset = chain->bref.data_off &
1309                                         HAMMER2_OFF_MASK;
1310                 hammer2_chain_unlock(hmp, chain);
1311                 if (bp->b_bcount == HAMMER2_PBUFSIZE)
1312                         bp->b_flags |= B_CLUSTEROK;
1313                 bdwrite(bp);
1314                 hammer2_chain_unlock(hmp, parent);
1315         }
1316 }
1317
1318 static
1319 int
1320 hammer2_vop_nresolve(struct vop_nresolve_args *ap)
1321 {
1322         hammer2_inode_t *dip;
1323         hammer2_inode_t *ip;
1324         hammer2_mount_t *hmp;
1325         hammer2_chain_t *parent;
1326         hammer2_chain_t *chain;
1327         struct namecache *ncp;
1328         const uint8_t *name;
1329         size_t name_len;
1330         hammer2_key_t lhc;
1331         int error = 0;
1332         struct vnode *vp;
1333
1334         dip = VTOI(ap->a_dvp);
1335         hmp = dip->hmp;
1336         ncp = ap->a_nch->ncp;
1337         name = ncp->nc_name;
1338         name_len = ncp->nc_nlen;
1339         lhc = hammer2_dirhash(name, name_len);
1340
1341         /*
1342          * Note: In DragonFly the kernel handles '.' and '..'.
1343          */
1344         parent = &dip->chain;
1345         hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS |
1346                                         HAMMER2_RESOLVE_SHARED);
1347         chain = hammer2_chain_lookup(hmp, &parent,
1348                                      lhc, lhc + HAMMER2_DIRHASH_LOMASK,
1349                                      HAMMER2_LOOKUP_SHARED);
1350         while (chain) {
1351                 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
1352                     chain->u.ip &&
1353                     name_len == chain->data->ipdata.name_len &&
1354                     bcmp(name, chain->data->ipdata.filename, name_len) == 0) {
1355                         break;
1356                 }
1357                 chain = hammer2_chain_next(hmp, &parent, chain,
1358                                            lhc, lhc + HAMMER2_DIRHASH_LOMASK,
1359                                            HAMMER2_LOOKUP_SHARED);
1360         }
1361         hammer2_chain_unlock(hmp, parent);
1362
1363         /*
1364          * If the inode represents a forwarding entry for a hardlink we have
1365          * to locate the actual inode.  The original ip is saved for possible
1366          * deconsolidation.  (ip) will only be set to non-NULL when we have
1367          * to locate the real file via a hardlink.  ip will be referenced but
1368          * not locked in that situation.  chain is passed in locked and
1369          * returned locked.
1370          *
1371          * XXX what kind of chain lock?
1372          */
1373         ip = NULL;
1374         if (chain && chain->u.ip->ip_data.type == HAMMER2_OBJTYPE_HARDLINK) {
1375                 error = hammer2_hardlink_find(dip, &chain, &ip);
1376                 if (error) {
1377                         kprintf("hammer2: unable to find hardlink\n");
1378                         if (chain) {
1379                                 hammer2_chain_unlock(hmp, chain);
1380                                 chain = NULL;
1381                         }
1382                         goto failed;
1383                 }
1384         }
1385
1386         /*
1387          * Deconsolidate any hardlink whos nlinks == 1.  Ignore errors.
1388          * If an error occurs chain and ip are left alone.
1389          *
1390          * XXX upgrade shared lock?
1391          */
1392         if (ip && chain && chain->u.ip->ip_data.nlinks == 1 && !hmp->ronly) {
1393                 kprintf("hammer2: need to unconsolidate hardlink for %s\n",
1394                         chain->u.ip->ip_data.filename);
1395                 hammer2_hardlink_deconsolidate(dip, &chain, &ip);
1396         }
1397
1398         /*
1399          * Acquire the related vnode
1400          *
1401          * NOTE: For error processing, only ENOENT resolves the namecache
1402          *       entry to NULL, otherwise we just return the error and
1403          *       leave the namecache unresolved.
1404          */
1405         if (chain) {
1406                 vp = hammer2_igetv(chain->u.ip, &error);
1407                 if (error == 0) {
1408                         vn_unlock(vp);
1409                         cache_setvp(ap->a_nch, vp);
1410                         vrele(vp);
1411                 } else if (error == ENOENT) {
1412                         cache_setvp(ap->a_nch, NULL);
1413                 }
1414                 hammer2_chain_unlock(hmp, chain);
1415         } else {
1416                 error = ENOENT;
1417                 cache_setvp(ap->a_nch, NULL);
1418         }
1419 failed:
1420         KASSERT(error || ap->a_nch->ncp->nc_vp != NULL,
1421                 ("resolve error %d/%p chain %p ap %p\n",
1422                  error, ap->a_nch->ncp->nc_vp, chain, ap));
1423         if (ip)
1424                 hammer2_inode_drop(ip);
1425         return error;
1426 }
1427
1428 static
1429 int
1430 hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
1431 {
1432         hammer2_inode_t *dip;
1433         hammer2_inode_t *ip;
1434         hammer2_mount_t *hmp;
1435         int error;
1436
1437         dip = VTOI(ap->a_dvp);
1438         hmp = dip->hmp;
1439
1440         if ((ip = dip->pip) == NULL) {
1441                 *ap->a_vpp = NULL;
1442                 return ENOENT;
1443         }
1444         hammer2_chain_lock(hmp, &ip->chain, HAMMER2_RESOLVE_ALWAYS);
1445         *ap->a_vpp = hammer2_igetv(ip, &error);
1446         hammer2_chain_unlock(hmp, &ip->chain);
1447
1448         return error;
1449 }
1450
1451 static
1452 int
1453 hammer2_vop_nmkdir(struct vop_nmkdir_args *ap)
1454 {
1455         hammer2_mount_t *hmp;
1456         hammer2_inode_t *dip;
1457         hammer2_inode_t *nip;
1458         struct namecache *ncp;
1459         const uint8_t *name;
1460         size_t name_len;
1461         int error;
1462
1463         dip = VTOI(ap->a_dvp);
1464         hmp = dip->hmp;
1465         if (hmp->ronly)
1466                 return (EROFS);
1467
1468         ncp = ap->a_nch->ncp;
1469         name = ncp->nc_name;
1470         name_len = ncp->nc_nlen;
1471
1472         error = hammer2_inode_create(dip, ap->a_vap, ap->a_cred,
1473                                      name, name_len, &nip);
1474         if (error) {
1475                 KKASSERT(nip == NULL);
1476                 *ap->a_vpp = NULL;
1477                 return error;
1478         }
1479         *ap->a_vpp = hammer2_igetv(nip, &error);
1480         hammer2_chain_unlock(hmp, &nip->chain);
1481
1482         if (error == 0) {
1483                 cache_setunresolved(ap->a_nch);
1484                 cache_setvp(ap->a_nch, *ap->a_vpp);
1485         }
1486         return error;
1487 }
1488
1489 /*
1490  * Return the largest contiguous physical disk range for the logical
1491  * request.
1492  *
1493  * (struct vnode *vp, off_t loffset, off_t *doffsetp, int *runp, int *runb)
1494  */
1495 static
1496 int
1497 hammer2_vop_bmap(struct vop_bmap_args *ap)
1498 {
1499         struct vnode *vp;
1500         hammer2_mount_t *hmp;
1501         hammer2_inode_t *ip;
1502         hammer2_chain_t *parent;
1503         hammer2_chain_t *chain;
1504         hammer2_key_t lbeg;
1505         hammer2_key_t lend;
1506         hammer2_off_t pbeg;
1507         hammer2_off_t pbytes;
1508         hammer2_off_t array[HAMMER2_BMAP_COUNT][2];
1509         int loff;
1510         int ai;
1511
1512         /*
1513          * Only supported on regular files
1514          *
1515          * Only supported for read operations (required for cluster_read).
1516          * The block allocation is delayed for write operations.
1517          */
1518         vp = ap->a_vp;
1519         if (vp->v_type != VREG)
1520                 return (EOPNOTSUPP);
1521         if (ap->a_cmd != BUF_CMD_READ)
1522                 return (EOPNOTSUPP);
1523
1524         ip = VTOI(vp);
1525         hmp = ip->hmp;
1526         bzero(array, sizeof(array));
1527
1528         /*
1529          * Calculate logical range
1530          */
1531         KKASSERT((ap->a_loffset & HAMMER2_LBUFMASK64) == 0);
1532         lbeg = ap->a_loffset & HAMMER2_OFF_MASK_HI;
1533         lend = lbeg + HAMMER2_BMAP_COUNT * HAMMER2_PBUFSIZE - 1;
1534         if (lend < lbeg)
1535                 lend = lbeg;
1536         loff = ap->a_loffset & HAMMER2_OFF_MASK_LO;
1537
1538         parent = &ip->chain;
1539         hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS |
1540                                         HAMMER2_RESOLVE_SHARED);
1541         chain = hammer2_chain_lookup(hmp, &parent,
1542                                      lbeg, lend,
1543                                      HAMMER2_LOOKUP_NODATA |
1544                                      HAMMER2_LOOKUP_SHARED);
1545         if (chain == NULL) {
1546                 *ap->a_doffsetp = ZFOFFSET;
1547                 hammer2_chain_unlock(hmp, parent);
1548                 return (0);
1549         }
1550
1551         while (chain) {
1552                 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
1553                         ai = (chain->bref.key - lbeg) / HAMMER2_PBUFSIZE;
1554                         KKASSERT(ai >= 0 && ai < HAMMER2_BMAP_COUNT);
1555                         array[ai][0] = chain->bref.data_off & HAMMER2_OFF_MASK;
1556                         array[ai][1] = chain->bytes;
1557                 }
1558                 chain = hammer2_chain_next(hmp, &parent, chain,
1559                                            lbeg, lend,
1560                                            HAMMER2_LOOKUP_NODATA |
1561                                            HAMMER2_LOOKUP_SHARED);
1562         }
1563         hammer2_chain_unlock(hmp, parent);
1564
1565         /*
1566          * If the requested loffset is not mappable physically we can't
1567          * bmap.  The caller will have to access the file data via a
1568          * device buffer.
1569          */
1570         if (array[0][0] == 0 || array[0][1] < loff + HAMMER2_LBUFSIZE) {
1571                 *ap->a_doffsetp = NOOFFSET;
1572                 return (0);
1573         }
1574
1575         /*
1576          * Calculate the physical disk offset range for array[0]
1577          */
1578         pbeg = array[0][0] + loff;
1579         pbytes = array[0][1] - loff;
1580
1581         for (ai = 1; ai < HAMMER2_BMAP_COUNT; ++ai) {
1582                 if (array[ai][0] != pbeg + pbytes)
1583                         break;
1584                 pbytes += array[ai][1];
1585         }
1586
1587         *ap->a_doffsetp = pbeg;
1588         if (ap->a_runp)
1589                 *ap->a_runp = pbytes;
1590         return (0);
1591 }
1592
/*
 * hammer2_vop_open
 *
 * Hands the open request straight to vop_stdopen() and returns its
 * result.
 */
static
int
hammer2_vop_open(struct vop_open_args *ap)
{
	int error;

	error = vop_stdopen(ap);
	return (error);
}
1599
1600 /*
1601  * hammer2_vop_advlock { vp, id, op, fl, flags }
1602  */
1603 static
1604 int
1605 hammer2_vop_advlock(struct vop_advlock_args *ap)
1606 {
1607         hammer2_inode_t *ip = VTOI(ap->a_vp);
1608
1609         return (lf_advlock(ap, &ip->advlock, ip->ip_data.size));
1610 }
1611
1612
/*
 * hammer2_vop_close
 *
 * Hands the close request straight to vop_stdclose() and returns its
 * result.
 */
static
int
hammer2_vop_close(struct vop_close_args *ap)
{
	int error;

	error = vop_stdclose(ap);
	return (error);
}
1619
1620 /*
1621  * hammer2_vop_nlink { nch, dvp, vp, cred }
1622  *
1623  * Create a hardlink from (vp) to {dvp, nch}.
1624  */
1625 static
1626 int
1627 hammer2_vop_nlink(struct vop_nlink_args *ap)
1628 {
1629         hammer2_inode_t *dip;   /* target directory to create link in */
1630         hammer2_inode_t *ip;    /* inode we are hardlinking to */
1631         hammer2_inode_t *oip;
1632         hammer2_mount_t *hmp;
1633         struct namecache *ncp;
1634         const uint8_t *name;
1635         size_t name_len;
1636         int error;
1637
1638         dip = VTOI(ap->a_dvp);
1639         hmp = dip->hmp;
1640         if (hmp->ronly)
1641                 return (EROFS);
1642
1643         /*
1644          * (ip) is the inode we are linking to.
1645          */
1646         ip = oip = VTOI(ap->a_vp);
1647         hammer2_inode_lock_nlinks(ip);
1648
1649         ncp = ap->a_nch->ncp;
1650         name = ncp->nc_name;
1651         name_len = ncp->nc_nlen;
1652
1653         /*
1654          * Create a consolidated real file for the hardlink, adjust (ip),
1655          * and move the nlinks lock if necessary.  Tell the function to
1656          * bump the hardlink count on the consolidated file.
1657          */
1658         error = hammer2_hardlink_consolidate(&ip, dip);
1659         if (error)
1660                 goto done;
1661
1662         /*
1663          * If the consolidation changed ip to a HARDLINK pointer we have
1664          * to adjust the vnode to point to the actual ip.
1665          *
1666          * XXX this can race against concurrent vnode ops.
1667          */
1668         if (oip != ip) {
1669                 hammer2_chain_ref(hmp, &ip->chain);
1670                 hammer2_inode_lock_ex(ip);
1671                 hammer2_inode_lock_ex(oip);
1672                 ip->vp = ap->a_vp;
1673                 ap->a_vp->v_data = ip;
1674                 oip->vp = NULL;
1675                 hammer2_inode_unlock_ex(oip);
1676                 hammer2_inode_unlock_ex(ip);
1677                 hammer2_chain_drop(hmp, &oip->chain);
1678         }
1679
1680         /*
1681          * The act of connecting the existing (ip) will properly bump the
1682          * nlinks count.  However, vp will incorrectly point at the old
1683          * inode which has now been turned into a OBJTYPE_HARDLINK pointer.
1684          *
1685          * We must reconnect the vp.
1686          */
1687         error = hammer2_inode_connect(dip, ip, name, name_len);
1688         if (error == 0) {
1689                 cache_setunresolved(ap->a_nch);
1690                 cache_setvp(ap->a_nch, ap->a_vp);
1691         }
1692 done:
1693         hammer2_inode_unlock_nlinks(ip);
1694         return error;
1695 }
1696
1697 /*
1698  * hammer2_vop_ncreate { nch, dvp, vpp, cred, vap }
1699  *
1700  * The operating system has already ensured that the directory entry
1701  * does not exist and done all appropriate namespace locking.
1702  */
1703 static
1704 int
1705 hammer2_vop_ncreate(struct vop_ncreate_args *ap)
1706 {
1707         hammer2_mount_t *hmp;
1708         hammer2_inode_t *dip;
1709         hammer2_inode_t *nip;
1710         struct namecache *ncp;
1711         const uint8_t *name;
1712         size_t name_len;
1713         int error;
1714
1715         dip = VTOI(ap->a_dvp);
1716         hmp = dip->hmp;
1717         if (hmp->ronly)
1718                 return (EROFS);
1719
1720         ncp = ap->a_nch->ncp;
1721         name = ncp->nc_name;
1722         name_len = ncp->nc_nlen;
1723
1724         error = hammer2_inode_create(dip, ap->a_vap, ap->a_cred,
1725                                      name, name_len, &nip);
1726         if (error) {
1727                 KKASSERT(nip == NULL);
1728                 *ap->a_vpp = NULL;
1729                 return error;
1730         }
1731         *ap->a_vpp = hammer2_igetv(nip, &error);
1732         hammer2_chain_unlock(hmp, &nip->chain);
1733
1734         if (error == 0) {
1735                 cache_setunresolved(ap->a_nch);
1736                 cache_setvp(ap->a_nch, *ap->a_vpp);
1737         }
1738         return error;
1739 }
1740
1741 /*
1742  * hammer2_vop_nsymlink { nch, dvp, vpp, cred, vap, target }
1743  */
1744 static
1745 int
1746 hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
1747 {
1748         hammer2_mount_t *hmp;
1749         hammer2_inode_t *dip;
1750         hammer2_inode_t *nip;
1751         struct namecache *ncp;
1752         const uint8_t *name;
1753         size_t name_len;
1754         int error;
1755
1756         dip = VTOI(ap->a_dvp);
1757         hmp = dip->hmp;
1758         if (hmp->ronly)
1759                 return (EROFS);
1760
1761         ncp = ap->a_nch->ncp;
1762         name = ncp->nc_name;
1763         name_len = ncp->nc_nlen;
1764
1765         ap->a_vap->va_type = VLNK;      /* enforce type */
1766
1767         error = hammer2_inode_create(dip, ap->a_vap, ap->a_cred,
1768                                      name, name_len, &nip);
1769         if (error) {
1770                 KKASSERT(nip == NULL);
1771                 *ap->a_vpp = NULL;
1772                 return error;
1773         }
1774         *ap->a_vpp = hammer2_igetv(nip, &error);
1775
1776         /*
1777          * Build the softlink (~like file data) and finalize the namecache.
1778          */
1779         if (error == 0) {
1780                 size_t bytes;
1781                 struct uio auio;
1782                 struct iovec aiov;
1783
1784                 bytes = strlen(ap->a_target);
1785
1786                 if (bytes <= HAMMER2_EMBEDDED_BYTES) {
1787                         KKASSERT(nip->ip_data.op_flags &
1788                                  HAMMER2_OPFLAG_DIRECTDATA);
1789                         bcopy(ap->a_target, nip->ip_data.u.data, bytes);
1790                         nip->ip_data.size = bytes;
1791                 } else {
1792                         bzero(&auio, sizeof(auio));
1793                         bzero(&aiov, sizeof(aiov));
1794                         auio.uio_iov = &aiov;
1795                         auio.uio_segflg = UIO_SYSSPACE;
1796                         auio.uio_rw = UIO_WRITE;
1797                         auio.uio_resid = bytes;
1798                         auio.uio_iovcnt = 1;
1799                         auio.uio_td = curthread;
1800                         aiov.iov_base = ap->a_target;
1801                         aiov.iov_len = bytes;
1802                         error = hammer2_write_file(nip, &auio, IO_APPEND, 0);
1803                         /* XXX handle error */
1804                         error = 0;
1805                 }
1806         }
1807         hammer2_chain_unlock(hmp, &nip->chain);
1808
1809         /*
1810          * Finalize namecache
1811          */
1812         if (error == 0) {
1813                 cache_setunresolved(ap->a_nch);
1814                 cache_setvp(ap->a_nch, *ap->a_vpp);
1815                 /* hammer2_knote(ap->a_dvp, NOTE_WRITE); */
1816         }
1817         return error;
1818 }
1819
1820 /*
1821  * hammer2_vop_nremove { nch, dvp, cred }
1822  */
1823 static
1824 int
1825 hammer2_vop_nremove(struct vop_nremove_args *ap)
1826 {
1827         hammer2_inode_t *dip;
1828         hammer2_mount_t *hmp;
1829         struct namecache *ncp;
1830         const uint8_t *name;
1831         size_t name_len;
1832         int error;
1833
1834         dip = VTOI(ap->a_dvp);
1835         hmp = dip->hmp;
1836         if (hmp->ronly)
1837                 return(EROFS);
1838
1839         ncp = ap->a_nch->ncp;
1840         name = ncp->nc_name;
1841         name_len = ncp->nc_nlen;
1842
1843         error = hammer2_unlink_file(dip, name, name_len, 0, NULL);
1844
1845         if (error == 0) {
1846                 cache_unlink(ap->a_nch);
1847         }
1848         return (error);
1849 }
1850
1851 /*
1852  * hammer2_vop_nrmdir { nch, dvp, cred }
1853  */
1854 static
1855 int
1856 hammer2_vop_nrmdir(struct vop_nrmdir_args *ap)
1857 {
1858         hammer2_inode_t *dip;
1859         hammer2_mount_t *hmp;
1860         struct namecache *ncp;
1861         const uint8_t *name;
1862         size_t name_len;
1863         int error;
1864
1865         dip = VTOI(ap->a_dvp);
1866         hmp = dip->hmp;
1867         if (hmp->ronly)
1868                 return(EROFS);
1869
1870         ncp = ap->a_nch->ncp;
1871         name = ncp->nc_name;
1872         name_len = ncp->nc_nlen;
1873
1874         error = hammer2_unlink_file(dip, name, name_len, 1, NULL);
1875
1876         if (error == 0) {
1877                 cache_unlink(ap->a_nch);
1878         }
1879         return (error);
1880 }
1881
1882 /*
1883  * hammer2_vop_nrename { fnch, tnch, fdvp, tdvp, cred }
1884  */
1885 static
1886 int
1887 hammer2_vop_nrename(struct vop_nrename_args *ap)
1888 {
1889         struct namecache *fncp;
1890         struct namecache *tncp;
1891         hammer2_inode_t *fdip;
1892         hammer2_inode_t *tdip;
1893         hammer2_inode_t *ip;
1894         hammer2_mount_t *hmp;
1895         const uint8_t *fname;
1896         size_t fname_len;
1897         const uint8_t *tname;
1898         size_t tname_len;
1899         int error;
1900
1901         if (ap->a_fdvp->v_mount != ap->a_tdvp->v_mount)
1902                 return(EXDEV);
1903         if (ap->a_fdvp->v_mount != ap->a_fnch->ncp->nc_vp->v_mount)
1904                 return(EXDEV);
1905
1906         fdip = VTOI(ap->a_fdvp);        /* source directory */
1907         tdip = VTOI(ap->a_tdvp);        /* target directory */
1908
1909         hmp = fdip->hmp;                /* check read-only filesystem */
1910         if (hmp->ronly)
1911                 return(EROFS);
1912
1913         fncp = ap->a_fnch->ncp;         /* entry name in source */
1914         fname = fncp->nc_name;
1915         fname_len = fncp->nc_nlen;
1916
1917         tncp = ap->a_tnch->ncp;         /* entry name in target */
1918         tname = tncp->nc_name;
1919         tname_len = tncp->nc_nlen;
1920
1921         /*
1922          * ip is the inode being removed.  If this is a hardlink then
1923          * ip represents the actual file and not the hardlink marker.
1924          */
1925         ip = VTOI(fncp->nc_vp);
1926
1927         /*
1928          * Keep a tight grip on the inode as removing it should disconnect
1929          * it and we don't want to destroy it.
1930          *
1931          * NOTE: To avoid deadlocks we cannot lock (ip) while we are
1932          *       unlinking elements from their directories.  Locking
1933          *       the nlinks field does not lock the whole inode.
1934          */
1935         hammer2_inode_lock_nlinks(ip);
1936
1937         /*
1938          * Remove target if it exists
1939          */
1940         error = hammer2_unlink_file(tdip, tname, tname_len, -1, NULL);
1941         if (error && error != ENOENT)
1942                 goto done;
1943         cache_setunresolved(ap->a_tnch);
1944
1945         /*
1946          * Disconnect (fdip, fname) from the source directory.  This will
1947          * disconnect (ip) if it represents a direct file.  If (ip) represents
1948          * a hardlink the HARDLINK pointer object will be removed but the
1949          * hardlink will stay intact.
1950          *
1951          * If (ip) is already hardlinked we have to resolve to a consolidated
1952          * file but we do not bump the nlinks count.  (ip) must hold the nlinks
1953          * lock & ref for the operation.  If the consolidated file has been
1954          * relocated (ip) will be adjusted and the related nlinks lock moved
1955          * along with it.
1956          *
1957          * If (ip) does not have multiple links we can just copy the physical
1958          * contents of the inode.
1959          */
1960         if (ip->ip_data.nlinks > 1) {
1961                 error = hammer2_hardlink_consolidate(&ip, tdip);
1962                 if (error)
1963                         goto done;
1964         }
1965
1966         /*
1967          * NOTE! Because we are retaining (ip) the unlink can fail with
1968          *       an EAGAIN.
1969          */
1970         for (;;) {
1971                 error = hammer2_unlink_file(fdip, fname, fname_len, -1, ip);
1972                 if (error != EAGAIN)
1973                         break;
1974                 kprintf("hammer2_vop_nrename: unlink race %s\n", fname);
1975                 tsleep(fdip, 0, "h2renr", 1);
1976         }
1977         if (error)
1978                 goto done;
1979
1980         /*
1981          * Reconnect ip to target directory.
1982          *
1983          * WARNING: chain locks can lock buffer cache buffers, to avoid
1984          *          deadlocks we want to unlock before issuing a cache_*()
1985          *          op (that might have to lock a vnode).
1986          */
1987         error = hammer2_inode_connect(tdip, ip, tname, tname_len);
1988         if (error == 0) {
1989                 cache_rename(ap->a_fnch, ap->a_tnch);
1990         }
1991 done:
1992         hammer2_inode_unlock_nlinks(ip);
1993
1994         return (error);
1995 }
1996
1997 static int hammer2_strategy_read(struct vop_strategy_args *ap);
1998 static int hammer2_strategy_write(struct vop_strategy_args *ap);
1999
2000 static
2001 int
2002 hammer2_vop_strategy(struct vop_strategy_args *ap)
2003 {
2004         struct bio *biop;
2005         struct buf *bp;
2006         int error;
2007
2008         biop = ap->a_bio;
2009         bp = biop->bio_buf;
2010
2011         switch(bp->b_cmd) {
2012         case BUF_CMD_READ:
2013                 error = hammer2_strategy_read(ap);
2014                 ++hammer2_iod_file_read;
2015                 break;
2016         case BUF_CMD_WRITE:
2017                 error = hammer2_strategy_write(ap);
2018                 ++hammer2_iod_file_write;
2019                 break;
2020         default:
2021                 bp->b_error = error = EINVAL;
2022                 bp->b_flags |= B_ERROR;
2023                 biodone(biop);
2024                 break;
2025         }
2026
2027         return (error);
2028 }
2029
2030 static
2031 int
2032 hammer2_strategy_read(struct vop_strategy_args *ap)
2033 {
2034         struct buf *bp;
2035         struct bio *bio;
2036         struct bio *nbio;
2037         hammer2_mount_t *hmp;
2038         hammer2_inode_t *ip;
2039         hammer2_chain_t *parent;
2040         hammer2_chain_t *chain;
2041         hammer2_key_t lbase;
2042
2043         bio = ap->a_bio;
2044         bp = bio->bio_buf;
2045         ip = VTOI(ap->a_vp);
2046         hmp = ip->hmp;
2047         nbio = push_bio(bio);
2048
2049         lbase = bio->bio_offset;
2050         chain = NULL;
2051         KKASSERT(((int)lbase & HAMMER2_PBUFMASK) == 0);
2052
2053         /*
2054          * We must characterize the logical->physical translation if it
2055          * has not already been cached.
2056          *
2057          * Physical data references < LBUFSIZE are never cached.  This
2058          * includes both small-block allocations and inode-embedded data.
2059          */
2060         if (nbio->bio_offset == NOOFFSET) {
2061                 parent = &ip->chain;
2062                 hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_ALWAYS |
2063                                                 HAMMER2_RESOLVE_SHARED);
2064
2065                 chain = hammer2_chain_lookup(hmp, &parent, lbase, lbase,
2066                                              HAMMER2_LOOKUP_NODATA |
2067                                              HAMMER2_LOOKUP_SHARED);
2068                 if (chain == NULL) {
2069                         /*
2070                          * Data is zero-fill
2071                          */
2072                         nbio->bio_offset = ZFOFFSET;
2073                 } else if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
2074                         /*
2075                          * Data is embedded in the inode (do nothing)
2076                          */
2077                         KKASSERT(chain == parent);
2078                         hammer2_chain_unlock(hmp, chain);
2079                 } else if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
2080                         /*
2081                          * Data is on-media
2082                          */
2083                         KKASSERT(bp->b_bcount == chain->bytes);
2084                         nbio->bio_offset = chain->bref.data_off &
2085                                            HAMMER2_OFF_MASK;
2086                         hammer2_chain_unlock(hmp, chain);
2087                         KKASSERT(nbio->bio_offset != 0);
2088                 } else {
2089                         panic("hammer2_strategy_read: unknown bref type");
2090                 }
2091                 hammer2_chain_unlock(hmp, parent);
2092         }
2093
2094         if (hammer2_debug & 0x0020) {
2095                 kprintf("read %016jx %016jx\n",
2096                         bio->bio_offset, nbio->bio_offset);
2097         }
2098
2099         if (nbio->bio_offset == ZFOFFSET) {
2100                 /*
2101                  * Data is zero-fill
2102                  */
2103                 bp->b_resid = 0;
2104                 bp->b_error = 0;
2105                 bzero(bp->b_data, bp->b_bcount);
2106                 biodone(nbio);
2107         } else if (nbio->bio_offset != NOOFFSET) {
2108                 /*
2109                  * Forward direct IO to the device
2110                  */
2111                 vn_strategy(hmp->devvp, nbio);
2112         } else {
2113                 /*
2114                  * Data is embedded in inode.
2115                  */
2116                 bcopy(chain->data->ipdata.u.data, bp->b_data,
2117                       HAMMER2_EMBEDDED_BYTES);
2118                 bzero(bp->b_data + HAMMER2_EMBEDDED_BYTES,
2119                       bp->b_bcount - HAMMER2_EMBEDDED_BYTES);
2120                 bp->b_resid = 0;
2121                 bp->b_error = 0;
2122                 biodone(nbio);
2123         }
2124         return (0);
2125 }
2126
2127 static
2128 int
2129 hammer2_strategy_write(struct vop_strategy_args *ap)
2130 {
2131         struct buf *bp;
2132         struct bio *bio;
2133         struct bio *nbio;
2134         hammer2_mount_t *hmp;
2135         hammer2_inode_t *ip;
2136
2137         bio = ap->a_bio;
2138         bp = bio->bio_buf;
2139         ip = VTOI(ap->a_vp);
2140         hmp = ip->hmp;
2141         nbio = push_bio(bio);
2142
2143         KKASSERT((bio->bio_offset & HAMMER2_PBUFMASK64) == 0);
2144         KKASSERT(nbio->bio_offset != 0 && nbio->bio_offset != ZFOFFSET);
2145
2146         if (nbio->bio_offset == NOOFFSET) {
2147                 /*
2148                  * Must be embedded in the inode.
2149                  */
2150                 KKASSERT(bio->bio_offset == 0);
2151                 bcopy(bp->b_data, ip->ip_data.u.data, HAMMER2_EMBEDDED_BYTES);
2152                 bp->b_resid = 0;
2153                 bp->b_error = 0;
2154                 biodone(nbio);
2155
2156                 /*
2157                  * This special flag does not follow the normal MODIFY rules
2158                  * because we might deadlock on ip.  Instead we depend on
2159                  * VOP_FSYNC() to detect the case.
2160                  */
2161                 atomic_set_int(&ip->chain.flags, HAMMER2_CHAIN_DIRTYEMBED);
2162         } else {
2163                 /*
2164                  * Forward direct IO to the device
2165                  */
2166                 vn_strategy(hmp->devvp, nbio);
2167         }
2168         return (0);
2169 }
2170
2171 /*
2172  * hammer2_vop_ioctl { vp, command, data, fflag, cred }
2173  */
2174 static
2175 int
2176 hammer2_vop_ioctl(struct vop_ioctl_args *ap)
2177 {
2178         hammer2_mount_t *hmp;
2179         hammer2_inode_t *ip;
2180         int error;
2181
2182         ip = VTOI(ap->a_vp);
2183         hmp = ip->hmp;
2184
2185         error = hammer2_ioctl(ip, ap->a_command, (void *)ap->a_data,
2186                               ap->a_fflag, ap->a_cred);
2187         return (error);
2188 }
2189
2190 static
2191 int 
2192 hammer2_vop_mountctl(struct vop_mountctl_args *ap)
2193 {
2194         struct mount *mp;
2195         hammer2_pfsmount_t *pmp;
2196         int rc;
2197
2198         switch (ap->a_op) {
2199         case (MOUNTCTL_SET_EXPORT):
2200                 mp = ap->a_head.a_ops->head.vv_mount;
2201                 pmp = MPTOPMP(mp);
2202
2203                 if (ap->a_ctllen != sizeof(struct export_args))
2204                         rc = (EINVAL);
2205                 else
2206                         rc = vfs_export(mp, &pmp->export,
2207                                         (const struct export_args *)ap->a_ctl);
2208                 break;
2209         default:
2210                 rc = vop_stdmountctl(ap);
2211                 break;
2212         }
2213         return (rc);
2214 }
2215
2216 struct vop_ops hammer2_vnode_vops = {
2217         .vop_default    = vop_defaultop,
2218         .vop_fsync      = hammer2_vop_fsync,
2219         .vop_getpages   = vop_stdgetpages,
2220         .vop_putpages   = vop_stdputpages,
2221         .vop_access     = hammer2_vop_access,
2222         .vop_advlock    = hammer2_vop_advlock,
2223         .vop_close      = hammer2_vop_close,
2224         .vop_nlink      = hammer2_vop_nlink,
2225         .vop_ncreate    = hammer2_vop_ncreate,
2226         .vop_nsymlink   = hammer2_vop_nsymlink,
2227         .vop_nremove    = hammer2_vop_nremove,
2228         .vop_nrmdir     = hammer2_vop_nrmdir,
2229         .vop_nrename    = hammer2_vop_nrename,
2230         .vop_getattr    = hammer2_vop_getattr,
2231         .vop_setattr    = hammer2_vop_setattr,
2232         .vop_readdir    = hammer2_vop_readdir,
2233         .vop_readlink   = hammer2_vop_readlink,
2234         .vop_getpages   = vop_stdgetpages,
2235         .vop_putpages   = vop_stdputpages,
2236         .vop_read       = hammer2_vop_read,
2237         .vop_write      = hammer2_vop_write,
2238         .vop_open       = hammer2_vop_open,
2239         .vop_inactive   = hammer2_vop_inactive,
2240         .vop_reclaim    = hammer2_vop_reclaim,
2241         .vop_nresolve   = hammer2_vop_nresolve,
2242         .vop_nlookupdotdot = hammer2_vop_nlookupdotdot,
2243         .vop_nmkdir     = hammer2_vop_nmkdir,
2244         .vop_ioctl      = hammer2_vop_ioctl,
2245         .vop_mountctl   = hammer2_vop_mountctl,
2246         .vop_bmap       = hammer2_vop_bmap,
2247         .vop_strategy   = hammer2_vop_strategy,
2248 };
2249
2250 struct vop_ops hammer2_spec_vops = {
2251
2252 };
2253
2254 struct vop_ops hammer2_fifo_vops = {
2255
2256 };