Merge branches 'hammer2' and 'master' of ssh://crater.dragonflybsd.org/repository...
[dragonfly.git] / sys / vfs / hammer2 / hammer2_vnops.c
CommitLineData
e118c14f
MD
1/*
2 * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
17 * distribution.
18 * 3. Neither the name of The DragonFly Project nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific, prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
703720e4
MD
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/kernel.h>
38#include <sys/fcntl.h>
39#include <sys/buf.h>
40#include <sys/proc.h>
41#include <sys/namei.h>
42#include <sys/mount.h>
43#include <sys/vnode.h>
f0206a67 44#include <sys/mountctl.h>
e028fa74 45#include <sys/dirent.h>
703720e4
MD
46
47#include "hammer2.h"
48
db71f61f
MD
49#define ZFOFFSET (-2LL)
50
3ac6a319
MD
51static void hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize,
52 int trivial);
53static void hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize);
54
703720e4
MD
55/*
56 * Last reference to a vnode is going away but it is still cached.
57 */
e118c14f 58static
703720e4 59int
e118c14f 60hammer2_vop_inactive(struct vop_inactive_args *ap)
703720e4
MD
61{
62 struct vnode *vp;
63 struct hammer2_inode *ip;
e118c14f 64#if 0
703720e4 65 struct hammer2_mount *hmp;
e118c14f 66#endif
703720e4 67
703720e4
MD
68 vp = ap->a_vp;
69 ip = VTOI(vp);
703720e4 70
df9ea374
MD
71 /*
72 * Degenerate case
73 */
74 if (ip == NULL) {
75 vrecycle(vp);
76 return (0);
77 }
78
703720e4
MD
79 return (0);
80}
81
82/*
83 * Reclaim a vnode so that it can be reused; after the inode is
84 * disassociated, the filesystem must manage it alone.
85 */
e118c14f 86static
703720e4 87int
e118c14f 88hammer2_vop_reclaim(struct vop_reclaim_args *ap)
703720e4 89{
703720e4
MD
90 struct hammer2_inode *ip;
91 struct hammer2_mount *hmp;
b7926f31 92 struct vnode *vp;
703720e4 93
703720e4
MD
94 vp = ap->a_vp;
95 ip = VTOI(vp);
9c2e0de0
MD
96 if (ip == NULL)
97 return(0);
9c2e0de0 98 hmp = ip->hmp;
b7926f31 99
54eb943b 100 hammer2_inode_lock_ex(ip);
703720e4 101 vp->v_data = NULL;
0e92b724 102 ip->vp = NULL;
b7926f31 103 hammer2_chain_flush(hmp, &ip->chain, NULL);
54eb943b 104 hammer2_inode_unlock_ex(ip);
9c2e0de0 105 hammer2_chain_drop(hmp, &ip->chain); /* vp ref removed */
54eb943b
MD
106
107 /*
108 * XXX handle background sync when ip dirty, kernel will no longer
109 * notify us regarding this inode because there is no longer a
110 * vnode attached to it.
111 */
703720e4
MD
112
113 return (0);
114}
115
e118c14f 116static
703720e4 117int
e118c14f 118hammer2_vop_fsync(struct vop_fsync_args *ap)
703720e4 119{
b7926f31
MD
120 struct hammer2_inode *ip;
121 struct hammer2_mount *hmp;
122 struct vnode *vp;
123
124 vp = ap->a_vp;
125 ip = VTOI(vp);
126 hmp = ip->hmp;
127
128 hammer2_inode_lock_ex(ip);
129 vfsync(vp, ap->a_waitfor, 1, NULL, NULL);
130 hammer2_chain_flush(hmp, &ip->chain, NULL);
131 hammer2_inode_unlock_ex(ip);
132 return (0);
703720e4
MD
133}
134
e118c14f 135static
703720e4 136int
e118c14f 137hammer2_vop_access(struct vop_access_args *ap)
703720e4 138{
37494cab
MD
139 hammer2_inode_t *ip = VTOI(ap->a_vp);
140 uid_t uid;
141 gid_t gid;
142 int error;
143
144 uid = hammer2_to_unix_xid(&ip->ip_data.uid);
145 gid = hammer2_to_unix_xid(&ip->ip_data.gid);
146
147 error = vop_helper_access(ap, uid, gid, ip->ip_data.mode,
148 ip->ip_data.uflags);
149 return (error);
703720e4
MD
150}
151
e118c14f 152static
703720e4 153int
e118c14f 154hammer2_vop_getattr(struct vop_getattr_args *ap)
703720e4 155{
cd4b3d92
MD
156 hammer2_mount_t *hmp;
157 hammer2_inode_t *ip;
703720e4
MD
158 struct vnode *vp;
159 struct vattr *vap;
703720e4
MD
160
161 vp = ap->a_vp;
162 vap = ap->a_vap;
163
cd4b3d92
MD
164 ip = VTOI(vp);
165 hmp = ip->hmp;
166
703720e4
MD
167 hammer2_inode_lock_sh(ip);
168
cd4b3d92
MD
169 vap->va_fsid = hmp->mp->mnt_stat.f_fsid.val[0];
170 vap->va_fileid = ip->ip_data.inum;
171 vap->va_mode = ip->ip_data.mode;
172 vap->va_nlink = ip->ip_data.nlinks;
703720e4
MD
173 vap->va_uid = 0;
174 vap->va_gid = 0;
cd4b3d92
MD
175 vap->va_rmajor = 0;
176 vap->va_rminor = 0;
177 vap->va_size = ip->ip_data.size;
df9ea374 178 vap->va_blocksize = HAMMER2_PBUFSIZE;
cd4b3d92
MD
179 vap->va_flags = ip->ip_data.uflags;
180 hammer2_time_to_timespec(ip->ip_data.ctime, &vap->va_ctime);
181 hammer2_time_to_timespec(ip->ip_data.mtime, &vap->va_mtime);
182 hammer2_time_to_timespec(ip->ip_data.mtime, &vap->va_atime);
183 vap->va_gen = 1;
184 vap->va_bytes = vap->va_size;
185 vap->va_type = hammer2_get_vtype(ip);
186 vap->va_filerev = 0;
187 vap->va_uid_uuid = ip->ip_data.uid;
188 vap->va_gid_uuid = ip->ip_data.gid;
189 vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
190 VA_FSID_UUID_VALID;
703720e4
MD
191
192 hammer2_inode_unlock_sh(ip);
193
194 return (0);
195}
196
3ac6a319
MD
197static
198int
199hammer2_vop_setattr(struct vop_setattr_args *ap)
200{
201 hammer2_mount_t *hmp;
202 hammer2_inode_t *ip;
203 struct vnode *vp;
204 struct vattr *vap;
205 int error;
206 int kflags = 0;
207 int doctime = 0;
208 int domtime = 0;
209
210 vp = ap->a_vp;
211 vap = ap->a_vap;
212
213 ip = VTOI(vp);
214 hmp = ip->hmp;
215
216 if (hmp->ronly)
217 return(EROFS);
218
219 hammer2_inode_lock_ex(ip);
220 error = 0;
221
222 if (vap->va_flags != VNOVAL) {
223 u_int32_t flags;
224
225 flags = ip->ip_data.uflags;
226 error = vop_helper_setattr_flags(&flags, vap->va_flags,
227 hammer2_to_unix_xid(&ip->ip_data.uid),
228 ap->a_cred);
229 if (error == 0) {
230 if (ip->ip_data.uflags != flags) {
231 hammer2_chain_modify(hmp, &ip->chain);
232 ip->ip_data.uflags = flags;
233 doctime = 1;
234 kflags |= NOTE_ATTRIB;
235 }
236 if (ip->ip_data.uflags & (IMMUTABLE | APPEND)) {
237 error = 0;
238 goto done;
239 }
240 }
241 }
242
243 if (ip->ip_data.uflags & (IMMUTABLE | APPEND)) {
244 error = EPERM;
245 goto done;
246 }
247 /* uid, gid */
248
249 /*
250 * Resize the file
251 */
252 if (vap->va_size != VNOVAL && ip->ip_data.size != vap->va_size) {
253 switch(vp->v_type) {
254 case VREG:
255 if (vap->va_size == ip->ip_data.size)
256 break;
257 if (vap->va_size < ip->ip_data.size) {
258 hammer2_chain_modify(hmp, &ip->chain);
259 hammer2_truncate_file(ip, vap->va_size);
260 ip->ip_data.size = vap->va_size;
261 } else {
262 hammer2_chain_modify(hmp, &ip->chain);
263 hammer2_extend_file(ip, vap->va_size, 0);
264 ip->ip_data.size = vap->va_size;
265 }
266 domtime = 1;
267 break;
268 default:
269 error = EINVAL;
270 goto done;
271 }
272 }
273done:
274 hammer2_inode_unlock_ex(ip);
275 return (error);
276}
277
e118c14f 278static
703720e4 279int
e118c14f 280hammer2_vop_readdir(struct vop_readdir_args *ap)
703720e4 281{
e028fa74
MD
282 hammer2_mount_t *hmp;
283 hammer2_inode_t *ip;
284 hammer2_inode_t *xip;
285 hammer2_chain_t *parent;
286 hammer2_chain_t *chain;
287 hammer2_key_t lkey;
288 struct uio *uio;
289 off_t *cookies;
290 off_t saveoff;
291 int cookie_index;
292 int ncookies;
293 int error;
294 int dtype;
295 int r;
296
297 ip = VTOI(ap->a_vp);
298 hmp = ip->hmp;
299 uio = ap->a_uio;
300 saveoff = uio->uio_offset;
301
302 /*
303 * Setup cookies directory entry cookies if requested
304 */
305 if (ap->a_ncookies) {
306 ncookies = uio->uio_resid / 16 + 1;
307 if (ncookies > 1024)
308 ncookies = 1024;
309 cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
310 } else {
311 ncookies = -1;
312 cookies = NULL;
313 }
314 cookie_index = 0;
315
316 /*
317 * Handle artificial entries. To ensure that only positive 64 bit
318 * quantities are returned to userland we always strip off bit 63.
319 * The hash code is designed such that codes 0x0000-0x7FFF are not
320 * used, allowing us to use these codes for articial entries.
321 *
322 * Entry 0 is used for '.' and entry 1 is used for '..'. Do not
323 * allow '..' to cross the mount point into (e.g.) the super-root.
324 */
325 error = 0;
37aa19df 326 chain = (void *)(intptr_t)-1; /* non-NULL for early goto done case */
e028fa74
MD
327
328 if (saveoff == 0) {
329 r = vop_write_dirent(&error, uio,
330 ip->ip_data.inum &
331 HAMMER2_DIRHASH_USERMSK,
332 DT_DIR, 1, ".");
333 if (r)
334 goto done;
335 if (cookies)
336 cookies[cookie_index] = saveoff;
337 ++saveoff;
338 ++cookie_index;
339 if (cookie_index == ncookies)
340 goto done;
341 }
342 if (saveoff == 1) {
343 if (ip->pip == NULL || ip == hmp->iroot)
344 xip = ip;
345 else
346 xip = ip->pip;
347
348 r = vop_write_dirent(&error, uio,
349 xip->ip_data.inum &
350 HAMMER2_DIRHASH_USERMSK,
351 DT_DIR, 2, "..");
352 if (r)
353 goto done;
354 if (cookies)
355 cookies[cookie_index] = saveoff;
356 ++saveoff;
357 ++cookie_index;
358 if (cookie_index == ncookies)
359 goto done;
360 }
361
362 lkey = saveoff | HAMMER2_DIRHASH_VISIBLE;
363
364 parent = &ip->chain;
365 hammer2_chain_ref(hmp, parent);
366 error = hammer2_chain_lock(hmp, parent);
367 if (error) {
368 hammer2_chain_put(hmp, parent);
369 goto done;
370 }
37aa19df
MD
371 chain = hammer2_chain_lookup(hmp, &parent, lkey, lkey, 0);
372 if (chain == NULL) {
373 chain = hammer2_chain_lookup(hmp, &parent,
374 lkey, (hammer2_key_t)-1, 0);
375 }
e028fa74 376 while (chain) {
c667909f
MD
377 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
378 dtype = hammer2_get_dtype(chain->u.ip);
379 saveoff = chain->bref.key & HAMMER2_DIRHASH_USERMSK;
380 r = vop_write_dirent(&error, uio,
381 chain->u.ip->ip_data.inum &
382 HAMMER2_DIRHASH_USERMSK,
383 dtype, chain->u.ip->ip_data.name_len,
384 chain->u.ip->ip_data.filename);
385 if (r)
386 break;
387 if (cookies)
388 cookies[cookie_index] = saveoff;
389 ++cookie_index;
390 } else {
391 /* XXX chain error */
392 kprintf("bad chain type readdir %d\n",
393 chain->bref.type);
394 }
995e78dc
MD
395
396 /*
397 * Keys may not be returned in order so once we have a
398 * placemarker (chain) the scan must allow the full range
399 * or some entries will be missed.
400 */
e028fa74 401 chain = hammer2_chain_next(hmp, &parent, chain,
995e78dc 402 0, (hammer2_key_t)-1, 0);
028a55bb
MD
403 if (chain) {
404 saveoff = (chain->bref.key &
405 HAMMER2_DIRHASH_USERMSK) + 1;
406 } else {
407 saveoff = (hammer2_key_t)-1;
408 }
409 if (cookie_index == ncookies)
410 break;
e028fa74
MD
411 }
412 hammer2_chain_put(hmp, parent);
028a55bb
MD
413 if (chain)
414 hammer2_chain_put(hmp, chain);
e028fa74
MD
415done:
416 if (ap->a_eofflag)
417 *ap->a_eofflag = (chain == NULL);
37aa19df 418 uio->uio_offset = saveoff & ~HAMMER2_DIRHASH_VISIBLE;
e028fa74
MD
419 if (error && cookie_index == 0) {
420 if (cookies) {
421 kfree(cookies, M_TEMP);
422 *ap->a_ncookies = 0;
423 *ap->a_cookies = NULL;
424 }
425 } else {
426 if (cookies) {
427 *ap->a_ncookies = cookie_index;
428 *ap->a_cookies = cookies;
429 }
430 }
431 return (error);
703720e4
MD
432}
433
e118c14f 434static
703720e4 435int
e118c14f 436hammer2_vop_read(struct vop_read_args *ap)
703720e4 437{
db71f61f
MD
438 struct vnode *vp;
439 hammer2_mount_t *hmp;
440 hammer2_inode_t *ip;
441 struct buf *bp;
442 struct uio *uio;
443 int error;
444 int seqcount;
445 int bigread;
446
447 /*
448 * Read operations supported on this vnode?
449 */
450 vp = ap->a_vp;
451 if (vp->v_type != VREG)
452 return (EINVAL);
453
454 /*
455 * Misc
456 */
457 ip = VTOI(vp);
458 hmp = ip->hmp;
459 uio = ap->a_uio;
460 error = 0;
461
462 seqcount = ap->a_ioflag >> 16;
463 bigread = (uio->uio_resid > 100 * 1024 * 1024);
464
465 /*
466 * UIO read loop
467 */
468 while (uio->uio_resid > 0 && uio->uio_offset < ip->ip_data.size) {
469 hammer2_key_t off_hi;
470 int off_lo;
471 int n;
472
473 off_hi = uio->uio_offset & ~HAMMER2_LBUFMASK64;
474 off_lo = (int)(uio->uio_offset & HAMMER2_LBUFMASK64);
475
476 /* XXX bigread & signal check test */
477
478 error = cluster_read(vp, ip->ip_data.size, off_hi,
479 HAMMER2_LBUFSIZE, HAMMER2_PBUFSIZE,
480 seqcount * BKVASIZE, &bp);
481 if (error)
482 break;
483 n = HAMMER2_LBUFSIZE - off_lo;
484 if (n > uio->uio_resid)
485 n = uio->uio_resid;
486 if (n > ip->ip_data.size - uio->uio_offset)
487 n = (int)(ip->ip_data.size - uio->uio_offset);
488 bp->b_flags |= B_AGE;
489 uiomove((char *)bp->b_data + off_lo, n, uio);
c667909f 490 bqrelse(bp);
db71f61f
MD
491 }
492 return (error);
47902fef 493}
703720e4 494
e118c14f 495static
47902fef 496int
e118c14f 497hammer2_vop_write(struct vop_write_args *ap)
47902fef 498{
db71f61f
MD
499 thread_t td;
500 struct vnode *vp;
501 hammer2_mount_t *hmp;
502 hammer2_inode_t *ip;
503 struct buf *bp;
504 struct uio *uio;
505 int error;
506 int kflags;
507 int seqcount;
508 int bigwrite;
509
510 /*
511 * Read operations supported on this vnode?
512 */
513 vp = ap->a_vp;
514 if (vp->v_type != VREG)
515 return (EINVAL);
516
517 /*
518 * Misc
519 */
520 ip = VTOI(vp);
521 hmp = ip->hmp;
522 uio = ap->a_uio;
523 error = 0;
524 kflags = 0;
525 if (hmp->ronly)
526 return (EROFS);
527
528 seqcount = ap->a_ioflag >> 16;
529 bigwrite = (uio->uio_resid > 100 * 1024 * 1024);
530
531 /*
532 * Check resource limit
533 */
534 if (uio->uio_resid > 0 && (td = uio->uio_td) != NULL && td->td_proc &&
535 uio->uio_offset + uio->uio_resid >
536 td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
537 lwpsignal(td->td_proc, td->td_lwp, SIGXFSZ);
538 return (EFBIG);
539 }
540
541 bigwrite = (uio->uio_resid > 100 * 1024 * 1024);
542
543 /*
3ac6a319
MD
544 * ip must be locked if extending the file.
545 * ip must be locked to avoid racing a truncation.
546 */
547 hammer2_inode_lock_ex(ip);
548 hammer2_chain_modify(hmp, &ip->chain);
549
550 if (ap->a_ioflag & IO_APPEND)
551 uio->uio_offset = ip->ip_data.size;
552
553 /*
554 * UIO write loop
db71f61f
MD
555 */
556 while (uio->uio_resid > 0) {
557 hammer2_key_t nsize;
558 hammer2_key_t off_hi;
559 int fixsize;
560 int off_lo;
561 int n;
562 int trivial;
563 int endofblk;
564
565 off_hi = uio->uio_offset & ~HAMMER2_LBUFMASK64;
566 off_lo = (int)(uio->uio_offset & HAMMER2_LBUFMASK64);
567
568 n = HAMMER2_LBUFSIZE - off_lo;
569 if (n > uio->uio_resid) {
570 n = uio->uio_resid;
571 endofblk = 0;
572 } else {
573 endofblk = 1;
574 }
575 nsize = uio->uio_offset + n;
576
577 /* XXX bigwrite & signal check test */
578
579 /*
580 * Don't allow the buffer build to blow out the buffer
581 * cache.
582 */
583 if ((ap->a_ioflag & IO_RECURSE) == 0)
584 bwillwrite(HAMMER2_LBUFSIZE);
585
586 /*
587 * Extend the size of the file as needed
588 * XXX lock.
589 */
590 if (nsize > ip->ip_data.size) {
591 if (uio->uio_offset > ip->ip_data.size)
592 trivial = 0;
593 else
594 trivial = 1;
3ac6a319 595 hammer2_extend_file(ip, nsize, trivial);
db71f61f
MD
596 kflags |= NOTE_EXTEND;
597 fixsize = 1;
598 } else {
599 fixsize = 0;
600 }
601
602 if (uio->uio_segflg == UIO_NOCOPY) {
603 /*
604 * Issuing a write with the same data backing the
605 * buffer. Instantiate the buffer to collect the
606 * backing vm pages, then read-in any missing bits.
607 *
608 * This case is used by vop_stdputpages().
609 */
610 bp = getblk(vp, off_hi,
611 HAMMER2_LBUFSIZE, GETBLK_BHEAVY, 0);
612 if ((bp->b_flags & B_CACHE) == 0) {
613 bqrelse(bp);
614 error = bread(ap->a_vp,
615 off_hi, HAMMER2_LBUFSIZE, &bp);
616 }
617 } else if (off_lo == 0 && uio->uio_resid >= HAMMER2_LBUFSIZE) {
618 /*
619 * Even though we are entirely overwriting the buffer
620 * we may still have to zero it out to avoid a
621 * mmap/write visibility issue.
622 */
623 bp = getblk(vp, off_hi,
624 HAMMER2_LBUFSIZE, GETBLK_BHEAVY, 0);
625 if ((bp->b_flags & B_CACHE) == 0)
626 vfs_bio_clrbuf(bp);
627 } else if (off_hi >= ip->ip_data.size) {
628 /*
629 * If the base offset of the buffer is beyond the
630 * file EOF, we don't have to issue a read.
631 */
632 bp = getblk(vp, off_hi,
633 HAMMER2_LBUFSIZE, GETBLK_BHEAVY, 0);
634 vfs_bio_clrbuf(bp);
635 } else {
636 /*
637 * Partial overwrite, read in any missing bits then
638 * replace the portion being written.
639 */
640 error = bread(vp, off_hi, HAMMER2_LBUFSIZE, &bp);
641 if (error == 0)
642 bheavy(bp);
643 }
644
645 if (error == 0) {
646 /* release lock */
647 error = uiomove(bp->b_data + off_lo, n, uio);
648 /* acquire lock */
649 }
650
651 if (error) {
652 brelse(bp);
3ac6a319
MD
653 if (fixsize)
654 hammer2_truncate_file(ip, ip->ip_data.size);
db71f61f
MD
655 break;
656 }
657 kflags |= NOTE_WRITE;
658 if (ip->ip_data.size < uio->uio_offset)
659 ip->ip_data.size = uio->uio_offset;
660 /* XXX update ino_data.mtime */
661
662 /*
663 * Once we dirty a buffer any cached offset becomes invalid.
664 */
665 bp->b_bio2.bio_offset = NOOFFSET;
666 bp->b_flags |= B_AGE;
667 if (ap->a_ioflag & IO_SYNC) {
668 bwrite(bp);
669 } else if ((ap->a_ioflag & IO_DIRECT) && endofblk) {
670 bawrite(bp);
671 } else if (ap->a_ioflag & IO_ASYNC) {
672 bawrite(bp);
673 } else {
674 bdwrite(bp);
675 }
676 }
677 /* hammer2_knote(vp, kflags); */
3ac6a319 678 hammer2_inode_unlock_ex(ip);
db71f61f 679 return (error);
703720e4
MD
680}
681
3ac6a319
MD
682/*
683 * Truncate the size of a file. The inode must be locked and marked
684 * for modification. The caller will set ip->ip_data.size after we
685 * return, we do not do it ourselves.
686 */
687static
688void
689hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize)
690{
691 hammer2_chain_t *parent;
692 hammer2_chain_t *chain;
693 hammer2_mount_t *hmp = ip->hmp;
694 hammer2_key_t psize;
695 int error;
696
697 /*
698 * Destroy any logical buffer cache buffers beyond the file EOF
699 * and partially clean out any straddling buffer.
700 */
701 if (ip->vp) {
702 nvtruncbuf(ip->vp, nsize,
703 HAMMER2_LBUFSIZE, nsize & HAMMER2_LBUFMASK);
704 }
705 nsize = (nsize + HAMMER2_LBUFMASK64) & ~HAMMER2_LBUFMASK64;
706
707 /*
708 * Setup for lookup/next
709 */
710 parent = &ip->chain;
711 hammer2_chain_ref(hmp, parent);
712 error = hammer2_chain_lock(hmp, parent);
713 if (error) {
714 hammer2_chain_put(hmp, parent);
715 /* XXX error reporting */
716 return;
717 }
718
719 /*
720 * Calculate the first physical buffer beyond the new file EOF.
721 * The straddling physical buffer will be at (psize - PBUFSIZE).
722 */
723 psize = (nsize + HAMMER2_PBUFMASK64) & ~HAMMER2_PBUFMASK64;
724
725 if (nsize != psize) {
726 KKASSERT(psize >= HAMMER2_PBUFSIZE64);
727 chain = hammer2_chain_lookup(hmp, &parent,
728 psize - HAMMER2_PBUFSIZE,
729 psize - HAMMER2_PBUFSIZE, 0);
730 if (chain) {
731 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
732 hammer2_chain_modify(hmp, chain);
733 bzero(chain->data->buf +
734 (int)(nsize & HAMMER2_PBUFMASK64),
735 (size_t)(psize - nsize));
736 kprintf("ZEROBIGBOY %08x/%zd\n",
737 (int)(nsize & HAMMER2_PBUFMASK64),
738 (size_t)(psize - nsize));
739 }
740 hammer2_chain_put(hmp, chain);
741 }
742 }
743
744 chain = hammer2_chain_lookup(hmp, &parent,
745 psize, (hammer2_key_t)-1,
746 HAMMER2_LOOKUP_NOLOCK);
747 while (chain) {
748 /*
749 * Degenerate embedded data case, nothing to loop on.
750 */
751 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
752 break;
753
754 /*
755 * Delete physical data blocks past the file EOF.
756 */
757 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
758 hammer2_chain_delete(hmp, parent, chain);
759 }
760 chain = hammer2_chain_next(hmp, &parent, chain,
761 psize, (hammer2_key_t)-1,
762 HAMMER2_LOOKUP_NOLOCK);
763 }
764 hammer2_chain_put(hmp, parent);
765}
766
767/*
768 * Extend the size of a file. The inode must be locked and marked
769 * for modification. The caller will set ip->ip_data.size after we
770 * return, we do not do it ourselves.
771 */
772static
773void
774hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize, int trivial)
775{
776 struct buf *bp;
777 int error;
778
779 /*
780 * Turn off the embedded-data-in-inode feature if the file size
781 * extends past the embedded limit. To keep things simple this
782 * feature is never re-enabled once disabled.
783 */
784 if ((ip->ip_data.op_flags & HAMMER2_OPFLAG_DIRECTDATA) &&
785 nsize > HAMMER2_EMBEDDED_BYTES) {
786 error = bread(ip->vp, 0, HAMMER2_LBUFSIZE, &bp);
787 KKASSERT(error == 0);
788 ip->ip_data.op_flags &= ~HAMMER2_OPFLAG_DIRECTDATA;
789 bzero(&ip->ip_data.u.blockset,
790 sizeof(ip->ip_data.u.blockset));
791 bdwrite(bp);
792 }
793 if (ip->vp) {
794 nvextendbuf(ip->vp, ip->ip_data.size, nsize,
795 HAMMER2_LBUFSIZE, HAMMER2_LBUFSIZE,
796 (int)(ip->ip_data.size & HAMMER2_LBUFMASK),
797 (int)(nsize & HAMMER2_LBUFMASK),
798 trivial);
799 }
800}
801
e118c14f 802static
703720e4 803int
e118c14f 804hammer2_vop_nresolve(struct vop_nresolve_args *ap)
703720e4 805{
37494cab
MD
806 hammer2_inode_t *dip;
807 hammer2_mount_t *hmp;
808 hammer2_chain_t *parent;
809 hammer2_chain_t *chain;
810 struct namecache *ncp;
811 const uint8_t *name;
812 size_t name_len;
813 hammer2_key_t lhc;
814 int error = 0;
815 struct vnode *vp;
816
817 dip = VTOI(ap->a_dvp);
818 hmp = dip->hmp;
819 ncp = ap->a_nch->ncp;
820 name = ncp->nc_name;
821 name_len = ncp->nc_nlen;
822 lhc = hammer2_dirhash(name, name_len);
823
824 /*
825 * Note: In DragonFly the kernel handles '.' and '..'.
826 */
827 parent = &dip->chain;
828 hammer2_chain_ref(hmp, parent);
829 hammer2_chain_lock(hmp, parent);
830 chain = hammer2_chain_lookup(hmp, &parent,
c667909f
MD
831 lhc, lhc + HAMMER2_DIRHASH_LOMASK,
832 0);
37494cab
MD
833 while (chain) {
834 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
835 chain->u.ip &&
836 name_len == chain->data->ipdata.name_len &&
837 bcmp(name, chain->data->ipdata.filename, name_len) == 0) {
838 break;
839 }
840 chain = hammer2_chain_next(hmp, &parent, chain,
c667909f
MD
841 lhc, lhc + HAMMER2_DIRHASH_LOMASK,
842 0);
37494cab
MD
843 }
844 hammer2_chain_put(hmp, parent);
845
846 if (chain) {
847 vp = hammer2_igetv(chain->u.ip, &error);
848 if (error == 0) {
849 vn_unlock(vp);
850 cache_setvp(ap->a_nch, vp);
851 vrele(vp);
852 }
853 hammer2_chain_put(hmp, chain);
854 } else {
855 error = ENOENT;
856 cache_setvp(ap->a_nch, NULL);
857 }
858 return error;
859}
860
861static
862int
863hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
864{
865 hammer2_inode_t *dip;
866 hammer2_inode_t *ip;
867 hammer2_mount_t *hmp;
868 int error;
869
870 dip = VTOI(ap->a_dvp);
871 hmp = dip->hmp;
872
873 if ((ip = dip->pip) == NULL) {
874 *ap->a_vpp = NULL;
875 return ENOENT;
876 }
877 hammer2_chain_ref(hmp, &ip->chain);
878 hammer2_chain_lock(hmp, &ip->chain);
879 *ap->a_vpp = hammer2_igetv(ip, &error);
880 hammer2_chain_put(hmp, &ip->chain);
881
882 return error;
883}
884
885static
886int
887hammer2_vop_nmkdir(struct vop_nmkdir_args *ap)
888{
889 hammer2_mount_t *hmp;
890 hammer2_inode_t *dip;
891 hammer2_inode_t *nip;
892 struct namecache *ncp;
893 const uint8_t *name;
894 size_t name_len;
895 int error;
896
897 dip = VTOI(ap->a_dvp);
898 hmp = dip->hmp;
db71f61f
MD
899 if (hmp->ronly)
900 return (EROFS);
901
37494cab
MD
902 ncp = ap->a_nch->ncp;
903 name = ncp->nc_name;
904 name_len = ncp->nc_nlen;
905
906 error = hammer2_create_inode(hmp, ap->a_vap, ap->a_cred,
907 dip, name, name_len, &nip);
908 if (error) {
909 KKASSERT(nip == NULL);
910 *ap->a_vpp = NULL;
911 return error;
912 }
913 *ap->a_vpp = hammer2_igetv(nip, &error);
914 hammer2_chain_put(hmp, &nip->chain);
915
916 if (error == 0) {
917 cache_setunresolved(ap->a_nch);
918 cache_setvp(ap->a_nch, *ap->a_vpp);
919 }
920 return error;
703720e4
MD
921}
922
db71f61f
MD
923/*
924 * Return the largest contiguous physical disk range for the logical
925 * request.
926 *
927 * (struct vnode *vp, off_t loffset, off_t *doffsetp, int *runp, int *runb)
928 */
e118c14f 929static
703720e4 930int
e118c14f 931hammer2_vop_bmap(struct vop_bmap_args *ap)
703720e4 932{
db71f61f
MD
933 struct vnode *vp;
934 hammer2_mount_t *hmp;
935 hammer2_inode_t *ip;
936 hammer2_chain_t *parent;
937 hammer2_chain_t *chain;
5b4a2132
MD
938 hammer2_key_t loff;
939 hammer2_off_t poff;
db71f61f
MD
940
941 /*
942 * Only supported on regular files
943 *
944 * Only supported for read operations (required for cluster_read).
945 * The block allocation is delayed for write operations.
946 */
947 vp = ap->a_vp;
948 if (vp->v_type != VREG)
949 return (EOPNOTSUPP);
950 if (ap->a_cmd != BUF_CMD_READ)
951 return (EOPNOTSUPP);
952
953 ip = VTOI(vp);
954 hmp = ip->hmp;
5b4a2132
MD
955
956 loff = ap->a_loffset;
957 KKASSERT((loff & HAMMER2_LBUFMASK64) == 0);
db71f61f
MD
958
959 parent = &ip->chain;
960 hammer2_chain_ref(hmp, parent);
961 hammer2_chain_lock(hmp, parent);
5b4a2132 962 chain = hammer2_chain_lookup(hmp, &parent, loff, loff, 0);
3ac6a319
MD
963 if (chain == NULL) {
964 /*
965 * zero-fill hole
966 */
967 *ap->a_doffsetp = ZFOFFSET;
968 } else if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
969 /*
970 * Normal data ref
971 */
5b4a2132
MD
972 poff = loff - chain->bref.key +
973 (chain->bref.data_off & HAMMER2_OFF_MASK);
974 *ap->a_doffsetp = poff;
db71f61f
MD
975 hammer2_chain_put(hmp, chain);
976 } else {
3ac6a319
MD
977 /*
978 * Data is embedded in inode, no direct I/O possible.
979 */
980 *ap->a_doffsetp = NOOFFSET;
981 hammer2_chain_put(hmp, chain);
db71f61f
MD
982 }
983 hammer2_chain_put(hmp, parent);
984 return (0);
703720e4
MD
985}
986
/*
 * Open a file; nothing filesystem-specific is required, so defer to
 * the standard open helper.
 */
static
int
hammer2_vop_open(struct vop_open_args *ap)
{
	return vop_stdopen(ap);
}
993
37aa19df
MD
994/*
995 * hammer_vop_advlock { vp, id, op, fl, flags }
996 *
997 * MPSAFE - does not require fs_token
998 */
999static
1000int
1001hammer2_vop_advlock(struct vop_advlock_args *ap)
1002{
1003 hammer2_inode_t *ip = VTOI(ap->a_vp);
1004
1005 return (lf_advlock(ap, &ip->advlock, ip->ip_data.size));
1006}
1007
1008
/*
 * Close a file; nothing filesystem-specific is required, so defer to
 * the standard close helper.
 */
static
int
hammer2_vop_close(struct vop_close_args *ap)
{
	return vop_stdclose(ap);
}
1015
1016/*
1017 * hammer_vop_ncreate { nch, dvp, vpp, cred, vap }
1018 *
1019 * The operating system has already ensured that the directory entry
1020 * does not exist and done all appropriate namespace locking.
1021 */
1022static
1023int
1024hammer2_vop_ncreate(struct vop_ncreate_args *ap)
1025{
1026 hammer2_mount_t *hmp;
1027 hammer2_inode_t *dip;
1028 hammer2_inode_t *nip;
1029 struct namecache *ncp;
1030 const uint8_t *name;
1031 size_t name_len;
1032 int error;
1033
1034 dip = VTOI(ap->a_dvp);
1035 hmp = dip->hmp;
1036 if (hmp->ronly)
1037 return (EROFS);
1038
1039 ncp = ap->a_nch->ncp;
1040 name = ncp->nc_name;
1041 name_len = ncp->nc_nlen;
1042
1043 error = hammer2_create_inode(hmp, ap->a_vap, ap->a_cred,
1044 dip, name, name_len, &nip);
1045 if (error) {
1046 KKASSERT(nip == NULL);
1047 *ap->a_vpp = NULL;
1048 return error;
1049 }
1050 *ap->a_vpp = hammer2_igetv(nip, &error);
1051 hammer2_chain_put(hmp, &nip->chain);
1052
1053 if (error == 0) {
1054 cache_setunresolved(ap->a_nch);
1055 cache_setvp(ap->a_nch, *ap->a_vpp);
1056 }
1057 return error;
1058}
1059
db71f61f
MD
1060static int hammer2_strategy_read(struct vop_strategy_args *ap);
1061static int hammer2_strategy_write(struct vop_strategy_args *ap);
1062
e118c14f 1063static
703720e4 1064int
e118c14f 1065hammer2_vop_strategy(struct vop_strategy_args *ap)
703720e4 1066{
703720e4
MD
1067 struct bio *biop;
1068 struct buf *bp;
703720e4
MD
1069 int error;
1070
703720e4
MD
1071 biop = ap->a_bio;
1072 bp = biop->bio_buf;
703720e4
MD
1073
1074 switch(bp->b_cmd) {
9c2e0de0 1075 case BUF_CMD_READ:
db71f61f
MD
1076 error = hammer2_strategy_read(ap);
1077 break;
9c2e0de0 1078 case BUF_CMD_WRITE:
db71f61f
MD
1079 error = hammer2_strategy_write(ap);
1080 break;
703720e4
MD
1081 default:
1082 bp->b_error = error = EINVAL;
1083 bp->b_flags |= B_ERROR;
1084 biodone(biop);
1085 break;
1086 }
1087
1088 return (error);
1089}
1090
db71f61f
MD
1091static
1092int
1093hammer2_strategy_read(struct vop_strategy_args *ap)
1094{
1095 struct buf *bp;
1096 struct bio *bio;
1097 struct bio *nbio;
1098 hammer2_mount_t *hmp;
1099 hammer2_inode_t *ip;
1100 hammer2_chain_t *parent;
1101 hammer2_chain_t *chain;
5b4a2132
MD
1102 hammer2_key_t loff;
1103 hammer2_off_t poff;
3ac6a319
MD
1104 size_t ddlen = 0; /* direct data shortcut */
1105 char *ddata = NULL;
db71f61f
MD
1106
1107 bio = ap->a_bio;
1108 bp = bio->bio_buf;
1109 ip = VTOI(ap->a_vp);
1110 hmp = ip->hmp;
1111 nbio = push_bio(bio);
1112
1113 if (nbio->bio_offset == NOOFFSET) {
5b4a2132
MD
1114 loff = bio->bio_offset;
1115 KKASSERT((loff & HAMMER2_LBUFMASK64) == 0);
db71f61f
MD
1116
1117 parent = &ip->chain;
1118 hammer2_chain_ref(hmp, parent);
1119 hammer2_chain_lock(hmp, parent);
c667909f
MD
1120
1121 /*
1122 * Specifying NOLOCK avoids unnecessary bread()s of the
1123 * chain element's content. We just need the block device
1124 * offset.
1125 */
5b4a2132 1126 chain = hammer2_chain_lookup(hmp, &parent, loff, loff,
c667909f 1127 HAMMER2_LOOKUP_NOLOCK);
3ac6a319
MD
1128 if (chain == NULL) {
1129 /*
1130 * Data is zero-fill
1131 */
1132 nbio->bio_offset = ZFOFFSET;
1133 } else if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
1134 /*
1135 * Data is on-media, implement direct-read
1136 */
5b4a2132
MD
1137 poff = loff - chain->bref.key +
1138 (chain->bref.data_off & HAMMER2_OFF_MASK);
1139 nbio->bio_offset = poff;
c667909f 1140 hammer2_chain_drop(hmp, chain);
3ac6a319
MD
1141 } else if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
1142 /*
1143 * Data is embedded in the inode
1144 */
1145 ddata = chain->data->ipdata.u.data;
1146 ddlen = HAMMER2_EMBEDDED_BYTES;
1147 KKASSERT(chain == parent);
1148 hammer2_chain_drop(hmp, chain);
1149 /* leave bio_offset set to NOOFFSET */
db71f61f 1150 } else {
3ac6a319 1151 panic("hammer2_strategy_read: unknown bref type");
db71f61f
MD
1152 }
1153 hammer2_chain_put(hmp, parent);
1154 }
3ac6a319
MD
1155 if (ddlen) {
1156 /*
1157 * Data embedded directly in inode
1158 */
1159 bp->b_resid = 0;
1160 bp->b_error = 0;
1161 vfs_bio_clrbuf(bp);
1162 bcopy(ddata, bp->b_data, ddlen);
1163 biodone(nbio);
1164 } else if (nbio->bio_offset == ZFOFFSET) {
1165 /*
1166 * Data is zero-fill
1167 */
db71f61f
MD
1168 bp->b_resid = 0;
1169 bp->b_error = 0;
1170 vfs_bio_clrbuf(bp);
1171 biodone(nbio);
1172 } else {
3ac6a319
MD
1173 /*
1174 * Data on media
1175 */
db71f61f
MD
1176 vn_strategy(hmp->devvp, nbio);
1177 }
1178 return (0);
1179}
1180
1181static
1182int
1183hammer2_strategy_write(struct vop_strategy_args *ap)
1184{
1185 struct buf *bp;
1186 struct bio *bio;
1187 struct bio *nbio;
1188 hammer2_mount_t *hmp;
1189 hammer2_inode_t *ip;
1190 hammer2_chain_t *parent;
1191 hammer2_chain_t *chain;
1192 hammer2_key_t off_hi;
1193 int off_lo;
1194
1195 bio = ap->a_bio;
1196 bp = bio->bio_buf;
1197 ip = VTOI(ap->a_vp);
1198 hmp = ip->hmp;
1199 nbio = push_bio(bio);
1200
1201 /*
1202 * Our bmap doesn't support writes atm, and a vop_write should
1203 * clear the physical disk offset cache for the copy-on-write
1204 * operation.
1205 */
1206 KKASSERT(nbio->bio_offset == NOOFFSET);
1207
1208 off_hi = bio->bio_offset & HAMMER2_OFF_MASK_HI;
1209 off_lo = bio->bio_offset & HAMMER2_OFF_MASK_LO;
1210 KKASSERT((bio->bio_offset & HAMMER2_LBUFMASK64) == 0);
1211
1212 parent = &ip->chain;
1213 hammer2_chain_ref(hmp, parent);
1214 hammer2_chain_lock(hmp, parent);
3ac6a319
MD
1215 /*
1216 * XXX implement NODATA flag to avoid instantiating bp if
1217 * it isn't already present for direct-write implementation.
1218 */
c667909f 1219 chain = hammer2_chain_lookup(hmp, &parent, off_hi, off_hi, 0);
3ac6a319
MD
1220 if (chain == NULL) {
1221 /*
1222 * A new data block must be allocated.
1223 */
db71f61f
MD
1224 chain = hammer2_chain_create(hmp, parent,
1225 off_hi, HAMMER2_PBUFRADIX,
1226 HAMMER2_BREF_TYPE_DATA,
1227 HAMMER2_PBUFSIZE);
1228 bcopy(bp->b_data, chain->data->buf + off_lo, bp->b_bcount);
3ac6a319
MD
1229 } else if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
1230 /*
1231 * The data is embedded in the inode
1232 */
1233 hammer2_chain_modify(hmp, chain);
1234 if (off_lo < HAMMER2_EMBEDDED_BYTES) {
1235 bcopy(bp->b_data,
1236 chain->data->ipdata.u.data + off_lo,
1237 HAMMER2_EMBEDDED_BYTES - off_lo);
1238 }
1239 } else {
1240 /*
1241 * The data is on media, possibly in a larger block.
1242 *
1243 * XXX implement direct-write if bp not cached using NODATA
1244 * flag.
1245 */
1246 hammer2_chain_modify(hmp, chain);
1247 KKASSERT(bp->b_bcount <= HAMMER2_PBUFSIZE - off_lo);
1248 bcopy(bp->b_data, chain->data->buf + off_lo, bp->b_bcount);
db71f61f 1249 }
37aa19df
MD
1250 if (off_lo + bp->b_bcount == HAMMER2_PBUFSIZE)
1251 atomic_set_int(&chain->flags, HAMMER2_CHAIN_IOFLUSH);
1252 hammer2_chain_put(hmp, chain);
db71f61f
MD
1253 hammer2_chain_put(hmp, parent);
1254
1255 bp->b_resid = 0;
1256 bp->b_error = 0;
1257 biodone(nbio);
1258
1259 return (0);
1260}
1261
e118c14f 1262static
f0206a67 1263int
e118c14f 1264hammer2_vop_mountctl(struct vop_mountctl_args *ap)
f0206a67
VS
1265{
1266 struct mount *mp;
1267 struct hammer2_mount *hmp;
1268 int rc;
1269
1270 switch (ap->a_op) {
1271 case (MOUNTCTL_SET_EXPORT):
1272 mp = ap->a_head.a_ops->head.vv_mount;
1273 hmp = MPTOH2(mp);
1274
1275 if (ap->a_ctllen != sizeof(struct export_args))
1276 rc = (EINVAL);
1277 else
10c5dee0
MD
1278 rc = vfs_export(mp, &hmp->export,
1279 (const struct export_args *)ap->a_ctl);
f0206a67
VS
1280 break;
1281 default:
1282 rc = vop_stdmountctl(ap);
1283 break;
1284 }
1285 return (rc);
1286}
1287
703720e4
MD
1288struct vop_ops hammer2_vnode_vops = {
1289 .vop_default = vop_defaultop,
e118c14f 1290 .vop_fsync = hammer2_vop_fsync,
703720e4
MD
1291 .vop_getpages = vop_stdgetpages,
1292 .vop_putpages = vop_stdputpages,
e118c14f 1293 .vop_access = hammer2_vop_access,
37aa19df 1294 .vop_advlock = hammer2_vop_advlock,
c667909f
MD
1295 .vop_close = hammer2_vop_close,
1296 .vop_ncreate = hammer2_vop_ncreate,
e118c14f 1297 .vop_getattr = hammer2_vop_getattr,
3ac6a319 1298 .vop_setattr = hammer2_vop_setattr,
e118c14f 1299 .vop_readdir = hammer2_vop_readdir,
5b4a2132
MD
1300 .vop_getpages = vop_stdgetpages,
1301 .vop_putpages = vop_stdputpages,
e118c14f
MD
1302 .vop_read = hammer2_vop_read,
1303 .vop_write = hammer2_vop_write,
1304 .vop_open = hammer2_vop_open,
1305 .vop_inactive = hammer2_vop_inactive,
1306 .vop_reclaim = hammer2_vop_reclaim,
1307 .vop_nresolve = hammer2_vop_nresolve,
37494cab
MD
1308 .vop_nlookupdotdot = hammer2_vop_nlookupdotdot,
1309 .vop_nmkdir = hammer2_vop_nmkdir,
e118c14f
MD
1310 .vop_mountctl = hammer2_vop_mountctl,
1311 .vop_bmap = hammer2_vop_bmap,
1312 .vop_strategy = hammer2_vop_strategy,
703720e4
MD
1313};
1314
1315struct vop_ops hammer2_spec_vops = {
1316
1317};
1318
1319struct vop_ops hammer2_fifo_vops = {
1320
1321};