sys/vfs/hammer: Remove obsolete code and comments
[dragonfly.git] sys/vfs/hammer/hammer_vnops.c
427e5fc6 1/*
b84de5af 2 * Copyright (c) 2007-2008 The DragonFly Project. All rights reserved.
745703c7 3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
745703c7 6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
745703c7 10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
745703c7 20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35#include <sys/fcntl.h>
36#include <sys/namecache.h>
427e5fc6 37#include <sys/event.h>
b3deaf57 38#include <sys/dirent.h>
fbb84158 39#include <sys/file.h>
18bee4a2 40#include <vm/swap_pager.h>
7a04d74f 41#include <vfs/fifofs/fifo.h>
43#include "hammer.h"
44
45/*
46 * USERFS VNOPS
47 */
48static int hammer_vop_fsync(struct vop_fsync_args *);
49static int hammer_vop_read(struct vop_read_args *);
50static int hammer_vop_write(struct vop_write_args *);
51static int hammer_vop_access(struct vop_access_args *);
52static int hammer_vop_advlock(struct vop_advlock_args *);
53static int hammer_vop_close(struct vop_close_args *);
54static int hammer_vop_ncreate(struct vop_ncreate_args *);
55static int hammer_vop_getattr(struct vop_getattr_args *);
56static int hammer_vop_nresolve(struct vop_nresolve_args *);
57static int hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *);
58static int hammer_vop_nlink(struct vop_nlink_args *);
59static int hammer_vop_nmkdir(struct vop_nmkdir_args *);
60static int hammer_vop_nmknod(struct vop_nmknod_args *);
61static int hammer_vop_open(struct vop_open_args *);
62static int hammer_vop_print(struct vop_print_args *);
63static int hammer_vop_readdir(struct vop_readdir_args *);
64static int hammer_vop_readlink(struct vop_readlink_args *);
65static int hammer_vop_nremove(struct vop_nremove_args *);
66static int hammer_vop_nrename(struct vop_nrename_args *);
67static int hammer_vop_nrmdir(struct vop_nrmdir_args *);
349433c9 68static int hammer_vop_markatime(struct vop_markatime_args *);
69static int hammer_vop_setattr(struct vop_setattr_args *);
70static int hammer_vop_strategy(struct vop_strategy_args *);
a99b9ea2 71static int hammer_vop_bmap(struct vop_bmap_args *ap);
72static int hammer_vop_nsymlink(struct vop_nsymlink_args *);
73static int hammer_vop_nwhiteout(struct vop_nwhiteout_args *);
7dc57964 74static int hammer_vop_ioctl(struct vop_ioctl_args *);
513ca7d7 75static int hammer_vop_mountctl(struct vop_mountctl_args *);
fbb84158 76static int hammer_vop_kqfilter (struct vop_kqfilter_args *);
427e5fc6 77
78static int hammer_vop_fifoclose (struct vop_close_args *);
79static int hammer_vop_fiforead (struct vop_read_args *);
80static int hammer_vop_fifowrite (struct vop_write_args *);
fbb84158 81static int hammer_vop_fifokqfilter (struct vop_kqfilter_args *);
7a04d74f 82
83struct vop_ops hammer_vnode_vops = {
84 .vop_default = vop_defaultop,
85 .vop_fsync = hammer_vop_fsync,
86 .vop_getpages = vop_stdgetpages,
87 .vop_putpages = vop_stdputpages,
88 .vop_read = hammer_vop_read,
89 .vop_write = hammer_vop_write,
90 .vop_access = hammer_vop_access,
91 .vop_advlock = hammer_vop_advlock,
92 .vop_close = hammer_vop_close,
93 .vop_ncreate = hammer_vop_ncreate,
94 .vop_getattr = hammer_vop_getattr,
95 .vop_inactive = hammer_vop_inactive,
96 .vop_reclaim = hammer_vop_reclaim,
97 .vop_nresolve = hammer_vop_nresolve,
98 .vop_nlookupdotdot = hammer_vop_nlookupdotdot,
99 .vop_nlink = hammer_vop_nlink,
100 .vop_nmkdir = hammer_vop_nmkdir,
101 .vop_nmknod = hammer_vop_nmknod,
102 .vop_open = hammer_vop_open,
64950f31 103 .vop_pathconf = vop_stdpathconf,
104 .vop_print = hammer_vop_print,
105 .vop_readdir = hammer_vop_readdir,
106 .vop_readlink = hammer_vop_readlink,
107 .vop_nremove = hammer_vop_nremove,
108 .vop_nrename = hammer_vop_nrename,
109 .vop_nrmdir = hammer_vop_nrmdir,
7866ea2a 110 .vop_markatime = hammer_vop_markatime,
427e5fc6 111 .vop_setattr = hammer_vop_setattr,
a99b9ea2 112 .vop_bmap = hammer_vop_bmap,
113 .vop_strategy = hammer_vop_strategy,
114 .vop_nsymlink = hammer_vop_nsymlink,
7dc57964 115 .vop_nwhiteout = hammer_vop_nwhiteout,
513ca7d7 116 .vop_ioctl = hammer_vop_ioctl,
117 .vop_mountctl = hammer_vop_mountctl,
118 .vop_kqfilter = hammer_vop_kqfilter
119};
120
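/*
 * VOPs used for special (device) vnodes.  Plain read/write is rejected
 * via vop_stdnoread/vop_stdnowrite; operations not listed here fall
 * through to vop_defaultop.
 */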
7a04d74f 121struct vop_ops hammer_spec_vops = {
8be7edad 122 .vop_default = vop_defaultop,
7a04d74f 123 .vop_fsync = hammer_vop_fsync,
124 .vop_read = vop_stdnoread,
125 .vop_write = vop_stdnowrite,
7a04d74f 126 .vop_access = hammer_vop_access,
8be7edad 127 .vop_close = hammer_vop_close,
7866ea2a 128 .vop_markatime = hammer_vop_markatime,
8be7edad 129 .vop_getattr = hammer_vop_getattr,
130 .vop_inactive = hammer_vop_inactive,
131 .vop_reclaim = hammer_vop_reclaim,
132 .vop_setattr = hammer_vop_setattr
133};
134
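/*
 * VOPs used for FIFO vnodes.  Operations not overridden here are passed
 * through to the fifofs code via fifo_vnoperate.
 */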
135struct vop_ops hammer_fifo_vops = {
136 .vop_default = fifo_vnoperate,
137 .vop_fsync = hammer_vop_fsync,
138 .vop_read = hammer_vop_fiforead,
139 .vop_write = hammer_vop_fifowrite,
140 .vop_access = hammer_vop_access,
141 .vop_close = hammer_vop_fifoclose,
7866ea2a 142 .vop_markatime = hammer_vop_markatime,
143 .vop_getattr = hammer_vop_getattr,
144 .vop_inactive = hammer_vop_inactive,
145 .vop_reclaim = hammer_vop_reclaim,
146 .vop_setattr = hammer_vop_setattr,
147 .vop_kqfilter = hammer_vop_fifokqfilter
148};
149
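/*
 * Helper to post kqueue notifications on a vnode.  The flags argument is
 * an accumulation of NOTE_* events (e.g. NOTE_WRITE, NOTE_EXTEND); nothing
 * is posted when it is zero.
 */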
150static __inline
151void
152hammer_knote(struct vnode *vp, int flags)
153{
154 if (flags)
5b22f1a7 155 KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags);
156}
157
158#ifdef DEBUG_TRUNCATE
159struct hammer_inode *HammerTruncIp;
160#endif
161
b84de5af 162static int hammer_dounlink(hammer_transaction_t trans, struct nchandle *nch,
163 struct vnode *dvp, struct ucred *cred,
164 int flags, int isdir);
165static int hammer_vop_strategy_read(struct vop_strategy_args *ap);
166static int hammer_vop_strategy_write(struct vop_strategy_args *ap);
167
168/*
169 * hammer_vop_fsync { vp, waitfor }
170 *
171 * fsync() an inode to disk and wait for it to be completely committed
172 * such that the information would not be undone if a crash occurred after
173 * return.
174 *
175 * NOTE: HAMMER's fsync()'s are going to remain expensive until we implement
176 * a REDO log. A sysctl is provided to relax HAMMER's fsync()
177 * operation.
178 *
179 * Ultimately the combination of a REDO log and use of fast storage
180 * to front-end cluster caches will make fsync fast, but it isn't
181 * here yet. And, in any case, we need real transactional
182 * all-or-nothing features which are not restricted to a single file.
66325755 183 */
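/*
 * Quick reference for the hammer_fsync_mode settings handled by the
 * switch below (values other than 0-4 fall through to the default case):
 *
 *	0	no REDO, full synchronous flush
 *	1	no REDO, full asynchronous flush
 *	2	REDO semantics with synchronous flush (requires version >= 4)
 *	3	REDO semantics with relaxed asynchronous flush (version >= 4)
 *	4	ignore the fsync() system call entirely
 */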
184static
185int
66325755 186hammer_vop_fsync(struct vop_fsync_args *ap)
427e5fc6 187{
b84de5af 188 hammer_inode_t ip = VTOI(ap->a_vp);
9192654c 189 hammer_mount_t hmp = ip->hmp;
6f3d87c0 190 int waitfor = ap->a_waitfor;
9192654c 191 int mode;
6f3d87c0 192
193 lwkt_gettoken(&hmp->fs_token);
194
6f3d87c0 195 /*
196 * Fsync rule relaxation (default is either full synchronous flush
197 * or REDO semantics with synchronous flush).
198 */
199 if (ap->a_flags & VOP_FSYNC_SYSCALL) {
200 switch(hammer_fsync_mode) {
201 case 0:
9192654c 202mode0:
47f363f1 203 /* no REDO, full synchronous flush */
9192654c 204 goto skip;
6f3d87c0 205 case 1:
9192654c 206mode1:
47f363f1 207 /* no REDO, full asynchronous flush */
208 if (waitfor == MNT_WAIT)
209 waitfor = MNT_NOWAIT;
9192654c 210 goto skip;
6f3d87c0 211 case 2:
212 /* REDO semantics, synchronous flush */
213 if (hmp->version < HAMMER_VOL_VERSION_FOUR)
214 goto mode0;
215 mode = HAMMER_FLUSH_UNDOS_AUTO;
216 break;
6f3d87c0 217 case 3:
218 /* REDO semantics, relaxed asynchronous flush */
219 if (hmp->version < HAMMER_VOL_VERSION_FOUR)
220 goto mode1;
221 mode = HAMMER_FLUSH_UNDOS_RELAXED;
222 if (waitfor == MNT_WAIT)
223 waitfor = MNT_NOWAIT;
224 break;
225 case 4:
226 /* ignore the fsync() system call */
b0aab9b9 227 lwkt_reltoken(&hmp->fs_token);
228 return(0);
229 default:
230 /* we have to do something */
231 mode = HAMMER_FLUSH_UNDOS_RELAXED;
232 if (waitfor == MNT_WAIT)
233 waitfor = MNT_NOWAIT;
234 break;
235 }
236
237 /*
238 * Fast fsync only needs to flush the UNDO/REDO fifo if
239 * HAMMER_INODE_REDO is non-zero and the only modifications
240 * made to the file are write or write-extends.
9192654c 241 */
47f363f1 242 if ((ip->flags & HAMMER_INODE_REDO) &&
9a620123 243 (ip->flags & HAMMER_INODE_MODMASK_NOREDO) == 0) {
244 ++hammer_count_fsyncs;
245 hammer_flusher_flush_undos(hmp, mode);
246 ip->redo_count = 0;
0d60b0ab 247 if (ip->vp && (ip->flags & HAMMER_INODE_MODMASK) == 0)
0f79f6b2 248 vclrisdirty(ip->vp);
b0aab9b9 249 lwkt_reltoken(&hmp->fs_token);
250 return(0);
251 }
252
253 /*
254 * REDO is enabled by fsync(), the idea being we really only
255 * want to lay down REDO records when programs are using
256 * fsync() heavily. The first fsync() on the file starts
257 * the gravy train going and later fsync()s keep it hot by
258 * resetting the redo_count.
259 *
260 * We weren't running REDOs before now so we have to fall
261 * through and do a full fsync of what we have.
262 */
263 if (hmp->version >= HAMMER_VOL_VERSION_FOUR &&
264 (hmp->flags & HAMMER_MOUNT_REDO_RECOVERY_RUN) == 0) {
265 ip->flags |= HAMMER_INODE_REDO;
266 ip->redo_count = 0;
267 }
6f3d87c0 268 }
9192654c 269skip:
c0ade690 270
6f3d87c0 271 /*
9192654c 272 * Do a full flush sequence.
273 *
274 * Attempt to release the vnode while waiting for the inode to
275 * finish flushing. This can really mess up inactive->reclaim
276 * sequences so only do it if the vnode is active.
277 *
278 * WARNING! The VX lock functions must be used. vn_lock() will
279 * fail when this is part of a VOP_RECLAIM sequence.
6f3d87c0 280 */
7a61b85d 281 ++hammer_count_fsyncs;
6f3d87c0 282 vfsync(ap->a_vp, waitfor, 1, NULL, NULL);
af209b0f 283 hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
6f3d87c0 284 if (waitfor == MNT_WAIT) {
285 int dorelock;
286
287 if ((ap->a_vp->v_flag & VRECLAIMED) == 0) {
288 vx_unlock(ap->a_vp);
289 dorelock = 1;
290 } else {
291 dorelock = 0;
292 }
b84de5af 293 hammer_wait_inode(ip);
294 if (dorelock)
295 vx_lock(ap->a_vp);
b424ca30 296 }
0d60b0ab 297 if (ip->vp && (ip->flags & HAMMER_INODE_MODMASK) == 0)
0f79f6b2 298 vclrisdirty(ip->vp);
b0aab9b9 299 lwkt_reltoken(&hmp->fs_token);
059819e3 300 return (ip->error);
301}
302
303/*
304 * hammer_vop_read { vp, uio, ioflag, cred }
42cd5131 305 *
b0aab9b9 306 * MPSAFE (for the cache-safe path; does not require fs_token)
66325755 307 */
308static
309int
66325755 310hammer_vop_read(struct vop_read_args *ap)
427e5fc6 311{
66325755 312 struct hammer_transaction trans;
c0ade690 313 hammer_inode_t ip;
b0aab9b9 314 hammer_mount_t hmp;
315 off_t offset;
316 struct buf *bp;
317 struct uio *uio;
318 int error;
319 int n;
8cd0a023 320 int seqcount;
321 int ioseqcount;
322 int blksize;
f864373f 323 int bigread;
32fcc103 324 int got_trans;
8f2d91a6 325 size_t resid;
326
327 if (ap->a_vp->v_type != VREG)
328 return (EINVAL);
329 ip = VTOI(ap->a_vp);
b0aab9b9 330 hmp = ip->hmp;
66325755 331 error = 0;
32fcc103 332 got_trans = 0;
333 uio = ap->a_uio;
334
335 /*
336 * Attempt to shortcut directly to the VM object using lwbufs.
337 * This is much faster than instantiating buffer cache buffers.
338 */
8f2d91a6 339 resid = uio->uio_resid;
68ad1455 340 error = vop_helper_read_shortcut(ap);
8f2d91a6 341 hammer_stats_file_read += resid - uio->uio_resid;
342 if (error)
343 return (error);
344 if (uio->uio_resid == 0)
345 goto finished;
346
347 /*
348 * Allow the UIO's size to override the sequential heuristic.
349 */
350 blksize = hammer_blocksize(uio->uio_offset);
351 seqcount = (uio->uio_resid + (BKVASIZE - 1)) / BKVASIZE;
352 ioseqcount = (ap->a_ioflag >> 16);
353 if (seqcount < ioseqcount)
354 seqcount = ioseqcount;
66325755 355
356 /*
357 * If reading or writing a huge amount of data we have to break
358 * atomicity and allow the operation to be interrupted by a signal
359 * or it can DOS the machine.
360 */
361 bigread = (uio->uio_resid > 100 * 1024 * 1024);
362
66325755 363 /*
364 * Access the data typically in HAMMER_BUFSIZE blocks via the
365 * buffer cache, but HAMMER may use a variable block size based
366 * on the offset.
367 *
368 * XXX Temporary hack, delay the start transaction while we remain
369 * MPSAFE. NOTE: ino_data.size cannot change while vnode is
370 * locked-shared.
66325755 371 */
11ad5ade 372 while (uio->uio_resid > 0 && uio->uio_offset < ip->ino_data.size) {
373 int64_t base_offset;
374 int64_t file_limit;
375
376 blksize = hammer_blocksize(uio->uio_offset);
377 offset = (int)uio->uio_offset & (blksize - 1);
378 base_offset = uio->uio_offset - offset;
379
380 if (bigread && (error = hammer_signal_check(ip->hmp)) != 0)
381 break;
382
383 /*
384 * MPSAFE
385 */
386 bp = getblk(ap->a_vp, base_offset, blksize, 0, 0);
387 if ((bp->b_flags & (B_INVAL | B_CACHE | B_RAM)) == B_CACHE) {
388 bp->b_flags &= ~B_AGE;
389 error = 0;
390 goto skip;
391 }
392 if (ap->a_ioflag & IO_NRDELAY) {
393 bqrelse(bp);
394 return (EWOULDBLOCK);
395 }
396
397 /*
398 * MPUNSAFE
399 */
32fcc103 400 if (got_trans == 0) {
42cd5131 401 hammer_start_transaction(&trans, ip->hmp);
32fcc103 402 got_trans = 1;
403 }
404
405 /*
406 * NOTE: A valid bp has already been acquired, but was not
407 * B_CACHE.
408 */
1b0ab2c3 409 if (hammer_cluster_enable) {
410 /*
411 * Use file_limit to prevent cluster_read() from
412 * creating buffers of the wrong block size past
413 * the demarc.
414 */
415 file_limit = ip->ino_data.size;
416 if (base_offset < HAMMER_XDEMARC &&
417 file_limit > HAMMER_XDEMARC) {
418 file_limit = HAMMER_XDEMARC;
419 }
54341a3b 420 error = cluster_readx(ap->a_vp,
4a2796f3 421 file_limit, base_offset,
422 blksize, uio->uio_resid,
423 seqcount * BKVASIZE, &bp);
a99b9ea2 424 } else {
425 error = breadnx(ap->a_vp, base_offset, blksize,
426 NULL, NULL, 0, &bp);
a99b9ea2 427 }
428 if (error) {
429 brelse(bp);
430 break;
431 }
42cd5131 432skip:
433 if ((hammer_debug_io & 0x0001) && (bp->b_flags & B_IODEBUG)) {
434 kprintf("doff %016jx read file %016jx@%016jx\n",
435 (intmax_t)bp->b_bio2.bio_offset,
436 (intmax_t)ip->obj_id,
437 (intmax_t)bp->b_loffset);
438 }
439 bp->b_flags &= ~B_IODEBUG;
440 if (blksize == HAMMER_XBUFSIZE)
441 bp->b_flags |= B_CLUSTEROK;
7bc5b8c2 442
4a2796f3 443 n = blksize - offset;
444 if (n > uio->uio_resid)
445 n = uio->uio_resid;
446 if (n > ip->ino_data.size - uio->uio_offset)
447 n = (int)(ip->ino_data.size - uio->uio_offset);
448
449 /*
450 * Set B_AGE, data has a lower priority than meta-data.
451 *
452 * Use a hold/unlock/drop sequence to run the uiomove
453 * with the buffer unlocked, avoiding deadlocks against
454 * read()s on mmap()'d spaces.
455 */
456 bp->b_flags |= B_AGE;
44480e31 457 error = uiomovebp(bp, (char *)bp->b_data + offset, n, uio);
283b9448 458 bqrelse(bp);
283b9448 459
460 if (error)
461 break;
ce0138a6 462 hammer_stats_file_read += n;
66325755 463 }
42cd5131 464
465finished:
466
42cd5131 467 /*
468 * Try to update the atime with just the inode lock for maximum
469 * concurrency. If we can't shortcut it we have to get the full
470 * blown transaction.
42cd5131 471 */
32fcc103 472 if (got_trans == 0 && hammer_update_atime_quick(ip) < 0) {
70125e78 473 hammer_start_transaction(&trans, ip->hmp);
32fcc103 474 got_trans = 1;
475 }
476
32fcc103 477 if (got_trans) {
478 if ((ip->flags & HAMMER_INODE_RO) == 0 &&
479 (ip->hmp->mp->mnt_flag & MNT_NOATIME) == 0) {
32fcc103 480 lwkt_gettoken(&hmp->fs_token);
42cd5131 481 ip->ino_data.atime = trans.time;
e98f1b96 482 hammer_modify_inode(&trans, ip, HAMMER_INODE_ATIME);
483 hammer_done_transaction(&trans);
484 lwkt_reltoken(&hmp->fs_token);
485 } else {
486 hammer_done_transaction(&trans);
42cd5131 487 }
b84de5af 488 }
66325755 489 return (error);
490}
491
492/*
493 * hammer_vop_write { vp, uio, ioflag, cred }
494 */
495static
496int
66325755 497hammer_vop_write(struct vop_write_args *ap)
427e5fc6 498{
499 struct hammer_transaction trans;
500 struct hammer_inode *ip;
4a2796f3 501 hammer_mount_t hmp;
1589191a 502 thread_t td;
66325755 503 struct uio *uio;
4a2796f3 504 int offset;
47637bff 505 off_t base_offset;
9de13b88 506 int64_t cluster_eof;
66325755 507 struct buf *bp;
fbb84158 508 int kflags;
509 int error;
510 int n;
c0ade690 511 int flags;
cb51be26 512 int seqcount;
f864373f 513 int bigwrite;
514
515 if (ap->a_vp->v_type != VREG)
516 return (EINVAL);
517 ip = VTOI(ap->a_vp);
4a2796f3 518 hmp = ip->hmp;
66325755 519 error = 0;
fbb84158 520 kflags = 0;
cb51be26 521 seqcount = ap->a_ioflag >> 16;
66325755 522
523 if (ip->flags & HAMMER_INODE_RO)
524 return (EROFS);
525
526 /*
527 * Create a transaction to cover the operations we perform.
528 */
4a2796f3 529 hammer_start_transaction(&trans, hmp);
530 uio = ap->a_uio;
531
532 /*
533 * Check append mode
534 */
535 if (ap->a_ioflag & IO_APPEND)
11ad5ade 536 uio->uio_offset = ip->ino_data.size;
537
538 /*
539 * Check for illegal write offsets. Valid range is 0...2^63-1.
540 *
541 * NOTE: the base_off assignment is required to work around what
542 * I consider to be a GCC-4 optimization bug.
66325755 543 */
544 if (uio->uio_offset < 0) {
545 hammer_done_transaction(&trans);
546 return (EFBIG);
547 }
548 base_offset = uio->uio_offset + uio->uio_resid; /* work around gcc-4 */
e54488bb 549 if (uio->uio_resid > 0 && base_offset <= uio->uio_offset) {
b84de5af 550 hammer_done_transaction(&trans);
66325755 551 return (EFBIG);
9c448776 552 }
66325755 553
554 if (uio->uio_resid > 0 && (td = uio->uio_td) != NULL && td->td_proc &&
555 base_offset > td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
556 hammer_done_transaction(&trans);
557 lwpsignal(td->td_proc, td->td_lwp, SIGXFSZ);
558 return (EFBIG);
559 }
560
561 /*
562 * If reading or writing a huge amount of data we have to break
563 * atomicity and allow the operation to be interrupted by a signal
564 * or it can DOS the machine.
9192654c 565 *
566 * Preset redo_count so we stop generating REDOs earlier if the
567 * limit is exceeded.
568 *
569 * redo_count is heuristical, SMP races are ok
570 */
571 bigwrite = (uio->uio_resid > 100 * 1024 * 1024);
572 if ((ip->flags & HAMMER_INODE_REDO) &&
573 ip->redo_count < hammer_limit_redo) {
9192654c 574 ip->redo_count += uio->uio_resid;
47f363f1 575 }
f864373f 576
66325755 577 /*
578 * Access the data typically in HAMMER_BUFSIZE blocks via the
579 * buffer cache, but HAMMER may use a variable block size based
580 * on the offset.
581 */
582 while (uio->uio_resid > 0) {
d5ef456e 583 int fixsize = 0;
584 int blksize;
585 int blkmask;
6362a262 586 int trivial;
d1eff1f7 587 int endofblk;
6362a262 588 off_t nsize;
d5ef456e 589
93291532 590 if ((error = hammer_checkspace(hmp, HAMMER_CHKSPC_WRITE)) != 0)
e63644f0 591 break;
592 if (bigwrite && (error = hammer_signal_check(hmp)) != 0)
593 break;
e63644f0 594
595 blksize = hammer_blocksize(uio->uio_offset);
596
597 /*
598 * Control the number of pending records associated with
599 * this inode. If too many have accumulated start a
600 * flush. Try to maintain a pipeline with the flusher.
601 *
602 * NOTE: It is possible for other sources to grow the
603 * records but not necessarily issue another flush,
604 * so use a timeout and ensure that a re-flush occurs.
605 */
606 if (ip->rsv_recs >= hammer_limit_inode_recs) {
32fcc103 607 lwkt_gettoken(&hmp->fs_token);
de996e86 608 hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
609 while (ip->rsv_recs >= hammer_limit_inode_recs * 2) {
610 ip->flags |= HAMMER_INODE_RECSW;
de996e86 611 tsleep(&ip->rsv_recs, 0, "hmrwww", hz);
a117fbeb 612 hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
de996e86 613 }
32fcc103 614 lwkt_reltoken(&hmp->fs_token);
615 }
616
df301614 617 /*
618 * Do not allow HAMMER to blow out the buffer cache. Very
619 * large UIOs can lock out other processes due to bwillwrite()
620 * mechanics.
621 *
622 * The hammer inode is not locked during these operations.
623 * The vnode is locked which can interfere with the pageout
624 * daemon for non-UIO_NOCOPY writes but should not interfere
625 * with the buffer cache. Even so, we cannot afford to
626 * allow the pageout daemon to build up too many dirty buffer
627 * cache buffers.
628 *
629 * Only call this if we aren't being recursively called from
630 * a virtual disk device (vn), else we may deadlock.
059819e3 631 */
632 if ((ap->a_ioflag & IO_RECURSE) == 0)
633 bwillwrite(blksize);
059819e3 634
635 /*
636 * Calculate the blocksize at the current offset and figure
637 * out how much we can actually write.
638 */
639 blkmask = blksize - 1;
640 offset = (int)uio->uio_offset & blkmask;
641 base_offset = uio->uio_offset & ~(int64_t)blkmask;
642 n = blksize - offset;
d1eff1f7 643 if (n > uio->uio_resid) {
d5ef456e 644 n = uio->uio_resid;
645 endofblk = 0;
646 } else {
647 endofblk = 1;
648 }
649 nsize = uio->uio_offset + n;
650 if (nsize > ip->ino_data.size) {
651 if (uio->uio_offset > ip->ino_data.size)
652 trivial = 0;
653 else
654 trivial = 1;
655 nvextendbuf(ap->a_vp,
656 ip->ino_data.size,
657 nsize,
658 hammer_blocksize(ip->ino_data.size),
659 hammer_blocksize(nsize),
660 hammer_blockoff(ip->ino_data.size),
661 hammer_blockoff(nsize),
662 trivial);
d5ef456e 663 fixsize = 1;
fbb84158 664 kflags |= NOTE_EXTEND;
665 }
666
667 if (uio->uio_segflg == UIO_NOCOPY) {
668 /*
669 * Issuing a write with the same data backing the
670 * buffer. Instantiate the buffer to collect the
671 * backing vm pages, then read-in any missing bits.
672 *
673 * This case is used by vop_stdputpages().
674 */
47637bff 675 bp = getblk(ap->a_vp, base_offset,
4a2796f3 676 blksize, GETBLK_BHEAVY, 0);
677 if ((bp->b_flags & B_CACHE) == 0) {
678 bqrelse(bp);
47637bff 679 error = bread(ap->a_vp, base_offset,
4a2796f3 680 blksize, &bp);
c0ade690 681 }
4a2796f3 682 } else if (offset == 0 && uio->uio_resid >= blksize) {
c0ade690 683 /*
a5fddc16 684 * Even though we are entirely overwriting the buffer
745703c7 685 * we may still have to zero it out to avoid a
a5fddc16 686 * mmap/write visibility issue.
c0ade690 687 */
4a2796f3 688 bp = getblk(ap->a_vp, base_offset, blksize, GETBLK_BHEAVY, 0);
689 if ((bp->b_flags & B_CACHE) == 0)
690 vfs_bio_clrbuf(bp);
47637bff 691 } else if (base_offset >= ip->ino_data.size) {
c0ade690 692 /*
693 * If the base offset of the buffer is beyond the
694 * file EOF, we don't have to issue a read.
c0ade690 695 */
47637bff 696 bp = getblk(ap->a_vp, base_offset,
4a2796f3 697 blksize, GETBLK_BHEAVY, 0);
66325755
MD
698 vfs_bio_clrbuf(bp);
699 } else {
c0ade690
MD
700 /*
701 * Partial overwrite, read in any missing bits then
702 * replace the portion being written.
703 */
4a2796f3 704 error = bread(ap->a_vp, base_offset, blksize, &bp);
705 if (error == 0)
706 bheavy(bp);
66325755 707 }
32fcc103 708 if (error == 0)
44480e31 709 error = uiomovebp(bp, bp->b_data + offset, n, uio);
710
711 lwkt_gettoken(&hmp->fs_token);
712
713 /*
714 * Generate REDO records if enabled and redo_count has not
715 * exceeded the limit.
716 *
717 * If redo_count exceeds the limit we stop generating records
718 * and clear HAMMER_INODE_REDO. This will cause the next
719 * fsync() to do a full meta-data sync instead of just an
720 * UNDO/REDO fifo update.
721 *
722 * When clearing HAMMER_INODE_REDO any pre-existing REDOs
723 * will still be tracked. The tracks will be terminated
724 * when the related meta-data (including possible data
725 * modifications which are not tracked via REDO) is
726 * flushed.
9192654c 727 */
728 if ((ip->flags & HAMMER_INODE_REDO) && error == 0) {
729 if (ip->redo_count < hammer_limit_redo) {
730 bp->b_flags |= B_VFSFLAG1;
731 error = hammer_generate_redo(&trans, ip,
9192654c 732 base_offset + offset,
47f363f1 733 HAMMER_REDO_WRITE,
734 bp->b_data + offset,
735 (size_t)n);
736 } else {
737 ip->flags &= ~HAMMER_INODE_REDO;
738 }
47637bff 739 }
740
741 /*
742 * If we screwed up we have to undo any VM size changes we
743 * made.
744 */
745 if (error) {
746 brelse(bp);
d5ef456e 747 if (fixsize) {
748 nvtruncbuf(ap->a_vp, ip->ino_data.size,
749 hammer_blocksize(ip->ino_data.size),
750 hammer_blockoff(ip->ino_data.size),
751 0);
d5ef456e 752 }
141ff1a5 753 lwkt_reltoken(&hmp->fs_token);
754 break;
755 }
fbb84158 756 kflags |= NOTE_WRITE;
ce0138a6 757 hammer_stats_file_write += n;
758 if (blksize == HAMMER_XBUFSIZE)
759 bp->b_flags |= B_CLUSTEROK;
760 if (ip->ino_data.size < uio->uio_offset) {
761 ip->ino_data.size = uio->uio_offset;
9192654c 762 flags = HAMMER_INODE_SDIRTY;
c0ade690 763 } else {
d113fda1 764 flags = 0;
66325755 765 }
11ad5ade 766 ip->ino_data.mtime = trans.time;
ddfdf542 767 flags |= HAMMER_INODE_MTIME | HAMMER_INODE_BUFS;
e98f1b96 768 hammer_modify_inode(&trans, ip, flags);
32c90105 769
1b0ab2c3
MD
770 /*
771 * Once we dirty the buffer any cached zone-X offset
745703c7 772 * becomes invalid. HAMMER NOTE: no-history mode cannot
773 * allow overwriting over the same data sector unless
774 * we provide UNDOs for the old data, which we don't.
775 */
776 bp->b_bio2.bio_offset = NOOFFSET;
777
778 lwkt_reltoken(&hmp->fs_token);
779
47637bff
MD
780 /*
781 * Final buffer disposition.
de996e86
MD
782 *
783 * Because meta-data updates are deferred, HAMMER is
784 * especially sensitive to excessive bdwrite()s because
785 * the I/O stream is not broken up by disk reads. So the
786 * buffer cache simply cannot keep up.
787 *
788 * WARNING! blksize is variable. cluster_write() is
789 * expected to not blow up if it encounters
790 * buffers that do not match the passed blksize.
710733a6
MD
791 *
792 * NOTE! Hammer shouldn't need to bawrite()/cluster_write().
793 * The ip->rsv_recs check should burst-flush the data.
794 * If we queue it immediately the buf could be left
795 * locked on the device queue for a very long time.
d1eff1f7 796 *
797 * However, failing to flush a dirty buffer out when
798 * issued from the pageout daemon can result in a low
799 * memory deadlock against bio_page_alloc(), so we
800 * have to bawrite() on IO_ASYNC as well.
801 *
802 * NOTE! To avoid degenerate stalls due to mismatched block
803 * sizes we only honor IO_DIRECT on the write which
804 * abuts the end of the buffer. However, we must
805 * honor IO_SYNC in case someone is silly enough to
806 * configure a HAMMER file as swap, or when HAMMER
807 * is serving NFS (for commits). Ick ick.
47637bff 808 */
809 bp->b_flags |= B_AGE;
810 if (blksize == HAMMER_XBUFSIZE)
811 bp->b_flags |= B_CLUSTEROK;
812
813 if (ap->a_ioflag & IO_SYNC) {
814 bwrite(bp);
d1eff1f7 815 } else if ((ap->a_ioflag & IO_DIRECT) && endofblk) {
66325755 816 bawrite(bp);
817 } else if (ap->a_ioflag & IO_ASYNC) {
818 bawrite(bp);
819 } else if (hammer_cluster_enable &&
820 !(ap->a_vp->v_mount->mnt_flag & MNT_NOCLUSTERW)) {
821 if (base_offset < HAMMER_XDEMARC)
822 cluster_eof = hammer_blockdemarc(base_offset,
823 ip->ino_data.size);
824 else
825 cluster_eof = ip->ino_data.size;
826 cluster_write(bp, cluster_eof, blksize, seqcount);
710733a6 827 } else {
828 bdwrite(bp);
829 }
66325755 830 }
b84de5af 831 hammer_done_transaction(&trans);
fbb84158 832 hammer_knote(ap->a_vp, kflags);
32fcc103 833
66325755 834 return (error);
835}
836
837/*
838 * hammer_vop_access { vp, mode, cred }
839 *
840 * MPSAFE - does not require fs_token
66325755 841 */
842static
843int
66325755 844hammer_vop_access(struct vop_access_args *ap)
427e5fc6 845{
846 struct hammer_inode *ip = VTOI(ap->a_vp);
847 uid_t uid;
848 gid_t gid;
849 int error;
850
ce0138a6 851 ++hammer_stats_file_iopsr;
66325755
MD
852 uid = hammer_to_unix_xid(&ip->ino_data.uid);
853 gid = hammer_to_unix_xid(&ip->ino_data.gid);
854
855 error = vop_helper_access(ap, uid, gid, ip->ino_data.mode,
856 ip->ino_data.uflags);
857 return (error);
858}
859
860/*
861 * hammer_vop_advlock { vp, id, op, fl, flags }
862 *
863 * MPSAFE - does not require fs_token
66325755 864 */
865static
866int
66325755 867hammer_vop_advlock(struct vop_advlock_args *ap)
427e5fc6 868{
4a2796f3 869 hammer_inode_t ip = VTOI(ap->a_vp);
66325755 870
11ad5ade 871 return (lf_advlock(ap, &ip->advlock, ip->ino_data.size));
872}
873
874/*
875 * hammer_vop_close { vp, fflag }
6f3d87c0 876 *
b0aab9b9 877 * We can only sync-on-close for normal closes. XXX disabled for now.
66325755 878 */
879static
880int
66325755 881hammer_vop_close(struct vop_close_args *ap)
427e5fc6 882{
9192654c 883#if 0
884 struct vnode *vp = ap->a_vp;
885 hammer_inode_t ip = VTOI(vp);
886 int waitfor;
887 if (ip->flags & (HAMMER_INODE_CLOSESYNC|HAMMER_INODE_CLOSEASYNC)) {
888 if (vn_islocked(vp) == LK_EXCLUSIVE &&
889 (vp->v_flag & (VINACTIVE|VRECLAIMED)) == 0) {
890 if (ip->flags & HAMMER_INODE_CLOSESYNC)
891 waitfor = MNT_WAIT;
892 else
893 waitfor = MNT_NOWAIT;
894 ip->flags &= ~(HAMMER_INODE_CLOSESYNC |
895 HAMMER_INODE_CLOSEASYNC);
896 VOP_FSYNC(vp, MNT_NOWAIT, waitfor);
897 }
898 }
9192654c 899#endif
a89aec1b 900 return (vop_stdclose(ap));
427e5fc6
MD
901}
902
66325755
MD
903/*
904 * hammer_vop_ncreate { nch, dvp, vpp, cred, vap }
905 *
906 * The operating system has already ensured that the directory entry
907 * does not exist and done all appropriate namespace locking.
908 */
427e5fc6
MD
909static
910int
66325755 911hammer_vop_ncreate(struct vop_ncreate_args *ap)
427e5fc6 912{
913 struct hammer_transaction trans;
914 struct hammer_inode *dip;
915 struct hammer_inode *nip;
916 struct nchandle *nch;
b0aab9b9 917 hammer_mount_t hmp;
918 int error;
919
920 nch = ap->a_nch;
921 dip = VTOI(ap->a_dvp);
b0aab9b9 922 hmp = dip->hmp;
66325755 923
924 if (dip->flags & HAMMER_INODE_RO)
925 return (EROFS);
b0aab9b9 926 if ((error = hammer_checkspace(hmp, HAMMER_CHKSPC_CREATE)) != 0)
e63644f0 927 return (error);
d113fda1 928
929 /*
930 * Create a transaction to cover the operations we perform.
931 */
932 lwkt_gettoken(&hmp->fs_token);
933 hammer_start_transaction(&trans, hmp);
ce0138a6 934 ++hammer_stats_file_iopsw;
935
936 /*
937 * Create a new filesystem object of the requested type. The
938 * returned inode will be referenced and shared-locked to prevent
939 * it from being moved to the flusher.
66325755 940 */
5a930e66 941 error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred,
942 dip, nch->ncp->nc_name, nch->ncp->nc_nlen,
943 NULL, &nip);
66325755 944 if (error) {
77062c8a 945 hkprintf("hammer_create_inode error %d\n", error);
b84de5af 946 hammer_done_transaction(&trans);
66325755 947 *ap->a_vpp = NULL;
b0aab9b9 948 lwkt_reltoken(&hmp->fs_token);
949 return (error);
950 }
951
952 /*
953 * Add the new filesystem object to the directory. This will also
954 * bump the inode's link count.
955 */
956 error = hammer_ip_add_directory(&trans, dip,
957 nch->ncp->nc_name, nch->ncp->nc_nlen,
958 nip);
0b075555 959 if (error)
77062c8a 960 hkprintf("hammer_ip_add_directory error %d\n", error);
961
962 /*
963 * Finish up.
964 */
965 if (error) {
a89aec1b 966 hammer_rel_inode(nip, 0);
b84de5af 967 hammer_done_transaction(&trans);
968 *ap->a_vpp = NULL;
969 } else {
e8599db1 970 error = hammer_get_vnode(nip, ap->a_vpp);
b84de5af 971 hammer_done_transaction(&trans);
972 hammer_rel_inode(nip, 0);
973 if (error == 0) {
974 cache_setunresolved(ap->a_nch);
975 cache_setvp(ap->a_nch, *ap->a_vpp);
976 }
fbb84158 977 hammer_knote(ap->a_dvp, NOTE_WRITE);
66325755 978 }
b0aab9b9 979 lwkt_reltoken(&hmp->fs_token);
66325755 980 return (error);
981}
982
983/*
984 * hammer_vop_getattr { vp, vap }
98f7132d
MD
985 *
986 * Retrieve an inode's attribute information. When accessing inodes
987 * historically we fake the atime field to ensure consistent results.
988 * The atime field is stored in the B-Tree element and allowed to be
989 * updated without cycling the element.
899eb297 990 *
b0aab9b9 991 * MPSAFE - does not require fs_token
66325755 992 */
993static
994int
66325755 995hammer_vop_getattr(struct vop_getattr_args *ap)
427e5fc6 996{
66325755
MD
997 struct hammer_inode *ip = VTOI(ap->a_vp);
998 struct vattr *vap = ap->a_vap;
999
1000 /*
1001 * We want the fsid to be different when accessing a filesystem
1002 * with different as-of's so programs like diff don't think
1003 * the files are the same.
1004 *
1005 * We also want the fsid to be the same when comparing snapshots,
1006 * or when comparing mirrors (which might be backed by different
1007 * physical devices). HAMMER fsids are based on the PFS's
1008 * shared_uuid field.
1009 *
1010 * XXX there is a chance of collision here. The va_fsid reported
1011 * by stat is different from the more involved fsid used in the
1012 * mount structure.
c82af904 1013 */
ce0138a6 1014 ++hammer_stats_file_iopsr;
899eb297 1015 hammer_lock_sh(&ip->lock);
1016 vap->va_fsid = ip->pfsm->fsid_udev ^ (u_int32_t)ip->obj_asof ^
1017 (u_int32_t)(ip->obj_asof >> 32);
1018
11ad5ade 1019 vap->va_fileid = ip->ino_leaf.base.obj_id;
66325755 1020 vap->va_mode = ip->ino_data.mode;
11ad5ade 1021 vap->va_nlink = ip->ino_data.nlinks;
66325755
MD
1022 vap->va_uid = hammer_to_unix_xid(&ip->ino_data.uid);
1023 vap->va_gid = hammer_to_unix_xid(&ip->ino_data.gid);
1024 vap->va_rmajor = 0;
1025 vap->va_rminor = 0;
11ad5ade 1026 vap->va_size = ip->ino_data.size;
bcac4bbb 1027
1028 /*
1029 * Special case for @@PFS softlinks. The actual size of the
1030 * expanded softlink is "@@0x%016llx:%05d" == 26 bytes.
cb3c760c 1031 * or for MAX_TID is "@@-1:%05d" == 10 bytes.
1032 *
1033 * Note that userspace hammer command does not allow users to
1034 * create a @@PFS softlink under an existing other PFS (id!=0)
1035 * so the ip localization here for @@PFS softlink is always 0.
f437a2ab
MD
1036 */
1037 if (ip->ino_data.obj_type == HAMMER_OBJTYPE_SOFTLINK &&
1038 ip->ino_data.size == 10 &&
1039 ip->obj_asof == HAMMER_MAX_TID &&
1040 ip->obj_localization == 0 &&
1041 strncmp(ip->ino_data.ext.symlink, "@@PFS", 5) == 0) {
1042 if (ip->pfsm->pfsd.mirror_flags & HAMMER_PFSD_SLAVE)
1043 vap->va_size = 26;
1044 else
1045 vap->va_size = 10;
1046 }
1047
bcac4bbb
MD
1048 /*
1049 * We must provide a consistent atime and mtime for snapshots
1050 * so people can do a 'tar cf - ... | md5' on them and get
1051 * consistent results.
1052 */
1053 if (ip->flags & HAMMER_INODE_RO) {
1054 hammer_time_to_timespec(ip->ino_data.ctime, &vap->va_atime);
1055 hammer_time_to_timespec(ip->ino_data.ctime, &vap->va_mtime);
bcac4bbb 1056 } else {
1057 hammer_time_to_timespec(ip->ino_data.atime, &vap->va_atime);
1058 hammer_time_to_timespec(ip->ino_data.mtime, &vap->va_mtime);
bcac4bbb 1059 }
ddfdf542 1060 hammer_time_to_timespec(ip->ino_data.ctime, &vap->va_ctime);
1061 vap->va_flags = ip->ino_data.uflags;
1062 vap->va_gen = 1; /* hammer inums are unique for all time */
bf686dbe 1063 vap->va_blocksize = HAMMER_BUFSIZE;
1064 if (ip->ino_data.size >= HAMMER_XDEMARC) {
1065 vap->va_bytes = (ip->ino_data.size + HAMMER_XBUFMASK64) &
1066 ~HAMMER_XBUFMASK64;
1067 } else if (ip->ino_data.size > HAMMER_BUFSIZE / 2) {
1068 vap->va_bytes = (ip->ino_data.size + HAMMER_BUFMASK64) &
1069 ~HAMMER_BUFMASK64;
1070 } else {
1071 vap->va_bytes = (ip->ino_data.size + 15) & ~15;
1072 }
64950f31 1073
11ad5ade 1074 vap->va_type = hammer_get_vnode_type(ip->ino_data.obj_type);
7866ea2a 1075 vap->va_filerev = 0; /* XXX */
1076 vap->va_uid_uuid = ip->ino_data.uid;
1077 vap->va_gid_uuid = ip->ino_data.gid;
1078 vap->va_fsid_uuid = ip->hmp->fsid;
1079 vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
1080 VA_FSID_UUID_VALID;
7a04d74f 1081
11ad5ade 1082 switch (ip->ino_data.obj_type) {
1083 case HAMMER_OBJTYPE_CDEV:
1084 case HAMMER_OBJTYPE_BDEV:
1085 vap->va_rmajor = ip->ino_data.rmajor;
1086 vap->va_rminor = ip->ino_data.rminor;
1087 break;
1088 default:
1089 break;
1090 }
899eb297 1091 hammer_unlock(&ip->lock);
66325755 1092 return(0);
427e5fc6
MD
1093}
1094
66325755
MD
1095/*
1096 * hammer_vop_nresolve { nch, dvp, cred }
1097 *
1098 * Locate the requested directory entry.
1099 */
1100static
1101int
66325755 1102hammer_vop_nresolve(struct vop_nresolve_args *ap)
427e5fc6 1103{
36f82b23 1104 struct hammer_transaction trans;
66325755 1105 struct namecache *ncp;
b0aab9b9 1106 hammer_mount_t hmp;
1107 hammer_inode_t dip;
1108 hammer_inode_t ip;
1109 hammer_tid_t asof;
8cd0a023 1110 struct hammer_cursor cursor;
1111 struct vnode *vp;
1112 int64_t namekey;
1113 int error;
7f7c1f84
MD
1114 int i;
1115 int nlen;
d113fda1 1116 int flags;
a56cb012 1117 int ispfs;
adf01747 1118 int64_t obj_id;
ddfdf542 1119 u_int32_t localization;
5e435c92 1120 u_int32_t max_iterations;
1121
1122 /*
1123 * Misc initialization, plus handle as-of name extensions. Look for
1124 * the '@@' extension. Note that as-of files and directories cannot
1125 * be modified.
1126 */
1127 dip = VTOI(ap->a_dvp);
1128 ncp = ap->a_nch->ncp;
1129 asof = dip->obj_asof;
bc6c1f13 1130 localization = dip->obj_localization; /* for code consistency */
7f7c1f84 1131 nlen = ncp->nc_nlen;
ea434b6f 1132 flags = dip->flags & HAMMER_INODE_RO;
a56cb012 1133 ispfs = 0;
b0aab9b9 1134 hmp = dip->hmp;
7f7c1f84 1135
1136 lwkt_gettoken(&hmp->fs_token);
1137 hammer_simple_transaction(&trans, hmp);
ce0138a6 1138 ++hammer_stats_file_iopsr;
36f82b23 1139
1140 for (i = 0; i < nlen; ++i) {
1141 if (ncp->nc_name[i] == '@' && ncp->nc_name[i+1] == '@') {
1142 error = hammer_str_to_tid(ncp->nc_name + i + 2,
1143 &ispfs, &asof, &localization);
1144 if (error != 0) {
1145 i = nlen;
1146 break;
1147 }
1148 if (asof != HAMMER_MAX_TID)
1149 flags |= HAMMER_INODE_RO;
1150 break;
1151 }
1152 }
1153 nlen = i;
66325755 1154
1155 /*
1156 * If this is a PFS softlink we dive into the PFS
1157 */
1158 if (ispfs && nlen == 0) {
1159 ip = hammer_get_inode(&trans, dip, HAMMER_OBJID_ROOT,
1160 asof, localization,
1161 flags, &error);
1162 if (error == 0) {
1163 error = hammer_get_vnode(ip, &vp);
1164 hammer_rel_inode(ip, 0);
1165 } else {
1166 vp = NULL;
1167 }
1168 if (error == 0) {
1169 vn_unlock(vp);
1170 cache_setvp(ap->a_nch, vp);
1171 vrele(vp);
1172 }
1173 goto done;
1174 }
1175
d113fda1 1176 /*
1177 * If there is no path component the time extension is relative to dip.
1178 * e.g. "fubar/@@<snapshot>"
1179 *
1180 * "." is handled by the kernel, but ".@@<snapshot>" is not.
1181 * e.g. "fubar/.@@<snapshot>"
1182 *
1183 * ".." is handled by the kernel. We do not currently handle
1184 * "..@<snapshot>".
d113fda1 1185 */
294aec9f 1186 if (nlen == 0 || (nlen == 1 && ncp->nc_name[0] == '.')) {
bcac4bbb 1187 ip = hammer_get_inode(&trans, dip, dip->obj_id,
1188 asof, dip->obj_localization,
1189 flags, &error);
d113fda1 1190 if (error == 0) {
e8599db1 1191 error = hammer_get_vnode(ip, &vp);
1192 hammer_rel_inode(ip, 0);
1193 } else {
1194 vp = NULL;
1195 }
1196 if (error == 0) {
1197 vn_unlock(vp);
1198 cache_setvp(ap->a_nch, vp);
1199 vrele(vp);
1200 }
36f82b23 1201 goto done;
1202 }
1203
1204 /*
1205 * Calculate the namekey and setup the key range for the scan. This
1206 * works kinda like a chained hash table where the lower 32 bits
1207 * of the namekey synthesize the chain.
1208 *
1209 * The key range is inclusive of both key_beg and key_end.
1210 */
1211 namekey = hammer_directory_namekey(dip, ncp->nc_name, nlen,
1212 &max_iterations);
66325755 1213
bcac4bbb 1214 error = hammer_init_cursor(&trans, &cursor, &dip->cache[1], dip);
5a930e66 1215 cursor.key_beg.localization = dip->obj_localization +
beec5dc4 1216 hammer_dir_localization(dip);
1217 cursor.key_beg.obj_id = dip->obj_id;
1218 cursor.key_beg.key = namekey;
d5530d22 1219 cursor.key_beg.create_tid = 0;
8cd0a023
MD
1220 cursor.key_beg.delete_tid = 0;
1221 cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
1222 cursor.key_beg.obj_type = 0;
66325755 1223
8cd0a023 1224 cursor.key_end = cursor.key_beg;
5e435c92 1225 cursor.key_end.key += max_iterations;
1226 cursor.asof = asof;
1227 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;
1228
1229 /*
8cd0a023 1230 * Scan all matching records (the chain), locate the one matching
a89aec1b 1231 * the requested path component.
8cd0a023
MD
1232 *
1233 * The hammer_ip_*() functions merge in-memory records with on-disk
1234 * records for the purposes of the search.
66325755 1235 */
6a37e7e4 1236 obj_id = 0;
43c665ae 1237 localization = HAMMER_DEF_LOCALIZATION;
6a37e7e4 1238
4e17f465 1239 if (error == 0) {
1240 error = hammer_ip_first(&cursor);
1241 while (error == 0) {
1242 error = hammer_ip_resolve_data(&cursor);
1243 if (error)
1244 break;
1245 if (nlen == cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF &&
1246 bcmp(ncp->nc_name, cursor.data->entry.name, nlen) == 0) {
1247 obj_id = cursor.data->entry.obj_id;
ddfdf542 1248 localization = cursor.data->entry.localization;
1249 break;
1250 }
1251 error = hammer_ip_next(&cursor);
1252 }
1253 }
6a37e7e4 1254 hammer_done_cursor(&cursor);
1255
1256 /*
1257 * Lookup the obj_id. This should always succeed. If it does not
1258 * the filesystem may be damaged and we return a dummy inode.
1259 */
66325755 1260 if (error == 0) {
bcac4bbb 1261 ip = hammer_get_inode(&trans, dip, obj_id,
1262 asof, localization,
1263 flags, &error);
1264 if (error == ENOENT) {
1265 kprintf("HAMMER: WARNING: Missing "
1266 "inode for dirent \"%s\"\n"
1267 "\tobj_id = %016llx, asof=%016llx, lo=%08x\n",
1268 ncp->nc_name,
1269 (long long)obj_id, (long long)asof,
1270 localization);
1271 error = 0;
1272 ip = hammer_get_dummy_inode(&trans, dip, obj_id,
1273 asof, localization,
1274 flags, &error);
1275 }
7f7c1f84 1276 if (error == 0) {
e8599db1 1277 error = hammer_get_vnode(ip, &vp);
1278 hammer_rel_inode(ip, 0);
1279 } else {
1280 vp = NULL;
1281 }
1282 if (error == 0) {
1283 vn_unlock(vp);
1284 cache_setvp(ap->a_nch, vp);
1285 vrele(vp);
1286 }
1287 } else if (error == ENOENT) {
1288 cache_setvp(ap->a_nch, NULL);
1289 }
36f82b23 1290done:
b84de5af 1291 hammer_done_transaction(&trans);
b0aab9b9 1292 lwkt_reltoken(&hmp->fs_token);
66325755 1293 return (error);
1294}
1295
1296/*
1297 * hammer_vop_nlookupdotdot { dvp, vpp, cred }
1298 *
1299 * Locate the parent directory of a directory vnode.
1300 *
1301 * dvp is referenced but not locked. *vpp must be returned referenced and
1302 * locked. A parent_obj_id of 0 does not necessarily indicate that we are
1303 * at the root, instead it could indicate that the directory we were in was
1304 * removed.
1305 *
1306 * NOTE: as-of sequences are not linked into the directory structure. If
1307 * we are at the root with a different asof than the mount point, reload
1308 * the same directory with the mount point's asof. I'm not sure what this
1309 * will do to NFS. We encode ASOF stamps in NFS file handles so it might not
1310 * get confused, but it hasn't been tested.
66325755 1311 */
1312static
1313int
66325755 1314hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
427e5fc6 1315{
36f82b23 1316 struct hammer_transaction trans;
66325755 1317 struct hammer_inode *dip;
d113fda1 1318 struct hammer_inode *ip;
b0aab9b9 1319 hammer_mount_t hmp;
42c7d26b 1320 int64_t parent_obj_id;
5a930e66 1321 u_int32_t parent_obj_localization;
42c7d26b 1322 hammer_tid_t asof;
d113fda1 1323 int error;
66325755
MD
1324
1325 dip = VTOI(ap->a_dvp);
42c7d26b 1326 asof = dip->obj_asof;
b0aab9b9 1327 hmp = dip->hmp;
1328
1329 /*
1330 * Who is our parent? This could be the root of a pseudo-filesystem
1331 * whose parent is in another localization domain.
1332 */
b0aab9b9 1333 lwkt_gettoken(&hmp->fs_token);
42c7d26b 1334 parent_obj_id = dip->ino_data.parent_obj_id;
1335 if (dip->obj_id == HAMMER_OBJID_ROOT)
1336 parent_obj_localization = dip->ino_data.ext.obj.parent_obj_localization;
1337 else
1338 parent_obj_localization = dip->obj_localization;
42c7d26b 1339
1340 /*
1341 * It's probably a PFS root when dip->ino_data.parent_obj_id is 0.
1342 */
1343 if (parent_obj_id == 0) {
1344 if (dip->obj_id == HAMMER_OBJID_ROOT &&
b0aab9b9 1345 asof != hmp->asof) {
42c7d26b 1346 parent_obj_id = dip->obj_id;
b0aab9b9 1347 asof = hmp->asof;
1348 *ap->a_fakename = kmalloc(19, M_TEMP, M_WAITOK);
1349 ksnprintf(*ap->a_fakename, 19, "0x%016llx",
973c11b9 1350 (long long)dip->obj_asof);
1351 } else {
1352 *ap->a_vpp = NULL;
b0aab9b9 1353 lwkt_reltoken(&hmp->fs_token);
1354 return ENOENT;
1355 }
66325755 1356 }
d113fda1 1357
b0aab9b9 1358 hammer_simple_transaction(&trans, hmp);
ce0138a6 1359 ++hammer_stats_file_iopsr;
36f82b23 1360
bcac4bbb 1361 ip = hammer_get_inode(&trans, dip, parent_obj_id,
5a930e66 1362 asof, parent_obj_localization,
ddfdf542 1363 dip->flags, &error);
36f82b23
MD
1366 hammer_rel_inode(ip, 0);
1367 } else {
d113fda1 1368 *ap->a_vpp = NULL;
d113fda1 1369 }
b84de5af 1370 hammer_done_transaction(&trans);
b0aab9b9 1371 lwkt_reltoken(&hmp->fs_token);
d113fda1 1372 return (error);
1373}
1374
1375/*
1376 * hammer_vop_nlink { nch, dvp, vp, cred }
1377 */
1378static
1379int
66325755 1380hammer_vop_nlink(struct vop_nlink_args *ap)
427e5fc6 1381{
1382 struct hammer_transaction trans;
1383 struct hammer_inode *dip;
1384 struct hammer_inode *ip;
1385 struct nchandle *nch;
b0aab9b9 1386 hammer_mount_t hmp;
1387 int error;
1388
66611793 1389 if (ap->a_dvp->v_mount != ap->a_vp->v_mount)
1390 return(EXDEV);
1391
66325755
MD
1392 nch = ap->a_nch;
1393 dip = VTOI(ap->a_dvp);
1394 ip = VTOI(ap->a_vp);
b0aab9b9 1395 hmp = dip->hmp;
66325755 1396
1397 if (dip->obj_localization != ip->obj_localization)
1398 return(EXDEV);
1399
1400 if (dip->flags & HAMMER_INODE_RO)
1401 return (EROFS);
1402 if (ip->flags & HAMMER_INODE_RO)
1403 return (EROFS);
b0aab9b9 1404 if ((error = hammer_checkspace(hmp, HAMMER_CHKSPC_CREATE)) != 0)
e63644f0 1405 return (error);
d113fda1 1406
1407 /*
1408 * Create a transaction to cover the operations we perform.
1409 */
1410 lwkt_gettoken(&hmp->fs_token);
1411 hammer_start_transaction(&trans, hmp);
ce0138a6 1412 ++hammer_stats_file_iopsw;
1413
1414 /*
1415 * Add the filesystem object to the directory. Note that neither
1416 * dip nor ip are referenced or locked, but their vnodes are
1417 * referenced. This function will bump the inode's link count.
1418 */
1419 error = hammer_ip_add_directory(&trans, dip,
1420 nch->ncp->nc_name, nch->ncp->nc_nlen,
1421 ip);
1422
1423 /*
1424 * Finish up.
1425 */
b84de5af 1426 if (error == 0) {
1427 cache_setunresolved(nch);
1428 cache_setvp(nch, ap->a_vp);
66325755 1429 }
b84de5af 1430 hammer_done_transaction(&trans);
1431 hammer_knote(ap->a_vp, NOTE_LINK);
1432 hammer_knote(ap->a_dvp, NOTE_WRITE);
b0aab9b9 1433 lwkt_reltoken(&hmp->fs_token);
66325755 1434 return (error);
1435}
1436
66325755
MD
1437/*
1438 * hammer_vop_nmkdir { nch, dvp, vpp, cred, vap }
1439 *
1440 * The operating system has already ensured that the directory entry
1441 * does not exist and done all appropriate namespace locking.
1442 */
1443static
1444int
66325755 1445hammer_vop_nmkdir(struct vop_nmkdir_args *ap)
427e5fc6 1446{
1447 struct hammer_transaction trans;
1448 struct hammer_inode *dip;
1449 struct hammer_inode *nip;
1450 struct nchandle *nch;
b0aab9b9 1451 hammer_mount_t hmp;
1452 int error;
1453
1454 nch = ap->a_nch;
1455 dip = VTOI(ap->a_dvp);
b0aab9b9 1456 hmp = dip->hmp;
66325755 1457
1458 if (dip->flags & HAMMER_INODE_RO)
1459 return (EROFS);
b0aab9b9 1460 if ((error = hammer_checkspace(hmp, HAMMER_CHKSPC_CREATE)) != 0)
e63644f0 1461 return (error);
d113fda1 1462
1463 /*
1464 * Create a transaction to cover the operations we perform.
1465 */
1466 lwkt_gettoken(&hmp->fs_token);
1467 hammer_start_transaction(&trans, hmp);
ce0138a6 1468 ++hammer_stats_file_iopsw;
1469
1470 /*
1471 * Create a new filesystem object of the requested type. The
8cd0a023 1472 * returned inode will be referenced but not locked.
66325755 1473 */
5a930e66 1474 error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred,
1475 dip, nch->ncp->nc_name, nch->ncp->nc_nlen,
1476 NULL, &nip);
66325755 1477 if (error) {
77062c8a 1478 hkprintf("hammer_mkdir error %d\n", error);
b84de5af 1479 hammer_done_transaction(&trans);
66325755 1480 *ap->a_vpp = NULL;
b0aab9b9 1481 lwkt_reltoken(&hmp->fs_token);
1482 return (error);
1483 }
1484 /*
1485 * Add the new filesystem object to the directory. This will also
1486 * bump the inode's link count.
1487 */
1488 error = hammer_ip_add_directory(&trans, dip,
1489 nch->ncp->nc_name, nch->ncp->nc_nlen,
1490 nip);
0b075555 1491 if (error)
77062c8a 1492 hkprintf("hammer_mkdir (add) error %d\n", error);
1493
1494 /*
1495 * Finish up.
1496 */
1497 if (error) {
a89aec1b 1498 hammer_rel_inode(nip, 0);
1499 *ap->a_vpp = NULL;
1500 } else {
e8599db1 1501 error = hammer_get_vnode(nip, ap->a_vpp);
1502 hammer_rel_inode(nip, 0);
1503 if (error == 0) {
1504 cache_setunresolved(ap->a_nch);
1505 cache_setvp(ap->a_nch, *ap->a_vpp);
1506 }
66325755 1507 }
b84de5af 1508 hammer_done_transaction(&trans);
1509 if (error == 0)
1510 hammer_knote(ap->a_dvp, NOTE_WRITE | NOTE_LINK);
b0aab9b9 1511 lwkt_reltoken(&hmp->fs_token);
66325755 1512 return (error);
1513}
1514
1515/*
1516 * hammer_vop_nmknod { nch, dvp, vpp, cred, vap }
1517 *
1518 * The operating system has already ensured that the directory entry
1519 * does not exist and done all appropriate namespace locking.
1520 */
1521static
1522int
66325755 1523hammer_vop_nmknod(struct vop_nmknod_args *ap)
427e5fc6 1524{
1525 struct hammer_transaction trans;
1526 struct hammer_inode *dip;
1527 struct hammer_inode *nip;
1528 struct nchandle *nch;
b0aab9b9 1529 hammer_mount_t hmp;
66325755
MD
1530 int error;
1531
1532 nch = ap->a_nch;
1533 dip = VTOI(ap->a_dvp);
b0aab9b9 1534 hmp = dip->hmp;
66325755 1535
1536 if (dip->flags & HAMMER_INODE_RO)
1537 return (EROFS);
b0aab9b9 1538 if ((error = hammer_checkspace(hmp, HAMMER_CHKSPC_CREATE)) != 0)
e63644f0 1539 return (error);
d113fda1 1540
1541 /*
1542 * Create a transaction to cover the operations we perform.
1543 */
1544 lwkt_gettoken(&hmp->fs_token);
1545 hammer_start_transaction(&trans, hmp);
ce0138a6 1546 ++hammer_stats_file_iopsw;
1547
1548 /*
1549 * Create a new filesystem object of the requested type. The
8cd0a023 1550 * returned inode will be referenced but not locked.
1551 *
1552 * If mknod specifies a directory a pseudo-fs is created.
66325755 1553 */
5a930e66 1554 error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred,
1555 dip, nch->ncp->nc_name, nch->ncp->nc_nlen,
1556 NULL, &nip);
66325755 1557 if (error) {
b84de5af 1558 hammer_done_transaction(&trans);
66325755 1559 *ap->a_vpp = NULL;
b0aab9b9 1560 lwkt_reltoken(&hmp->fs_token);
1561 return (error);
1562 }
1563
1564 /*
1565 * Add the new filesystem object to the directory. This will also
1566 * bump the inode's link count.
1567 */
1568 error = hammer_ip_add_directory(&trans, dip,
1569 nch->ncp->nc_name, nch->ncp->nc_nlen,
1570 nip);
1571
1572 /*
1573 * Finish up.
1574 */
1575 if (error) {
a89aec1b 1576 hammer_rel_inode(nip, 0);
1577 *ap->a_vpp = NULL;
1578 } else {
e8599db1 1579 error = hammer_get_vnode(nip, ap->a_vpp);
1580 hammer_rel_inode(nip, 0);
1581 if (error == 0) {
1582 cache_setunresolved(ap->a_nch);
1583 cache_setvp(ap->a_nch, *ap->a_vpp);
1584 }
66325755 1585 }
b84de5af 1586 hammer_done_transaction(&trans);
1587 if (error == 0)
1588 hammer_knote(ap->a_dvp, NOTE_WRITE);
b0aab9b9 1589 lwkt_reltoken(&hmp->fs_token);
66325755 1590 return (error);
1591}
1592
1593/*
1594 * hammer_vop_open { vp, mode, cred, fp }
1595 *
1596 * MPSAFE (does not require fs_token)
66325755 1597 */
1598static
1599int
66325755 1600hammer_vop_open(struct vop_open_args *ap)
427e5fc6 1601{
1602 hammer_inode_t ip;
1603
ce0138a6 1604 ++hammer_stats_file_iopsr;
1605 ip = VTOI(ap->a_vp);
1606
1607 if ((ap->a_mode & FWRITE) && (ip->flags & HAMMER_INODE_RO))
d113fda1 1608 return (EROFS);
a89aec1b 1609 return(vop_stdopen(ap));
1610}
1611
1612/*
1613 * hammer_vop_print { vp }
1614 */
1615static
1616int
66325755 1617hammer_vop_print(struct vop_print_args *ap)
1618{
1619 return EOPNOTSUPP;
1620}
1621
66325755 1622/*
6b4f890b 1623 * hammer_vop_readdir { vp, uio, cred, *eofflag, *ncookies, off_t **cookies }
66325755 1624 */
1625static
1626int
66325755 1627hammer_vop_readdir(struct vop_readdir_args *ap)
427e5fc6 1628{
36f82b23 1629 struct hammer_transaction trans;
1630 struct hammer_cursor cursor;
1631 struct hammer_inode *ip;
b0aab9b9 1632 hammer_mount_t hmp;
6b4f890b 1633 struct uio *uio;
1634 hammer_base_elm_t base;
1635 int error;
1636 int cookie_index;
1637 int ncookies;
1638 off_t *cookies;
1639 off_t saveoff;
1640 int r;
ea434b6f 1641 int dtype;
6b4f890b 1642
ce0138a6 1643 ++hammer_stats_file_iopsr;
6b4f890b
MD
1644 ip = VTOI(ap->a_vp);
1645 uio = ap->a_uio;
b3deaf57 1646 saveoff = uio->uio_offset;
b0aab9b9 1647 hmp = ip->hmp;
b3deaf57
MD
1648
1649 if (ap->a_ncookies) {
1650 ncookies = uio->uio_resid / 16 + 1;
1651 if (ncookies > 1024)
1652 ncookies = 1024;
1653 cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
1654 cookie_index = 0;
1655 } else {
1656 ncookies = -1;
1657 cookies = NULL;
1658 cookie_index = 0;
1659 }
1660
b0aab9b9
MD
1661 lwkt_gettoken(&hmp->fs_token);
1662 hammer_simple_transaction(&trans, hmp);
36f82b23 1663
b3deaf57
MD
1664 /*
1665 * Handle artificial entries
4c286c36
MD
1666 *
1667 * It should be noted that the minimum value for a directory
1668 * hash key on-media is 0x0000000100000000, so we can use anything
 1669	 * less than that to represent our 'special' key space.
b3deaf57
MD
1670 */
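	/*
	 * (Editorial note, not part of the original source: the two
	 *  artificial entries emitted below use pseudo keys 0 and 1 for
	 *  "." and ".." respectively; real directory entries resume at
	 *  the on-media hash keys, which start at 0x0000000100000000.)
	 */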
1671 error = 0;
1672 if (saveoff == 0) {
1673 r = vop_write_dirent(&error, uio, ip->obj_id, DT_DIR, 1, ".");
1674 if (r)
1675 goto done;
1676 if (cookies)
1677 cookies[cookie_index] = saveoff;
1678 ++saveoff;
1679 ++cookie_index;
1680 if (cookie_index == ncookies)
1681 goto done;
1682 }
1683 if (saveoff == 1) {
1684 if (ip->ino_data.parent_obj_id) {
1685 r = vop_write_dirent(&error, uio,
1686 ip->ino_data.parent_obj_id,
1687 DT_DIR, 2, "..");
1688 } else {
1689 r = vop_write_dirent(&error, uio,
1690 ip->obj_id, DT_DIR, 2, "..");
1691 }
1692 if (r)
1693 goto done;
1694 if (cookies)
1695 cookies[cookie_index] = saveoff;
1696 ++saveoff;
1697 ++cookie_index;
1698 if (cookie_index == ncookies)
1699 goto done;
1700 }
6b4f890b
MD
1701
1702 /*
1703 * Key range (begin and end inclusive) to scan. Directory keys
1704 * directly translate to a 64 bit 'seek' position.
1705 */
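	/*
	 * (Editorial sketch, not part of the original source: the scan
	 *  resumes at cursor.key_beg.key = saveoff, i.e. the uio offset
	 *  handed back to userland doubles as the B-Tree directory key,
	 *  and the cookies returned below are simply those keys.)
	 */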
bcac4bbb 1706 hammer_init_cursor(&trans, &cursor, &ip->cache[1], ip);
5a930e66 1707 cursor.key_beg.localization = ip->obj_localization +
beec5dc4 1708 hammer_dir_localization(ip);
6b4f890b 1709 cursor.key_beg.obj_id = ip->obj_id;
d5530d22 1710 cursor.key_beg.create_tid = 0;
6b4f890b
MD
1711 cursor.key_beg.delete_tid = 0;
1712 cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
1713 cursor.key_beg.obj_type = 0;
b3deaf57 1714 cursor.key_beg.key = saveoff;
6b4f890b
MD
1715
1716 cursor.key_end = cursor.key_beg;
1717 cursor.key_end.key = HAMMER_MAX_KEY;
d5530d22
MD
1718 cursor.asof = ip->obj_asof;
1719 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;
6b4f890b 1720
4e17f465 1721 error = hammer_ip_first(&cursor);
6b4f890b
MD
1722
1723 while (error == 0) {
11ad5ade 1724 error = hammer_ip_resolve_data(&cursor);
6b4f890b
MD
1725 if (error)
1726 break;
11ad5ade 1727 base = &cursor.leaf->base;
6b4f890b 1728 saveoff = base->key;
11ad5ade 1729 KKASSERT(cursor.leaf->data_len > HAMMER_ENTRY_NAME_OFF);
6b4f890b 1730
7a04d74f
MD
1731 if (base->obj_id != ip->obj_id)
1732 panic("readdir: bad record at %p", cursor.node);
1733
ea434b6f
MD
1734 /*
1735 * Convert pseudo-filesystems into softlinks
1736 */
1737 dtype = hammer_get_dtype(cursor.leaf->base.obj_type);
6b4f890b 1738 r = vop_write_dirent(
11ad5ade 1739 &error, uio, cursor.data->entry.obj_id,
ea434b6f 1740 dtype,
11ad5ade
MD
 1741	 cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF,
1742 (void *)cursor.data->entry.name);
6b4f890b
MD
1743 if (r)
1744 break;
1745 ++saveoff;
1746 if (cookies)
1747 cookies[cookie_index] = base->key;
1748 ++cookie_index;
1749 if (cookie_index == ncookies)
1750 break;
1751 error = hammer_ip_next(&cursor);
1752 }
1753 hammer_done_cursor(&cursor);
1754
b3deaf57 1755done:
b84de5af 1756 hammer_done_transaction(&trans);
36f82b23 1757
6b4f890b
MD
1758 if (ap->a_eofflag)
1759 *ap->a_eofflag = (error == ENOENT);
6b4f890b
MD
1760 uio->uio_offset = saveoff;
1761 if (error && cookie_index == 0) {
b3deaf57
MD
1762 if (error == ENOENT)
1763 error = 0;
6b4f890b
MD
1764 if (cookies) {
1765 kfree(cookies, M_TEMP);
1766 *ap->a_ncookies = 0;
1767 *ap->a_cookies = NULL;
1768 }
1769 } else {
7a04d74f
MD
1770 if (error == ENOENT)
1771 error = 0;
6b4f890b
MD
1772 if (cookies) {
1773 *ap->a_ncookies = cookie_index;
1774 *ap->a_cookies = cookies;
1775 }
1776 }
b0aab9b9 1777 lwkt_reltoken(&hmp->fs_token);
6b4f890b 1778 return(error);
427e5fc6
MD
1779}
1780
66325755
MD
1781/*
1782 * hammer_vop_readlink { vp, uio, cred }
1783 */
427e5fc6
MD
1784static
1785int
66325755 1786hammer_vop_readlink(struct vop_readlink_args *ap)
427e5fc6 1787{
36f82b23 1788 struct hammer_transaction trans;
7a04d74f
MD
1789 struct hammer_cursor cursor;
1790 struct hammer_inode *ip;
b0aab9b9 1791 hammer_mount_t hmp;
ea434b6f
MD
1792 char buf[32];
1793 u_int32_t localization;
1794 hammer_pseudofs_inmem_t pfsm;
7a04d74f
MD
1795 int error;
1796
1797 ip = VTOI(ap->a_vp);
b0aab9b9
MD
1798 hmp = ip->hmp;
1799
1800 lwkt_gettoken(&hmp->fs_token);
36f82b23 1801
2f85fa4d
MD
1802 /*
1803 * Shortcut if the symlink data was stuffed into ino_data.
ea434b6f 1804 *
842e7a70
MD
1805 * Also expand special "@@PFS%05d" softlinks (expansion only
1806 * occurs for non-historical (current) accesses made from the
1807 * primary filesystem).
6207f545
TK
1808 *
 1809	 * Note that the userspace hammer command does not allow users to
 1810	 * create a @@PFS softlink under another existing PFS (id!=0),
 1811	 * so the ip localization here for a @@PFS softlink is always 0.
2f85fa4d
MD
1812 */
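	/*
	 * (Editorial example, not part of the original source: a softlink
	 *  whose stored data is "@@PFS00005" is expanded below to
	 *  "@@-1:00005" when the PFS is a master, or to
	 *  "@@0x<sync_end_tid>:00005" when it is a slave, matching the
	 *  ksnprintf() formats that follow.)
	 */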
1813 if (ip->ino_data.size <= HAMMER_INODE_BASESYMLEN) {
ea434b6f
MD
1814 char *ptr;
1815 int bytes;
1816
1817 ptr = ip->ino_data.ext.symlink;
1818 bytes = (int)ip->ino_data.size;
842e7a70
MD
1819 if (bytes == 10 &&
1820 ip->obj_asof == HAMMER_MAX_TID &&
1821 ip->obj_localization == 0 &&
1822 strncmp(ptr, "@@PFS", 5) == 0) {
b0aab9b9 1823 hammer_simple_transaction(&trans, hmp);
ea434b6f
MD
1824 bcopy(ptr + 5, buf, 5);
1825 buf[5] = 0;
1826 localization = strtoul(buf, NULL, 10) << 16;
1827 pfsm = hammer_load_pseudofs(&trans, localization,
1828 &error);
1829 if (error == 0) {
4c038e17
MD
1830 if (pfsm->pfsd.mirror_flags &
1831 HAMMER_PFSD_SLAVE) {
cb3c760c 1832 /* vap->va_size == 26 */
4c038e17
MD
1833 ksnprintf(buf, sizeof(buf),
1834 "@@0x%016llx:%05d",
973c11b9 1835 (long long)pfsm->pfsd.sync_end_tid,
4c038e17
MD
1836 localization >> 16);
1837 } else {
cb3c760c
MD
1838 /* vap->va_size == 10 */
1839 ksnprintf(buf, sizeof(buf),
1840 "@@-1:%05d",
1841 localization >> 16);
1842#if 0
4c038e17
MD
1843 ksnprintf(buf, sizeof(buf),
1844 "@@0x%016llx:%05d",
973c11b9 1845 (long long)HAMMER_MAX_TID,
4c038e17 1846 localization >> 16);
cb3c760c 1847#endif
4c038e17 1848 }
ea434b6f
MD
1849 ptr = buf;
1850 bytes = strlen(buf);
1851 }
1852 if (pfsm)
b0aab9b9 1853 hammer_rel_pseudofs(hmp, pfsm);
ea434b6f
MD
1854 hammer_done_transaction(&trans);
1855 }
1856 error = uiomove(ptr, bytes, ap->a_uio);
b0aab9b9 1857 lwkt_reltoken(&hmp->fs_token);
2f85fa4d
MD
1858 return(error);
1859 }
36f82b23 1860
2f85fa4d
MD
1861 /*
1862 * Long version
1863 */
b0aab9b9 1864 hammer_simple_transaction(&trans, hmp);
ce0138a6 1865 ++hammer_stats_file_iopsr;
bcac4bbb 1866 hammer_init_cursor(&trans, &cursor, &ip->cache[1], ip);
7a04d74f
MD
1867
1868 /*
1869 * Key range (begin and end inclusive) to scan. Directory keys
1870 * directly translate to a 64 bit 'seek' position.
1871 */
5a930e66
MD
1872 cursor.key_beg.localization = ip->obj_localization +
1873 HAMMER_LOCALIZE_MISC;
7a04d74f 1874 cursor.key_beg.obj_id = ip->obj_id;
d5530d22 1875 cursor.key_beg.create_tid = 0;
7a04d74f
MD
1876 cursor.key_beg.delete_tid = 0;
1877 cursor.key_beg.rec_type = HAMMER_RECTYPE_FIX;
1878 cursor.key_beg.obj_type = 0;
1879 cursor.key_beg.key = HAMMER_FIXKEY_SYMLINK;
d5530d22
MD
1880 cursor.asof = ip->obj_asof;
1881 cursor.flags |= HAMMER_CURSOR_ASOF;
7a04d74f 1882
45a014dc 1883 error = hammer_ip_lookup(&cursor);
7a04d74f
MD
1884 if (error == 0) {
1885 error = hammer_ip_resolve_data(&cursor);
1886 if (error == 0) {
11ad5ade
MD
1887 KKASSERT(cursor.leaf->data_len >=
1888 HAMMER_SYMLINK_NAME_OFF);
1889 error = uiomove(cursor.data->symlink.name,
1890 cursor.leaf->data_len -
1891 HAMMER_SYMLINK_NAME_OFF,
7a04d74f
MD
1892 ap->a_uio);
1893 }
1894 }
1895 hammer_done_cursor(&cursor);
b84de5af 1896 hammer_done_transaction(&trans);
b0aab9b9 1897 lwkt_reltoken(&hmp->fs_token);
7a04d74f 1898 return(error);
427e5fc6
MD
1899}
1900
66325755
MD
1901/*
1902 * hammer_vop_nremove { nch, dvp, cred }
1903 */
427e5fc6
MD
1904static
1905int
66325755 1906hammer_vop_nremove(struct vop_nremove_args *ap)
427e5fc6 1907{
b84de5af 1908 struct hammer_transaction trans;
e63644f0 1909 struct hammer_inode *dip;
b0aab9b9 1910 hammer_mount_t hmp;
b84de5af
MD
1911 int error;
1912
e63644f0 1913 dip = VTOI(ap->a_dvp);
b0aab9b9 1914 hmp = dip->hmp;
e63644f0
MD
1915
1916 if (hammer_nohistory(dip) == 0 &&
b0aab9b9 1917 (error = hammer_checkspace(hmp, HAMMER_CHKSPC_REMOVE)) != 0) {
e63644f0
MD
1918 return (error);
1919 }
1920
b0aab9b9
MD
1921 lwkt_gettoken(&hmp->fs_token);
1922 hammer_start_transaction(&trans, hmp);
ce0138a6 1923 ++hammer_stats_file_iopsw;
d7e278bb 1924 error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp, ap->a_cred, 0, 0);
b84de5af 1925 hammer_done_transaction(&trans);
fbb84158
MD
1926 if (error == 0)
1927 hammer_knote(ap->a_dvp, NOTE_WRITE);
b0aab9b9 1928 lwkt_reltoken(&hmp->fs_token);
b84de5af 1929 return (error);
427e5fc6
MD
1930}
1931
66325755
MD
1932/*
1933 * hammer_vop_nrename { fnch, tnch, fdvp, tdvp, cred }
1934 */
427e5fc6
MD
1935static
1936int
66325755 1937hammer_vop_nrename(struct vop_nrename_args *ap)
427e5fc6 1938{
8cd0a023
MD
1939 struct hammer_transaction trans;
1940 struct namecache *fncp;
1941 struct namecache *tncp;
1942 struct hammer_inode *fdip;
1943 struct hammer_inode *tdip;
1944 struct hammer_inode *ip;
b0aab9b9 1945 hammer_mount_t hmp;
8cd0a023 1946 struct hammer_cursor cursor;
8cd0a023 1947 int64_t namekey;
5e435c92 1948 u_int32_t max_iterations;
11ad5ade 1949 int nlen, error;
8cd0a023 1950
66611793 1951 if (ap->a_fdvp->v_mount != ap->a_tdvp->v_mount)
f437a2ab
MD
1952 return(EXDEV);
1953 if (ap->a_fdvp->v_mount != ap->a_fnch->ncp->nc_vp->v_mount)
1954 return(EXDEV);
1955
8cd0a023
MD
1956 fdip = VTOI(ap->a_fdvp);
1957 tdip = VTOI(ap->a_tdvp);
1958 fncp = ap->a_fnch->ncp;
1959 tncp = ap->a_tnch->ncp;
b3deaf57
MD
1960 ip = VTOI(fncp->nc_vp);
1961 KKASSERT(ip != NULL);
d113fda1 1962
b0aab9b9
MD
1963 hmp = ip->hmp;
1964
f437a2ab
MD
1965 if (fdip->obj_localization != tdip->obj_localization)
1966 return(EXDEV);
1967 if (fdip->obj_localization != ip->obj_localization)
1968 return(EXDEV);
1969
d113fda1
MD
1970 if (fdip->flags & HAMMER_INODE_RO)
1971 return (EROFS);
1972 if (tdip->flags & HAMMER_INODE_RO)
1973 return (EROFS);
1974 if (ip->flags & HAMMER_INODE_RO)
1975 return (EROFS);
b0aab9b9 1976 if ((error = hammer_checkspace(hmp, HAMMER_CHKSPC_CREATE)) != 0)
e63644f0 1977 return (error);
d113fda1 1978
b0aab9b9
MD
1979 lwkt_gettoken(&hmp->fs_token);
1980 hammer_start_transaction(&trans, hmp);
ce0138a6 1981 ++hammer_stats_file_iopsw;
8cd0a023
MD
1982
1983 /*
b3deaf57
MD
1984 * Remove tncp from the target directory and then link ip as
1985 * tncp. XXX pass trans to dounlink
42c7d26b
MD
1986 *
1987 * Force the inode sync-time to match the transaction so it is
1988 * in-sync with the creation of the target directory entry.
8cd0a023 1989 */
d7e278bb
MD
1990 error = hammer_dounlink(&trans, ap->a_tnch, ap->a_tdvp,
1991 ap->a_cred, 0, -1);
42c7d26b 1992 if (error == 0 || error == ENOENT) {
5a930e66
MD
1993 error = hammer_ip_add_directory(&trans, tdip,
1994 tncp->nc_name, tncp->nc_nlen,
1995 ip);
42c7d26b
MD
1996 if (error == 0) {
1997 ip->ino_data.parent_obj_id = tdip->obj_id;
cc0758d0 1998 ip->ino_data.ctime = trans.time;
e98f1b96 1999 hammer_modify_inode(&trans, ip, HAMMER_INODE_DDIRTY);
42c7d26b
MD
2000 }
2001 }
b3deaf57
MD
2002 if (error)
2003 goto failed; /* XXX */
8cd0a023
MD
2004
2005 /*
2006 * Locate the record in the originating directory and remove it.
2007 *
2008 * Calculate the namekey and setup the key range for the scan. This
2009 * works kinda like a chained hash table where the lower 32 bits
2010 * of the namekey synthesize the chain.
2011 *
2012 * The key range is inclusive of both key_beg and key_end.
2013 */
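	/*
	 * (Editorial sketch, not part of the original source: key_beg is
	 *  set to the computed namekey and key_end to namekey +
	 *  max_iterations, so the scan below walks one hash "chain" of
	 *  possible collisions and compares each candidate's name against
	 *  fncp->nc_name.)
	 */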
5e435c92
MD
2014 namekey = hammer_directory_namekey(fdip, fncp->nc_name, fncp->nc_nlen,
2015 &max_iterations);
6a37e7e4 2016retry:
bcac4bbb 2017 hammer_init_cursor(&trans, &cursor, &fdip->cache[1], fdip);
5a930e66 2018 cursor.key_beg.localization = fdip->obj_localization +
beec5dc4 2019 hammer_dir_localization(fdip);
8cd0a023
MD
2020 cursor.key_beg.obj_id = fdip->obj_id;
2021 cursor.key_beg.key = namekey;
d5530d22 2022 cursor.key_beg.create_tid = 0;
8cd0a023
MD
2023 cursor.key_beg.delete_tid = 0;
2024 cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
2025 cursor.key_beg.obj_type = 0;
2026
2027 cursor.key_end = cursor.key_beg;
5e435c92 2028 cursor.key_end.key += max_iterations;
d5530d22
MD
2029 cursor.asof = fdip->obj_asof;
2030 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;
8cd0a023
MD
2031
2032 /*
2033 * Scan all matching records (the chain), locate the one matching
a89aec1b 2034 * the requested path component.
8cd0a023
MD
2035 *
2036 * The hammer_ip_*() functions merge in-memory records with on-disk
2037 * records for the purposes of the search.
2038 */
4e17f465 2039 error = hammer_ip_first(&cursor);
a89aec1b 2040 while (error == 0) {
8cd0a023
MD
2041 if (hammer_ip_resolve_data(&cursor) != 0)
2042 break;
11ad5ade
MD
2043 nlen = cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF;
2044 KKASSERT(nlen > 0);
2045 if (fncp->nc_nlen == nlen &&
2046 bcmp(fncp->nc_name, cursor.data->entry.name, nlen) == 0) {
8cd0a023
MD
2047 break;
2048 }
a89aec1b 2049 error = hammer_ip_next(&cursor);
8cd0a023 2050 }
8cd0a023
MD
2051
2052 /*
2053 * If all is ok we have to get the inode so we can adjust nlinks.
6a37e7e4
MD
2054 *
2055 * WARNING: hammer_ip_del_directory() may have to terminate the
2056 * cursor to avoid a recursion. It's ok to call hammer_done_cursor()
2057 * twice.
8cd0a023 2058 */
9944ae54 2059 if (error == 0)
6a37e7e4 2060 error = hammer_ip_del_directory(&trans, &cursor, fdip, ip);
b84de5af
MD
2061
2062 /*
 2063	 * XXX A deadlock here will break rename's atomicity for the purposes
2064 * of crash recovery.
2065 */
2066 if (error == EDEADLK) {
b84de5af 2067 hammer_done_cursor(&cursor);
b84de5af
MD
2068 goto retry;
2069 }
2070
2071 /*
2072 * Cleanup and tell the kernel that the rename succeeded.
036ea0c3
MD
2073 *
2074 * NOTE: ip->vp, if non-NULL, cannot be directly referenced
2075 * without formally acquiring the vp since the vp might
 2076	 *	 have zero refs on it, or be in the middle of a reclaim,
2077 * etc.
b84de5af 2078 */
c0ade690 2079 hammer_done_cursor(&cursor);
fbb84158 2080 if (error == 0) {
6a37e7e4 2081 cache_rename(ap->a_fnch, ap->a_tnch);
fbb84158
MD
2082 hammer_knote(ap->a_fdvp, NOTE_WRITE);
2083 hammer_knote(ap->a_tdvp, NOTE_WRITE);
036ea0c3
MD
2084 while (ip->vp) {
2085 struct vnode *vp;
2086
2087 error = hammer_get_vnode(ip, &vp);
2088 if (error == 0 && vp) {
2089 vn_unlock(vp);
2090 hammer_knote(ip->vp, NOTE_RENAME);
2091 vrele(vp);
2092 break;
2093 }
2094 kprintf("Debug: HAMMER ip/vp race2 avoided\n");
2095 }
fbb84158 2096 }
b84de5af 2097
b3deaf57 2098failed:
b84de5af 2099 hammer_done_transaction(&trans);
b0aab9b9 2100 lwkt_reltoken(&hmp->fs_token);
8cd0a023 2101 return (error);
427e5fc6
MD
2102}
2103
66325755
MD
2104/*
2105 * hammer_vop_nrmdir { nch, dvp, cred }
2106 */
427e5fc6
MD
2107static
2108int
66325755 2109hammer_vop_nrmdir(struct vop_nrmdir_args *ap)
427e5fc6 2110{
b84de5af 2111 struct hammer_transaction trans;
e63644f0 2112 struct hammer_inode *dip;
b0aab9b9 2113 hammer_mount_t hmp;
b84de5af
MD
2114 int error;
2115
e63644f0 2116 dip = VTOI(ap->a_dvp);
b0aab9b9 2117 hmp = dip->hmp;
e63644f0
MD
2118
2119 if (hammer_nohistory(dip) == 0 &&
b0aab9b9 2120 (error = hammer_checkspace(hmp, HAMMER_CHKSPC_REMOVE)) != 0) {
e63644f0
MD
2121 return (error);
2122 }
2123
b0aab9b9
MD
2124 lwkt_gettoken(&hmp->fs_token);
2125 hammer_start_transaction(&trans, hmp);
ce0138a6 2126 ++hammer_stats_file_iopsw;
d7e278bb 2127 error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp, ap->a_cred, 0, 1);
b84de5af 2128 hammer_done_transaction(&trans);
fbb84158
MD
2129 if (error == 0)
2130 hammer_knote(ap->a_dvp, NOTE_WRITE | NOTE_LINK);
b0aab9b9 2131 lwkt_reltoken(&hmp->fs_token);
b84de5af 2132 return (error);
427e5fc6
MD
2133}
2134
349433c9
MD
2135/*
2136 * hammer_vop_markatime { vp, cred }
2137 */
2138static
2139int
2140hammer_vop_markatime(struct vop_markatime_args *ap)
2141{
2142 struct hammer_transaction trans;
2143 struct hammer_inode *ip;
b0aab9b9 2144 hammer_mount_t hmp;
349433c9
MD
2145
2146 ip = VTOI(ap->a_vp);
2147 if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
2148 return (EROFS);
2149 if (ip->flags & HAMMER_INODE_RO)
2150 return (EROFS);
b0aab9b9
MD
2151 hmp = ip->hmp;
2152 if (hmp->mp->mnt_flag & MNT_NOATIME)
349433c9 2153 return (0);
b0aab9b9
MD
2154 lwkt_gettoken(&hmp->fs_token);
2155 hammer_start_transaction(&trans, hmp);
349433c9
MD
2156 ++hammer_stats_file_iopsw;
2157
2158 ip->ino_data.atime = trans.time;
e98f1b96 2159 hammer_modify_inode(&trans, ip, HAMMER_INODE_ATIME);
349433c9
MD
2160 hammer_done_transaction(&trans);
2161 hammer_knote(ap->a_vp, NOTE_ATTRIB);
b0aab9b9 2162 lwkt_reltoken(&hmp->fs_token);
349433c9
MD
2163 return (0);
2164}
2165
66325755
MD
2166/*
2167 * hammer_vop_setattr { vp, vap, cred }
2168 */
427e5fc6
MD
2169static
2170int
66325755 2171hammer_vop_setattr(struct vop_setattr_args *ap)
427e5fc6 2172{
8cd0a023 2173 struct hammer_transaction trans;
8cd0a023 2174 struct hammer_inode *ip;
b0aab9b9
MD
2175 struct vattr *vap;
2176 hammer_mount_t hmp;
8cd0a023
MD
2177 int modflags;
2178 int error;
d5ef456e 2179 int truncating;
4a2796f3 2180 int blksize;
fbb84158 2181 int kflags;
6362a262 2182#if 0
4a2796f3 2183 int64_t aligned_size;
6362a262 2184#endif
8cd0a023 2185 u_int32_t flags;
8cd0a023
MD
2186
2187 vap = ap->a_vap;
2188 ip = ap->a_vp->v_data;
2189 modflags = 0;
fbb84158 2190 kflags = 0;
b0aab9b9 2191 hmp = ip->hmp;
8cd0a023
MD
2192
2193 if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
2194 return(EROFS);
d113fda1
MD
2195 if (ip->flags & HAMMER_INODE_RO)
2196 return (EROFS);
e63644f0 2197 if (hammer_nohistory(ip) == 0 &&
b0aab9b9 2198 (error = hammer_checkspace(hmp, HAMMER_CHKSPC_REMOVE)) != 0) {
e63644f0
MD
2199 return (error);
2200 }
8cd0a023 2201
b0aab9b9
MD
2202 lwkt_gettoken(&hmp->fs_token);
2203 hammer_start_transaction(&trans, hmp);
ce0138a6 2204 ++hammer_stats_file_iopsw;
8cd0a023
MD
2205 error = 0;
2206
2207 if (vap->va_flags != VNOVAL) {
2208 flags = ip->ino_data.uflags;
2209 error = vop_helper_setattr_flags(&flags, vap->va_flags,
2210 hammer_to_unix_xid(&ip->ino_data.uid),
2211 ap->a_cred);
2212 if (error == 0) {
2213 if (ip->ino_data.uflags != flags) {
2214 ip->ino_data.uflags = flags;
cc0758d0 2215 ip->ino_data.ctime = trans.time;
8cd0a023 2216 modflags |= HAMMER_INODE_DDIRTY;
fbb84158 2217 kflags |= NOTE_ATTRIB;
8cd0a023
MD
2218 }
2219 if (ip->ino_data.uflags & (IMMUTABLE | APPEND)) {
2220 error = 0;
2221 goto done;
2222 }
2223 }
2224 goto done;
2225 }
2226 if (ip->ino_data.uflags & (IMMUTABLE | APPEND)) {
2227 error = EPERM;
2228 goto done;
2229 }
7538695e
MD
2230 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
2231 mode_t cur_mode = ip->ino_data.mode;
2232 uid_t cur_uid = hammer_to_unix_xid(&ip->ino_data.uid);
2233 gid_t cur_gid = hammer_to_unix_xid(&ip->ino_data.gid);
2234 uuid_t uuid_uid;
2235 uuid_t uuid_gid;
2236
2237 error = vop_helper_chown(ap->a_vp, vap->va_uid, vap->va_gid,
2238 ap->a_cred,
2239 &cur_uid, &cur_gid, &cur_mode);
2240 if (error == 0) {
2241 hammer_guid_to_uuid(&uuid_uid, cur_uid);
2242 hammer_guid_to_uuid(&uuid_gid, cur_gid);
2243 if (bcmp(&uuid_uid, &ip->ino_data.uid,
2244 sizeof(uuid_uid)) ||
2245 bcmp(&uuid_gid, &ip->ino_data.gid,
2246 sizeof(uuid_gid)) ||
9a620123 2247 ip->ino_data.mode != cur_mode) {
7538695e
MD
2248 ip->ino_data.uid = uuid_uid;
2249 ip->ino_data.gid = uuid_gid;
2250 ip->ino_data.mode = cur_mode;
cc0758d0
MD
2251 ip->ino_data.ctime = trans.time;
2252 modflags |= HAMMER_INODE_DDIRTY;
7538695e 2253 }
fbb84158 2254 kflags |= NOTE_ATTRIB;
8cd0a023
MD
2255 }
2256 }
11ad5ade 2257 while (vap->va_size != VNOVAL && ip->ino_data.size != vap->va_size) {
8cd0a023
MD
2258 switch(ap->a_vp->v_type) {
2259 case VREG:
11ad5ade 2260 if (vap->va_size == ip->ino_data.size)
d5ef456e 2261 break;
47f363f1
MD
2262
2263 /*
c58123da
MD
2264 * Log the operation if in fast-fsync mode or if
2265 * there are unterminated redo write records present.
2266 *
2267 * The second check is needed so the recovery code
2268 * properly truncates write redos even if nominal
 2269	 * REDO operations are turned off due to excessive
2270 * writes, because the related records might be
2271 * destroyed and never lay down a TERM_WRITE.
47f363f1 2272 */
c58123da
MD
2273 if ((ip->flags & HAMMER_INODE_REDO) ||
2274 (ip->flags & HAMMER_INODE_RDIRTY)) {
47f363f1
MD
2275 error = hammer_generate_redo(&trans, ip,
2276 vap->va_size,
2277 HAMMER_REDO_TRUNC,
2278 NULL, 0);
2279 }
2280 blksize = hammer_blocksize(vap->va_size);
2281
b84de5af
MD
2282 /*
 2283	 * XXX break atomicity, we can deadlock the backend
2284 * if we do not release the lock. Probably not a
2285 * big deal here.
2286 */
11ad5ade 2287 if (vap->va_size < ip->ino_data.size) {
6362a262
MD
2288 nvtruncbuf(ap->a_vp, vap->va_size,
2289 blksize,
753df37e
MD
2290 hammer_blockoff(vap->va_size),
2291 0);
d5ef456e 2292 truncating = 1;
fbb84158 2293 kflags |= NOTE_WRITE;
d5ef456e 2294 } else {
6362a262
MD
2295 nvextendbuf(ap->a_vp,
2296 ip->ino_data.size,
2297 vap->va_size,
2298 hammer_blocksize(ip->ino_data.size),
2299 hammer_blocksize(vap->va_size),
2300 hammer_blockoff(ip->ino_data.size),
2301 hammer_blockoff(vap->va_size),
2302 0);
d5ef456e 2303 truncating = 0;
fbb84158 2304 kflags |= NOTE_WRITE | NOTE_EXTEND;
c0ade690 2305 }
11ad5ade 2306 ip->ino_data.size = vap->va_size;
cc0758d0 2307 ip->ino_data.mtime = trans.time;
47f363f1 2308 /* XXX safe to use SDIRTY instead of DDIRTY here? */
cc0758d0 2309 modflags |= HAMMER_INODE_MTIME | HAMMER_INODE_DDIRTY;
d5ef456e 2310
b84de5af 2311 /*
6362a262
MD
2312 * On-media truncation is cached in the inode until
2313 * the inode is synchronized. We must immediately
2314 * handle any frontend records.
b84de5af 2315 */
d5ef456e 2316 if (truncating) {
47637bff 2317 hammer_ip_frontend_trunc(ip, vap->va_size);
0832c9bb
MD
2318#ifdef DEBUG_TRUNCATE
2319 if (HammerTruncIp == NULL)
2320 HammerTruncIp = ip;
2321#endif
b84de5af
MD
2322 if ((ip->flags & HAMMER_INODE_TRUNCATED) == 0) {
2323 ip->flags |= HAMMER_INODE_TRUNCATED;
2324 ip->trunc_off = vap->va_size;
0d60b0ab 2325 hammer_inode_dirty(ip);
0832c9bb
MD
2326#ifdef DEBUG_TRUNCATE
2327 if (ip == HammerTruncIp)
973c11b9
MD
2328 kprintf("truncate1 %016llx\n",
2329 (long long)ip->trunc_off);
0832c9bb 2330#endif
b84de5af
MD
2331 } else if (ip->trunc_off > vap->va_size) {
2332 ip->trunc_off = vap->va_size;
0832c9bb
MD
2333#ifdef DEBUG_TRUNCATE
2334 if (ip == HammerTruncIp)
973c11b9
MD
2335 kprintf("truncate2 %016llx\n",
2336 (long long)ip->trunc_off);
0832c9bb
MD
2337#endif
2338 } else {
2339#ifdef DEBUG_TRUNCATE
2340 if (ip == HammerTruncIp)
973c11b9
MD
2341 kprintf("truncate3 %016llx (ignored)\n",
2342 (long long)vap->va_size);
0832c9bb 2343#endif
b84de5af 2344 }
d5ef456e 2345 }
b84de5af 2346
6362a262 2347#if 0
d5ef456e 2348 /*
6362a262
MD
2349 * When truncating, nvtruncbuf() may have cleaned out
2350 * a portion of the last block on-disk in the buffer
2351 * cache. We must clean out any frontend records
2352 * for blocks beyond the new last block.
d5ef456e 2353 */
4a2796f3
MD
2354 aligned_size = (vap->va_size + (blksize - 1)) &
2355 ~(int64_t)(blksize - 1);
b84de5af 2356 if (truncating && vap->va_size < aligned_size) {
4a2796f3 2357 aligned_size -= blksize;
47637bff 2358 hammer_ip_frontend_trunc(ip, aligned_size);
d5ef456e 2359 }
6362a262 2360#endif
76376933 2361 break;
8cd0a023 2362 case VDATABASE:
b84de5af
MD
2363 if ((ip->flags & HAMMER_INODE_TRUNCATED) == 0) {
2364 ip->flags |= HAMMER_INODE_TRUNCATED;
2365 ip->trunc_off = vap->va_size;
0d60b0ab 2366 hammer_inode_dirty(ip);
b84de5af
MD
2367 } else if (ip->trunc_off > vap->va_size) {
2368 ip->trunc_off = vap->va_size;
2369 }
47637bff 2370 hammer_ip_frontend_trunc(ip, vap->va_size);
11ad5ade 2371 ip->ino_data.size = vap->va_size;
cc0758d0
MD
2372 ip->ino_data.mtime = trans.time;
2373 modflags |= HAMMER_INODE_MTIME | HAMMER_INODE_DDIRTY;
fbb84158 2374 kflags |= NOTE_ATTRIB;
8cd0a023
MD
2375 break;
2376 default:
2377 error = EINVAL;
2378 goto done;
2379 }
d26d0ae9 2380 break;
8cd0a023
MD
2381 }
2382 if (vap->va_atime.tv_sec != VNOVAL) {
cc0758d0 2383 ip->ino_data.atime = hammer_timespec_to_time(&vap->va_atime);
ddfdf542 2384 modflags |= HAMMER_INODE_ATIME;
fbb84158 2385 kflags |= NOTE_ATTRIB;
8cd0a023
MD
2386 }
2387 if (vap->va_mtime.tv_sec != VNOVAL) {
cc0758d0 2388 ip->ino_data.mtime = hammer_timespec_to_time(&vap->va_mtime);
ddfdf542 2389 modflags |= HAMMER_INODE_MTIME;
fbb84158 2390 kflags |= NOTE_ATTRIB;
8cd0a023
MD
2391 }
2392 if (vap->va_mode != (mode_t)VNOVAL) {
7538695e
MD
2393 mode_t cur_mode = ip->ino_data.mode;
2394 uid_t cur_uid = hammer_to_unix_xid(&ip->ino_data.uid);
2395 gid_t cur_gid = hammer_to_unix_xid(&ip->ino_data.gid);
2396
2397 error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred,
2398 cur_uid, cur_gid, &cur_mode);
2399 if (error == 0 && ip->ino_data.mode != cur_mode) {
2400 ip->ino_data.mode = cur_mode;
cc0758d0 2401 ip->ino_data.ctime = trans.time;
8cd0a023 2402 modflags |= HAMMER_INODE_DDIRTY;
fbb84158 2403 kflags |= NOTE_ATTRIB;
8cd0a023
MD
2404 }
2405 }
2406done:
b84de5af 2407 if (error == 0)
e98f1b96 2408 hammer_modify_inode(&trans, ip, modflags);
b84de5af 2409 hammer_done_transaction(&trans);
fbb84158 2410 hammer_knote(ap->a_vp, kflags);
b0aab9b9 2411 lwkt_reltoken(&hmp->fs_token);
8cd0a023 2412 return (error);
427e5fc6
MD
2413}
2414
66325755
MD
2415/*
2416 * hammer_vop_nsymlink { nch, dvp, vpp, cred, vap, target }
2417 */
427e5fc6
MD
2418static
2419int
66325755 2420hammer_vop_nsymlink(struct vop_nsymlink_args *ap)
427e5fc6 2421{
7a04d74f
MD
2422 struct hammer_transaction trans;
2423 struct hammer_inode *dip;
2424 struct hammer_inode *nip;
7a04d74f 2425 hammer_record_t record;
b0aab9b9
MD
2426 struct nchandle *nch;
2427 hammer_mount_t hmp;
7a04d74f
MD
2428 int error;
2429 int bytes;
2430
2431 ap->a_vap->va_type = VLNK;
2432
2433 nch = ap->a_nch;
2434 dip = VTOI(ap->a_dvp);
b0aab9b9 2435 hmp = dip->hmp;
7a04d74f 2436
d113fda1
MD
2437 if (dip->flags & HAMMER_INODE_RO)
2438 return (EROFS);
b0aab9b9 2439 if ((error = hammer_checkspace(hmp, HAMMER_CHKSPC_CREATE)) != 0)
e63644f0 2440 return (error);
d113fda1 2441
7a04d74f
MD
2442 /*
2443 * Create a transaction to cover the operations we perform.
2444 */
b0aab9b9
MD
2445 lwkt_gettoken(&hmp->fs_token);
2446 hammer_start_transaction(&trans, hmp);
ce0138a6 2447 ++hammer_stats_file_iopsw;
7a04d74f
MD
2448
2449 /*
2450 * Create a new filesystem object of the requested type. The
2451 * returned inode will be referenced but not locked.
2452 */
2453
5a930e66 2454 error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred,
5a64efa1
MD
2455 dip, nch->ncp->nc_name, nch->ncp->nc_nlen,
2456 NULL, &nip);
7a04d74f 2457 if (error) {
b84de5af 2458 hammer_done_transaction(&trans);
7a04d74f 2459 *ap->a_vpp = NULL;
b0aab9b9 2460 lwkt_reltoken(&hmp->fs_token);
7a04d74f
MD
2461 return (error);
2462 }
2463
7a04d74f
MD
2464 /*
2465 * Add a record representing the symlink. symlink stores the link
 2466	 * as pure data, not a string, and is not \0 terminated.
2467 */
2468 if (error == 0) {
7a04d74f
MD
2469 bytes = strlen(ap->a_target);
2470
2f85fa4d
MD
2471 if (bytes <= HAMMER_INODE_BASESYMLEN) {
2472 bcopy(ap->a_target, nip->ino_data.ext.symlink, bytes);
2473 } else {
2474 record = hammer_alloc_mem_record(nip, bytes);
2475 record->type = HAMMER_MEM_RECORD_GENERAL;
2476
5a930e66
MD
2477 record->leaf.base.localization = nip->obj_localization +
2478 HAMMER_LOCALIZE_MISC;
2f85fa4d
MD
2479 record->leaf.base.key = HAMMER_FIXKEY_SYMLINK;
2480 record->leaf.base.rec_type = HAMMER_RECTYPE_FIX;
2481 record->leaf.data_len = bytes;
2482 KKASSERT(HAMMER_SYMLINK_NAME_OFF == 0);
2483 bcopy(ap->a_target, record->data->symlink.name, bytes);
2484 error = hammer_ip_add_record(&trans, record);
2485 }
42c7d26b
MD
2486
2487 /*
2488 * Set the file size to the length of the link.
2489 */
2490 if (error == 0) {
11ad5ade 2491 nip->ino_data.size = bytes;
e98f1b96 2492 hammer_modify_inode(&trans, nip, HAMMER_INODE_DDIRTY);
42c7d26b 2493 }
7a04d74f 2494 }
1f07f686 2495 if (error == 0)
5a930e66
MD
2496 error = hammer_ip_add_directory(&trans, dip, nch->ncp->nc_name,
2497 nch->ncp->nc_nlen, nip);
7a04d74f
MD
2498
2499 /*
2500 * Finish up.
2501 */
2502 if (error) {
2503 hammer_rel_inode(nip, 0);
7a04d74f
MD
2504 *ap->a_vpp = NULL;
2505 } else {
e8599db1 2506 error = hammer_get_vnode(nip, ap->a_vpp);
7a04d74f
MD
2507 hammer_rel_inode(nip, 0);
2508 if (error == 0) {
2509 cache_setunresolved(ap->a_nch);
2510 cache_setvp(ap->a_nch, *ap->a_vpp);
fbb84158 2511 hammer_knote(ap->a_dvp, NOTE_WRITE);
7a04d74f
MD
2512 }
2513 }
b84de5af 2514 hammer_done_transaction(&trans);
b0aab9b9 2515 lwkt_reltoken(&hmp->fs_token);
7a04d74f 2516 return (error);
427e5fc6
MD
2517}
2518
66325755
MD
2519/*
2520 * hammer_vop_nwhiteout { nch, dvp, cred, flags }
2521 */
427e5fc6
MD
2522static
2523int
66325755 2524hammer_vop_nwhiteout(struct vop_nwhiteout_args *ap)
427e5fc6 2525{
b84de5af 2526 struct hammer_transaction trans;
e63644f0 2527 struct hammer_inode *dip;
b0aab9b9 2528 hammer_mount_t hmp;
b84de5af
MD
2529 int error;
2530
e63644f0 2531 dip = VTOI(ap->a_dvp);
b0aab9b9 2532 hmp = dip->hmp;
e63644f0
MD
2533
2534 if (hammer_nohistory(dip) == 0 &&
b0aab9b9 2535 (error = hammer_checkspace(hmp, HAMMER_CHKSPC_CREATE)) != 0) {
e63644f0
MD
2536 return (error);
2537 }
2538
b0aab9b9
MD
2539 lwkt_gettoken(&hmp->fs_token);
2540 hammer_start_transaction(&trans, hmp);
ce0138a6 2541 ++hammer_stats_file_iopsw;
b84de5af 2542 error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp,
d7e278bb 2543 ap->a_cred, ap->a_flags, -1);
b84de5af 2544 hammer_done_transaction(&trans);
b0aab9b9 2545 lwkt_reltoken(&hmp->fs_token);
b84de5af
MD
2546
2547 return (error);
427e5fc6
MD
2548}
2549
7dc57964
MD
2550/*
2551 * hammer_vop_ioctl { vp, command, data, fflag, cred }
2552 */
2553static
2554int
2555hammer_vop_ioctl(struct vop_ioctl_args *ap)
2556{
2557 struct hammer_inode *ip = ap->a_vp->v_data;
b0aab9b9
MD
2558 hammer_mount_t hmp = ip->hmp;
2559 int error;
7dc57964 2560
ce0138a6 2561 ++hammer_stats_file_iopsr;
b0aab9b9
MD
2562 lwkt_gettoken(&hmp->fs_token);
2563 error = hammer_ioctl(ip, ap->a_command, ap->a_data,
2564 ap->a_fflag, ap->a_cred);
2565 lwkt_reltoken(&hmp->fs_token);
2566 return (error);
7dc57964
MD
2567}
2568
513ca7d7
MD
2569static
2570int
2571hammer_vop_mountctl(struct vop_mountctl_args *ap)
2572{
dad088a5 2573 static const struct mountctl_opt extraopt[] = {
7866ea2a 2574 { HMNT_NOHISTORY, "nohistory" },
dad088a5
MD
2575 { HMNT_MASTERID, "master" },
2576 { 0, NULL}
2577
2578 };
2579 struct hammer_mount *hmp;
513ca7d7 2580 struct mount *mp;
dad088a5 2581 int usedbytes;
513ca7d7
MD
2582 int error;
2583
dad088a5
MD
2584 error = 0;
2585 usedbytes = 0;
513ca7d7 2586 mp = ap->a_head.a_ops->head.vv_mount;
dad088a5
MD
2587 KKASSERT(mp->mnt_data != NULL);
2588 hmp = (struct hammer_mount *)mp->mnt_data;
513ca7d7 2589
b0aab9b9 2590 lwkt_gettoken(&hmp->fs_token);
dad088a5 2591
b0aab9b9 2592 switch(ap->a_op) {
513ca7d7
MD
2593 case MOUNTCTL_SET_EXPORT:
2594 if (ap->a_ctllen != sizeof(struct export_args))
2595 error = EINVAL;
b424ca30
MD
2596 else
2597 error = hammer_vfs_export(mp, ap->a_op,
513ca7d7
MD
2598 (const struct export_args *)ap->a_ctl);
2599 break;
dad088a5
MD
2600 case MOUNTCTL_MOUNTFLAGS:
2601 {
2602 /*
2603 * Call standard mountctl VOP function
2604 * so we get user mount flags.
2605 */
2606 error = vop_stdmountctl(ap);
2607 if (error)
2608 break;
2609
2610 usedbytes = *ap->a_res;
2611
eac446c5 2612 if (usedbytes > 0 && usedbytes < ap->a_buflen) {
b0aab9b9
MD
2613 usedbytes += vfs_flagstostr(hmp->hflags, extraopt,
2614 ap->a_buf,
dad088a5
MD
2615 ap->a_buflen - usedbytes,
2616 &error);
dad088a5
MD
2617 }
2618
2619 *ap->a_res += usedbytes;
2620 break;
2621 }
513ca7d7 2622 default:
726e0641 2623 error = vop_stdmountctl(ap);
513ca7d7
MD
2624 break;
2625 }
b0aab9b9 2626 lwkt_reltoken(&hmp->fs_token);
513ca7d7
MD
2627 return(error);
2628}
2629
66325755
MD
2630/*
2631 * hammer_vop_strategy { vp, bio }
8cd0a023
MD
2632 *
2633 * Strategy call, used for regular file read & write only. Note that the
2634 * bp may represent a cluster.
2635 *
2636 * To simplify operation and allow better optimizations in the future,
2637 * this code does not make any assumptions with regards to buffer alignment
2638 * or size.
66325755 2639 */
427e5fc6
MD
2640static
2641int
66325755 2642hammer_vop_strategy(struct vop_strategy_args *ap)
427e5fc6 2643{
8cd0a023
MD
2644 struct buf *bp;
2645 int error;
2646
2647 bp = ap->a_bio->bio_buf;
2648
2649 switch(bp->b_cmd) {
2650 case BUF_CMD_READ:
2651 error = hammer_vop_strategy_read(ap);
2652 break;
2653 case BUF_CMD_WRITE:
2654 error = hammer_vop_strategy_write(ap);
2655 break;
2656 default:
059819e3
MD
2657 bp->b_error = error = EINVAL;
2658 bp->b_flags |= B_ERROR;
2659 biodone(ap->a_bio);
8cd0a023
MD
2660 break;
2661 }
507df98a
ID
2662
2663 /* hammer_dump_dedup_cache(((hammer_inode_t)ap->a_vp->v_data)->hmp); */
2664
8cd0a023 2665 return (error);
427e5fc6
MD
2666}
2667
8cd0a023
MD
2668/*
2669 * Read from a regular file. Iterate the related records and fill in the
2670 * BIO/BUF. Gaps are zero-filled.
2671 *
2672 * The support code in hammer_object.c should be used to deal with mixed
2673 * in-memory and on-disk records.
2674 *
4a2796f3
MD
2675 * NOTE: Can be called from the cluster code with an oversized buf.
2676 *
8cd0a023
MD
2677 * XXX atime update
2678 */
2679static
2680int
2681hammer_vop_strategy_read(struct vop_strategy_args *ap)
2682{
36f82b23
MD
2683 struct hammer_transaction trans;
2684 struct hammer_inode *ip;
39d8fd63 2685 struct hammer_inode *dip;
b0aab9b9 2686 hammer_mount_t hmp;
8cd0a023 2687 struct hammer_cursor cursor;
8cd0a023 2688 hammer_base_elm_t base;
4a2796f3 2689 hammer_off_t disk_offset;
8cd0a023 2690 struct bio *bio;
a99b9ea2 2691 struct bio *nbio;
8cd0a023
MD
2692 struct buf *bp;
2693 int64_t rec_offset;
a89aec1b 2694 int64_t ran_end;
195c19a1 2695 int64_t tmp64;
8cd0a023
MD
2696 int error;
2697 int boff;
2698 int roff;
2699 int n;
b4f86ea3 2700 int isdedupable;
8cd0a023
MD
2701
2702 bio = ap->a_bio;
2703 bp = bio->bio_buf;
36f82b23 2704 ip = ap->a_vp->v_data;
b0aab9b9 2705 hmp = ip->hmp;
8cd0a023 2706
a99b9ea2
MD
2707 /*
2708 * The zone-2 disk offset may have been set by the cluster code via
4a2796f3 2709 * a BMAP operation, or else should be NOOFFSET.
a99b9ea2 2710 *
4a2796f3 2711 * Checking the high bits for a match against zone-2 should suffice.
b4f86ea3
MD
2712 *
2713 * In cases where a lot of data duplication is present it may be
 2714	 * more beneficial to drop through and double-buffer through the
2715 * device.
a99b9ea2
MD
2716 */
2717 nbio = push_bio(bio);
9a98f3cc 2718 if ((nbio->bio_offset & HAMMER_OFF_ZONE_MASK) ==
1b0ab2c3 2719 HAMMER_ZONE_LARGE_DATA) {
9a98f3cc
MD
2720 if (hammer_double_buffer == 0) {
2721 lwkt_gettoken(&hmp->fs_token);
2722 error = hammer_io_direct_read(hmp, nbio, NULL);
2723 lwkt_reltoken(&hmp->fs_token);
2724 return (error);
2725 }
2726
2727 /*
2728 * Try to shortcut requests for double_buffer mode too.
2729 * Since this mode runs through the device buffer cache
2730 * only compatible buffer sizes (meaning those generated
2731 * by normal filesystem buffers) are legal.
2732 */
2733 if (hammer_live_dedup == 0 && (bp->b_flags & B_PAGING) == 0) {
32fcc103 2734 lwkt_gettoken(&hmp->fs_token);
9a98f3cc 2735 error = hammer_io_indirect_read(hmp, nbio, NULL);
32fcc103 2736 lwkt_reltoken(&hmp->fs_token);
9a98f3cc
MD
2737 return (error);
2738 }
a99b9ea2
MD
2739 }
2740
2741 /*
4a2796f3
MD
2742 * Well, that sucked. Do it the hard way. If all the stars are
2743 * aligned we may still be able to issue a direct-read.
a99b9ea2 2744 */
b0aab9b9
MD
2745 lwkt_gettoken(&hmp->fs_token);
2746 hammer_simple_transaction(&trans, hmp);
47637bff 2747 hammer_init_cursor(&trans, &cursor, &ip->cache[1], ip);
8cd0a023
MD
2748
2749 /*
 2750	 * Key range (begin and end inclusive) to scan.  Note that the keys
 2751	 * stored in the actual records represent BASE+LEN, not BASE.  The
MD
2751 * stored in the actual records represent BASE+LEN, not BASE. The
2752 * first record containing bio_offset will have a key > bio_offset.
8cd0a023 2753 */
5a930e66
MD
2754 cursor.key_beg.localization = ip->obj_localization +
2755 HAMMER_LOCALIZE_MISC;
8cd0a023 2756 cursor.key_beg.obj_id = ip->obj_id;
d5530d22 2757 cursor.key_beg.create_tid = 0;
8cd0a023 2758 cursor.key_beg.delete_tid = 0;
8cd0a023 2759 cursor.key_beg.obj_type = 0;
c0ade690 2760 cursor.key_beg.key = bio->bio_offset + 1;
d5530d22 2761 cursor.asof = ip->obj_asof;
bf3b416b 2762 cursor.flags |= HAMMER_CURSOR_ASOF;
8cd0a023
MD
2763
2764 cursor.key_end = cursor.key_beg;
11ad5ade 2765 KKASSERT(ip->ino_data.obj_type == HAMMER_OBJTYPE_REGFILE);
b84de5af 2766#if 0
11ad5ade 2767 if (ip->ino_data.obj_type == HAMMER_OBJTYPE_DBFILE) {
a89aec1b
MD
2768 cursor.key_beg.rec_type = HAMMER_RECTYPE_DB;
2769 cursor.key_end.rec_type = HAMMER_RECTYPE_DB;
2770 cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;
b84de5af
MD
2771 } else
2772#endif
2773 {
c0ade690 2774 ran_end = bio->bio_offset + bp->b_bufsize;
a89aec1b
MD
2775 cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA;
2776 cursor.key_end.rec_type = HAMMER_RECTYPE_DATA;
195c19a1
MD
2777 tmp64 = ran_end + MAXPHYS + 1; /* work-around GCC-4 bug */
2778 if (tmp64 < ran_end)
a89aec1b
MD
2779 cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;
2780 else
7f7c1f84 2781 cursor.key_end.key = ran_end + MAXPHYS + 1;
a89aec1b 2782 }
d26d0ae9 2783 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
8cd0a023 2784
18bee4a2
MD
2785 /*
2786 * Set NOSWAPCACHE for cursor data extraction if double buffering
2787 * is disabled or (if the file is not marked cacheable via chflags
2788 * and vm.swapcache_use_chflags is enabled).
2789 */
2790 if (hammer_double_buffer == 0 ||
2791 ((ap->a_vp->v_flag & VSWAPCACHE) == 0 &&
2792 vm_swapcache_use_chflags)) {
2793 cursor.flags |= HAMMER_CURSOR_NOSWAPCACHE;
2794 }
2795
4e17f465 2796 error = hammer_ip_first(&cursor);
8cd0a023
MD
2797 boff = 0;
2798
a89aec1b 2799 while (error == 0) {
47637bff
MD
2800 /*
2801 * Get the base file offset of the record. The key for
 2802	 * data records is (base + bytes) rather than (base).
2803 */
11ad5ade 2804 base = &cursor.leaf->base;
11ad5ade 2805 rec_offset = base->key - cursor.leaf->data_len;
8cd0a023 2806
66325755 2807 /*
a89aec1b 2808 * Calculate the gap, if any, and zero-fill it.
1fef775e
MD
2809 *
 2810	 * n is the offset of the start of the record versus our
2811 * current seek offset in the bio.
66325755 2812 */
8cd0a023
MD
2813 n = (int)(rec_offset - (bio->bio_offset + boff));
2814 if (n > 0) {
a89aec1b
MD
2815 if (n > bp->b_bufsize - boff)
2816 n = bp->b_bufsize - boff;
8cd0a023
MD
2817 bzero((char *)bp->b_data + boff, n);
2818 boff += n;
2819 n = 0;
66325755 2820 }
8cd0a023
MD
2821
2822 /*
2823 * Calculate the data offset in the record and the number
2824 * of bytes we can copy.
a89aec1b 2825 *
1fef775e
MD
2826 * There are two degenerate cases. First, boff may already
2827 * be at bp->b_bufsize. Secondly, the data offset within
2828 * the record may exceed the record's size.
8cd0a023
MD
2829 */
2830 roff = -n;
b84de5af 2831 rec_offset += roff;
11ad5ade 2832 n = cursor.leaf->data_len - roff;
1fef775e
MD
2833 if (n <= 0) {
2834 kprintf("strategy_read: bad n=%d roff=%d\n", n, roff);
2835 n = 0;
2836 } else if (n > bp->b_bufsize - boff) {
8cd0a023 2837 n = bp->b_bufsize - boff;
1fef775e 2838 }
059819e3 2839
b84de5af 2840 /*
47637bff
MD
2841 * Deal with cached truncations. This cool bit of code
2842 * allows truncate()/ftruncate() to avoid having to sync
2843 * the file.
2844 *
2845 * If the frontend is truncated then all backend records are
2846 * subject to the frontend's truncation.
2847 *
2848 * If the backend is truncated then backend records on-disk
2849 * (but not in-memory) are subject to the backend's
2850 * truncation. In-memory records owned by the backend
2851 * represent data written after the truncation point on the
2852 * backend and must not be truncated.
2853 *
2854 * Truncate operations deal with frontend buffer cache
2855 * buffers and frontend-owned in-memory records synchronously.
b84de5af 2856 */
47637bff 2857 if (ip->flags & HAMMER_INODE_TRUNCATED) {
6362a262
MD
2858 if (hammer_cursor_ondisk(&cursor)/* ||
2859 cursor.iprec->flush_state == HAMMER_FST_FLUSH*/) {
47637bff
MD
2860 if (ip->trunc_off <= rec_offset)
2861 n = 0;
2862 else if (ip->trunc_off < rec_offset + n)
2863 n = (int)(ip->trunc_off - rec_offset);
2864 }
2865 }
2866 if (ip->sync_flags & HAMMER_INODE_TRUNCATED) {
2867 if (hammer_cursor_ondisk(&cursor)) {
2868 if (ip->sync_trunc_off <= rec_offset)
2869 n = 0;
2870 else if (ip->sync_trunc_off < rec_offset + n)
2871 n = (int)(ip->sync_trunc_off - rec_offset);
2872 }
2873 }
b84de5af
MD
2874
2875 /*
47637bff
MD
2876 * Try to issue a direct read into our bio if possible,
2877 * otherwise resolve the element data into a hammer_buffer
2878 * and copy.
4a2796f3
MD
2879 *
2880 * The buffer on-disk should be zerod past any real
2881 * truncation point, but may not be for any synthesized
2882 * truncation point from above.
9a98f3cc
MD
2883 *
2884 * NOTE: disk_offset is only valid if the cursor data is
2885 * on-disk.
b84de5af 2886 */
1b0ab2c3 2887 disk_offset = cursor.leaf->data_offset + roff;
b4f86ea3
MD
2888 isdedupable = (boff == 0 && n == bp->b_bufsize &&
2889 hammer_cursor_ondisk(&cursor) &&
2890 ((int)disk_offset & HAMMER_BUFMASK) == 0);
2891
2892 if (isdedupable && hammer_double_buffer == 0) {
9a98f3cc
MD
2893 /*
2894 * Direct read case
2895 */
1b0ab2c3
MD
2896 KKASSERT((disk_offset & HAMMER_OFF_ZONE_MASK) ==
2897 HAMMER_ZONE_LARGE_DATA);
4a2796f3 2898 nbio->bio_offset = disk_offset;
b0aab9b9 2899 error = hammer_io_direct_read(hmp, nbio, cursor.leaf);
b4f86ea3 2900 if (hammer_live_dedup && error == 0)
507df98a 2901 hammer_dedup_cache_add(ip, cursor.leaf);
47637bff 2902 goto done;
9a98f3cc
MD
2903 } else if (isdedupable) {
2904 /*
2905 * Async I/O case for reading from backing store
2906 * and copying the data to the filesystem buffer.
2907 * live-dedup has to verify the data anyway if it
2908 * gets a hit later so we can just add the entry
2909 * now.
2910 */
2911 KKASSERT((disk_offset & HAMMER_OFF_ZONE_MASK) ==
2912 HAMMER_ZONE_LARGE_DATA);
2913 nbio->bio_offset = disk_offset;
2914 if (hammer_live_dedup)
2915 hammer_dedup_cache_add(ip, cursor.leaf);
2916 error = hammer_io_indirect_read(hmp, nbio, cursor.leaf);
2917 goto done;
47637bff
MD
2918 } else if (n) {
2919 error = hammer_ip_resolve_data(&cursor);
2920 if (error == 0) {
b4f86ea3
MD
2921 if (hammer_live_dedup && isdedupable)
2922 hammer_dedup_cache_add(ip, cursor.leaf);
47637bff
MD
2923 bcopy((char *)cursor.data + roff,
2924 (char *)bp->b_data + boff, n);
2925 }
b84de5af 2926 }
47637bff
MD
2927 if (error)
2928 break;
2929
507df98a
ID
2930 /*
2931 * We have to be sure that the only elements added to the
2932 * dedup cache are those which are already on-media.
2933 */
2934 if (hammer_live_dedup && hammer_cursor_ondisk(&cursor))
2935 hammer_dedup_cache_add(ip, cursor.leaf);
2936
47637bff
MD
2937 /*
2938 * Iterate until we have filled the request.
2939 */
2940 boff += n;
8cd0a023 2941 if (boff == bp->b_bufsize)
66325755 2942 break;
a89aec1b 2943 error = hammer_ip_next(&cursor);
66325755
MD
2944 }
2945
2946 /*
8cd0a023 2947 * There may have been a gap after the last record
66325755 2948 */
8cd0a023
MD
2949 if (error == ENOENT)
2950 error = 0;
2951 if (error == 0 && boff != bp->b_bufsize) {
7f7c1f84 2952 KKASSERT(boff < bp->b_bufsize);
8cd0a023
MD
2953 bzero((char *)bp->b_data + boff, bp->b_bufsize - boff);
2954 /* boff = bp->b_bufsize; */
2955 }
18bee4a2
MD
2956
2957 /*
2958 * Disallow swapcache operation on the vnode buffer if double
2959 * buffering is enabled, the swapcache will get the data via
2960 * the block device buffer.
2961 */
2962 if (hammer_double_buffer)
2963 bp->b_flags |= B_NOTMETA;
2964
2965 /*
2966 * Cleanup
2967 */
8cd0a023 2968 bp->b_resid = 0;
059819e3
MD
2969 bp->b_error = error;
2970 if (error)
2971 bp->b_flags |= B_ERROR;
2972 biodone(ap->a_bio);
47637bff
MD
2973
2974done:
39d8fd63
MD
2975 /*
2976 * Cache the b-tree node for the last data read in cache[1].
2977 *
2978 * If we hit the file EOF then also cache the node in the
 2979	 * governing directory's cache[3], it will be used to initialize
2980 * the inode's cache[1] for any inodes looked up via the directory.
2981 *
2982 * This doesn't reduce disk accesses since the B-Tree chain is
2983 * likely cached, but it does reduce cpu overhead when looking
2984 * up file offsets for cpdup/tar/cpio style iterations.
2985 */
47637bff 2986 if (cursor.node)
bcac4bbb 2987 hammer_cache_node(&ip->cache[1], cursor.node);
39d8fd63
MD
2988 if (ran_end >= ip->ino_data.size) {
2989 dip = hammer_find_inode(&trans, ip->ino_data.parent_obj_id,
2990 ip->obj_asof, ip->obj_localization);
2991 if (dip) {
2992 hammer_cache_node(&dip->cache[3], cursor.node);
2993 hammer_rel_inode(dip, 0);
2994 }
2995 }
47637bff
MD
2996 hammer_done_cursor(&cursor);
2997 hammer_done_transaction(&trans);
b0aab9b9 2998 lwkt_reltoken(&hmp->fs_token);
8cd0a023
MD
2999 return(error);
3000}
3001
a99b9ea2
MD
3002/*
3003 * BMAP operation - used to support cluster_read() only.
3004 *
3005 * (struct vnode *vp, off_t loffset, off_t *doffsetp, int *runp, int *runb)
3006 *
 3007 * This routine may return EOPNOTSUPP if the operation is not supported for
3008 * the specified offset. The contents of the pointer arguments do not
745703c7 3009 * need to be initialized in that case.
a99b9ea2 3010 *
745703c7 3011 * If a disk address is available and properly aligned return 0 with
a99b9ea2
MD
3012 * *doffsetp set to the zone-2 address, and *runp / *runb set appropriately
3013 * to the run-length relative to that offset. Callers may assume that
3014 * *doffsetp is valid if 0 is returned, even if *runp is not sufficiently