Change cluster_read() to not block on read-ahead buffers it is unable to
[dragonfly.git] / sys/vfs/hammer/hammer_vnops.c
/*
 * Copyright (c) 2007-2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.53 2008/05/12 23:15:46 dillon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/namecache.h>
#include <sys/vnode.h>
#include <sys/lockf.h>
#include <sys/event.h>
#include <sys/stat.h>
#include <sys/dirent.h>
#include <vm/vm_extern.h>
#include <vfs/fifofs/fifo.h>
#include "hammer.h"

/*
 * USERFS VNOPS
 */
/*static int hammer_vop_vnoperate(struct vop_generic_args *);*/
static int hammer_vop_fsync(struct vop_fsync_args *);
static int hammer_vop_read(struct vop_read_args *);
static int hammer_vop_write(struct vop_write_args *);
static int hammer_vop_access(struct vop_access_args *);
static int hammer_vop_advlock(struct vop_advlock_args *);
static int hammer_vop_close(struct vop_close_args *);
static int hammer_vop_ncreate(struct vop_ncreate_args *);
static int hammer_vop_getattr(struct vop_getattr_args *);
static int hammer_vop_nresolve(struct vop_nresolve_args *);
static int hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *);
static int hammer_vop_nlink(struct vop_nlink_args *);
static int hammer_vop_nmkdir(struct vop_nmkdir_args *);
static int hammer_vop_nmknod(struct vop_nmknod_args *);
static int hammer_vop_open(struct vop_open_args *);
static int hammer_vop_pathconf(struct vop_pathconf_args *);
static int hammer_vop_print(struct vop_print_args *);
static int hammer_vop_readdir(struct vop_readdir_args *);
static int hammer_vop_readlink(struct vop_readlink_args *);
static int hammer_vop_nremove(struct vop_nremove_args *);
static int hammer_vop_nrename(struct vop_nrename_args *);
static int hammer_vop_nrmdir(struct vop_nrmdir_args *);
static int hammer_vop_setattr(struct vop_setattr_args *);
static int hammer_vop_strategy(struct vop_strategy_args *);
static int hammer_vop_nsymlink(struct vop_nsymlink_args *);
static int hammer_vop_nwhiteout(struct vop_nwhiteout_args *);
static int hammer_vop_ioctl(struct vop_ioctl_args *);
static int hammer_vop_mountctl(struct vop_mountctl_args *);

static int hammer_vop_fifoclose (struct vop_close_args *);
static int hammer_vop_fiforead (struct vop_read_args *);
static int hammer_vop_fifowrite (struct vop_write_args *);

static int hammer_vop_specclose (struct vop_close_args *);
static int hammer_vop_specread (struct vop_read_args *);
static int hammer_vop_specwrite (struct vop_write_args *);

struct vop_ops hammer_vnode_vops = {
	.vop_default = vop_defaultop,
	.vop_fsync = hammer_vop_fsync,
	.vop_getpages = vop_stdgetpages,
	.vop_putpages = vop_stdputpages,
	.vop_read = hammer_vop_read,
	.vop_write = hammer_vop_write,
	.vop_access = hammer_vop_access,
	.vop_advlock = hammer_vop_advlock,
	.vop_close = hammer_vop_close,
	.vop_ncreate = hammer_vop_ncreate,
	.vop_getattr = hammer_vop_getattr,
	.vop_inactive = hammer_vop_inactive,
	.vop_reclaim = hammer_vop_reclaim,
	.vop_nresolve = hammer_vop_nresolve,
	.vop_nlookupdotdot = hammer_vop_nlookupdotdot,
	.vop_nlink = hammer_vop_nlink,
	.vop_nmkdir = hammer_vop_nmkdir,
	.vop_nmknod = hammer_vop_nmknod,
	.vop_open = hammer_vop_open,
	.vop_pathconf = hammer_vop_pathconf,
	.vop_print = hammer_vop_print,
	.vop_readdir = hammer_vop_readdir,
	.vop_readlink = hammer_vop_readlink,
	.vop_nremove = hammer_vop_nremove,
	.vop_nrename = hammer_vop_nrename,
	.vop_nrmdir = hammer_vop_nrmdir,
	.vop_setattr = hammer_vop_setattr,
	.vop_strategy = hammer_vop_strategy,
	.vop_nsymlink = hammer_vop_nsymlink,
	.vop_nwhiteout = hammer_vop_nwhiteout,
	.vop_ioctl = hammer_vop_ioctl,
	.vop_mountctl = hammer_vop_mountctl
};

struct vop_ops hammer_spec_vops = {
	.vop_default = spec_vnoperate,
	.vop_fsync = hammer_vop_fsync,
	.vop_read = hammer_vop_specread,
	.vop_write = hammer_vop_specwrite,
	.vop_access = hammer_vop_access,
	.vop_close = hammer_vop_specclose,
	.vop_getattr = hammer_vop_getattr,
	.vop_inactive = hammer_vop_inactive,
	.vop_reclaim = hammer_vop_reclaim,
	.vop_setattr = hammer_vop_setattr
};

struct vop_ops hammer_fifo_vops = {
	.vop_default = fifo_vnoperate,
	.vop_fsync = hammer_vop_fsync,
	.vop_read = hammer_vop_fiforead,
	.vop_write = hammer_vop_fifowrite,
	.vop_access = hammer_vop_access,
	.vop_close = hammer_vop_fifoclose,
	.vop_getattr = hammer_vop_getattr,
	.vop_inactive = hammer_vop_inactive,
	.vop_reclaim = hammer_vop_reclaim,
	.vop_setattr = hammer_vop_setattr
};

static int hammer_dounlink(hammer_transaction_t trans, struct nchandle *nch,
			   struct vnode *dvp, struct ucred *cred, int flags);
static int hammer_vop_strategy_read(struct vop_strategy_args *ap);
static int hammer_vop_strategy_write(struct vop_strategy_args *ap);

#if 0
static
int
hammer_vop_vnoperate(struct vop_generic_args *)
{
	return (VOCALL(&hammer_vnode_vops, ap));
}
#endif

/*
 * hammer_vop_fsync { vp, waitfor }
 */
static
int
hammer_vop_fsync(struct vop_fsync_args *ap)
{
	hammer_inode_t ip = VTOI(ap->a_vp);

	hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
	vfsync(ap->a_vp, ap->a_waitfor, 1, NULL, NULL);
	if (ap->a_waitfor == MNT_WAIT)
		hammer_wait_inode(ip);
	return (ip->error);
}

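/*
 * Note on the read path below (added for clarity): file data is read
 * through the buffer cache in fixed HAMMER_BUFSIZE chunks using bread().
 * A cluster_read() variant exists but is compiled out (#if 0) at this
 * point, so seqcount is computed from a_ioflag but only consumed by the
 * disabled path.
 */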
/*
 * hammer_vop_read { vp, uio, ioflag, cred }
 */
static
int
hammer_vop_read(struct vop_read_args *ap)
{
	struct hammer_transaction trans;
	hammer_inode_t ip;
	off_t offset;
	struct buf *bp;
	struct uio *uio;
	int error;
	int n;
	int seqcount;

	if (ap->a_vp->v_type != VREG)
		return (EINVAL);
	ip = VTOI(ap->a_vp);
	error = 0;
	seqcount = ap->a_ioflag >> 16;

	hammer_start_transaction(&trans, ip->hmp);

	/*
	 * Access the data in HAMMER_BUFSIZE blocks via the buffer cache.
	 */
	uio = ap->a_uio;
	while (uio->uio_resid > 0 && uio->uio_offset < ip->ino_data.size) {
		offset = uio->uio_offset & HAMMER_BUFMASK;
#if 0
		error = cluster_read(ap->a_vp, ip->ino_data.size,
				     uio->uio_offset - offset, HAMMER_BUFSIZE,
				     MAXBSIZE, seqcount, &bp);
#endif
		error = bread(ap->a_vp, uio->uio_offset - offset,
			      HAMMER_BUFSIZE, &bp);
		if (error) {
			brelse(bp);
			break;
		}
		/* bp->b_flags |= B_CLUSTEROK; temporarily disabled */
		n = HAMMER_BUFSIZE - offset;
		if (n > uio->uio_resid)
			n = uio->uio_resid;
		if (n > ip->ino_data.size - uio->uio_offset)
			n = (int)(ip->ino_data.size - uio->uio_offset);
		error = uiomove((char *)bp->b_data + offset, n, uio);
		if (error) {
			bqrelse(bp);
			break;
		}
		bqrelse(bp);
	}
	if ((ip->flags & HAMMER_INODE_RO) == 0 &&
	    (ip->hmp->mp->mnt_flag & MNT_NOATIME) == 0) {
		ip->ino_leaf.atime = trans.time;
		hammer_modify_inode(&trans, ip, HAMMER_INODE_ITIMES);
	}
	hammer_done_transaction(&trans);
	return (error);
}

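/*
 * Note on the write path below (added for clarity): each pass prepares one
 * HAMMER_BUFSIZE buffer using whichever acquisition strategy avoids an
 * unnecessary read (UIO_NOCOPY re-instantiation, full-buffer overwrite,
 * write beyond EOF, or read-modify-write for a partial overwrite), copies
 * the user data in with uiomove(), and dirties the inode.  Every 16
 * buffers the vnode lock is dropped around bwillwrite() so huge writes
 * cannot deadlock the buffer cache.
 */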
/*
 * hammer_vop_write { vp, uio, ioflag, cred }
 */
static
int
hammer_vop_write(struct vop_write_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *ip;
	struct uio *uio;
	off_t offset;
	struct buf *bp;
	int error;
	int n;
	int flags;
	int count;

	if (ap->a_vp->v_type != VREG)
		return (EINVAL);
	ip = VTOI(ap->a_vp);
	error = 0;

	if (ip->flags & HAMMER_INODE_RO)
		return (EROFS);

	/*
	 * Create a transaction to cover the operations we perform.
	 */
	hammer_start_transaction(&trans, ip->hmp);
	uio = ap->a_uio;

	/*
	 * Check append mode
	 */
	if (ap->a_ioflag & IO_APPEND)
		uio->uio_offset = ip->ino_data.size;

	/*
	 * Check for illegal write offsets.  Valid range is 0...2^63-1
	 */
	if (uio->uio_offset < 0 || uio->uio_offset + uio->uio_resid <= 0) {
		hammer_done_transaction(&trans);
		return (EFBIG);
	}

	/*
	 * Access the data in HAMMER_BUFSIZE blocks via the buffer cache.
	 */
	count = 0;
	while (uio->uio_resid > 0) {
		int fixsize = 0;

		/*
		 * Do not allow huge writes to deadlock the buffer cache
		 */
		if ((++count & 15) == 0) {
			vn_unlock(ap->a_vp);
			if ((ap->a_ioflag & IO_NOBWILL) == 0)
				bwillwrite();
			vn_lock(ap->a_vp, LK_EXCLUSIVE|LK_RETRY);
		}

		offset = uio->uio_offset & HAMMER_BUFMASK;
		n = HAMMER_BUFSIZE - offset;
		if (n > uio->uio_resid)
			n = uio->uio_resid;
		if (uio->uio_offset + n > ip->ino_data.size) {
			vnode_pager_setsize(ap->a_vp, uio->uio_offset + n);
			fixsize = 1;
		}

		if (uio->uio_segflg == UIO_NOCOPY) {
			/*
			 * Issuing a write with the same data backing the
			 * buffer.  Instantiate the buffer to collect the
			 * backing vm pages, then read-in any missing bits.
			 *
			 * This case is used by vop_stdputpages().
			 */
			bp = getblk(ap->a_vp, uio->uio_offset - offset,
				    HAMMER_BUFSIZE, GETBLK_BHEAVY, 0);
			if ((bp->b_flags & B_CACHE) == 0) {
				bqrelse(bp);
				error = bread(ap->a_vp,
					      uio->uio_offset - offset,
					      HAMMER_BUFSIZE, &bp);
			}
		} else if (offset == 0 && uio->uio_resid >= HAMMER_BUFSIZE) {
			/*
			 * Even though we are entirely overwriting the buffer
			 * we may still have to zero it out to avoid a
			 * mmap/write visibility issue.
			 */
			bp = getblk(ap->a_vp, uio->uio_offset - offset,
				    HAMMER_BUFSIZE, GETBLK_BHEAVY, 0);
			if ((bp->b_flags & B_CACHE) == 0)
				vfs_bio_clrbuf(bp);
		} else if (uio->uio_offset - offset >= ip->ino_data.size) {
			/*
			 * If the base offset of the buffer is beyond the
			 * file EOF, we don't have to issue a read.
			 */
			bp = getblk(ap->a_vp, uio->uio_offset - offset,
				    HAMMER_BUFSIZE, GETBLK_BHEAVY, 0);
			vfs_bio_clrbuf(bp);
		} else {
			/*
			 * Partial overwrite, read in any missing bits then
			 * replace the portion being written.
			 */
			error = bread(ap->a_vp, uio->uio_offset - offset,
				      HAMMER_BUFSIZE, &bp);
			if (error == 0)
				bheavy(bp);
		}
		if (error == 0)
			error = uiomove((char *)bp->b_data + offset, n, uio);

		/*
		 * If we screwed up we have to undo any VM size changes we
		 * made.
		 */
		if (error) {
			brelse(bp);
			if (fixsize) {
				vtruncbuf(ap->a_vp, ip->ino_data.size,
					  HAMMER_BUFSIZE);
			}
			break;
		}
		/* bp->b_flags |= B_CLUSTEROK; temporarily disabled */
		if (ip->ino_data.size < uio->uio_offset) {
			ip->ino_data.size = uio->uio_offset;
			flags = HAMMER_INODE_DDIRTY;
			vnode_pager_setsize(ap->a_vp, ip->ino_data.size);
		} else {
			flags = 0;
		}
		ip->ino_data.mtime = trans.time;
		flags |= HAMMER_INODE_ITIMES | HAMMER_INODE_BUFS;
		flags |= HAMMER_INODE_DDIRTY;	/* XXX mtime */
		hammer_modify_inode(&trans, ip, flags);

		if (ap->a_ioflag & IO_SYNC) {
			bwrite(bp);
		} else if (ap->a_ioflag & IO_DIRECT) {
			bawrite(bp);
#if 0
		} else if ((ap->a_ioflag >> 16) == IO_SEQMAX &&
			   (uio->uio_offset & HAMMER_BUFMASK) == 0) {
			/*
			 * XXX HAMMER can only fsync the whole inode,
			 * doing it on every buffer would be a bad idea.
			 */
			/*
			 * If seqcount indicates sequential operation and
			 * we just finished filling a buffer, push it out
			 * now to prevent the buffer cache from becoming
			 * too full, which would trigger non-optimal
			 * flushes.
			 */
			bdwrite(bp);
#endif
		} else {
			bdwrite(bp);
		}
	}
	hammer_done_transaction(&trans);
	return (error);
}

/*
 * hammer_vop_access { vp, mode, cred }
 */
static
int
hammer_vop_access(struct vop_access_args *ap)
{
	struct hammer_inode *ip = VTOI(ap->a_vp);
	uid_t uid;
	gid_t gid;
	int error;

	uid = hammer_to_unix_xid(&ip->ino_data.uid);
	gid = hammer_to_unix_xid(&ip->ino_data.gid);

	error = vop_helper_access(ap, uid, gid, ip->ino_data.mode,
				  ip->ino_data.uflags);
	return (error);
}

/*
 * hammer_vop_advlock { vp, id, op, fl, flags }
 */
static
int
hammer_vop_advlock(struct vop_advlock_args *ap)
{
	struct hammer_inode *ip = VTOI(ap->a_vp);

	return (lf_advlock(ap, &ip->advlock, ip->ino_data.size));
}

/*
 * hammer_vop_close { vp, fflag }
 */
static
int
hammer_vop_close(struct vop_close_args *ap)
{
	return (vop_stdclose(ap));
}

/*
 * hammer_vop_ncreate { nch, dvp, vpp, cred, vap }
 *
 * The operating system has already ensured that the directory entry
 * does not exist and done all appropriate namespace locking.
 */
static
int
hammer_vop_ncreate(struct vop_ncreate_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *dip;
	struct hammer_inode *nip;
	struct nchandle *nch;
	int error;

	nch = ap->a_nch;
	dip = VTOI(ap->a_dvp);

	if (dip->flags & HAMMER_INODE_RO)
		return (EROFS);

	/*
	 * Create a transaction to cover the operations we perform.
	 */
	hammer_start_transaction(&trans, dip->hmp);

	/*
	 * Create a new filesystem object of the requested type.  The
	 * returned inode will be referenced and shared-locked to prevent
	 * it from being moved to the flusher.
	 */

	error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
	if (error) {
		hkprintf("hammer_create_inode error %d\n", error);
		hammer_done_transaction(&trans);
		*ap->a_vpp = NULL;
		return (error);
	}

	/*
	 * Add the new filesystem object to the directory.  This will also
	 * bump the inode's link count.
	 */
	error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);
	if (error)
		hkprintf("hammer_ip_add_directory error %d\n", error);

	/*
	 * Finish up.
	 */
	if (error) {
		hammer_rel_inode(nip, 0);
		hammer_done_transaction(&trans);
		*ap->a_vpp = NULL;
	} else {
		error = hammer_get_vnode(nip, ap->a_vpp);
		hammer_done_transaction(&trans);
		hammer_rel_inode(nip, 0);
		if (error == 0) {
			cache_setunresolved(ap->a_nch);
			cache_setvp(ap->a_nch, *ap->a_vpp);
		}
	}
	return (error);
}

/*
 * hammer_vop_getattr { vp, vap }
 *
 * Retrieve an inode's attribute information.  When accessing inodes
 * historically we fake the atime field to ensure consistent results.
 * The atime field is stored in the B-Tree element and allowed to be
 * updated without cycling the element.
 */
static
int
hammer_vop_getattr(struct vop_getattr_args *ap)
{
	struct hammer_inode *ip = VTOI(ap->a_vp);
	struct vattr *vap = ap->a_vap;

#if 0
	if (cache_check_fsmid_vp(ap->a_vp, &ip->fsmid) &&
	    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0 &&
	    ip->obj_asof == XXX
	) {
		/* LAZYMOD XXX */
	}
	hammer_itimes(ap->a_vp);
#endif

	vap->va_fsid = ip->hmp->fsid_udev;
	vap->va_fileid = ip->ino_leaf.base.obj_id;
	vap->va_mode = ip->ino_data.mode;
	vap->va_nlink = ip->ino_data.nlinks;
	vap->va_uid = hammer_to_unix_xid(&ip->ino_data.uid);
	vap->va_gid = hammer_to_unix_xid(&ip->ino_data.gid);
	vap->va_rmajor = 0;
	vap->va_rminor = 0;
	vap->va_size = ip->ino_data.size;
	if (ip->flags & HAMMER_INODE_RO)
		hammer_to_timespec(ip->ino_data.mtime, &vap->va_atime);
	else
		hammer_to_timespec(ip->ino_leaf.atime, &vap->va_atime);
	hammer_to_timespec(ip->ino_data.mtime, &vap->va_mtime);
	hammer_to_timespec(ip->ino_data.ctime, &vap->va_ctime);
	vap->va_flags = ip->ino_data.uflags;
	vap->va_gen = 1;	/* hammer inums are unique for all time */
	vap->va_blocksize = HAMMER_BUFSIZE;
	vap->va_bytes = (ip->ino_data.size + 63) & ~63;
	vap->va_type = hammer_get_vnode_type(ip->ino_data.obj_type);
	vap->va_filerev = 0;	/* XXX */
	/* mtime uniquely identifies any adjustments made to the file */
	vap->va_fsmid = ip->ino_data.mtime;
	vap->va_uid_uuid = ip->ino_data.uid;
	vap->va_gid_uuid = ip->ino_data.gid;
	vap->va_fsid_uuid = ip->hmp->fsid;
	vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
			  VA_FSID_UUID_VALID;

	switch (ip->ino_data.obj_type) {
	case HAMMER_OBJTYPE_CDEV:
	case HAMMER_OBJTYPE_BDEV:
		vap->va_rmajor = ip->ino_data.rmajor;
		vap->va_rminor = ip->ino_data.rminor;
		break;
	default:
		break;
	}

	return(0);
}

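/*
 * Note on the name lookup below (added for clarity): directory entries are
 * keyed by a 64 bit namekey derived from the filename.  The low 32 bits
 * act as an iterator for hash collisions, so a lookup scans the inclusive
 * key range [namekey, namekey | 0xFFFFFFFF] and compares each candidate's
 * name.  An "@@" as-of extension on the last path component selects a
 * historical transaction id and forces the result read-only.
 */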
/*
 * hammer_vop_nresolve { nch, dvp, cred }
 *
 * Locate the requested directory entry.
 */
static
int
hammer_vop_nresolve(struct vop_nresolve_args *ap)
{
	struct hammer_transaction trans;
	struct namecache *ncp;
	hammer_inode_t dip;
	hammer_inode_t ip;
	hammer_tid_t asof;
	struct hammer_cursor cursor;
	struct vnode *vp;
	int64_t namekey;
	int error;
	int i;
	int nlen;
	int flags;
	u_int64_t obj_id;

	/*
	 * Misc initialization, plus handle as-of name extensions.  Look for
	 * the '@@' extension.  Note that as-of files and directories cannot
	 * be modified.
	 */
	dip = VTOI(ap->a_dvp);
	ncp = ap->a_nch->ncp;
	asof = dip->obj_asof;
	nlen = ncp->nc_nlen;
	flags = dip->flags;

	hammer_simple_transaction(&trans, dip->hmp);

	for (i = 0; i < nlen; ++i) {
		if (ncp->nc_name[i] == '@' && ncp->nc_name[i+1] == '@') {
			asof = hammer_str_to_tid(ncp->nc_name + i + 2);
			flags |= HAMMER_INODE_RO;
			break;
		}
	}
	nlen = i;

	/*
	 * If there is no path component the time extension is relative to
	 * dip.
	 */
	if (nlen == 0) {
		ip = hammer_get_inode(&trans, &dip->cache[1], dip->obj_id,
				      asof, flags, &error);
		if (error == 0) {
			error = hammer_get_vnode(ip, &vp);
			hammer_rel_inode(ip, 0);
		} else {
			vp = NULL;
		}
		if (error == 0) {
			vn_unlock(vp);
			cache_setvp(ap->a_nch, vp);
			vrele(vp);
		}
		goto done;
	}

	/*
	 * Calculate the namekey and setup the key range for the scan.  This
	 * works kinda like a chained hash table where the lower 32 bits
	 * of the namekey synthesize the chain.
	 *
	 * The key range is inclusive of both key_beg and key_end.
	 */
	namekey = hammer_directory_namekey(ncp->nc_name, nlen);

	error = hammer_init_cursor(&trans, &cursor, &dip->cache[0], dip);
	cursor.key_beg.obj_id = dip->obj_id;
	cursor.key_beg.key = namekey;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
	cursor.key_beg.obj_type = 0;

	cursor.key_end = cursor.key_beg;
	cursor.key_end.key |= 0xFFFFFFFFULL;
	cursor.asof = asof;
	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;

	/*
	 * Scan all matching records (the chain), locate the one matching
	 * the requested path component.
	 *
	 * The hammer_ip_*() functions merge in-memory records with on-disk
	 * records for the purposes of the search.
	 */
	obj_id = 0;

	if (error == 0) {
		error = hammer_ip_first(&cursor);
		while (error == 0) {
			error = hammer_ip_resolve_data(&cursor);
			if (error)
				break;
			if (nlen == cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF &&
			    bcmp(ncp->nc_name, cursor.data->entry.name, nlen) == 0) {
				obj_id = cursor.data->entry.obj_id;
				break;
			}
			error = hammer_ip_next(&cursor);
		}
	}
	hammer_done_cursor(&cursor);
	if (error == 0) {
		ip = hammer_get_inode(&trans, &dip->cache[1],
				      obj_id, asof, flags, &error);
		if (error == 0) {
			error = hammer_get_vnode(ip, &vp);
			hammer_rel_inode(ip, 0);
		} else {
			vp = NULL;
		}
		if (error == 0) {
			vn_unlock(vp);
			cache_setvp(ap->a_nch, vp);
			vrele(vp);
		}
	} else if (error == ENOENT) {
		cache_setvp(ap->a_nch, NULL);
	}
done:
	hammer_done_transaction(&trans);
	return (error);
}

/*
 * hammer_vop_nlookupdotdot { dvp, vpp, cred }
 *
 * Locate the parent directory of a directory vnode.
 *
 * dvp is referenced but not locked.  *vpp must be returned referenced and
 * locked.  A parent_obj_id of 0 does not necessarily indicate that we are
 * at the root, instead it could indicate that the directory we were in was
 * removed.
 *
 * NOTE: as-of sequences are not linked into the directory structure.  If
 * we are at the root with a different asof than the mount point, reload
 * the same directory with the mount point's asof.  I'm not sure what this
 * will do to NFS.  We encode ASOF stamps in NFS file handles so it might not
 * get confused, but it hasn't been tested.
 */
static
int
hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *dip;
	struct hammer_inode *ip;
	int64_t parent_obj_id;
	hammer_tid_t asof;
	int error;

	dip = VTOI(ap->a_dvp);
	asof = dip->obj_asof;
	parent_obj_id = dip->ino_data.parent_obj_id;

	if (parent_obj_id == 0) {
		if (dip->obj_id == HAMMER_OBJID_ROOT &&
		    asof != dip->hmp->asof) {
			parent_obj_id = dip->obj_id;
			asof = dip->hmp->asof;
			*ap->a_fakename = kmalloc(19, M_TEMP, M_WAITOK);
			ksnprintf(*ap->a_fakename, 19, "0x%016llx",
				  dip->obj_asof);
		} else {
			*ap->a_vpp = NULL;
			return ENOENT;
		}
	}

	hammer_simple_transaction(&trans, dip->hmp);

	ip = hammer_get_inode(&trans, &dip->cache[1], parent_obj_id,
			      asof, dip->flags, &error);
	if (ip) {
		error = hammer_get_vnode(ip, ap->a_vpp);
		hammer_rel_inode(ip, 0);
	} else {
		*ap->a_vpp = NULL;
	}
	hammer_done_transaction(&trans);
	return (error);
}

/*
 * hammer_vop_nlink { nch, dvp, vp, cred }
 */
static
int
hammer_vop_nlink(struct vop_nlink_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *dip;
	struct hammer_inode *ip;
	struct nchandle *nch;
	int error;

	nch = ap->a_nch;
	dip = VTOI(ap->a_dvp);
	ip = VTOI(ap->a_vp);

	if (dip->flags & HAMMER_INODE_RO)
		return (EROFS);
	if (ip->flags & HAMMER_INODE_RO)
		return (EROFS);

	/*
	 * Create a transaction to cover the operations we perform.
	 */
	hammer_start_transaction(&trans, dip->hmp);

	/*
	 * Add the filesystem object to the directory.  Note that neither
	 * dip nor ip are referenced or locked, but their vnodes are
	 * referenced.  This function will bump the inode's link count.
	 */
	error = hammer_ip_add_directory(&trans, dip, nch->ncp, ip);

	/*
	 * Finish up.
	 */
	if (error == 0) {
		cache_setunresolved(nch);
		cache_setvp(nch, ap->a_vp);
	}
	hammer_done_transaction(&trans);
	return (error);
}

/*
 * hammer_vop_nmkdir { nch, dvp, vpp, cred, vap }
 *
 * The operating system has already ensured that the directory entry
 * does not exist and done all appropriate namespace locking.
 */
static
int
hammer_vop_nmkdir(struct vop_nmkdir_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *dip;
	struct hammer_inode *nip;
	struct nchandle *nch;
	int error;

	nch = ap->a_nch;
	dip = VTOI(ap->a_dvp);

	if (dip->flags & HAMMER_INODE_RO)
		return (EROFS);

	/*
	 * Create a transaction to cover the operations we perform.
	 */
	hammer_start_transaction(&trans, dip->hmp);

	/*
	 * Create a new filesystem object of the requested type.  The
	 * returned inode will be referenced but not locked.
	 */
	error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
	if (error) {
		hkprintf("hammer_mkdir error %d\n", error);
		hammer_done_transaction(&trans);
		*ap->a_vpp = NULL;
		return (error);
	}
	/*
	 * Add the new filesystem object to the directory.  This will also
	 * bump the inode's link count.
	 */
	error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);
	if (error)
		hkprintf("hammer_mkdir (add) error %d\n", error);

	/*
	 * Finish up.
	 */
	if (error) {
		hammer_rel_inode(nip, 0);
		*ap->a_vpp = NULL;
	} else {
		error = hammer_get_vnode(nip, ap->a_vpp);
		hammer_rel_inode(nip, 0);
		if (error == 0) {
			cache_setunresolved(ap->a_nch);
			cache_setvp(ap->a_nch, *ap->a_vpp);
		}
	}
	hammer_done_transaction(&trans);
	return (error);
}

/*
 * hammer_vop_nmknod { nch, dvp, vpp, cred, vap }
 *
 * The operating system has already ensured that the directory entry
 * does not exist and done all appropriate namespace locking.
 */
static
int
hammer_vop_nmknod(struct vop_nmknod_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *dip;
	struct hammer_inode *nip;
	struct nchandle *nch;
	int error;

	nch = ap->a_nch;
	dip = VTOI(ap->a_dvp);

	if (dip->flags & HAMMER_INODE_RO)
		return (EROFS);

	/*
	 * Create a transaction to cover the operations we perform.
	 */
	hammer_start_transaction(&trans, dip->hmp);

	/*
	 * Create a new filesystem object of the requested type.  The
	 * returned inode will be referenced but not locked.
	 */
	error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
	if (error) {
		hammer_done_transaction(&trans);
		*ap->a_vpp = NULL;
		return (error);
	}

	/*
	 * Add the new filesystem object to the directory.  This will also
	 * bump the inode's link count.
	 */
	error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);

	/*
	 * Finish up.
	 */
	if (error) {
		hammer_rel_inode(nip, 0);
		*ap->a_vpp = NULL;
	} else {
		error = hammer_get_vnode(nip, ap->a_vpp);
		hammer_rel_inode(nip, 0);
		if (error == 0) {
			cache_setunresolved(ap->a_nch);
			cache_setvp(ap->a_nch, *ap->a_vpp);
		}
	}
	hammer_done_transaction(&trans);
	return (error);
}

/*
 * hammer_vop_open { vp, mode, cred, fp }
 */
static
int
hammer_vop_open(struct vop_open_args *ap)
{
	if ((ap->a_mode & FWRITE) && (VTOI(ap->a_vp)->flags & HAMMER_INODE_RO))
		return (EROFS);

	return(vop_stdopen(ap));
}

/*
 * hammer_vop_pathconf { vp, name, retval }
 */
static
int
hammer_vop_pathconf(struct vop_pathconf_args *ap)
{
	return EOPNOTSUPP;
}

/*
 * hammer_vop_print { vp }
 */
static
int
hammer_vop_print(struct vop_print_args *ap)
{
	return EOPNOTSUPP;
}

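/*
 * Note on the readdir implementation below (added for clarity): the 64 bit
 * directory key doubles as the uio/cookie "seek" offset.  Offsets 0 and 1
 * are reserved for the artificial "." and ".." entries; real entries
 * resume the B-Tree scan at the saved key.
 */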
/*
 * hammer_vop_readdir { vp, uio, cred, *eofflag, *ncookies, off_t **cookies }
 */
static
int
hammer_vop_readdir(struct vop_readdir_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_cursor cursor;
	struct hammer_inode *ip;
	struct uio *uio;
	hammer_base_elm_t base;
	int error;
	int cookie_index;
	int ncookies;
	off_t *cookies;
	off_t saveoff;
	int r;

	ip = VTOI(ap->a_vp);
	uio = ap->a_uio;
	saveoff = uio->uio_offset;

	if (ap->a_ncookies) {
		ncookies = uio->uio_resid / 16 + 1;
		if (ncookies > 1024)
			ncookies = 1024;
		cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
		cookie_index = 0;
	} else {
		ncookies = -1;
		cookies = NULL;
		cookie_index = 0;
	}

	hammer_simple_transaction(&trans, ip->hmp);

	/*
	 * Handle artificial entries
	 */
	error = 0;
	if (saveoff == 0) {
		r = vop_write_dirent(&error, uio, ip->obj_id, DT_DIR, 1, ".");
		if (r)
			goto done;
		if (cookies)
			cookies[cookie_index] = saveoff;
		++saveoff;
		++cookie_index;
		if (cookie_index == ncookies)
			goto done;
	}
	if (saveoff == 1) {
		if (ip->ino_data.parent_obj_id) {
			r = vop_write_dirent(&error, uio,
					     ip->ino_data.parent_obj_id,
					     DT_DIR, 2, "..");
		} else {
			r = vop_write_dirent(&error, uio,
					     ip->obj_id, DT_DIR, 2, "..");
		}
		if (r)
			goto done;
		if (cookies)
			cookies[cookie_index] = saveoff;
		++saveoff;
		++cookie_index;
		if (cookie_index == ncookies)
			goto done;
	}

	/*
	 * Key range (begin and end inclusive) to scan.  Directory keys
	 * directly translate to a 64 bit 'seek' position.
	 */
	hammer_init_cursor(&trans, &cursor, &ip->cache[0], ip);
	cursor.key_beg.obj_id = ip->obj_id;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
	cursor.key_beg.obj_type = 0;
	cursor.key_beg.key = saveoff;

	cursor.key_end = cursor.key_beg;
	cursor.key_end.key = HAMMER_MAX_KEY;
	cursor.asof = ip->obj_asof;
	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;

	error = hammer_ip_first(&cursor);

	while (error == 0) {
		error = hammer_ip_resolve_data(&cursor);
		if (error)
			break;
		base = &cursor.leaf->base;
		saveoff = base->key;
		KKASSERT(cursor.leaf->data_len > HAMMER_ENTRY_NAME_OFF);

		if (base->obj_id != ip->obj_id)
			panic("readdir: bad record at %p", cursor.node);

		r = vop_write_dirent(
			     &error, uio, cursor.data->entry.obj_id,
			     hammer_get_dtype(cursor.leaf->base.obj_type),
			     cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF,
			     (void *)cursor.data->entry.name);
		if (r)
			break;
		++saveoff;
		if (cookies)
			cookies[cookie_index] = base->key;
		++cookie_index;
		if (cookie_index == ncookies)
			break;
		error = hammer_ip_next(&cursor);
	}
	hammer_done_cursor(&cursor);

done:
	hammer_done_transaction(&trans);

	if (ap->a_eofflag)
		*ap->a_eofflag = (error == ENOENT);
	uio->uio_offset = saveoff;
	if (error && cookie_index == 0) {
		if (error == ENOENT)
			error = 0;
		if (cookies) {
			kfree(cookies, M_TEMP);
			*ap->a_ncookies = 0;
			*ap->a_cookies = NULL;
		}
	} else {
		if (error == ENOENT)
			error = 0;
		if (cookies) {
			*ap->a_ncookies = cookie_index;
			*ap->a_cookies = cookies;
		}
	}
	return(error);
}

/*
 * hammer_vop_readlink { vp, uio, cred }
 */
static
int
hammer_vop_readlink(struct vop_readlink_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_cursor cursor;
	struct hammer_inode *ip;
	int error;

	ip = VTOI(ap->a_vp);

	hammer_simple_transaction(&trans, ip->hmp);

	hammer_init_cursor(&trans, &cursor, &ip->cache[0], ip);

	/*
	 * Key range (begin and end inclusive) to scan.  The symlink data
	 * is stored in a single fixed record (HAMMER_FIXKEY_SYMLINK)
	 * attached to the inode.
	 */
	cursor.key_beg.obj_id = ip->obj_id;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_FIX;
	cursor.key_beg.obj_type = 0;
	cursor.key_beg.key = HAMMER_FIXKEY_SYMLINK;
	cursor.asof = ip->obj_asof;
	cursor.flags |= HAMMER_CURSOR_ASOF;

	error = hammer_ip_lookup(&cursor);
	if (error == 0) {
		error = hammer_ip_resolve_data(&cursor);
		if (error == 0) {
			KKASSERT(cursor.leaf->data_len >=
				 HAMMER_SYMLINK_NAME_OFF);
			error = uiomove(cursor.data->symlink.name,
					cursor.leaf->data_len -
					HAMMER_SYMLINK_NAME_OFF,
					ap->a_uio);
		}
	}
	hammer_done_cursor(&cursor);
	hammer_done_transaction(&trans);
	return(error);
}

/*
 * hammer_vop_nremove { nch, dvp, cred }
 */
static
int
hammer_vop_nremove(struct vop_nremove_args *ap)
{
	struct hammer_transaction trans;
	int error;

	hammer_start_transaction(&trans, VTOI(ap->a_dvp)->hmp);
	error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp, ap->a_cred, 0);
	hammer_done_transaction(&trans);

	return (error);
}

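/*
 * Note on the rename implementation below (added for clarity): the new
 * directory entry is added first, then the old entry is located by
 * scanning the originating directory's namekey chain and deleted.  An
 * EDEADLK from the deletion retries the scan from scratch, so rename is
 * not atomic with respect to crash recovery (see the XXX below).
 */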
/*
 * hammer_vop_nrename { fnch, tnch, fdvp, tdvp, cred }
 */
static
int
hammer_vop_nrename(struct vop_nrename_args *ap)
{
	struct hammer_transaction trans;
	struct namecache *fncp;
	struct namecache *tncp;
	struct hammer_inode *fdip;
	struct hammer_inode *tdip;
	struct hammer_inode *ip;
	struct hammer_cursor cursor;
	int64_t namekey;
	int nlen, error;

	fdip = VTOI(ap->a_fdvp);
	tdip = VTOI(ap->a_tdvp);
	fncp = ap->a_fnch->ncp;
	tncp = ap->a_tnch->ncp;
	ip = VTOI(fncp->nc_vp);
	KKASSERT(ip != NULL);

	if (fdip->flags & HAMMER_INODE_RO)
		return (EROFS);
	if (tdip->flags & HAMMER_INODE_RO)
		return (EROFS);
	if (ip->flags & HAMMER_INODE_RO)
		return (EROFS);

	hammer_start_transaction(&trans, fdip->hmp);

	/*
	 * Remove tncp from the target directory and then link ip as
	 * tncp. XXX pass trans to dounlink
	 *
	 * Force the inode sync-time to match the transaction so it is
	 * in-sync with the creation of the target directory entry.
	 */
	error = hammer_dounlink(&trans, ap->a_tnch, ap->a_tdvp, ap->a_cred, 0);
	if (error == 0 || error == ENOENT) {
		error = hammer_ip_add_directory(&trans, tdip, tncp, ip);
		if (error == 0) {
			ip->ino_data.parent_obj_id = tdip->obj_id;
			hammer_modify_inode(&trans, ip, HAMMER_INODE_DDIRTY);
		}
	}
	if (error)
		goto failed; /* XXX */

	/*
	 * Locate the record in the originating directory and remove it.
	 *
	 * Calculate the namekey and setup the key range for the scan.  This
	 * works kinda like a chained hash table where the lower 32 bits
	 * of the namekey synthesize the chain.
	 *
	 * The key range is inclusive of both key_beg and key_end.
	 */
	namekey = hammer_directory_namekey(fncp->nc_name, fncp->nc_nlen);
retry:
	hammer_init_cursor(&trans, &cursor, &fdip->cache[0], fdip);
	cursor.key_beg.obj_id = fdip->obj_id;
	cursor.key_beg.key = namekey;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
	cursor.key_beg.obj_type = 0;

	cursor.key_end = cursor.key_beg;
	cursor.key_end.key |= 0xFFFFFFFFULL;
	cursor.asof = fdip->obj_asof;
	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;

	/*
	 * Scan all matching records (the chain), locate the one matching
	 * the requested path component.
	 *
	 * The hammer_ip_*() functions merge in-memory records with on-disk
	 * records for the purposes of the search.
	 */
	error = hammer_ip_first(&cursor);
	while (error == 0) {
		if (hammer_ip_resolve_data(&cursor) != 0)
			break;
		nlen = cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF;
		KKASSERT(nlen > 0);
		if (fncp->nc_nlen == nlen &&
		    bcmp(fncp->nc_name, cursor.data->entry.name, nlen) == 0) {
			break;
		}
		error = hammer_ip_next(&cursor);
	}

	/*
	 * If all is ok we have to get the inode so we can adjust nlinks.
	 *
	 * WARNING: hammer_ip_del_directory() may have to terminate the
	 * cursor to avoid a recursion.  It's ok to call hammer_done_cursor()
	 * twice.
	 */
	if (error == 0)
		error = hammer_ip_del_directory(&trans, &cursor, fdip, ip);

	/*
	 * XXX A deadlock here will break rename's atomicity for the purposes
	 * of crash recovery.
	 */
	if (error == EDEADLK) {
		hammer_done_cursor(&cursor);
		goto retry;
	}

	/*
	 * Cleanup and tell the kernel that the rename succeeded.
	 */
	hammer_done_cursor(&cursor);
	if (error == 0)
		cache_rename(ap->a_fnch, ap->a_tnch);

failed:
	hammer_done_transaction(&trans);
	return (error);
}

/*
 * hammer_vop_nrmdir { nch, dvp, cred }
 */
static
int
hammer_vop_nrmdir(struct vop_nrmdir_args *ap)
{
	struct hammer_transaction trans;
	int error;

	hammer_start_transaction(&trans, VTOI(ap->a_dvp)->hmp);
	error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp, ap->a_cred, 0);
	hammer_done_transaction(&trans);

	return (error);
}

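/*
 * Note on setattr below (added for clarity): a size reduction on a regular
 * file is not written to the media immediately.  The truncation point is
 * cached in the inode (HAMMER_INODE_TRUNCATED / trunc_off) and applied
 * when the inode is synchronized; only the partial block at the new EOF
 * is zeroed here through the front-end buffer cache.
 */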
/*
 * hammer_vop_setattr { vp, vap, cred }
 */
static
int
hammer_vop_setattr(struct vop_setattr_args *ap)
{
	struct hammer_transaction trans;
	struct vattr *vap;
	struct hammer_inode *ip;
	int modflags;
	int error;
	int truncating;
	off_t aligned_size;
	u_int32_t flags;
	uuid_t uuid;

	vap = ap->a_vap;
	ip = ap->a_vp->v_data;
	modflags = 0;

	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
		return(EROFS);
	if (ip->flags & HAMMER_INODE_RO)
		return (EROFS);

	hammer_start_transaction(&trans, ip->hmp);
	error = 0;

	if (vap->va_flags != VNOVAL) {
		flags = ip->ino_data.uflags;
		error = vop_helper_setattr_flags(&flags, vap->va_flags,
					hammer_to_unix_xid(&ip->ino_data.uid),
					ap->a_cred);
		if (error == 0) {
			if (ip->ino_data.uflags != flags) {
				ip->ino_data.uflags = flags;
				modflags |= HAMMER_INODE_DDIRTY;
			}
			if (ip->ino_data.uflags & (IMMUTABLE | APPEND)) {
				error = 0;
				goto done;
			}
		}
		goto done;
	}
	if (ip->ino_data.uflags & (IMMUTABLE | APPEND)) {
		error = EPERM;
		goto done;
	}
	if (vap->va_uid != (uid_t)VNOVAL) {
		hammer_guid_to_uuid(&uuid, vap->va_uid);
		if (bcmp(&uuid, &ip->ino_data.uid, sizeof(uuid)) != 0) {
			ip->ino_data.uid = uuid;
			modflags |= HAMMER_INODE_DDIRTY;
		}
	}
	if (vap->va_gid != (uid_t)VNOVAL) {
		hammer_guid_to_uuid(&uuid, vap->va_gid);
		if (bcmp(&uuid, &ip->ino_data.gid, sizeof(uuid)) != 0) {
			ip->ino_data.gid = uuid;
			modflags |= HAMMER_INODE_DDIRTY;
		}
	}
	while (vap->va_size != VNOVAL && ip->ino_data.size != vap->va_size) {
		switch(ap->a_vp->v_type) {
		case VREG:
			if (vap->va_size == ip->ino_data.size)
				break;
			/*
			 * XXX breaks atomicity, we can deadlock the backend
			 * if we do not release the lock.  Probably not a
			 * big deal here.
			 */
			if (vap->va_size < ip->ino_data.size) {
				vtruncbuf(ap->a_vp, vap->va_size,
					  HAMMER_BUFSIZE);
				truncating = 1;
			} else {
				vnode_pager_setsize(ap->a_vp, vap->va_size);
				truncating = 0;
			}
			ip->ino_data.size = vap->va_size;
			modflags |= HAMMER_INODE_DDIRTY;
			aligned_size = (vap->va_size + HAMMER_BUFMASK) &
				       ~HAMMER_BUFMASK64;

			/*
			 * on-media truncation is cached in the inode until
			 * the inode is synchronized.
			 */
			if (truncating) {
				if ((ip->flags & HAMMER_INODE_TRUNCATED) == 0) {
					ip->flags |= HAMMER_INODE_TRUNCATED;
					ip->trunc_off = vap->va_size;
				} else if (ip->trunc_off > vap->va_size) {
					ip->trunc_off = vap->va_size;
				}
			}

			/*
			 * If truncating we have to clean out a portion of
			 * the last block on-disk.  We do this in the
			 * front-end buffer cache.
			 */
			if (truncating && vap->va_size < aligned_size) {
				struct buf *bp;
				int offset;

				offset = vap->va_size & HAMMER_BUFMASK;
				error = bread(ap->a_vp,
					      aligned_size - HAMMER_BUFSIZE,
					      HAMMER_BUFSIZE, &bp);
				if (error == 0) {
					bzero(bp->b_data + offset,
					      HAMMER_BUFSIZE - offset);
					bdwrite(bp);
				} else {
					brelse(bp);
				}
			}
			break;
		case VDATABASE:
			if ((ip->flags & HAMMER_INODE_TRUNCATED) == 0) {
				ip->flags |= HAMMER_INODE_TRUNCATED;
				ip->trunc_off = vap->va_size;
			} else if (ip->trunc_off > vap->va_size) {
				ip->trunc_off = vap->va_size;
			}
			ip->ino_data.size = vap->va_size;
			modflags |= HAMMER_INODE_DDIRTY;
			break;
		default:
			error = EINVAL;
			goto done;
		}
		break;
	}
	if (vap->va_atime.tv_sec != VNOVAL) {
		ip->ino_leaf.atime =
			hammer_timespec_to_transid(&vap->va_atime);
		modflags |= HAMMER_INODE_ITIMES;
	}
	if (vap->va_mtime.tv_sec != VNOVAL) {
		ip->ino_data.mtime =
			hammer_timespec_to_transid(&vap->va_mtime);
		modflags |= HAMMER_INODE_ITIMES;
		modflags |= HAMMER_INODE_DDIRTY;	/* XXX mtime */
	}
	if (vap->va_mode != (mode_t)VNOVAL) {
		if (ip->ino_data.mode != vap->va_mode) {
			ip->ino_data.mode = vap->va_mode;
			modflags |= HAMMER_INODE_DDIRTY;
		}
	}
done:
	if (error == 0)
		hammer_modify_inode(&trans, ip, modflags);
	hammer_done_transaction(&trans);
	return (error);
}

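/*
 * Note on the symlink path below (added for clarity): the link target is
 * stored as the raw data of a HAMMER_RECTYPE_FIX record keyed by
 * HAMMER_FIXKEY_SYMLINK (no NUL terminator), and the inode size is set to
 * the target length.  hammer_vop_readlink() above reads that record back
 * with uiomove().
 */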
/*
 * hammer_vop_nsymlink { nch, dvp, vpp, cred, vap, target }
 */
static
int
hammer_vop_nsymlink(struct vop_nsymlink_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *dip;
	struct hammer_inode *nip;
	struct nchandle *nch;
	hammer_record_t record;
	int error;
	int bytes;

	ap->a_vap->va_type = VLNK;

	nch = ap->a_nch;
	dip = VTOI(ap->a_dvp);

	if (dip->flags & HAMMER_INODE_RO)
		return (EROFS);

	/*
	 * Create a transaction to cover the operations we perform.
	 */
	hammer_start_transaction(&trans, dip->hmp);

	/*
	 * Create a new filesystem object of the requested type.  The
	 * returned inode will be referenced but not locked.
	 */

	error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
	if (error) {
		hammer_done_transaction(&trans);
		*ap->a_vpp = NULL;
		return (error);
	}

	/*
	 * Add a record representing the symlink.  The symlink is stored
	 * as pure data, not a string, and is not \0-terminated.
	 */
	if (error == 0) {
		bytes = strlen(ap->a_target);
		record = hammer_alloc_mem_record(nip, bytes);
		record->type = HAMMER_MEM_RECORD_GENERAL;

		record->leaf.base.key = HAMMER_FIXKEY_SYMLINK;
		record->leaf.base.rec_type = HAMMER_RECTYPE_FIX;
		record->leaf.data_len = bytes;
		KKASSERT(HAMMER_SYMLINK_NAME_OFF == 0);
		bcopy(ap->a_target, record->data->symlink.name, bytes);
		error = hammer_ip_add_record(&trans, record);

		/*
		 * Set the file size to the length of the link.
		 */
		if (error == 0) {
			nip->ino_data.size = bytes;
			hammer_modify_inode(&trans, nip, HAMMER_INODE_DDIRTY);
		}
	}
	if (error == 0)
		error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);

	/*
	 * Finish up.
	 */
	if (error) {
		hammer_rel_inode(nip, 0);
		*ap->a_vpp = NULL;
	} else {
		error = hammer_get_vnode(nip, ap->a_vpp);
		hammer_rel_inode(nip, 0);
		if (error == 0) {
			cache_setunresolved(ap->a_nch);
			cache_setvp(ap->a_nch, *ap->a_vpp);
		}
	}
	hammer_done_transaction(&trans);
	return (error);
}

/*
 * hammer_vop_nwhiteout { nch, dvp, cred, flags }
 */
static
int
hammer_vop_nwhiteout(struct vop_nwhiteout_args *ap)
{
	struct hammer_transaction trans;
	int error;

	hammer_start_transaction(&trans, VTOI(ap->a_dvp)->hmp);
	error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp,
				ap->a_cred, ap->a_flags);
	hammer_done_transaction(&trans);

	return (error);
}

/*
 * hammer_vop_ioctl { vp, command, data, fflag, cred }
 */
static
int
hammer_vop_ioctl(struct vop_ioctl_args *ap)
{
	struct hammer_inode *ip = ap->a_vp->v_data;

	return(hammer_ioctl(ip, ap->a_command, ap->a_data,
			    ap->a_fflag, ap->a_cred));
}

static
int
hammer_vop_mountctl(struct vop_mountctl_args *ap)
{
	struct mount *mp;
	int error;

	mp = ap->a_head.a_ops->head.vv_mount;

	switch(ap->a_op) {
	case MOUNTCTL_SET_EXPORT:
		if (ap->a_ctllen != sizeof(struct export_args))
			error = EINVAL;
		else
			error = hammer_vfs_export(mp, ap->a_op,
				    (const struct export_args *)ap->a_ctl);
		break;
	default:
		error = journal_mountctl(ap);
		break;
	}
	return(error);
}

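/*
 * Note on the strategy routines below (added for clarity): BUF_CMD_READ
 * is serviced synchronously by iterating the inode's records
 * (hammer_vop_strategy_read), while BUF_CMD_WRITE only queues the bio on
 * the inode and signals the flusher; the actual media write is performed
 * later by the backend.
 */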
/*
 * hammer_vop_strategy { vp, bio }
 *
 * Strategy call, used for regular file read & write only.  Note that the
 * bp may represent a cluster.
 *
 * To simplify operation and allow better optimizations in the future,
 * this code does not make any assumptions with regards to buffer alignment
 * or size.
 */
static
int
hammer_vop_strategy(struct vop_strategy_args *ap)
{
	struct buf *bp;
	int error;

	bp = ap->a_bio->bio_buf;

	switch(bp->b_cmd) {
	case BUF_CMD_READ:
		error = hammer_vop_strategy_read(ap);
		break;
	case BUF_CMD_WRITE:
		error = hammer_vop_strategy_write(ap);
		break;
	default:
		bp->b_error = error = EINVAL;
		bp->b_flags |= B_ERROR;
		biodone(ap->a_bio);
		break;
	}
	return (error);
}

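/*
 * Note on the read iteration below (added for clarity): a data record's
 * key stores its ending offset (BASE+LEN), so the record's base offset is
 * recovered as key - data_len and the scan starts at bio_offset + 1.
 * Holes between records are zero-filled, and any cached front-end
 * truncation point (ip->trunc_off) limits how much record data is made
 * visible.
 */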
1677/*
1678 * Read from a regular file. Iterate the related records and fill in the
1679 * BIO/BUF. Gaps are zero-filled.
1680 *
1681 * The support code in hammer_object.c should be used to deal with mixed
1682 * in-memory and on-disk records.
1683 *
1684 * XXX atime update
1685 */
1686static
1687int
1688hammer_vop_strategy_read(struct vop_strategy_args *ap)
1689{
36f82b23
MD
1690 struct hammer_transaction trans;
1691 struct hammer_inode *ip;
8cd0a023 1692 struct hammer_cursor cursor;
8cd0a023
MD
1693 hammer_base_elm_t base;
1694 struct bio *bio;
1695 struct buf *bp;
1696 int64_t rec_offset;
a89aec1b 1697 int64_t ran_end;
195c19a1 1698 int64_t tmp64;
8cd0a023
MD
1699 int error;
1700 int boff;
1701 int roff;
1702 int n;
1703
1704 bio = ap->a_bio;
1705 bp = bio->bio_buf;
36f82b23 1706 ip = ap->a_vp->v_data;
8cd0a023 1707
36f82b23 1708 hammer_simple_transaction(&trans, ip->hmp);
4e17f465 1709 hammer_init_cursor(&trans, &cursor, &ip->cache[0], ip);
8cd0a023
MD
1710
1711 /*
1712 * Key range (begin and end inclusive) to scan. Note that the key's
c0ade690
MD
1713 * stored in the actual records represent BASE+LEN, not BASE. The
1714 * first record containing bio_offset will have a key > bio_offset.
8cd0a023
MD
1715 */
1716 cursor.key_beg.obj_id = ip->obj_id;
d5530d22 1717 cursor.key_beg.create_tid = 0;
8cd0a023 1718 cursor.key_beg.delete_tid = 0;
8cd0a023 1719 cursor.key_beg.obj_type = 0;
c0ade690 1720 cursor.key_beg.key = bio->bio_offset + 1;
d5530d22 1721 cursor.asof = ip->obj_asof;
47197d71 1722 cursor.flags |= HAMMER_CURSOR_ASOF | HAMMER_CURSOR_DATAEXTOK;
8cd0a023
MD
1723
1724 cursor.key_end = cursor.key_beg;
11ad5ade 1725 KKASSERT(ip->ino_data.obj_type == HAMMER_OBJTYPE_REGFILE);
b84de5af 1726#if 0
11ad5ade 1727 if (ip->ino_data.obj_type == HAMMER_OBJTYPE_DBFILE) {
a89aec1b
MD
1728 cursor.key_beg.rec_type = HAMMER_RECTYPE_DB;
1729 cursor.key_end.rec_type = HAMMER_RECTYPE_DB;
1730 cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;
b84de5af
MD
1731 } else
1732#endif
1733 {
c0ade690 1734 ran_end = bio->bio_offset + bp->b_bufsize;
a89aec1b
MD
1735 cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA;
1736 cursor.key_end.rec_type = HAMMER_RECTYPE_DATA;
195c19a1
MD
1737 tmp64 = ran_end + MAXPHYS + 1; /* work-around GCC-4 bug */
1738 if (tmp64 < ran_end)
a89aec1b
MD
1739 cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;
1740 else
7f7c1f84 1741 cursor.key_end.key = ran_end + MAXPHYS + 1;
a89aec1b 1742 }
d26d0ae9 1743 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
8cd0a023 1744
4e17f465 1745 error = hammer_ip_first(&cursor);
8cd0a023
MD
1746 boff = 0;
1747
a89aec1b
MD
1748 while (error == 0) {
1749 error = hammer_ip_resolve_data(&cursor);
1750 if (error)
66325755 1751 break;
11ad5ade 1752 base = &cursor.leaf->base;
8cd0a023 1753
11ad5ade 1754 rec_offset = base->key - cursor.leaf->data_len;
8cd0a023 1755
66325755 1756 /*
a89aec1b 1757 * Calculate the gap, if any, and zero-fill it.
66325755 1758 */
8cd0a023
MD
1759 n = (int)(rec_offset - (bio->bio_offset + boff));
1760 if (n > 0) {
a89aec1b
MD
1761 if (n > bp->b_bufsize - boff)
1762 n = bp->b_bufsize - boff;
8cd0a023
MD
1763 bzero((char *)bp->b_data + boff, n);
1764 boff += n;
1765 n = 0;
66325755 1766 }
8cd0a023
MD
1767
1768 /*
1769 * Calculate the data offset in the record and the number
1770 * of bytes we can copy.
a89aec1b
MD
1771 *
1772 * Note there is a degenerate case here where boff may
1773 * already be at bp->b_bufsize.
8cd0a023
MD
1774 */
		roff = -n;
		rec_offset += roff;
		n = cursor.leaf->data_len - roff;
		KKASSERT(n > 0);
		if (n > bp->b_bufsize - boff)
			n = bp->b_bufsize - boff;

		/*
		 * If we cached a truncation point on our front-end the
		 * on-disk version may still have physical records beyond
		 * that point.  Truncate visibility.
		 */
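		/*
		 * For example (illustrative numbers): with trunc_off 10000
		 * and a record spanning [8192, 16384), only the first
		 * 10000 - 8192 = 1808 bytes remain visible.  A record lying
		 * entirely at or beyond trunc_off contributes nothing.
		 */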
		if (ip->trunc_off <= rec_offset)
			n = 0;
		else if (ip->trunc_off < rec_offset + n)
			n = (int)(ip->trunc_off - rec_offset);

		/*
		 * Copy
		 */
		if (n) {
			bcopy((char *)cursor.data + roff,
			      (char *)bp->b_data + boff, n);
			boff += n;
		}
		if (boff == bp->b_bufsize)
			break;
		error = hammer_ip_next(&cursor);
	}
	hammer_done_cursor(&cursor);
	hammer_done_transaction(&trans);

	/*
	 * There may have been a gap after the last record.
	 */
	if (error == ENOENT)
		error = 0;
	if (error == 0 && boff != bp->b_bufsize) {
		KKASSERT(boff < bp->b_bufsize);
		bzero((char *)bp->b_data + boff, bp->b_bufsize - boff);
		/* boff = bp->b_bufsize; */
	}
	bp->b_resid = 0;
	bp->b_error = error;
	if (error)
		bp->b_flags |= B_ERROR;
	biodone(ap->a_bio);
	return(error);
}

/*
 * Write to a regular file.  Because this is a strategy call the OS is
 * trying to actually sync data to the media.  HAMMER can only flush
 * the entire inode (so the TID remains properly synchronized).
 *
 * Basically all we do here is place the bio on the inode's flush queue
 * and activate the flusher.
 */
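/*
 * The write to the media itself happens later: the flusher typically hands
 * each queued bio to hammer_dowrite() below, which performs the backend
 * write and disposes of the bio.
 */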
static
int
hammer_vop_strategy_write(struct vop_strategy_args *ap)
{
	hammer_inode_t ip;
	struct bio *bio;
	struct buf *bp;

	bio = ap->a_bio;
	bp = bio->bio_buf;
	ip = ap->a_vp->v_data;

	if (ip->flags & HAMMER_INODE_RO) {
		bp->b_error = EROFS;
		bp->b_flags |= B_ERROR;
		biodone(ap->a_bio);
		return(EROFS);
	}

	/*
	 * If the inode is being flushed we cannot re-queue buffers that it
	 * may already have flushed, or we could end up with duplicate
	 * records in the database.
	 */
	BUF_KERNPROC(bp);
	if (ip->flags & HAMMER_INODE_WRITE_ALT)
		TAILQ_INSERT_TAIL(&ip->bio_alt_list, bio, bio_act);
	else
		TAILQ_INSERT_TAIL(&ip->bio_list, bio, bio_act);
	++hammer_bio_count;
	hammer_modify_inode(NULL, ip, HAMMER_INODE_BUFS);

	hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
#if 0
	/*
	 * XXX
	 *
	 * If the write was not part of an integrated flush operation then
	 * signal a flush.
	 */
	if (ip->flush_state != HAMMER_FST_FLUSH ||
	    (ip->flags & HAMMER_INODE_WRITE_ALT)) {
		hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
	}
#endif
	return(0);
}

/*
 * Backend code which actually performs the write to the media.  This
 * routine is typically called from the flusher.  The bio will be disposed
 * of (biodone'd) by this routine.
 *
 * Iterate the related records and mark them for deletion.  If existing edge
 * records (left and right side) overlap our write they have to be marked
 * deleted and new records created, usually referencing a portion of the
 * original data.  Then add a record to represent the buffer.
 */
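/*
 * For example (illustrative numbers): flushing a 16K buffer at file offset
 * 32768 first deletes any existing records overlapping [32768, 49151] and
 * then lays down a single new record covering the buffer's data.
 */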
int
hammer_dowrite(hammer_cursor_t cursor, hammer_inode_t ip, struct bio *bio)
{
	struct buf *bp = bio->bio_buf;
	int error;

	KKASSERT(ip->flush_state == HAMMER_FST_FLUSH);

	/*
	 * If the inode is going or gone, just throw away any frontend
	 * buffers.
	 */
	if (ip->flags & HAMMER_INODE_DELETED) {
		bp->b_resid = 0;
		biodone(bio);
		--hammer_bio_count;
		return(0);
	}

	/*
	 * Delete any records overlapping our range.  This function will
	 * (eventually) properly truncate partial overlaps.
	 */
	if (ip->sync_ino_data.obj_type == HAMMER_OBJTYPE_DBFILE) {
		error = hammer_ip_delete_range(cursor, ip, bio->bio_offset,
					       bio->bio_offset);
	} else {
		error = hammer_ip_delete_range(cursor, ip, bio->bio_offset,
					       bio->bio_offset +
					       bp->b_bufsize - 1);
	}

	/*
	 * Add a single record to cover the write.  We can write a record
	 * with only the actual file data - for example, a small 200 byte
	 * file does not have to write out a 16K record.
	 *
	 * While the data size does not have to be aligned, we still do it
	 * to reduce fragmentation in a future allocation model.
	 */
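	/*
	 * With the 64-byte rounding below, that 200 byte file produces a
	 * 256 byte data record ((200 + 63) & ~63 == 256) rather than a
	 * full buffer-sized one.
	 */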
	if (error == 0) {
		int limit_size;

		if (ip->sync_ino_data.size - bio->bio_offset >
		    bp->b_bufsize) {
			limit_size = bp->b_bufsize;
		} else {
			limit_size = (int)(ip->sync_ino_data.size -
					   bio->bio_offset);
			KKASSERT(limit_size >= 0);
			limit_size = (limit_size + 63) & ~63;
		}
		if (limit_size) {
			error = hammer_ip_sync_data(cursor, ip, bio->bio_offset,
						    bp->b_data, limit_size);
		}
	}
	if (error)
		Debugger("hammer_dowrite: error");

	if (error) {
		bp->b_resid = bp->b_bufsize;
		bp->b_error = error;
		bp->b_flags |= B_ERROR;
	} else {
		bp->b_resid = 0;
	}
	biodone(bio);
	--hammer_bio_count;
	return(error);
}

/*
 * dounlink - disconnect a directory entry
 *
 * XXX whiteout support not really in yet
 */
static int
hammer_dounlink(hammer_transaction_t trans, struct nchandle *nch,
		struct vnode *dvp, struct ucred *cred, int flags)
{
	struct namecache *ncp;
	hammer_inode_t dip;
	hammer_inode_t ip;
	struct hammer_cursor cursor;
	int64_t namekey;
	int nlen, error;

	/*
	 * Calculate the namekey and set up the key range for the scan.  This
	 * works kinda like a chained hash table where the lower 32 bits
	 * of the namekey synthesize the chain.
	 *
	 * The key range is inclusive of both key_beg and key_end.
	 */
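	/*
	 * In other words, the name hash ends up in the upper bits of the
	 * namekey (exactly how is up to hammer_directory_namekey()), leaving
	 * the low 32 bits for collision resolution, so the scan below over
	 * [namekey, namekey | 0xFFFFFFFF] visits every entry whose name
	 * hashes to the same value.
	 */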
	dip = VTOI(dvp);
	ncp = nch->ncp;

	if (dip->flags & HAMMER_INODE_RO)
		return (EROFS);

	namekey = hammer_directory_namekey(ncp->nc_name, ncp->nc_nlen);
retry:
	hammer_init_cursor(trans, &cursor, &dip->cache[0], dip);
	cursor.key_beg.obj_id = dip->obj_id;
	cursor.key_beg.key = namekey;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
	cursor.key_beg.obj_type = 0;

	cursor.key_end = cursor.key_beg;
	cursor.key_end.key |= 0xFFFFFFFFULL;
	cursor.asof = dip->obj_asof;
	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;

	/*
	 * Scan all matching records (the chain) and locate the one matching
	 * the requested path component.  info->last_error contains the
	 * error code on search termination and could be 0, ENOENT, or
	 * something else.
	 *
	 * The hammer_ip_*() functions merge in-memory records with on-disk
	 * records for the purposes of the search.
	 */
	error = hammer_ip_first(&cursor);

	while (error == 0) {
		error = hammer_ip_resolve_data(&cursor);
		if (error)
			break;
		nlen = cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF;
		KKASSERT(nlen > 0);
		if (ncp->nc_nlen == nlen &&
		    bcmp(ncp->nc_name, cursor.data->entry.name, nlen) == 0) {
			break;
		}
		error = hammer_ip_next(&cursor);
	}

	/*
	 * If all is ok we have to get the inode so we can adjust nlinks.
	 *
	 * If the target is a directory, it must be empty.
	 */
	if (error == 0) {
		ip = hammer_get_inode(trans, &dip->cache[1],
				      cursor.data->entry.obj_id,
				      dip->hmp->asof, 0, &error);
		if (error == ENOENT) {
			kprintf("obj_id %016llx\n", cursor.data->entry.obj_id);
			Debugger("ENOENT unlinking object that should exist");
		}

		/*
		 * If we are trying to remove a directory the directory must
		 * be empty.
		 *
		 * WARNING: hammer_ip_check_directory_empty() may have to
		 * terminate the cursor to avoid a deadlock.  It is ok to
		 * call hammer_done_cursor() twice.
		 */
		if (error == 0 && ip->ino_data.obj_type ==
				  HAMMER_OBJTYPE_DIRECTORY) {
			error = hammer_ip_check_directory_empty(trans, ip);
		}

		/*
		 * Delete the directory entry.
		 *
		 * WARNING: hammer_ip_del_directory() may have to terminate
		 * the cursor to avoid a deadlock.  It is ok to call
		 * hammer_done_cursor() twice.
		 */
		if (error == 0) {
			error = hammer_ip_del_directory(trans, &cursor,
							dip, ip);
		}
		if (error == 0) {
			cache_setunresolved(nch);
			cache_setvp(nch, NULL);
			/* XXX locking */
			if (ip->vp)
				cache_inval_vp(ip->vp, CINV_DESTROY);
		}
		hammer_rel_inode(ip, 0);
	}
	hammer_done_cursor(&cursor);
	if (error == EDEADLK)
		goto retry;

	return (error);
}

/************************************************************************
 *			    FIFO AND SPECFS OPS				*
 ************************************************************************
 *
 */

static int
hammer_vop_fifoclose (struct vop_close_args *ap)
{
	/* XXX update itimes */
	return (VOCALL(&fifo_vnode_vops, &ap->a_head));
}

static int
hammer_vop_fiforead (struct vop_read_args *ap)
{
	int error;

	error = VOCALL(&fifo_vnode_vops, &ap->a_head);
	/* XXX update access time */
	return (error);
}

static int
hammer_vop_fifowrite (struct vop_write_args *ap)
{
	int error;

	error = VOCALL(&fifo_vnode_vops, &ap->a_head);
	/* XXX update access time */
	return (error);
}

static int
hammer_vop_specclose (struct vop_close_args *ap)
{
	/* XXX update itimes */
	return (VOCALL(&spec_vnode_vops, &ap->a_head));
}

static int
hammer_vop_specread (struct vop_read_args *ap)
{
	/* XXX update access time */
	return (VOCALL(&spec_vnode_vops, &ap->a_head));
}

static int
hammer_vop_specwrite (struct vop_write_args *ap)
{
	/* XXX update last change time */
	return (VOCALL(&spec_vnode_vops, &ap->a_head));
}