/*
 * Copyright (c) 2007-2008 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.58 2008/06/02 20:19:03 dillon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/namecache.h>
#include <sys/vnode.h>
#include <sys/lockf.h>
#include <sys/event.h>
#include <sys/stat.h>
#include <sys/dirent.h>
#include <vm/vm_extern.h>
#include <vfs/fifofs/fifo.h>
#include "hammer.h"

/*
 * USERFS VNOPS
 */
/*static int hammer_vop_vnoperate(struct vop_generic_args *);*/
static int hammer_vop_fsync(struct vop_fsync_args *);
static int hammer_vop_read(struct vop_read_args *);
static int hammer_vop_write(struct vop_write_args *);
static int hammer_vop_access(struct vop_access_args *);
static int hammer_vop_advlock(struct vop_advlock_args *);
static int hammer_vop_close(struct vop_close_args *);
static int hammer_vop_ncreate(struct vop_ncreate_args *);
static int hammer_vop_getattr(struct vop_getattr_args *);
static int hammer_vop_nresolve(struct vop_nresolve_args *);
static int hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *);
static int hammer_vop_nlink(struct vop_nlink_args *);
static int hammer_vop_nmkdir(struct vop_nmkdir_args *);
static int hammer_vop_nmknod(struct vop_nmknod_args *);
static int hammer_vop_open(struct vop_open_args *);
static int hammer_vop_pathconf(struct vop_pathconf_args *);
static int hammer_vop_print(struct vop_print_args *);
static int hammer_vop_readdir(struct vop_readdir_args *);
static int hammer_vop_readlink(struct vop_readlink_args *);
static int hammer_vop_nremove(struct vop_nremove_args *);
static int hammer_vop_nrename(struct vop_nrename_args *);
static int hammer_vop_nrmdir(struct vop_nrmdir_args *);
static int hammer_vop_setattr(struct vop_setattr_args *);
static int hammer_vop_strategy(struct vop_strategy_args *);
static int hammer_vop_nsymlink(struct vop_nsymlink_args *);
static int hammer_vop_nwhiteout(struct vop_nwhiteout_args *);
static int hammer_vop_ioctl(struct vop_ioctl_args *);
static int hammer_vop_mountctl(struct vop_mountctl_args *);

static int hammer_vop_fifoclose (struct vop_close_args *);
static int hammer_vop_fiforead (struct vop_read_args *);
static int hammer_vop_fifowrite (struct vop_write_args *);

static int hammer_vop_specclose (struct vop_close_args *);
static int hammer_vop_specread (struct vop_read_args *);
static int hammer_vop_specwrite (struct vop_write_args *);

struct vop_ops hammer_vnode_vops = {
	.vop_default =		vop_defaultop,
	.vop_fsync =		hammer_vop_fsync,
	.vop_getpages =		vop_stdgetpages,
	.vop_putpages =		vop_stdputpages,
	.vop_read =		hammer_vop_read,
	.vop_write =		hammer_vop_write,
	.vop_access =		hammer_vop_access,
	.vop_advlock =		hammer_vop_advlock,
	.vop_close =		hammer_vop_close,
	.vop_ncreate =		hammer_vop_ncreate,
	.vop_getattr =		hammer_vop_getattr,
	.vop_inactive =		hammer_vop_inactive,
	.vop_reclaim =		hammer_vop_reclaim,
	.vop_nresolve =		hammer_vop_nresolve,
	.vop_nlookupdotdot =	hammer_vop_nlookupdotdot,
	.vop_nlink =		hammer_vop_nlink,
	.vop_nmkdir =		hammer_vop_nmkdir,
	.vop_nmknod =		hammer_vop_nmknod,
	.vop_open =		hammer_vop_open,
	.vop_pathconf =		hammer_vop_pathconf,
	.vop_print =		hammer_vop_print,
	.vop_readdir =		hammer_vop_readdir,
	.vop_readlink =		hammer_vop_readlink,
	.vop_nremove =		hammer_vop_nremove,
	.vop_nrename =		hammer_vop_nrename,
	.vop_nrmdir =		hammer_vop_nrmdir,
	.vop_setattr =		hammer_vop_setattr,
	.vop_strategy =		hammer_vop_strategy,
	.vop_nsymlink =		hammer_vop_nsymlink,
	.vop_nwhiteout =	hammer_vop_nwhiteout,
	.vop_ioctl =		hammer_vop_ioctl,
	.vop_mountctl =		hammer_vop_mountctl
};

struct vop_ops hammer_spec_vops = {
	.vop_default =		spec_vnoperate,
	.vop_fsync =		hammer_vop_fsync,
	.vop_read =		hammer_vop_specread,
	.vop_write =		hammer_vop_specwrite,
	.vop_access =		hammer_vop_access,
	.vop_close =		hammer_vop_specclose,
	.vop_getattr =		hammer_vop_getattr,
	.vop_inactive =		hammer_vop_inactive,
	.vop_reclaim =		hammer_vop_reclaim,
	.vop_setattr =		hammer_vop_setattr
};

struct vop_ops hammer_fifo_vops = {
	.vop_default =		fifo_vnoperate,
	.vop_fsync =		hammer_vop_fsync,
	.vop_read =		hammer_vop_fiforead,
	.vop_write =		hammer_vop_fifowrite,
	.vop_access =		hammer_vop_access,
	.vop_close =		hammer_vop_fifoclose,
	.vop_getattr =		hammer_vop_getattr,
	.vop_inactive =		hammer_vop_inactive,
	.vop_reclaim =		hammer_vop_reclaim,
	.vop_setattr =		hammer_vop_setattr
};

static int hammer_dounlink(hammer_transaction_t trans, struct nchandle *nch,
			   struct vnode *dvp, struct ucred *cred, int flags);
static int hammer_vop_strategy_read(struct vop_strategy_args *ap);
static int hammer_vop_strategy_write(struct vop_strategy_args *ap);
static void hammer_cleanup_write_io(hammer_inode_t ip);

#if 0
static
int
hammer_vop_vnoperate(struct vop_generic_args *ap)
{
	return (VOCALL(&hammer_vnode_vops, ap));
}
#endif

/*
 * hammer_vop_fsync { vp, waitfor }
 */
static
int
hammer_vop_fsync(struct vop_fsync_args *ap)
{
	hammer_inode_t ip = VTOI(ap->a_vp);

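	/*
	 * Flush the inode, sync the vnode's dirty buffers via vfsync(),
	 * and wait for the inode flush to complete if the caller
	 * requested a synchronous fsync (MNT_WAIT).
	 */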
	hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
	vfsync(ap->a_vp, ap->a_waitfor, 1, NULL, NULL);
	if (ap->a_waitfor == MNT_WAIT)
		hammer_wait_inode(ip);
	return (ip->error);
}

/*
 * hammer_vop_read { vp, uio, ioflag, cred }
 */
static
int
hammer_vop_read(struct vop_read_args *ap)
{
	struct hammer_transaction trans;
	hammer_inode_t ip;
	off_t offset;
	struct buf *bp;
	struct uio *uio;
	int error;
	int n;
	int seqcount;

	if (ap->a_vp->v_type != VREG)
		return (EINVAL);
	ip = VTOI(ap->a_vp);
	error = 0;
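	/*
	 * The caller's sequential-access heuristic is passed in the upper
	 * bits of ioflag; in this version it is only consumed by the
	 * disabled cluster_read() path below.
	 */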
	seqcount = ap->a_ioflag >> 16;

	hammer_start_transaction(&trans, ip->hmp);

	/*
	 * Access the data in HAMMER_BUFSIZE blocks via the buffer cache.
	 */
	uio = ap->a_uio;
	while (uio->uio_resid > 0 && uio->uio_offset < ip->ino_data.size) {
		offset = uio->uio_offset & HAMMER_BUFMASK;
#if 0
		error = cluster_read(ap->a_vp, ip->ino_data.size,
				     uio->uio_offset - offset, HAMMER_BUFSIZE,
				     MAXBSIZE, seqcount, &bp);
#endif
		error = bread(ap->a_vp, uio->uio_offset - offset,
			      HAMMER_BUFSIZE, &bp);
		if (error) {
			brelse(bp);
			break;
		}
		/* bp->b_flags |= B_CLUSTEROK; temporarily disabled */
		n = HAMMER_BUFSIZE - offset;
		if (n > uio->uio_resid)
			n = uio->uio_resid;
		if (n > ip->ino_data.size - uio->uio_offset)
			n = (int)(ip->ino_data.size - uio->uio_offset);
		error = uiomove((char *)bp->b_data + offset, n, uio);
		if (error) {
			bqrelse(bp);
			break;
		}
		bqrelse(bp);
	}
	if ((ip->flags & HAMMER_INODE_RO) == 0 &&
	    (ip->hmp->mp->mnt_flag & MNT_NOATIME) == 0) {
		ip->ino_leaf.atime = trans.time;
		hammer_modify_inode(&trans, ip, HAMMER_INODE_ITIMES);
	}
	hammer_done_transaction(&trans);
	return (error);
}

/*
 * hammer_vop_write { vp, uio, ioflag, cred }
 */
static
int
hammer_vop_write(struct vop_write_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *ip;
	struct uio *uio;
	off_t offset;
	struct buf *bp;
	int error;
	int n;
	int flags;
	int count;

	if (ap->a_vp->v_type != VREG)
		return (EINVAL);
	ip = VTOI(ap->a_vp);
	error = 0;

	if (ip->flags & HAMMER_INODE_RO)
		return (EROFS);

	/*
	 * Create a transaction to cover the operations we perform.
	 */
	hammer_start_transaction(&trans, ip->hmp);
	uio = ap->a_uio;

	/*
	 * Check append mode
	 */
	if (ap->a_ioflag & IO_APPEND)
		uio->uio_offset = ip->ino_data.size;

	/*
	 * Check for illegal write offsets.  Valid range is 0...2^63-1
	 */
	if (uio->uio_offset < 0 || uio->uio_offset + uio->uio_resid <= 0) {
		hammer_done_transaction(&trans);
		return (EFBIG);
	}

	/*
	 * Access the data in HAMMER_BUFSIZE blocks via the buffer cache.
	 */
	count = 0;
	while (uio->uio_resid > 0) {
		int fixsize = 0;

		if ((error = hammer_checkspace(trans.hmp)) != 0)
			break;

		/*
		 * Do not allow huge writes to deadlock the buffer cache
		 */
		if ((++count & 15) == 0) {
			vn_unlock(ap->a_vp);
			if ((ap->a_ioflag & IO_NOBWILL) == 0)
				bwillwrite();
			vn_lock(ap->a_vp, LK_EXCLUSIVE|LK_RETRY);
		}

		offset = uio->uio_offset & HAMMER_BUFMASK;
		n = HAMMER_BUFSIZE - offset;
		if (n > uio->uio_resid)
			n = uio->uio_resid;
		if (uio->uio_offset + n > ip->ino_data.size) {
			vnode_pager_setsize(ap->a_vp, uio->uio_offset + n);
			fixsize = 1;
		}

		if (uio->uio_segflg == UIO_NOCOPY) {
			/*
			 * Issuing a write with the same data backing the
			 * buffer.  Instantiate the buffer to collect the
			 * backing vm pages, then read-in any missing bits.
			 *
			 * This case is used by vop_stdputpages().
			 */
			bp = getblk(ap->a_vp, uio->uio_offset - offset,
				    HAMMER_BUFSIZE, GETBLK_BHEAVY, 0);
			if ((bp->b_flags & B_CACHE) == 0) {
				bqrelse(bp);
				error = bread(ap->a_vp,
					      uio->uio_offset - offset,
					      HAMMER_BUFSIZE, &bp);
			}
		} else if (offset == 0 && uio->uio_resid >= HAMMER_BUFSIZE) {
			/*
			 * Even though we are entirely overwriting the buffer
			 * we may still have to zero it out to avoid a
			 * mmap/write visibility issue.
			 */
			bp = getblk(ap->a_vp, uio->uio_offset - offset,
				    HAMMER_BUFSIZE, GETBLK_BHEAVY, 0);
			if ((bp->b_flags & B_CACHE) == 0)
				vfs_bio_clrbuf(bp);
		} else if (uio->uio_offset - offset >= ip->ino_data.size) {
			/*
			 * If the base offset of the buffer is beyond the
			 * file EOF, we don't have to issue a read.
			 */
			bp = getblk(ap->a_vp, uio->uio_offset - offset,
				    HAMMER_BUFSIZE, GETBLK_BHEAVY, 0);
			vfs_bio_clrbuf(bp);
		} else {
			/*
			 * Partial overwrite, read in any missing bits then
			 * replace the portion being written.
			 */
			error = bread(ap->a_vp, uio->uio_offset - offset,
				      HAMMER_BUFSIZE, &bp);
			if (error == 0)
				bheavy(bp);
		}
		if (error == 0)
			error = uiomove((char *)bp->b_data + offset, n, uio);

		/*
		 * If we screwed up we have to undo any VM size changes we
		 * made.
		 */
		if (error) {
			brelse(bp);
			if (fixsize) {
				vtruncbuf(ap->a_vp, ip->ino_data.size,
					  HAMMER_BUFSIZE);
			}
			break;
		}
		/* bp->b_flags |= B_CLUSTEROK; temporarily disabled */
		if (ip->ino_data.size < uio->uio_offset) {
			ip->ino_data.size = uio->uio_offset;
			flags = HAMMER_INODE_DDIRTY;
			vnode_pager_setsize(ap->a_vp, ip->ino_data.size);
		} else {
			flags = 0;
		}
		ip->ino_data.mtime = trans.time;
		flags |= HAMMER_INODE_ITIMES | HAMMER_INODE_BUFS;
		flags |= HAMMER_INODE_DDIRTY;	/* XXX mtime */
		hammer_modify_inode(&trans, ip, flags);

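		/*
		 * Account for newly dirtied buffers against the inode's
		 * and the mount's reserved data buffer counts.
		 */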
		if ((bp->b_flags & B_DIRTY) == 0) {
			++ip->rsv_databufs;
			++ip->hmp->rsv_databufs;
		}

		if (ap->a_ioflag & IO_SYNC) {
			bwrite(bp);
		} else if (ap->a_ioflag & IO_DIRECT) {
			bawrite(bp);
#if 0
		} else if ((ap->a_ioflag >> 16) == IO_SEQMAX &&
			   (uio->uio_offset & HAMMER_BUFMASK) == 0) {
			/*
			 * XXX HAMMER can only fsync the whole inode,
			 * doing it on every buffer would be a bad idea.
			 */
			/*
			 * If seqcount indicates sequential operation and
			 * we just finished filling a buffer, push it out
			 * now to prevent the buffer cache from becoming
			 * too full, which would trigger non-optimal
			 * flushes.
			 */
			bdwrite(bp);
#endif
		} else {
			bdwrite(bp);
		}
	}
	hammer_done_transaction(&trans);
	return (error);
}

/*
 * hammer_vop_access { vp, mode, cred }
 */
static
int
hammer_vop_access(struct vop_access_args *ap)
{
	struct hammer_inode *ip = VTOI(ap->a_vp);
	uid_t uid;
	gid_t gid;
	int error;

	uid = hammer_to_unix_xid(&ip->ino_data.uid);
	gid = hammer_to_unix_xid(&ip->ino_data.gid);

	error = vop_helper_access(ap, uid, gid, ip->ino_data.mode,
				  ip->ino_data.uflags);
	return (error);
}

/*
 * hammer_vop_advlock { vp, id, op, fl, flags }
 */
static
int
hammer_vop_advlock(struct vop_advlock_args *ap)
{
	struct hammer_inode *ip = VTOI(ap->a_vp);

	return (lf_advlock(ap, &ip->advlock, ip->ino_data.size));
}

/*
 * hammer_vop_close { vp, fflag }
 */
static
int
hammer_vop_close(struct vop_close_args *ap)
{
	return (vop_stdclose(ap));
}

/*
 * hammer_vop_ncreate { nch, dvp, vpp, cred, vap }
 *
 * The operating system has already ensured that the directory entry
 * does not exist and done all appropriate namespace locking.
 */
static
int
hammer_vop_ncreate(struct vop_ncreate_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *dip;
	struct hammer_inode *nip;
	struct nchandle *nch;
	int error;

	nch = ap->a_nch;
	dip = VTOI(ap->a_dvp);

	if (dip->flags & HAMMER_INODE_RO)
		return (EROFS);
	if ((error = hammer_checkspace(dip->hmp)) != 0)
		return (error);

	/*
	 * Create a transaction to cover the operations we perform.
	 */
	hammer_start_transaction(&trans, dip->hmp);

	/*
	 * Create a new filesystem object of the requested type.  The
	 * returned inode will be referenced and shared-locked to prevent
	 * it from being moved to the flusher.
	 */

	error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
	if (error) {
		hkprintf("hammer_create_inode error %d\n", error);
		hammer_done_transaction(&trans);
		*ap->a_vpp = NULL;
		return (error);
	}

	/*
	 * Add the new filesystem object to the directory.  This will also
	 * bump the inode's link count.
	 */
	error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);
	if (error)
		hkprintf("hammer_ip_add_directory error %d\n", error);

	/*
	 * Finish up.
	 */
	if (error) {
		hammer_rel_inode(nip, 0);
		hammer_done_transaction(&trans);
		*ap->a_vpp = NULL;
	} else {
		error = hammer_get_vnode(nip, ap->a_vpp);
		hammer_done_transaction(&trans);
		hammer_rel_inode(nip, 0);
		if (error == 0) {
			cache_setunresolved(ap->a_nch);
			cache_setvp(ap->a_nch, *ap->a_vpp);
		}
	}
	return (error);
}

/*
 * hammer_vop_getattr { vp, vap }
 *
 * Retrieve an inode's attribute information.  When accessing inodes
 * historically we fake the atime field to ensure consistent results.
 * The atime field is stored in the B-Tree element and allowed to be
 * updated without cycling the element.
 */
static
int
hammer_vop_getattr(struct vop_getattr_args *ap)
{
	struct hammer_inode *ip = VTOI(ap->a_vp);
	struct vattr *vap = ap->a_vap;

#if 0
	if (cache_check_fsmid_vp(ap->a_vp, &ip->fsmid) &&
	    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0 &&
	    ip->obj_asof == XXX
	) {
		/* LAZYMOD XXX */
	}
	hammer_itimes(ap->a_vp);
#endif

	vap->va_fsid = ip->hmp->fsid_udev;
	vap->va_fileid = ip->ino_leaf.base.obj_id;
	vap->va_mode = ip->ino_data.mode;
	vap->va_nlink = ip->ino_data.nlinks;
	vap->va_uid = hammer_to_unix_xid(&ip->ino_data.uid);
	vap->va_gid = hammer_to_unix_xid(&ip->ino_data.gid);
	vap->va_rmajor = 0;
	vap->va_rminor = 0;
	vap->va_size = ip->ino_data.size;
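	/*
	 * Historical (read-only as-of) inodes report mtime for atime so
	 * the faked atime stays consistent; live inodes report the atime
	 * stored in the B-Tree leaf.
	 */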
	if (ip->flags & HAMMER_INODE_RO)
		hammer_to_timespec(ip->ino_data.mtime, &vap->va_atime);
	else
		hammer_to_timespec(ip->ino_leaf.atime, &vap->va_atime);
	hammer_to_timespec(ip->ino_data.mtime, &vap->va_mtime);
	hammer_to_timespec(ip->ino_data.ctime, &vap->va_ctime);
	vap->va_flags = ip->ino_data.uflags;
	vap->va_gen = 1;	/* hammer inums are unique for all time */
	vap->va_blocksize = HAMMER_BUFSIZE;
	vap->va_bytes = (ip->ino_data.size + 63) & ~63;
	vap->va_type = hammer_get_vnode_type(ip->ino_data.obj_type);
	vap->va_filerev = 0;	/* XXX */
	/* mtime uniquely identifies any adjustments made to the file */
	vap->va_fsmid = ip->ino_data.mtime;
	vap->va_uid_uuid = ip->ino_data.uid;
	vap->va_gid_uuid = ip->ino_data.gid;
	vap->va_fsid_uuid = ip->hmp->fsid;
	vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
			  VA_FSID_UUID_VALID;

	switch (ip->ino_data.obj_type) {
	case HAMMER_OBJTYPE_CDEV:
	case HAMMER_OBJTYPE_BDEV:
		vap->va_rmajor = ip->ino_data.rmajor;
		vap->va_rminor = ip->ino_data.rminor;
		break;
	default:
		break;
	}

	return(0);
}

/*
 * hammer_vop_nresolve { nch, dvp, cred }
 *
 * Locate the requested directory entry.
 */
static
int
hammer_vop_nresolve(struct vop_nresolve_args *ap)
{
	struct hammer_transaction trans;
	struct namecache *ncp;
	hammer_inode_t dip;
	hammer_inode_t ip;
	hammer_tid_t asof;
	struct hammer_cursor cursor;
	struct vnode *vp;
	int64_t namekey;
	int error;
	int i;
	int nlen;
	int flags;
	u_int64_t obj_id;

	/*
	 * Misc initialization, plus handle as-of name extensions.  Look for
	 * the '@@' extension.  Note that as-of files and directories cannot
	 * be modified.
	 */
	dip = VTOI(ap->a_dvp);
	ncp = ap->a_nch->ncp;
	asof = dip->obj_asof;
	nlen = ncp->nc_nlen;
	flags = dip->flags;

	hammer_simple_transaction(&trans, dip->hmp);

	for (i = 0; i < nlen; ++i) {
		if (ncp->nc_name[i] == '@' && ncp->nc_name[i+1] == '@') {
			asof = hammer_str_to_tid(ncp->nc_name + i + 2);
			flags |= HAMMER_INODE_RO;
			break;
		}
	}
	nlen = i;

	/*
	 * If there is no path component the time extension is relative to
	 * dip.
	 */
	if (nlen == 0) {
		ip = hammer_get_inode(&trans, &dip->cache[1], dip->obj_id,
				      asof, flags, &error);
		if (error == 0) {
			error = hammer_get_vnode(ip, &vp);
			hammer_rel_inode(ip, 0);
		} else {
			vp = NULL;
		}
		if (error == 0) {
			vn_unlock(vp);
			cache_setvp(ap->a_nch, vp);
			vrele(vp);
		}
		goto done;
	}

	/*
	 * Calculate the namekey and setup the key range for the scan.  This
	 * works kinda like a chained hash table where the lower 32 bits
	 * of the namekey synthesize the chain.
	 *
	 * The key range is inclusive of both key_beg and key_end.
	 */
	namekey = hammer_directory_namekey(ncp->nc_name, nlen);

	error = hammer_init_cursor(&trans, &cursor, &dip->cache[0], dip);
	cursor.key_beg.localization = HAMMER_LOCALIZE_MISC;
	cursor.key_beg.obj_id = dip->obj_id;
	cursor.key_beg.key = namekey;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
	cursor.key_beg.obj_type = 0;

	cursor.key_end = cursor.key_beg;
	cursor.key_end.key |= 0xFFFFFFFFULL;
	cursor.asof = asof;
	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;

	/*
	 * Scan all matching records (the chain), locate the one matching
	 * the requested path component.
	 *
	 * The hammer_ip_*() functions merge in-memory records with on-disk
	 * records for the purposes of the search.
	 */
	obj_id = 0;

	if (error == 0) {
		error = hammer_ip_first(&cursor);
		while (error == 0) {
			error = hammer_ip_resolve_data(&cursor);
			if (error)
				break;
			if (nlen == cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF &&
			    bcmp(ncp->nc_name, cursor.data->entry.name, nlen) == 0) {
				obj_id = cursor.data->entry.obj_id;
				break;
			}
			error = hammer_ip_next(&cursor);
		}
	}
	hammer_done_cursor(&cursor);
	if (error == 0) {
		ip = hammer_get_inode(&trans, &dip->cache[1],
				      obj_id, asof, flags, &error);
		if (error == 0) {
			error = hammer_get_vnode(ip, &vp);
			hammer_rel_inode(ip, 0);
		} else {
			vp = NULL;
		}
		if (error == 0) {
			vn_unlock(vp);
			cache_setvp(ap->a_nch, vp);
			vrele(vp);
		}
	} else if (error == ENOENT) {
		cache_setvp(ap->a_nch, NULL);
	}
done:
	hammer_done_transaction(&trans);
	return (error);
}

/*
 * hammer_vop_nlookupdotdot { dvp, vpp, cred }
 *
 * Locate the parent directory of a directory vnode.
 *
 * dvp is referenced but not locked.  *vpp must be returned referenced and
 * locked.  A parent_obj_id of 0 does not necessarily indicate that we are
 * at the root, instead it could indicate that the directory we were in was
 * removed.
 *
 * NOTE: as-of sequences are not linked into the directory structure.  If
 * we are at the root with a different asof than the mount point, reload
 * the same directory with the mount point's asof.  I'm not sure what this
 * will do to NFS.  We encode ASOF stamps in NFS file handles so it might not
 * get confused, but it hasn't been tested.
 */
static
int
hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *dip;
	struct hammer_inode *ip;
	int64_t parent_obj_id;
	hammer_tid_t asof;
	int error;

	dip = VTOI(ap->a_dvp);
	asof = dip->obj_asof;
	parent_obj_id = dip->ino_data.parent_obj_id;

	if (parent_obj_id == 0) {
		if (dip->obj_id == HAMMER_OBJID_ROOT &&
		    asof != dip->hmp->asof) {
			parent_obj_id = dip->obj_id;
			asof = dip->hmp->asof;
			*ap->a_fakename = kmalloc(19, M_TEMP, M_WAITOK);
			ksnprintf(*ap->a_fakename, 19, "0x%016llx",
				  dip->obj_asof);
		} else {
			*ap->a_vpp = NULL;
			return ENOENT;
		}
	}

	hammer_simple_transaction(&trans, dip->hmp);

	ip = hammer_get_inode(&trans, &dip->cache[1], parent_obj_id,
			      asof, dip->flags, &error);
	if (ip) {
		error = hammer_get_vnode(ip, ap->a_vpp);
		hammer_rel_inode(ip, 0);
	} else {
		*ap->a_vpp = NULL;
	}
	hammer_done_transaction(&trans);
	return (error);
}

/*
 * hammer_vop_nlink { nch, dvp, vp, cred }
 */
static
int
hammer_vop_nlink(struct vop_nlink_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *dip;
	struct hammer_inode *ip;
	struct nchandle *nch;
	int error;

	nch = ap->a_nch;
	dip = VTOI(ap->a_dvp);
	ip = VTOI(ap->a_vp);

	if (dip->flags & HAMMER_INODE_RO)
		return (EROFS);
	if (ip->flags & HAMMER_INODE_RO)
		return (EROFS);
	if ((error = hammer_checkspace(dip->hmp)) != 0)
		return (error);

	/*
	 * Create a transaction to cover the operations we perform.
	 */
	hammer_start_transaction(&trans, dip->hmp);

	/*
	 * Add the filesystem object to the directory.  Note that neither
	 * dip nor ip are referenced or locked, but their vnodes are
	 * referenced.  This function will bump the inode's link count.
	 */
	error = hammer_ip_add_directory(&trans, dip, nch->ncp, ip);

	/*
	 * Finish up.
	 */
	if (error == 0) {
		cache_setunresolved(nch);
		cache_setvp(nch, ap->a_vp);
	}
	hammer_done_transaction(&trans);
	return (error);
}

/*
 * hammer_vop_nmkdir { nch, dvp, vpp, cred, vap }
 *
 * The operating system has already ensured that the directory entry
 * does not exist and done all appropriate namespace locking.
 */
static
int
hammer_vop_nmkdir(struct vop_nmkdir_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *dip;
	struct hammer_inode *nip;
	struct nchandle *nch;
	int error;

	nch = ap->a_nch;
	dip = VTOI(ap->a_dvp);

	if (dip->flags & HAMMER_INODE_RO)
		return (EROFS);
	if ((error = hammer_checkspace(dip->hmp)) != 0)
		return (error);

	/*
	 * Create a transaction to cover the operations we perform.
	 */
	hammer_start_transaction(&trans, dip->hmp);

	/*
	 * Create a new filesystem object of the requested type.  The
	 * returned inode will be referenced but not locked.
	 */
	error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
	if (error) {
		hkprintf("hammer_mkdir error %d\n", error);
		hammer_done_transaction(&trans);
		*ap->a_vpp = NULL;
		return (error);
	}
	/*
	 * Add the new filesystem object to the directory.  This will also
	 * bump the inode's link count.
	 */
	error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);
	if (error)
		hkprintf("hammer_mkdir (add) error %d\n", error);

	/*
	 * Finish up.
	 */
	if (error) {
		hammer_rel_inode(nip, 0);
		*ap->a_vpp = NULL;
	} else {
		error = hammer_get_vnode(nip, ap->a_vpp);
		hammer_rel_inode(nip, 0);
		if (error == 0) {
			cache_setunresolved(ap->a_nch);
			cache_setvp(ap->a_nch, *ap->a_vpp);
		}
	}
	hammer_done_transaction(&trans);
	return (error);
}

/*
 * hammer_vop_nmknod { nch, dvp, vpp, cred, vap }
 *
 * The operating system has already ensured that the directory entry
 * does not exist and done all appropriate namespace locking.
 */
static
int
hammer_vop_nmknod(struct vop_nmknod_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *dip;
	struct hammer_inode *nip;
	struct nchandle *nch;
	int error;

	nch = ap->a_nch;
	dip = VTOI(ap->a_dvp);

	if (dip->flags & HAMMER_INODE_RO)
		return (EROFS);
	if ((error = hammer_checkspace(dip->hmp)) != 0)
		return (error);

	/*
	 * Create a transaction to cover the operations we perform.
	 */
	hammer_start_transaction(&trans, dip->hmp);

	/*
	 * Create a new filesystem object of the requested type.  The
	 * returned inode will be referenced but not locked.
	 */
	error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
	if (error) {
		hammer_done_transaction(&trans);
		*ap->a_vpp = NULL;
		return (error);
	}

	/*
	 * Add the new filesystem object to the directory.  This will also
	 * bump the inode's link count.
	 */
	error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);

	/*
	 * Finish up.
	 */
	if (error) {
		hammer_rel_inode(nip, 0);
		*ap->a_vpp = NULL;
	} else {
		error = hammer_get_vnode(nip, ap->a_vpp);
		hammer_rel_inode(nip, 0);
		if (error == 0) {
			cache_setunresolved(ap->a_nch);
			cache_setvp(ap->a_nch, *ap->a_vpp);
		}
	}
	hammer_done_transaction(&trans);
	return (error);
}

/*
 * hammer_vop_open { vp, mode, cred, fp }
 */
static
int
hammer_vop_open(struct vop_open_args *ap)
{
	if ((ap->a_mode & FWRITE) && (VTOI(ap->a_vp)->flags & HAMMER_INODE_RO))
		return (EROFS);

	return(vop_stdopen(ap));
}

/*
 * hammer_vop_pathconf { vp, name, retval }
 */
static
int
hammer_vop_pathconf(struct vop_pathconf_args *ap)
{
	return EOPNOTSUPP;
}

/*
 * hammer_vop_print { vp }
 */
static
int
hammer_vop_print(struct vop_print_args *ap)
{
	return EOPNOTSUPP;
}

/*
 * hammer_vop_readdir { vp, uio, cred, *eofflag, *ncookies, off_t **cookies }
 */
static
int
hammer_vop_readdir(struct vop_readdir_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_cursor cursor;
	struct hammer_inode *ip;
	struct uio *uio;
	hammer_base_elm_t base;
	int error;
	int cookie_index;
	int ncookies;
	off_t *cookies;
	off_t saveoff;
	int r;

	ip = VTOI(ap->a_vp);
	uio = ap->a_uio;
	saveoff = uio->uio_offset;

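	/*
	 * Allocate a seek-cookie array when the caller wants cookies,
	 * estimating one directory entry per 16 bytes of uio space and
	 * capping the allocation at 1024 cookies.
	 */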
	if (ap->a_ncookies) {
		ncookies = uio->uio_resid / 16 + 1;
		if (ncookies > 1024)
			ncookies = 1024;
		cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
		cookie_index = 0;
	} else {
		ncookies = -1;
		cookies = NULL;
		cookie_index = 0;
	}

	hammer_simple_transaction(&trans, ip->hmp);

	/*
	 * Handle artificial entries
	 */
	error = 0;
	if (saveoff == 0) {
		r = vop_write_dirent(&error, uio, ip->obj_id, DT_DIR, 1, ".");
		if (r)
			goto done;
		if (cookies)
			cookies[cookie_index] = saveoff;
		++saveoff;
		++cookie_index;
		if (cookie_index == ncookies)
			goto done;
	}
	if (saveoff == 1) {
		if (ip->ino_data.parent_obj_id) {
			r = vop_write_dirent(&error, uio,
					     ip->ino_data.parent_obj_id,
					     DT_DIR, 2, "..");
		} else {
			r = vop_write_dirent(&error, uio,
					     ip->obj_id, DT_DIR, 2, "..");
		}
		if (r)
			goto done;
		if (cookies)
			cookies[cookie_index] = saveoff;
		++saveoff;
		++cookie_index;
		if (cookie_index == ncookies)
			goto done;
	}

	/*
	 * Key range (begin and end inclusive) to scan.  Directory keys
	 * directly translate to a 64 bit 'seek' position.
	 */
	hammer_init_cursor(&trans, &cursor, &ip->cache[0], ip);
	cursor.key_beg.localization = HAMMER_LOCALIZE_MISC;
	cursor.key_beg.obj_id = ip->obj_id;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
	cursor.key_beg.obj_type = 0;
	cursor.key_beg.key = saveoff;

	cursor.key_end = cursor.key_beg;
	cursor.key_end.key = HAMMER_MAX_KEY;
	cursor.asof = ip->obj_asof;
	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;

	error = hammer_ip_first(&cursor);

	while (error == 0) {
		error = hammer_ip_resolve_data(&cursor);
		if (error)
			break;
		base = &cursor.leaf->base;
		saveoff = base->key;
		KKASSERT(cursor.leaf->data_len > HAMMER_ENTRY_NAME_OFF);

		if (base->obj_id != ip->obj_id)
			panic("readdir: bad record at %p", cursor.node);

		r = vop_write_dirent(
			     &error, uio, cursor.data->entry.obj_id,
			     hammer_get_dtype(cursor.leaf->base.obj_type),
			     cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF,
			     (void *)cursor.data->entry.name);
		if (r)
			break;
		++saveoff;
		if (cookies)
			cookies[cookie_index] = base->key;
		++cookie_index;
		if (cookie_index == ncookies)
			break;
		error = hammer_ip_next(&cursor);
	}
	hammer_done_cursor(&cursor);

done:
	hammer_done_transaction(&trans);

	if (ap->a_eofflag)
		*ap->a_eofflag = (error == ENOENT);
	uio->uio_offset = saveoff;
	if (error && cookie_index == 0) {
		if (error == ENOENT)
			error = 0;
		if (cookies) {
			kfree(cookies, M_TEMP);
			*ap->a_ncookies = 0;
			*ap->a_cookies = NULL;
		}
	} else {
		if (error == ENOENT)
			error = 0;
		if (cookies) {
			*ap->a_ncookies = cookie_index;
			*ap->a_cookies = cookies;
		}
	}
	return(error);
}

MD
1152/*
1153 * hammer_vop_readlink { vp, uio, cred }
1154 */
427e5fc6
MD
1155static
1156int
66325755 1157hammer_vop_readlink(struct vop_readlink_args *ap)
427e5fc6 1158{
36f82b23 1159 struct hammer_transaction trans;
7a04d74f
MD
1160 struct hammer_cursor cursor;
1161 struct hammer_inode *ip;
1162 int error;
1163
1164 ip = VTOI(ap->a_vp);
36f82b23 1165
2f85fa4d
MD
1166 /*
1167 * Shortcut if the symlink data was stuffed into ino_data.
1168 */
1169 if (ip->ino_data.size <= HAMMER_INODE_BASESYMLEN) {
1170 error = uiomove(ip->ino_data.ext.symlink,
1171 ip->ino_data.size, ap->a_uio);
1172 return(error);
1173 }
36f82b23 1174
2f85fa4d
MD
1175 /*
1176 * Long version
1177 */
1178 hammer_simple_transaction(&trans, ip->hmp);
4e17f465 1179 hammer_init_cursor(&trans, &cursor, &ip->cache[0], ip);
7a04d74f
MD
1180
1181 /*
1182 * Key range (begin and end inclusive) to scan. Directory keys
1183 * directly translate to a 64 bit 'seek' position.
1184 */
2f85fa4d 1185 cursor.key_beg.localization = HAMMER_LOCALIZE_MISC; /* XXX */
7a04d74f 1186 cursor.key_beg.obj_id = ip->obj_id;
d5530d22 1187 cursor.key_beg.create_tid = 0;
7a04d74f
MD
1188 cursor.key_beg.delete_tid = 0;
1189 cursor.key_beg.rec_type = HAMMER_RECTYPE_FIX;
1190 cursor.key_beg.obj_type = 0;
1191 cursor.key_beg.key = HAMMER_FIXKEY_SYMLINK;
d5530d22
MD
1192 cursor.asof = ip->obj_asof;
1193 cursor.flags |= HAMMER_CURSOR_ASOF;
7a04d74f 1194
45a014dc 1195 error = hammer_ip_lookup(&cursor);
7a04d74f
MD
1196 if (error == 0) {
1197 error = hammer_ip_resolve_data(&cursor);
1198 if (error == 0) {
11ad5ade
MD
1199 KKASSERT(cursor.leaf->data_len >=
1200 HAMMER_SYMLINK_NAME_OFF);
1201 error = uiomove(cursor.data->symlink.name,
1202 cursor.leaf->data_len -
1203 HAMMER_SYMLINK_NAME_OFF,
7a04d74f
MD
1204 ap->a_uio);
1205 }
1206 }
1207 hammer_done_cursor(&cursor);
b84de5af 1208 hammer_done_transaction(&trans);
7a04d74f 1209 return(error);
427e5fc6
MD
1210}
1211
66325755
MD
1212/*
1213 * hammer_vop_nremove { nch, dvp, cred }
1214 */
427e5fc6
MD
1215static
1216int
66325755 1217hammer_vop_nremove(struct vop_nremove_args *ap)
427e5fc6 1218{
b84de5af 1219 struct hammer_transaction trans;
e63644f0 1220 struct hammer_inode *dip;
b84de5af
MD
1221 int error;
1222
e63644f0
MD
1223 dip = VTOI(ap->a_dvp);
1224
1225 if (hammer_nohistory(dip) == 0 &&
1226 (error = hammer_checkspace(dip->hmp)) != 0) {
1227 return (error);
1228 }
1229
1230 hammer_start_transaction(&trans, dip->hmp);
b84de5af
MD
1231 error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp, ap->a_cred, 0);
1232 hammer_done_transaction(&trans);
1233
1234 return (error);
427e5fc6
MD
1235}
1236
/*
 * hammer_vop_nrename { fnch, tnch, fdvp, tdvp, cred }
 */
static
int
hammer_vop_nrename(struct vop_nrename_args *ap)
{
	struct hammer_transaction trans;
	struct namecache *fncp;
	struct namecache *tncp;
	struct hammer_inode *fdip;
	struct hammer_inode *tdip;
	struct hammer_inode *ip;
	struct hammer_cursor cursor;
	int64_t namekey;
	int nlen, error;

	fdip = VTOI(ap->a_fdvp);
	tdip = VTOI(ap->a_tdvp);
	fncp = ap->a_fnch->ncp;
	tncp = ap->a_tnch->ncp;
	ip = VTOI(fncp->nc_vp);
	KKASSERT(ip != NULL);

	if (fdip->flags & HAMMER_INODE_RO)
		return (EROFS);
	if (tdip->flags & HAMMER_INODE_RO)
		return (EROFS);
	if (ip->flags & HAMMER_INODE_RO)
		return (EROFS);
	if ((error = hammer_checkspace(fdip->hmp)) != 0)
		return (error);

	hammer_start_transaction(&trans, fdip->hmp);

	/*
	 * Remove tncp from the target directory and then link ip as
	 * tncp.  XXX pass trans to dounlink
	 *
	 * Force the inode sync-time to match the transaction so it is
	 * in-sync with the creation of the target directory entry.
	 */
	error = hammer_dounlink(&trans, ap->a_tnch, ap->a_tdvp, ap->a_cred, 0);
	if (error == 0 || error == ENOENT) {
		error = hammer_ip_add_directory(&trans, tdip, tncp, ip);
		if (error == 0) {
			ip->ino_data.parent_obj_id = tdip->obj_id;
			hammer_modify_inode(&trans, ip, HAMMER_INODE_DDIRTY);
		}
	}
	if (error)
		goto failed; /* XXX */

	/*
	 * Locate the record in the originating directory and remove it.
	 *
	 * Calculate the namekey and setup the key range for the scan.  This
	 * works kinda like a chained hash table where the lower 32 bits
	 * of the namekey synthesize the chain.
	 *
	 * The key range is inclusive of both key_beg and key_end.
	 */
	namekey = hammer_directory_namekey(fncp->nc_name, fncp->nc_nlen);
retry:
	hammer_init_cursor(&trans, &cursor, &fdip->cache[0], fdip);
	cursor.key_beg.localization = HAMMER_LOCALIZE_MISC;
	cursor.key_beg.obj_id = fdip->obj_id;
	cursor.key_beg.key = namekey;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
	cursor.key_beg.obj_type = 0;

	cursor.key_end = cursor.key_beg;
	cursor.key_end.key |= 0xFFFFFFFFULL;
	cursor.asof = fdip->obj_asof;
	cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;

	/*
	 * Scan all matching records (the chain), locate the one matching
	 * the requested path component.
	 *
	 * The hammer_ip_*() functions merge in-memory records with on-disk
	 * records for the purposes of the search.
	 */
	error = hammer_ip_first(&cursor);
	while (error == 0) {
		if (hammer_ip_resolve_data(&cursor) != 0)
			break;
		nlen = cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF;
		KKASSERT(nlen > 0);
		if (fncp->nc_nlen == nlen &&
		    bcmp(fncp->nc_name, cursor.data->entry.name, nlen) == 0) {
			break;
		}
		error = hammer_ip_next(&cursor);
	}

	/*
	 * If all is ok we have to get the inode so we can adjust nlinks.
	 *
	 * WARNING: hammer_ip_del_directory() may have to terminate the
	 * cursor to avoid a recursion.  It's ok to call hammer_done_cursor()
	 * twice.
	 */
	if (error == 0)
		error = hammer_ip_del_directory(&trans, &cursor, fdip, ip);

	/*
	 * XXX A deadlock here will break rename's atomicity for the
	 * purposes of crash recovery.
	 */
	if (error == EDEADLK) {
		hammer_done_cursor(&cursor);
		goto retry;
	}

	/*
	 * Cleanup and tell the kernel that the rename succeeded.
	 */
	hammer_done_cursor(&cursor);
	if (error == 0)
		cache_rename(ap->a_fnch, ap->a_tnch);

failed:
	hammer_done_transaction(&trans);
	return (error);
}

/*
 * hammer_vop_nrmdir { nch, dvp, cred }
 */
static
int
hammer_vop_nrmdir(struct vop_nrmdir_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *dip;
	int error;

	dip = VTOI(ap->a_dvp);

	if (hammer_nohistory(dip) == 0 &&
	    (error = hammer_checkspace(dip->hmp)) != 0) {
		return (error);
	}

	hammer_start_transaction(&trans, dip->hmp);
	error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp, ap->a_cred, 0);
	hammer_done_transaction(&trans);

	return (error);
}

/*
 * hammer_vop_setattr { vp, vap, cred }
 */
static
int
hammer_vop_setattr(struct vop_setattr_args *ap)
{
	struct hammer_transaction trans;
	struct vattr *vap;
	struct hammer_inode *ip;
	int modflags;
	int error;
	int truncating;
	off_t aligned_size;
	u_int32_t flags;

	vap = ap->a_vap;
	ip = ap->a_vp->v_data;
	modflags = 0;

	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
		return(EROFS);
	if (ip->flags & HAMMER_INODE_RO)
		return (EROFS);
	if (hammer_nohistory(ip) == 0 &&
	    (error = hammer_checkspace(ip->hmp)) != 0) {
		return (error);
	}

	hammer_start_transaction(&trans, ip->hmp);
	error = 0;

	if (vap->va_flags != VNOVAL) {
		flags = ip->ino_data.uflags;
		error = vop_helper_setattr_flags(&flags, vap->va_flags,
					hammer_to_unix_xid(&ip->ino_data.uid),
					ap->a_cred);
		if (error == 0) {
			if (ip->ino_data.uflags != flags) {
				ip->ino_data.uflags = flags;
				modflags |= HAMMER_INODE_DDIRTY;
			}
			if (ip->ino_data.uflags & (IMMUTABLE | APPEND)) {
				error = 0;
				goto done;
			}
		}
		goto done;
	}
	if (ip->ino_data.uflags & (IMMUTABLE | APPEND)) {
		error = EPERM;
		goto done;
	}
	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
		mode_t cur_mode = ip->ino_data.mode;
		uid_t cur_uid = hammer_to_unix_xid(&ip->ino_data.uid);
		gid_t cur_gid = hammer_to_unix_xid(&ip->ino_data.gid);
		uuid_t uuid_uid;
		uuid_t uuid_gid;

		error = vop_helper_chown(ap->a_vp, vap->va_uid, vap->va_gid,
					 ap->a_cred,
					 &cur_uid, &cur_gid, &cur_mode);
		if (error == 0) {
			hammer_guid_to_uuid(&uuid_uid, cur_uid);
			hammer_guid_to_uuid(&uuid_gid, cur_gid);
			if (bcmp(&uuid_uid, &ip->ino_data.uid,
				 sizeof(uuid_uid)) ||
			    bcmp(&uuid_gid, &ip->ino_data.gid,
				 sizeof(uuid_gid)) ||
			    ip->ino_data.mode != cur_mode
			) {
				ip->ino_data.uid = uuid_uid;
				ip->ino_data.gid = uuid_gid;
				ip->ino_data.mode = cur_mode;
			}
			modflags |= HAMMER_INODE_DDIRTY;
		}
	}
	while (vap->va_size != VNOVAL && ip->ino_data.size != vap->va_size) {
		switch(ap->a_vp->v_type) {
		case VREG:
			if (vap->va_size == ip->ino_data.size)
				break;
			/*
			 * XXX break atomicity, we can deadlock the backend
			 * if we do not release the lock.  Probably not a
			 * big deal here.
			 */
			if (vap->va_size < ip->ino_data.size) {
				vtruncbuf(ap->a_vp, vap->va_size,
					  HAMMER_BUFSIZE);
				truncating = 1;
			} else {
				vnode_pager_setsize(ap->a_vp, vap->va_size);
				truncating = 0;
			}
			ip->ino_data.size = vap->va_size;
			modflags |= HAMMER_INODE_DDIRTY;
			aligned_size = (vap->va_size + HAMMER_BUFMASK) &
				       ~HAMMER_BUFMASK64;

			/*
			 * on-media truncation is cached in the inode until
			 * the inode is synchronized.
			 */
			if (truncating) {
				if ((ip->flags & HAMMER_INODE_TRUNCATED) == 0) {
					ip->flags |= HAMMER_INODE_TRUNCATED;
					ip->trunc_off = vap->va_size;
				} else if (ip->trunc_off > vap->va_size) {
					ip->trunc_off = vap->va_size;
				}
			}

			/*
			 * If truncating we have to clean out a portion of
			 * the last block on-disk.  We do this in the
			 * front-end buffer cache.
			 */
			if (truncating && vap->va_size < aligned_size) {
				struct buf *bp;
				int offset;

				offset = vap->va_size & HAMMER_BUFMASK;
				error = bread(ap->a_vp,
					      aligned_size - HAMMER_BUFSIZE,
					      HAMMER_BUFSIZE, &bp);
				if (error == 0) {
					bzero(bp->b_data + offset,
					      HAMMER_BUFSIZE - offset);
					bdwrite(bp);
				} else {
					brelse(bp);
				}
			}
			break;
		case VDATABASE:
			if ((ip->flags & HAMMER_INODE_TRUNCATED) == 0) {
				ip->flags |= HAMMER_INODE_TRUNCATED;
				ip->trunc_off = vap->va_size;
			} else if (ip->trunc_off > vap->va_size) {
				ip->trunc_off = vap->va_size;
			}
			ip->ino_data.size = vap->va_size;
			modflags |= HAMMER_INODE_DDIRTY;
			break;
		default:
			error = EINVAL;
			goto done;
		}
		break;
	}
	if (vap->va_atime.tv_sec != VNOVAL) {
		ip->ino_leaf.atime =
			hammer_timespec_to_transid(&vap->va_atime);
		modflags |= HAMMER_INODE_ITIMES;
	}
	if (vap->va_mtime.tv_sec != VNOVAL) {
		ip->ino_data.mtime =
			hammer_timespec_to_transid(&vap->va_mtime);
		modflags |= HAMMER_INODE_ITIMES;
		modflags |= HAMMER_INODE_DDIRTY;	/* XXX mtime */
	}
	if (vap->va_mode != (mode_t)VNOVAL) {
		mode_t cur_mode = ip->ino_data.mode;
		uid_t cur_uid = hammer_to_unix_xid(&ip->ino_data.uid);
		gid_t cur_gid = hammer_to_unix_xid(&ip->ino_data.gid);

		error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred,
					 cur_uid, cur_gid, &cur_mode);
		if (error == 0 && ip->ino_data.mode != cur_mode) {
			ip->ino_data.mode = cur_mode;
			modflags |= HAMMER_INODE_DDIRTY;
		}
	}
done:
	if (error == 0)
		hammer_modify_inode(&trans, ip, modflags);
	hammer_done_transaction(&trans);
	return (error);
}

/*
 * hammer_vop_nsymlink { nch, dvp, vpp, cred, vap, target }
 */
static
int
hammer_vop_nsymlink(struct vop_nsymlink_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *dip;
	struct hammer_inode *nip;
	struct nchandle *nch;
	hammer_record_t record;
	int error;
	int bytes;

	ap->a_vap->va_type = VLNK;

	nch = ap->a_nch;
	dip = VTOI(ap->a_dvp);

	if (dip->flags & HAMMER_INODE_RO)
		return (EROFS);
	if ((error = hammer_checkspace(dip->hmp)) != 0)
		return (error);

	/*
	 * Create a transaction to cover the operations we perform.
	 */
	hammer_start_transaction(&trans, dip->hmp);

	/*
	 * Create a new filesystem object of the requested type.  The
	 * returned inode will be referenced but not locked.
	 */

	error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
	if (error) {
		hammer_done_transaction(&trans);
		*ap->a_vpp = NULL;
		return (error);
	}

	/*
	 * Add a record representing the symlink.  symlink stores the link
	 * as pure data, not a string, and is not \0 terminated.
	 */
	if (error == 0) {
		bytes = strlen(ap->a_target);

		if (bytes <= HAMMER_INODE_BASESYMLEN) {
			bcopy(ap->a_target, nip->ino_data.ext.symlink, bytes);
		} else {
			record = hammer_alloc_mem_record(nip, bytes);
			record->type = HAMMER_MEM_RECORD_GENERAL;

			record->leaf.base.localization = HAMMER_LOCALIZE_MISC;
			record->leaf.base.key = HAMMER_FIXKEY_SYMLINK;
			record->leaf.base.rec_type = HAMMER_RECTYPE_FIX;
			record->leaf.data_len = bytes;
			KKASSERT(HAMMER_SYMLINK_NAME_OFF == 0);
			bcopy(ap->a_target, record->data->symlink.name, bytes);
			error = hammer_ip_add_record(&trans, record);
		}

		/*
		 * Set the file size to the length of the link.
		 */
		if (error == 0) {
			nip->ino_data.size = bytes;
			hammer_modify_inode(&trans, nip, HAMMER_INODE_DDIRTY);
		}
	}
	if (error == 0)
		error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);

	/*
	 * Finish up.
	 */
	if (error) {
		hammer_rel_inode(nip, 0);
		*ap->a_vpp = NULL;
	} else {
		error = hammer_get_vnode(nip, ap->a_vpp);
		hammer_rel_inode(nip, 0);
		if (error == 0) {
			cache_setunresolved(ap->a_nch);
			cache_setvp(ap->a_nch, *ap->a_vpp);
		}
	}
	hammer_done_transaction(&trans);
	return (error);
}

/*
 * hammer_vop_nwhiteout { nch, dvp, cred, flags }
 */
static
int
hammer_vop_nwhiteout(struct vop_nwhiteout_args *ap)
{
	struct hammer_transaction trans;
	struct hammer_inode *dip;
	int error;

	dip = VTOI(ap->a_dvp);

	if (hammer_nohistory(dip) == 0 &&
	    (error = hammer_checkspace(dip->hmp)) != 0) {
		return (error);
	}

	hammer_start_transaction(&trans, dip->hmp);
	error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp,
				ap->a_cred, ap->a_flags);
	hammer_done_transaction(&trans);

	return (error);
}

/*
 * hammer_vop_ioctl { vp, command, data, fflag, cred }
 */
static
int
hammer_vop_ioctl(struct vop_ioctl_args *ap)
{
	struct hammer_inode *ip = ap->a_vp->v_data;

	return(hammer_ioctl(ip, ap->a_command, ap->a_data,
			    ap->a_fflag, ap->a_cred));
}

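/*
 * hammer_vop_mountctl { op, ctl, ctllen, ... }
 *
 * MOUNTCTL_SET_EXPORT updates the mount's NFS export configuration;
 * all other operations are handed to the journaling code.
 */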
static
int
hammer_vop_mountctl(struct vop_mountctl_args *ap)
{
	struct mount *mp;
	int error;

	mp = ap->a_head.a_ops->head.vv_mount;

	switch(ap->a_op) {
	case MOUNTCTL_SET_EXPORT:
		if (ap->a_ctllen != sizeof(struct export_args))
			error = EINVAL;
		else
			error = hammer_vfs_export(mp, ap->a_op,
				    (const struct export_args *)ap->a_ctl);
		break;
	default:
		error = journal_mountctl(ap);
		break;
	}
	return(error);
}

/*
 * hammer_vop_strategy { vp, bio }
 *
 * Strategy call, used for regular file read & write only.  Note that the
 * bp may represent a cluster.
 *
 * To simplify operation and allow better optimizations in the future,
 * this code does not make any assumptions with regards to buffer alignment
 * or size.
 */
static
int
hammer_vop_strategy(struct vop_strategy_args *ap)
{
	struct buf *bp;
	int error;

	bp = ap->a_bio->bio_buf;

	switch(bp->b_cmd) {
	case BUF_CMD_READ:
		error = hammer_vop_strategy_read(ap);
		break;
	case BUF_CMD_WRITE:
		error = hammer_vop_strategy_write(ap);
		break;
	default:
		bp->b_error = error = EINVAL;
		bp->b_flags |= B_ERROR;
		biodone(ap->a_bio);
		break;
	}
	return (error);
}

8cd0a023
MD
1764/*
1765 * Read from a regular file. Iterate the related records and fill in the
1766 * BIO/BUF. Gaps are zero-filled.
1767 *
1768 * The support code in hammer_object.c should be used to deal with mixed
1769 * in-memory and on-disk records.
1770 *
1771 * XXX atime update
1772 */
1773static
1774int
1775hammer_vop_strategy_read(struct vop_strategy_args *ap)
1776{
36f82b23
MD
1777 struct hammer_transaction trans;
1778 struct hammer_inode *ip;
8cd0a023 1779 struct hammer_cursor cursor;
8cd0a023
MD
1780 hammer_base_elm_t base;
1781 struct bio *bio;
1782 struct buf *bp;
1783 int64_t rec_offset;
a89aec1b 1784 int64_t ran_end;
195c19a1 1785 int64_t tmp64;
8cd0a023
MD
1786 int error;
1787 int boff;
1788 int roff;
1789 int n;
1790
1791 bio = ap->a_bio;
1792 bp = bio->bio_buf;
36f82b23 1793 ip = ap->a_vp->v_data;
8cd0a023 1794
36f82b23 1795 hammer_simple_transaction(&trans, ip->hmp);
4e17f465 1796 hammer_init_cursor(&trans, &cursor, &ip->cache[0], ip);
8cd0a023
MD
1797
1798 /*
	 * Key range (begin and end inclusive) to scan.  Note that the keys
	 * stored in the actual records represent BASE+LEN, not BASE.  The
	 * first record containing bio_offset will have a key > bio_offset.
8cd0a023 1802 */
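	/*
	 * Editor's note (worked example, offsets are hypothetical): a data
	 * record covering file offsets [32768, 49152) is stored under key
	 * 49152 (BASE+LEN).  A bio starting at offset 32768 therefore sets
	 * key_beg.key to 32769, and that record is the first one whose key
	 * is strictly greater than the bio offset, so it is the first
	 * record returned by the scan.
	 */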
2f85fa4d 1803 cursor.key_beg.localization = HAMMER_LOCALIZE_MISC;
8cd0a023 1804 cursor.key_beg.obj_id = ip->obj_id;
d5530d22 1805 cursor.key_beg.create_tid = 0;
8cd0a023 1806 cursor.key_beg.delete_tid = 0;
8cd0a023 1807 cursor.key_beg.obj_type = 0;
c0ade690 1808 cursor.key_beg.key = bio->bio_offset + 1;
d5530d22 1809 cursor.asof = ip->obj_asof;
47197d71 1810 cursor.flags |= HAMMER_CURSOR_ASOF | HAMMER_CURSOR_DATAEXTOK;
8cd0a023
MD
1811
1812 cursor.key_end = cursor.key_beg;
11ad5ade 1813 KKASSERT(ip->ino_data.obj_type == HAMMER_OBJTYPE_REGFILE);
b84de5af 1814#if 0
11ad5ade 1815 if (ip->ino_data.obj_type == HAMMER_OBJTYPE_DBFILE) {
a89aec1b
MD
1816 cursor.key_beg.rec_type = HAMMER_RECTYPE_DB;
1817 cursor.key_end.rec_type = HAMMER_RECTYPE_DB;
1818 cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;
b84de5af
MD
1819 } else
1820#endif
1821 {
c0ade690 1822 ran_end = bio->bio_offset + bp->b_bufsize;
a89aec1b
MD
1823 cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA;
1824 cursor.key_end.rec_type = HAMMER_RECTYPE_DATA;
195c19a1
MD
1825 tmp64 = ran_end + MAXPHYS + 1; /* work-around GCC-4 bug */
1826 if (tmp64 < ran_end)
a89aec1b
MD
1827 cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;
1828 else
7f7c1f84 1829 cursor.key_end.key = ran_end + MAXPHYS + 1;
a89aec1b 1830 }
d26d0ae9 1831 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
8cd0a023 1832
4e17f465 1833 error = hammer_ip_first(&cursor);
8cd0a023
MD
1834 boff = 0;
1835
a89aec1b
MD
1836 while (error == 0) {
1837 error = hammer_ip_resolve_data(&cursor);
1838 if (error)
66325755 1839 break;
11ad5ade 1840 base = &cursor.leaf->base;
8cd0a023 1841
11ad5ade 1842 rec_offset = base->key - cursor.leaf->data_len;
8cd0a023 1843
66325755 1844 /*
a89aec1b 1845 * Calculate the gap, if any, and zero-fill it.
1fef775e
MD
1846 *
	 * n is the offset of the start of the record versus our
	 * current seek offset in the bio.
66325755 1849 */
8cd0a023
MD
1850 n = (int)(rec_offset - (bio->bio_offset + boff));
1851 if (n > 0) {
a89aec1b
MD
1852 if (n > bp->b_bufsize - boff)
1853 n = bp->b_bufsize - boff;
8cd0a023
MD
1854 bzero((char *)bp->b_data + boff, n);
1855 boff += n;
1856 n = 0;
66325755 1857 }
8cd0a023
MD
1858
1859 /*
1860 * Calculate the data offset in the record and the number
1861 * of bytes we can copy.
a89aec1b 1862 *
1fef775e
MD
	 * There are two degenerate cases.  First, boff may already
	 * be at bp->b_bufsize.  Second, the data offset within
	 * the record may exceed the record's size.
8cd0a023
MD
1866 */
1867 roff = -n;
b84de5af 1868 rec_offset += roff;
11ad5ade 1869 n = cursor.leaf->data_len - roff;
1fef775e
MD
1870 if (n <= 0) {
1871 kprintf("strategy_read: bad n=%d roff=%d\n", n, roff);
1872 n = 0;
1873 } else if (n > bp->b_bufsize - boff) {
8cd0a023 1874 n = bp->b_bufsize - boff;
1fef775e 1875 }
059819e3 1876
b84de5af
MD
1877 /*
1878 * If we cached a truncation point on our front-end the
1879 * on-disk version may still have physical records beyond
1880 * that point. Truncate visibility.
1881 */
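	/*
	 * Editor's note (worked example, values are hypothetical): with
	 * trunc_off = 10000 and a record covering [8192, 16384), only the
	 * first 10000 - 8192 = 1808 bytes of the record remain visible;
	 * a record starting at or beyond offset 10000 contributes nothing.
	 */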
1882 if (ip->trunc_off <= rec_offset)
1883 n = 0;
1884 else if (ip->trunc_off < rec_offset + n)
1885 n = (int)(ip->trunc_off - rec_offset);
1886
1887 /*
1888 * Copy
1889 */
1890 if (n) {
1891 bcopy((char *)cursor.data + roff,
1892 (char *)bp->b_data + boff, n);
1893 boff += n;
1894 }
8cd0a023 1895 if (boff == bp->b_bufsize)
66325755 1896 break;
a89aec1b 1897 error = hammer_ip_next(&cursor);
66325755 1898 }
8cd0a023 1899 hammer_done_cursor(&cursor);
b84de5af 1900 hammer_done_transaction(&trans);
66325755
MD
1901
1902 /*
8cd0a023 1903 * There may have been a gap after the last record
66325755 1904 */
8cd0a023
MD
1905 if (error == ENOENT)
1906 error = 0;
1907 if (error == 0 && boff != bp->b_bufsize) {
7f7c1f84 1908 KKASSERT(boff < bp->b_bufsize);
8cd0a023
MD
1909 bzero((char *)bp->b_data + boff, bp->b_bufsize - boff);
1910 /* boff = bp->b_bufsize; */
1911 }
1912 bp->b_resid = 0;
059819e3
MD
1913 bp->b_error = error;
1914 if (error)
1915 bp->b_flags |= B_ERROR;
1916 biodone(ap->a_bio);
8cd0a023
MD
1917 return(error);
1918}
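/*
 * Editor's sketch (not part of the original source): a minimal userland
 * model of the zero-fill-and-copy arithmetic used by the read strategy
 * above.  struct demo_rec, demo_fill() and the #if 0 wrapper are invented
 * for illustration only; records are assumed to be sorted by file offset
 * and non-overlapping.
 */
#if 0
#include <string.h>

struct demo_rec {
	long		file_off;	/* file offset of the record's data */
	int		data_len;	/* number of valid bytes */
	const char	*data;
};

static void
demo_fill(char *buf, int bufsize, long bio_off,
	  const struct demo_rec *recs, int nrecs)
{
	int boff = 0;		/* current fill offset within buf */
	int i, n, roff;

	for (i = 0; i < nrecs && boff < bufsize; ++i) {
		/*
		 * The gap between what has been filled so far and the
		 * start of this record is zero-filled, clipped to the
		 * buffer.
		 */
		n = (int)(recs[i].file_off - (bio_off + boff));
		if (n > 0) {
			if (n > bufsize - boff)
				n = bufsize - boff;
			memset(buf + boff, 0, n);
			boff += n;
			n = 0;
		}

		/*
		 * Copy the overlapping portion of the record.  roff is the
		 * offset into the record when it began before our current
		 * position.
		 */
		roff = -n;
		n = recs[i].data_len - roff;
		if (n <= 0)
			continue;
		if (n > bufsize - boff)
			n = bufsize - boff;
		memcpy(buf + boff, recs[i].data + roff, n);
		boff += n;
	}

	/* trailing gap after the last record */
	if (boff < bufsize)
		memset(buf + boff, 0, bufsize - boff);
}
#endif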
1919
1920/*
059819e3
MD
1921 * Write to a regular file. Because this is a strategy call the OS is
1922 * trying to actually sync data to the media. HAMMER can only flush
1923 * the entire inode (so the TID remains properly synchronized).
8cd0a023 1924 *
059819e3
MD
1925 * Basically all we do here is place the bio on the inode's flush queue
1926 * and activate the flusher.
8cd0a023
MD
1927 */
1928static
1929int
1930hammer_vop_strategy_write(struct vop_strategy_args *ap)
1931{
8cd0a023
MD
1932 hammer_inode_t ip;
1933 struct bio *bio;
1934 struct buf *bp;
8cd0a023
MD
1935
1936 bio = ap->a_bio;
1937 bp = bio->bio_buf;
1938 ip = ap->a_vp->v_data;
d113fda1 1939
059819e3
MD
1940 if (ip->flags & HAMMER_INODE_RO) {
1941 bp->b_error = EROFS;
1942 bp->b_flags |= B_ERROR;
1943 biodone(ap->a_bio);
e63644f0 1944 hammer_cleanup_write_io(ip);
059819e3
MD
1945 return(EROFS);
1946 }
b84de5af 1947
29ce0677
MD
1948 /*
1949 * Interlock with inode destruction (no in-kernel or directory
1950 * topology visibility). If we queue new IO while trying to
1951 * destroy the inode we can deadlock the vtrunc call in
1952 * hammer_inode_unloadable_check().
1953 */
1954 if (ip->flags & (HAMMER_INODE_DELETING|HAMMER_INODE_DELETED)) {
1955 bp->b_resid = 0;
1956 biodone(ap->a_bio);
e63644f0 1957 hammer_cleanup_write_io(ip);
29ce0677
MD
1958 return(0);
1959 }
1960
b84de5af
MD
1961 /*
1962 * If the inode is being flushed we cannot re-queue buffers
1963 * it may have already flushed, or it could result in duplicate
1964 * records in the database.
1965 */
059819e3 1966 BUF_KERNPROC(bp);
1f07f686 1967 if (ip->flags & HAMMER_INODE_WRITE_ALT)
b84de5af
MD
1968 TAILQ_INSERT_TAIL(&ip->bio_alt_list, bio, bio_act);
1969 else
1970 TAILQ_INSERT_TAIL(&ip->bio_list, bio, bio_act);
1f07f686
MD
1971 ++hammer_bio_count;
1972 hammer_modify_inode(NULL, ip, HAMMER_INODE_BUFS);
4e17f465
MD
1973
1974 hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
1975#if 0
1976 /*
1977 * XXX
1978 *
1979 * If the write was not part of an integrated flush operation then
1980 * signal a flush.
1981 */
1982 if (ip->flush_state != HAMMER_FST_FLUSH ||
1983 (ip->flags & HAMMER_INODE_WRITE_ALT)) {
1984 hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
1985 }
1986#endif
059819e3
MD
1987 return(0);
1988}
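/*
 * Editor's sketch (not part of the original source): the frontend side of
 * the write path above reduces to "queue the buffer and poke the flusher".
 * This userland model shows that hand-off; demo_bio, demo_inode and
 * demo_signal_flusher() are invented names, and the alternate queue mirrors
 * the HAMMER_INODE_WRITE_ALT case where the inode is already being flushed.
 */
#if 0
#include <sys/queue.h>

struct demo_bio {
	TAILQ_ENTRY(demo_bio) entry;
	void		*data;
};

struct demo_inode {
	TAILQ_HEAD(, demo_bio) bio_list;	/* normal pending writes */
	TAILQ_HEAD(, demo_bio) bio_alt_list;	/* queued while a flush runs */
	int		flushing;		/* backend flush in progress */
};

static void
demo_signal_flusher(struct demo_inode *ip)
{
	/* stub: a real implementation would wake the flusher thread */
	(void)ip;
}

/*
 * Frontend: never touches the media itself, it only queues the bio on the
 * appropriate list and signals the backend.
 */
static void
demo_queue_write(struct demo_inode *ip, struct demo_bio *bio)
{
	if (ip->flushing)
		TAILQ_INSERT_TAIL(&ip->bio_alt_list, bio, entry);
	else
		TAILQ_INSERT_TAIL(&ip->bio_list, bio, entry);
	demo_signal_flusher(ip);
}
#endif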
1989
1990/*
b84de5af 1991 * Backend code which actually performs the write to the media. This
059819e3
MD
1992 * routine is typically called from the flusher. The bio will be disposed
1993 * of (biodone'd) by this routine.
1994 *
1995 * Iterate the related records and mark for deletion. If existing edge
1996 * records (left and right side) overlap our write they have to be marked
1997 * deleted and new records created, usually referencing a portion of the
1998 * original data. Then add a record to represent the buffer.
1999 */
2000int
4e17f465 2001hammer_dowrite(hammer_cursor_t cursor, hammer_inode_t ip, struct bio *bio)
059819e3
MD
2002{
2003 struct buf *bp = bio->bio_buf;
2004 int error;
8cd0a023 2005
b84de5af
MD
2006 KKASSERT(ip->flush_state == HAMMER_FST_FLUSH);
2007
869e8f55
MD
2008 /*
2009 * If the inode is going or gone, just throw away any frontend
2010 * buffers.
2011 */
2012 if (ip->flags & HAMMER_INODE_DELETED) {
2013 bp->b_resid = 0;
2014 biodone(bio);
77062c8a 2015 --hammer_bio_count;
e63644f0 2016 hammer_cleanup_write_io(ip);
ee3fed53 2017 return(0);
869e8f55
MD
2018 }
2019
8cd0a023
MD
2020 /*
2021 * Delete any records overlapping our range. This function will
d26d0ae9 2022 * (eventually) properly truncate partial overlaps.
8cd0a023 2023 */
11ad5ade 2024 if (ip->sync_ino_data.obj_type == HAMMER_OBJTYPE_DBFILE) {
4e17f465 2025 error = hammer_ip_delete_range(cursor, ip, bio->bio_offset,
47197d71 2026 bio->bio_offset);
a89aec1b 2027 } else {
4e17f465 2028 error = hammer_ip_delete_range(cursor, ip, bio->bio_offset,
d26d0ae9 2029 bio->bio_offset +
47197d71 2030 bp->b_bufsize - 1);
a89aec1b 2031 }
8cd0a023
MD
2032
2033 /*
e38e0b15
MD
2034 * Add a single record to cover the write. We can write a record
2035 * with only the actual file data - for example, a small 200 byte
2036 * file does not have to write out a 16K record.
2037 *
2038 * While the data size does not have to be aligned, we still do it
2039 * to reduce fragmentation in a future allocation model.
8cd0a023
MD
2040 */
2041 if (error == 0) {
e38e0b15
MD
2042 int limit_size;
2043
11ad5ade 2044 if (ip->sync_ino_data.size - bio->bio_offset >
b84de5af
MD
2045 bp->b_bufsize) {
2046 limit_size = bp->b_bufsize;
e38e0b15 2047 } else {
11ad5ade 2048 limit_size = (int)(ip->sync_ino_data.size -
e38e0b15
MD
2049 bio->bio_offset);
2050 KKASSERT(limit_size >= 0);
2051 limit_size = (limit_size + 63) & ~63;
2052 }
4e17f465
MD
2053 if (limit_size) {
2054 error = hammer_ip_sync_data(cursor, ip, bio->bio_offset,
2055 bp->b_data, limit_size);
2056 }
66325755 2057 }
a5fddc16
MD
2058 if (error)
2059 Debugger("hammer_dowrite: error");
66325755 2060
8cd0a023 2061 if (error) {
8cd0a023 2062 bp->b_resid = bp->b_bufsize;
059819e3
MD
2063 bp->b_error = error;
2064 bp->b_flags |= B_ERROR;
8cd0a023 2065 } else {
8cd0a023
MD
2066 bp->b_resid = 0;
2067 }
059819e3 2068 biodone(bio);
1f07f686 2069 --hammer_bio_count;
e63644f0 2070 hammer_cleanup_write_io(ip);
8cd0a023
MD
2071 return(error);
2072}
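/*
 * Editor's sketch (not part of the original source): the record sizing in
 * hammer_dowrite() clamps the write to the logical end of file and then
 * rounds the short case up to a 64 byte boundary.  demo_limit_size() is an
 * invented helper restating that arithmetic; a 200 byte file in a 16K
 * buffer, for example, yields a 256 byte record instead of a 16K one.
 */
#if 0
#include <stdint.h>

static int
demo_limit_size(int64_t file_size, int64_t bio_offset, int bufsize)
{
	int limit_size;

	if (file_size - bio_offset > bufsize) {
		limit_size = bufsize;
	} else {
		limit_size = (int)(file_size - bio_offset);
		limit_size = (limit_size + 63) & ~63;	/* round up to 64 */
	}
	return (limit_size);
}
#endif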
2073
e63644f0
MD
2074static void
2075hammer_cleanup_write_io(hammer_inode_t ip)
2076{
2077 if (ip->rsv_databufs) {
2078 --ip->rsv_databufs;
2079 --ip->hmp->rsv_databufs;
2080 }
2081}
2082
8cd0a023
MD
2083/*
2084 * dounlink - disconnect a directory entry
2085 *
2086 * XXX whiteout support not really in yet
2087 */
2088static int
b84de5af
MD
2089hammer_dounlink(hammer_transaction_t trans, struct nchandle *nch,
2090 struct vnode *dvp, struct ucred *cred, int flags)
8cd0a023 2091{
8cd0a023
MD
2092 struct namecache *ncp;
2093 hammer_inode_t dip;
2094 hammer_inode_t ip;
8cd0a023 2095 struct hammer_cursor cursor;
8cd0a023 2096 int64_t namekey;
11ad5ade 2097 int nlen, error;
8cd0a023
MD
2098
2099 /*
	 * Calculate the namekey and set up the key range for the scan.  This
	 * works much like a chained hash table where the lower 32 bits
	 * of the namekey synthesize the chain.
2103 *
2104 * The key range is inclusive of both key_beg and key_end.
2105 */
2106 dip = VTOI(dvp);
2107 ncp = nch->ncp;
d113fda1
MD
2108
2109 if (dip->flags & HAMMER_INODE_RO)
2110 return (EROFS);
2111
6a37e7e4
MD
2112 namekey = hammer_directory_namekey(ncp->nc_name, ncp->nc_nlen);
2113retry:
4e17f465 2114 hammer_init_cursor(trans, &cursor, &dip->cache[0], dip);
2f85fa4d 2115 cursor.key_beg.localization = HAMMER_LOCALIZE_MISC;
8cd0a023
MD
2116 cursor.key_beg.obj_id = dip->obj_id;
2117 cursor.key_beg.key = namekey;
d5530d22 2118 cursor.key_beg.create_tid = 0;
8cd0a023
MD
2119 cursor.key_beg.delete_tid = 0;
2120 cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
2121 cursor.key_beg.obj_type = 0;
2122
2123 cursor.key_end = cursor.key_beg;
2124 cursor.key_end.key |= 0xFFFFFFFFULL;
d5530d22
MD
2125 cursor.asof = dip->obj_asof;
2126 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;
8cd0a023 2127
8cd0a023
MD
2128 /*
2129 * Scan all matching records (the chain), locate the one matching
	 * the requested path component.  The scan's error code on
	 * termination could be 0, ENOENT, or something else.
2133 *
2134 * The hammer_ip_*() functions merge in-memory records with on-disk
2135 * records for the purposes of the search.
2136 */
4e17f465
MD
2137 error = hammer_ip_first(&cursor);
2138
a89aec1b
MD
2139 while (error == 0) {
2140 error = hammer_ip_resolve_data(&cursor);
2141 if (error)
66325755 2142 break;
11ad5ade
MD
2143 nlen = cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF;
2144 KKASSERT(nlen > 0);
2145 if (ncp->nc_nlen == nlen &&
2146 bcmp(ncp->nc_name, cursor.data->entry.name, nlen) == 0) {
66325755
MD
2147 break;
2148 }
a89aec1b 2149 error = hammer_ip_next(&cursor);
66325755 2150 }
8cd0a023
MD
2151
2152 /*
2153 * If all is ok we have to get the inode so we can adjust nlinks.
b3deaf57
MD
2154 *
2155 * If the target is a directory, it must be empty.
8cd0a023 2156 */
66325755 2157 if (error == 0) {
b84de5af 2158 ip = hammer_get_inode(trans, &dip->cache[1],
11ad5ade 2159 cursor.data->entry.obj_id,
d113fda1 2160 dip->hmp->asof, 0, &error);
46fe7ae1 2161 if (error == ENOENT) {
11ad5ade 2162 kprintf("obj_id %016llx\n", cursor.data->entry.obj_id);
10a5d1ba 2163 Debugger("ENOENT unlinking object that should exist");
46fe7ae1 2164 }
1f07f686
MD
2165
2166 /*
2167 * If we are trying to remove a directory the directory must
2168 * be empty.
2169 *
2170 * WARNING: hammer_ip_check_directory_empty() may have to
2171 * terminate the cursor to avoid a deadlock. It is ok to
2172 * call hammer_done_cursor() twice.
2173 */
11ad5ade 2174 if (error == 0 && ip->ino_data.obj_type ==
b3deaf57 2175 HAMMER_OBJTYPE_DIRECTORY) {
98f7132d 2176 error = hammer_ip_check_directory_empty(trans, ip);
b3deaf57 2177 }
1f07f686 2178
6a37e7e4 2179 /*
1f07f686
MD
2180 * Delete the directory entry.
2181 *
6a37e7e4 2182 * WARNING: hammer_ip_del_directory() may have to terminate
1f07f686 2183 * the cursor to avoid a deadlock. It is ok to call
6a37e7e4
MD
2184 * hammer_done_cursor() twice.
2185 */
b84de5af 2186 if (error == 0) {
b84de5af
MD
2187 error = hammer_ip_del_directory(trans, &cursor,
2188 dip, ip);
b84de5af 2189 }
8cd0a023
MD
2190 if (error == 0) {
2191 cache_setunresolved(nch);
2192 cache_setvp(nch, NULL);
2193 /* XXX locking */
2194 if (ip->vp)
2195 cache_inval_vp(ip->vp, CINV_DESTROY);
2196 }
a89aec1b 2197 hammer_rel_inode(ip, 0);
66325755 2198 }
6a37e7e4
MD
2199 hammer_done_cursor(&cursor);
2200 if (error == EDEADLK)
2201 goto retry;
9c448776 2202
66325755 2203 return (error);
66325755
MD
2204}
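/*
 * Editor's sketch (not part of the original source): the directory scan in
 * hammer_dounlink() treats the namekey as a chained hash.  A hash of the
 * name occupies the upper 32 bits and the low 32 bits distinguish colliding
 * entries, so the scan range is [namekey, namekey | 0xFFFFFFFF].
 * demo_hash32() is a stand-in for the hash used by the real
 * hammer_directory_namekey(), which is not reproduced here.
 */
#if 0
#include <stdint.h>
#include <string.h>

static uint32_t
demo_hash32(const char *name, int len)
{
	uint32_t h = 2166136261u;	/* FNV-1a, purely illustrative */
	int i;

	for (i = 0; i < len; ++i) {
		h ^= (unsigned char)name[i];
		h *= 16777619u;
	}
	return (h);
}

static void
demo_name_range(const char *name, int64_t *key_beg, int64_t *key_end)
{
	int64_t namekey;

	namekey = (int64_t)(demo_hash32(name, (int)strlen(name)) &
			    0x7FFFFFFFU) << 32;
	*key_beg = namekey;			/* start of the chain */
	*key_end = namekey | 0xFFFFFFFFLL;	/* end of the chain */
}
#endif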
2205
7a04d74f
MD
2206/************************************************************************
2207 * FIFO AND SPECFS OPS *
2208 ************************************************************************
2209 *
2210 */
2211
2212static int
2213hammer_vop_fifoclose (struct vop_close_args *ap)
2214{
2215 /* XXX update itimes */
2216 return (VOCALL(&fifo_vnode_vops, &ap->a_head));
2217}
2218
2219static int
2220hammer_vop_fiforead (struct vop_read_args *ap)
2221{
2222 int error;
2223
2224 error = VOCALL(&fifo_vnode_vops, &ap->a_head);
2225 /* XXX update access time */
2226 return (error);
2227}
2228
2229static int
2230hammer_vop_fifowrite (struct vop_write_args *ap)
2231{
2232 int error;
2233
2234 error = VOCALL(&fifo_vnode_vops, &ap->a_head);
2235 /* XXX update access time */
2236 return (error);
2237}
2238
2239static int
2240hammer_vop_specclose (struct vop_close_args *ap)
2241{
2242 /* XXX update itimes */
2243 return (VOCALL(&spec_vnode_vops, &ap->a_head));
2244}
2245
2246static int
2247hammer_vop_specread (struct vop_read_args *ap)
2248{
2249 /* XXX update access time */
2250 return (VOCALL(&spec_vnode_vops, &ap->a_head));
2251}
2252
2253static int
2254hammer_vop_specwrite (struct vop_write_args *ap)
2255{
2256 /* XXX update last change time */
2257 return (VOCALL(&spec_vnode_vops, &ap->a_head));
2258}
2259