dragonfly.git: sys/vfs/hammer/hammer_vnops.c
427e5fc6 1/*
b84de5af 2 * Copyright (c) 2007-2008 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
bcac4bbb 34 * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.72 2008/06/18 01:13:30 dillon Exp $
35 */
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/kernel.h>
40#include <sys/fcntl.h>
41#include <sys/namecache.h>
42#include <sys/vnode.h>
43#include <sys/lockf.h>
44#include <sys/event.h>
45#include <sys/stat.h>
b3deaf57 46#include <sys/dirent.h>
c0ade690 47#include <vm/vm_extern.h>
7a04d74f 48#include <vfs/fifofs/fifo.h>
49#include "hammer.h"
50
51/*
52 * USERFS VNOPS
53 */
54/*static int hammer_vop_vnoperate(struct vop_generic_args *);*/
55static int hammer_vop_fsync(struct vop_fsync_args *);
56static int hammer_vop_read(struct vop_read_args *);
57static int hammer_vop_write(struct vop_write_args *);
58static int hammer_vop_access(struct vop_access_args *);
59static int hammer_vop_advlock(struct vop_advlock_args *);
60static int hammer_vop_close(struct vop_close_args *);
61static int hammer_vop_ncreate(struct vop_ncreate_args *);
62static int hammer_vop_getattr(struct vop_getattr_args *);
63static int hammer_vop_nresolve(struct vop_nresolve_args *);
64static int hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *);
65static int hammer_vop_nlink(struct vop_nlink_args *);
66static int hammer_vop_nmkdir(struct vop_nmkdir_args *);
67static int hammer_vop_nmknod(struct vop_nmknod_args *);
68static int hammer_vop_open(struct vop_open_args *);
69static int hammer_vop_pathconf(struct vop_pathconf_args *);
70static int hammer_vop_print(struct vop_print_args *);
71static int hammer_vop_readdir(struct vop_readdir_args *);
72static int hammer_vop_readlink(struct vop_readlink_args *);
73static int hammer_vop_nremove(struct vop_nremove_args *);
74static int hammer_vop_nrename(struct vop_nrename_args *);
75static int hammer_vop_nrmdir(struct vop_nrmdir_args *);
76static int hammer_vop_setattr(struct vop_setattr_args *);
77static int hammer_vop_strategy(struct vop_strategy_args *);
a99b9ea2 78static int hammer_vop_bmap(struct vop_bmap_args *ap);
79static int hammer_vop_nsymlink(struct vop_nsymlink_args *);
80static int hammer_vop_nwhiteout(struct vop_nwhiteout_args *);
7dc57964 81static int hammer_vop_ioctl(struct vop_ioctl_args *);
513ca7d7 82static int hammer_vop_mountctl(struct vop_mountctl_args *);
427e5fc6 83
84static int hammer_vop_fifoclose (struct vop_close_args *);
85static int hammer_vop_fiforead (struct vop_read_args *);
86static int hammer_vop_fifowrite (struct vop_write_args *);
87
88static int hammer_vop_specclose (struct vop_close_args *);
89static int hammer_vop_specread (struct vop_read_args *);
90static int hammer_vop_specwrite (struct vop_write_args *);
91
92struct vop_ops hammer_vnode_vops = {
93 .vop_default = vop_defaultop,
94 .vop_fsync = hammer_vop_fsync,
95 .vop_getpages = vop_stdgetpages,
96 .vop_putpages = vop_stdputpages,
97 .vop_read = hammer_vop_read,
98 .vop_write = hammer_vop_write,
99 .vop_access = hammer_vop_access,
100 .vop_advlock = hammer_vop_advlock,
101 .vop_close = hammer_vop_close,
102 .vop_ncreate = hammer_vop_ncreate,
103 .vop_getattr = hammer_vop_getattr,
104 .vop_inactive = hammer_vop_inactive,
105 .vop_reclaim = hammer_vop_reclaim,
106 .vop_nresolve = hammer_vop_nresolve,
107 .vop_nlookupdotdot = hammer_vop_nlookupdotdot,
108 .vop_nlink = hammer_vop_nlink,
109 .vop_nmkdir = hammer_vop_nmkdir,
110 .vop_nmknod = hammer_vop_nmknod,
111 .vop_open = hammer_vop_open,
112 .vop_pathconf = hammer_vop_pathconf,
113 .vop_print = hammer_vop_print,
114 .vop_readdir = hammer_vop_readdir,
115 .vop_readlink = hammer_vop_readlink,
116 .vop_nremove = hammer_vop_nremove,
117 .vop_nrename = hammer_vop_nrename,
118 .vop_nrmdir = hammer_vop_nrmdir,
119 .vop_setattr = hammer_vop_setattr,
a99b9ea2 120 .vop_bmap = hammer_vop_bmap,
121 .vop_strategy = hammer_vop_strategy,
122 .vop_nsymlink = hammer_vop_nsymlink,
7dc57964 123 .vop_nwhiteout = hammer_vop_nwhiteout,
124 .vop_ioctl = hammer_vop_ioctl,
125 .vop_mountctl = hammer_vop_mountctl
126};
127
128struct vop_ops hammer_spec_vops = {
129 .vop_default = spec_vnoperate,
130 .vop_fsync = hammer_vop_fsync,
131 .vop_read = hammer_vop_specread,
132 .vop_write = hammer_vop_specwrite,
133 .vop_access = hammer_vop_access,
134 .vop_close = hammer_vop_specclose,
135 .vop_getattr = hammer_vop_getattr,
136 .vop_inactive = hammer_vop_inactive,
137 .vop_reclaim = hammer_vop_reclaim,
138 .vop_setattr = hammer_vop_setattr
139};
140
141struct vop_ops hammer_fifo_vops = {
142 .vop_default = fifo_vnoperate,
143 .vop_fsync = hammer_vop_fsync,
144 .vop_read = hammer_vop_fiforead,
145 .vop_write = hammer_vop_fifowrite,
146 .vop_access = hammer_vop_access,
147 .vop_close = hammer_vop_fifoclose,
148 .vop_getattr = hammer_vop_getattr,
149 .vop_inactive = hammer_vop_inactive,
150 .vop_reclaim = hammer_vop_reclaim,
151 .vop_setattr = hammer_vop_setattr
152};
153
154#ifdef DEBUG_TRUNCATE
155struct hammer_inode *HammerTruncIp;
156#endif
157
158static int hammer_dounlink(hammer_transaction_t trans, struct nchandle *nch,
159 struct vnode *dvp, struct ucred *cred, int flags);
160static int hammer_vop_strategy_read(struct vop_strategy_args *ap);
161static int hammer_vop_strategy_write(struct vop_strategy_args *ap);
162static void hammer_cleanup_write_io(hammer_inode_t ip);
163static void hammer_update_rsv_databufs(hammer_inode_t ip);
8cd0a023 164
165#if 0
166static
167int
168hammer_vop_vnoperate(struct vop_generic_args *ap)
169{
170 return (VOCALL(&hammer_vnode_vops, ap));
171}
172#endif
173
174/*
175 * hammer_vop_fsync { vp, waitfor }
176 */
177static
178int
66325755 179hammer_vop_fsync(struct vop_fsync_args *ap)
427e5fc6 180{
b84de5af 181 hammer_inode_t ip = VTOI(ap->a_vp);
c0ade690 182
e8599db1 183 vfsync(ap->a_vp, ap->a_waitfor, 1, NULL, NULL);
af209b0f 184 hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
185 if (ap->a_waitfor == MNT_WAIT)
186 hammer_wait_inode(ip);
059819e3 187 return (ip->error);
188}
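/*
 * Note: with MNT_WAIT the caller blocks in hammer_wait_inode() until the
 * flusher has synchronized the inode to the media; with MNT_NOWAIT the
 * flush is only signalled and any previously recorded inode error is
 * returned immediately.
 */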
189
190/*
191 * hammer_vop_read { vp, uio, ioflag, cred }
192 */
193static
194int
66325755 195hammer_vop_read(struct vop_read_args *ap)
427e5fc6 196{
66325755 197 struct hammer_transaction trans;
c0ade690 198 hammer_inode_t ip;
199 off_t offset;
200 struct buf *bp;
201 struct uio *uio;
202 int error;
203 int n;
8cd0a023 204 int seqcount;
205
206 if (ap->a_vp->v_type != VREG)
207 return (EINVAL);
208 ip = VTOI(ap->a_vp);
209 error = 0;
8cd0a023 210 seqcount = ap->a_ioflag >> 16;
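	/*
	 * The upper 16 bits of a_ioflag carry the kernel's sequential access
	 * heuristic (0..IO_SEQMAX); cluster_read() below uses it to decide
	 * how much read-ahead to issue.
	 */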
66325755 211
8cd0a023 212 hammer_start_transaction(&trans, ip->hmp);
213
214 /*
215 * Access the data in HAMMER_BUFSIZE blocks via the buffer cache.
216 */
217 uio = ap->a_uio;
11ad5ade 218 while (uio->uio_resid > 0 && uio->uio_offset < ip->ino_data.size) {
66325755 219 offset = uio->uio_offset & HAMMER_BUFMASK;
220 if (hammer_debug_cluster_enable) {
221 error = cluster_read(ap->a_vp, ip->ino_data.size,
222 uio->uio_offset - offset,
223 HAMMER_BUFSIZE,
224 MAXBSIZE, seqcount, &bp);
225 } else {
226 error = bread(ap->a_vp, uio->uio_offset - offset,
227 HAMMER_BUFSIZE, &bp);
228 }
229 if (error) {
230 brelse(bp);
231 break;
232 }
7bc5b8c2 233
c0ade690 234 /* bp->b_flags |= B_CLUSTEROK; temporarily disabled */
235 n = HAMMER_BUFSIZE - offset;
236 if (n > uio->uio_resid)
237 n = uio->uio_resid;
238 if (n > ip->ino_data.size - uio->uio_offset)
239 n = (int)(ip->ino_data.size - uio->uio_offset);
66325755 240 error = uiomove((char *)bp->b_data + offset, n, uio);
241
242 /* data has a lower priority than meta-data */
243 bp->b_flags |= B_AGE;
66325755 244 bqrelse(bp);
245 if (error)
246 break;
66325755 247 }
248 if ((ip->flags & HAMMER_INODE_RO) == 0 &&
249 (ip->hmp->mp->mnt_flag & MNT_NOATIME) == 0) {
bcac4bbb 250 ip->ino_data.atime = trans.time;
47637bff 251 hammer_modify_inode(ip, HAMMER_INODE_ITIMES);
252 }
253 hammer_done_transaction(&trans);
66325755 254 return (error);
255}
256
257/*
258 * hammer_vop_write { vp, uio, ioflag, cred }
259 */
260static
261int
66325755 262hammer_vop_write(struct vop_write_args *ap)
427e5fc6 263{
264 struct hammer_transaction trans;
265 struct hammer_inode *ip;
266 struct uio *uio;
267 int rel_offset;
268 off_t base_offset;
269 struct buf *bp;
270 int error;
271 int n;
c0ade690 272 int flags;
059819e3 273 int count;
cb51be26 274 int seqcount;
275
276 if (ap->a_vp->v_type != VREG)
277 return (EINVAL);
278 ip = VTOI(ap->a_vp);
279 error = 0;
cb51be26 280 seqcount = ap->a_ioflag >> 16;
66325755 281
282 if (ip->flags & HAMMER_INODE_RO)
283 return (EROFS);
284
285 /*
286 * Create a transaction to cover the operations we perform.
287 */
8cd0a023 288 hammer_start_transaction(&trans, ip->hmp);
289 uio = ap->a_uio;
290
291 /*
292 * Check append mode
293 */
294 if (ap->a_ioflag & IO_APPEND)
11ad5ade 295 uio->uio_offset = ip->ino_data.size;
296
297 /*
298 * Check for illegal write offsets. Valid range is 0...2^63-1.
299 *
300 * NOTE: the base_off assignment is required to work around what
301 * I consider to be a GCC-4 optimization bug.
66325755 302 */
303 if (uio->uio_offset < 0) {
304 hammer_done_transaction(&trans);
305 return (EFBIG);
306 }
307 base_offset = uio->uio_offset + uio->uio_resid; /* work around gcc-4 */
308 if (uio->uio_resid > 0 && base_offset <= 0) {
b84de5af 309 hammer_done_transaction(&trans);
66325755 310 return (EFBIG);
9c448776 311 }
312
313 /*
314 * Access the data in HAMMER_BUFSIZE blocks via the buffer cache.
315 */
059819e3 316 count = 0;
66325755 317 while (uio->uio_resid > 0) {
318 int fixsize = 0;
319
320 if ((error = hammer_checkspace(trans.hmp)) != 0)
321 break;
322
059819e3 323 /*
324 * Do not allow HAMMER to blow out the buffer cache.
325 *
326 * Do not allow HAMMER to blow out system memory by
327 * accumulating too many records. Records are decoupled
328 * from the buffer cache.
329 *
330 * Always check at the beginning so separate writes are
331 * not able to bypass this code.
332 *
333 * WARNING: Cannot unlock vp when doing a NOCOPY write as
334 * part of a putpages operation. Doing so could cause us
335 * to deadlock against the VM system when we try to re-lock.
059819e3 336 */
47637bff 337 if ((count++ & 15) == 0) {
338 if (uio->uio_segflg != UIO_NOCOPY) {
339 vn_unlock(ap->a_vp);
340 if ((ap->a_ioflag & IO_NOBWILL) == 0)
341 bwillwrite();
342 }
343 if (ip->rsv_recs > hammer_limit_irecs)
344 hammer_wait_inode_recs(ip);
345 if (uio->uio_segflg != UIO_NOCOPY)
346 vn_lock(ap->a_vp, LK_EXCLUSIVE|LK_RETRY);
347 }
348
349 rel_offset = (int)(uio->uio_offset & HAMMER_BUFMASK);
350 base_offset = uio->uio_offset & ~HAMMER_BUFMASK64;
351 n = HAMMER_BUFSIZE - rel_offset;
352 if (n > uio->uio_resid)
353 n = uio->uio_resid;
11ad5ade 354 if (uio->uio_offset + n > ip->ino_data.size) {
355 vnode_pager_setsize(ap->a_vp, uio->uio_offset + n);
356 fixsize = 1;
357 }
358
359 if (uio->uio_segflg == UIO_NOCOPY) {
360 /*
361 * Issuing a write with the same data backing the
362 * buffer. Instantiate the buffer to collect the
363 * backing vm pages, then read-in any missing bits.
364 *
365 * This case is used by vop_stdputpages().
366 */
47637bff 367 bp = getblk(ap->a_vp, base_offset,
d5ef456e 368 HAMMER_BUFSIZE, GETBLK_BHEAVY, 0);
369 if ((bp->b_flags & B_CACHE) == 0) {
370 bqrelse(bp);
47637bff 371 error = bread(ap->a_vp, base_offset,
c0ade690 372 HAMMER_BUFSIZE, &bp);
c0ade690 373 }
47637bff 374 } else if (rel_offset == 0 && uio->uio_resid >= HAMMER_BUFSIZE) {
c0ade690 375 /*
376 * Even though we are entirely overwriting the buffer
377 * we may still have to zero it out to avoid a
378 * mmap/write visibility issue.
c0ade690 379 */
47637bff 380 bp = getblk(ap->a_vp, base_offset,
d5ef456e 381 HAMMER_BUFSIZE, GETBLK_BHEAVY, 0);
382 if ((bp->b_flags & B_CACHE) == 0)
383 vfs_bio_clrbuf(bp);
47637bff 384 } else if (base_offset >= ip->ino_data.size) {
c0ade690 385 /*
386 * If the base offset of the buffer is beyond the
387 * file EOF, we don't have to issue a read.
c0ade690 388 */
47637bff 389 bp = getblk(ap->a_vp, base_offset,
d5ef456e 390 HAMMER_BUFSIZE, GETBLK_BHEAVY, 0);
391 vfs_bio_clrbuf(bp);
392 } else {
393 /*
394 * Partial overwrite, read in any missing bits then
395 * replace the portion being written.
396 */
47637bff 397 error = bread(ap->a_vp, base_offset,
66325755 398 HAMMER_BUFSIZE, &bp);
399 if (error == 0)
400 bheavy(bp);
66325755 401 }
402 if (error == 0) {
403 error = uiomove((char *)bp->b_data + rel_offset,
404 n, uio);
405 }
406
407 /*
408 * If we screwed up we have to undo any VM size changes we
409 * made.
410 */
411 if (error) {
412 brelse(bp);
d5ef456e 413 if (fixsize) {
11ad5ade 414 vtruncbuf(ap->a_vp, ip->ino_data.size,
415 HAMMER_BUFSIZE);
416 }
417 break;
418 }
c0ade690 419 /* bp->b_flags |= B_CLUSTEROK; temporarily disabled */
420 if (ip->ino_data.size < uio->uio_offset) {
421 ip->ino_data.size = uio->uio_offset;
422 flags = HAMMER_INODE_DDIRTY;
423 vnode_pager_setsize(ap->a_vp, ip->ino_data.size);
c0ade690 424 } else {
d113fda1 425 flags = 0;
66325755 426 }
11ad5ade 427 ip->ino_data.mtime = trans.time;
f3b0f382 428 flags |= HAMMER_INODE_ITIMES | HAMMER_INODE_BUFS;
11ad5ade 429 flags |= HAMMER_INODE_DDIRTY; /* XXX mtime */
47637bff 430 hammer_modify_inode(ip, flags);
32c90105 431
432 /*
433 * Try to keep track of cached dirty data.
434 */
435 if ((bp->b_flags & B_DIRTY) == 0) {
436 ++ip->rsv_databufs;
437 ++ip->hmp->rsv_databufs;
438 }
439
440 /*
441 * Final buffer disposition.
442 *
443 * If hammer_write_mode is non-zero we push the buffer out as
444 * soon as it has been completely filled. Otherwise we only use
445 * bawrite() if the writes are clearly sequential.
47637bff 446 */
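	/*
	 * For reference: bwrite() writes the buffer synchronously, bawrite()
	 * starts an asynchronous write immediately, bdwrite() only marks the
	 * buffer dirty for a later flush, and cluster_write() tries to gather
	 * adjacent dirty buffers into a single larger I/O.
	 */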
cb51be26 447 bp->b_flags |= B_AGE;
448 if (ap->a_ioflag & IO_SYNC) {
449 bwrite(bp);
450 } else if (ap->a_ioflag & IO_DIRECT) {
66325755 451 bawrite(bp);
452 } else if (hammer_write_mode &&
453 (uio->uio_offset & HAMMER_BUFMASK) == 0) {
47637bff 454#if 1
455 bp->b_flags |= B_CLUSTEROK;
456 cluster_write(bp, ip->ino_data.size, seqcount);
457#else
cb51be26 458 bawrite(bp);
bcac4bbb 459#endif
059819e3 460 } else if ((ap->a_ioflag >> 16) == IO_SEQMAX &&
461 (uio->uio_offset & HAMMER_BUFMASK) == 0) {
462 /*
463 * If seqcount indicates sequential operation and
464 * we just finished filling a buffer, push it out
465 * now to prevent the buffer cache from becoming
466 * too full, which would trigger non-optimal
467 * flushes.
468 */
47637bff 469 bawrite(bp);
66325755 470 } else {
471 bdwrite(bp);
472 }
473 }
b84de5af 474 hammer_done_transaction(&trans);
66325755 475 return (error);
476}
477
478/*
479 * hammer_vop_access { vp, mode, cred }
480 */
481static
482int
66325755 483hammer_vop_access(struct vop_access_args *ap)
427e5fc6 484{
485 struct hammer_inode *ip = VTOI(ap->a_vp);
486 uid_t uid;
487 gid_t gid;
488 int error;
489
490 uid = hammer_to_unix_xid(&ip->ino_data.uid);
491 gid = hammer_to_unix_xid(&ip->ino_data.gid);
492
493 error = vop_helper_access(ap, uid, gid, ip->ino_data.mode,
494 ip->ino_data.uflags);
495 return (error);
427e5fc6
MD
496}
497
66325755
MD
498/*
499 * hammer_vop_advlock { vp, id, op, fl, flags }
500 */
427e5fc6
MD
501static
502int
66325755 503hammer_vop_advlock(struct vop_advlock_args *ap)
427e5fc6 504{
66325755
MD
505 struct hammer_inode *ip = VTOI(ap->a_vp);
506
11ad5ade 507 return (lf_advlock(ap, &ip->advlock, ip->ino_data.size));
427e5fc6
MD
508}
509
66325755
MD
510/*
511 * hammer_vop_close { vp, fflag }
512 */
427e5fc6
MD
513static
514int
66325755 515hammer_vop_close(struct vop_close_args *ap)
427e5fc6 516{
a89aec1b 517 return (vop_stdclose(ap));
427e5fc6
MD
518}
519
66325755
MD
520/*
521 * hammer_vop_ncreate { nch, dvp, vpp, cred, vap }
522 *
523 * The operating system has already ensured that the directory entry
524 * does not exist and done all appropriate namespace locking.
525 */
427e5fc6
MD
526static
527int
66325755 528hammer_vop_ncreate(struct vop_ncreate_args *ap)
427e5fc6 529{
66325755
MD
530 struct hammer_transaction trans;
531 struct hammer_inode *dip;
532 struct hammer_inode *nip;
533 struct nchandle *nch;
534 int error;
535
536 nch = ap->a_nch;
537 dip = VTOI(ap->a_dvp);
538
d113fda1
MD
539 if (dip->flags & HAMMER_INODE_RO)
540 return (EROFS);
e63644f0
MD
541 if ((error = hammer_checkspace(dip->hmp)) != 0)
542 return (error);
d113fda1 543
66325755
MD
544 /*
545 * Create a transaction to cover the operations we perform.
546 */
8cd0a023 547 hammer_start_transaction(&trans, dip->hmp);
66325755
MD
548
549 /*
550 * Create a new filesystem object of the requested type. The
b84de5af
MD
551 * returned inode will be referenced and shared-locked to prevent
552 * it from being moved to the flusher.
66325755 553 */
8cd0a023
MD
554
555 error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
66325755 556 if (error) {
77062c8a 557 hkprintf("hammer_create_inode error %d\n", error);
b84de5af 558 hammer_done_transaction(&trans);
66325755
MD
559 *ap->a_vpp = NULL;
560 return (error);
561 }
66325755
MD
562
563 /*
564 * Add the new filesystem object to the directory. This will also
565 * bump the inode's link count.
566 */
a89aec1b 567 error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);
0b075555 568 if (error)
77062c8a 569 hkprintf("hammer_ip_add_directory error %d\n", error);
66325755
MD
570
571 /*
572 * Finish up.
573 */
574 if (error) {
a89aec1b 575 hammer_rel_inode(nip, 0);
b84de5af 576 hammer_done_transaction(&trans);
66325755
MD
577 *ap->a_vpp = NULL;
578 } else {
e8599db1 579 error = hammer_get_vnode(nip, ap->a_vpp);
b84de5af 580 hammer_done_transaction(&trans);
a89aec1b
MD
581 hammer_rel_inode(nip, 0);
582 if (error == 0) {
583 cache_setunresolved(ap->a_nch);
584 cache_setvp(ap->a_nch, *ap->a_vpp);
585 }
66325755
MD
586 }
587 return (error);
427e5fc6
MD
588}
589
66325755
MD
590/*
591 * hammer_vop_getattr { vp, vap }
98f7132d
MD
592 *
593 * Retrieve an inode's attribute information. When accessing inodes
594 * historically we fake the atime field to ensure consistent results.
595 * The atime field is stored in the B-Tree element and allowed to be
596 * updated without cycling the element.
66325755 597 */
427e5fc6
MD
598static
599int
66325755 600hammer_vop_getattr(struct vop_getattr_args *ap)
427e5fc6 601{
66325755
MD
602 struct hammer_inode *ip = VTOI(ap->a_vp);
603 struct vattr *vap = ap->a_vap;
604
605#if 0
606 if (cache_check_fsmid_vp(ap->a_vp, &ip->fsmid) &&
607 (vp->v_mount->mnt_flag & MNT_RDONLY) == 0 &&
7f7c1f84 608 ip->obj_asof == XXX
66325755
MD
609 ) {
610 /* LAZYMOD XXX */
611 }
612 hammer_itimes(ap->a_vp);
613#endif
614
615 vap->va_fsid = ip->hmp->fsid_udev;
11ad5ade 616 vap->va_fileid = ip->ino_leaf.base.obj_id;
66325755 617 vap->va_mode = ip->ino_data.mode;
11ad5ade 618 vap->va_nlink = ip->ino_data.nlinks;
66325755
MD
619 vap->va_uid = hammer_to_unix_xid(&ip->ino_data.uid);
620 vap->va_gid = hammer_to_unix_xid(&ip->ino_data.gid);
621 vap->va_rmajor = 0;
622 vap->va_rminor = 0;
11ad5ade 623 vap->va_size = ip->ino_data.size;
bcac4bbb
MD
624
625 /*
626 * We must provide a consistent atime and mtime for snapshots
627 * so people can do a 'tar cf - ... | md5' on them and get
628 * consistent results.
629 */
630 if (ip->flags & HAMMER_INODE_RO) {
631 hammer_to_timespec(ip->ino_data.ctime, &vap->va_atime);
632 hammer_to_timespec(ip->ino_data.ctime, &vap->va_mtime);
633 } else {
634 hammer_to_timespec(ip->ino_data.atime, &vap->va_atime);
635 hammer_to_timespec(ip->ino_data.mtime, &vap->va_mtime);
636 }
66325755
MD
637 hammer_to_timespec(ip->ino_data.ctime, &vap->va_ctime);
638 vap->va_flags = ip->ino_data.uflags;
639 vap->va_gen = 1; /* hammer inums are unique for all time */
bf686dbe 640 vap->va_blocksize = HAMMER_BUFSIZE;
11ad5ade
MD
641 vap->va_bytes = (ip->ino_data.size + 63) & ~63;
642 vap->va_type = hammer_get_vnode_type(ip->ino_data.obj_type);
66325755
MD
643 vap->va_filerev = 0; /* XXX */
644 /* mtime uniquely identifies any adjustments made to the file */
11ad5ade 645 vap->va_fsmid = ip->ino_data.mtime;
66325755
MD
646 vap->va_uid_uuid = ip->ino_data.uid;
647 vap->va_gid_uuid = ip->ino_data.gid;
648 vap->va_fsid_uuid = ip->hmp->fsid;
649 vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
650 VA_FSID_UUID_VALID;
7a04d74f 651
11ad5ade 652 switch (ip->ino_data.obj_type) {
7a04d74f
MD
653 case HAMMER_OBJTYPE_CDEV:
654 case HAMMER_OBJTYPE_BDEV:
655 vap->va_rmajor = ip->ino_data.rmajor;
656 vap->va_rminor = ip->ino_data.rminor;
657 break;
658 default:
659 break;
660 }
661
66325755 662 return(0);
427e5fc6
MD
663}
664
66325755
MD
665/*
666 * hammer_vop_nresolve { nch, dvp, cred }
667 *
668 * Locate the requested directory entry.
669 */
427e5fc6
MD
670static
671int
66325755 672hammer_vop_nresolve(struct vop_nresolve_args *ap)
427e5fc6 673{
36f82b23 674 struct hammer_transaction trans;
66325755 675 struct namecache *ncp;
7f7c1f84
MD
676 hammer_inode_t dip;
677 hammer_inode_t ip;
678 hammer_tid_t asof;
8cd0a023 679 struct hammer_cursor cursor;
66325755
MD
680 struct vnode *vp;
681 int64_t namekey;
682 int error;
7f7c1f84
MD
683 int i;
684 int nlen;
d113fda1 685 int flags;
6a37e7e4 686 u_int64_t obj_id;
7f7c1f84
MD
687
688 /*
689 * Misc initialization, plus handle as-of name extensions. Look for
690 * the '@@' extension. Note that as-of files and directories cannot
691 * be modified.
7f7c1f84
MD
692 */
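	/*
	 * For illustration (hypothetical tid): a lookup of
	 * "foo@@0x00000001061a8ba6" strips the name down to "foo", resolves
	 * it as of that transaction id, and forces HAMMER_INODE_RO on the
	 * result, which is why as-of files and directories can never be
	 * modified.
	 */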
693 dip = VTOI(ap->a_dvp);
694 ncp = ap->a_nch->ncp;
695 asof = dip->obj_asof;
696 nlen = ncp->nc_nlen;
d113fda1 697 flags = dip->flags;
7f7c1f84 698
36f82b23
MD
699 hammer_simple_transaction(&trans, dip->hmp);
700
7f7c1f84
MD
701 for (i = 0; i < nlen; ++i) {
702 if (ncp->nc_name[i] == '@' && ncp->nc_name[i+1] == '@') {
d113fda1 703 asof = hammer_str_to_tid(ncp->nc_name + i + 2);
d113fda1 704 flags |= HAMMER_INODE_RO;
7f7c1f84
MD
705 break;
706 }
707 }
708 nlen = i;
66325755 709
d113fda1
MD
710 /*
711 * If there is no path component the time extension is relative to
712 * dip.
713 */
714 if (nlen == 0) {
bcac4bbb 715 ip = hammer_get_inode(&trans, dip, dip->obj_id,
61aeeb33 716 asof, flags, &error);
d113fda1 717 if (error == 0) {
e8599db1 718 error = hammer_get_vnode(ip, &vp);
d113fda1
MD
719 hammer_rel_inode(ip, 0);
720 } else {
721 vp = NULL;
722 }
723 if (error == 0) {
724 vn_unlock(vp);
725 cache_setvp(ap->a_nch, vp);
726 vrele(vp);
727 }
36f82b23 728 goto done;
d113fda1
MD
729 }
730
8cd0a023
MD
731 /*
732 * Calculate the namekey and setup the key range for the scan. This
733 * works kinda like a chained hash table where the lower 32 bits
734 * of the namekey synthesize the chain.
735 *
736 * The key range is inclusive of both key_beg and key_end.
737 */
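	/*
	 * Sketch of the range set up below: hammer_directory_namekey() hashes
	 * the name into the upper bits of a 64 bit key, e.g.
	 *
	 *	key_beg.key = namekey;
	 *	key_end.key = namekey | 0xFFFFFFFFULL;
	 *
	 * so every entry whose name hashes identically lands in the range and
	 * the scan loop compares the stored names to pick the right one.
	 */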
7f7c1f84 738 namekey = hammer_directory_namekey(ncp->nc_name, nlen);
66325755 739
bcac4bbb 740 error = hammer_init_cursor(&trans, &cursor, &dip->cache[1], dip);
2f85fa4d 741 cursor.key_beg.localization = HAMMER_LOCALIZE_MISC;
8cd0a023
MD
742 cursor.key_beg.obj_id = dip->obj_id;
743 cursor.key_beg.key = namekey;
d5530d22 744 cursor.key_beg.create_tid = 0;
8cd0a023
MD
745 cursor.key_beg.delete_tid = 0;
746 cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
747 cursor.key_beg.obj_type = 0;
66325755 748
8cd0a023
MD
749 cursor.key_end = cursor.key_beg;
750 cursor.key_end.key |= 0xFFFFFFFFULL;
d5530d22
MD
751 cursor.asof = asof;
752 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;
66325755
MD
753
754 /*
8cd0a023 755 * Scan all matching records (the chain), locate the one matching
a89aec1b 756 * the requested path component.
8cd0a023
MD
757 *
758 * The hammer_ip_*() functions merge in-memory records with on-disk
759 * records for the purposes of the search.
66325755 760 */
6a37e7e4
MD
761 obj_id = 0;
762
4e17f465 763 if (error == 0) {
4e17f465
MD
764 error = hammer_ip_first(&cursor);
765 while (error == 0) {
766 error = hammer_ip_resolve_data(&cursor);
767 if (error)
768 break;
11ad5ade
MD
769 if (nlen == cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF &&
770 bcmp(ncp->nc_name, cursor.data->entry.name, nlen) == 0) {
771 obj_id = cursor.data->entry.obj_id;
4e17f465
MD
772 break;
773 }
774 error = hammer_ip_next(&cursor);
66325755
MD
775 }
776 }
6a37e7e4 777 hammer_done_cursor(&cursor);
66325755 778 if (error == 0) {
bcac4bbb
MD
779 ip = hammer_get_inode(&trans, dip, obj_id,
780 asof, flags, &error);
7f7c1f84 781 if (error == 0) {
e8599db1 782 error = hammer_get_vnode(ip, &vp);
7f7c1f84
MD
783 hammer_rel_inode(ip, 0);
784 } else {
785 vp = NULL;
786 }
66325755
MD
787 if (error == 0) {
788 vn_unlock(vp);
789 cache_setvp(ap->a_nch, vp);
790 vrele(vp);
791 }
792 } else if (error == ENOENT) {
793 cache_setvp(ap->a_nch, NULL);
794 }
36f82b23 795done:
b84de5af 796 hammer_done_transaction(&trans);
66325755 797 return (error);
427e5fc6
MD
798}
799
66325755
MD
800/*
801 * hammer_vop_nlookupdotdot { dvp, vpp, cred }
802 *
803 * Locate the parent directory of a directory vnode.
804 *
805 * dvp is referenced but not locked. *vpp must be returned referenced and
806 * locked. A parent_obj_id of 0 does not necessarily indicate that we are
807 * at the root, instead it could indicate that the directory we were in was
808 * removed.
42c7d26b
MD
809 *
810 * NOTE: as-of sequences are not linked into the directory structure. If
811 * we are at the root with a different asof than the mount point, reload
812 * the same directory with the mount point's asof. I'm not sure what this
813 * will do to NFS. We encode ASOF stamps in NFS file handles so it might not
814 * get confused, but it hasn't been tested.
66325755 815 */
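/*
 * Illustration (hypothetical value): in the as-of root case below
 * *a_fakename is set to the printable transaction id of the directory,
 * e.g. "0x00000001061a8ba6" -- 18 characters plus the terminating NUL,
 * which is what the 19 byte allocation accounts for.
 */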
427e5fc6
MD
816static
817int
66325755 818hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
427e5fc6 819{
36f82b23 820 struct hammer_transaction trans;
66325755 821 struct hammer_inode *dip;
d113fda1 822 struct hammer_inode *ip;
42c7d26b
MD
823 int64_t parent_obj_id;
824 hammer_tid_t asof;
d113fda1 825 int error;
66325755
MD
826
827 dip = VTOI(ap->a_dvp);
42c7d26b
MD
828 asof = dip->obj_asof;
829 parent_obj_id = dip->ino_data.parent_obj_id;
830
831 if (parent_obj_id == 0) {
832 if (dip->obj_id == HAMMER_OBJID_ROOT &&
833 asof != dip->hmp->asof) {
834 parent_obj_id = dip->obj_id;
835 asof = dip->hmp->asof;
836 *ap->a_fakename = kmalloc(19, M_TEMP, M_WAITOK);
837 ksnprintf(*ap->a_fakename, 19, "0x%016llx",
838 dip->obj_asof);
839 } else {
840 *ap->a_vpp = NULL;
841 return ENOENT;
842 }
66325755 843 }
d113fda1 844
36f82b23
MD
845 hammer_simple_transaction(&trans, dip->hmp);
846
bcac4bbb 847 ip = hammer_get_inode(&trans, dip, parent_obj_id,
42c7d26b 848 asof, dip->flags, &error);
36f82b23 849 if (ip) {
e8599db1 850 error = hammer_get_vnode(ip, ap->a_vpp);
36f82b23
MD
851 hammer_rel_inode(ip, 0);
852 } else {
d113fda1 853 *ap->a_vpp = NULL;
d113fda1 854 }
b84de5af 855 hammer_done_transaction(&trans);
d113fda1 856 return (error);
427e5fc6
MD
857}
858
66325755
MD
859/*
860 * hammer_vop_nlink { nch, dvp, vp, cred }
861 */
427e5fc6
MD
862static
863int
66325755 864hammer_vop_nlink(struct vop_nlink_args *ap)
427e5fc6 865{
66325755
MD
866 struct hammer_transaction trans;
867 struct hammer_inode *dip;
868 struct hammer_inode *ip;
869 struct nchandle *nch;
870 int error;
871
872 nch = ap->a_nch;
873 dip = VTOI(ap->a_dvp);
874 ip = VTOI(ap->a_vp);
875
d113fda1
MD
876 if (dip->flags & HAMMER_INODE_RO)
877 return (EROFS);
878 if (ip->flags & HAMMER_INODE_RO)
879 return (EROFS);
e63644f0
MD
880 if ((error = hammer_checkspace(dip->hmp)) != 0)
881 return (error);
d113fda1 882
66325755
MD
883 /*
884 * Create a transaction to cover the operations we perform.
885 */
8cd0a023 886 hammer_start_transaction(&trans, dip->hmp);
66325755
MD
887
888 /*
889 * Add the filesystem object to the directory. Note that neither
890 * dip nor ip are referenced or locked, but their vnodes are
891 * referenced. This function will bump the inode's link count.
892 */
a89aec1b 893 error = hammer_ip_add_directory(&trans, dip, nch->ncp, ip);
66325755
MD
894
895 /*
896 * Finish up.
897 */
b84de5af 898 if (error == 0) {
6b4f890b
MD
899 cache_setunresolved(nch);
900 cache_setvp(nch, ap->a_vp);
66325755 901 }
b84de5af 902 hammer_done_transaction(&trans);
66325755 903 return (error);
427e5fc6
MD
904}
905
66325755
MD
906/*
907 * hammer_vop_nmkdir { nch, dvp, vpp, cred, vap }
908 *
909 * The operating system has already ensured that the directory entry
910 * does not exist and done all appropriate namespace locking.
911 */
427e5fc6
MD
912static
913int
66325755 914hammer_vop_nmkdir(struct vop_nmkdir_args *ap)
427e5fc6 915{
66325755
MD
916 struct hammer_transaction trans;
917 struct hammer_inode *dip;
918 struct hammer_inode *nip;
919 struct nchandle *nch;
920 int error;
921
922 nch = ap->a_nch;
923 dip = VTOI(ap->a_dvp);
924
d113fda1
MD
925 if (dip->flags & HAMMER_INODE_RO)
926 return (EROFS);
e63644f0
MD
927 if ((error = hammer_checkspace(dip->hmp)) != 0)
928 return (error);
d113fda1 929
66325755
MD
930 /*
931 * Create a transaction to cover the operations we perform.
932 */
8cd0a023 933 hammer_start_transaction(&trans, dip->hmp);
66325755
MD
934
935 /*
936 * Create a new filesystem object of the requested type. The
8cd0a023 937 * returned inode will be referenced but not locked.
66325755 938 */
8cd0a023 939 error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
66325755 940 if (error) {
77062c8a 941 hkprintf("hammer_mkdir error %d\n", error);
b84de5af 942 hammer_done_transaction(&trans);
66325755
MD
943 *ap->a_vpp = NULL;
944 return (error);
945 }
66325755
MD
946 /*
947 * Add the new filesystem object to the directory. This will also
948 * bump the inode's link count.
949 */
a89aec1b 950 error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);
0b075555 951 if (error)
77062c8a 952 hkprintf("hammer_mkdir (add) error %d\n", error);
66325755
MD
953
954 /*
955 * Finish up.
956 */
957 if (error) {
a89aec1b 958 hammer_rel_inode(nip, 0);
66325755
MD
959 *ap->a_vpp = NULL;
960 } else {
e8599db1 961 error = hammer_get_vnode(nip, ap->a_vpp);
a89aec1b
MD
962 hammer_rel_inode(nip, 0);
963 if (error == 0) {
964 cache_setunresolved(ap->a_nch);
965 cache_setvp(ap->a_nch, *ap->a_vpp);
966 }
66325755 967 }
b84de5af 968 hammer_done_transaction(&trans);
66325755 969 return (error);
427e5fc6
MD
970}
971
66325755
MD
972/*
973 * hammer_vop_nmknod { nch, dvp, vpp, cred, vap }
974 *
975 * The operating system has already ensured that the directory entry
976 * does not exist and done all appropriate namespace locking.
977 */
427e5fc6
MD
978static
979int
66325755 980hammer_vop_nmknod(struct vop_nmknod_args *ap)
427e5fc6 981{
66325755
MD
982 struct hammer_transaction trans;
983 struct hammer_inode *dip;
984 struct hammer_inode *nip;
985 struct nchandle *nch;
986 int error;
987
988 nch = ap->a_nch;
989 dip = VTOI(ap->a_dvp);
990
d113fda1
MD
991 if (dip->flags & HAMMER_INODE_RO)
992 return (EROFS);
e63644f0
MD
993 if ((error = hammer_checkspace(dip->hmp)) != 0)
994 return (error);
d113fda1 995
66325755
MD
996 /*
997 * Create a transaction to cover the operations we perform.
998 */
8cd0a023 999 hammer_start_transaction(&trans, dip->hmp);
66325755
MD
1000
1001 /*
1002 * Create a new filesystem object of the requested type. The
8cd0a023 1003 * returned inode will be referenced but not locked.
66325755 1004 */
8cd0a023 1005 error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
66325755 1006 if (error) {
b84de5af 1007 hammer_done_transaction(&trans);
66325755
MD
1008 *ap->a_vpp = NULL;
1009 return (error);
1010 }
66325755
MD
1011
1012 /*
1013 * Add the new filesystem object to the directory. This will also
1014 * bump the inode's link count.
1015 */
a89aec1b 1016 error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);
66325755
MD
1017
1018 /*
1019 * Finish up.
1020 */
1021 if (error) {
a89aec1b 1022 hammer_rel_inode(nip, 0);
66325755
MD
1023 *ap->a_vpp = NULL;
1024 } else {
e8599db1 1025 error = hammer_get_vnode(nip, ap->a_vpp);
a89aec1b
MD
1026 hammer_rel_inode(nip, 0);
1027 if (error == 0) {
1028 cache_setunresolved(ap->a_nch);
1029 cache_setvp(ap->a_nch, *ap->a_vpp);
1030 }
66325755 1031 }
b84de5af 1032 hammer_done_transaction(&trans);
66325755 1033 return (error);
427e5fc6
MD
1034}
1035
66325755
MD
1036/*
1037 * hammer_vop_open { vp, mode, cred, fp }
1038 */
427e5fc6
MD
1039static
1040int
66325755 1041hammer_vop_open(struct vop_open_args *ap)
427e5fc6 1042{
9f5097dc
MD
1043 hammer_inode_t ip;
1044
1045 ip = VTOI(ap->a_vp);
1046
1047 if ((ap->a_mode & FWRITE) && (ip->flags & HAMMER_INODE_RO))
d113fda1 1048 return (EROFS);
a89aec1b 1049 return(vop_stdopen(ap));
427e5fc6
MD
1050}
1051
66325755
MD
1052/*
1053 * hammer_vop_pathconf { vp, name, retval }
1054 */
427e5fc6
MD
1055static
1056int
66325755 1057hammer_vop_pathconf(struct vop_pathconf_args *ap)
427e5fc6
MD
1058{
1059 return EOPNOTSUPP;
1060}
1061
66325755
MD
1062/*
1063 * hammer_vop_print { vp }
1064 */
427e5fc6
MD
1065static
1066int
66325755 1067hammer_vop_print(struct vop_print_args *ap)
427e5fc6
MD
1068{
1069 return EOPNOTSUPP;
1070}
1071
66325755 1072/*
6b4f890b 1073 * hammer_vop_readdir { vp, uio, cred, *eofflag, *ncookies, off_t **cookies }
66325755 1074 */
427e5fc6
MD
1075static
1076int
66325755 1077hammer_vop_readdir(struct vop_readdir_args *ap)
427e5fc6 1078{
36f82b23 1079 struct hammer_transaction trans;
6b4f890b
MD
1080 struct hammer_cursor cursor;
1081 struct hammer_inode *ip;
1082 struct uio *uio;
6b4f890b
MD
1083 hammer_base_elm_t base;
1084 int error;
1085 int cookie_index;
1086 int ncookies;
1087 off_t *cookies;
1088 off_t saveoff;
1089 int r;
1090
1091 ip = VTOI(ap->a_vp);
1092 uio = ap->a_uio;
b3deaf57
MD
1093 saveoff = uio->uio_offset;
1094
1095 if (ap->a_ncookies) {
1096 ncookies = uio->uio_resid / 16 + 1;
1097 if (ncookies > 1024)
1098 ncookies = 1024;
1099 cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
1100 cookie_index = 0;
1101 } else {
1102 ncookies = -1;
1103 cookies = NULL;
1104 cookie_index = 0;
1105 }
1106
36f82b23
MD
1107 hammer_simple_transaction(&trans, ip->hmp);
1108
b3deaf57
MD
1109 /*
1110 * Handle artificial entries
1111 */
1112 error = 0;
1113 if (saveoff == 0) {
1114 r = vop_write_dirent(&error, uio, ip->obj_id, DT_DIR, 1, ".");
1115 if (r)
1116 goto done;
1117 if (cookies)
1118 cookies[cookie_index] = saveoff;
1119 ++saveoff;
1120 ++cookie_index;
1121 if (cookie_index == ncookies)
1122 goto done;
1123 }
1124 if (saveoff == 1) {
1125 if (ip->ino_data.parent_obj_id) {
1126 r = vop_write_dirent(&error, uio,
1127 ip->ino_data.parent_obj_id,
1128 DT_DIR, 2, "..");
1129 } else {
1130 r = vop_write_dirent(&error, uio,
1131 ip->obj_id, DT_DIR, 2, "..");
1132 }
1133 if (r)
1134 goto done;
1135 if (cookies)
1136 cookies[cookie_index] = saveoff;
1137 ++saveoff;
1138 ++cookie_index;
1139 if (cookie_index == ncookies)
1140 goto done;
1141 }
6b4f890b
MD
1142
1143 /*
1144 * Key range (begin and end inclusive) to scan. Directory keys
1145 * directly translate to a 64 bit 'seek' position.
1146 */
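	/*
	 * Consequently the uio offset and the cookies returned to userland
	 * are the directory entry keys themselves; offsets 0 and 1 are
	 * reserved for the "." and ".." entries synthesized above, so a later
	 * readdir resumes the B-Tree scan exactly where the previous one
	 * stopped.
	 */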
bcac4bbb 1147 hammer_init_cursor(&trans, &cursor, &ip->cache[1], ip);
2f85fa4d 1148 cursor.key_beg.localization = HAMMER_LOCALIZE_MISC;
6b4f890b 1149 cursor.key_beg.obj_id = ip->obj_id;
d5530d22 1150 cursor.key_beg.create_tid = 0;
6b4f890b
MD
1151 cursor.key_beg.delete_tid = 0;
1152 cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
1153 cursor.key_beg.obj_type = 0;
b3deaf57 1154 cursor.key_beg.key = saveoff;
6b4f890b
MD
1155
1156 cursor.key_end = cursor.key_beg;
1157 cursor.key_end.key = HAMMER_MAX_KEY;
d5530d22
MD
1158 cursor.asof = ip->obj_asof;
1159 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;
6b4f890b 1160
4e17f465 1161 error = hammer_ip_first(&cursor);
6b4f890b
MD
1162
1163 while (error == 0) {
11ad5ade 1164 error = hammer_ip_resolve_data(&cursor);
6b4f890b
MD
1165 if (error)
1166 break;
11ad5ade 1167 base = &cursor.leaf->base;
6b4f890b 1168 saveoff = base->key;
11ad5ade 1169 KKASSERT(cursor.leaf->data_len > HAMMER_ENTRY_NAME_OFF);
6b4f890b 1170
7a04d74f
MD
1171 if (base->obj_id != ip->obj_id)
1172 panic("readdir: bad record at %p", cursor.node);
1173
6b4f890b 1174 r = vop_write_dirent(
11ad5ade
MD
1175 &error, uio, cursor.data->entry.obj_id,
1176 hammer_get_dtype(cursor.leaf->base.obj_type),
1177 cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF ,
1178 (void *)cursor.data->entry.name);
6b4f890b
MD
1179 if (r)
1180 break;
1181 ++saveoff;
1182 if (cookies)
1183 cookies[cookie_index] = base->key;
1184 ++cookie_index;
1185 if (cookie_index == ncookies)
1186 break;
1187 error = hammer_ip_next(&cursor);
1188 }
1189 hammer_done_cursor(&cursor);
1190
b3deaf57 1191done:
b84de5af 1192 hammer_done_transaction(&trans);
36f82b23 1193
6b4f890b
MD
1194 if (ap->a_eofflag)
1195 *ap->a_eofflag = (error == ENOENT);
6b4f890b
MD
1196 uio->uio_offset = saveoff;
1197 if (error && cookie_index == 0) {
b3deaf57
MD
1198 if (error == ENOENT)
1199 error = 0;
6b4f890b
MD
1200 if (cookies) {
1201 kfree(cookies, M_TEMP);
1202 *ap->a_ncookies = 0;
1203 *ap->a_cookies = NULL;
1204 }
1205 } else {
7a04d74f
MD
1206 if (error == ENOENT)
1207 error = 0;
6b4f890b
MD
1208 if (cookies) {
1209 *ap->a_ncookies = cookie_index;
1210 *ap->a_cookies = cookies;
1211 }
1212 }
1213 return(error);
427e5fc6
MD
1214}
1215
66325755
MD
1216/*
1217 * hammer_vop_readlink { vp, uio, cred }
1218 */
427e5fc6
MD
1219static
1220int
66325755 1221hammer_vop_readlink(struct vop_readlink_args *ap)
427e5fc6 1222{
36f82b23 1223 struct hammer_transaction trans;
7a04d74f
MD
1224 struct hammer_cursor cursor;
1225 struct hammer_inode *ip;
1226 int error;
1227
1228 ip = VTOI(ap->a_vp);
36f82b23 1229
2f85fa4d
MD
1230 /*
1231 * Shortcut if the symlink data was stuffed into ino_data.
1232 */
1233 if (ip->ino_data.size <= HAMMER_INODE_BASESYMLEN) {
1234 error = uiomove(ip->ino_data.ext.symlink,
1235 ip->ino_data.size, ap->a_uio);
1236 return(error);
1237 }
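	/*
	 * Short targets (up to HAMMER_INODE_BASESYMLEN bytes) live in the
	 * inode's extended data area and never need a B-Tree lookup; only
	 * longer targets fall through to the cursor-based path below,
	 * mirroring the record layout created by hammer_vop_nsymlink().
	 */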
36f82b23 1238
2f85fa4d
MD
1239 /*
1240 * Long version
1241 */
1242 hammer_simple_transaction(&trans, ip->hmp);
bcac4bbb 1243 hammer_init_cursor(&trans, &cursor, &ip->cache[1], ip);
7a04d74f
MD
1244
1245 /*
1246 * Key range (begin and end inclusive) to scan. Directory keys
1247 * directly translate to a 64 bit 'seek' position.
1248 */
2f85fa4d 1249 cursor.key_beg.localization = HAMMER_LOCALIZE_MISC; /* XXX */
7a04d74f 1250 cursor.key_beg.obj_id = ip->obj_id;
d5530d22 1251 cursor.key_beg.create_tid = 0;
7a04d74f
MD
1252 cursor.key_beg.delete_tid = 0;
1253 cursor.key_beg.rec_type = HAMMER_RECTYPE_FIX;
1254 cursor.key_beg.obj_type = 0;
1255 cursor.key_beg.key = HAMMER_FIXKEY_SYMLINK;
d5530d22
MD
1256 cursor.asof = ip->obj_asof;
1257 cursor.flags |= HAMMER_CURSOR_ASOF;
7a04d74f 1258
45a014dc 1259 error = hammer_ip_lookup(&cursor);
7a04d74f
MD
1260 if (error == 0) {
1261 error = hammer_ip_resolve_data(&cursor);
1262 if (error == 0) {
11ad5ade
MD
1263 KKASSERT(cursor.leaf->data_len >=
1264 HAMMER_SYMLINK_NAME_OFF);
1265 error = uiomove(cursor.data->symlink.name,
1266 cursor.leaf->data_len -
1267 HAMMER_SYMLINK_NAME_OFF,
7a04d74f
MD
1268 ap->a_uio);
1269 }
1270 }
1271 hammer_done_cursor(&cursor);
b84de5af 1272 hammer_done_transaction(&trans);
7a04d74f 1273 return(error);
427e5fc6
MD
1274}
1275
66325755
MD
1276/*
1277 * hammer_vop_nremove { nch, dvp, cred }
1278 */
427e5fc6
MD
1279static
1280int
66325755 1281hammer_vop_nremove(struct vop_nremove_args *ap)
427e5fc6 1282{
b84de5af 1283 struct hammer_transaction trans;
e63644f0 1284 struct hammer_inode *dip;
b84de5af
MD
1285 int error;
1286
e63644f0
MD
1287 dip = VTOI(ap->a_dvp);
1288
1289 if (hammer_nohistory(dip) == 0 &&
1290 (error = hammer_checkspace(dip->hmp)) != 0) {
1291 return (error);
1292 }
1293
1294 hammer_start_transaction(&trans, dip->hmp);
b84de5af
MD
1295 error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp, ap->a_cred, 0);
1296 hammer_done_transaction(&trans);
1297
1298 return (error);
427e5fc6
MD
1299}
1300
66325755
MD
1301/*
1302 * hammer_vop_nrename { fnch, tnch, fdvp, tdvp, cred }
1303 */
427e5fc6
MD
1304static
1305int
66325755 1306hammer_vop_nrename(struct vop_nrename_args *ap)
427e5fc6 1307{
8cd0a023
MD
1308 struct hammer_transaction trans;
1309 struct namecache *fncp;
1310 struct namecache *tncp;
1311 struct hammer_inode *fdip;
1312 struct hammer_inode *tdip;
1313 struct hammer_inode *ip;
1314 struct hammer_cursor cursor;
8cd0a023 1315 int64_t namekey;
11ad5ade 1316 int nlen, error;
8cd0a023
MD
1317
1318 fdip = VTOI(ap->a_fdvp);
1319 tdip = VTOI(ap->a_tdvp);
1320 fncp = ap->a_fnch->ncp;
1321 tncp = ap->a_tnch->ncp;
b3deaf57
MD
1322 ip = VTOI(fncp->nc_vp);
1323 KKASSERT(ip != NULL);
d113fda1
MD
1324
1325 if (fdip->flags & HAMMER_INODE_RO)
1326 return (EROFS);
1327 if (tdip->flags & HAMMER_INODE_RO)
1328 return (EROFS);
1329 if (ip->flags & HAMMER_INODE_RO)
1330 return (EROFS);
e63644f0
MD
1331 if ((error = hammer_checkspace(fdip->hmp)) != 0)
1332 return (error);
d113fda1 1333
8cd0a023
MD
1334 hammer_start_transaction(&trans, fdip->hmp);
1335
1336 /*
b3deaf57
MD
1337 * Remove tncp from the target directory and then link ip as
1338 * tncp. XXX pass trans to dounlink
42c7d26b
MD
1339 *
1340 * Force the inode sync-time to match the transaction so it is
1341 * in-sync with the creation of the target directory entry.
8cd0a023 1342 */
b84de5af 1343 error = hammer_dounlink(&trans, ap->a_tnch, ap->a_tdvp, ap->a_cred, 0);
42c7d26b 1344 if (error == 0 || error == ENOENT) {
b3deaf57 1345 error = hammer_ip_add_directory(&trans, tdip, tncp, ip);
42c7d26b
MD
1346 if (error == 0) {
1347 ip->ino_data.parent_obj_id = tdip->obj_id;
47637bff 1348 hammer_modify_inode(ip, HAMMER_INODE_DDIRTY);
42c7d26b
MD
1349 }
1350 }
b3deaf57
MD
1351 if (error)
1352 goto failed; /* XXX */
8cd0a023
MD
1353
1354 /*
1355 * Locate the record in the originating directory and remove it.
1356 *
1357 * Calculate the namekey and setup the key range for the scan. This
1358 * works kinda like a chained hash table where the lower 32 bits
1359 * of the namekey synthesize the chain.
1360 *
1361 * The key range is inclusive of both key_beg and key_end.
1362 */
1363 namekey = hammer_directory_namekey(fncp->nc_name, fncp->nc_nlen);
6a37e7e4 1364retry:
bcac4bbb 1365 hammer_init_cursor(&trans, &cursor, &fdip->cache[1], fdip);
2f85fa4d 1366 cursor.key_beg.localization = HAMMER_LOCALIZE_MISC;
8cd0a023
MD
1367 cursor.key_beg.obj_id = fdip->obj_id;
1368 cursor.key_beg.key = namekey;
d5530d22 1369 cursor.key_beg.create_tid = 0;
8cd0a023
MD
1370 cursor.key_beg.delete_tid = 0;
1371 cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
1372 cursor.key_beg.obj_type = 0;
1373
1374 cursor.key_end = cursor.key_beg;
1375 cursor.key_end.key |= 0xFFFFFFFFULL;
d5530d22
MD
1376 cursor.asof = fdip->obj_asof;
1377 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;
8cd0a023
MD
1378
1379 /*
1380 * Scan all matching records (the chain), locate the one matching
a89aec1b 1381 * the requested path component.
8cd0a023
MD
1382 *
1383 * The hammer_ip_*() functions merge in-memory records with on-disk
1384 * records for the purposes of the search.
1385 */
4e17f465 1386 error = hammer_ip_first(&cursor);
a89aec1b 1387 while (error == 0) {
8cd0a023
MD
1388 if (hammer_ip_resolve_data(&cursor) != 0)
1389 break;
11ad5ade
MD
1390 nlen = cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF;
1391 KKASSERT(nlen > 0);
1392 if (fncp->nc_nlen == nlen &&
1393 bcmp(fncp->nc_name, cursor.data->entry.name, nlen) == 0) {
8cd0a023
MD
1394 break;
1395 }
a89aec1b 1396 error = hammer_ip_next(&cursor);
8cd0a023 1397 }
8cd0a023
MD
1398
1399 /*
1400 * If all is ok we have to get the inode so we can adjust nlinks.
6a37e7e4
MD
1401 *
1402 * WARNING: hammer_ip_del_directory() may have to terminate the
1403 * cursor to avoid a recursion. It's ok to call hammer_done_cursor()
1404 * twice.
8cd0a023 1405 */
9944ae54 1406 if (error == 0)
6a37e7e4 1407 error = hammer_ip_del_directory(&trans, &cursor, fdip, ip);
b84de5af
MD
1408
1409 /*
1410 * XXX A deadlock here will break rename's atomicity for the purposes
1411 * of crash recovery.
1412 */
1413 if (error == EDEADLK) {
b84de5af 1414 hammer_done_cursor(&cursor);
b84de5af
MD
1415 goto retry;
1416 }
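	/*
	 * Note on the retry: when the cursor deadlocks against the flusher it
	 * is torn down and the scan restarts from the "retry:" label above,
	 * so the directory entry removal is attempted again with a fresh
	 * cursor.
	 */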
1417
1418 /*
1419 * Cleanup and tell the kernel that the rename succeeded.
1420 */
c0ade690 1421 hammer_done_cursor(&cursor);
6a37e7e4
MD
1422 if (error == 0)
1423 cache_rename(ap->a_fnch, ap->a_tnch);
b84de5af 1424
b3deaf57 1425failed:
b84de5af 1426 hammer_done_transaction(&trans);
8cd0a023 1427 return (error);
427e5fc6
MD
1428}
1429
66325755
MD
1430/*
1431 * hammer_vop_nrmdir { nch, dvp, cred }
1432 */
427e5fc6
MD
1433static
1434int
66325755 1435hammer_vop_nrmdir(struct vop_nrmdir_args *ap)
427e5fc6 1436{
b84de5af 1437 struct hammer_transaction trans;
e63644f0 1438 struct hammer_inode *dip;
b84de5af
MD
1439 int error;
1440
e63644f0
MD
1441 dip = VTOI(ap->a_dvp);
1442
1443 if (hammer_nohistory(dip) == 0 &&
1444 (error = hammer_checkspace(dip->hmp)) != 0) {
1445 return (error);
1446 }
1447
1448 hammer_start_transaction(&trans, dip->hmp);
b84de5af
MD
1449 error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp, ap->a_cred, 0);
1450 hammer_done_transaction(&trans);
1451
1452 return (error);
427e5fc6
MD
1453}
1454
66325755
MD
1455/*
1456 * hammer_vop_setattr { vp, vap, cred }
1457 */
427e5fc6
MD
1458static
1459int
66325755 1460hammer_vop_setattr(struct vop_setattr_args *ap)
427e5fc6 1461{
8cd0a023
MD
1462 struct hammer_transaction trans;
1463 struct vattr *vap;
1464 struct hammer_inode *ip;
1465 int modflags;
1466 int error;
d5ef456e 1467 int truncating;
b84de5af 1468 off_t aligned_size;
8cd0a023 1469 u_int32_t flags;
8cd0a023
MD
1470
1471 vap = ap->a_vap;
1472 ip = ap->a_vp->v_data;
1473 modflags = 0;
1474
1475 if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
1476 return(EROFS);
d113fda1
MD
1477 if (ip->flags & HAMMER_INODE_RO)
1478 return (EROFS);
e63644f0
MD
1479 if (hammer_nohistory(ip) == 0 &&
1480 (error = hammer_checkspace(ip->hmp)) != 0) {
1481 return (error);
1482 }
8cd0a023
MD
1483
1484 hammer_start_transaction(&trans, ip->hmp);
1485 error = 0;
1486
1487 if (vap->va_flags != VNOVAL) {
1488 flags = ip->ino_data.uflags;
1489 error = vop_helper_setattr_flags(&flags, vap->va_flags,
1490 hammer_to_unix_xid(&ip->ino_data.uid),
1491 ap->a_cred);
1492 if (error == 0) {
1493 if (ip->ino_data.uflags != flags) {
1494 ip->ino_data.uflags = flags;
1495 modflags |= HAMMER_INODE_DDIRTY;
1496 }
1497 if (ip->ino_data.uflags & (IMMUTABLE | APPEND)) {
1498 error = 0;
1499 goto done;
1500 }
1501 }
1502 goto done;
1503 }
1504 if (ip->ino_data.uflags & (IMMUTABLE | APPEND)) {
1505 error = EPERM;
1506 goto done;
1507 }
7538695e
MD
1508 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
1509 mode_t cur_mode = ip->ino_data.mode;
1510 uid_t cur_uid = hammer_to_unix_xid(&ip->ino_data.uid);
1511 gid_t cur_gid = hammer_to_unix_xid(&ip->ino_data.gid);
1512 uuid_t uuid_uid;
1513 uuid_t uuid_gid;
1514
1515 error = vop_helper_chown(ap->a_vp, vap->va_uid, vap->va_gid,
1516 ap->a_cred,
1517 &cur_uid, &cur_gid, &cur_mode);
1518 if (error == 0) {
1519 hammer_guid_to_uuid(&uuid_uid, cur_uid);
1520 hammer_guid_to_uuid(&uuid_gid, cur_gid);
1521 if (bcmp(&uuid_uid, &ip->ino_data.uid,
1522 sizeof(uuid_uid)) ||
1523 bcmp(&uuid_gid, &ip->ino_data.gid,
1524 sizeof(uuid_gid)) ||
1525 ip->ino_data.mode != cur_mode
1526 ) {
1527 ip->ino_data.uid = uuid_uid;
1528 ip->ino_data.gid = uuid_gid;
1529 ip->ino_data.mode = cur_mode;
1530 }
8cd0a023
MD
1531 modflags |= HAMMER_INODE_DDIRTY;
1532 }
1533 }
11ad5ade 1534 while (vap->va_size != VNOVAL && ip->ino_data.size != vap->va_size) {
8cd0a023
MD
1535 switch(ap->a_vp->v_type) {
1536 case VREG:
11ad5ade 1537 if (vap->va_size == ip->ino_data.size)
d5ef456e 1538 break;
b84de5af
MD
1539 /*
1540 * XXX break atomicy, we can deadlock the backend
1541 * if we do not release the lock. Probably not a
1542 * big deal here.
1543 */
11ad5ade 1544 if (vap->va_size < ip->ino_data.size) {
c0ade690
MD
1545 vtruncbuf(ap->a_vp, vap->va_size,
1546 HAMMER_BUFSIZE);
d5ef456e
MD
1547 truncating = 1;
1548 } else {
c0ade690 1549 vnode_pager_setsize(ap->a_vp, vap->va_size);
d5ef456e 1550 truncating = 0;
c0ade690 1551 }
11ad5ade
MD
1552 ip->ino_data.size = vap->va_size;
1553 modflags |= HAMMER_INODE_DDIRTY;
76376933 1554 aligned_size = (vap->va_size + HAMMER_BUFMASK) &
b84de5af 1555 ~HAMMER_BUFMASK64;
d5ef456e 1556
b84de5af
MD
1557 /*
1558 * on-media truncation is cached in the inode until
1559 * the inode is synchronized.
1560 */
d5ef456e 1561 if (truncating) {
47637bff 1562 hammer_ip_frontend_trunc(ip, vap->va_size);
0832c9bb
MD
1563 hammer_update_rsv_databufs(ip);
1564#ifdef DEBUG_TRUNCATE
1565 if (HammerTruncIp == NULL)
1566 HammerTruncIp = ip;
1567#endif
b84de5af
MD
1568 if ((ip->flags & HAMMER_INODE_TRUNCATED) == 0) {
1569 ip->flags |= HAMMER_INODE_TRUNCATED;
1570 ip->trunc_off = vap->va_size;
0832c9bb
MD
1571#ifdef DEBUG_TRUNCATE
1572 if (ip == HammerTruncIp)
1573 kprintf("truncate1 %016llx\n", ip->trunc_off);
1574#endif
b84de5af
MD
1575 } else if (ip->trunc_off > vap->va_size) {
1576 ip->trunc_off = vap->va_size;
0832c9bb
MD
1577#ifdef DEBUG_TRUNCATE
1578 if (ip == HammerTruncIp)
1579 kprintf("truncate2 %016llx\n", ip->trunc_off);
1580#endif
1581 } else {
1582#ifdef DEBUG_TRUNCATE
1583 if (ip == HammerTruncIp)
1584 kprintf("truncate3 %016llx (ignored)\n", vap->va_size);
1585#endif
b84de5af 1586 }
d5ef456e 1587 }
b84de5af 1588
d5ef456e
MD
1589 /*
1590 * If truncating we have to clean out a portion of
b84de5af
MD
1591 * the last block on-disk. We do this in the
1592 * front-end buffer cache.
d5ef456e 1593 */
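			/*
			 * Worked example (assuming the usual 16384 byte
			 * HAMMER_BUFSIZE): for va_size = 100000, aligned_size
			 * rounds up to 114688, is pulled back to 98304 (the
			 * start of the last partially valid buffer), offset
			 * becomes 100000 & 16383 = 1696, and bytes 1696..16383
			 * of that buffer are zeroed before the delayed write.
			 */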
b84de5af 1594 if (truncating && vap->va_size < aligned_size) {
d5ef456e
MD
1595 struct buf *bp;
1596 int offset;
1597
47637bff
MD
1598 aligned_size -= HAMMER_BUFSIZE;
1599
d5ef456e 1600 offset = vap->va_size & HAMMER_BUFMASK;
47637bff 1601 error = bread(ap->a_vp, aligned_size,
d5ef456e 1602 HAMMER_BUFSIZE, &bp);
47637bff 1603 hammer_ip_frontend_trunc(ip, aligned_size);
d5ef456e
MD
1604 if (error == 0) {
1605 bzero(bp->b_data + offset,
1606 HAMMER_BUFSIZE - offset);
1607 bdwrite(bp);
1608 } else {
47637bff 1609 kprintf("ERROR %d\n", error);
d5ef456e
MD
1610 brelse(bp);
1611 }
1612 }
76376933 1613 break;
8cd0a023 1614 case VDATABASE:
b84de5af
MD
1615 if ((ip->flags & HAMMER_INODE_TRUNCATED) == 0) {
1616 ip->flags |= HAMMER_INODE_TRUNCATED;
1617 ip->trunc_off = vap->va_size;
1618 } else if (ip->trunc_off > vap->va_size) {
1619 ip->trunc_off = vap->va_size;
1620 }
47637bff 1621 hammer_ip_frontend_trunc(ip, vap->va_size);
11ad5ade
MD
1622 ip->ino_data.size = vap->va_size;
1623 modflags |= HAMMER_INODE_DDIRTY;
8cd0a023
MD
1624 break;
1625 default:
1626 error = EINVAL;
1627 goto done;
1628 }
d26d0ae9 1629 break;
8cd0a023
MD
1630 }
1631 if (vap->va_atime.tv_sec != VNOVAL) {
bcac4bbb 1632 ip->ino_data.atime =
8cd0a023
MD
1633 hammer_timespec_to_transid(&vap->va_atime);
1634 modflags |= HAMMER_INODE_ITIMES;
1635 }
1636 if (vap->va_mtime.tv_sec != VNOVAL) {
11ad5ade 1637 ip->ino_data.mtime =
8cd0a023
MD
1638 hammer_timespec_to_transid(&vap->va_mtime);
1639 modflags |= HAMMER_INODE_ITIMES;
98f7132d 1640 modflags |= HAMMER_INODE_DDIRTY; /* XXX mtime */
8cd0a023
MD
1641 }
1642 if (vap->va_mode != (mode_t)VNOVAL) {
7538695e
MD
1643 mode_t cur_mode = ip->ino_data.mode;
1644 uid_t cur_uid = hammer_to_unix_xid(&ip->ino_data.uid);
1645 gid_t cur_gid = hammer_to_unix_xid(&ip->ino_data.gid);
1646
1647 error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred,
1648 cur_uid, cur_gid, &cur_mode);
1649 if (error == 0 && ip->ino_data.mode != cur_mode) {
1650 ip->ino_data.mode = cur_mode;
8cd0a023
MD
1651 modflags |= HAMMER_INODE_DDIRTY;
1652 }
1653 }
1654done:
b84de5af 1655 if (error == 0)
47637bff 1656 hammer_modify_inode(ip, modflags);
b84de5af 1657 hammer_done_transaction(&trans);
8cd0a023 1658 return (error);
427e5fc6
MD
1659}
1660
66325755
MD
1661/*
1662 * hammer_vop_nsymlink { nch, dvp, vpp, cred, vap, target }
1663 */
427e5fc6
MD
1664static
1665int
66325755 1666hammer_vop_nsymlink(struct vop_nsymlink_args *ap)
427e5fc6 1667{
7a04d74f
MD
1668 struct hammer_transaction trans;
1669 struct hammer_inode *dip;
1670 struct hammer_inode *nip;
1671 struct nchandle *nch;
1672 hammer_record_t record;
1673 int error;
1674 int bytes;
1675
1676 ap->a_vap->va_type = VLNK;
1677
1678 nch = ap->a_nch;
1679 dip = VTOI(ap->a_dvp);
1680
d113fda1
MD
1681 if (dip->flags & HAMMER_INODE_RO)
1682 return (EROFS);
e63644f0
MD
1683 if ((error = hammer_checkspace(dip->hmp)) != 0)
1684 return (error);
d113fda1 1685
7a04d74f
MD
1686 /*
1687 * Create a transaction to cover the operations we perform.
1688 */
1689 hammer_start_transaction(&trans, dip->hmp);
1690
1691 /*
1692 * Create a new filesystem object of the requested type. The
1693 * returned inode will be referenced but not locked.
1694 */
1695
1696 error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
1697 if (error) {
b84de5af 1698 hammer_done_transaction(&trans);
7a04d74f
MD
1699 *ap->a_vpp = NULL;
1700 return (error);
1701 }
1702
7a04d74f
MD
1703 /*
1704 * Add a record representing the symlink. The symlink stores the link
1705 * as pure data, not a string, and is not \0-terminated.
1706 */
1707 if (error == 0) {
7a04d74f
MD
1708 bytes = strlen(ap->a_target);
1709
2f85fa4d
MD
1710 if (bytes <= HAMMER_INODE_BASESYMLEN) {
1711 bcopy(ap->a_target, nip->ino_data.ext.symlink, bytes);
1712 } else {
1713 record = hammer_alloc_mem_record(nip, bytes);
1714 record->type = HAMMER_MEM_RECORD_GENERAL;
1715
1716 record->leaf.base.localization = HAMMER_LOCALIZE_MISC;
1717 record->leaf.base.key = HAMMER_FIXKEY_SYMLINK;
1718 record->leaf.base.rec_type = HAMMER_RECTYPE_FIX;
1719 record->leaf.data_len = bytes;
1720 KKASSERT(HAMMER_SYMLINK_NAME_OFF == 0);
1721 bcopy(ap->a_target, record->data->symlink.name, bytes);
1722 error = hammer_ip_add_record(&trans, record);
1723 }
42c7d26b
MD
1724
1725 /*
1726 * Set the file size to the length of the link.
1727 */
1728 if (error == 0) {
11ad5ade 1729 nip->ino_data.size = bytes;
47637bff 1730 hammer_modify_inode(nip, HAMMER_INODE_DDIRTY);
42c7d26b 1731 }
7a04d74f 1732 }
1f07f686
MD
1733 if (error == 0)
1734 error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);
7a04d74f
MD
1735
1736 /*
1737 * Finish up.
1738 */
1739 if (error) {
1740 hammer_rel_inode(nip, 0);
7a04d74f
MD
1741 *ap->a_vpp = NULL;
1742 } else {
e8599db1 1743 error = hammer_get_vnode(nip, ap->a_vpp);
7a04d74f
MD
1744 hammer_rel_inode(nip, 0);
1745 if (error == 0) {
1746 cache_setunresolved(ap->a_nch);
1747 cache_setvp(ap->a_nch, *ap->a_vpp);
1748 }
1749 }
b84de5af 1750 hammer_done_transaction(&trans);
7a04d74f 1751 return (error);
427e5fc6
MD
1752}
1753
66325755
MD
1754/*
1755 * hammer_vop_nwhiteout { nch, dvp, cred, flags }
1756 */
427e5fc6
MD
1757static
1758int
66325755 1759hammer_vop_nwhiteout(struct vop_nwhiteout_args *ap)
427e5fc6 1760{
b84de5af 1761 struct hammer_transaction trans;
e63644f0 1762 struct hammer_inode *dip;
b84de5af
MD
1763 int error;
1764
e63644f0
MD
1765 dip = VTOI(ap->a_dvp);
1766
1767 if (hammer_nohistory(dip) == 0 &&
1768 (error = hammer_checkspace(dip->hmp)) != 0) {
1769 return (error);
1770 }
1771
1772 hammer_start_transaction(&trans, dip->hmp);
b84de5af
MD
1773 error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp,
1774 ap->a_cred, ap->a_flags);
1775 hammer_done_transaction(&trans);
1776
1777 return (error);
427e5fc6
MD
1778}
1779
7dc57964
MD
1780/*
1781 * hammer_vop_ioctl { vp, command, data, fflag, cred }
1782 */
1783static
1784int
1785hammer_vop_ioctl(struct vop_ioctl_args *ap)
1786{
1787 struct hammer_inode *ip = ap->a_vp->v_data;
1788
1789 return(hammer_ioctl(ip, ap->a_command, ap->a_data,
1790 ap->a_fflag, ap->a_cred));
1791}
1792
513ca7d7
MD
1793static
1794int
1795hammer_vop_mountctl(struct vop_mountctl_args *ap)
1796{
1797 struct mount *mp;
1798 int error;
1799
1800 mp = ap->a_head.a_ops->head.vv_mount;
1801
1802 switch(ap->a_op) {
1803 case MOUNTCTL_SET_EXPORT:
1804 if (ap->a_ctllen != sizeof(struct export_args))
1805 error = EINVAL;
 1806		else error = hammer_vfs_export(mp, ap->a_op,
1807 (const struct export_args *)ap->a_ctl);
1808 break;
1809 default:
1810 error = journal_mountctl(ap);
1811 break;
1812 }
1813 return(error);
1814}
1815
66325755
MD
1816/*
1817 * hammer_vop_strategy { vp, bio }
8cd0a023
MD
1818 *
1819 * Strategy call, used for regular file read & write only. Note that the
1820 * bp may represent a cluster.
1821 *
1822 * To simplify operation and allow better optimizations in the future,
 1823 * this code does not make any assumptions with regard to buffer alignment
1824 * or size.
66325755 1825 */
427e5fc6
MD
1826static
1827int
66325755 1828hammer_vop_strategy(struct vop_strategy_args *ap)
427e5fc6 1829{
8cd0a023
MD
1830 struct buf *bp;
1831 int error;
1832
1833 bp = ap->a_bio->bio_buf;
1834
1835 switch(bp->b_cmd) {
1836 case BUF_CMD_READ:
1837 error = hammer_vop_strategy_read(ap);
1838 break;
1839 case BUF_CMD_WRITE:
1840 error = hammer_vop_strategy_write(ap);
1841 break;
1842 default:
059819e3
MD
1843 bp->b_error = error = EINVAL;
1844 bp->b_flags |= B_ERROR;
1845 biodone(ap->a_bio);
8cd0a023
MD
1846 break;
1847 }
8cd0a023 1848 return (error);
427e5fc6
MD
1849}
1850
8cd0a023
MD
1851/*
1852 * Read from a regular file. Iterate the related records and fill in the
1853 * BIO/BUF. Gaps are zero-filled.
1854 *
1855 * The support code in hammer_object.c should be used to deal with mixed
1856 * in-memory and on-disk records.
1857 *
1858 * XXX atime update
1859 */
1860static
1861int
1862hammer_vop_strategy_read(struct vop_strategy_args *ap)
1863{
36f82b23
MD
1864 struct hammer_transaction trans;
1865 struct hammer_inode *ip;
8cd0a023 1866 struct hammer_cursor cursor;
8cd0a023
MD
1867 hammer_base_elm_t base;
1868 struct bio *bio;
a99b9ea2 1869 struct bio *nbio;
8cd0a023
MD
1870 struct buf *bp;
1871 int64_t rec_offset;
a89aec1b 1872 int64_t ran_end;
195c19a1 1873 int64_t tmp64;
8cd0a023
MD
1874 int error;
1875 int boff;
1876 int roff;
1877 int n;
1878
1879 bio = ap->a_bio;
1880 bp = bio->bio_buf;
36f82b23 1881 ip = ap->a_vp->v_data;
8cd0a023 1882
a99b9ea2
MD
1883 /*
1884 * The zone-2 disk offset may have been set by the cluster code via
1885 * a BMAP operation. Take care not to confuse it with the bio_offset
1886 * set by hammer_io_direct_write(), which is a device-relative offset.
1887 *
1888 * Checking the high bits should suffice.
1889 */
1890 nbio = push_bio(bio);
6aeaa7bd
MD
1891 if ((nbio->bio_offset & HAMMER_OFF_ZONE_MASK) ==
1892 HAMMER_ZONE_RAW_BUFFER) {
a99b9ea2
MD
1893 error = hammer_io_direct_read(ip->hmp, nbio->bio_offset, bio);
1894 return (error);
1895 }
1896
1897 /*
1898 * Hard way
1899 */
36f82b23 1900 hammer_simple_transaction(&trans, ip->hmp);
47637bff 1901 hammer_init_cursor(&trans, &cursor, &ip->cache[1], ip);
8cd0a023
MD
1902
1903 /*
 1904	 * Key range (begin and end inclusive) to scan.  Note that the keys
c0ade690
MD
1905 * stored in the actual records represent BASE+LEN, not BASE. The
1906 * first record containing bio_offset will have a key > bio_offset.
8cd0a023 1907 */
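	/*
	 * Illustrative example (hypothetical numbers, not from the
	 * original source): a data record covering file offsets
	 * [0, 16384) is stored under key 16384 (BASE+LEN), so a read
	 * with bio_offset 8192 starts the scan at key 8193 and still
	 * locates that record because 16384 > 8193.
	 */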
2f85fa4d 1908 cursor.key_beg.localization = HAMMER_LOCALIZE_MISC;
8cd0a023 1909 cursor.key_beg.obj_id = ip->obj_id;
d5530d22 1910 cursor.key_beg.create_tid = 0;
8cd0a023 1911 cursor.key_beg.delete_tid = 0;
8cd0a023 1912 cursor.key_beg.obj_type = 0;
c0ade690 1913 cursor.key_beg.key = bio->bio_offset + 1;
d5530d22 1914 cursor.asof = ip->obj_asof;
bf3b416b 1915 cursor.flags |= HAMMER_CURSOR_ASOF;
8cd0a023
MD
1916
1917 cursor.key_end = cursor.key_beg;
11ad5ade 1918 KKASSERT(ip->ino_data.obj_type == HAMMER_OBJTYPE_REGFILE);
b84de5af 1919#if 0
11ad5ade 1920 if (ip->ino_data.obj_type == HAMMER_OBJTYPE_DBFILE) {
a89aec1b
MD
1921 cursor.key_beg.rec_type = HAMMER_RECTYPE_DB;
1922 cursor.key_end.rec_type = HAMMER_RECTYPE_DB;
1923 cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;
b84de5af
MD
1924 } else
1925#endif
1926 {
c0ade690 1927 ran_end = bio->bio_offset + bp->b_bufsize;
a89aec1b
MD
1928 cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA;
1929 cursor.key_end.rec_type = HAMMER_RECTYPE_DATA;
195c19a1
MD
1930 tmp64 = ran_end + MAXPHYS + 1; /* work-around GCC-4 bug */
1931 if (tmp64 < ran_end)
a89aec1b
MD
1932 cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;
1933 else
7f7c1f84 1934 cursor.key_end.key = ran_end + MAXPHYS + 1;
a89aec1b 1935 }
d26d0ae9 1936 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
8cd0a023 1937
4e17f465 1938 error = hammer_ip_first(&cursor);
8cd0a023
MD
1939 boff = 0;
1940
a89aec1b 1941 while (error == 0) {
47637bff
MD
1942 /*
1943 * Get the base file offset of the record. The key for
 1944		 * data records is (base + bytes) rather than (base).
1945 */
11ad5ade 1946 base = &cursor.leaf->base;
11ad5ade 1947 rec_offset = base->key - cursor.leaf->data_len;
8cd0a023 1948
66325755 1949 /*
a89aec1b 1950 * Calculate the gap, if any, and zero-fill it.
1fef775e
MD
1951 *
1952 * n is the offset of the start of the record verses our
1953 * current seek offset in the bio.
66325755 1954 */
8cd0a023
MD
1955 n = (int)(rec_offset - (bio->bio_offset + boff));
1956 if (n > 0) {
a89aec1b
MD
1957 if (n > bp->b_bufsize - boff)
1958 n = bp->b_bufsize - boff;
8cd0a023
MD
1959 bzero((char *)bp->b_data + boff, n);
1960 boff += n;
1961 n = 0;
66325755 1962 }
8cd0a023
MD
1963
1964 /*
1965 * Calculate the data offset in the record and the number
1966 * of bytes we can copy.
a89aec1b 1967 *
1fef775e
MD
1968 * There are two degenerate cases. First, boff may already
 1969		 * be at bp->b_bufsize.  Second, the data offset within
1970 * the record may exceed the record's size.
8cd0a023
MD
1971 */
1972 roff = -n;
b84de5af 1973 rec_offset += roff;
11ad5ade 1974 n = cursor.leaf->data_len - roff;
1fef775e
MD
1975 if (n <= 0) {
1976 kprintf("strategy_read: bad n=%d roff=%d\n", n, roff);
1977 n = 0;
1978 } else if (n > bp->b_bufsize - boff) {
8cd0a023 1979 n = bp->b_bufsize - boff;
1fef775e 1980 }
059819e3 1981
b84de5af 1982 /*
47637bff
MD
1983 * Deal with cached truncations. This cool bit of code
1984 * allows truncate()/ftruncate() to avoid having to sync
1985 * the file.
1986 *
1987 * If the frontend is truncated then all backend records are
1988 * subject to the frontend's truncation.
1989 *
1990 * If the backend is truncated then backend records on-disk
1991 * (but not in-memory) are subject to the backend's
1992 * truncation. In-memory records owned by the backend
1993 * represent data written after the truncation point on the
1994 * backend and must not be truncated.
1995 *
1996 * Truncate operations deal with frontend buffer cache
1997 * buffers and frontend-owned in-memory records synchronously.
b84de5af 1998 */
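		/*
		 * Illustrative example (hypothetical numbers): with a
		 * cached trunc_off of 4096, a record at rec_offset 0
		 * with n = 8192 is clamped to n = 4096 below, while a
		 * record at rec_offset 8192 is skipped entirely (n = 0).
		 */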
47637bff
MD
1999 if (ip->flags & HAMMER_INODE_TRUNCATED) {
2000 if (hammer_cursor_ondisk(&cursor) ||
2001 cursor.iprec->flush_state == HAMMER_FST_FLUSH) {
2002 if (ip->trunc_off <= rec_offset)
2003 n = 0;
2004 else if (ip->trunc_off < rec_offset + n)
2005 n = (int)(ip->trunc_off - rec_offset);
2006 }
2007 }
2008 if (ip->sync_flags & HAMMER_INODE_TRUNCATED) {
2009 if (hammer_cursor_ondisk(&cursor)) {
2010 if (ip->sync_trunc_off <= rec_offset)
2011 n = 0;
2012 else if (ip->sync_trunc_off < rec_offset + n)
2013 n = (int)(ip->sync_trunc_off - rec_offset);
2014 }
2015 }
b84de5af
MD
2016
2017 /*
47637bff
MD
2018 * Try to issue a direct read into our bio if possible,
2019 * otherwise resolve the element data into a hammer_buffer
2020 * and copy.
b84de5af 2021 */
6aeaa7bd 2022 if (n && boff == 0 &&
a99b9ea2
MD
2023 ((cursor.leaf->data_offset + roff) & HAMMER_BUFMASK) == 0) {
2024 error = hammer_io_direct_read(
2025 trans.hmp,
2026 cursor.leaf->data_offset + roff,
2027 bio);
47637bff
MD
2028 goto done;
2029 } else if (n) {
2030 error = hammer_ip_resolve_data(&cursor);
2031 if (error == 0) {
2032 bcopy((char *)cursor.data + roff,
2033 (char *)bp->b_data + boff, n);
2034 }
b84de5af 2035 }
47637bff
MD
2036 if (error)
2037 break;
2038
2039 /*
2040 * Iterate until we have filled the request.
2041 */
2042 boff += n;
8cd0a023 2043 if (boff == bp->b_bufsize)
66325755 2044 break;
a89aec1b 2045 error = hammer_ip_next(&cursor);
66325755
MD
2046 }
2047
2048 /*
8cd0a023 2049 * There may have been a gap after the last record
66325755 2050 */
8cd0a023
MD
2051 if (error == ENOENT)
2052 error = 0;
2053 if (error == 0 && boff != bp->b_bufsize) {
7f7c1f84 2054 KKASSERT(boff < bp->b_bufsize);
8cd0a023
MD
2055 bzero((char *)bp->b_data + boff, bp->b_bufsize - boff);
2056 /* boff = bp->b_bufsize; */
2057 }
2058 bp->b_resid = 0;
059819e3
MD
2059 bp->b_error = error;
2060 if (error)
2061 bp->b_flags |= B_ERROR;
2062 biodone(ap->a_bio);
47637bff
MD
2063
2064done:
2065 if (cursor.node)
bcac4bbb 2066 hammer_cache_node(&ip->cache[1], cursor.node);
47637bff
MD
2067 hammer_done_cursor(&cursor);
2068 hammer_done_transaction(&trans);
8cd0a023
MD
2069 return(error);
2070}
2071
a99b9ea2
MD
2072/*
2073 * BMAP operation - used to support cluster_read() only.
2074 *
2075 * (struct vnode *vp, off_t loffset, off_t *doffsetp, int *runp, int *runb)
2076 *
 2077 * This routine may return EOPNOTSUPP if the operation is not supported for
2078 * the specified offset. The contents of the pointer arguments do not
2079 * need to be initialized in that case.
2080 *
2081 * If a disk address is available and properly aligned return 0 with
2082 * *doffsetp set to the zone-2 address, and *runp / *runb set appropriately
2083 * to the run-length relative to that offset. Callers may assume that
 2084 * *doffsetp is valid if 0 is returned, even if *runp is not sufficiently
 2085 * large, so EOPNOTSUPP must be returned instead when the run is too small.
2086 */
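/*
 * Illustrative usage sketch (hypothetical caller; assumes the
 * five-argument interface documented above).  A clustering read path
 * might probe the mapping roughly as follows and fall back to the
 * normal strategy read when EOPNOTSUPP is returned:
 *
 *	off_t doffset;
 *	int runp, runb;
 *
 *	if (VOP_BMAP(vp, loffset, &doffset, &runp, &runb) == 0) {
 *		... doffset is an aligned zone-2 address covering at
 *		    least [loffset - runb, loffset + runp) ...
 *	}
 */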
2087static
2088int
2089hammer_vop_bmap(struct vop_bmap_args *ap)
2090{
2091 struct hammer_transaction trans;
2092 struct hammer_inode *ip;
2093 struct hammer_cursor cursor;
2094 hammer_base_elm_t base;
2095 int64_t rec_offset;
2096 int64_t ran_end;
2097 int64_t tmp64;
2098 int64_t base_offset;
2099 int64_t base_disk_offset;
2100 int64_t last_offset;
2101 hammer_off_t last_disk_offset;
2102 hammer_off_t disk_offset;
2103 int rec_len;
2104 int error;
2105
2106 ip = ap->a_vp->v_data;
2107
2108 /*
2109 * We can only BMAP regular files. We can't BMAP database files,
2110 * directories, etc.
2111 */
2112 if (ip->ino_data.obj_type != HAMMER_OBJTYPE_REGFILE)
2113 return(EOPNOTSUPP);
2114
2115 /*
2116 * bmap is typically called with runp/runb both NULL when used
 2117	 * for writing.  We do not currently support BMAP for writing.
2118 */
2119 if (ap->a_runp == NULL && ap->a_runb == NULL)
2120 return(EOPNOTSUPP);
2121
2122 /*
2123 * Scan the B-Tree to acquire blockmap addresses, then translate
2124 * to raw addresses.
2125 */
2126 hammer_simple_transaction(&trans, ip->hmp);
cb51be26
MD
2127#if 0
2128 kprintf("bmap_beg %016llx ip->cache %p\n", ap->a_loffset, ip->cache[1]);
2129#endif
a99b9ea2
MD
2130 hammer_init_cursor(&trans, &cursor, &ip->cache[1], ip);
2131
2132 /*
 2133	 * Key range (begin and end inclusive) to scan.  Note that the keys
2134 * stored in the actual records represent BASE+LEN, not BASE. The
2135 * first record containing bio_offset will have a key > bio_offset.
2136 */
2137 cursor.key_beg.localization = HAMMER_LOCALIZE_MISC;
2138 cursor.key_beg.obj_id = ip->obj_id;
2139 cursor.key_beg.create_tid = 0;
2140 cursor.key_beg.delete_tid = 0;
2141 cursor.key_beg.obj_type = 0;
2142 if (ap->a_runb)
2143 cursor.key_beg.key = ap->a_loffset - MAXPHYS + 1;
2144 else
2145 cursor.key_beg.key = ap->a_loffset + 1;
2146 if (cursor.key_beg.key < 0)
2147 cursor.key_beg.key = 0;
2148 cursor.asof = ip->obj_asof;
bf3b416b 2149 cursor.flags |= HAMMER_CURSOR_ASOF;
a99b9ea2
MD
2150
2151 cursor.key_end = cursor.key_beg;
2152 KKASSERT(ip->ino_data.obj_type == HAMMER_OBJTYPE_REGFILE);
2153
2154 ran_end = ap->a_loffset + MAXPHYS;
2155 cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA;
2156 cursor.key_end.rec_type = HAMMER_RECTYPE_DATA;
2157 tmp64 = ran_end + MAXPHYS + 1; /* work-around GCC-4 bug */
2158 if (tmp64 < ran_end)
2159 cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;
2160 else
2161 cursor.key_end.key = ran_end + MAXPHYS + 1;
2162
2163 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
2164
2165 error = hammer_ip_first(&cursor);
2166 base_offset = last_offset = 0;
2167 base_disk_offset = last_disk_offset = 0;
2168
2169 while (error == 0) {
2170 /*
2171 * Get the base file offset of the record. The key for
 2172		 * data records is (base + bytes) rather than (base).
2173 */
2174 base = &cursor.leaf->base;
2175 rec_offset = base->key - cursor.leaf->data_len;
2176 rec_len = cursor.leaf->data_len;
2177
2178 /*
2179 * Incorporate any cached truncation
2180 */
2181 if (ip->flags & HAMMER_INODE_TRUNCATED) {
2182 if (hammer_cursor_ondisk(&cursor) ||
2183 cursor.iprec->flush_state == HAMMER_FST_FLUSH) {
2184 if (ip->trunc_off <= rec_offset)
2185 rec_len = 0;
2186 else if (ip->trunc_off < rec_offset + rec_len)
2187 rec_len = (int)(ip->trunc_off - rec_offset);
2188 }
2189 }
2190 if (ip->sync_flags & HAMMER_INODE_TRUNCATED) {
2191 if (hammer_cursor_ondisk(&cursor)) {
2192 if (ip->sync_trunc_off <= rec_offset)
2193 rec_len = 0;
2194 else if (ip->sync_trunc_off < rec_offset + rec_len)
2195 rec_len = (int)(ip->sync_trunc_off - rec_offset);
2196 }
2197 }
2198
2199 /*
2200 * Accumulate information. If we have hit a discontiguous
 2201		 * block, reset base_offset unless we are already beyond the
2202 * requested offset. If we are, that's it, we stop.
2203 */
2204 disk_offset = hammer_blockmap_lookup(trans.hmp,
2205 cursor.leaf->data_offset,
2206 &error);
2207 if (error)
2208 break;
2209 if (rec_offset != last_offset ||
2210 disk_offset != last_disk_offset) {
2211 if (rec_offset > ap->a_loffset)
2212 break;
2213 base_offset = rec_offset;
2214 base_disk_offset = disk_offset;
2215 }
2216 last_offset = rec_offset + rec_len;
2217 last_disk_offset = disk_offset + rec_len;
2218
2219 error = hammer_ip_next(&cursor);
2220 }
2221
2222#if 0
2223 kprintf("BMAP %016llx: %016llx - %016llx\n",
2224 ap->a_loffset, base_offset, last_offset);
2225 kprintf("BMAP %16s: %016llx - %016llx\n",
2226 "", base_disk_offset, last_disk_offset);
2227#endif
2228
cb51be26 2229 if (cursor.node) {
bcac4bbb 2230 hammer_cache_node(&ip->cache[1], cursor.node);
cb51be26
MD
2231#if 0
2232 kprintf("bmap_end2 %016llx ip->cache %p\n", ap->a_loffset, ip->cache[1]);
2233#endif
2234 }
a99b9ea2
MD
2235 hammer_done_cursor(&cursor);
2236 hammer_done_transaction(&trans);
2237
2238 if (base_offset == 0 || base_offset > ap->a_loffset ||
2239 last_offset < ap->a_loffset) {
2240 error = EOPNOTSUPP;
2241 } else {
2242 disk_offset = base_disk_offset + (ap->a_loffset - base_offset);
2243
2244 /*
2245 * If doffsetp is not aligned or the forward run size does
2246 * not cover a whole buffer, disallow the direct I/O.
2247 */
2248 if ((disk_offset & HAMMER_BUFMASK) ||
2249 (last_offset - ap->a_loffset) < HAMMER_BUFSIZE) {
2250 error = EOPNOTSUPP;
2251 } else {
2252 *ap->a_doffsetp = disk_offset;
2253 if (ap->a_runb)
2254 *ap->a_runb = ap->a_loffset - base_offset;
2255 if (ap->a_runp)
2256 *ap->a_runp = last_offset - ap->a_loffset;
2257 error = 0;
2258 }
2259 }
2260 return(error);
2261}
2262
8cd0a023 2263/*
059819e3 2264 * Write to a regular file. Because this is a strategy call the OS is
bcac4bbb 2265 * trying to actually get data onto the media.
8cd0a023
MD
2266 */
2267static
2268int
2269hammer_vop_strategy_write(struct vop_strategy_args *ap)
2270{
47637bff 2271 hammer_record_t record;
af209b0f 2272 hammer_mount_t hmp;
8cd0a023
MD
2273 hammer_inode_t ip;
2274 struct bio *bio;
2275 struct buf *bp;
0832c9bb
MD
2276 int bytes;
2277 int error;
8cd0a023
MD
2278
2279 bio = ap->a_bio;
2280 bp = bio->bio_buf;
2281 ip = ap->a_vp->v_data;
af209b0f 2282 hmp = ip->hmp;
d113fda1 2283
059819e3
MD
2284 if (ip->flags & HAMMER_INODE_RO) {
2285 bp->b_error = EROFS;
2286 bp->b_flags |= B_ERROR;
2287 biodone(ap->a_bio);
e63644f0 2288 hammer_cleanup_write_io(ip);
059819e3
MD
2289 return(EROFS);
2290 }
b84de5af 2291
29ce0677
MD
2292 /*
2293 * Interlock with inode destruction (no in-kernel or directory
2294 * topology visibility). If we queue new IO while trying to
 2295	 * topology visibility).  If we queue new IO while trying to
 2295	 * destroy the inode, we can deadlock the vtrunc call in
2296 * hammer_inode_unloadable_check().
2297 */
2298 if (ip->flags & (HAMMER_INODE_DELETING|HAMMER_INODE_DELETED)) {
2299 bp->b_resid = 0;
2300 biodone(ap->a_bio);
e63644f0 2301 hammer_cleanup_write_io(ip);
29ce0677
MD
2302 return(0);
2303 }
2304
b84de5af 2305 /*
a99b9ea2
MD
2306 * Reserve space and issue a direct-write from the front-end.
2307 * NOTE: The direct_io code will hammer_bread/bcopy smaller
2308 * allocations.
47637bff 2309 *
a99b9ea2
MD
2310 * An in-memory record will be installed to reference the storage
2311 * until the flusher can get to it.
47637bff
MD
2312 *
2313 * Since we own the high level bio the front-end will not try to
0832c9bb 2314 * do a direct-read until the write completes.
a99b9ea2
MD
2315 *
 2316	 * NOTE: The only time we do not reserve a full-sized buffer's
2317 * worth of data is if the file is small. We do not try to
2318 * allocate a fragment (from the small-data zone) at the end of
2319 * an otherwise large file as this can lead to wildly separated
2320 * data.
47637bff 2321 */
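	/*
	 * Illustrative example (hypothetical numbers): a 100 byte file
	 * written at offset 0 reserves 112 bytes (rounded up to a 16
	 * byte boundary), while a write at a non-zero offset, or to a
	 * file larger than HAMMER_BUFSIZE / 2, reserves b_bufsize
	 * rounded up to a full buffer boundary.
	 */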
0832c9bb
MD
2322 KKASSERT((bio->bio_offset & HAMMER_BUFMASK) == 0);
2323 KKASSERT(bio->bio_offset < ip->ino_data.size);
a99b9ea2
MD
2324 if (bio->bio_offset || ip->ino_data.size > HAMMER_BUFSIZE / 2)
2325 bytes = (bp->b_bufsize + HAMMER_BUFMASK) & ~HAMMER_BUFMASK;
b84de5af 2326 else
a99b9ea2 2327 bytes = ((int)ip->ino_data.size + 15) & ~15;
0832c9bb
MD
2328
2329 record = hammer_ip_add_bulk(ip, bio->bio_offset, bp->b_data,
2330 bytes, &error);
2331 if (record) {
af209b0f 2332 hammer_io_direct_write(hmp, &record->leaf, bio);
0832c9bb 2333 hammer_rel_mem_record(record);
af209b0f
MD
2334 if (hmp->rsv_recs > hammer_limit_recs &&
2335 ip->rsv_recs > hammer_limit_irecs / 10) {
0832c9bb 2336 hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
a99b9ea2 2337 } else if (ip->rsv_recs > hammer_limit_irecs / 2) {
af209b0f
MD
2338 hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
2339 }
0832c9bb 2340 } else {
a99b9ea2 2341 bp->b_bio2.bio_offset = NOOFFSET;
0832c9bb
MD
2342 bp->b_error = error;
2343 bp->b_flags |= B_ERROR;
2344 biodone(ap->a_bio);
2345 }
2346 hammer_cleanup_write_io(ip);
2347 return(error);
059819e3
MD
2348}
2349
2350/*
47637bff
MD
2351 * Clean-up after disposing of a dirty frontend buffer's data.
 2352 * This is somewhat heuristic, so try to be robust.
059819e3 2353 */
0832c9bb 2354static void
e63644f0
MD
2355hammer_cleanup_write_io(hammer_inode_t ip)
2356{
2357 if (ip->rsv_databufs) {
2358 --ip->rsv_databufs;
2359 --ip->hmp->rsv_databufs;
2360 }
2361}
2362
0832c9bb
MD
2363/*
 2364 * We can lose track of dirty buffer cache buffers if we truncate; this
 2365 * routine resynchronizes the count.
2366 */
2367static
2368void
2369hammer_update_rsv_databufs(hammer_inode_t ip)
2370{
2371 struct buf *bp;
2372 int delta;
2373 int n;
2374
2375 if (ip->vp) {
2376 n = 0;
2377 RB_FOREACH(bp, buf_rb_tree, &ip->vp->v_rbdirty_tree) {
2378 ++n;
2379 }
2380 } else {
2381 n = 0;
2382 }
2383 delta = n - ip->rsv_databufs;
2384 ip->rsv_databufs += delta;
2385 ip->hmp->rsv_databufs += delta;
2386}
2387
8cd0a023
MD
2388/*
2389 * dounlink - disconnect a directory entry
2390 *
2391 * XXX whiteout support not really in yet
2392 */
2393static int
b84de5af
MD
2394hammer_dounlink(hammer_transaction_t trans, struct nchandle *nch,
2395 struct vnode *dvp, struct ucred *cred, int flags)
8cd0a023 2396{
8cd0a023
MD
2397 struct namecache *ncp;
2398 hammer_inode_t dip;
2399 hammer_inode_t ip;
8cd0a023 2400 struct hammer_cursor cursor;
8cd0a023 2401 int64_t namekey;
11ad5ade 2402 int nlen, error;
8cd0a023
MD
2403
2404 /*
2405 * Calculate the namekey and setup the key range for the scan. This
2406 * works kinda like a chained hash table where the lower 32 bits
2407 * of the namekey synthesize the chain.
2408 *
2409 * The key range is inclusive of both key_beg and key_end.
2410 */
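	/*
	 * Illustrative example (hypothetical hash value): for a namekey
	 * of 0x1234abcd00000000 the scan below covers keys
	 * 0x1234abcd00000000 through 0x1234abcdffffffff inclusive, so
	 * every entry whose name hashes to the same upper 32 bits falls
	 * inside the range and is disambiguated by the bcmp() in the
	 * scan loop.
	 */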
2411 dip = VTOI(dvp);
2412 ncp = nch->ncp;
d113fda1
MD
2413
2414 if (dip->flags & HAMMER_INODE_RO)
2415 return (EROFS);
2416
6a37e7e4
MD
2417 namekey = hammer_directory_namekey(ncp->nc_name, ncp->nc_nlen);
2418retry:
bcac4bbb 2419 hammer_init_cursor(trans, &cursor, &dip->cache[1], dip);
2f85fa4d 2420 cursor.key_beg.localization = HAMMER_LOCALIZE_MISC;
8cd0a023
MD
2421 cursor.key_beg.obj_id = dip->obj_id;
2422 cursor.key_beg.key = namekey;
d5530d22 2423 cursor.key_beg.create_tid = 0;
8cd0a023
MD
2424 cursor.key_beg.delete_tid = 0;
2425 cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
2426 cursor.key_beg.obj_type = 0;
2427
2428 cursor.key_end = cursor.key_beg;
2429 cursor.key_end.key |= 0xFFFFFFFFULL;
d5530d22
MD
2430 cursor.asof = dip->obj_asof;
2431 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;
8cd0a023 2432
8cd0a023
MD
2433 /*
2434 * Scan all matching records (the chain), locate the one matching
 2435	 * the requested path component.  The scan terminates with error
 2436	 * set to 0 on a match, ENOENT if the chain is exhausted, or
 2437	 * some other error code.
2438 *
2439 * The hammer_ip_*() functions merge in-memory records with on-disk
2440 * records for the purposes of the search.
2441 */
4e17f465
MD
2442 error = hammer_ip_first(&cursor);
2443
a89aec1b
MD
2444 while (error == 0) {
2445 error = hammer_ip_resolve_data(&cursor);
2446 if (error)
66325755 2447 break;
11ad5ade
MD
2448 nlen = cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF;
2449 KKASSERT(nlen > 0);
2450 if (ncp->nc_nlen == nlen &&
2451 bcmp(ncp->nc_name, cursor.data->entry.name, nlen) == 0) {
66325755
MD
2452 break;
2453 }
a89aec1b 2454 error = hammer_ip_next(&cursor);
66325755 2455 }
8cd0a023
MD
2456
2457 /*
2458 * If all is ok we have to get the inode so we can adjust nlinks.
269c5eab
MD
2459 * To avoid a deadlock with the flusher we must release the inode
2460 * lock on the directory when acquiring the inode for the entry.
b3deaf57
MD
2461 *
2462 * If the target is a directory, it must be empty.
8cd0a023 2463 */
66325755 2464 if (error == 0) {
269c5eab 2465 hammer_unlock(&cursor.ip->lock);
bcac4bbb 2466 ip = hammer_get_inode(trans, dip, cursor.data->entry.obj_id,
d113fda1 2467 dip->hmp->asof, 0, &error);
269c5eab 2468 hammer_lock_sh(&cursor.ip->lock);
46fe7ae1 2469 if (error == ENOENT) {
11ad5ade 2470 kprintf("obj_id %016llx\n", cursor.data->entry.obj_id);
10a5d1ba 2471 Debugger("ENOENT unlinking object that should exist");
46fe7ae1 2472 }
1f07f686
MD
2473
2474 /*
2475 * If we are trying to remove a directory the directory must
2476 * be empty.
2477 *
2478 * WARNING: hammer_ip_check_directory_empty() may have to
2479 * terminate the cursor to avoid a deadlock. It is ok to
2480 * call hammer_done_cursor() twice.
2481 */
11ad5ade 2482 if (error == 0 && ip->ino_data.obj_type ==
b3deaf57 2483 HAMMER_OBJTYPE_DIRECTORY) {
98f7132d 2484 error = hammer_ip_check_directory_empty(trans, ip);
b3deaf57 2485 }
1f07f686 2486
6a37e7e4 2487 /*
1f07f686
MD
2488 * Delete the directory entry.
2489 *
6a37e7e4 2490 * WARNING: hammer_ip_del_directory() may have to terminate
1f07f686 2491 * the cursor to avoid a deadlock. It is ok to call
6a37e7e4
MD
2492 * hammer_done_cursor() twice.
2493 */
b84de5af 2494 if (error == 0) {
b84de5af
MD
2495 error = hammer_ip_del_directory(trans, &cursor,
2496 dip, ip);
b84de5af 2497 }
269c5eab 2498 hammer_done_cursor(&cursor);
8cd0a023
MD
2499 if (error == 0) {
2500 cache_setunresolved(nch);
2501 cache_setvp(nch, NULL);
2502 /* XXX locking */
2503 if (ip->vp)
2504 cache_inval_vp(ip->vp, CINV_DESTROY);
2505 }
af209b0f
MD
2506 if (ip)
2507 hammer_rel_inode(ip, 0);
269c5eab
MD
2508 } else {
2509 hammer_done_cursor(&cursor);
66325755 2510 }
6a37e7e4
MD
2511 if (error == EDEADLK)
2512 goto retry;
9c448776 2513
66325755 2514 return (error);
66325755
MD
2515}
2516
7a04d74f
MD
2517/************************************************************************
2518 * FIFO AND SPECFS OPS *
2519 ************************************************************************
2520 *
2521 */
2522
2523static int
2524hammer_vop_fifoclose (struct vop_close_args *ap)
2525{
2526 /* XXX update itimes */
2527 return (VOCALL(&fifo_vnode_vops, &ap->a_head));
2528}
2529
2530static int
2531hammer_vop_fiforead (struct vop_read_args *ap)
2532{
2533 int error;
2534
2535 error = VOCALL(&fifo_vnode_vops, &ap->a_head);
2536 /* XXX update access time */
2537 return (error);
2538}
2539
2540static int
2541hammer_vop_fifowrite (struct vop_write_args *ap)
2542{
2543 int error;
2544
2545 error = VOCALL(&fifo_vnode_vops, &ap->a_head);
2546 /* XXX update access time */
2547 return (error);
2548}
2549
2550static int
2551hammer_vop_specclose (struct vop_close_args *ap)
2552{
2553 /* XXX update itimes */
2554 return (VOCALL(&spec_vnode_vops, &ap->a_head));
2555}
2556
2557static int
2558hammer_vop_specread (struct vop_read_args *ap)
2559{
2560 /* XXX update access time */
2561 return (VOCALL(&spec_vnode_vops, &ap->a_head));
2562}
2563
2564static int
2565hammer_vop_specwrite (struct vop_write_args *ap)
2566{
2567 /* XXX update last change time */
2568 return (VOCALL(&spec_vnode_vops, &ap->a_head));
2569}
2570