do early copyin / delayed copyout for socket options
[dragonfly.git] / sys / vfs / hammer / hammer_vnops.c
CommitLineData
427e5fc6 1/*
b84de5af 2 * Copyright (c) 2007-2008 The DragonFly Project. All rights reserved.
427e5fc6
MD
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
cb51be26 34 * $DragonFly: src/sys/vfs/hammer/hammer_vnops.c,v 1.71 2008/06/17 04:02:38 dillon Exp $
427e5fc6
MD
35 */
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/kernel.h>
40#include <sys/fcntl.h>
41#include <sys/namecache.h>
42#include <sys/vnode.h>
43#include <sys/lockf.h>
44#include <sys/event.h>
45#include <sys/stat.h>
b3deaf57 46#include <sys/dirent.h>
c0ade690 47#include <vm/vm_extern.h>
7a04d74f 48#include <vfs/fifofs/fifo.h>
427e5fc6
MD
49#include "hammer.h"
50
51/*
52 * USERFS VNOPS
53 */
54/*static int hammer_vop_vnoperate(struct vop_generic_args *);*/
66325755
MD
/*
 * Forward declarations for the vnode operations implemented in this file.
 */
static int hammer_vop_fsync(struct vop_fsync_args *ap);
static int hammer_vop_read(struct vop_read_args *ap);
static int hammer_vop_write(struct vop_write_args *ap);
static int hammer_vop_access(struct vop_access_args *ap);
static int hammer_vop_advlock(struct vop_advlock_args *ap);
static int hammer_vop_close(struct vop_close_args *ap);
static int hammer_vop_ncreate(struct vop_ncreate_args *ap);
static int hammer_vop_getattr(struct vop_getattr_args *ap);
static int hammer_vop_nresolve(struct vop_nresolve_args *ap);
static int hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap);
static int hammer_vop_nlink(struct vop_nlink_args *ap);
static int hammer_vop_nmkdir(struct vop_nmkdir_args *ap);
static int hammer_vop_nmknod(struct vop_nmknod_args *ap);
static int hammer_vop_open(struct vop_open_args *ap);
static int hammer_vop_pathconf(struct vop_pathconf_args *ap);
static int hammer_vop_print(struct vop_print_args *ap);
static int hammer_vop_readdir(struct vop_readdir_args *ap);
static int hammer_vop_readlink(struct vop_readlink_args *ap);
static int hammer_vop_nremove(struct vop_nremove_args *ap);
static int hammer_vop_nrename(struct vop_nrename_args *ap);
static int hammer_vop_nrmdir(struct vop_nrmdir_args *ap);
static int hammer_vop_setattr(struct vop_setattr_args *ap);
static int hammer_vop_strategy(struct vop_strategy_args *ap);
static int hammer_vop_bmap(struct vop_bmap_args *ap);
static int hammer_vop_nsymlink(struct vop_nsymlink_args *ap);
static int hammer_vop_nwhiteout(struct vop_nwhiteout_args *ap);
static int hammer_vop_ioctl(struct vop_ioctl_args *ap);
static int hammer_vop_mountctl(struct vop_mountctl_args *ap);

/* Operations installed in hammer_fifo_vops. */
static int hammer_vop_fifoclose(struct vop_close_args *ap);
static int hammer_vop_fiforead(struct vop_read_args *ap);
static int hammer_vop_fifowrite(struct vop_write_args *ap);

/* Operations installed in hammer_spec_vops. */
static int hammer_vop_specclose(struct vop_close_args *ap);
static int hammer_vop_specread(struct vop_read_args *ap);
static int hammer_vop_specwrite(struct vop_write_args *ap);
91
427e5fc6
MD
92struct vop_ops hammer_vnode_vops = {
93 .vop_default = vop_defaultop,
94 .vop_fsync = hammer_vop_fsync,
c0ade690
MD
95 .vop_getpages = vop_stdgetpages,
96 .vop_putpages = vop_stdputpages,
427e5fc6
MD
97 .vop_read = hammer_vop_read,
98 .vop_write = hammer_vop_write,
99 .vop_access = hammer_vop_access,
100 .vop_advlock = hammer_vop_advlock,
101 .vop_close = hammer_vop_close,
102 .vop_ncreate = hammer_vop_ncreate,
103 .vop_getattr = hammer_vop_getattr,
104 .vop_inactive = hammer_vop_inactive,
105 .vop_reclaim = hammer_vop_reclaim,
106 .vop_nresolve = hammer_vop_nresolve,
107 .vop_nlookupdotdot = hammer_vop_nlookupdotdot,
108 .vop_nlink = hammer_vop_nlink,
109 .vop_nmkdir = hammer_vop_nmkdir,
110 .vop_nmknod = hammer_vop_nmknod,
111 .vop_open = hammer_vop_open,
112 .vop_pathconf = hammer_vop_pathconf,
113 .vop_print = hammer_vop_print,
114 .vop_readdir = hammer_vop_readdir,
115 .vop_readlink = hammer_vop_readlink,
116 .vop_nremove = hammer_vop_nremove,
117 .vop_nrename = hammer_vop_nrename,
118 .vop_nrmdir = hammer_vop_nrmdir,
119 .vop_setattr = hammer_vop_setattr,
a99b9ea2 120 .vop_bmap = hammer_vop_bmap,
427e5fc6
MD
121 .vop_strategy = hammer_vop_strategy,
122 .vop_nsymlink = hammer_vop_nsymlink,
7dc57964 123 .vop_nwhiteout = hammer_vop_nwhiteout,
513ca7d7
MD
124 .vop_ioctl = hammer_vop_ioctl,
125 .vop_mountctl = hammer_vop_mountctl
427e5fc6
MD
126};
127
7a04d74f
MD
128struct vop_ops hammer_spec_vops = {
129 .vop_default = spec_vnoperate,
130 .vop_fsync = hammer_vop_fsync,
131 .vop_read = hammer_vop_specread,
132 .vop_write = hammer_vop_specwrite,
133 .vop_access = hammer_vop_access,
134 .vop_close = hammer_vop_specclose,
135 .vop_getattr = hammer_vop_getattr,
136 .vop_inactive = hammer_vop_inactive,
137 .vop_reclaim = hammer_vop_reclaim,
138 .vop_setattr = hammer_vop_setattr
139};
140
141struct vop_ops hammer_fifo_vops = {
142 .vop_default = fifo_vnoperate,
143 .vop_fsync = hammer_vop_fsync,
144 .vop_read = hammer_vop_fiforead,
145 .vop_write = hammer_vop_fifowrite,
146 .vop_access = hammer_vop_access,
147 .vop_close = hammer_vop_fifoclose,
148 .vop_getattr = hammer_vop_getattr,
149 .vop_inactive = hammer_vop_inactive,
150 .vop_reclaim = hammer_vop_reclaim,
151 .vop_setattr = hammer_vop_setattr
152};
153
0832c9bb
MD
#if defined(DEBUG_TRUNCATE)
/* Debugging aid, only built when DEBUG_TRUNCATE is defined. */
struct hammer_inode *HammerTruncIp;
#endif
157
b84de5af
MD
158static int hammer_dounlink(hammer_transaction_t trans, struct nchandle *nch,
159 struct vnode *dvp, struct ucred *cred, int flags);
8cd0a023
MD
160static int hammer_vop_strategy_read(struct vop_strategy_args *ap);
161static int hammer_vop_strategy_write(struct vop_strategy_args *ap);
0832c9bb
MD
162static void hammer_cleanup_write_io(hammer_inode_t ip);
163static void hammer_update_rsv_databufs(hammer_inode_t ip);
8cd0a023 164
427e5fc6
MD
#if 0
/*
 * hammer_vop_vnoperate { generic args }
 *
 * Disabled generic dispatcher that forwards the operation through
 * hammer_vnode_vops via VOCALL().  The parameter is named so the block
 * compiles if it is ever re-enabled (the body references 'ap').
 */
static
int
hammer_vop_vnoperate(struct vop_generic_args *ap)
{
	return (VOCALL(&hammer_vnode_vops, ap));
}
#endif
173
66325755
MD
174/*
175 * hammer_vop_fsync { vp, waitfor }
176 */
427e5fc6
MD
177static
178int
66325755 179hammer_vop_fsync(struct vop_fsync_args *ap)
427e5fc6 180{
b84de5af 181 hammer_inode_t ip = VTOI(ap->a_vp);
c0ade690 182
e8599db1 183 vfsync(ap->a_vp, ap->a_waitfor, 1, NULL, NULL);
af209b0f 184 hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
b84de5af
MD
185 if (ap->a_waitfor == MNT_WAIT)
186 hammer_wait_inode(ip);
059819e3 187 return (ip->error);
427e5fc6
MD
188}
189
66325755
MD
190/*
191 * hammer_vop_read { vp, uio, ioflag, cred }
192 */
427e5fc6
MD
193static
194int
66325755 195hammer_vop_read(struct vop_read_args *ap)
427e5fc6 196{
66325755 197 struct hammer_transaction trans;
c0ade690 198 hammer_inode_t ip;
66325755
MD
199 off_t offset;
200 struct buf *bp;
201 struct uio *uio;
202 int error;
203 int n;
8cd0a023 204 int seqcount;
66325755
MD
205
206 if (ap->a_vp->v_type != VREG)
207 return (EINVAL);
208 ip = VTOI(ap->a_vp);
209 error = 0;
8cd0a023 210 seqcount = ap->a_ioflag >> 16;
66325755 211
8cd0a023 212 hammer_start_transaction(&trans, ip->hmp);
66325755
MD
213
214 /*
215 * Access the data in HAMMER_BUFSIZE blocks via the buffer cache.
216 */
217 uio = ap->a_uio;
11ad5ade 218 while (uio->uio_resid > 0 && uio->uio_offset < ip->ino_data.size) {
66325755 219 offset = uio->uio_offset & HAMMER_BUFMASK;
a99b9ea2
MD
220 if (hammer_debug_cluster_enable) {
221 error = cluster_read(ap->a_vp, ip->ino_data.size,
222 uio->uio_offset - offset,
223 HAMMER_BUFSIZE,
224 MAXBSIZE, seqcount, &bp);
225 } else {
226 error = bread(ap->a_vp, uio->uio_offset - offset,
227 HAMMER_BUFSIZE, &bp);
228 }
66325755
MD
229 if (error) {
230 brelse(bp);
231 break;
232 }
7bc5b8c2 233
c0ade690 234 /* bp->b_flags |= B_CLUSTEROK; temporarily disabled */
66325755
MD
235 n = HAMMER_BUFSIZE - offset;
236 if (n > uio->uio_resid)
237 n = uio->uio_resid;
11ad5ade
MD
238 if (n > ip->ino_data.size - uio->uio_offset)
239 n = (int)(ip->ino_data.size - uio->uio_offset);
66325755 240 error = uiomove((char *)bp->b_data + offset, n, uio);
7bc5b8c2
MD
241
242 /* data has a lower priority then meta-data */
243 bp->b_flags |= B_AGE;
66325755 244 bqrelse(bp);
af209b0f
MD
245 if (error)
246 break;
66325755 247 }
b84de5af
MD
248 if ((ip->flags & HAMMER_INODE_RO) == 0 &&
249 (ip->hmp->mp->mnt_flag & MNT_NOATIME) == 0) {
11ad5ade 250 ip->ino_leaf.atime = trans.time;
47637bff 251 hammer_modify_inode(ip, HAMMER_INODE_ITIMES);
b84de5af
MD
252 }
253 hammer_done_transaction(&trans);
66325755 254 return (error);
427e5fc6
MD
255}
256
66325755
MD
257/*
258 * hammer_vop_write { vp, uio, ioflag, cred }
259 */
427e5fc6
MD
260static
261int
66325755 262hammer_vop_write(struct vop_write_args *ap)
427e5fc6 263{
66325755
MD
264 struct hammer_transaction trans;
265 struct hammer_inode *ip;
266 struct uio *uio;
47637bff
MD
267 int rel_offset;
268 off_t base_offset;
66325755
MD
269 struct buf *bp;
270 int error;
271 int n;
c0ade690 272 int flags;
059819e3 273 int count;
cb51be26 274 int seqcount;
66325755
MD
275
276 if (ap->a_vp->v_type != VREG)
277 return (EINVAL);
278 ip = VTOI(ap->a_vp);
279 error = 0;
cb51be26 280 seqcount = ap->a_ioflag >> 16;
66325755 281
d113fda1
MD
282 if (ip->flags & HAMMER_INODE_RO)
283 return (EROFS);
284
66325755
MD
285 /*
286 * Create a transaction to cover the operations we perform.
287 */
8cd0a023 288 hammer_start_transaction(&trans, ip->hmp);
66325755
MD
289 uio = ap->a_uio;
290
291 /*
292 * Check append mode
293 */
294 if (ap->a_ioflag & IO_APPEND)
11ad5ade 295 uio->uio_offset = ip->ino_data.size;
66325755
MD
296
297 /*
af209b0f
MD
298 * Check for illegal write offsets. Valid range is 0...2^63-1.
299 *
300 * NOTE: the base_off assignment is required to work around what
301 * I consider to be a GCC-4 optimization bug.
66325755 302 */
af209b0f
MD
303 if (uio->uio_offset < 0) {
304 hammer_done_transaction(&trans);
305 return (EFBIG);
306 }
307 base_offset = uio->uio_offset + uio->uio_resid; /* work around gcc-4 */
308 if (uio->uio_resid > 0 && base_offset <= 0) {
b84de5af 309 hammer_done_transaction(&trans);
66325755 310 return (EFBIG);
9c448776 311 }
66325755
MD
312
313 /*
314 * Access the data in HAMMER_BUFSIZE blocks via the buffer cache.
315 */
059819e3 316 count = 0;
66325755 317 while (uio->uio_resid > 0) {
d5ef456e
MD
318 int fixsize = 0;
319
e63644f0
MD
320 if ((error = hammer_checkspace(trans.hmp)) != 0)
321 break;
322
059819e3 323 /*
47637bff
MD
324 * Do not allow HAMMER to blow out the buffer cache.
325 *
326 * Do not allow HAMMER to blow out system memory by
327 * accumulating too many records. Records are decoupled
328 * from the buffer cache.
329 *
330 * Always check at the beginning so separate writes are
331 * not able to bypass this code.
0832c9bb
MD
332 *
333 * WARNING: Cannot unlock vp when doing a NOCOPY write as
334 * part of a putpages operation. Doing so could cause us
335 * to deadlock against the VM system when we try to re-lock.
059819e3 336 */
47637bff 337 if ((count++ & 15) == 0) {
0832c9bb
MD
338 if (uio->uio_segflg != UIO_NOCOPY) {
339 vn_unlock(ap->a_vp);
340 if ((ap->a_ioflag & IO_NOBWILL) == 0)
341 bwillwrite();
342 }
a99b9ea2
MD
343 if (ip->rsv_recs > hammer_limit_irecs)
344 hammer_wait_inode_recs(ip);
0832c9bb
MD
345 if (uio->uio_segflg != UIO_NOCOPY)
346 vn_lock(ap->a_vp, LK_EXCLUSIVE|LK_RETRY);
059819e3
MD
347 }
348
47637bff
MD
349 rel_offset = (int)(uio->uio_offset & HAMMER_BUFMASK);
350 base_offset = uio->uio_offset & ~HAMMER_BUFMASK64;
351 n = HAMMER_BUFSIZE - rel_offset;
d5ef456e
MD
352 if (n > uio->uio_resid)
353 n = uio->uio_resid;
11ad5ade 354 if (uio->uio_offset + n > ip->ino_data.size) {
d5ef456e
MD
355 vnode_pager_setsize(ap->a_vp, uio->uio_offset + n);
356 fixsize = 1;
357 }
358
c0ade690
MD
359 if (uio->uio_segflg == UIO_NOCOPY) {
360 /*
361 * Issuing a write with the same data backing the
362 * buffer. Instantiate the buffer to collect the
363 * backing vm pages, then read-in any missing bits.
364 *
365 * This case is used by vop_stdputpages().
366 */
47637bff 367 bp = getblk(ap->a_vp, base_offset,
d5ef456e 368 HAMMER_BUFSIZE, GETBLK_BHEAVY, 0);
c0ade690
MD
369 if ((bp->b_flags & B_CACHE) == 0) {
370 bqrelse(bp);
47637bff 371 error = bread(ap->a_vp, base_offset,
c0ade690 372 HAMMER_BUFSIZE, &bp);
c0ade690 373 }
47637bff 374 } else if (rel_offset == 0 && uio->uio_resid >= HAMMER_BUFSIZE) {
c0ade690 375 /*
a5fddc16
MD
376 * Even though we are entirely overwriting the buffer
377 * we may still have to zero it out to avoid a
378 * mmap/write visibility issue.
c0ade690 379 */
47637bff 380 bp = getblk(ap->a_vp, base_offset,
d5ef456e 381 HAMMER_BUFSIZE, GETBLK_BHEAVY, 0);
a5fddc16
MD
382 if ((bp->b_flags & B_CACHE) == 0)
383 vfs_bio_clrbuf(bp);
47637bff 384 } else if (base_offset >= ip->ino_data.size) {
c0ade690 385 /*
a5fddc16
MD
386 * If the base offset of the buffer is beyond the
387 * file EOF, we don't have to issue a read.
c0ade690 388 */
47637bff 389 bp = getblk(ap->a_vp, base_offset,
d5ef456e 390 HAMMER_BUFSIZE, GETBLK_BHEAVY, 0);
66325755
MD
391 vfs_bio_clrbuf(bp);
392 } else {
c0ade690
MD
393 /*
394 * Partial overwrite, read in any missing bits then
395 * replace the portion being written.
396 */
47637bff 397 error = bread(ap->a_vp, base_offset,
66325755 398 HAMMER_BUFSIZE, &bp);
d5ef456e
MD
399 if (error == 0)
400 bheavy(bp);
66325755 401 }
47637bff
MD
402 if (error == 0) {
403 error = uiomove((char *)bp->b_data + rel_offset,
404 n, uio);
405 }
d5ef456e
MD
406
407 /*
408 * If we screwed up we have to undo any VM size changes we
409 * made.
410 */
66325755
MD
411 if (error) {
412 brelse(bp);
d5ef456e 413 if (fixsize) {
11ad5ade 414 vtruncbuf(ap->a_vp, ip->ino_data.size,
d5ef456e
MD
415 HAMMER_BUFSIZE);
416 }
66325755
MD
417 break;
418 }
c0ade690 419 /* bp->b_flags |= B_CLUSTEROK; temporarily disabled */
11ad5ade
MD
420 if (ip->ino_data.size < uio->uio_offset) {
421 ip->ino_data.size = uio->uio_offset;
422 flags = HAMMER_INODE_DDIRTY;
423 vnode_pager_setsize(ap->a_vp, ip->ino_data.size);
c0ade690 424 } else {
d113fda1 425 flags = 0;
66325755 426 }
11ad5ade 427 ip->ino_data.mtime = trans.time;
f3b0f382 428 flags |= HAMMER_INODE_ITIMES | HAMMER_INODE_BUFS;
11ad5ade 429 flags |= HAMMER_INODE_DDIRTY; /* XXX mtime */
47637bff 430 hammer_modify_inode(ip, flags);
32c90105 431
0832c9bb
MD
432 /*
433 * Try to keep track of cached dirty data.
434 */
e63644f0
MD
435 if ((bp->b_flags & B_DIRTY) == 0) {
436 ++ip->rsv_databufs;
437 ++ip->hmp->rsv_databufs;
438 }
439
47637bff
MD
440 /*
441 * Final buffer disposition.
cb51be26
MD
442 *
443 * If write_mode is non-zero we call bawrite()
444 * unconditionally. Otherwise we only use bawrite()
445 * if the writes are clearly sequential.
47637bff 446 */
cb51be26 447 bp->b_flags |= B_AGE;
66325755
MD
448 if (ap->a_ioflag & IO_SYNC) {
449 bwrite(bp);
450 } else if (ap->a_ioflag & IO_DIRECT) {
66325755 451 bawrite(bp);
cb51be26
MD
452 } else if (hammer_write_mode &&
453 (uio->uio_offset & HAMMER_BUFMASK) == 0) {
47637bff 454#if 1
cb51be26
MD
455 /* strategy write cannot handled clustered writes */
456 bp->b_flags |= B_CLUSTEROK;
457 cluster_write(bp, ip->ino_data.size, seqcount);
458#else
459#endif
460 bawrite(bp);
059819e3 461 } else if ((ap->a_ioflag >> 16) == IO_SEQMAX &&
34d829f7
MD
462 (uio->uio_offset & HAMMER_BUFMASK) == 0) {
463 /*
464 * If seqcount indicates sequential operation and
465 * we just finished filling a buffer, push it out
466 * now to prevent the buffer cache from becoming
467 * too full, which would trigger non-optimal
468 * flushes.
469 */
47637bff 470 bawrite(bp);
66325755 471 } else {
66325755
MD
472 bdwrite(bp);
473 }
474 }
b84de5af 475 hammer_done_transaction(&trans);
66325755 476 return (error);
427e5fc6
MD
477}
478
66325755
MD
479/*
480 * hammer_vop_access { vp, mode, cred }
481 */
427e5fc6
MD
482static
483int
66325755 484hammer_vop_access(struct vop_access_args *ap)
427e5fc6 485{
66325755
MD
486 struct hammer_inode *ip = VTOI(ap->a_vp);
487 uid_t uid;
488 gid_t gid;
489 int error;
490
491 uid = hammer_to_unix_xid(&ip->ino_data.uid);
492 gid = hammer_to_unix_xid(&ip->ino_data.gid);
493
494 error = vop_helper_access(ap, uid, gid, ip->ino_data.mode,
495 ip->ino_data.uflags);
496 return (error);
427e5fc6
MD
497}
498
66325755
MD
499/*
500 * hammer_vop_advlock { vp, id, op, fl, flags }
501 */
427e5fc6
MD
502static
503int
66325755 504hammer_vop_advlock(struct vop_advlock_args *ap)
427e5fc6 505{
66325755
MD
506 struct hammer_inode *ip = VTOI(ap->a_vp);
507
11ad5ade 508 return (lf_advlock(ap, &ip->advlock, ip->ino_data.size));
427e5fc6
MD
509}
510
66325755
MD
/*
 * hammer_vop_close { vp, fflag }
 *
 * Forwards directly to vop_stdclose().
 */
static
int
hammer_vop_close(struct vop_close_args *ap)
{
	int error;

	error = vop_stdclose(ap);
	return (error);
}
520
66325755
MD
521/*
522 * hammer_vop_ncreate { nch, dvp, vpp, cred, vap }
523 *
524 * The operating system has already ensured that the directory entry
525 * does not exist and done all appropriate namespace locking.
526 */
427e5fc6
MD
527static
528int
66325755 529hammer_vop_ncreate(struct vop_ncreate_args *ap)
427e5fc6 530{
66325755
MD
531 struct hammer_transaction trans;
532 struct hammer_inode *dip;
533 struct hammer_inode *nip;
534 struct nchandle *nch;
535 int error;
536
537 nch = ap->a_nch;
538 dip = VTOI(ap->a_dvp);
539
d113fda1
MD
540 if (dip->flags & HAMMER_INODE_RO)
541 return (EROFS);
e63644f0
MD
542 if ((error = hammer_checkspace(dip->hmp)) != 0)
543 return (error);
d113fda1 544
66325755
MD
545 /*
546 * Create a transaction to cover the operations we perform.
547 */
8cd0a023 548 hammer_start_transaction(&trans, dip->hmp);
66325755
MD
549
550 /*
551 * Create a new filesystem object of the requested type. The
b84de5af
MD
552 * returned inode will be referenced and shared-locked to prevent
553 * it from being moved to the flusher.
66325755 554 */
8cd0a023
MD
555
556 error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
66325755 557 if (error) {
77062c8a 558 hkprintf("hammer_create_inode error %d\n", error);
b84de5af 559 hammer_done_transaction(&trans);
66325755
MD
560 *ap->a_vpp = NULL;
561 return (error);
562 }
66325755
MD
563
564 /*
565 * Add the new filesystem object to the directory. This will also
566 * bump the inode's link count.
567 */
a89aec1b 568 error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);
0b075555 569 if (error)
77062c8a 570 hkprintf("hammer_ip_add_directory error %d\n", error);
66325755
MD
571
572 /*
573 * Finish up.
574 */
575 if (error) {
a89aec1b 576 hammer_rel_inode(nip, 0);
b84de5af 577 hammer_done_transaction(&trans);
66325755
MD
578 *ap->a_vpp = NULL;
579 } else {
e8599db1 580 error = hammer_get_vnode(nip, ap->a_vpp);
b84de5af 581 hammer_done_transaction(&trans);
a89aec1b
MD
582 hammer_rel_inode(nip, 0);
583 if (error == 0) {
584 cache_setunresolved(ap->a_nch);
585 cache_setvp(ap->a_nch, *ap->a_vpp);
586 }
66325755
MD
587 }
588 return (error);
427e5fc6
MD
589}
590
66325755
MD
591/*
592 * hammer_vop_getattr { vp, vap }
98f7132d
MD
593 *
594 * Retrieve an inode's attribute information. When accessing inodes
595 * historically we fake the atime field to ensure consistent results.
596 * The atime field is stored in the B-Tree element and allowed to be
597 * updated without cycling the element.
66325755 598 */
427e5fc6
MD
599static
600int
66325755 601hammer_vop_getattr(struct vop_getattr_args *ap)
427e5fc6 602{
66325755
MD
603 struct hammer_inode *ip = VTOI(ap->a_vp);
604 struct vattr *vap = ap->a_vap;
605
606#if 0
607 if (cache_check_fsmid_vp(ap->a_vp, &ip->fsmid) &&
608 (vp->v_mount->mnt_flag & MNT_RDONLY) == 0 &&
7f7c1f84 609 ip->obj_asof == XXX
66325755
MD
610 ) {
611 /* LAZYMOD XXX */
612 }
613 hammer_itimes(ap->a_vp);
614#endif
615
616 vap->va_fsid = ip->hmp->fsid_udev;
11ad5ade 617 vap->va_fileid = ip->ino_leaf.base.obj_id;
66325755 618 vap->va_mode = ip->ino_data.mode;
11ad5ade 619 vap->va_nlink = ip->ino_data.nlinks;
66325755
MD
620 vap->va_uid = hammer_to_unix_xid(&ip->ino_data.uid);
621 vap->va_gid = hammer_to_unix_xid(&ip->ino_data.gid);
622 vap->va_rmajor = 0;
623 vap->va_rminor = 0;
11ad5ade 624 vap->va_size = ip->ino_data.size;
98f7132d
MD
625 if (ip->flags & HAMMER_INODE_RO)
626 hammer_to_timespec(ip->ino_data.mtime, &vap->va_atime);
627 else
628 hammer_to_timespec(ip->ino_leaf.atime, &vap->va_atime);
11ad5ade 629 hammer_to_timespec(ip->ino_data.mtime, &vap->va_mtime);
66325755
MD
630 hammer_to_timespec(ip->ino_data.ctime, &vap->va_ctime);
631 vap->va_flags = ip->ino_data.uflags;
632 vap->va_gen = 1; /* hammer inums are unique for all time */
bf686dbe 633 vap->va_blocksize = HAMMER_BUFSIZE;
11ad5ade
MD
634 vap->va_bytes = (ip->ino_data.size + 63) & ~63;
635 vap->va_type = hammer_get_vnode_type(ip->ino_data.obj_type);
66325755
MD
636 vap->va_filerev = 0; /* XXX */
637 /* mtime uniquely identifies any adjustments made to the file */
11ad5ade 638 vap->va_fsmid = ip->ino_data.mtime;
66325755
MD
639 vap->va_uid_uuid = ip->ino_data.uid;
640 vap->va_gid_uuid = ip->ino_data.gid;
641 vap->va_fsid_uuid = ip->hmp->fsid;
642 vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
643 VA_FSID_UUID_VALID;
7a04d74f 644
11ad5ade 645 switch (ip->ino_data.obj_type) {
7a04d74f
MD
646 case HAMMER_OBJTYPE_CDEV:
647 case HAMMER_OBJTYPE_BDEV:
648 vap->va_rmajor = ip->ino_data.rmajor;
649 vap->va_rminor = ip->ino_data.rminor;
650 break;
651 default:
652 break;
653 }
654
66325755 655 return(0);
427e5fc6
MD
656}
657
66325755
MD
658/*
659 * hammer_vop_nresolve { nch, dvp, cred }
660 *
661 * Locate the requested directory entry.
662 */
427e5fc6
MD
663static
664int
66325755 665hammer_vop_nresolve(struct vop_nresolve_args *ap)
427e5fc6 666{
36f82b23 667 struct hammer_transaction trans;
66325755 668 struct namecache *ncp;
7f7c1f84
MD
669 hammer_inode_t dip;
670 hammer_inode_t ip;
671 hammer_tid_t asof;
8cd0a023 672 struct hammer_cursor cursor;
66325755
MD
673 struct vnode *vp;
674 int64_t namekey;
675 int error;
7f7c1f84
MD
676 int i;
677 int nlen;
d113fda1 678 int flags;
6a37e7e4 679 u_int64_t obj_id;
7f7c1f84
MD
680
681 /*
682 * Misc initialization, plus handle as-of name extensions. Look for
683 * the '@@' extension. Note that as-of files and directories cannot
684 * be modified.
7f7c1f84
MD
685 */
686 dip = VTOI(ap->a_dvp);
687 ncp = ap->a_nch->ncp;
688 asof = dip->obj_asof;
689 nlen = ncp->nc_nlen;
d113fda1 690 flags = dip->flags;
7f7c1f84 691
36f82b23
MD
692 hammer_simple_transaction(&trans, dip->hmp);
693
7f7c1f84
MD
694 for (i = 0; i < nlen; ++i) {
695 if (ncp->nc_name[i] == '@' && ncp->nc_name[i+1] == '@') {
d113fda1 696 asof = hammer_str_to_tid(ncp->nc_name + i + 2);
d113fda1 697 flags |= HAMMER_INODE_RO;
7f7c1f84
MD
698 break;
699 }
700 }
701 nlen = i;
66325755 702
d113fda1
MD
703 /*
704 * If there is no path component the time extension is relative to
705 * dip.
706 */
707 if (nlen == 0) {
36f82b23 708 ip = hammer_get_inode(&trans, &dip->cache[1], dip->obj_id,
61aeeb33 709 asof, flags, &error);
d113fda1 710 if (error == 0) {
e8599db1 711 error = hammer_get_vnode(ip, &vp);
d113fda1
MD
712 hammer_rel_inode(ip, 0);
713 } else {
714 vp = NULL;
715 }
716 if (error == 0) {
717 vn_unlock(vp);
718 cache_setvp(ap->a_nch, vp);
719 vrele(vp);
720 }
36f82b23 721 goto done;
d113fda1
MD
722 }
723
8cd0a023
MD
724 /*
725 * Calculate the namekey and setup the key range for the scan. This
726 * works kinda like a chained hash table where the lower 32 bits
727 * of the namekey synthesize the chain.
728 *
729 * The key range is inclusive of both key_beg and key_end.
730 */
7f7c1f84 731 namekey = hammer_directory_namekey(ncp->nc_name, nlen);
66325755 732
4e17f465 733 error = hammer_init_cursor(&trans, &cursor, &dip->cache[0], dip);
2f85fa4d 734 cursor.key_beg.localization = HAMMER_LOCALIZE_MISC;
8cd0a023
MD
735 cursor.key_beg.obj_id = dip->obj_id;
736 cursor.key_beg.key = namekey;
d5530d22 737 cursor.key_beg.create_tid = 0;
8cd0a023
MD
738 cursor.key_beg.delete_tid = 0;
739 cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
740 cursor.key_beg.obj_type = 0;
66325755 741
8cd0a023
MD
742 cursor.key_end = cursor.key_beg;
743 cursor.key_end.key |= 0xFFFFFFFFULL;
d5530d22
MD
744 cursor.asof = asof;
745 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;
66325755
MD
746
747 /*
8cd0a023 748 * Scan all matching records (the chain), locate the one matching
a89aec1b 749 * the requested path component.
8cd0a023
MD
750 *
751 * The hammer_ip_*() functions merge in-memory records with on-disk
752 * records for the purposes of the search.
66325755 753 */
6a37e7e4
MD
754 obj_id = 0;
755
4e17f465 756 if (error == 0) {
4e17f465
MD
757 error = hammer_ip_first(&cursor);
758 while (error == 0) {
759 error = hammer_ip_resolve_data(&cursor);
760 if (error)
761 break;
11ad5ade
MD
762 if (nlen == cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF &&
763 bcmp(ncp->nc_name, cursor.data->entry.name, nlen) == 0) {
764 obj_id = cursor.data->entry.obj_id;
4e17f465
MD
765 break;
766 }
767 error = hammer_ip_next(&cursor);
66325755
MD
768 }
769 }
6a37e7e4 770 hammer_done_cursor(&cursor);
66325755 771 if (error == 0) {
36f82b23 772 ip = hammer_get_inode(&trans, &dip->cache[1],
6a37e7e4 773 obj_id, asof, flags, &error);
7f7c1f84 774 if (error == 0) {
e8599db1 775 error = hammer_get_vnode(ip, &vp);
7f7c1f84
MD
776 hammer_rel_inode(ip, 0);
777 } else {
778 vp = NULL;
779 }
66325755
MD
780 if (error == 0) {
781 vn_unlock(vp);
782 cache_setvp(ap->a_nch, vp);
783 vrele(vp);
784 }
785 } else if (error == ENOENT) {
786 cache_setvp(ap->a_nch, NULL);
787 }
36f82b23 788done:
b84de5af 789 hammer_done_transaction(&trans);
66325755 790 return (error);
427e5fc6
MD
791}
792
66325755
MD
793/*
794 * hammer_vop_nlookupdotdot { dvp, vpp, cred }
795 *
796 * Locate the parent directory of a directory vnode.
797 *
798 * dvp is referenced but not locked. *vpp must be returned referenced and
799 * locked. A parent_obj_id of 0 does not necessarily indicate that we are
800 * at the root, instead it could indicate that the directory we were in was
801 * removed.
42c7d26b
MD
802 *
803 * NOTE: as-of sequences are not linked into the directory structure. If
804 * we are at the root with a different asof then the mount point, reload
805 * the same directory with the mount point's asof. I'm not sure what this
806 * will do to NFS. We encode ASOF stamps in NFS file handles so it might not
807 * get confused, but it hasn't been tested.
66325755 808 */
427e5fc6
MD
809static
810int
66325755 811hammer_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
427e5fc6 812{
36f82b23 813 struct hammer_transaction trans;
66325755 814 struct hammer_inode *dip;
d113fda1 815 struct hammer_inode *ip;
42c7d26b
MD
816 int64_t parent_obj_id;
817 hammer_tid_t asof;
d113fda1 818 int error;
66325755
MD
819
820 dip = VTOI(ap->a_dvp);
42c7d26b
MD
821 asof = dip->obj_asof;
822 parent_obj_id = dip->ino_data.parent_obj_id;
823
824 if (parent_obj_id == 0) {
825 if (dip->obj_id == HAMMER_OBJID_ROOT &&
826 asof != dip->hmp->asof) {
827 parent_obj_id = dip->obj_id;
828 asof = dip->hmp->asof;
829 *ap->a_fakename = kmalloc(19, M_TEMP, M_WAITOK);
830 ksnprintf(*ap->a_fakename, 19, "0x%016llx",
831 dip->obj_asof);
832 } else {
833 *ap->a_vpp = NULL;
834 return ENOENT;
835 }
66325755 836 }
d113fda1 837
36f82b23
MD
838 hammer_simple_transaction(&trans, dip->hmp);
839
840 ip = hammer_get_inode(&trans, &dip->cache[1], parent_obj_id,
42c7d26b 841 asof, dip->flags, &error);
36f82b23 842 if (ip) {
e8599db1 843 error = hammer_get_vnode(ip, ap->a_vpp);
36f82b23
MD
844 hammer_rel_inode(ip, 0);
845 } else {
d113fda1 846 *ap->a_vpp = NULL;
d113fda1 847 }
b84de5af 848 hammer_done_transaction(&trans);
d113fda1 849 return (error);
427e5fc6
MD
850}
851
66325755
MD
852/*
853 * hammer_vop_nlink { nch, dvp, vp, cred }
854 */
427e5fc6
MD
855static
856int
66325755 857hammer_vop_nlink(struct vop_nlink_args *ap)
427e5fc6 858{
66325755
MD
859 struct hammer_transaction trans;
860 struct hammer_inode *dip;
861 struct hammer_inode *ip;
862 struct nchandle *nch;
863 int error;
864
865 nch = ap->a_nch;
866 dip = VTOI(ap->a_dvp);
867 ip = VTOI(ap->a_vp);
868
d113fda1
MD
869 if (dip->flags & HAMMER_INODE_RO)
870 return (EROFS);
871 if (ip->flags & HAMMER_INODE_RO)
872 return (EROFS);
e63644f0
MD
873 if ((error = hammer_checkspace(dip->hmp)) != 0)
874 return (error);
d113fda1 875
66325755
MD
876 /*
877 * Create a transaction to cover the operations we perform.
878 */
8cd0a023 879 hammer_start_transaction(&trans, dip->hmp);
66325755
MD
880
881 /*
882 * Add the filesystem object to the directory. Note that neither
883 * dip nor ip are referenced or locked, but their vnodes are
884 * referenced. This function will bump the inode's link count.
885 */
a89aec1b 886 error = hammer_ip_add_directory(&trans, dip, nch->ncp, ip);
66325755
MD
887
888 /*
889 * Finish up.
890 */
b84de5af 891 if (error == 0) {
6b4f890b
MD
892 cache_setunresolved(nch);
893 cache_setvp(nch, ap->a_vp);
66325755 894 }
b84de5af 895 hammer_done_transaction(&trans);
66325755 896 return (error);
427e5fc6
MD
897}
898
66325755
MD
899/*
900 * hammer_vop_nmkdir { nch, dvp, vpp, cred, vap }
901 *
902 * The operating system has already ensured that the directory entry
903 * does not exist and done all appropriate namespace locking.
904 */
427e5fc6
MD
905static
906int
66325755 907hammer_vop_nmkdir(struct vop_nmkdir_args *ap)
427e5fc6 908{
66325755
MD
909 struct hammer_transaction trans;
910 struct hammer_inode *dip;
911 struct hammer_inode *nip;
912 struct nchandle *nch;
913 int error;
914
915 nch = ap->a_nch;
916 dip = VTOI(ap->a_dvp);
917
d113fda1
MD
918 if (dip->flags & HAMMER_INODE_RO)
919 return (EROFS);
e63644f0
MD
920 if ((error = hammer_checkspace(dip->hmp)) != 0)
921 return (error);
d113fda1 922
66325755
MD
923 /*
924 * Create a transaction to cover the operations we perform.
925 */
8cd0a023 926 hammer_start_transaction(&trans, dip->hmp);
66325755
MD
927
928 /*
929 * Create a new filesystem object of the requested type. The
8cd0a023 930 * returned inode will be referenced but not locked.
66325755 931 */
8cd0a023 932 error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
66325755 933 if (error) {
77062c8a 934 hkprintf("hammer_mkdir error %d\n", error);
b84de5af 935 hammer_done_transaction(&trans);
66325755
MD
936 *ap->a_vpp = NULL;
937 return (error);
938 }
66325755
MD
939 /*
940 * Add the new filesystem object to the directory. This will also
941 * bump the inode's link count.
942 */
a89aec1b 943 error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);
0b075555 944 if (error)
77062c8a 945 hkprintf("hammer_mkdir (add) error %d\n", error);
66325755
MD
946
947 /*
948 * Finish up.
949 */
950 if (error) {
a89aec1b 951 hammer_rel_inode(nip, 0);
66325755
MD
952 *ap->a_vpp = NULL;
953 } else {
e8599db1 954 error = hammer_get_vnode(nip, ap->a_vpp);
a89aec1b
MD
955 hammer_rel_inode(nip, 0);
956 if (error == 0) {
957 cache_setunresolved(ap->a_nch);
958 cache_setvp(ap->a_nch, *ap->a_vpp);
959 }
66325755 960 }
b84de5af 961 hammer_done_transaction(&trans);
66325755 962 return (error);
427e5fc6
MD
963}
964
66325755
MD
965/*
966 * hammer_vop_nmknod { nch, dvp, vpp, cred, vap }
967 *
968 * The operating system has already ensured that the directory entry
969 * does not exist and done all appropriate namespace locking.
970 */
427e5fc6
MD
971static
972int
66325755 973hammer_vop_nmknod(struct vop_nmknod_args *ap)
427e5fc6 974{
66325755
MD
975 struct hammer_transaction trans;
976 struct hammer_inode *dip;
977 struct hammer_inode *nip;
978 struct nchandle *nch;
979 int error;
980
981 nch = ap->a_nch;
982 dip = VTOI(ap->a_dvp);
983
d113fda1
MD
984 if (dip->flags & HAMMER_INODE_RO)
985 return (EROFS);
e63644f0
MD
986 if ((error = hammer_checkspace(dip->hmp)) != 0)
987 return (error);
d113fda1 988
66325755
MD
989 /*
990 * Create a transaction to cover the operations we perform.
991 */
8cd0a023 992 hammer_start_transaction(&trans, dip->hmp);
66325755
MD
993
994 /*
995 * Create a new filesystem object of the requested type. The
8cd0a023 996 * returned inode will be referenced but not locked.
66325755 997 */
8cd0a023 998 error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
66325755 999 if (error) {
b84de5af 1000 hammer_done_transaction(&trans);
66325755
MD
1001 *ap->a_vpp = NULL;
1002 return (error);
1003 }
66325755
MD
1004
1005 /*
1006 * Add the new filesystem object to the directory. This will also
1007 * bump the inode's link count.
1008 */
a89aec1b 1009 error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);
66325755
MD
1010
1011 /*
1012 * Finish up.
1013 */
1014 if (error) {
a89aec1b 1015 hammer_rel_inode(nip, 0);
66325755
MD
1016 *ap->a_vpp = NULL;
1017 } else {
e8599db1 1018 error = hammer_get_vnode(nip, ap->a_vpp);
a89aec1b
MD
1019 hammer_rel_inode(nip, 0);
1020 if (error == 0) {
1021 cache_setunresolved(ap->a_nch);
1022 cache_setvp(ap->a_nch, *ap->a_vpp);
1023 }
66325755 1024 }
b84de5af 1025 hammer_done_transaction(&trans);
66325755 1026 return (error);
427e5fc6
MD
1027}
1028
66325755
MD
1029/*
1030 * hammer_vop_open { vp, mode, cred, fp }
1031 */
427e5fc6
MD
1032static
1033int
66325755 1034hammer_vop_open(struct vop_open_args *ap)
427e5fc6 1035{
9f5097dc
MD
1036 hammer_inode_t ip;
1037
1038 ip = VTOI(ap->a_vp);
1039
1040 if ((ap->a_mode & FWRITE) && (ip->flags & HAMMER_INODE_RO))
d113fda1 1041 return (EROFS);
a89aec1b 1042 return(vop_stdopen(ap));
427e5fc6
MD
1043}
1044
66325755
MD
1045/*
1046 * hammer_vop_pathconf { vp, name, retval }
1047 */
427e5fc6
MD
1048static
1049int
66325755 1050hammer_vop_pathconf(struct vop_pathconf_args *ap)
427e5fc6
MD
1051{
1052 return EOPNOTSUPP;
1053}
1054
66325755
MD
1055/*
1056 * hammer_vop_print { vp }
1057 */
427e5fc6
MD
1058static
1059int
66325755 1060hammer_vop_print(struct vop_print_args *ap)
427e5fc6
MD
1061{
1062 return EOPNOTSUPP;
1063}
1064
66325755 1065/*
6b4f890b 1066 * hammer_vop_readdir { vp, uio, cred, *eofflag, *ncookies, off_t **cookies }
66325755 1067 */
427e5fc6
MD
1068static
1069int
66325755 1070hammer_vop_readdir(struct vop_readdir_args *ap)
427e5fc6 1071{
36f82b23 1072 struct hammer_transaction trans;
6b4f890b
MD
1073 struct hammer_cursor cursor;
1074 struct hammer_inode *ip;
1075 struct uio *uio;
6b4f890b
MD
1076 hammer_base_elm_t base;
1077 int error;
1078 int cookie_index;
1079 int ncookies;
1080 off_t *cookies;
1081 off_t saveoff;
1082 int r;
1083
1084 ip = VTOI(ap->a_vp);
1085 uio = ap->a_uio;
b3deaf57
MD
1086 saveoff = uio->uio_offset;
1087
1088 if (ap->a_ncookies) {
1089 ncookies = uio->uio_resid / 16 + 1;
1090 if (ncookies > 1024)
1091 ncookies = 1024;
1092 cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
1093 cookie_index = 0;
1094 } else {
1095 ncookies = -1;
1096 cookies = NULL;
1097 cookie_index = 0;
1098 }
1099
36f82b23
MD
1100 hammer_simple_transaction(&trans, ip->hmp);
1101
b3deaf57
MD
1102 /*
1103 * Handle artificial entries
1104 */
1105 error = 0;
1106 if (saveoff == 0) {
1107 r = vop_write_dirent(&error, uio, ip->obj_id, DT_DIR, 1, ".");
1108 if (r)
1109 goto done;
1110 if (cookies)
1111 cookies[cookie_index] = saveoff;
1112 ++saveoff;
1113 ++cookie_index;
1114 if (cookie_index == ncookies)
1115 goto done;
1116 }
1117 if (saveoff == 1) {
1118 if (ip->ino_data.parent_obj_id) {
1119 r = vop_write_dirent(&error, uio,
1120 ip->ino_data.parent_obj_id,
1121 DT_DIR, 2, "..");
1122 } else {
1123 r = vop_write_dirent(&error, uio,
1124 ip->obj_id, DT_DIR, 2, "..");
1125 }
1126 if (r)
1127 goto done;
1128 if (cookies)
1129 cookies[cookie_index] = saveoff;
1130 ++saveoff;
1131 ++cookie_index;
1132 if (cookie_index == ncookies)
1133 goto done;
1134 }
6b4f890b
MD
1135
1136 /*
1137 * Key range (begin and end inclusive) to scan. Directory keys
1138 * directly translate to a 64 bit 'seek' position.
1139 */
4e17f465 1140 hammer_init_cursor(&trans, &cursor, &ip->cache[0], ip);
2f85fa4d 1141 cursor.key_beg.localization = HAMMER_LOCALIZE_MISC;
6b4f890b 1142 cursor.key_beg.obj_id = ip->obj_id;
d5530d22 1143 cursor.key_beg.create_tid = 0;
6b4f890b
MD
1144 cursor.key_beg.delete_tid = 0;
1145 cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
1146 cursor.key_beg.obj_type = 0;
b3deaf57 1147 cursor.key_beg.key = saveoff;
6b4f890b
MD
1148
1149 cursor.key_end = cursor.key_beg;
1150 cursor.key_end.key = HAMMER_MAX_KEY;
d5530d22
MD
1151 cursor.asof = ip->obj_asof;
1152 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;
6b4f890b 1153
4e17f465 1154 error = hammer_ip_first(&cursor);
6b4f890b
MD
1155
1156 while (error == 0) {
11ad5ade 1157 error = hammer_ip_resolve_data(&cursor);
6b4f890b
MD
1158 if (error)
1159 break;
11ad5ade 1160 base = &cursor.leaf->base;
6b4f890b 1161 saveoff = base->key;
11ad5ade 1162 KKASSERT(cursor.leaf->data_len > HAMMER_ENTRY_NAME_OFF);
6b4f890b 1163
7a04d74f
MD
1164 if (base->obj_id != ip->obj_id)
1165 panic("readdir: bad record at %p", cursor.node);
1166
6b4f890b 1167 r = vop_write_dirent(
11ad5ade
MD
1168 &error, uio, cursor.data->entry.obj_id,
1169 hammer_get_dtype(cursor.leaf->base.obj_type),
1170 cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF ,
1171 (void *)cursor.data->entry.name);
6b4f890b
MD
1172 if (r)
1173 break;
1174 ++saveoff;
1175 if (cookies)
1176 cookies[cookie_index] = base->key;
1177 ++cookie_index;
1178 if (cookie_index == ncookies)
1179 break;
1180 error = hammer_ip_next(&cursor);
1181 }
1182 hammer_done_cursor(&cursor);
1183
b3deaf57 1184done:
b84de5af 1185 hammer_done_transaction(&trans);
36f82b23 1186
6b4f890b
MD
1187 if (ap->a_eofflag)
1188 *ap->a_eofflag = (error == ENOENT);
6b4f890b
MD
1189 uio->uio_offset = saveoff;
1190 if (error && cookie_index == 0) {
b3deaf57
MD
1191 if (error == ENOENT)
1192 error = 0;
6b4f890b
MD
1193 if (cookies) {
1194 kfree(cookies, M_TEMP);
1195 *ap->a_ncookies = 0;
1196 *ap->a_cookies = NULL;
1197 }
1198 } else {
7a04d74f
MD
1199 if (error == ENOENT)
1200 error = 0;
6b4f890b
MD
1201 if (cookies) {
1202 *ap->a_ncookies = cookie_index;
1203 *ap->a_cookies = cookies;
1204 }
1205 }
1206 return(error);
427e5fc6
MD
1207}
1208
66325755
MD
1209/*
1210 * hammer_vop_readlink { vp, uio, cred }
1211 */
427e5fc6
MD
1212static
1213int
66325755 1214hammer_vop_readlink(struct vop_readlink_args *ap)
427e5fc6 1215{
36f82b23 1216 struct hammer_transaction trans;
7a04d74f
MD
1217 struct hammer_cursor cursor;
1218 struct hammer_inode *ip;
1219 int error;
1220
1221 ip = VTOI(ap->a_vp);
36f82b23 1222
2f85fa4d
MD
1223 /*
1224 * Shortcut if the symlink data was stuffed into ino_data.
1225 */
1226 if (ip->ino_data.size <= HAMMER_INODE_BASESYMLEN) {
1227 error = uiomove(ip->ino_data.ext.symlink,
1228 ip->ino_data.size, ap->a_uio);
1229 return(error);
1230 }
36f82b23 1231
2f85fa4d
MD
1232 /*
1233 * Long version
1234 */
1235 hammer_simple_transaction(&trans, ip->hmp);
4e17f465 1236 hammer_init_cursor(&trans, &cursor, &ip->cache[0], ip);
7a04d74f
MD
1237
1238 /*
1239 * Key range (begin and end inclusive) to scan. Directory keys
1240 * directly translate to a 64 bit 'seek' position.
1241 */
2f85fa4d 1242 cursor.key_beg.localization = HAMMER_LOCALIZE_MISC; /* XXX */
7a04d74f 1243 cursor.key_beg.obj_id = ip->obj_id;
d5530d22 1244 cursor.key_beg.create_tid = 0;
7a04d74f
MD
1245 cursor.key_beg.delete_tid = 0;
1246 cursor.key_beg.rec_type = HAMMER_RECTYPE_FIX;
1247 cursor.key_beg.obj_type = 0;
1248 cursor.key_beg.key = HAMMER_FIXKEY_SYMLINK;
d5530d22
MD
1249 cursor.asof = ip->obj_asof;
1250 cursor.flags |= HAMMER_CURSOR_ASOF;
7a04d74f 1251
45a014dc 1252 error = hammer_ip_lookup(&cursor);
7a04d74f
MD
1253 if (error == 0) {
1254 error = hammer_ip_resolve_data(&cursor);
1255 if (error == 0) {
11ad5ade
MD
1256 KKASSERT(cursor.leaf->data_len >=
1257 HAMMER_SYMLINK_NAME_OFF);
1258 error = uiomove(cursor.data->symlink.name,
1259 cursor.leaf->data_len -
1260 HAMMER_SYMLINK_NAME_OFF,
7a04d74f
MD
1261 ap->a_uio);
1262 }
1263 }
1264 hammer_done_cursor(&cursor);
b84de5af 1265 hammer_done_transaction(&trans);
7a04d74f 1266 return(error);
427e5fc6
MD
1267}
1268
66325755
MD
1269/*
1270 * hammer_vop_nremove { nch, dvp, cred }
1271 */
427e5fc6
MD
1272static
1273int
66325755 1274hammer_vop_nremove(struct vop_nremove_args *ap)
427e5fc6 1275{
b84de5af 1276 struct hammer_transaction trans;
e63644f0 1277 struct hammer_inode *dip;
b84de5af
MD
1278 int error;
1279
e63644f0
MD
1280 dip = VTOI(ap->a_dvp);
1281
1282 if (hammer_nohistory(dip) == 0 &&
1283 (error = hammer_checkspace(dip->hmp)) != 0) {
1284 return (error);
1285 }
1286
1287 hammer_start_transaction(&trans, dip->hmp);
b84de5af
MD
1288 error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp, ap->a_cred, 0);
1289 hammer_done_transaction(&trans);
1290
1291 return (error);
427e5fc6
MD
1292}
1293
66325755
MD
1294/*
1295 * hammer_vop_nrename { fnch, tnch, fdvp, tdvp, cred }
1296 */
427e5fc6
MD
1297static
1298int
66325755 1299hammer_vop_nrename(struct vop_nrename_args *ap)
427e5fc6 1300{
8cd0a023
MD
1301 struct hammer_transaction trans;
1302 struct namecache *fncp;
1303 struct namecache *tncp;
1304 struct hammer_inode *fdip;
1305 struct hammer_inode *tdip;
1306 struct hammer_inode *ip;
1307 struct hammer_cursor cursor;
8cd0a023 1308 int64_t namekey;
11ad5ade 1309 int nlen, error;
8cd0a023
MD
1310
1311 fdip = VTOI(ap->a_fdvp);
1312 tdip = VTOI(ap->a_tdvp);
1313 fncp = ap->a_fnch->ncp;
1314 tncp = ap->a_tnch->ncp;
b3deaf57
MD
1315 ip = VTOI(fncp->nc_vp);
1316 KKASSERT(ip != NULL);
d113fda1
MD
1317
1318 if (fdip->flags & HAMMER_INODE_RO)
1319 return (EROFS);
1320 if (tdip->flags & HAMMER_INODE_RO)
1321 return (EROFS);
1322 if (ip->flags & HAMMER_INODE_RO)
1323 return (EROFS);
e63644f0
MD
1324 if ((error = hammer_checkspace(fdip->hmp)) != 0)
1325 return (error);
d113fda1 1326
8cd0a023
MD
1327 hammer_start_transaction(&trans, fdip->hmp);
1328
1329 /*
b3deaf57
MD
1330 * Remove tncp from the target directory and then link ip as
1331 * tncp. XXX pass trans to dounlink
42c7d26b
MD
1332 *
1333 * Force the inode sync-time to match the transaction so it is
1334 * in-sync with the creation of the target directory entry.
8cd0a023 1335 */
b84de5af 1336 error = hammer_dounlink(&trans, ap->a_tnch, ap->a_tdvp, ap->a_cred, 0);
42c7d26b 1337 if (error == 0 || error == ENOENT) {
b3deaf57 1338 error = hammer_ip_add_directory(&trans, tdip, tncp, ip);
42c7d26b
MD
1339 if (error == 0) {
1340 ip->ino_data.parent_obj_id = tdip->obj_id;
47637bff 1341 hammer_modify_inode(ip, HAMMER_INODE_DDIRTY);
42c7d26b
MD
1342 }
1343 }
b3deaf57
MD
1344 if (error)
1345 goto failed; /* XXX */
8cd0a023
MD
1346
1347 /*
1348 * Locate the record in the originating directory and remove it.
1349 *
1350 * Calculate the namekey and setup the key range for the scan. This
1351 * works kinda like a chained hash table where the lower 32 bits
1352 * of the namekey synthesize the chain.
1353 *
1354 * The key range is inclusive of both key_beg and key_end.
1355 */
1356 namekey = hammer_directory_namekey(fncp->nc_name, fncp->nc_nlen);
6a37e7e4 1357retry:
4e17f465 1358 hammer_init_cursor(&trans, &cursor, &fdip->cache[0], fdip);
2f85fa4d 1359 cursor.key_beg.localization = HAMMER_LOCALIZE_MISC;
8cd0a023
MD
1360 cursor.key_beg.obj_id = fdip->obj_id;
1361 cursor.key_beg.key = namekey;
d5530d22 1362 cursor.key_beg.create_tid = 0;
8cd0a023
MD
1363 cursor.key_beg.delete_tid = 0;
1364 cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
1365 cursor.key_beg.obj_type = 0;
1366
1367 cursor.key_end = cursor.key_beg;
1368 cursor.key_end.key |= 0xFFFFFFFFULL;
d5530d22
MD
1369 cursor.asof = fdip->obj_asof;
1370 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;
8cd0a023
MD
1371
1372 /*
1373 * Scan all matching records (the chain), locate the one matching
a89aec1b 1374 * the requested path component.
8cd0a023
MD
1375 *
1376 * The hammer_ip_*() functions merge in-memory records with on-disk
1377 * records for the purposes of the search.
1378 */
4e17f465 1379 error = hammer_ip_first(&cursor);
a89aec1b 1380 while (error == 0) {
8cd0a023
MD
1381 if (hammer_ip_resolve_data(&cursor) != 0)
1382 break;
11ad5ade
MD
1383 nlen = cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF;
1384 KKASSERT(nlen > 0);
1385 if (fncp->nc_nlen == nlen &&
1386 bcmp(fncp->nc_name, cursor.data->entry.name, nlen) == 0) {
8cd0a023
MD
1387 break;
1388 }
a89aec1b 1389 error = hammer_ip_next(&cursor);
8cd0a023 1390 }
8cd0a023
MD
1391
1392 /*
1393 * If all is ok we have to get the inode so we can adjust nlinks.
6a37e7e4
MD
1394 *
1395 * WARNING: hammer_ip_del_directory() may have to terminate the
1396 * cursor to avoid a recursion. It's ok to call hammer_done_cursor()
1397 * twice.
8cd0a023 1398 */
9944ae54 1399 if (error == 0)
6a37e7e4 1400 error = hammer_ip_del_directory(&trans, &cursor, fdip, ip);
b84de5af
MD
1401
1402 /*
1403 * XXX A deadlock here will break rename's atomicy for the purposes
1404 * of crash recovery.
1405 */
1406 if (error == EDEADLK) {
b84de5af 1407 hammer_done_cursor(&cursor);
b84de5af
MD
1408 goto retry;
1409 }
1410
1411 /*
1412 * Cleanup and tell the kernel that the rename succeeded.
1413 */
c0ade690 1414 hammer_done_cursor(&cursor);
6a37e7e4
MD
1415 if (error == 0)
1416 cache_rename(ap->a_fnch, ap->a_tnch);
b84de5af 1417
b3deaf57 1418failed:
b84de5af 1419 hammer_done_transaction(&trans);
8cd0a023 1420 return (error);
427e5fc6
MD
1421}
1422
66325755
MD
1423/*
1424 * hammer_vop_nrmdir { nch, dvp, cred }
1425 */
427e5fc6
MD
1426static
1427int
66325755 1428hammer_vop_nrmdir(struct vop_nrmdir_args *ap)
427e5fc6 1429{
b84de5af 1430 struct hammer_transaction trans;
e63644f0 1431 struct hammer_inode *dip;
b84de5af
MD
1432 int error;
1433
e63644f0
MD
1434 dip = VTOI(ap->a_dvp);
1435
1436 if (hammer_nohistory(dip) == 0 &&
1437 (error = hammer_checkspace(dip->hmp)) != 0) {
1438 return (error);
1439 }
1440
1441 hammer_start_transaction(&trans, dip->hmp);
b84de5af
MD
1442 error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp, ap->a_cred, 0);
1443 hammer_done_transaction(&trans);
1444
1445 return (error);
427e5fc6
MD
1446}
1447
66325755
MD
1448/*
1449 * hammer_vop_setattr { vp, vap, cred }
1450 */
427e5fc6
MD
1451static
1452int
66325755 1453hammer_vop_setattr(struct vop_setattr_args *ap)
427e5fc6 1454{
8cd0a023
MD
1455 struct hammer_transaction trans;
1456 struct vattr *vap;
1457 struct hammer_inode *ip;
1458 int modflags;
1459 int error;
d5ef456e 1460 int truncating;
b84de5af 1461 off_t aligned_size;
8cd0a023 1462 u_int32_t flags;
8cd0a023
MD
1463
1464 vap = ap->a_vap;
1465 ip = ap->a_vp->v_data;
1466 modflags = 0;
1467
1468 if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
1469 return(EROFS);
d113fda1
MD
1470 if (ip->flags & HAMMER_INODE_RO)
1471 return (EROFS);
e63644f0
MD
1472 if (hammer_nohistory(ip) == 0 &&
1473 (error = hammer_checkspace(ip->hmp)) != 0) {
1474 return (error);
1475 }
8cd0a023
MD
1476
1477 hammer_start_transaction(&trans, ip->hmp);
1478 error = 0;
1479
1480 if (vap->va_flags != VNOVAL) {
1481 flags = ip->ino_data.uflags;
1482 error = vop_helper_setattr_flags(&flags, vap->va_flags,
1483 hammer_to_unix_xid(&ip->ino_data.uid),
1484 ap->a_cred);
1485 if (error == 0) {
1486 if (ip->ino_data.uflags != flags) {
1487 ip->ino_data.uflags = flags;
1488 modflags |= HAMMER_INODE_DDIRTY;
1489 }
1490 if (ip->ino_data.uflags & (IMMUTABLE | APPEND)) {
1491 error = 0;
1492 goto done;
1493 }
1494 }
1495 goto done;
1496 }
1497 if (ip->ino_data.uflags & (IMMUTABLE | APPEND)) {
1498 error = EPERM;
1499 goto done;
1500 }
7538695e
MD
1501 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
1502 mode_t cur_mode = ip->ino_data.mode;
1503 uid_t cur_uid = hammer_to_unix_xid(&ip->ino_data.uid);
1504 gid_t cur_gid = hammer_to_unix_xid(&ip->ino_data.gid);
1505 uuid_t uuid_uid;
1506 uuid_t uuid_gid;
1507
1508 error = vop_helper_chown(ap->a_vp, vap->va_uid, vap->va_gid,
1509 ap->a_cred,
1510 &cur_uid, &cur_gid, &cur_mode);
1511 if (error == 0) {
1512 hammer_guid_to_uuid(&uuid_uid, cur_uid);
1513 hammer_guid_to_uuid(&uuid_gid, cur_gid);
1514 if (bcmp(&uuid_uid, &ip->ino_data.uid,
1515 sizeof(uuid_uid)) ||
1516 bcmp(&uuid_gid, &ip->ino_data.gid,
1517 sizeof(uuid_gid)) ||
1518 ip->ino_data.mode != cur_mode
1519 ) {
1520 ip->ino_data.uid = uuid_uid;
1521 ip->ino_data.gid = uuid_gid;
1522 ip->ino_data.mode = cur_mode;
1523 }
8cd0a023
MD
1524 modflags |= HAMMER_INODE_DDIRTY;
1525 }
1526 }
11ad5ade 1527 while (vap->va_size != VNOVAL && ip->ino_data.size != vap->va_size) {
8cd0a023
MD
1528 switch(ap->a_vp->v_type) {
1529 case VREG:
11ad5ade 1530 if (vap->va_size == ip->ino_data.size)
d5ef456e 1531 break;
b84de5af
MD
1532 /*
1533 * XXX break atomicy, we can deadlock the backend
1534 * if we do not release the lock. Probably not a
1535 * big deal here.
1536 */
11ad5ade 1537 if (vap->va_size < ip->ino_data.size) {
c0ade690
MD
1538 vtruncbuf(ap->a_vp, vap->va_size,
1539 HAMMER_BUFSIZE);
d5ef456e
MD
1540 truncating = 1;
1541 } else {
c0ade690 1542 vnode_pager_setsize(ap->a_vp, vap->va_size);
d5ef456e 1543 truncating = 0;
c0ade690 1544 }
11ad5ade
MD
1545 ip->ino_data.size = vap->va_size;
1546 modflags |= HAMMER_INODE_DDIRTY;
76376933 1547 aligned_size = (vap->va_size + HAMMER_BUFMASK) &
b84de5af 1548 ~HAMMER_BUFMASK64;
d5ef456e 1549
b84de5af
MD
1550 /*
1551 * on-media truncation is cached in the inode until
1552 * the inode is synchronized.
1553 */
d5ef456e 1554 if (truncating) {
47637bff 1555 hammer_ip_frontend_trunc(ip, vap->va_size);
0832c9bb
MD
1556 hammer_update_rsv_databufs(ip);
1557#ifdef DEBUG_TRUNCATE
1558 if (HammerTruncIp == NULL)
1559 HammerTruncIp = ip;
1560#endif
b84de5af
MD
1561 if ((ip->flags & HAMMER_INODE_TRUNCATED) == 0) {
1562 ip->flags |= HAMMER_INODE_TRUNCATED;
1563 ip->trunc_off = vap->va_size;
0832c9bb
MD
1564#ifdef DEBUG_TRUNCATE
1565 if (ip == HammerTruncIp)
1566 kprintf("truncate1 %016llx\n", ip->trunc_off);
1567#endif
b84de5af
MD
1568 } else if (ip->trunc_off > vap->va_size) {
1569 ip->trunc_off = vap->va_size;
0832c9bb
MD
1570#ifdef DEBUG_TRUNCATE
1571 if (ip == HammerTruncIp)
1572 kprintf("truncate2 %016llx\n", ip->trunc_off);
1573#endif
1574 } else {
1575#ifdef DEBUG_TRUNCATE
1576 if (ip == HammerTruncIp)
1577 kprintf("truncate3 %016llx (ignored)\n", vap->va_size);
1578#endif
b84de5af 1579 }
d5ef456e 1580 }
b84de5af 1581
d5ef456e
MD
1582 /*
1583 * If truncating we have to clean out a portion of
b84de5af
MD
1584 * the last block on-disk. We do this in the
1585 * front-end buffer cache.
d5ef456e 1586 */
b84de5af 1587 if (truncating && vap->va_size < aligned_size) {
d5ef456e
MD
1588 struct buf *bp;
1589 int offset;
1590
47637bff
MD
1591 aligned_size -= HAMMER_BUFSIZE;
1592
d5ef456e 1593 offset = vap->va_size & HAMMER_BUFMASK;
47637bff 1594 error = bread(ap->a_vp, aligned_size,
d5ef456e 1595 HAMMER_BUFSIZE, &bp);
47637bff 1596 hammer_ip_frontend_trunc(ip, aligned_size);
d5ef456e
MD
1597 if (error == 0) {
1598 bzero(bp->b_data + offset,
1599 HAMMER_BUFSIZE - offset);
1600 bdwrite(bp);
1601 } else {
47637bff 1602 kprintf("ERROR %d\n", error);
d5ef456e
MD
1603 brelse(bp);
1604 }
1605 }
76376933 1606 break;
8cd0a023 1607 case VDATABASE:
b84de5af
MD
1608 if ((ip->flags & HAMMER_INODE_TRUNCATED) == 0) {
1609 ip->flags |= HAMMER_INODE_TRUNCATED;
1610 ip->trunc_off = vap->va_size;
1611 } else if (ip->trunc_off > vap->va_size) {
1612 ip->trunc_off = vap->va_size;
1613 }
47637bff 1614 hammer_ip_frontend_trunc(ip, vap->va_size);
11ad5ade
MD
1615 ip->ino_data.size = vap->va_size;
1616 modflags |= HAMMER_INODE_DDIRTY;
8cd0a023
MD
1617 break;
1618 default:
1619 error = EINVAL;
1620 goto done;
1621 }
d26d0ae9 1622 break;
8cd0a023
MD
1623 }
1624 if (vap->va_atime.tv_sec != VNOVAL) {
11ad5ade 1625 ip->ino_leaf.atime =
8cd0a023
MD
1626 hammer_timespec_to_transid(&vap->va_atime);
1627 modflags |= HAMMER_INODE_ITIMES;
1628 }
1629 if (vap->va_mtime.tv_sec != VNOVAL) {
11ad5ade 1630 ip->ino_data.mtime =
8cd0a023
MD
1631 hammer_timespec_to_transid(&vap->va_mtime);
1632 modflags |= HAMMER_INODE_ITIMES;
98f7132d 1633 modflags |= HAMMER_INODE_DDIRTY; /* XXX mtime */
8cd0a023
MD
1634 }
1635 if (vap->va_mode != (mode_t)VNOVAL) {
7538695e
MD
1636 mode_t cur_mode = ip->ino_data.mode;
1637 uid_t cur_uid = hammer_to_unix_xid(&ip->ino_data.uid);
1638 gid_t cur_gid = hammer_to_unix_xid(&ip->ino_data.gid);
1639
1640 error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred,
1641 cur_uid, cur_gid, &cur_mode);
1642 if (error == 0 && ip->ino_data.mode != cur_mode) {
1643 ip->ino_data.mode = cur_mode;
8cd0a023
MD
1644 modflags |= HAMMER_INODE_DDIRTY;
1645 }
1646 }
1647done:
b84de5af 1648 if (error == 0)
47637bff 1649 hammer_modify_inode(ip, modflags);
b84de5af 1650 hammer_done_transaction(&trans);
8cd0a023 1651 return (error);
427e5fc6
MD
1652}
1653
66325755
MD
1654/*
1655 * hammer_vop_nsymlink { nch, dvp, vpp, cred, vap, target }
1656 */
427e5fc6
MD
1657static
1658int
66325755 1659hammer_vop_nsymlink(struct vop_nsymlink_args *ap)
427e5fc6 1660{
7a04d74f
MD
1661 struct hammer_transaction trans;
1662 struct hammer_inode *dip;
1663 struct hammer_inode *nip;
1664 struct nchandle *nch;
1665 hammer_record_t record;
1666 int error;
1667 int bytes;
1668
1669 ap->a_vap->va_type = VLNK;
1670
1671 nch = ap->a_nch;
1672 dip = VTOI(ap->a_dvp);
1673
d113fda1
MD
1674 if (dip->flags & HAMMER_INODE_RO)
1675 return (EROFS);
e63644f0
MD
1676 if ((error = hammer_checkspace(dip->hmp)) != 0)
1677 return (error);
d113fda1 1678
7a04d74f
MD
1679 /*
1680 * Create a transaction to cover the operations we perform.
1681 */
1682 hammer_start_transaction(&trans, dip->hmp);
1683
1684 /*
1685 * Create a new filesystem object of the requested type. The
1686 * returned inode will be referenced but not locked.
1687 */
1688
1689 error = hammer_create_inode(&trans, ap->a_vap, ap->a_cred, dip, &nip);
1690 if (error) {
b84de5af 1691 hammer_done_transaction(&trans);
7a04d74f
MD
1692 *ap->a_vpp = NULL;
1693 return (error);
1694 }
1695
7a04d74f
MD
1696 /*
1697 * Add a record representing the symlink. symlink stores the link
1698 * as pure data, not a string, and is no \0 terminated.
1699 */
1700 if (error == 0) {
7a04d74f
MD
1701 bytes = strlen(ap->a_target);
1702
2f85fa4d
MD
1703 if (bytes <= HAMMER_INODE_BASESYMLEN) {
1704 bcopy(ap->a_target, nip->ino_data.ext.symlink, bytes);
1705 } else {
1706 record = hammer_alloc_mem_record(nip, bytes);
1707 record->type = HAMMER_MEM_RECORD_GENERAL;
1708
1709 record->leaf.base.localization = HAMMER_LOCALIZE_MISC;
1710 record->leaf.base.key = HAMMER_FIXKEY_SYMLINK;
1711 record->leaf.base.rec_type = HAMMER_RECTYPE_FIX;
1712 record->leaf.data_len = bytes;
1713 KKASSERT(HAMMER_SYMLINK_NAME_OFF == 0);
1714 bcopy(ap->a_target, record->data->symlink.name, bytes);
1715 error = hammer_ip_add_record(&trans, record);
1716 }
42c7d26b
MD
1717
1718 /*
1719 * Set the file size to the length of the link.
1720 */
1721 if (error == 0) {
11ad5ade 1722 nip->ino_data.size = bytes;
47637bff 1723 hammer_modify_inode(nip, HAMMER_INODE_DDIRTY);
42c7d26b 1724 }
7a04d74f 1725 }
1f07f686
MD
1726 if (error == 0)
1727 error = hammer_ip_add_directory(&trans, dip, nch->ncp, nip);
7a04d74f
MD
1728
1729 /*
1730 * Finish up.
1731 */
1732 if (error) {
1733 hammer_rel_inode(nip, 0);
7a04d74f
MD
1734 *ap->a_vpp = NULL;
1735 } else {
e8599db1 1736 error = hammer_get_vnode(nip, ap->a_vpp);
7a04d74f
MD
1737 hammer_rel_inode(nip, 0);
1738 if (error == 0) {
1739 cache_setunresolved(ap->a_nch);
1740 cache_setvp(ap->a_nch, *ap->a_vpp);
1741 }
1742 }
b84de5af 1743 hammer_done_transaction(&trans);
7a04d74f 1744 return (error);
427e5fc6
MD
1745}
1746
66325755
MD
1747/*
1748 * hammer_vop_nwhiteout { nch, dvp, cred, flags }
1749 */
427e5fc6
MD
1750static
1751int
66325755 1752hammer_vop_nwhiteout(struct vop_nwhiteout_args *ap)
427e5fc6 1753{
b84de5af 1754 struct hammer_transaction trans;
e63644f0 1755 struct hammer_inode *dip;
b84de5af
MD
1756 int error;
1757
e63644f0
MD
1758 dip = VTOI(ap->a_dvp);
1759
1760 if (hammer_nohistory(dip) == 0 &&
1761 (error = hammer_checkspace(dip->hmp)) != 0) {
1762 return (error);
1763 }
1764
1765 hammer_start_transaction(&trans, dip->hmp);
b84de5af
MD
1766 error = hammer_dounlink(&trans, ap->a_nch, ap->a_dvp,
1767 ap->a_cred, ap->a_flags);
1768 hammer_done_transaction(&trans);
1769
1770 return (error);
427e5fc6
MD
1771}
1772
7dc57964
MD
1773/*
1774 * hammer_vop_ioctl { vp, command, data, fflag, cred }
1775 */
1776static
1777int
1778hammer_vop_ioctl(struct vop_ioctl_args *ap)
1779{
1780 struct hammer_inode *ip = ap->a_vp->v_data;
1781
1782 return(hammer_ioctl(ip, ap->a_command, ap->a_data,
1783 ap->a_fflag, ap->a_cred));
1784}
1785
513ca7d7
MD
1786static
1787int
1788hammer_vop_mountctl(struct vop_mountctl_args *ap)
1789{
1790 struct mount *mp;
1791 int error;
1792
1793 mp = ap->a_head.a_ops->head.vv_mount;
1794
1795 switch(ap->a_op) {
1796 case MOUNTCTL_SET_EXPORT:
1797 if (ap->a_ctllen != sizeof(struct export_args))
1798 error = EINVAL;
1799 error = hammer_vfs_export(mp, ap->a_op,
1800 (const struct export_args *)ap->a_ctl);
1801 break;
1802 default:
1803 error = journal_mountctl(ap);
1804 break;
1805 }
1806 return(error);
1807}
1808
66325755
MD
1809/*
1810 * hammer_vop_strategy { vp, bio }
8cd0a023
MD
1811 *
1812 * Strategy call, used for regular file read & write only. Note that the
1813 * bp may represent a cluster.
1814 *
1815 * To simplify operation and allow better optimizations in the future,
1816 * this code does not make any assumptions with regards to buffer alignment
1817 * or size.
66325755 1818 */
427e5fc6
MD
1819static
1820int
66325755 1821hammer_vop_strategy(struct vop_strategy_args *ap)
427e5fc6 1822{
8cd0a023
MD
1823 struct buf *bp;
1824 int error;
1825
1826 bp = ap->a_bio->bio_buf;
1827
1828 switch(bp->b_cmd) {
1829 case BUF_CMD_READ:
1830 error = hammer_vop_strategy_read(ap);
1831 break;
1832 case BUF_CMD_WRITE:
1833 error = hammer_vop_strategy_write(ap);
1834 break;
1835 default:
059819e3
MD
1836 bp->b_error = error = EINVAL;
1837 bp->b_flags |= B_ERROR;
1838 biodone(ap->a_bio);
8cd0a023
MD
1839 break;
1840 }
8cd0a023 1841 return (error);
427e5fc6
MD
1842}
1843
8cd0a023
MD
1844/*
1845 * Read from a regular file. Iterate the related records and fill in the
1846 * BIO/BUF. Gaps are zero-filled.
1847 *
1848 * The support code in hammer_object.c should be used to deal with mixed
1849 * in-memory and on-disk records.
1850 *
1851 * XXX atime update
1852 */
1853static
1854int
1855hammer_vop_strategy_read(struct vop_strategy_args *ap)
1856{
36f82b23
MD
1857 struct hammer_transaction trans;
1858 struct hammer_inode *ip;
8cd0a023 1859 struct hammer_cursor cursor;
8cd0a023
MD
1860 hammer_base_elm_t base;
1861 struct bio *bio;
a99b9ea2 1862 struct bio *nbio;
8cd0a023
MD
1863 struct buf *bp;
1864 int64_t rec_offset;
a89aec1b 1865 int64_t ran_end;
195c19a1 1866 int64_t tmp64;
8cd0a023
MD
1867 int error;
1868 int boff;
1869 int roff;
1870 int n;
1871
1872 bio = ap->a_bio;
1873 bp = bio->bio_buf;
36f82b23 1874 ip = ap->a_vp->v_data;
8cd0a023 1875
a99b9ea2
MD
1876 /*
1877 * The zone-2 disk offset may have been set by the cluster code via
1878 * a BMAP operation. Take care not to confuse it with the bio_offset
1879 * set by hammer_io_direct_write(), which is a device-relative offset.
1880 *
1881 * Checking the high bits should suffice.
1882 */
1883 nbio = push_bio(bio);
6aeaa7bd
MD
1884 if ((nbio->bio_offset & HAMMER_OFF_ZONE_MASK) ==
1885 HAMMER_ZONE_RAW_BUFFER) {
a99b9ea2
MD
1886 error = hammer_io_direct_read(ip->hmp, nbio->bio_offset, bio);
1887 return (error);
1888 }
1889
1890 /*
1891 * Hard way
1892 */
36f82b23 1893 hammer_simple_transaction(&trans, ip->hmp);
47637bff 1894 hammer_init_cursor(&trans, &cursor, &ip->cache[1], ip);
8cd0a023
MD
1895
1896 /*
1897 * Key range (begin and end inclusive) to scan. Note that the key's
c0ade690
MD
1898 * stored in the actual records represent BASE+LEN, not BASE. The
1899 * first record containing bio_offset will have a key > bio_offset.
8cd0a023 1900 */
2f85fa4d 1901 cursor.key_beg.localization = HAMMER_LOCALIZE_MISC;
8cd0a023 1902 cursor.key_beg.obj_id = ip->obj_id;
d5530d22 1903 cursor.key_beg.create_tid = 0;
8cd0a023 1904 cursor.key_beg.delete_tid = 0;
8cd0a023 1905 cursor.key_beg.obj_type = 0;
c0ade690 1906 cursor.key_beg.key = bio->bio_offset + 1;
d5530d22 1907 cursor.asof = ip->obj_asof;
bf3b416b 1908 cursor.flags |= HAMMER_CURSOR_ASOF;
8cd0a023
MD
1909
1910 cursor.key_end = cursor.key_beg;
11ad5ade 1911 KKASSERT(ip->ino_data.obj_type == HAMMER_OBJTYPE_REGFILE);
b84de5af 1912#if 0
11ad5ade 1913 if (ip->ino_data.obj_type == HAMMER_OBJTYPE_DBFILE) {
a89aec1b
MD
1914 cursor.key_beg.rec_type = HAMMER_RECTYPE_DB;
1915 cursor.key_end.rec_type = HAMMER_RECTYPE_DB;
1916 cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;
b84de5af
MD
1917 } else
1918#endif
1919 {
c0ade690 1920 ran_end = bio->bio_offset + bp->b_bufsize;
a89aec1b
MD
1921 cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA;
1922 cursor.key_end.rec_type = HAMMER_RECTYPE_DATA;
195c19a1
MD
1923 tmp64 = ran_end + MAXPHYS + 1; /* work-around GCC-4 bug */
1924 if (tmp64 < ran_end)
a89aec1b
MD
1925 cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;
1926 else
7f7c1f84 1927 cursor.key_end.key = ran_end + MAXPHYS + 1;
a89aec1b 1928 }
d26d0ae9 1929 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
8cd0a023 1930
4e17f465 1931 error = hammer_ip_first(&cursor);
8cd0a023
MD
1932 boff = 0;
1933
a89aec1b 1934 while (error == 0) {
47637bff
MD
1935 /*
1936 * Get the base file offset of the record. The key for
1937 * data records is (base + bytes) rather then (base).
1938 */
11ad5ade 1939 base = &cursor.leaf->base;
11ad5ade 1940 rec_offset = base->key - cursor.leaf->data_len;
8cd0a023 1941
66325755 1942 /*
a89aec1b 1943 * Calculate the gap, if any, and zero-fill it.
1fef775e
MD
1944 *
1945 * n is the offset of the start of the record verses our
1946 * current seek offset in the bio.
66325755 1947 */
8cd0a023
MD
1948 n = (int)(rec_offset - (bio->bio_offset + boff));
1949 if (n > 0) {
a89aec1b
MD
1950 if (n > bp->b_bufsize - boff)
1951 n = bp->b_bufsize - boff;
8cd0a023
MD
1952 bzero((char *)bp->b_data + boff, n);
1953 boff += n;
1954 n = 0;
66325755 1955 }
8cd0a023
MD
1956
1957 /*
1958 * Calculate the data offset in the record and the number
1959 * of bytes we can copy.
a89aec1b 1960 *
1fef775e
MD
1961 * There are two degenerate cases. First, boff may already
1962 * be at bp->b_bufsize. Secondly, the data offset within
1963 * the record may exceed the record's size.
8cd0a023
MD
1964 */
1965 roff = -n;
b84de5af 1966 rec_offset += roff;
11ad5ade 1967 n = cursor.leaf->data_len - roff;
1fef775e
MD
1968 if (n <= 0) {
1969 kprintf("strategy_read: bad n=%d roff=%d\n", n, roff);
1970 n = 0;
1971 } else if (n > bp->b_bufsize - boff) {
8cd0a023 1972 n = bp->b_bufsize - boff;
1fef775e 1973 }
059819e3 1974
b84de5af 1975 /*
47637bff
MD
1976 * Deal with cached truncations. This cool bit of code
1977 * allows truncate()/ftruncate() to avoid having to sync
1978 * the file.
1979 *
1980 * If the frontend is truncated then all backend records are
1981 * subject to the frontend's truncation.
1982 *
1983 * If the backend is truncated then backend records on-disk
1984 * (but not in-memory) are subject to the backend's
1985 * truncation. In-memory records owned by the backend
1986 * represent data written after the truncation point on the
1987 * backend and must not be truncated.
1988 *
1989 * Truncate operations deal with frontend buffer cache
1990 * buffers and frontend-owned in-memory records synchronously.
b84de5af 1991 */
47637bff
MD
1992 if (ip->flags & HAMMER_INODE_TRUNCATED) {
1993 if (hammer_cursor_ondisk(&cursor) ||
1994 cursor.iprec->flush_state == HAMMER_FST_FLUSH) {
1995 if (ip->trunc_off <= rec_offset)
1996 n = 0;
1997 else if (ip->trunc_off < rec_offset + n)
1998 n = (int)(ip->trunc_off - rec_offset);
1999 }
2000 }
2001 if (ip->sync_flags & HAMMER_INODE_TRUNCATED) {
2002 if (hammer_cursor_ondisk(&cursor)) {
2003 if (ip->sync_trunc_off <= rec_offset)
2004 n = 0;
2005 else if (ip->sync_trunc_off < rec_offset + n)
2006 n = (int)(ip->sync_trunc_off - rec_offset);
2007 }
2008 }
b84de5af
MD
2009
2010 /*
47637bff
MD
2011 * Try to issue a direct read into our bio if possible,
2012 * otherwise resolve the element data into a hammer_buffer
2013 * and copy.
b84de5af 2014 */
6aeaa7bd 2015 if (n && boff == 0 &&
a99b9ea2
MD
2016 ((cursor.leaf->data_offset + roff) & HAMMER_BUFMASK) == 0) {
2017 error = hammer_io_direct_read(
2018 trans.hmp,
2019 cursor.leaf->data_offset + roff,
2020 bio);
47637bff
MD
2021 goto done;
2022 } else if (n) {
2023 error = hammer_ip_resolve_data(&cursor);
2024 if (error == 0) {
2025 bcopy((char *)cursor.data + roff,
2026 (char *)bp->b_data + boff, n);
2027 }
b84de5af 2028 }
47637bff
MD
2029 if (error)
2030 break;
2031
2032 /*
2033 * Iterate until we have filled the request.
2034 */
2035 boff += n;
8cd0a023 2036 if (boff == bp->b_bufsize)
66325755 2037 break;
a89aec1b 2038 error = hammer_ip_next(&cursor);
66325755
MD
2039 }
2040
2041 /*
8cd0a023 2042 * There may have been a gap after the last record
66325755 2043 */
8cd0a023
MD
2044 if (error == ENOENT)
2045 error = 0;
2046 if (error == 0 && boff != bp->b_bufsize) {
7f7c1f84 2047 KKASSERT(boff < bp->b_bufsize);
8cd0a023
MD
2048 bzero((char *)bp->b_data + boff, bp->b_bufsize - boff);
2049 /* boff = bp->b_bufsize; */
2050 }
2051 bp->b_resid = 0;
059819e3
MD
2052 bp->b_error = error;
2053 if (error)
2054 bp->b_flags |= B_ERROR;
2055 biodone(ap->a_bio);
47637bff
MD
2056
2057done:
2058 if (cursor.node)
2059 hammer_cache_node(cursor.node, &ip->cache[1]);
2060 hammer_done_cursor(&cursor);
2061 hammer_done_transaction(&trans);
8cd0a023
MD
2062 return(error);
2063}
2064
a99b9ea2
MD
2065/*
2066 * BMAP operation - used to support cluster_read() only.
2067 *
2068 * (struct vnode *vp, off_t loffset, off_t *doffsetp, int *runp, int *runb)
2069 *
2070 * This routine may return EOPNOTSUPP if the opration is not supported for
2071 * the specified offset. The contents of the pointer arguments do not
2072 * need to be initialized in that case.
2073 *
2074 * If a disk address is available and properly aligned return 0 with
2075 * *doffsetp set to the zone-2 address, and *runp / *runb set appropriately
2076 * to the run-length relative to that offset. Callers may assume that
2077 * *doffsetp is valid if 0 is returned, even if *runp is not sufficiently
2078 * large, so return EOPNOTSUPP if it is not sufficiently large.
2079 */
2080static
2081int
2082hammer_vop_bmap(struct vop_bmap_args *ap)
2083{
2084 struct hammer_transaction trans;
2085 struct hammer_inode *ip;
2086 struct hammer_cursor cursor;
2087 hammer_base_elm_t base;
2088 int64_t rec_offset;
2089 int64_t ran_end;
2090 int64_t tmp64;
2091 int64_t base_offset;
2092 int64_t base_disk_offset;
2093 int64_t last_offset;
2094 hammer_off_t last_disk_offset;
2095 hammer_off_t disk_offset;
2096 int rec_len;
2097 int error;
2098
2099 ip = ap->a_vp->v_data;
2100
2101 /*
2102 * We can only BMAP regular files. We can't BMAP database files,
2103 * directories, etc.
2104 */
2105 if (ip->ino_data.obj_type != HAMMER_OBJTYPE_REGFILE)
2106 return(EOPNOTSUPP);
2107
2108 /*
2109 * bmap is typically called with runp/runb both NULL when used
2110 * for writing. We do not support BMAP for writing atm.
2111 */
2112 if (ap->a_runp == NULL && ap->a_runb == NULL)
2113 return(EOPNOTSUPP);
2114
2115 /*
2116 * Scan the B-Tree to acquire blockmap addresses, then translate
2117 * to raw addresses.
2118 */
2119 hammer_simple_transaction(&trans, ip->hmp);
cb51be26
MD
2120#if 0
2121 kprintf("bmap_beg %016llx ip->cache %p\n", ap->a_loffset, ip->cache[1]);
2122#endif
a99b9ea2
MD
2123 hammer_init_cursor(&trans, &cursor, &ip->cache[1], ip);
2124
2125 /*
2126 * Key range (begin and end inclusive) to scan. Note that the key's
2127 * stored in the actual records represent BASE+LEN, not BASE. The
2128 * first record containing bio_offset will have a key > bio_offset.
2129 */
2130 cursor.key_beg.localization = HAMMER_LOCALIZE_MISC;
2131 cursor.key_beg.obj_id = ip->obj_id;
2132 cursor.key_beg.create_tid = 0;
2133 cursor.key_beg.delete_tid = 0;
2134 cursor.key_beg.obj_type = 0;
2135 if (ap->a_runb)
2136 cursor.key_beg.key = ap->a_loffset - MAXPHYS + 1;
2137 else
2138 cursor.key_beg.key = ap->a_loffset + 1;
2139 if (cursor.key_beg.key < 0)
2140 cursor.key_beg.key = 0;
2141 cursor.asof = ip->obj_asof;
bf3b416b 2142 cursor.flags |= HAMMER_CURSOR_ASOF;
a99b9ea2
MD
2143
2144 cursor.key_end = cursor.key_beg;
2145 KKASSERT(ip->ino_data.obj_type == HAMMER_OBJTYPE_REGFILE);
2146
2147 ran_end = ap->a_loffset + MAXPHYS;
2148 cursor.key_beg.rec_type = HAMMER_RECTYPE_DATA;
2149 cursor.key_end.rec_type = HAMMER_RECTYPE_DATA;
2150 tmp64 = ran_end + MAXPHYS + 1; /* work-around GCC-4 bug */
2151 if (tmp64 < ran_end)
2152 cursor.key_end.key = 0x7FFFFFFFFFFFFFFFLL;
2153 else
2154 cursor.key_end.key = ran_end + MAXPHYS + 1;
2155
2156 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
2157
2158 error = hammer_ip_first(&cursor);
2159 base_offset = last_offset = 0;
2160 base_disk_offset = last_disk_offset = 0;
2161
2162 while (error == 0) {
2163 /*
2164 * Get the base file offset of the record. The key for
2165 * data records is (base + bytes) rather then (base).
2166 */
2167 base = &cursor.leaf->base;
2168 rec_offset = base->key - cursor.leaf->data_len;
2169 rec_len = cursor.leaf->data_len;
2170
2171 /*
2172 * Incorporate any cached truncation
2173 */
2174 if (ip->flags & HAMMER_INODE_TRUNCATED) {
2175 if (hammer_cursor_ondisk(&cursor) ||
2176 cursor.iprec->flush_state == HAMMER_FST_FLUSH) {
2177 if (ip->trunc_off <= rec_offset)
2178 rec_len = 0;
2179 else if (ip->trunc_off < rec_offset + rec_len)
2180 rec_len = (int)(ip->trunc_off - rec_offset);
2181 }
2182 }
2183 if (ip->sync_flags & HAMMER_INODE_TRUNCATED) {
2184 if (hammer_cursor_ondisk(&cursor)) {
2185 if (ip->sync_trunc_off <= rec_offset)
2186 rec_len = 0;
2187 else if (ip->sync_trunc_off < rec_offset + rec_len)
2188 rec_len = (int)(ip->sync_trunc_off - rec_offset);
2189 }
2190 }
2191
2192 /*
2193 * Accumulate information. If we have hit a discontiguous
2194 * block reset base_offset unless we are already beyond the
2195 * requested offset. If we are, that's it, we stop.
2196 */
2197 disk_offset = hammer_blockmap_lookup(trans.hmp,
2198 cursor.leaf->data_offset,
2199 &error);
2200 if (error)
2201 break;
2202 if (rec_offset != last_offset ||
2203 disk_offset != last_disk_offset) {
2204 if (rec_offset > ap->a_loffset)
2205 break;
2206 base_offset = rec_offset;
2207 base_disk_offset = disk_offset;
2208 }
2209 last_offset = rec_offset + rec_len;
2210 last_disk_offset = disk_offset + rec_len;
2211
2212 error = hammer_ip_next(&cursor);
2213 }
2214
2215#if 0
2216 kprintf("BMAP %016llx: %016llx - %016llx\n",
2217 ap->a_loffset, base_offset, last_offset);
2218 kprintf("BMAP %16s: %016llx - %016llx\n",
2219 "", base_disk_offset, last_disk_offset);
2220#endif
2221
cb51be26 2222 if (cursor.node) {
a99b9ea2 2223 hammer_cache_node(cursor.node, &ip->cache[1]);
cb51be26
MD
2224#if 0
2225 kprintf("bmap_end2 %016llx ip->cache %p\n", ap->a_loffset, ip->cache[1]);
2226#endif
2227 }
a99b9ea2
MD
2228 hammer_done_cursor(&cursor);
2229 hammer_done_transaction(&trans);
2230
2231 if (base_offset == 0 || base_offset > ap->a_loffset ||
2232 last_offset < ap->a_loffset) {
2233 error = EOPNOTSUPP;
2234 } else {
2235 disk_offset = base_disk_offset + (ap->a_loffset - base_offset);
2236
2237 /*
2238 * If doffsetp is not aligned or the forward run size does
2239 * not cover a whole buffer, disallow the direct I/O.
2240 */
2241 if ((disk_offset & HAMMER_BUFMASK) ||
2242 (last_offset - ap->a_loffset) < HAMMER_BUFSIZE) {
2243 error = EOPNOTSUPP;
2244 } else {
2245 *ap->a_doffsetp = disk_offset;
2246 if (ap->a_runb)
2247 *ap->a_runb = ap->a_loffset - base_offset;
2248 if (ap->a_runp)
2249 *ap->a_runp = last_offset - ap->a_loffset;
2250 error = 0;
2251 }
2252 }
2253 return(error);
2254}
2255
8cd0a023 2256/*
059819e3
MD
2257 * Write to a regular file. Because this is a strategy call the OS is
2258 * trying to actually sync data to the media. HAMMER can only flush
2259 * the entire inode (so the TID remains properly synchronized).
8cd0a023 2260 *
059819e3
MD
2261 * Basically all we do here is place the bio on the inode's flush queue
2262 * and activate the flusher.
8cd0a023
MD
2263 */
2264static
2265int
2266hammer_vop_strategy_write(struct vop_strategy_args *ap)
2267{
47637bff 2268 hammer_record_t record;
af209b0f 2269 hammer_mount_t hmp;
8cd0a023
MD
2270 hammer_inode_t ip;
2271 struct bio *bio;
2272 struct buf *bp;
0832c9bb
MD
2273 int bytes;
2274 int error;
8cd0a023
MD
2275
2276 bio = ap->a_bio;
2277 bp = bio->bio_buf;
2278 ip = ap->a_vp->v_data;
af209b0f 2279 hmp = ip->hmp;
d113fda1 2280
059819e3
MD
2281 if (ip->flags & HAMMER_INODE_RO) {
2282 bp->b_error = EROFS;
2283 bp->b_flags |= B_ERROR;
2284 biodone(ap->a_bio);
e63644f0 2285 hammer_cleanup_write_io(ip);
059819e3
MD
2286 return(EROFS);
2287 }
b84de5af 2288
29ce0677
MD
2289 /*
2290 * Interlock with inode destruction (no in-kernel or directory
2291 * topology visibility). If we queue new IO while trying to
2292 * destroy the inode we can deadlock the vtrunc call in
2293 * hammer_inode_unloadable_check().
2294 */
2295 if (ip->flags & (HAMMER_INODE_DELETING|HAMMER_INODE_DELETED)) {
2296 bp->b_resid = 0;
2297 biodone(ap->a_bio);
e63644f0 2298 hammer_cleanup_write_io(ip);
29ce0677
MD
2299 return(0);
2300 }
2301
b84de5af 2302 /*
a99b9ea2
MD
2303 * Reserve space and issue a direct-write from the front-end.
2304 * NOTE: The direct_io code will hammer_bread/bcopy smaller
2305 * allocations.
47637bff 2306 *
a99b9ea2
MD
2307 * An in-memory record will be installed to reference the storage
2308 * until the flusher can get to it.
47637bff
MD
2309 *
2310 * Since we own the high level bio the front-end will not try to
0832c9bb 2311 * do a direct-read until the write completes.
a99b9ea2
MD
2312 *
2313 * NOTE: The only time we do not reserve a full-sized buffers
2314 * worth of data is if the file is small. We do not try to
2315 * allocate a fragment (from the small-data zone) at the end of
2316 * an otherwise large file as this can lead to wildly separated
2317 * data.
47637bff 2318 */
0832c9bb
MD
2319 KKASSERT((bio->bio_offset & HAMMER_BUFMASK) == 0);
2320 KKASSERT(bio->bio_offset < ip->ino_data.size);
a99b9ea2
MD
2321 if (bio->bio_offset || ip->ino_data.size > HAMMER_BUFSIZE / 2)
2322 bytes = (bp->b_bufsize + HAMMER_BUFMASK) & ~HAMMER_BUFMASK;
b84de5af 2323 else
a99b9ea2 2324 bytes = ((int)ip->ino_data.size + 15) & ~15;
0832c9bb
MD
2325
2326 record = hammer_ip_add_bulk(ip, bio->bio_offset, bp->b_data,
2327 bytes, &error);
2328 if (record) {
af209b0f 2329 hammer_io_direct_write(hmp, &record->leaf, bio);
0832c9bb 2330 hammer_rel_mem_record(record);
af209b0f
MD
2331 if (hmp->rsv_recs > hammer_limit_recs &&
2332 ip->rsv_recs > hammer_limit_irecs / 10) {
0832c9bb 2333 hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
a99b9ea2 2334 } else if (ip->rsv_recs > hammer_limit_irecs / 2) {
af209b0f
MD
2335 hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
2336 }
0832c9bb 2337 } else {
a99b9ea2 2338 bp->b_bio2.bio_offset = NOOFFSET;
0832c9bb
MD
2339 bp->b_error = error;
2340 bp->b_flags |= B_ERROR;
2341 biodone(ap->a_bio);
2342 }
2343 hammer_cleanup_write_io(ip);
2344 return(error);
059819e3
MD
2345}
2346
2347/*
47637bff
MD
2348 * Clean-up after disposing of a dirty frontend buffer's data.
2349 * This is somewhat heuristical so try to be robust.
059819e3 2350 */
0832c9bb 2351static void
e63644f0
MD
2352hammer_cleanup_write_io(hammer_inode_t ip)
2353{
2354 if (ip->rsv_databufs) {
2355 --ip->rsv_databufs;
2356 --ip->hmp->rsv_databufs;
2357 }
2358}
2359
0832c9bb
MD
2360/*
2361 * We can lose track of dirty buffer cache buffers if we truncate, this
2362 * routine will resynchronize the count.
2363 */
2364static
2365void
2366hammer_update_rsv_databufs(hammer_inode_t ip)
2367{
2368 struct buf *bp;
2369 int delta;
2370 int n;
2371
2372 if (ip->vp) {
2373 n = 0;
2374 RB_FOREACH(bp, buf_rb_tree, &ip->vp->v_rbdirty_tree) {
2375 ++n;
2376 }
2377 } else {
2378 n = 0;
2379 }
2380 delta = n - ip->rsv_databufs;
2381 ip->rsv_databufs += delta;
2382 ip->hmp->rsv_databufs += delta;
2383}
2384
8cd0a023
MD
2385/*
2386 * dounlink - disconnect a directory entry
2387 *
2388 * XXX whiteout support not really in yet
2389 */
2390static int
b84de5af
MD
2391hammer_dounlink(hammer_transaction_t trans, struct nchandle *nch,
2392 struct vnode *dvp, struct ucred *cred, int flags)
8cd0a023 2393{
8cd0a023
MD
2394 struct namecache *ncp;
2395 hammer_inode_t dip;
2396 hammer_inode_t ip;
8cd0a023 2397 struct hammer_cursor cursor;
8cd0a023 2398 int64_t namekey;
11ad5ade 2399 int nlen, error;
8cd0a023
MD
2400
2401 /*
2402 * Calculate the namekey and setup the key range for the scan. This
2403 * works kinda like a chained hash table where the lower 32 bits
2404 * of the namekey synthesize the chain.
2405 *
2406 * The key range is inclusive of both key_beg and key_end.
2407 */
2408 dip = VTOI(dvp);
2409 ncp = nch->ncp;
d113fda1
MD
2410
2411 if (dip->flags & HAMMER_INODE_RO)
2412 return (EROFS);
2413
6a37e7e4
MD
2414 namekey = hammer_directory_namekey(ncp->nc_name, ncp->nc_nlen);
2415retry:
4e17f465 2416 hammer_init_cursor(trans, &cursor, &dip->cache[0], dip);
2f85fa4d 2417 cursor.key_beg.localization = HAMMER_LOCALIZE_MISC;
8cd0a023
MD
2418 cursor.key_beg.obj_id = dip->obj_id;
2419 cursor.key_beg.key = namekey;
d5530d22 2420 cursor.key_beg.create_tid = 0;
8cd0a023
MD
2421 cursor.key_beg.delete_tid = 0;
2422 cursor.key_beg.rec_type = HAMMER_RECTYPE_DIRENTRY;
2423 cursor.key_beg.obj_type = 0;
2424
2425 cursor.key_end = cursor.key_beg;
2426 cursor.key_end.key |= 0xFFFFFFFFULL;
d5530d22
MD
2427 cursor.asof = dip->obj_asof;
2428 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE | HAMMER_CURSOR_ASOF;
8cd0a023 2429
8cd0a023
MD
2430 /*
2431 * Scan all matching records (the chain), locate the one matching
2432 * the requested path component. info->last_error contains the
2433 * error code on search termination and could be 0, ENOENT, or
2434 * something else.
2435 *
2436 * The hammer_ip_*() functions merge in-memory records with on-disk
2437 * records for the purposes of the search.
2438 */
4e17f465
MD
2439 error = hammer_ip_first(&cursor);
2440
a89aec1b
MD
2441 while (error == 0) {
2442 error = hammer_ip_resolve_data(&cursor);
2443 if (error)
66325755 2444 break;
11ad5ade
MD
2445 nlen = cursor.leaf->data_len - HAMMER_ENTRY_NAME_OFF;
2446 KKASSERT(nlen > 0);
2447 if (ncp->nc_nlen == nlen &&
2448 bcmp(ncp->nc_name, cursor.data->entry.name, nlen) == 0) {
66325755
MD
2449 break;
2450 }
a89aec1b 2451 error = hammer_ip_next(&cursor);
66325755 2452 }
8cd0a023
MD
2453
2454 /*
2455 * If all is ok we have to get the inode so we can adjust nlinks.
269c5eab
MD
2456 * To avoid a deadlock with the flusher we must release the inode
2457 * lock on the directory when acquiring the inode for the entry.
b3deaf57
MD
2458 *
2459 * If the target is a directory, it must be empty.
8cd0a023 2460 */
66325755 2461 if (error == 0) {
269c5eab 2462 hammer_unlock(&cursor.ip->lock);
b84de5af 2463 ip = hammer_get_inode(trans, &dip->cache[1],
11ad5ade 2464 cursor.data->entry.obj_id,
d113fda1 2465 dip->hmp->asof, 0, &error);
269c5eab 2466 hammer_lock_sh(&cursor.ip->lock);
46fe7ae1 2467 if (error == ENOENT) {
11ad5ade 2468 kprintf("obj_id %016llx\n", cursor.data->entry.obj_id);
10a5d1ba 2469 Debugger("ENOENT unlinking object that should exist");
46fe7ae1 2470 }
1f07f686
MD
2471
2472 /*
2473 * If we are trying to remove a directory the directory must
2474 * be empty.
2475 *
2476 * WARNING: hammer_ip_check_directory_empty() may have to
2477 * terminate the cursor to avoid a deadlock. It is ok to
2478 * call hammer_done_cursor() twice.
2479 */
11ad5ade 2480 if (error == 0 && ip->ino_data.obj_type ==
b3deaf57 2481 HAMMER_OBJTYPE_DIRECTORY) {
98f7132d 2482 error = hammer_ip_check_directory_empty(trans, ip);
b3deaf57 2483 }
1f07f686 2484
6a37e7e4 2485 /*
1f07f686
MD
2486 * Delete the directory entry.
2487 *
6a37e7e4 2488 * WARNING: hammer_ip_del_directory() may have to terminate
1f07f686 2489 * the cursor to avoid a deadlock. It is ok to call
6a37e7e4
MD
2490 * hammer_done_cursor() twice.
2491 */
b84de5af 2492 if (error == 0) {
b84de5af
MD
2493 error = hammer_ip_del_directory(trans, &cursor,
2494 dip, ip);
b84de5af 2495 }
269c5eab 2496 hammer_done_cursor(&cursor);
8cd0a023
MD
2497 if (error == 0) {
2498 cache_setunresolved(nch);
2499 cache_setvp(nch, NULL);
2500 /* XXX locking */
2501 if (ip->vp)
2502 cache_inval_vp(ip->vp, CINV_DESTROY);
2503 }
af209b0f
MD
2504 if (ip)
2505 hammer_rel_inode(ip, 0);
269c5eab
MD
2506 } else {
2507 hammer_done_cursor(&cursor);
66325755 2508 }
6a37e7e4
MD
2509 if (error == EDEADLK)
2510 goto retry;
9c448776 2511
66325755 2512 return (error);
66325755
MD
2513}
2514
7a04d74f
MD
2515/************************************************************************
2516 * FIFO AND SPECFS OPS *
2517 ************************************************************************
2518 *
2519 */
2520
2521static int
2522hammer_vop_fifoclose (struct vop_close_args *ap)
2523{
2524 /* XXX update itimes */
2525 return (VOCALL(&fifo_vnode_vops, &ap->a_head));
2526}
2527
2528static int
2529hammer_vop_fiforead (struct vop_read_args *ap)
2530{
2531 int error;
2532
2533 error = VOCALL(&fifo_vnode_vops, &ap->a_head);
2534 /* XXX update access time */
2535 return (error);
2536}
2537
2538static int
2539hammer_vop_fifowrite (struct vop_write_args *ap)
2540{
2541 int error;
2542
2543 error = VOCALL(&fifo_vnode_vops, &ap->a_head);
2544 /* XXX update access time */
2545 return (error);
2546}
2547
2548static int
2549hammer_vop_specclose (struct vop_close_args *ap)
2550{
2551 /* XXX update itimes */
2552 return (VOCALL(&spec_vnode_vops, &ap->a_head));
2553}
2554
2555static int
2556hammer_vop_specread (struct vop_read_args *ap)
2557{
2558 /* XXX update access time */
2559 return (VOCALL(&spec_vnode_vops, &ap->a_head));
2560}
2561
2562static int
2563hammer_vop_specwrite (struct vop_write_args *ap)
2564{
2565 /* XXX update last change time */
2566 return (VOCALL(&spec_vnode_vops, &ap->a_head));
2567}
2568