hammer2 - Refactor frontend part 14/many
[dragonfly.git] / sys / vfs / hammer2 / hammer2_vnops.c
CommitLineData
e118c14f 1/*
b2900845 2 * Copyright (c) 2011-2015 The DragonFly Project. All rights reserved.
e118c14f
MD
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
355d67fc 7 * by Daniel Flores (GSOC 2013 - mentored by Matthew Dillon, compression)
e118c14f
MD
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 * 3. Neither the name of The DragonFly Project nor the names of its
20 * contributors may be used to endorse or promote products derived
21 * from this software without specific, prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
27 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
33 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
ea155208
MD
36/*
37 * Kernel Filesystem interface
38 *
39 * NOTE! local ipdata pointers must be reloaded on any modifying operation
40 * to the inode as its underlying chain may have changed.
41 */
42
703720e4
MD
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/kernel.h>
46#include <sys/fcntl.h>
47#include <sys/buf.h>
48#include <sys/proc.h>
49#include <sys/namei.h>
50#include <sys/mount.h>
51#include <sys/vnode.h>
f0206a67 52#include <sys/mountctl.h>
e028fa74 53#include <sys/dirent.h>
4e2004ea 54#include <sys/uio.h>
355d67fc 55#include <sys/objcache.h>
41c34a6d
MD
56#include <sys/event.h>
57#include <sys/file.h>
58#include <vfs/fifofs/fifo.h>
703720e4
MD
59
60#include "hammer2.h"
db71f61f 61
4e2004ea
MD
62static int hammer2_read_file(hammer2_inode_t *ip, struct uio *uio,
63 int seqcount);
c603b86b
MD
64static int hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
65 int ioflag, int seqcount);
355d67fc
MD
66static void hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize);
67static void hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize);
355d67fc 68
2ed4fece 69struct objcache *cache_xops;
1f6671f8 70
b2b78aaa
MD
71static __inline
72void
73hammer2_knote(struct vnode *vp, int flags)
74{
75 if (flags)
76 KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags);
77}
78
703720e4
MD
79/*
80 * Last reference to a vnode is going away but it is still cached.
81 */
e118c14f 82static
703720e4 83int
e118c14f 84hammer2_vop_inactive(struct vop_inactive_args *ap)
703720e4 85{
e2e9e2db 86 hammer2_inode_t *ip;
703720e4 87 struct vnode *vp;
703720e4 88
05dd26e4 89 LOCKSTART;
703720e4
MD
90 vp = ap->a_vp;
91 ip = VTOI(vp);
703720e4 92
df9ea374
MD
93 /*
94 * Degenerate case
95 */
96 if (ip == NULL) {
97 vrecycle(vp);
05dd26e4 98 LOCKSTOP;
df9ea374
MD
99 return (0);
100 }
101
214f4a77 102 /*
1f6671f8
MD
103 * Check for deleted inodes and recycle immediately on the last
104 * release. Be sure to destroy any left-over buffer cache buffers
105 * so we do not waste time trying to flush them.
850687d2
MD
106 *
107 * WARNING: nvtruncbuf() can only be safely called without the inode
108 * lock held due to the way our write thread works.
214f4a77 109 */
1f6671f8 110 if (ip->flags & HAMMER2_INODE_ISUNLINKED) {
da6f36f4
MD
111 hammer2_key_t lbase;
112 int nblksize;
113
1f6671f8
MD
114 /*
115 * Detect updates to the embedded data which may be
116 * synchronized by the strategy code. Simply mark the
117 * inode modified so it gets picked up by our normal flush.
118 */
da6f36f4 119 nblksize = hammer2_calc_logical(ip, 0, &lbase, NULL);
850687d2 120 nvtruncbuf(vp, 0, nblksize, 0, 0);
214f4a77
MD
121 vrecycle(vp);
122 }
05dd26e4 123 LOCKSTOP;
703720e4
MD
124 return (0);
125}
126
127/*
128 * Reclaim a vnode so that it can be reused; after the inode is
129 * disassociated, the filesystem must manage it alone.
130 */
e118c14f 131static
703720e4 132int
e118c14f 133hammer2_vop_reclaim(struct vop_reclaim_args *ap)
703720e4 134{
e2e9e2db 135 hammer2_inode_t *ip;
506bd6d1 136 hammer2_pfs_t *pmp;
b7926f31 137 struct vnode *vp;
703720e4 138
05dd26e4 139 LOCKSTART;
703720e4
MD
140 vp = ap->a_vp;
141 ip = VTOI(vp);
05dd26e4
MD
142 if (ip == NULL) {
143 LOCKSTOP;
9c2e0de0 144 return(0);
05dd26e4 145 }
eae2ed61 146 pmp = ip->pmp;
ea155208
MD
147
148 /*
149 * The final close of a deleted file or directory marks it for
044541cd 150 * destruction. The DELETED flag allows the flusher to shortcut
ea155208
MD
151 * any modified blocks still unflushed (that is, just ignore them).
152 *
153 * HAMMER2 usually does not try to optimize the freemap by returning
154 * deleted blocks to it as it does not usually know how many snapshots
8138a154 155 * might be referencing portions of the file/dir.
ea155208 156 */
703720e4 157 vp->v_data = NULL;
0e92b724 158 ip->vp = NULL;
a7720be7 159
a02dfba1 160 /*
a7720be7
MD
161 * NOTE! We do not attempt to flush chains here, flushing is
162 * really fragile and could also deadlock.
a02dfba1 163 */
eddc656a 164 vclrisdirty(vp);
eae2ed61
MD
165
166 /*
1f6671f8
MD
167 * Once reclaimed the inode is disconnected from the normal flush
168 * mechanism and must be tracked
169 *
eae2ed61
MD
170 * A reclaim can occur at any time so we cannot safely start a
171 * transaction to handle reclamation of unlinked files. Instead,
172 * the ip is left with a reference and placed on a linked list and
173 * handled later on.
174 */
1f6671f8 175 if (ip->flags & HAMMER2_INODE_ISUNLINKED) {
eae2ed61
MD
176 hammer2_inode_unlink_t *ipul;
177
178 ipul = kmalloc(sizeof(*ipul), pmp->minode, M_WAITOK | M_ZERO);
179 ipul->ip = ip;
180
94491fa0 181 hammer2_spin_ex(&pmp->list_spin);
eae2ed61 182 TAILQ_INSERT_TAIL(&pmp->unlinkq, ipul, entry);
94491fa0 183 hammer2_spin_unex(&pmp->list_spin);
eae2ed61
MD
184 /* retain ref from vp for ipul */
185 } else {
eae2ed61
MD
186 hammer2_inode_drop(ip); /* vp ref */
187 }
54eb943b
MD
188
189 /*
190 * XXX handle background sync when ip dirty, kernel will no longer
191 * notify us regarding this inode because there is no longer a
192 * vnode attached to it.
193 */
703720e4 194
05dd26e4 195 LOCKSTOP;
703720e4
MD
196 return (0);
197}
198
e118c14f 199static
703720e4 200int
e118c14f 201hammer2_vop_fsync(struct vop_fsync_args *ap)
703720e4 202{
e2e9e2db 203 hammer2_inode_t *ip;
b7926f31
MD
204 struct vnode *vp;
205
05dd26e4 206 LOCKSTART;
b7926f31
MD
207 vp = ap->a_vp;
208 ip = VTOI(vp);
b7926f31 209
a4dc31e0 210#if 0
623d43d4 211 /* XXX can't do this yet */
c603b86b 212 hammer2_trans_init(ip->pmp, HAMMER2_TRANS_ISFLUSH);
b7926f31 213 vfsync(vp, ap->a_waitfor, 1, NULL, NULL);
a4dc31e0 214#endif
c603b86b 215 hammer2_trans_init(ip->pmp, 0);
a4dc31e0 216 vfsync(vp, ap->a_waitfor, 1, NULL, NULL);
6ba3b984
MD
217
218 /*
219 * Calling chain_flush here creates a lot of duplicative
220 * COW operations due to non-optimal vnode ordering.
221 *
222 * Only do it for an actual fsync() syscall. The other forms
223 * which call this function will eventually call chain_flush
224 * on the volume root as a catch-all, which is far more optimal.
225 */
159c3ca2 226 hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS);
2121ef11 227 if (ip->flags & HAMMER2_INODE_MODIFIED)
c603b86b 228 hammer2_inode_fsync(ip, NULL);
2121ef11 229 hammer2_inode_unlock(ip, NULL);
c603b86b 230 hammer2_trans_done(ip->pmp);
a02dfba1 231
05dd26e4 232 LOCKSTOP;
b7926f31 233 return (0);
703720e4
MD
234}
235
e118c14f 236static
703720e4 237int
e118c14f 238hammer2_vop_access(struct vop_access_args *ap)
703720e4 239{
37494cab
MD
240 hammer2_inode_t *ip = VTOI(ap->a_vp);
241 uid_t uid;
242 gid_t gid;
243 int error;
244
05dd26e4 245 LOCKSTART;
2121ef11 246 hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED);
159c3ca2
MD
247 uid = hammer2_to_unix_xid(&ip->meta.uid);
248 gid = hammer2_to_unix_xid(&ip->meta.gid);
249 error = vop_helper_access(ap, uid, gid, ip->meta.mode, ip->meta.uflags);
250 hammer2_inode_unlock(ip, NULL);
37494cab 251
05dd26e4 252 LOCKSTOP;
37494cab 253 return (error);
703720e4
MD
254}
255
e118c14f 256static
703720e4 257int
e118c14f 258hammer2_vop_getattr(struct vop_getattr_args *ap)
703720e4 259{
506bd6d1 260 hammer2_pfs_t *pmp;
cd4b3d92 261 hammer2_inode_t *ip;
703720e4
MD
262 struct vnode *vp;
263 struct vattr *vap;
703720e4 264
05dd26e4 265 LOCKSTART;
703720e4
MD
266 vp = ap->a_vp;
267 vap = ap->a_vap;
268
cd4b3d92 269 ip = VTOI(vp);
e4e20f48 270 pmp = ip->pmp;
cd4b3d92 271
2121ef11 272 hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED);
703720e4 273
e4e20f48 274 vap->va_fsid = pmp->mp->mnt_stat.f_fsid.val[0];
159c3ca2
MD
275 vap->va_fileid = ip->meta.inum;
276 vap->va_mode = ip->meta.mode;
277 vap->va_nlink = ip->meta.nlinks;
278 vap->va_uid = hammer2_to_unix_xid(&ip->meta.uid);
279 vap->va_gid = hammer2_to_unix_xid(&ip->meta.gid);
cd4b3d92
MD
280 vap->va_rmajor = 0;
281 vap->va_rminor = 0;
7a9b14a0 282 vap->va_size = ip->meta.size; /* protected by shared lock */
df9ea374 283 vap->va_blocksize = HAMMER2_PBUFSIZE;
159c3ca2
MD
284 vap->va_flags = ip->meta.uflags;
285 hammer2_time_to_timespec(ip->meta.ctime, &vap->va_ctime);
286 hammer2_time_to_timespec(ip->meta.mtime, &vap->va_mtime);
287 hammer2_time_to_timespec(ip->meta.mtime, &vap->va_atime);
cd4b3d92 288 vap->va_gen = 1;
159c3ca2
MD
289 vap->va_bytes = ip->bref.data_count;
290 vap->va_type = hammer2_get_vtype(ip->meta.type);
cd4b3d92 291 vap->va_filerev = 0;
159c3ca2
MD
292 vap->va_uid_uuid = ip->meta.uid;
293 vap->va_gid_uuid = ip->meta.gid;
cd4b3d92
MD
294 vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
295 VA_FSID_UUID_VALID;
703720e4 296
159c3ca2 297 hammer2_inode_unlock(ip, NULL);
703720e4 298
05dd26e4 299 LOCKSTOP;
703720e4
MD
300 return (0);
301}
302
3ac6a319
MD
303static
304int
305hammer2_vop_setattr(struct vop_setattr_args *ap)
306{
3ac6a319
MD
307 hammer2_inode_t *ip;
308 struct vnode *vp;
309 struct vattr *vap;
310 int error;
311 int kflags = 0;
b2b78aaa 312 uint64_t ctime;
3ac6a319 313
05dd26e4 314 LOCKSTART;
3ac6a319
MD
315 vp = ap->a_vp;
316 vap = ap->a_vap;
b2b78aaa 317 hammer2_update_time(&ctime);
3ac6a319
MD
318
319 ip = VTOI(vp);
3ac6a319 320
05dd26e4
MD
321 if (ip->pmp->ronly) {
322 LOCKSTOP;
3ac6a319 323 return(EROFS);
05dd26e4 324 }
3ac6a319 325
278ab2b2 326 hammer2_pfs_memory_wait(ip->pmp);
c603b86b 327 hammer2_trans_init(ip->pmp, 0);
2121ef11 328 hammer2_inode_lock(ip, 0);
3ac6a319
MD
329 error = 0;
330
331 if (vap->va_flags != VNOVAL) {
332 u_int32_t flags;
333
2121ef11 334 flags = ip->meta.uflags;
3ac6a319 335 error = vop_helper_setattr_flags(&flags, vap->va_flags,
2121ef11 336 hammer2_to_unix_xid(&ip->meta.uid),
b0f58de8 337 ap->a_cred);
3ac6a319 338 if (error == 0) {
2121ef11 339 if (ip->meta.uflags != flags) {
c603b86b 340 hammer2_inode_modify(ip);
2121ef11
MD
341 ip->meta.uflags = flags;
342 ip->meta.ctime = ctime;
3ac6a319
MD
343 kflags |= NOTE_ATTRIB;
344 }
2121ef11 345 if (ip->meta.uflags & (IMMUTABLE | APPEND)) {
3ac6a319
MD
346 error = 0;
347 goto done;
348 }
349 }
b2b78aaa 350 goto done;
3ac6a319 351 }
2121ef11 352 if (ip->meta.uflags & (IMMUTABLE | APPEND)) {
3ac6a319
MD
353 error = EPERM;
354 goto done;
355 }
b2b78aaa 356 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
2121ef11
MD
357 mode_t cur_mode = ip->meta.mode;
358 uid_t cur_uid = hammer2_to_unix_xid(&ip->meta.uid);
359 gid_t cur_gid = hammer2_to_unix_xid(&ip->meta.gid);
b2b78aaa
MD
360 uuid_t uuid_uid;
361 uuid_t uuid_gid;
362
363 error = vop_helper_chown(ap->a_vp, vap->va_uid, vap->va_gid,
364 ap->a_cred,
365 &cur_uid, &cur_gid, &cur_mode);
366 if (error == 0) {
367 hammer2_guid_to_uuid(&uuid_uid, cur_uid);
368 hammer2_guid_to_uuid(&uuid_gid, cur_gid);
2121ef11
MD
369 if (bcmp(&uuid_uid, &ip->meta.uid, sizeof(uuid_uid)) ||
370 bcmp(&uuid_gid, &ip->meta.gid, sizeof(uuid_gid)) ||
371 ip->meta.mode != cur_mode
b2b78aaa 372 ) {
c603b86b 373 hammer2_inode_modify(ip);
2121ef11
MD
374 ip->meta.uid = uuid_uid;
375 ip->meta.gid = uuid_gid;
376 ip->meta.mode = cur_mode;
377 ip->meta.ctime = ctime;
b2b78aaa
MD
378 }
379 kflags |= NOTE_ATTRIB;
380 }
381 }
3ac6a319
MD
382
383 /*
384 * Resize the file
385 */
7a9b14a0 386 if (vap->va_size != VNOVAL && ip->meta.size != vap->va_size) {
3ac6a319
MD
387 switch(vp->v_type) {
388 case VREG:
7a9b14a0 389 if (vap->va_size == ip->meta.size)
3ac6a319 390 break;
7a9b14a0 391 if (vap->va_size < ip->meta.size) {
355d67fc 392 hammer2_truncate_file(ip, vap->va_size);
3ac6a319 393 } else {
355d67fc 394 hammer2_extend_file(ip, vap->va_size);
3ac6a319 395 }
c603b86b 396 hammer2_inode_modify(ip);
2121ef11 397 ip->meta.mtime = ctime;
3ac6a319
MD
398 break;
399 default:
400 error = EINVAL;
401 goto done;
402 }
403 }
b2b78aaa
MD
404#if 0
405 /* atime not supported */
406 if (vap->va_atime.tv_sec != VNOVAL) {
c603b86b 407 hammer2_inode_modify(ip);
2121ef11 408 ip->meta.atime = hammer2_timespec_to_time(&vap->va_atime);
b2b78aaa
MD
409 kflags |= NOTE_ATTRIB;
410 }
411#endif
b2b78aaa 412 if (vap->va_mode != (mode_t)VNOVAL) {
2121ef11
MD
413 mode_t cur_mode = ip->meta.mode;
414 uid_t cur_uid = hammer2_to_unix_xid(&ip->meta.uid);
415 gid_t cur_gid = hammer2_to_unix_xid(&ip->meta.gid);
b2b78aaa
MD
416
417 error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred,
418 cur_uid, cur_gid, &cur_mode);
2121ef11 419 if (error == 0 && ip->meta.mode != cur_mode) {
c603b86b 420 hammer2_inode_modify(ip);
2121ef11
MD
421 ip->meta.mode = cur_mode;
422 ip->meta.ctime = ctime;
b2b78aaa
MD
423 kflags |= NOTE_ATTRIB;
424 }
425 }
355d67fc 426
2121ef11 427 if (vap->va_mtime.tv_sec != VNOVAL) {
c603b86b 428 hammer2_inode_modify(ip);
2121ef11
MD
429 ip->meta.mtime = hammer2_timespec_to_time(&vap->va_mtime);
430 kflags |= NOTE_ATTRIB;
431 }
432
433done:
355d67fc
MD
434 /*
435 * If a truncation occurred we must call inode_fsync() now in order
436 * to trim the related data chains, otherwise a later expansion can
437 * cause havoc.
2121ef11
MD
438 *
439 * If an extend occured that changed the DIRECTDATA state, we must
440 * call inode_fsync now in order to prepare the inode's indirect
441 * block table.
355d67fc 442 */
2121ef11 443 if (ip->flags & HAMMER2_INODE_RESIZED)
c603b86b 444 hammer2_inode_fsync(ip, NULL);
eddc656a
MD
445
446 /*
2121ef11 447 * Cleanup.
eddc656a 448 */
2121ef11 449 hammer2_inode_unlock(ip, NULL);
c603b86b 450 hammer2_trans_done(ip->pmp);
eddc656a
MD
451 hammer2_knote(ip->vp, kflags);
452
05dd26e4 453 LOCKSTOP;
3ac6a319
MD
454 return (error);
455}
456
e118c14f 457static
703720e4 458int
e118c14f 459hammer2_vop_readdir(struct vop_readdir_args *ap)
703720e4 460{
c847e838 461 hammer2_xop_readdir_t *xop;
278ab2b2 462 hammer2_blockref_t bref;
c847e838 463 hammer2_inode_t *ip;
476d2aad 464 hammer2_tid_t inum;
e028fa74
MD
465 hammer2_key_t lkey;
466 struct uio *uio;
467 off_t *cookies;
468 off_t saveoff;
469 int cookie_index;
470 int ncookies;
471 int error;
c847e838 472 int eofflag;
e028fa74
MD
473 int dtype;
474 int r;
475
05dd26e4 476 LOCKSTART;
e028fa74 477 ip = VTOI(ap->a_vp);
e028fa74
MD
478 uio = ap->a_uio;
479 saveoff = uio->uio_offset;
c847e838
MD
480 eofflag = 0;
481 error = 0;
e028fa74
MD
482
483 /*
484 * Setup cookies directory entry cookies if requested
485 */
486 if (ap->a_ncookies) {
487 ncookies = uio->uio_resid / 16 + 1;
488 if (ncookies > 1024)
489 ncookies = 1024;
490 cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
491 } else {
492 ncookies = -1;
493 cookies = NULL;
494 }
495 cookie_index = 0;
496
c847e838 497 hammer2_inode_lock(ip, HAMMER2_RESOLVE_SHARED);
476d2aad 498
e028fa74
MD
499 /*
500 * Handle artificial entries. To ensure that only positive 64 bit
501 * quantities are returned to userland we always strip off bit 63.
502 * The hash code is designed such that codes 0x0000-0x7FFF are not
503 * used, allowing us to use these codes for articial entries.
504 *
505 * Entry 0 is used for '.' and entry 1 is used for '..'. Do not
506 * allow '..' to cross the mount point into (e.g.) the super-root.
507 */
e028fa74 508 if (saveoff == 0) {
c847e838 509 inum = ip->meta.inum & HAMMER2_DIRHASH_USERMSK;
476d2aad 510 r = vop_write_dirent(&error, uio, inum, DT_DIR, 1, ".");
e028fa74
MD
511 if (r)
512 goto done;
513 if (cookies)
514 cookies[cookie_index] = saveoff;
515 ++saveoff;
516 ++cookie_index;
517 if (cookie_index == ncookies)
518 goto done;
519 }
476d2aad 520
e028fa74 521 if (saveoff == 1) {
476d2aad
MD
522 /*
523 * Be careful with lockorder when accessing ".."
e2e9e2db 524 *
0dea3156 525 * (ip is the current dir. xip is the parent dir).
476d2aad 526 */
c847e838
MD
527 inum = ip->meta.inum & HAMMER2_DIRHASH_USERMSK;
528 if (ip->pip && ip != ip->pmp->iroot)
529 inum = ip->pip->meta.inum & HAMMER2_DIRHASH_USERMSK;
476d2aad 530 r = vop_write_dirent(&error, uio, inum, DT_DIR, 2, "..");
e028fa74
MD
531 if (r)
532 goto done;
533 if (cookies)
534 cookies[cookie_index] = saveoff;
535 ++saveoff;
536 ++cookie_index;
537 if (cookie_index == ncookies)
538 goto done;
539 }
540
541 lkey = saveoff | HAMMER2_DIRHASH_VISIBLE;
1fca819a
MD
542 if (hammer2_debug & 0x0020)
543 kprintf("readdir: lkey %016jx\n", lkey);
c847e838
MD
544 if (error)
545 goto done;
e028fa74 546
e2e9e2db 547 /*
c847e838
MD
548 * Use XOP for cluster scan.
549 *
278ab2b2 550 * parent is the inode cluster, already locked for us. Don't
e2e9e2db
MD
551 * double lock shared locks as this will screw up upgrades.
552 */
c603b86b 553 xop = &hammer2_xop_alloc(ip)->xop_readdir;
c847e838 554 xop->head.lkey = lkey;
c603b86b 555 hammer2_xop_start(&xop->head, hammer2_xop_readdir);
c847e838
MD
556
557 for (;;) {
558 const hammer2_inode_data_t *ripdata;
278ab2b2 559
c603b86b 560 error = hammer2_xop_collect(&xop->head, 0);
c847e838
MD
561 if (error)
562 break;
563 if (cookie_index == ncookies)
564 break;
565 if (hammer2_debug & 0x0020)
566 kprintf("cluster chain %p %p\n",
567 xop->head.cluster.focus,
568 (xop->head.cluster.focus ?
569 xop->head.cluster.focus->data : (void *)-1));
570 ripdata = &hammer2_cluster_rdata(&xop->head.cluster)->ipdata;
571 hammer2_cluster_bref(&xop->head.cluster, &bref);
278ab2b2 572 if (bref.type == HAMMER2_BREF_TYPE_INODE) {
bca9f8e6 573 dtype = hammer2_get_dtype(ripdata);
278ab2b2 574 saveoff = bref.key & HAMMER2_DIRHASH_USERMSK;
c667909f 575 r = vop_write_dirent(&error, uio,
b0f58de8 576 ripdata->meta.inum &
c667909f 577 HAMMER2_DIRHASH_USERMSK,
476d2aad 578 dtype,
b0f58de8 579 ripdata->meta.name_len,
bca9f8e6 580 ripdata->filename);
c667909f
MD
581 if (r)
582 break;
583 if (cookies)
584 cookies[cookie_index] = saveoff;
585 ++cookie_index;
586 } else {
587 /* XXX chain error */
278ab2b2 588 kprintf("bad chain type readdir %d\n", bref.type);
c667909f 589 }
e028fa74 590 }
c847e838
MD
591 hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
592 if (error == ENOENT) {
593 error = 0;
594 eofflag = 1;
595 saveoff = (hammer2_key_t)-1;
596 } else {
597 saveoff = bref.key & HAMMER2_DIRHASH_USERMSK;
e513e77e 598 }
e028fa74 599done:
c847e838 600 hammer2_inode_unlock(ip, NULL);
e028fa74 601 if (ap->a_eofflag)
c847e838 602 *ap->a_eofflag = eofflag;
1fca819a
MD
603 if (hammer2_debug & 0x0020)
604 kprintf("readdir: done at %016jx\n", saveoff);
37aa19df 605 uio->uio_offset = saveoff & ~HAMMER2_DIRHASH_VISIBLE;
e028fa74
MD
606 if (error && cookie_index == 0) {
607 if (cookies) {
608 kfree(cookies, M_TEMP);
609 *ap->a_ncookies = 0;
610 *ap->a_cookies = NULL;
611 }
612 } else {
613 if (cookies) {
614 *ap->a_ncookies = cookie_index;
615 *ap->a_cookies = cookies;
616 }
617 }
05dd26e4 618 LOCKSTOP;
e028fa74 619 return (error);
703720e4
MD
620}
621
4e2004ea
MD
622/*
623 * hammer2_vop_readlink { vp, uio, cred }
624 */
625static
626int
627hammer2_vop_readlink(struct vop_readlink_args *ap)
628{
629 struct vnode *vp;
4e2004ea
MD
630 hammer2_inode_t *ip;
631 int error;
632
633 vp = ap->a_vp;
634 if (vp->v_type != VLNK)
635 return (EINVAL);
636 ip = VTOI(vp);
4e2004ea
MD
637
638 error = hammer2_read_file(ip, ap->a_uio, 0);
639 return (error);
640}
641
e118c14f 642static
703720e4 643int
e118c14f 644hammer2_vop_read(struct vop_read_args *ap)
703720e4 645{
db71f61f 646 struct vnode *vp;
db71f61f 647 hammer2_inode_t *ip;
db71f61f
MD
648 struct uio *uio;
649 int error;
650 int seqcount;
651 int bigread;
652
653 /*
654 * Read operations supported on this vnode?
655 */
656 vp = ap->a_vp;
657 if (vp->v_type != VREG)
658 return (EINVAL);
659
660 /*
661 * Misc
662 */
663 ip = VTOI(vp);
db71f61f
MD
664 uio = ap->a_uio;
665 error = 0;
666
667 seqcount = ap->a_ioflag >> 16;
668 bigread = (uio->uio_resid > 100 * 1024 * 1024);
669
4e2004ea 670 error = hammer2_read_file(ip, uio, seqcount);
db71f61f 671 return (error);
47902fef 672}
703720e4 673
e118c14f 674static
47902fef 675int
e118c14f 676hammer2_vop_write(struct vop_write_args *ap)
47902fef 677{
db71f61f 678 hammer2_inode_t *ip;
e2e9e2db
MD
679 thread_t td;
680 struct vnode *vp;
db71f61f
MD
681 struct uio *uio;
682 int error;
db71f61f 683 int seqcount;
db71f61f
MD
684
685 /*
686 * Read operations supported on this vnode?
687 */
688 vp = ap->a_vp;
5c88f2c7 689 if (vp->v_type != VREG)
db71f61f
MD
690 return (EINVAL);
691
692 /*
693 * Misc
694 */
695 ip = VTOI(vp);
db71f61f
MD
696 uio = ap->a_uio;
697 error = 0;
05dd26e4 698 if (ip->pmp->ronly) {
db71f61f 699 return (EROFS);
05dd26e4 700 }
db71f61f
MD
701
702 seqcount = ap->a_ioflag >> 16;
db71f61f
MD
703
704 /*
705 * Check resource limit
706 */
707 if (uio->uio_resid > 0 && (td = uio->uio_td) != NULL && td->td_proc &&
708 uio->uio_offset + uio->uio_resid >
709 td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
710 lwpsignal(td->td_proc, td->td_lwp, SIGXFSZ);
711 return (EFBIG);
712 }
713
db71f61f 714 /*
355d67fc
MD
715 * The transaction interlocks against flushes initiations
716 * (note: but will run concurrently with the actual flush).
3ac6a319 717 */
c603b86b
MD
718 hammer2_trans_init(ip->pmp, 0);
719 error = hammer2_write_file(ip, uio, ap->a_ioflag, seqcount);
720 hammer2_trans_done(ip->pmp);
ea155208 721
4e2004ea
MD
722 return (error);
723}
724
725/*
726 * Perform read operations on a file or symlink given an UNLOCKED
727 * inode and uio.
476d2aad
MD
728 *
729 * The passed ip is not locked.
4e2004ea
MD
730 */
731static
732int
733hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, int seqcount)
734{
e2e9e2db 735 hammer2_off_t size;
4e2004ea
MD
736 struct buf *bp;
737 int error;
738
739 error = 0;
740
741 /*
e2e9e2db 742 * UIO read loop.
94491fa0
MD
743 *
744 * WARNING! Assumes that the kernel interlocks size changes at the
745 * vnode level.
4e2004ea 746 */
0bdddbf4 747 hammer2_mtx_sh(&ip->lock);
7a9b14a0 748 size = ip->meta.size;
94491fa0 749 hammer2_mtx_unlock(&ip->lock);
e2e9e2db
MD
750
751 while (uio->uio_resid > 0 && uio->uio_offset < size) {
8cce658d
MD
752 hammer2_key_t lbase;
753 hammer2_key_t leof;
754 int lblksize;
755 int loff;
4e2004ea
MD
756 int n;
757
8cce658d
MD
758 lblksize = hammer2_calc_logical(ip, uio->uio_offset,
759 &lbase, &leof);
4e2004ea 760
8cce658d 761 error = cluster_read(ip->vp, leof, lbase, lblksize,
6ba3b984
MD
762 uio->uio_resid, seqcount * BKVASIZE,
763 &bp);
8cce658d 764
4e2004ea
MD
765 if (error)
766 break;
8cce658d
MD
767 loff = (int)(uio->uio_offset - lbase);
768 n = lblksize - loff;
4e2004ea
MD
769 if (n > uio->uio_resid)
770 n = uio->uio_resid;
e2e9e2db
MD
771 if (n > size - uio->uio_offset)
772 n = (int)(size - uio->uio_offset);
4e2004ea 773 bp->b_flags |= B_AGE;
8cce658d 774 uiomove((char *)bp->b_data + loff, n, uio);
4e2004ea
MD
775 bqrelse(bp);
776 }
777 return (error);
778}
779
780/*
355d67fc
MD
781 * Write to the file represented by the inode via the logical buffer cache.
782 * The inode may represent a regular file or a symlink.
783 *
784 * The inode must not be locked.
4e2004ea
MD
785 */
786static
787int
c603b86b
MD
788hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
789 int ioflag, int seqcount)
4e2004ea 790{
8cce658d 791 hammer2_key_t old_eof;
355d67fc 792 hammer2_key_t new_eof;
4e2004ea
MD
793 struct buf *bp;
794 int kflags;
795 int error;
355d67fc 796 int modified;
3ac6a319 797
4e2004ea
MD
798 /*
799 * Setup if append
94491fa0
MD
800 *
801 * WARNING! Assumes that the kernel interlocks size changes at the
802 * vnode level.
4e2004ea 803 */
0bdddbf4 804 hammer2_mtx_ex(&ip->lock);
4e2004ea 805 if (ioflag & IO_APPEND)
7a9b14a0
MD
806 uio->uio_offset = ip->meta.size;
807 old_eof = ip->meta.size;
9596b8c4 808
8cce658d
MD
809 /*
810 * Extend the file if necessary. If the write fails at some point
811 * we will truncate it back down to cover as much as we were able
812 * to write.
813 *
814 * Doing this now makes it easier to calculate buffer sizes in
815 * the loop.
816 */
355d67fc
MD
817 kflags = 0;
818 error = 0;
819 modified = 0;
820
821 if (uio->uio_offset + uio->uio_resid > old_eof) {
822 new_eof = uio->uio_offset + uio->uio_resid;
b2b78aaa 823 modified = 1;
355d67fc 824 hammer2_extend_file(ip, new_eof);
8cce658d 825 kflags |= NOTE_EXTEND;
355d67fc
MD
826 } else {
827 new_eof = old_eof;
8cce658d 828 }
2121ef11 829 hammer2_mtx_unlock(&ip->lock);
355d67fc 830
3ac6a319
MD
831 /*
832 * UIO write loop
db71f61f
MD
833 */
834 while (uio->uio_resid > 0) {
8cce658d 835 hammer2_key_t lbase;
db71f61f 836 int trivial;
d7bfb2cb 837 int endofblk;
8cce658d
MD
838 int lblksize;
839 int loff;
840 int n;
db71f61f
MD
841
842 /*
843 * Don't allow the buffer build to blow out the buffer
844 * cache.
845 */
355d67fc 846 if ((ioflag & IO_RECURSE) == 0)
01eabad4 847 bwillwrite(HAMMER2_PBUFSIZE);
8cce658d 848
db71f61f 849 /*
8cce658d
MD
850 * This nominally tells us how much we can cluster and
851 * what the logical buffer size needs to be. Currently
852 * we don't try to cluster the write and just handle one
853 * block at a time.
db71f61f 854 */
8cce658d 855 lblksize = hammer2_calc_logical(ip, uio->uio_offset,
355d67fc 856 &lbase, NULL);
8cce658d 857 loff = (int)(uio->uio_offset - lbase);
355d67fc 858
355d67fc 859 KKASSERT(lblksize <= 65536);
8cce658d
MD
860
861 /*
862 * Calculate bytes to copy this transfer and whether the
863 * copy completely covers the buffer or not.
864 */
865 trivial = 0;
866 n = lblksize - loff;
867 if (n > uio->uio_resid) {
868 n = uio->uio_resid;
355d67fc 869 if (loff == lbase && uio->uio_offset + n == new_eof)
db71f61f 870 trivial = 1;
d7bfb2cb
MD
871 endofblk = 0;
872 } else {
873 if (loff == 0)
874 trivial = 1;
875 endofblk = 1;
db71f61f
MD
876 }
877
8cce658d
MD
878 /*
879 * Get the buffer
880 */
db71f61f
MD
881 if (uio->uio_segflg == UIO_NOCOPY) {
882 /*
883 * Issuing a write with the same data backing the
884 * buffer. Instantiate the buffer to collect the
885 * backing vm pages, then read-in any missing bits.
886 *
887 * This case is used by vop_stdputpages().
888 */
8cce658d 889 bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0);
db71f61f
MD
890 if ((bp->b_flags & B_CACHE) == 0) {
891 bqrelse(bp);
8cce658d 892 error = bread(ip->vp, lbase, lblksize, &bp);
db71f61f 893 }
8cce658d 894 } else if (trivial) {
db71f61f
MD
895 /*
896 * Even though we are entirely overwriting the buffer
897 * we may still have to zero it out to avoid a
898 * mmap/write visibility issue.
899 */
8cce658d 900 bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0);
db71f61f
MD
901 if ((bp->b_flags & B_CACHE) == 0)
902 vfs_bio_clrbuf(bp);
db71f61f
MD
903 } else {
904 /*
905 * Partial overwrite, read in any missing bits then
906 * replace the portion being written.
8cce658d
MD
907 *
908 * (The strategy code will detect zero-fill physical
909 * blocks for this case).
db71f61f 910 */
8cce658d 911 error = bread(ip->vp, lbase, lblksize, &bp);
db71f61f
MD
912 if (error == 0)
913 bheavy(bp);
914 }
915
8cce658d
MD
916 if (error) {
917 brelse(bp);
918 break;
db71f61f
MD
919 }
920
8cce658d
MD
921 /*
922 * Ok, copy the data in
923 */
8cce658d 924 error = uiomove(bp->b_data + loff, n, uio);
db71f61f 925 kflags |= NOTE_WRITE;
b2b78aaa 926 modified = 1;
a5913bdf
MD
927 if (error) {
928 brelse(bp);
929 break;
930 }
d7bfb2cb
MD
931
932 /*
933 * WARNING: Pageout daemon will issue UIO_NOCOPY writes
934 * with IO_SYNC or IO_ASYNC set. These writes
935 * must be handled as the pageout daemon expects.
936 */
065f4046 937 if (ioflag & IO_SYNC) {
d7bfb2cb 938 bwrite(bp);
065f4046 939 } else if ((ioflag & IO_DIRECT) && endofblk) {
d7bfb2cb 940 bawrite(bp);
065f4046 941 } else if (ioflag & IO_ASYNC) {
d7bfb2cb
MD
942 bawrite(bp);
943 } else {
944 bdwrite(bp);
945 }
db71f61f 946 }
8cce658d
MD
947
948 /*
949 * Cleanup. If we extended the file EOF but failed to write through
950 * the entire write is a failure and we have to back-up.
951 */
355d67fc 952 if (error && new_eof != old_eof) {
2121ef11 953 hammer2_mtx_ex(&ip->lock);
355d67fc 954 hammer2_truncate_file(ip, old_eof);
2121ef11 955 if (ip->flags & HAMMER2_INODE_MODIFIED)
c603b86b 956 hammer2_inode_fsync(ip, NULL);
2121ef11 957 hammer2_mtx_unlock(&ip->lock);
b2b78aaa 958 } else if (modified) {
0bdddbf4 959 hammer2_mtx_ex(&ip->lock);
c603b86b 960 hammer2_inode_modify(ip);
7a9b14a0 961 hammer2_update_time(&ip->meta.mtime);
2121ef11 962 if (ip->flags & HAMMER2_INODE_MODIFIED)
c603b86b 963 hammer2_inode_fsync(ip, NULL);
94491fa0 964 hammer2_mtx_unlock(&ip->lock);
2121ef11 965 hammer2_knote(ip->vp, kflags);
b2b78aaa 966 }
9450e866 967 hammer2_trans_assert_strategy(ip->pmp);
ea155208 968
4e2004ea 969 return error;
703720e4
MD
970}
971
a5913bdf 972/*
355d67fc 973 * Truncate the size of a file. The inode must not be locked.
eddc656a 974 *
2121ef11
MD
975 * We must unconditionally set HAMMER2_INODE_RESIZED to properly
976 * ensure that any on-media data beyond the new file EOF has been destroyed.
850687d2
MD
977 *
978 * WARNING: nvtruncbuf() can only be safely called without the inode lock
d34788ef
MD
979 * held due to the way our write thread works. If the truncation
980 * occurs in the middle of a buffer, nvtruncbuf() is responsible
981 * for dirtying that buffer and zeroing out trailing bytes.
94491fa0
MD
982 *
983 * WARNING! Assumes that the kernel interlocks size changes at the
984 * vnode level.
d34788ef
MD
985 *
986 * WARNING! Caller assumes responsibility for removing dead blocks
987 * if INODE_RESIZED is set.
a5913bdf
MD
988 */
989static
990void
355d67fc 991hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize)
8cce658d 992{
8cce658d 993 hammer2_key_t lbase;
8cce658d
MD
994 int nblksize;
995
05dd26e4 996 LOCKSTART;
2121ef11 997 hammer2_mtx_unlock(&ip->lock);
8cce658d 998 if (ip->vp) {
355d67fc 999 nblksize = hammer2_calc_logical(ip, nsize, &lbase, NULL);
8cce658d
MD
1000 nvtruncbuf(ip->vp, nsize,
1001 nblksize, (int)nsize & (nblksize - 1),
355d67fc 1002 0);
3ac6a319 1003 }
0bdddbf4 1004 hammer2_mtx_ex(&ip->lock);
2121ef11
MD
1005 KKASSERT((ip->flags & HAMMER2_INODE_RESIZED) == 0);
1006 ip->osize = ip->meta.size;
7a9b14a0 1007 ip->meta.size = nsize;
2121ef11
MD
1008 atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED |
1009 HAMMER2_INODE_RESIZED);
05dd26e4 1010 LOCKSTOP;
3ac6a319
MD
1011}
1012
1013/*
355d67fc 1014 * Extend the size of a file. The inode must not be locked.
eddc656a 1015 *
2121ef11
MD
1016 * Even though the file size is changing, we do not have to set the
1017 * INODE_RESIZED bit unless the file size crosses the EMBEDDED_BYTES
1018 * boundary. When this occurs a hammer2_inode_fsync() is required
1019 * to prepare the inode cluster's indirect block table.
1020 *
94491fa0
MD
1021 * WARNING! Assumes that the kernel interlocks size changes at the
1022 * vnode level.
d34788ef
MD
1023 *
1024 * WARNING! Caller assumes responsibility for transitioning out
1025 * of the inode DIRECTDATA mode if INODE_RESIZED is set.
3ac6a319
MD
1026 */
static
void
hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize)
{
	hammer2_key_t lbase;
	hammer2_key_t osize;
	int oblksize;
	int nblksize;

	LOCKSTART;

	/* Caller must not have left a previous resize pending */
	KKASSERT((ip->flags & HAMMER2_INODE_RESIZED) == 0);
	osize = ip->meta.size;
	ip->osize = osize;	/* remembered for the resize finisher */
	ip->meta.size = nsize;
	atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED);

	/*
	 * RESIZED is only needed when crossing the EMBEDDED_BYTES
	 * boundary, where the caller must transition the inode out of
	 * DIRECTDATA mode (see header comment).
	 */
	if (osize <= HAMMER2_EMBEDDED_BYTES && nsize > HAMMER2_EMBEDDED_BYTES)
		atomic_set_int(&ip->flags, HAMMER2_INODE_RESIZED);

	/*
	 * Drop the inode lock across nvextendbuf(); it cannot be held
	 * while the buffer cache is manipulated.
	 */
	hammer2_mtx_unlock(&ip->lock);
	if (ip->vp) {
		oblksize = hammer2_calc_logical(ip, osize, &lbase, NULL);
		nblksize = hammer2_calc_logical(ip, nsize, &lbase, NULL);
		nvextendbuf(ip->vp,
			    osize, nsize,
			    oblksize, nblksize,
			    -1, -1, 0);
	}
	hammer2_mtx_ex(&ip->lock);

	LOCKSTOP;
}
1060
/*
 * hammer2_vop_nresolve { nch, dvp, cred }
 *
 * Resolve a namecache entry in directory (dvp) via an nresolve XOP,
 * then acquire and attach the related vnode.
 */
static
int
hammer2_vop_nresolve(struct vop_nresolve_args *ap)
{
	hammer2_xop_nresolve_t *xop;
	hammer2_inode_t *ip;
	hammer2_inode_t *dip;
	struct namecache *ncp;
	struct vnode *vp;
	int error;

	LOCKSTART;
	dip = VTOI(ap->a_dvp);
	xop = &hammer2_xop_alloc(dip)->xop_nresolve;

	ncp = ap->a_nch->ncp;
	hammer2_xop_setname(&xop->head, ncp->nc_name, ncp->nc_nlen);

	/*
	 * Note: In DragonFly the kernel handles '.' and '..'.
	 */
	hammer2_inode_lock(dip, HAMMER2_RESOLVE_ALWAYS |
				HAMMER2_RESOLVE_SHARED);
	hammer2_xop_start(&xop->head, hammer2_xop_nresolve);

	/* Wait for the backend; on success the cluster holds the target */
	error = hammer2_xop_collect(&xop->head, 0);
	if (error) {
		ip = NULL;
	} else {
		ip = hammer2_inode_get(dip->pmp, dip, &xop->head.cluster);
	}
	hammer2_inode_unlock(dip, NULL);

	/*
	 * Acquire the related vnode
	 *
	 * NOTE: For error processing, only ENOENT resolves the namecache
	 *	 entry to NULL, otherwise we just return the error and
	 *	 leave the namecache unresolved.
	 *
	 * NOTE: multiple hammer2_inode structures can be aliased to the
	 *	 same chain element, for example for hardlinks.  This
	 *	 use case does not 'reattach' inode associations that
	 *	 might already exist, but always allocates a new one.
	 *
	 * WARNING: inode structure is locked exclusively via inode_get
	 *	    but chain was locked shared.  inode_unlock()
	 *	    will handle it properly.
	 */
	if (ip) {
		vp = hammer2_igetv(ip, &error);
		if (error == 0) {
			vn_unlock(vp);
			cache_setvp(ap->a_nch, vp);
		} else if (error == ENOENT) {
			cache_setvp(ap->a_nch, NULL);
		}
		hammer2_inode_unlock(ip, NULL);

		/*
		 * The vp should not be released until after we've disposed
		 * of our locks, because it might cause vop_inactive() to
		 * be called.
		 */
		if (vp)
			vrele(vp);
	} else {
		error = ENOENT;
		cache_setvp(ap->a_nch, NULL);
	}
	hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
	/* Sanity: either we failed or the namecache now has a vnode */
	KASSERT(error || ap->a_nch->ncp->nc_vp != NULL,
		("resolve error %d/%p ap %p\n",
		 error, ap->a_nch->ncp->nc_vp, ap));
	LOCKSTOP;

	return error;
}
1139
1140static
1141int
1142hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
1143{
1144 hammer2_inode_t *dip;
1145 hammer2_inode_t *ip;
37494cab
MD
1146 int error;
1147
05dd26e4 1148 LOCKSTART;
37494cab 1149 dip = VTOI(ap->a_dvp);
37494cab
MD
1150
1151 if ((ip = dip->pip) == NULL) {
1152 *ap->a_vpp = NULL;
05dd26e4 1153 LOCKSTOP;
37494cab
MD
1154 return ENOENT;
1155 }
159c3ca2 1156 hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS);
c603b86b
MD
1157 *ap->a_vpp = hammer2_igetv(ip, &error);
1158 hammer2_inode_unlock(ip, NULL);
37494cab 1159
05dd26e4 1160 LOCKSTOP;
37494cab
MD
1161 return error;
1162}
1163
1164static
1165int
1166hammer2_vop_nmkdir(struct vop_nmkdir_args *ap)
1167{
37494cab
MD
1168 hammer2_inode_t *dip;
1169 hammer2_inode_t *nip;
1170 struct namecache *ncp;
1171 const uint8_t *name;
1172 size_t name_len;
1173 int error;
1174
05dd26e4 1175 LOCKSTART;
37494cab 1176 dip = VTOI(ap->a_dvp);
05dd26e4
MD
1177 if (dip->pmp->ronly) {
1178 LOCKSTOP;
db71f61f 1179 return (EROFS);
05dd26e4 1180 }
db71f61f 1181
37494cab
MD
1182 ncp = ap->a_nch->ncp;
1183 name = ncp->nc_name;
1184 name_len = ncp->nc_nlen;
1185
278ab2b2 1186 hammer2_pfs_memory_wait(dip->pmp);
c603b86b
MD
1187 hammer2_trans_init(dip->pmp, 0);
1188 nip = hammer2_inode_create(dip, ap->a_vap, ap->a_cred,
e12ae3a5
MD
1189 name, name_len,
1190 hammer2_trans_newinum(dip->pmp), 0, 0,
1191 0, &error);
37494cab
MD
1192 if (error) {
1193 KKASSERT(nip == NULL);
1194 *ap->a_vpp = NULL;
0dea3156 1195 } else {
c603b86b
MD
1196 *ap->a_vpp = hammer2_igetv(nip, &error);
1197 hammer2_inode_unlock(nip, NULL);
37494cab 1198 }
c603b86b 1199 hammer2_trans_done(dip->pmp);
37494cab
MD
1200
1201 if (error == 0) {
1202 cache_setunresolved(ap->a_nch);
1203 cache_setvp(ap->a_nch, *ap->a_vpp);
1204 }
05dd26e4 1205 LOCKSTOP;
37494cab 1206 return error;
703720e4
MD
1207}
1208
/*
 * hammer2_vop_open { vp, mode, cred, fp }
 *
 * No hammer2-specific open processing; delegate to the stock handler.
 */
static
int
hammer2_vop_open(struct vop_open_args *ap)
{
	int error;

	error = vop_stdopen(ap);
	return error;
}
1215
37aa19df 1216/*
db0c2eb3 1217 * hammer2_vop_advlock { vp, id, op, fl, flags }
37aa19df
MD
1218 */
1219static
1220int
1221hammer2_vop_advlock(struct vop_advlock_args *ap)
1222{
1223 hammer2_inode_t *ip = VTOI(ap->a_vp);
bca9f8e6 1224 const hammer2_inode_data_t *ripdata;
278ab2b2 1225 hammer2_cluster_t *cparent;
476d2aad 1226 hammer2_off_t size;
37aa19df 1227
159c3ca2
MD
1228 hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS |
1229 HAMMER2_RESOLVE_SHARED);
1230 cparent = hammer2_inode_cluster(ip, HAMMER2_RESOLVE_ALWAYS |
1231 HAMMER2_RESOLVE_SHARED);
bca9f8e6 1232 ripdata = &hammer2_cluster_rdata(cparent)->ipdata;
b0f58de8 1233 size = ripdata->meta.size;
b93cc2e0 1234 hammer2_inode_unlock(ip, cparent);
476d2aad 1235 return (lf_advlock(ap, &ip->advlock, size));
37aa19df
MD
1236}
1237
1238
c667909f
MD
/*
 * hammer2_vop_close { vp, fflag }
 *
 * No hammer2-specific close processing; delegate to the stock handler.
 */
static
int
hammer2_vop_close(struct vop_close_args *ap)
{
	int error;

	error = vop_stdclose(ap);
	return error;
}
1245
1246/*
db0c2eb3
MD
1247 * hammer2_vop_nlink { nch, dvp, vp, cred }
1248 *
e708f8b9 1249 * Create a hardlink from (vp) to {dvp, nch}.
db0c2eb3
MD
1250 */
static
int
hammer2_vop_nlink(struct vop_nlink_args *ap)
{
	hammer2_xop_nlink_t *xop1;
	hammer2_inode_t *fdip;	/* directory where ip currently resides */
	hammer2_inode_t *tdip;	/* target directory to create link in */
	hammer2_inode_t *cdip;	/* common parent directory */
	hammer2_inode_t *ip;	/* inode we are hardlinking to */
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	int error;

	LOCKSTART;
	tdip = VTOI(ap->a_dvp);
	if (tdip->pmp->ronly) {
		LOCKSTOP;
		return (EROFS);
	}

	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;

	/*
	 * ip represents the file being hardlinked.  The file could be a
	 * normal file or a hardlink target if it has already been hardlinked.
	 * If ip is a hardlinked target then ip->pip represents the location
	 * of the hardlinked target, NOT the location of the hardlink pointer.
	 *
	 * Bump nlinks and potentially also create or move the hardlink
	 * target in the parent directory common to (ip) and (tdip).  The
	 * consolidation code can modify ip->cluster and ip->pip.  The
	 * returned cluster is locked.
	 */
	ip = VTOI(ap->a_vp);
	hammer2_pfs_memory_wait(ip->pmp);
	hammer2_trans_init(ip->pmp, 0);

	/*
	 * The common parent directory must be locked first to avoid deadlocks.
	 * Also note that fdip and/or tdip might match cdip.
	 */
	fdip = ip->pip;
	cdip = hammer2_inode_common_parent(fdip, tdip);
	hammer2_inode_lock(cdip, HAMMER2_RESOLVE_ALWAYS);
	hammer2_inode_lock(fdip, HAMMER2_RESOLVE_ALWAYS);
	hammer2_inode_lock(tdip, HAMMER2_RESOLVE_ALWAYS);
	hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS);
	error = 0;

	/*
	 * If ip is not a hardlink target we must convert it to a hardlink.
	 * If fdip != cdip we must shift the inode to cdip.
	 *
	 * The backend XOP performs the conversion/shift; ENOENT from
	 * collect means "nothing to do" and is not an error here.
	 */
	if (fdip != cdip || (ip->meta.name_key & HAMMER2_DIRHASH_VISIBLE)) {
		xop1 = &hammer2_xop_alloc(fdip)->xop_nlink;
		hammer2_xop_setip2(&xop1->head, ip);
		hammer2_xop_setip3(&xop1->head, cdip);

		hammer2_xop_start(&xop1->head, hammer2_xop_nlink);
		error = hammer2_xop_collect(&xop1->head, 0);
		hammer2_xop_retire(&xop1->head, HAMMER2_XOPMASK_VOP);
		if (error == ENOENT)
			error = 0;
	}

	/*
	 * Must synchronize original inode whose chains are now a hardlink
	 * target.  We must match what the backend XOP did to the
	 * chains: the name becomes the inode number rendered as
	 * "0x%016jx" (18 characters).
	 */
	if (error == 0 && (ip->meta.name_key & HAMMER2_DIRHASH_VISIBLE)) {
		hammer2_inode_modify(ip);
		ip->meta.name_key = ip->meta.inum;
		ip->meta.name_len = 18;	/* "0x%016jx" */
	}

	/*
	 * Create the hardlink target and bump nlinks.
	 */
	if (error == 0) {
		hammer2_inode_create(tdip, NULL, NULL,
				     name, name_len,
				     ip->meta.inum,
				     HAMMER2_OBJTYPE_HARDLINK, ip->meta.type,
				     0, &error);
		hammer2_inode_modify(ip);
		++ip->meta.nlinks;
	}
	if (error == 0) {
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, ap->a_vp);
	}
	/* Unlock in reverse of the acquisition order */
	hammer2_inode_unlock(ip, NULL);
	hammer2_inode_unlock(tdip, NULL);
	hammer2_inode_unlock(fdip, NULL);
	hammer2_inode_unlock(cdip, NULL);
	hammer2_inode_drop(cdip);	/* ref from inode_common_parent */
	hammer2_trans_done(ip->pmp);

	LOCKSTOP;
	return error;
}
1356
1357/*
1358 * hammer2_vop_ncreate { nch, dvp, vpp, cred, vap }
c667909f
MD
1359 *
1360 * The operating system has already ensured that the directory entry
1361 * does not exist and done all appropriate namespace locking.
1362 */
1363static
1364int
1365hammer2_vop_ncreate(struct vop_ncreate_args *ap)
1366{
c667909f
MD
1367 hammer2_inode_t *dip;
1368 hammer2_inode_t *nip;
1369 struct namecache *ncp;
1370 const uint8_t *name;
1371 size_t name_len;
1372 int error;
1373
05dd26e4 1374 LOCKSTART;
c667909f 1375 dip = VTOI(ap->a_dvp);
05dd26e4
MD
1376 if (dip->pmp->ronly) {
1377 LOCKSTOP;
c667909f 1378 return (EROFS);
05dd26e4 1379 }
c667909f
MD
1380
1381 ncp = ap->a_nch->ncp;
1382 name = ncp->nc_name;
1383 name_len = ncp->nc_nlen;
278ab2b2 1384 hammer2_pfs_memory_wait(dip->pmp);
c603b86b 1385 hammer2_trans_init(dip->pmp, 0);
c667909f 1386
c603b86b 1387 nip = hammer2_inode_create(dip, ap->a_vap, ap->a_cred,
e12ae3a5
MD
1388 name, name_len,
1389 hammer2_trans_newinum(dip->pmp), 0, 0,
1390 0, &error);
c667909f
MD
1391 if (error) {
1392 KKASSERT(nip == NULL);
1393 *ap->a_vpp = NULL;
0dea3156 1394 } else {
c603b86b
MD
1395 *ap->a_vpp = hammer2_igetv(nip, &error);
1396 hammer2_inode_unlock(nip, NULL);
c667909f 1397 }
c603b86b 1398 hammer2_trans_done(dip->pmp);
c667909f
MD
1399
1400 if (error == 0) {
1401 cache_setunresolved(ap->a_nch);
1402 cache_setvp(ap->a_nch, *ap->a_vpp);
1403 }
05dd26e4 1404 LOCKSTOP;
c667909f
MD
1405 return error;
1406}
1407
41c34a6d 1408/*
278ab2b2 1409 * Make a device node (typically a fifo)
41c34a6d
MD
1410 */
1411static
1412int
1413hammer2_vop_nmknod(struct vop_nmknod_args *ap)
1414{
1415 hammer2_inode_t *dip;
1416 hammer2_inode_t *nip;
41c34a6d
MD
1417 struct namecache *ncp;
1418 const uint8_t *name;
1419 size_t name_len;
1420 int error;
1421
05dd26e4 1422 LOCKSTART;
41c34a6d 1423 dip = VTOI(ap->a_dvp);
05dd26e4
MD
1424 if (dip->pmp->ronly) {
1425 LOCKSTOP;
41c34a6d 1426 return (EROFS);
05dd26e4 1427 }
41c34a6d
MD
1428
1429 ncp = ap->a_nch->ncp;
1430 name = ncp->nc_name;
1431 name_len = ncp->nc_nlen;
278ab2b2 1432 hammer2_pfs_memory_wait(dip->pmp);
c603b86b 1433 hammer2_trans_init(dip->pmp, 0);
41c34a6d 1434
c603b86b 1435 nip = hammer2_inode_create(dip, ap->a_vap, ap->a_cred,
e12ae3a5
MD
1436 name, name_len,
1437 hammer2_trans_newinum(dip->pmp), 0, 0,
1438 0, &error);
41c34a6d
MD
1439 if (error) {
1440 KKASSERT(nip == NULL);
1441 *ap->a_vpp = NULL;
1442 } else {
c603b86b
MD
1443 *ap->a_vpp = hammer2_igetv(nip, &error);
1444 hammer2_inode_unlock(nip, NULL);
41c34a6d 1445 }
c603b86b 1446 hammer2_trans_done(dip->pmp);
41c34a6d
MD
1447
1448 if (error == 0) {
1449 cache_setunresolved(ap->a_nch);
1450 cache_setvp(ap->a_nch, *ap->a_vpp);
1451 }
05dd26e4 1452 LOCKSTOP;
41c34a6d
MD
1453 return error;
1454}
1455
4e2004ea
MD
1456/*
1457 * hammer2_vop_nsymlink { nch, dvp, vpp, cred, vap, target }
1458 */
static
int
hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
{
	hammer2_inode_t *dip;
	hammer2_inode_t *nip;
	struct namecache *ncp;
	const uint8_t *name;
	size_t name_len;
	int error;

	dip = VTOI(ap->a_dvp);
	if (dip->pmp->ronly)
		return (EROFS);

	ncp = ap->a_nch->ncp;
	name = ncp->nc_name;
	name_len = ncp->nc_nlen;
	hammer2_pfs_memory_wait(dip->pmp);
	hammer2_trans_init(dip->pmp, 0);

	ap->a_vap->va_type = VLNK;	/* enforce type */

	/*
	 * Create the inode for the softlink itself.
	 */
	nip = hammer2_inode_create(dip, ap->a_vap, ap->a_cred,
				   name, name_len,
				   hammer2_trans_newinum(dip->pmp), 0, 0,
				   0, &error);
	if (error) {
		KKASSERT(nip == NULL);
		*ap->a_vpp = NULL;
		hammer2_trans_done(dip->pmp);
		return error;
	}
	*ap->a_vpp = hammer2_igetv(nip, &error);

	/*
	 * Build the softlink (~like file data) and finalize the namecache.
	 */
	if (error == 0) {
		size_t bytes;
		struct uio auio;
		struct iovec aiov;

		bytes = strlen(ap->a_target);

#if 0
		/*
		 * Disabled fast path: store a short target directly in the
		 * inode's embedded data area instead of file blocks.
		 */
		if (bytes <= HAMMER2_EMBEDDED_BYTES) {
			KKASSERT(nipdata->meta.op_flags &
				 HAMMER2_OPFLAG_DIRECTDATA);
			bcopy(ap->a_target, nipdata->u.data, bytes);
			nipdata->meta.size = bytes;
			nip->meta.size = bytes;
			hammer2_cluster_modsync(ncparent);
			hammer2_inode_unlock(nip, ncparent);
			/* nipdata = NULL; not needed */
		} else
#endif
		{
			/*
			 * Write the target path as ordinary file data via
			 * a synthesized SYSSPACE uio.
			 */
			hammer2_inode_unlock(nip, NULL);
			bzero(&auio, sizeof(auio));
			bzero(&aiov, sizeof(aiov));
			auio.uio_iov = &aiov;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_WRITE;
			auio.uio_resid = bytes;
			auio.uio_iovcnt = 1;
			auio.uio_td = curthread;
			aiov.iov_base = ap->a_target;
			aiov.iov_len = bytes;
			error = hammer2_write_file(nip, &auio, IO_APPEND, 0);
			/*
			 * XXX handle error.  The write error is currently
			 * discarded and the symlink creation reported as
			 * successful regardless.
			 */
			error = 0;
		}
	} else {
		hammer2_inode_unlock(nip, NULL);
	}
	hammer2_trans_done(dip->pmp);

	/*
	 * Finalize namecache
	 */
	if (error == 0) {
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, *ap->a_vpp);
		/* hammer2_knote(ap->a_dvp, NOTE_WRITE); */
	}
	return error;
}
1547
1548/*
1549 * hammer2_vop_nremove { nch, dvp, cred }
1550 */
static
int
hammer2_vop_nremove(struct vop_nremove_args *ap)
{
	hammer2_xop_unlink_t *xop;
	hammer2_inode_t *dip;
	hammer2_inode_t *ip;
	struct namecache *ncp;
	int error;
	int isopen;

	LOCKSTART;
	dip = VTOI(ap->a_dvp);
	if (dip->pmp->ronly) {
		LOCKSTOP;
		return(EROFS);
	}

	ncp = ap->a_nch->ncp;

	hammer2_pfs_memory_wait(dip->pmp);
	hammer2_trans_init(dip->pmp, 0);
	hammer2_inode_lock(dip, HAMMER2_RESOLVE_ALWAYS);

	/*
	 * The unlink XOP unlinks the path from the directory and
	 * locates and returns the cluster associated with the real inode.
	 * We have to handle nlinks here on the frontend.
	 *
	 * A file that is still open is not deleted permanently; its
	 * final destruction is deferred (see hammer2_run_unlinkq()).
	 */
	xop = &hammer2_xop_alloc(dip)->xop_unlink;
	hammer2_xop_setname(&xop->head, ncp->nc_name, ncp->nc_nlen);
	isopen = cache_isopen(ap->a_nch);
	xop->isdir = 0;
	xop->dopermanent = isopen ? 0 : HAMMER2_DELETE_PERMANENT;
	hammer2_xop_start(&xop->head, hammer2_xop_unlink);

	/*
	 * Collect the real inode and adjust nlinks, destroy the real
	 * inode if nlinks transitions to 0 and it was the real inode
	 * (else it has already been removed).
	 */
	error = hammer2_xop_collect(&xop->head, 0);
	hammer2_inode_unlock(dip, NULL);

	if (error == 0) {
		/* inode_get consumes the cluster before the xop retires */
		ip = hammer2_inode_get(dip->pmp, dip, &xop->head.cluster);
		hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
		if (ip) {
			hammer2_inode_unlink_finisher(ip, isopen);
			hammer2_inode_unlock(ip, NULL);
		}
	} else {
		hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
	}

	/* Flush deferred unlinks before closing the transaction */
	hammer2_run_unlinkq(dip->pmp);
	hammer2_trans_done(dip->pmp);
	if (error == 0)
		cache_unlink(ap->a_nch);
	LOCKSTOP;
	return (error);
}
1613
1614/*
1615 * hammer2_vop_nrmdir { nch, dvp, cred }
1616 */
static
int
hammer2_vop_nrmdir(struct vop_nrmdir_args *ap)
{
	hammer2_xop_unlink_t *xop;
	hammer2_inode_t *dip;
	hammer2_inode_t *ip;
	struct namecache *ncp;
	int isopen;
	int error;

	LOCKSTART;
	dip = VTOI(ap->a_dvp);
	if (dip->pmp->ronly) {
		LOCKSTOP;
		return(EROFS);
	}

	hammer2_pfs_memory_wait(dip->pmp);
	hammer2_trans_init(dip->pmp, 0);
	hammer2_inode_lock(dip, HAMMER2_RESOLVE_ALWAYS);

	/*
	 * Like nremove but with isdir = 1 so the backend enforces
	 * directory semantics.  Open directories are not deleted
	 * permanently; destruction is deferred (see run_unlinkq).
	 */
	xop = &hammer2_xop_alloc(dip)->xop_unlink;

	ncp = ap->a_nch->ncp;
	hammer2_xop_setname(&xop->head, ncp->nc_name, ncp->nc_nlen);
	isopen = cache_isopen(ap->a_nch);
	xop->isdir = 1;
	xop->dopermanent = isopen ? 0 : HAMMER2_DELETE_PERMANENT;
	hammer2_xop_start(&xop->head, hammer2_xop_unlink);

	/*
	 * Collect the real inode and adjust nlinks, destroy the real
	 * inode if nlinks transitions to 0 and it was the real inode
	 * (else it has already been removed).
	 */
	error = hammer2_xop_collect(&xop->head, 0);
	hammer2_inode_unlock(dip, NULL);

	if (error == 0) {
		/* inode_get consumes the cluster before the xop retires */
		ip = hammer2_inode_get(dip->pmp, dip, &xop->head.cluster);
		hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
		if (ip) {
			hammer2_inode_unlink_finisher(ip, isopen);
			hammer2_inode_unlock(ip, NULL);
		}
	} else {
		hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
	}
	hammer2_run_unlinkq(dip->pmp);
	hammer2_trans_done(dip->pmp);
	if (error == 0)
		cache_unlink(ap->a_nch);
	LOCKSTOP;
	return (error);
}
1673
6934ae32
MD
1674/*
1675 * hammer2_vop_nrename { fnch, tnch, fdvp, tdvp, cred }
1676 */
4e2004ea
MD
static
int
hammer2_vop_nrename(struct vop_nrename_args *ap)
{
	struct namecache *fncp;
	struct namecache *tncp;
	hammer2_inode_t *cdip;
	hammer2_inode_t *fdip;
	hammer2_inode_t *tdip;
	hammer2_inode_t *ip;
	const uint8_t *fname;
	size_t fname_len;
	const uint8_t *tname;
	size_t tname_len;
	int error;
	int tnch_error;
	hammer2_key_t tlhc;

	/* Cross-mount renames are not supported */
	if (ap->a_fdvp->v_mount != ap->a_tdvp->v_mount)
		return(EXDEV);
	if (ap->a_fdvp->v_mount != ap->a_fnch->ncp->nc_vp->v_mount)
		return(EXDEV);

	fdip = VTOI(ap->a_fdvp);	/* source directory */
	tdip = VTOI(ap->a_tdvp);	/* target directory */

	if (fdip->pmp->ronly)
		return(EROFS);

	LOCKSTART;
	fncp = ap->a_fnch->ncp;		/* entry name in source */
	fname = fncp->nc_name;
	fname_len = fncp->nc_nlen;

	tncp = ap->a_tnch->ncp;		/* entry name in target */
	tname = tncp->nc_name;
	tname_len = tncp->nc_nlen;

	hammer2_pfs_memory_wait(tdip->pmp);
	hammer2_trans_init(tdip->pmp, 0);

	/*
	 * ip is the inode being renamed.  If this is a hardlink then
	 * ip represents the actual file and not the hardlink marker.
	 */
	ip = VTOI(fncp->nc_vp);

	/*
	 * The common parent directory must be locked first to avoid deadlocks.
	 * Also note that fdip and/or tdip might match cdip.
	 */
	cdip = hammer2_inode_common_parent(ip->pip, tdip);
	hammer2_inode_lock(cdip, HAMMER2_RESOLVE_ALWAYS);
	hammer2_inode_lock(fdip, HAMMER2_RESOLVE_ALWAYS);
	hammer2_inode_lock(tdip, HAMMER2_RESOLVE_ALWAYS);
	hammer2_inode_ref(ip);		/* extra ref */
	error = 0;

	/*
	 * If ip is a hardlink target and fdip != cdip we must shift the
	 * inode to cdip.
	 */
	if (fdip != cdip &&
	    (ip->meta.name_key & HAMMER2_DIRHASH_VISIBLE) == 0) {
		hammer2_xop_nlink_t *xop1;

		xop1 = &hammer2_xop_alloc(fdip)->xop_nlink;
		hammer2_xop_setip2(&xop1->head, ip);
		hammer2_xop_setip3(&xop1->head, cdip);

		hammer2_xop_start(&xop1->head, hammer2_xop_nlink);
		error = hammer2_xop_collect(&xop1->head, 0);
		hammer2_xop_retire(&xop1->head, HAMMER2_XOPMASK_VOP);
	}

	/*
	 * Delete the target namespace.
	 */
	{
		hammer2_xop_unlink_t *xop2;
		hammer2_inode_t *tip;
		int isopen;

		/*
		 * The unlink XOP unlinks the path from the directory and
		 * locates and returns the cluster associated with the real
		 * inode.  We have to handle nlinks here on the frontend.
		 */
		xop2 = &hammer2_xop_alloc(tdip)->xop_unlink;
		hammer2_xop_setname(&xop2->head, tname, tname_len);
		isopen = cache_isopen(ap->a_tnch);
		xop2->isdir = -1;
		xop2->dopermanent = isopen ? 0 : HAMMER2_DELETE_PERMANENT;
		hammer2_xop_start(&xop2->head, hammer2_xop_unlink);

		/*
		 * Collect the real inode and adjust nlinks, destroy the real
		 * inode if nlinks transitions to 0 and it was the real inode
		 * (else it has already been removed).
		 *
		 * tnch_error is tracked separately from error: ENOENT here
		 * simply means there was no pre-existing target entry.
		 */
		tnch_error = hammer2_xop_collect(&xop2->head, 0);
		/* hammer2_inode_unlock(tdip, NULL); */

		if (tnch_error == 0) {
			tip = hammer2_inode_get(tdip->pmp, NULL,
						&xop2->head.cluster);
			hammer2_xop_retire(&xop2->head, HAMMER2_XOPMASK_VOP);
			if (tip) {
				hammer2_inode_unlink_finisher(tip, isopen);
				hammer2_inode_unlock(tip, NULL);
			}
		} else {
			hammer2_xop_retire(&xop2->head, HAMMER2_XOPMASK_VOP);
		}
		/* hammer2_inode_lock(tdip, HAMMER2_RESOLVE_ALWAYS); */

		if (tnch_error && tnch_error != ENOENT) {
			error = tnch_error;
			goto done2;
		}
	}

	/*
	 * Resolve the collision space for (tdip, tname, tname_len)
	 *
	 * tdip must be held exclusively locked to prevent races.
	 *
	 * The scan finds the first free hash key at or above the
	 * directory hash of the target name; overflowing out of the
	 * hash's collision space yields ENOSPC.
	 */
	{
		hammer2_xop_scanlhc_t *sxop;
		hammer2_tid_t lhcbase;

		tlhc = hammer2_dirhash(tname, tname_len);
		lhcbase = tlhc;
		sxop = &hammer2_xop_alloc(tdip)->xop_scanlhc;
		sxop->lhc = tlhc;
		hammer2_xop_start(&sxop->head, hammer2_inode_xop_scanlhc);
		while ((error = hammer2_xop_collect(&sxop->head, 0)) == 0) {
			if (tlhc != sxop->head.cluster.focus->bref.key)
				break;
			++tlhc;
		}
		hammer2_xop_retire(&sxop->head, HAMMER2_XOPMASK_VOP);

		if (error) {
			if (error != ENOENT)
				goto done2;
			++tlhc;
			error = 0;
		}
		if ((lhcbase ^ tlhc) & ~HAMMER2_DIRHASH_LOMASK) {
			error = ENOSPC;
			goto done2;
		}
	}

	/*
	 * Everything is setup, do the rename.
	 *
	 * We have to synchronize ip->meta to the underlying operation.
	 *
	 * NOTE: To avoid deadlocks we cannot lock (ip) while we are
	 *	 unlinking elements from their directories.  Locking
	 *	 the nlinks field does not lock the whole inode.
	 */
	hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS);
	if (error == 0) {
		hammer2_xop_nrename_t *xop4;

		xop4 = &hammer2_xop_alloc(fdip)->xop_nrename;
		xop4->lhc = tlhc;
		xop4->ip_key = ip->meta.name_key;
		hammer2_xop_setip2(&xop4->head, ip);
		hammer2_xop_setip3(&xop4->head, tdip);
		hammer2_xop_setname(&xop4->head, fname, fname_len);
		hammer2_xop_setname2(&xop4->head, tname, tname_len);
		hammer2_xop_start(&xop4->head, hammer2_xop_nrename);

		error = hammer2_xop_collect(&xop4->head, 0);
		hammer2_xop_retire(&xop4->head, HAMMER2_XOPMASK_VOP);

		if (error == ENOENT)
			error = 0;
		/* Mirror the backend's rename of a visible inode */
		if (error == 0 &&
		    (ip->meta.name_key & HAMMER2_DIRHASH_VISIBLE)) {
			hammer2_inode_modify(ip);
			ip->meta.name_len = tname_len;
			ip->meta.name_key = tlhc;

		}
	}

	/*
	 * Fixup ip->pip if we were renaming the actual file and not a
	 * hardlink pointer.
	 */
	if (error == 0 && (ip->meta.name_key & HAMMER2_DIRHASH_VISIBLE)) {
		hammer2_inode_t *opip;

		if (ip->pip != tdip) {
			hammer2_inode_ref(tdip);
			opip = ip->pip;
			ip->pip = tdip;
			if (opip)
				hammer2_inode_drop(opip);
		}
	}
	hammer2_inode_unlock(ip, NULL);
done2:
	/* NOTE: the goto paths arrive here with (ip) unlocked */
	hammer2_inode_unlock(tdip, NULL);
	hammer2_inode_unlock(fdip, NULL);
	hammer2_inode_unlock(cdip, NULL);
	hammer2_inode_drop(ip);		/* extra ref taken above */
	hammer2_inode_drop(cdip);	/* ref from inode_common_parent */
	hammer2_run_unlinkq(fdip->pmp);
	hammer2_trans_done(tdip->pmp);

	/*
	 * Issue the namecache update after unlocking all the internal
	 * hammer structures, otherwise we might deadlock.
	 */
	if (tnch_error == 0) {
		cache_unlink(ap->a_tnch);
		cache_setunresolved(ap->a_tnch);
	}
	if (error == 0)
		cache_rename(ap->a_fnch, ap->a_tnch);

	LOCKSTOP;
	return (error);
}
1907
2910a90c
MD
1908/*
1909 * hammer2_vop_ioctl { vp, command, data, fflag, cred }
1910 */
1911static
1912int
1913hammer2_vop_ioctl(struct vop_ioctl_args *ap)
1914{
2910a90c
MD
1915 hammer2_inode_t *ip;
1916 int error;
1917
05dd26e4 1918 LOCKSTART;
2910a90c 1919 ip = VTOI(ap->a_vp);
2910a90c
MD
1920
1921 error = hammer2_ioctl(ip, ap->a_command, (void *)ap->a_data,
1922 ap->a_fflag, ap->a_cred);
05dd26e4 1923 LOCKSTOP;
2910a90c
MD
1924 return (error);
1925}
1926
e118c14f 1927static
f0206a67 1928int
e118c14f 1929hammer2_vop_mountctl(struct vop_mountctl_args *ap)
f0206a67
VS
1930{
1931 struct mount *mp;
506bd6d1 1932 hammer2_pfs_t *pmp;
f0206a67
VS
1933 int rc;
1934
05dd26e4 1935 LOCKSTART;
f0206a67
VS
1936 switch (ap->a_op) {
1937 case (MOUNTCTL_SET_EXPORT):
1938 mp = ap->a_head.a_ops->head.vv_mount;
e4e20f48 1939 pmp = MPTOPMP(mp);
f0206a67
VS
1940
1941 if (ap->a_ctllen != sizeof(struct export_args))
1942 rc = (EINVAL);
1943 else
e4e20f48 1944 rc = vfs_export(mp, &pmp->export,
10c5dee0 1945 (const struct export_args *)ap->a_ctl);
f0206a67
VS
1946 break;
1947 default:
1948 rc = vop_stdmountctl(ap);
1949 break;
1950 }
05dd26e4 1951 LOCKSTOP;
f0206a67
VS
1952 return (rc);
1953}
1954
eae2ed61 1955/*
da6f36f4 1956 * This handles unlinked open files after the vnode is finally dereferenced.
850687d2
MD
1957 * To avoid deadlocks it cannot be called from the normal vnode recycling
1958 * path, so we call it (1) after a unlink, rmdir, or rename, (2) on every
1959 * flush, and (3) on umount.
eae2ed61
MD
1960 */
void
hammer2_run_unlinkq(hammer2_pfs_t *pmp)
{
	const hammer2_inode_data_t *ripdata;
	hammer2_inode_unlink_t *ipul;
	hammer2_inode_t *ip;
	hammer2_cluster_t *cluster;
	hammer2_cluster_t *cparent;

	/* Fast path: nothing queued */
	if (TAILQ_EMPTY(&pmp->unlinkq))
		return;

	LOCKSTART;
	hammer2_spin_ex(&pmp->list_spin);
	while ((ipul = TAILQ_FIRST(&pmp->unlinkq)) != NULL) {
		TAILQ_REMOVE(&pmp->unlinkq, ipul, entry);
		/*
		 * Drop the spinlock while processing this entry; the
		 * inode lock below may block.  Reacquired at loop end.
		 */
		hammer2_spin_unex(&pmp->list_spin);
		ip = ipul->ip;
		kfree(ipul, pmp->minode);

		hammer2_inode_lock(ip, HAMMER2_RESOLVE_ALWAYS);
		cluster = hammer2_inode_cluster(ip, HAMMER2_RESOLVE_ALWAYS);
		ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
		if (hammer2_debug & 0x400) {
			kprintf("hammer2: unlink on reclaim: %s refs=%d\n",
				ripdata->filename, ip->refs);
		}

		/*
		 * NOTE: Due to optimizations to avoid I/O on the inode for
		 *	 the last unlink, ripdata->nlinks is not necessarily
		 *	 0 here.
		 */
		/* KKASSERT(ripdata->nlinks == 0); (see NOTE) */
		cparent = hammer2_cluster_parent(cluster);
		/* Permanently destroy the deferred-unlinked inode */
		hammer2_cluster_delete(cparent, cluster,
				       HAMMER2_DELETE_PERMANENT);
		hammer2_cluster_unlock(cparent);
		hammer2_cluster_drop(cparent);
		hammer2_inode_unlock(ip, cluster);	/* inode lock */
		hammer2_inode_drop(ip);			/* ipul ref */

		hammer2_spin_ex(&pmp->list_spin);
	}
	hammer2_spin_unex(&pmp->list_spin);
	LOCKSTOP;
}
2008
2009
/*
 * KQFILTER
 *
 * Forward declarations and filterops tables for the kqueue filters
 * attached by hammer2_vop_kqfilter() below.  All three filters share
 * the same detach routine.
 */
static void filt_hammer2detach(struct knote *kn);
static int filt_hammer2read(struct knote *kn, long hint);
static int filt_hammer2write(struct knote *kn, long hint);
static int filt_hammer2vnode(struct knote *kn, long hint);

static struct filterops hammer2read_filtops =
	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
	  NULL, filt_hammer2detach, filt_hammer2read };
static struct filterops hammer2write_filtops =
	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
	  NULL, filt_hammer2detach, filt_hammer2write };
static struct filterops hammer2vnode_filtops =
	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
	  NULL, filt_hammer2detach, filt_hammer2vnode };
2027
2028static
2029int
2030hammer2_vop_kqfilter(struct vop_kqfilter_args *ap)
2031{
2032 struct vnode *vp = ap->a_vp;
2033 struct knote *kn = ap->a_kn;
2034
2035 switch (kn->kn_filter) {
2036 case EVFILT_READ:
2037 kn->kn_fop = &hammer2read_filtops;
2038 break;
2039 case EVFILT_WRITE:
2040 kn->kn_fop = &hammer2write_filtops;
2041 break;
2042 case EVFILT_VNODE:
2043 kn->kn_fop = &hammer2vnode_filtops;
2044 break;
2045 default:
2046 return (EOPNOTSUPP);
2047 }
2048
2049 kn->kn_hook = (caddr_t)vp;
2050
2051 knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
2052
2053 return(0);
2054}
2055
2056static void
2057filt_hammer2detach(struct knote *kn)
2058{
2059 struct vnode *vp = (void *)kn->kn_hook;
2060
2061 knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
2062}
2063
2064static int
2065filt_hammer2read(struct knote *kn, long hint)
2066{
2067 struct vnode *vp = (void *)kn->kn_hook;
2068 hammer2_inode_t *ip = VTOI(vp);
2069 off_t off;
2070
2071 if (hint == NOTE_REVOKE) {
2072 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
2073 return(1);
2074 }
7a9b14a0 2075 off = ip->meta.size - kn->kn_fp->f_offset;
41c34a6d
MD
2076 kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
2077 if (kn->kn_sfflags & NOTE_OLDAPI)
2078 return(1);
2079 return (kn->kn_data != 0);
2080}
2081
2082
2083static int
2084filt_hammer2write(struct knote *kn, long hint)
2085{
2086 if (hint == NOTE_REVOKE)
2087 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
2088 kn->kn_data = 0;
2089 return (1);
2090}
2091
2092static int
2093filt_hammer2vnode(struct knote *kn, long hint)
2094{
2095 if (kn->kn_sfflags & hint)
2096 kn->kn_fflags |= hint;
2097 if (hint == NOTE_REVOKE) {
2098 kn->kn_flags |= (EV_EOF | EV_NODATA);
2099 return (1);
2100 }
2101 return (kn->kn_fflags != 0);
2102}
2103
2104/*
2105 * FIFO VOPS
2106 */
2107static
2108int
2109hammer2_vop_markatime(struct vop_markatime_args *ap)
2110{
2111 hammer2_inode_t *ip;
2112 struct vnode *vp;
2113
2114 vp = ap->a_vp;
2115 ip = VTOI(vp);
2116
2117 if (ip->pmp->ronly)
2118 return(EROFS);
2119 return(0);
2120}
2121
/*
 * hammer2_vop_fifokqfilter { vp, kn }
 *
 * kqfilter entry for fifo vnodes.  Let the fifofs layer try to
 * attach the filter first; only if it fails (e.g. an unsupported
 * filter type) do we fall back to the generic hammer2 kqfilter
 * handling.  The ordering here is deliberate.
 */
static
int
hammer2_vop_fifokqfilter(struct vop_kqfilter_args *ap)
{
	int error;

	error = VOCALL(&fifo_vnode_vops, &ap->a_head);
	if (error)
		error = hammer2_vop_kqfilter(ap);
	return(error);
}
2133
2134/*
2135 * VOPS vector
2136 */
703720e4
MD
2137struct vop_ops hammer2_vnode_vops = {
2138 .vop_default = vop_defaultop,
e118c14f 2139 .vop_fsync = hammer2_vop_fsync,
703720e4
MD
2140 .vop_getpages = vop_stdgetpages,
2141 .vop_putpages = vop_stdputpages,
e118c14f 2142 .vop_access = hammer2_vop_access,
37aa19df 2143 .vop_advlock = hammer2_vop_advlock,
c667909f 2144 .vop_close = hammer2_vop_close,
db0c2eb3 2145 .vop_nlink = hammer2_vop_nlink,
c667909f 2146 .vop_ncreate = hammer2_vop_ncreate,
4e2004ea
MD
2147 .vop_nsymlink = hammer2_vop_nsymlink,
2148 .vop_nremove = hammer2_vop_nremove,
2149 .vop_nrmdir = hammer2_vop_nrmdir,
2150 .vop_nrename = hammer2_vop_nrename,
e118c14f 2151 .vop_getattr = hammer2_vop_getattr,
3ac6a319 2152 .vop_setattr = hammer2_vop_setattr,
e118c14f 2153 .vop_readdir = hammer2_vop_readdir,
4e2004ea 2154 .vop_readlink = hammer2_vop_readlink,
5b4a2132
MD
2155 .vop_getpages = vop_stdgetpages,
2156 .vop_putpages = vop_stdputpages,
e118c14f
MD
2157 .vop_read = hammer2_vop_read,
2158 .vop_write = hammer2_vop_write,
2159 .vop_open = hammer2_vop_open,
2160 .vop_inactive = hammer2_vop_inactive,
2161 .vop_reclaim = hammer2_vop_reclaim,
2162 .vop_nresolve = hammer2_vop_nresolve,
37494cab
MD
2163 .vop_nlookupdotdot = hammer2_vop_nlookupdotdot,
2164 .vop_nmkdir = hammer2_vop_nmkdir,
41c34a6d 2165 .vop_nmknod = hammer2_vop_nmknod,
2910a90c 2166 .vop_ioctl = hammer2_vop_ioctl,
e118c14f
MD
2167 .vop_mountctl = hammer2_vop_mountctl,
2168 .vop_bmap = hammer2_vop_bmap,
2169 .vop_strategy = hammer2_vop_strategy,
41c34a6d 2170 .vop_kqfilter = hammer2_vop_kqfilter
703720e4
MD
2171};
2172
2173struct vop_ops hammer2_spec_vops = {
41c34a6d
MD
2174 .vop_default = vop_defaultop,
2175 .vop_fsync = hammer2_vop_fsync,
2176 .vop_read = vop_stdnoread,
2177 .vop_write = vop_stdnowrite,
2178 .vop_access = hammer2_vop_access,
2179 .vop_close = hammer2_vop_close,
2180 .vop_markatime = hammer2_vop_markatime,
2181 .vop_getattr = hammer2_vop_getattr,
2182 .vop_inactive = hammer2_vop_inactive,
2183 .vop_reclaim = hammer2_vop_reclaim,
2184 .vop_setattr = hammer2_vop_setattr
703720e4
MD
2185};
2186
2187struct vop_ops hammer2_fifo_vops = {
41c34a6d
MD
2188 .vop_default = fifo_vnoperate,
2189 .vop_fsync = hammer2_vop_fsync,
2190#if 0
2191 .vop_read = hammer2_vop_fiforead,
2192 .vop_write = hammer2_vop_fifowrite,
2193#endif
2194 .vop_access = hammer2_vop_access,
2195#if 0
2196 .vop_close = hammer2_vop_fifoclose,
2197#endif
2198 .vop_markatime = hammer2_vop_markatime,
2199 .vop_getattr = hammer2_vop_getattr,
2200 .vop_inactive = hammer2_vop_inactive,
2201 .vop_reclaim = hammer2_vop_reclaim,
2202 .vop_setattr = hammer2_vop_setattr,
2203 .vop_kqfilter = hammer2_vop_fifokqfilter
703720e4 2204};
355d67fc 2205