hammer2 - update documentation, begin working on callback I/O
[dragonfly.git] / sys / vfs / hammer2 / hammer2_vnops.c
CommitLineData
e118c14f 1/*
8138a154 2 * Copyright (c) 2011-2014 The DragonFly Project. All rights reserved.
e118c14f
MD
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
355d67fc 7 * by Daniel Flores (GSOC 2013 - mentored by Matthew Dillon, compression)
e118c14f
MD
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in
17 * the documentation and/or other materials provided with the
18 * distribution.
19 * 3. Neither the name of The DragonFly Project nor the names of its
20 * contributors may be used to endorse or promote products derived
21 * from this software without specific, prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
27 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
33 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
ea155208
MD
36/*
37 * Kernel Filesystem interface
38 *
39 * NOTE! local ipdata pointers must be reloaded on any modifying operation
40 * to the inode as its underlying chain may have changed.
41 */
42
703720e4
MD
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/kernel.h>
46#include <sys/fcntl.h>
47#include <sys/buf.h>
48#include <sys/proc.h>
49#include <sys/namei.h>
50#include <sys/mount.h>
51#include <sys/vnode.h>
f0206a67 52#include <sys/mountctl.h>
e028fa74 53#include <sys/dirent.h>
4e2004ea 54#include <sys/uio.h>
355d67fc 55#include <sys/objcache.h>
41c34a6d
MD
56#include <sys/event.h>
57#include <sys/file.h>
58#include <vfs/fifofs/fifo.h>
703720e4
MD
59
60#include "hammer2.h"
355d67fc
MD
61#include "hammer2_lz4.h"
62
63#include "zlib/hammer2_zlib.h"
703720e4 64
db71f61f
MD
65#define ZFOFFSET (-2LL)
66
4e2004ea
MD
67static int hammer2_read_file(hammer2_inode_t *ip, struct uio *uio,
68 int seqcount);
355d67fc
MD
69static int hammer2_write_file(hammer2_inode_t *ip, struct uio *uio,
70 int ioflag, int seqcount);
71static void hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize);
72static void hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize);
355d67fc
MD
73
74struct objcache *cache_buffer_read;
75struct objcache *cache_buffer_write;
76
77/*
78 * Callback used in read path in case that a block is compressed with LZ4.
79 */
80static
81void
278ab2b2 82hammer2_decompress_LZ4_callback(const char *data, u_int bytes, struct bio *bio)
355d67fc 83{
278ab2b2
MD
84 struct buf *bp;
85 char *compressed_buffer;
86 int compressed_size;
87 int result;
355d67fc 88
278ab2b2 89 bp = bio->bio_buf;
355d67fc 90
fdf62707 91#if 0
278ab2b2
MD
92 if bio->bio_caller_info2.index &&
93 bio->bio_caller_info1.uvalue32 !=
94 crc32(bp->b_data, bp->b_bufsize) --- return error
fdf62707 95#endif
278ab2b2
MD
96
97 KKASSERT(bp->b_bufsize <= HAMMER2_PBUFSIZE);
98 compressed_size = *(const int *)data;
99 KKASSERT(compressed_size <= bytes - sizeof(int));
100
101 compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT);
102 result = LZ4_decompress_safe(__DECONST(char *, &data[sizeof(int)]),
103 compressed_buffer,
104 compressed_size,
105 bp->b_bufsize);
106 if (result < 0) {
107 kprintf("READ PATH: Error during decompression."
108 "bio %016jx/%d\n",
109 (intmax_t)bio->bio_offset, bytes);
110 /* make sure it isn't random garbage */
111 bzero(compressed_buffer, bp->b_bufsize);
355d67fc 112 }
278ab2b2
MD
113 KKASSERT(result <= bp->b_bufsize);
114 bcopy(compressed_buffer, bp->b_data, bp->b_bufsize);
115 if (result < bp->b_bufsize)
116 bzero(bp->b_data + result, bp->b_bufsize - result);
117 objcache_put(cache_buffer_read, compressed_buffer);
118 bp->b_resid = 0;
119 bp->b_flags |= B_AGE;
355d67fc
MD
120}
121
122/*
123 * Callback used in read path in case that a block is compressed with ZLIB.
124 * It is almost identical to LZ4 callback, so in theory they can be unified,
125 * but we didn't want to make changes in bio structure for that.
126 */
127static
128void
278ab2b2 129hammer2_decompress_ZLIB_callback(const char *data, u_int bytes, struct bio *bio)
355d67fc 130{
278ab2b2
MD
131 struct buf *bp;
132 char *compressed_buffer;
133 z_stream strm_decompress;
134 int result;
135 int ret;
355d67fc 136
278ab2b2 137 bp = bio->bio_buf;
355d67fc 138
278ab2b2
MD
139 KKASSERT(bp->b_bufsize <= HAMMER2_PBUFSIZE);
140 strm_decompress.avail_in = 0;
141 strm_decompress.next_in = Z_NULL;
142
143 ret = inflateInit(&strm_decompress);
144
145 if (ret != Z_OK)
146 kprintf("HAMMER2 ZLIB: Fatal error in inflateInit.\n");
147
148 compressed_buffer = objcache_get(cache_buffer_read, M_INTWAIT);
149 strm_decompress.next_in = __DECONST(char *, data);
150
151 /* XXX supply proper size, subset of device bp */
152 strm_decompress.avail_in = bytes;
153 strm_decompress.next_out = compressed_buffer;
154 strm_decompress.avail_out = bp->b_bufsize;
155
156 ret = inflate(&strm_decompress, Z_FINISH);
157 if (ret != Z_STREAM_END) {
158 kprintf("HAMMER2 ZLIB: Fatar error during decompression.\n");
159 bzero(compressed_buffer, bp->b_bufsize);
355d67fc 160 }
278ab2b2
MD
161 bcopy(compressed_buffer, bp->b_data, bp->b_bufsize);
162 result = bp->b_bufsize - strm_decompress.avail_out;
163 if (result < bp->b_bufsize)
164 bzero(bp->b_data + result, strm_decompress.avail_out);
165 objcache_put(cache_buffer_read, compressed_buffer);
166 ret = inflateEnd(&strm_decompress);
167
168 bp->b_resid = 0;
169 bp->b_flags |= B_AGE;
355d67fc 170}
3ac6a319 171
b2b78aaa
MD
172static __inline
173void
174hammer2_knote(struct vnode *vp, int flags)
175{
176 if (flags)
177 KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags);
178}
179
703720e4
MD
180/*
181 * Last reference to a vnode is going away but it is still cached.
182 */
e118c14f 183static
703720e4 184int
e118c14f 185hammer2_vop_inactive(struct vop_inactive_args *ap)
703720e4 186{
da6f36f4 187 const hammer2_inode_data_t *ripdata;
e2e9e2db 188 hammer2_inode_t *ip;
da6f36f4 189 hammer2_cluster_t *cluster;
703720e4 190 struct vnode *vp;
703720e4 191
05dd26e4 192 LOCKSTART;
703720e4
MD
193 vp = ap->a_vp;
194 ip = VTOI(vp);
703720e4 195
df9ea374
MD
196 /*
197 * Degenerate case
198 */
199 if (ip == NULL) {
200 vrecycle(vp);
05dd26e4 201 LOCKSTOP;
df9ea374
MD
202 return (0);
203 }
204
214f4a77
MD
205 /*
206 * Detect updates to the embedded data which may be synchronized by
207 * the strategy code. Simply mark the inode modified so it gets
208 * picked up by our normal flush.
209 */
da6f36f4
MD
210 cluster = hammer2_inode_lock_ex(ip);
211 KKASSERT(cluster);
bca9f8e6 212 ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
214f4a77
MD
213
214 /*
215 * Check for deleted inodes and recycle immediately.
850687d2
MD
216 *
217 * WARNING: nvtruncbuf() can only be safely called without the inode
218 * lock held due to the way our write thread works.
214f4a77 219 */
da6f36f4
MD
220 if (ripdata->nlinks == 0) {
221 hammer2_key_t lbase;
222 int nblksize;
223
224 nblksize = hammer2_calc_logical(ip, 0, &lbase, NULL);
da6f36f4 225 hammer2_inode_unlock_ex(ip, cluster);
850687d2 226 nvtruncbuf(vp, 0, nblksize, 0, 0);
214f4a77 227 vrecycle(vp);
10252dc7 228 } else {
da6f36f4 229 hammer2_inode_unlock_ex(ip, cluster);
214f4a77 230 }
05dd26e4 231 LOCKSTOP;
703720e4
MD
232 return (0);
233}
234
235/*
236 * Reclaim a vnode so that it can be reused; after the inode is
237 * disassociated, the filesystem must manage it alone.
238 */
e118c14f 239static
703720e4 240int
e118c14f 241hammer2_vop_reclaim(struct vop_reclaim_args *ap)
703720e4 242{
da6f36f4 243 const hammer2_inode_data_t *ripdata;
278ab2b2 244 hammer2_cluster_t *cluster;
e2e9e2db 245 hammer2_inode_t *ip;
eae2ed61 246 hammer2_pfsmount_t *pmp;
b7926f31 247 struct vnode *vp;
703720e4 248
05dd26e4 249 LOCKSTART;
703720e4
MD
250 vp = ap->a_vp;
251 ip = VTOI(vp);
05dd26e4
MD
252 if (ip == NULL) {
253 LOCKSTOP;
9c2e0de0 254 return(0);
05dd26e4 255 }
b7926f31 256
1c9f601e 257 /*
eae2ed61 258 * Inode must be locked for reclaim.
1c9f601e 259 */
eae2ed61 260 pmp = ip->pmp;
278ab2b2 261 cluster = hammer2_inode_lock_ex(ip);
bca9f8e6 262 ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
ea155208
MD
263
264 /*
265 * The final close of a deleted file or directory marks it for
044541cd 266 * destruction. The DELETED flag allows the flusher to shortcut
ea155208
MD
267 * any modified blocks still unflushed (that is, just ignore them).
268 *
269 * HAMMER2 usually does not try to optimize the freemap by returning
270 * deleted blocks to it as it does not usually know how many snapshots
8138a154 271 * might be referencing portions of the file/dir.
ea155208 272 */
703720e4 273 vp->v_data = NULL;
0e92b724 274 ip->vp = NULL;
a7720be7 275
a02dfba1 276 /*
a7720be7
MD
277 * NOTE! We do not attempt to flush chains here, flushing is
278 * really fragile and could also deadlock.
a02dfba1 279 */
eddc656a 280 vclrisdirty(vp);
eae2ed61
MD
281
282 /*
283 * A reclaim can occur at any time so we cannot safely start a
284 * transaction to handle reclamation of unlinked files. Instead,
285 * the ip is left with a reference and placed on a linked list and
286 * handled later on.
287 */
da6f36f4 288 if (ripdata->nlinks == 0) {
eae2ed61
MD
289 hammer2_inode_unlink_t *ipul;
290
291 ipul = kmalloc(sizeof(*ipul), pmp->minode, M_WAITOK | M_ZERO);
292 ipul->ip = ip;
293
da6f36f4 294 spin_lock(&pmp->list_spin);
eae2ed61 295 TAILQ_INSERT_TAIL(&pmp->unlinkq, ipul, entry);
da6f36f4 296 spin_unlock(&pmp->list_spin);
278ab2b2 297 hammer2_inode_unlock_ex(ip, cluster); /* unlock */
eae2ed61
MD
298 /* retain ref from vp for ipul */
299 } else {
278ab2b2 300 hammer2_inode_unlock_ex(ip, cluster); /* unlock */
eae2ed61
MD
301 hammer2_inode_drop(ip); /* vp ref */
302 }
278ab2b2
MD
303 /* cluster no longer referenced */
304 /* cluster = NULL; not needed */
54eb943b
MD
305
306 /*
307 * XXX handle background sync when ip dirty, kernel will no longer
308 * notify us regarding this inode because there is no longer a
309 * vnode attached to it.
310 */
703720e4 311
05dd26e4 312 LOCKSTOP;
703720e4
MD
313 return (0);
314}
315
e118c14f 316static
703720e4 317int
e118c14f 318hammer2_vop_fsync(struct vop_fsync_args *ap)
703720e4 319{
e2e9e2db 320 hammer2_inode_t *ip;
0dea3156 321 hammer2_trans_t trans;
278ab2b2 322 hammer2_cluster_t *cluster;
b7926f31
MD
323 struct vnode *vp;
324
05dd26e4 325 LOCKSTART;
b7926f31
MD
326 vp = ap->a_vp;
327 ip = VTOI(vp);
b7926f31 328
a4dc31e0 329#if 0
623d43d4 330 /* XXX can't do this yet */
50456506 331 hammer2_trans_init(&trans, ip->pmp, HAMMER2_TRANS_ISFLUSH);
b7926f31 332 vfsync(vp, ap->a_waitfor, 1, NULL, NULL);
a4dc31e0 333#endif
50456506 334 hammer2_trans_init(&trans, ip->pmp, 0);
a4dc31e0 335 vfsync(vp, ap->a_waitfor, 1, NULL, NULL);
6ba3b984
MD
336
337 /*
338 * Calling chain_flush here creates a lot of duplicative
339 * COW operations due to non-optimal vnode ordering.
340 *
341 * Only do it for an actual fsync() syscall. The other forms
342 * which call this function will eventually call chain_flush
343 * on the volume root as a catch-all, which is far more optimal.
344 */
278ab2b2 345 cluster = hammer2_inode_lock_ex(ip);
9596b8c4 346 atomic_clear_int(&ip->flags, HAMMER2_INODE_MODIFIED);
eddc656a 347 vclrisdirty(vp);
355d67fc 348 if (ip->flags & (HAMMER2_INODE_RESIZED|HAMMER2_INODE_MTIME))
278ab2b2 349 hammer2_inode_fsync(&trans, ip, cluster);
355d67fc 350
a4dc31e0 351#if 0
925e4ad1
MD
352 /*
353 * XXX creates discontinuity w/modify_tid
354 */
a0b41c45 355 if (ap->a_flags & VOP_FSYNC_SYSCALL) {
278ab2b2 356 hammer2_flush(&trans, cluster);
a0b41c45 357 }
a4dc31e0 358#endif
278ab2b2 359 hammer2_inode_unlock_ex(ip, cluster);
d001f460 360 hammer2_trans_done(&trans);
a02dfba1 361
05dd26e4 362 LOCKSTOP;
b7926f31 363 return (0);
703720e4
MD
364}
365
e118c14f 366static
703720e4 367int
e118c14f 368hammer2_vop_access(struct vop_access_args *ap)
703720e4 369{
37494cab 370 hammer2_inode_t *ip = VTOI(ap->a_vp);
bca9f8e6 371 const hammer2_inode_data_t *ripdata;
278ab2b2 372 hammer2_cluster_t *cluster;
37494cab
MD
373 uid_t uid;
374 gid_t gid;
375 int error;
376
05dd26e4 377 LOCKSTART;
278ab2b2 378 cluster = hammer2_inode_lock_sh(ip);
bca9f8e6
MD
379 ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
380 uid = hammer2_to_unix_xid(&ripdata->uid);
381 gid = hammer2_to_unix_xid(&ripdata->gid);
382 error = vop_helper_access(ap, uid, gid, ripdata->mode, ripdata->uflags);
278ab2b2 383 hammer2_inode_unlock_sh(ip, cluster);
37494cab 384
05dd26e4 385 LOCKSTOP;
37494cab 386 return (error);
703720e4
MD
387}
388
e118c14f 389static
703720e4 390int
e118c14f 391hammer2_vop_getattr(struct vop_getattr_args *ap)
703720e4 392{
bca9f8e6 393 const hammer2_inode_data_t *ripdata;
278ab2b2 394 hammer2_cluster_t *cluster;
e4e20f48 395 hammer2_pfsmount_t *pmp;
cd4b3d92 396 hammer2_inode_t *ip;
703720e4
MD
397 struct vnode *vp;
398 struct vattr *vap;
703720e4 399
05dd26e4 400 LOCKSTART;
703720e4
MD
401 vp = ap->a_vp;
402 vap = ap->a_vap;
403
cd4b3d92 404 ip = VTOI(vp);
e4e20f48 405 pmp = ip->pmp;
cd4b3d92 406
278ab2b2 407 cluster = hammer2_inode_lock_sh(ip);
bca9f8e6 408 ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
278ab2b2 409 KKASSERT(hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE);
703720e4 410
e4e20f48 411 vap->va_fsid = pmp->mp->mnt_stat.f_fsid.val[0];
bca9f8e6
MD
412 vap->va_fileid = ripdata->inum;
413 vap->va_mode = ripdata->mode;
414 vap->va_nlink = ripdata->nlinks;
415 vap->va_uid = hammer2_to_unix_xid(&ripdata->uid);
416 vap->va_gid = hammer2_to_unix_xid(&ripdata->gid);
cd4b3d92
MD
417 vap->va_rmajor = 0;
418 vap->va_rminor = 0;
355d67fc 419 vap->va_size = ip->size; /* protected by shared lock */
df9ea374 420 vap->va_blocksize = HAMMER2_PBUFSIZE;
bca9f8e6
MD
421 vap->va_flags = ripdata->uflags;
422 hammer2_time_to_timespec(ripdata->ctime, &vap->va_ctime);
423 hammer2_time_to_timespec(ripdata->mtime, &vap->va_mtime);
424 hammer2_time_to_timespec(ripdata->mtime, &vap->va_atime);
cd4b3d92 425 vap->va_gen = 1;
866d5273 426 vap->va_bytes = vap->va_size; /* XXX */
bca9f8e6 427 vap->va_type = hammer2_get_vtype(ripdata);
cd4b3d92 428 vap->va_filerev = 0;
bca9f8e6
MD
429 vap->va_uid_uuid = ripdata->uid;
430 vap->va_gid_uuid = ripdata->gid;
cd4b3d92
MD
431 vap->va_vaflags = VA_UID_UUID_VALID | VA_GID_UUID_VALID |
432 VA_FSID_UUID_VALID;
703720e4 433
278ab2b2 434 hammer2_inode_unlock_sh(ip, cluster);
703720e4 435
05dd26e4 436 LOCKSTOP;
703720e4
MD
437 return (0);
438}
439
3ac6a319
MD
440static
441int
442hammer2_vop_setattr(struct vop_setattr_args *ap)
443{
6a5f4fe6
MD
444 const hammer2_inode_data_t *ripdata;
445 hammer2_inode_data_t *wipdata;
3ac6a319 446 hammer2_inode_t *ip;
278ab2b2 447 hammer2_cluster_t *cluster;
0dea3156 448 hammer2_trans_t trans;
3ac6a319
MD
449 struct vnode *vp;
450 struct vattr *vap;
451 int error;
452 int kflags = 0;
3ac6a319 453 int domtime = 0;
6a5f4fe6 454 int dosync = 0;
b2b78aaa 455 uint64_t ctime;
3ac6a319 456
05dd26e4 457 LOCKSTART;
3ac6a319
MD
458 vp = ap->a_vp;
459 vap = ap->a_vap;
b2b78aaa 460 hammer2_update_time(&ctime);
3ac6a319
MD
461
462 ip = VTOI(vp);
3ac6a319 463
05dd26e4
MD
464 if (ip->pmp->ronly) {
465 LOCKSTOP;
3ac6a319 466 return(EROFS);
05dd26e4 467 }
3ac6a319 468
278ab2b2 469 hammer2_pfs_memory_wait(ip->pmp);
50456506 470 hammer2_trans_init(&trans, ip->pmp, 0);
278ab2b2 471 cluster = hammer2_inode_lock_ex(ip);
bca9f8e6 472 ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
3ac6a319
MD
473 error = 0;
474
475 if (vap->va_flags != VNOVAL) {
476 u_int32_t flags;
477
6a5f4fe6 478 flags = ripdata->uflags;
3ac6a319 479 error = vop_helper_setattr_flags(&flags, vap->va_flags,
6a5f4fe6 480 hammer2_to_unix_xid(&ripdata->uid),
3ac6a319
MD
481 ap->a_cred);
482 if (error == 0) {
6a5f4fe6
MD
483 if (ripdata->uflags != flags) {
484 wipdata = hammer2_cluster_modify_ip(&trans, ip,
485 cluster, 0);
486 wipdata->uflags = flags;
487 wipdata->ctime = ctime;
3ac6a319 488 kflags |= NOTE_ATTRIB;
6a5f4fe6
MD
489 dosync = 1;
490 ripdata = wipdata;
3ac6a319 491 }
6a5f4fe6 492 if (ripdata->uflags & (IMMUTABLE | APPEND)) {
3ac6a319
MD
493 error = 0;
494 goto done;
495 }
496 }
b2b78aaa 497 goto done;
3ac6a319 498 }
6a5f4fe6 499 if (ripdata->uflags & (IMMUTABLE | APPEND)) {
3ac6a319
MD
500 error = EPERM;
501 goto done;
502 }
b2b78aaa 503 if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
6a5f4fe6
MD
504 mode_t cur_mode = ripdata->mode;
505 uid_t cur_uid = hammer2_to_unix_xid(&ripdata->uid);
506 gid_t cur_gid = hammer2_to_unix_xid(&ripdata->gid);
b2b78aaa
MD
507 uuid_t uuid_uid;
508 uuid_t uuid_gid;
509
510 error = vop_helper_chown(ap->a_vp, vap->va_uid, vap->va_gid,
511 ap->a_cred,
512 &cur_uid, &cur_gid, &cur_mode);
513 if (error == 0) {
514 hammer2_guid_to_uuid(&uuid_uid, cur_uid);
515 hammer2_guid_to_uuid(&uuid_gid, cur_gid);
6a5f4fe6
MD
516 if (bcmp(&uuid_uid, &ripdata->uid, sizeof(uuid_uid)) ||
517 bcmp(&uuid_gid, &ripdata->gid, sizeof(uuid_gid)) ||
518 ripdata->mode != cur_mode
b2b78aaa 519 ) {
6a5f4fe6
MD
520 wipdata = hammer2_cluster_modify_ip(&trans, ip,
521 cluster, 0);
522 wipdata->uid = uuid_uid;
523 wipdata->gid = uuid_gid;
524 wipdata->mode = cur_mode;
525 wipdata->ctime = ctime;
526 dosync = 1;
527 ripdata = wipdata;
b2b78aaa
MD
528 }
529 kflags |= NOTE_ATTRIB;
530 }
531 }
3ac6a319
MD
532
533 /*
534 * Resize the file
535 */
355d67fc 536 if (vap->va_size != VNOVAL && ip->size != vap->va_size) {
3ac6a319
MD
537 switch(vp->v_type) {
538 case VREG:
355d67fc 539 if (vap->va_size == ip->size)
3ac6a319 540 break;
278ab2b2 541 hammer2_inode_unlock_ex(ip, cluster);
355d67fc
MD
542 if (vap->va_size < ip->size) {
543 hammer2_truncate_file(ip, vap->va_size);
3ac6a319 544 } else {
355d67fc 545 hammer2_extend_file(ip, vap->va_size);
3ac6a319 546 }
278ab2b2
MD
547 cluster = hammer2_inode_lock_ex(ip);
548 /* RELOAD */
bca9f8e6 549 ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
3ac6a319
MD
550 domtime = 1;
551 break;
552 default:
553 error = EINVAL;
554 goto done;
555 }
556 }
b2b78aaa
MD
557#if 0
558 /* atime not supported */
559 if (vap->va_atime.tv_sec != VNOVAL) {
6a5f4fe6
MD
560 wipdata = hammer2_cluster_modify_ip(&trans, ip, cluster, 0);
561 wipdata->atime = hammer2_timespec_to_time(&vap->va_atime);
b2b78aaa 562 kflags |= NOTE_ATTRIB;
6a5f4fe6
MD
563 dosync = 1;
564 ripdata = wipdata;
b2b78aaa
MD
565 }
566#endif
567 if (vap->va_mtime.tv_sec != VNOVAL) {
6a5f4fe6
MD
568 wipdata = hammer2_cluster_modify_ip(&trans, ip, cluster, 0);
569 wipdata->mtime = hammer2_timespec_to_time(&vap->va_mtime);
b2b78aaa 570 kflags |= NOTE_ATTRIB;
eddc656a 571 domtime = 0;
6a5f4fe6
MD
572 dosync = 1;
573 ripdata = wipdata;
b2b78aaa
MD
574 }
575 if (vap->va_mode != (mode_t)VNOVAL) {
6a5f4fe6
MD
576 mode_t cur_mode = ripdata->mode;
577 uid_t cur_uid = hammer2_to_unix_xid(&ripdata->uid);
578 gid_t cur_gid = hammer2_to_unix_xid(&ripdata->gid);
b2b78aaa
MD
579
580 error = vop_helper_chmod(ap->a_vp, vap->va_mode, ap->a_cred,
581 cur_uid, cur_gid, &cur_mode);
6a5f4fe6
MD
582 if (error == 0 && ripdata->mode != cur_mode) {
583 wipdata = hammer2_cluster_modify_ip(&trans, ip,
584 cluster, 0);
585 wipdata->mode = cur_mode;
586 wipdata->ctime = ctime;
b2b78aaa 587 kflags |= NOTE_ATTRIB;
6a5f4fe6
MD
588 dosync = 1;
589 ripdata = wipdata;
b2b78aaa
MD
590 }
591 }
355d67fc
MD
592
593 /*
594 * If a truncation occurred we must call inode_fsync() now in order
595 * to trim the related data chains, otherwise a later expansion can
596 * cause havoc.
597 */
6a5f4fe6
MD
598 if (dosync) {
599 hammer2_cluster_modsync(cluster);
600 dosync = 0;
601 }
278ab2b2 602 hammer2_inode_fsync(&trans, ip, cluster);
eddc656a
MD
603
604 /*
605 * Cleanup. If domtime is set an additional inode modification
606 * must be flagged. All other modifications will have already
607 * set INODE_MODIFIED and called vsetisdirty().
608 */
3ac6a319 609done:
eddc656a
MD
610 if (domtime) {
611 atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED |
612 HAMMER2_INODE_MTIME);
613 vsetisdirty(ip->vp);
614 }
6a5f4fe6
MD
615 if (dosync)
616 hammer2_cluster_modsync(cluster);
278ab2b2 617 hammer2_inode_unlock_ex(ip, cluster);
0dea3156 618 hammer2_trans_done(&trans);
eddc656a
MD
619 hammer2_knote(ip->vp, kflags);
620
05dd26e4 621 LOCKSTOP;
3ac6a319
MD
622 return (error);
623}
624
e118c14f 625static
703720e4 626int
e118c14f 627hammer2_vop_readdir(struct vop_readdir_args *ap)
703720e4 628{
bca9f8e6 629 const hammer2_inode_data_t *ripdata;
e028fa74
MD
630 hammer2_inode_t *ip;
631 hammer2_inode_t *xip;
278ab2b2
MD
632 hammer2_cluster_t *cparent;
633 hammer2_cluster_t *cluster;
634 hammer2_cluster_t *xcluster;
635 hammer2_blockref_t bref;
476d2aad 636 hammer2_tid_t inum;
1897c66e 637 hammer2_key_t key_next;
e028fa74
MD
638 hammer2_key_t lkey;
639 struct uio *uio;
640 off_t *cookies;
641 off_t saveoff;
642 int cookie_index;
643 int ncookies;
644 int error;
645 int dtype;
278ab2b2 646 int ddflag;
e028fa74
MD
647 int r;
648
05dd26e4 649 LOCKSTART;
e028fa74 650 ip = VTOI(ap->a_vp);
e028fa74
MD
651 uio = ap->a_uio;
652 saveoff = uio->uio_offset;
653
654 /*
655 * Setup cookies directory entry cookies if requested
656 */
657 if (ap->a_ncookies) {
658 ncookies = uio->uio_resid / 16 + 1;
659 if (ncookies > 1024)
660 ncookies = 1024;
661 cookies = kmalloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK);
662 } else {
663 ncookies = -1;
664 cookies = NULL;
665 }
666 cookie_index = 0;
667
278ab2b2 668 cparent = hammer2_inode_lock_sh(ip);
bca9f8e6 669 ripdata = &hammer2_cluster_rdata(cparent)->ipdata;
476d2aad 670
e028fa74
MD
671 /*
672 * Handle artificial entries. To ensure that only positive 64 bit
673 * quantities are returned to userland we always strip off bit 63.
674 * The hash code is designed such that codes 0x0000-0x7FFF are not
675 * used, allowing us to use these codes for articial entries.
676 *
677 * Entry 0 is used for '.' and entry 1 is used for '..'. Do not
678 * allow '..' to cross the mount point into (e.g.) the super-root.
679 */
680 error = 0;
278ab2b2 681 cluster = (void *)(intptr_t)-1; /* non-NULL for early goto done case */
e028fa74
MD
682
683 if (saveoff == 0) {
bca9f8e6 684 inum = ripdata->inum & HAMMER2_DIRHASH_USERMSK;
476d2aad 685 r = vop_write_dirent(&error, uio, inum, DT_DIR, 1, ".");
e028fa74
MD
686 if (r)
687 goto done;
688 if (cookies)
689 cookies[cookie_index] = saveoff;
690 ++saveoff;
691 ++cookie_index;
692 if (cookie_index == ncookies)
693 goto done;
694 }
476d2aad 695
e028fa74 696 if (saveoff == 1) {
476d2aad
MD
697 /*
698 * Be careful with lockorder when accessing ".."
e2e9e2db 699 *
0dea3156 700 * (ip is the current dir. xip is the parent dir).
476d2aad 701 */
bca9f8e6 702 inum = ripdata->inum & HAMMER2_DIRHASH_USERMSK;
476d2aad 703 while (ip->pip != NULL && ip != ip->pmp->iroot) {
e028fa74 704 xip = ip->pip;
476d2aad 705 hammer2_inode_ref(xip);
278ab2b2
MD
706 hammer2_inode_unlock_sh(ip, cparent);
707 xcluster = hammer2_inode_lock_sh(xip);
708 cparent = hammer2_inode_lock_sh(ip);
476d2aad 709 hammer2_inode_drop(xip);
bca9f8e6 710 ripdata = &hammer2_cluster_rdata(cparent)->ipdata;
476d2aad 711 if (xip == ip->pip) {
bca9f8e6 712 inum = hammer2_cluster_rdata(xcluster)->
278ab2b2
MD
713 ipdata.inum & HAMMER2_DIRHASH_USERMSK;
714 hammer2_inode_unlock_sh(xip, xcluster);
476d2aad
MD
715 break;
716 }
278ab2b2 717 hammer2_inode_unlock_sh(xip, xcluster);
476d2aad
MD
718 }
719 r = vop_write_dirent(&error, uio, inum, DT_DIR, 2, "..");
e028fa74
MD
720 if (r)
721 goto done;
722 if (cookies)
723 cookies[cookie_index] = saveoff;
724 ++saveoff;
725 ++cookie_index;
726 if (cookie_index == ncookies)
727 goto done;
728 }
729
730 lkey = saveoff | HAMMER2_DIRHASH_VISIBLE;
1fca819a
MD
731 if (hammer2_debug & 0x0020)
732 kprintf("readdir: lkey %016jx\n", lkey);
e028fa74 733
e2e9e2db 734 /*
278ab2b2 735 * parent is the inode cluster, already locked for us. Don't
e2e9e2db
MD
736 * double lock shared locks as this will screw up upgrades.
737 */
e028fa74 738 if (error) {
e028fa74
MD
739 goto done;
740 }
278ab2b2
MD
741 cluster = hammer2_cluster_lookup(cparent, &key_next, lkey, lkey,
742 HAMMER2_LOOKUP_SHARED, &ddflag);
743 if (cluster == NULL) {
744 cluster = hammer2_cluster_lookup(cparent, &key_next,
a0ed3c24 745 lkey, (hammer2_key_t)-1,
278ab2b2 746 HAMMER2_LOOKUP_SHARED, &ddflag);
37aa19df 747 }
278ab2b2
MD
748 if (cluster)
749 hammer2_cluster_bref(cluster, &bref);
750 while (cluster) {
1fca819a
MD
751 if (hammer2_debug & 0x0020)
752 kprintf("readdir: p=%p chain=%p %016jx (next %016jx)\n",
278ab2b2
MD
753 cparent->focus, cluster->focus,
754 bref.key, key_next);
755
756 if (bref.type == HAMMER2_BREF_TYPE_INODE) {
bca9f8e6
MD
757 ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
758 dtype = hammer2_get_dtype(ripdata);
278ab2b2 759 saveoff = bref.key & HAMMER2_DIRHASH_USERMSK;
c667909f 760 r = vop_write_dirent(&error, uio,
bca9f8e6 761 ripdata->inum &
c667909f 762 HAMMER2_DIRHASH_USERMSK,
476d2aad 763 dtype,
bca9f8e6
MD
764 ripdata->name_len,
765 ripdata->filename);
c667909f
MD
766 if (r)
767 break;
768 if (cookies)
769 cookies[cookie_index] = saveoff;
770 ++cookie_index;
771 } else {
772 /* XXX chain error */
278ab2b2 773 kprintf("bad chain type readdir %d\n", bref.type);
c667909f 774 }
995e78dc
MD
775
776 /*
777 * Keys may not be returned in order so once we have a
278ab2b2 778 * placemarker (cluster) the scan must allow the full range
995e78dc
MD
779 * or some entries will be missed.
780 */
278ab2b2
MD
781 cluster = hammer2_cluster_next(cparent, cluster, &key_next,
782 key_next, (hammer2_key_t)-1,
783 HAMMER2_LOOKUP_SHARED);
784 if (cluster) {
785 hammer2_cluster_bref(cluster, &bref);
786 saveoff = (bref.key & HAMMER2_DIRHASH_USERMSK) + 1;
028a55bb
MD
787 } else {
788 saveoff = (hammer2_key_t)-1;
789 }
790 if (cookie_index == ncookies)
791 break;
e028fa74 792 }
278ab2b2
MD
793 if (cluster)
794 hammer2_cluster_unlock(cluster);
e028fa74 795done:
278ab2b2 796 hammer2_inode_unlock_sh(ip, cparent);
e028fa74 797 if (ap->a_eofflag)
278ab2b2 798 *ap->a_eofflag = (cluster == NULL);
1fca819a
MD
799 if (hammer2_debug & 0x0020)
800 kprintf("readdir: done at %016jx\n", saveoff);
37aa19df 801 uio->uio_offset = saveoff & ~HAMMER2_DIRHASH_VISIBLE;
e028fa74
MD
802 if (error && cookie_index == 0) {
803 if (cookies) {
804 kfree(cookies, M_TEMP);
805 *ap->a_ncookies = 0;
806 *ap->a_cookies = NULL;
807 }
808 } else {
809 if (cookies) {
810 *ap->a_ncookies = cookie_index;
811 *ap->a_cookies = cookies;
812 }
813 }
05dd26e4 814 LOCKSTOP;
e028fa74 815 return (error);
703720e4
MD
816}
817
4e2004ea
MD
818/*
819 * hammer2_vop_readlink { vp, uio, cred }
820 */
821static
822int
823hammer2_vop_readlink(struct vop_readlink_args *ap)
824{
825 struct vnode *vp;
4e2004ea
MD
826 hammer2_inode_t *ip;
827 int error;
828
829 vp = ap->a_vp;
830 if (vp->v_type != VLNK)
831 return (EINVAL);
832 ip = VTOI(vp);
4e2004ea
MD
833
834 error = hammer2_read_file(ip, ap->a_uio, 0);
835 return (error);
836}
837
e118c14f 838static
703720e4 839int
e118c14f 840hammer2_vop_read(struct vop_read_args *ap)
703720e4 841{
db71f61f 842 struct vnode *vp;
db71f61f 843 hammer2_inode_t *ip;
db71f61f
MD
844 struct uio *uio;
845 int error;
846 int seqcount;
847 int bigread;
848
849 /*
850 * Read operations supported on this vnode?
851 */
852 vp = ap->a_vp;
853 if (vp->v_type != VREG)
854 return (EINVAL);
855
856 /*
857 * Misc
858 */
859 ip = VTOI(vp);
db71f61f
MD
860 uio = ap->a_uio;
861 error = 0;
862
863 seqcount = ap->a_ioflag >> 16;
864 bigread = (uio->uio_resid > 100 * 1024 * 1024);
865
4e2004ea 866 error = hammer2_read_file(ip, uio, seqcount);
db71f61f 867 return (error);
47902fef 868}
703720e4 869
e118c14f 870static
47902fef 871int
e118c14f 872hammer2_vop_write(struct vop_write_args *ap)
47902fef 873{
db71f61f 874 hammer2_inode_t *ip;
ea155208 875 hammer2_trans_t trans;
e2e9e2db
MD
876 thread_t td;
877 struct vnode *vp;
db71f61f
MD
878 struct uio *uio;
879 int error;
db71f61f
MD
880 int seqcount;
881 int bigwrite;
882
883 /*
884 * Read operations supported on this vnode?
885 */
886 vp = ap->a_vp;
5c88f2c7 887 if (vp->v_type != VREG)
db71f61f
MD
888 return (EINVAL);
889
890 /*
891 * Misc
892 */
893 ip = VTOI(vp);
db71f61f
MD
894 uio = ap->a_uio;
895 error = 0;
05dd26e4 896 if (ip->pmp->ronly) {
db71f61f 897 return (EROFS);
05dd26e4 898 }
db71f61f
MD
899
900 seqcount = ap->a_ioflag >> 16;
901 bigwrite = (uio->uio_resid > 100 * 1024 * 1024);
902
903 /*
904 * Check resource limit
905 */
906 if (uio->uio_resid > 0 && (td = uio->uio_td) != NULL && td->td_proc &&
907 uio->uio_offset + uio->uio_resid >
908 td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
909 lwpsignal(td->td_proc, td->td_lwp, SIGXFSZ);
910 return (EFBIG);
911 }
912
913 bigwrite = (uio->uio_resid > 100 * 1024 * 1024);
914
915 /*
355d67fc
MD
916 * The transaction interlocks against flushes initiations
917 * (note: but will run concurrently with the actual flush).
3ac6a319 918 */
50456506 919 hammer2_trans_init(&trans, ip->pmp, 0);
355d67fc 920 error = hammer2_write_file(ip, uio, ap->a_ioflag, seqcount);
ea155208
MD
921 hammer2_trans_done(&trans);
922
4e2004ea
MD
923 return (error);
924}
925
926/*
927 * Perform read operations on a file or symlink given an UNLOCKED
928 * inode and uio.
476d2aad
MD
929 *
930 * The passed ip is not locked.
4e2004ea
MD
931 */
932static
933int
934hammer2_read_file(hammer2_inode_t *ip, struct uio *uio, int seqcount)
935{
e2e9e2db 936 hammer2_off_t size;
4e2004ea
MD
937 struct buf *bp;
938 int error;
939
940 error = 0;
941
942 /*
e2e9e2db 943 * UIO read loop.
4e2004ea 944 */
355d67fc
MD
945 ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE);
946 size = ip->size;
947 ccms_thread_unlock(&ip->topo_cst);
e2e9e2db
MD
948
949 while (uio->uio_resid > 0 && uio->uio_offset < size) {
8cce658d
MD
950 hammer2_key_t lbase;
951 hammer2_key_t leof;
952 int lblksize;
953 int loff;
4e2004ea
MD
954 int n;
955
8cce658d
MD
956 lblksize = hammer2_calc_logical(ip, uio->uio_offset,
957 &lbase, &leof);
4e2004ea 958
8cce658d 959 error = cluster_read(ip->vp, leof, lbase, lblksize,
6ba3b984
MD
960 uio->uio_resid, seqcount * BKVASIZE,
961 &bp);
8cce658d 962
4e2004ea
MD
963 if (error)
964 break;
8cce658d
MD
965 loff = (int)(uio->uio_offset - lbase);
966 n = lblksize - loff;
4e2004ea
MD
967 if (n > uio->uio_resid)
968 n = uio->uio_resid;
e2e9e2db
MD
969 if (n > size - uio->uio_offset)
970 n = (int)(size - uio->uio_offset);
4e2004ea 971 bp->b_flags |= B_AGE;
8cce658d 972 uiomove((char *)bp->b_data + loff, n, uio);
4e2004ea
MD
973 bqrelse(bp);
974 }
975 return (error);
976}
977
978/*
355d67fc
MD
979 * Write to the file represented by the inode via the logical buffer cache.
980 * The inode may represent a regular file or a symlink.
981 *
982 * The inode must not be locked.
4e2004ea
MD
983 */
984static
985int
355d67fc 986hammer2_write_file(hammer2_inode_t *ip,
ea155208 987 struct uio *uio, int ioflag, int seqcount)
4e2004ea 988{
8cce658d 989 hammer2_key_t old_eof;
355d67fc 990 hammer2_key_t new_eof;
4e2004ea
MD
991 struct buf *bp;
992 int kflags;
993 int error;
355d67fc 994 int modified;
3ac6a319 995
4e2004ea
MD
996 /*
997 * Setup if append
998 */
355d67fc 999 ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE);
4e2004ea 1000 if (ioflag & IO_APPEND)
355d67fc
MD
1001 uio->uio_offset = ip->size;
1002 old_eof = ip->size;
1003 ccms_thread_unlock(&ip->topo_cst);
9596b8c4 1004
8cce658d
MD
1005 /*
1006 * Extend the file if necessary. If the write fails at some point
1007 * we will truncate it back down to cover as much as we were able
1008 * to write.
1009 *
1010 * Doing this now makes it easier to calculate buffer sizes in
1011 * the loop.
1012 */
355d67fc
MD
1013 kflags = 0;
1014 error = 0;
1015 modified = 0;
1016
1017 if (uio->uio_offset + uio->uio_resid > old_eof) {
1018 new_eof = uio->uio_offset + uio->uio_resid;
b2b78aaa 1019 modified = 1;
355d67fc 1020 hammer2_extend_file(ip, new_eof);
8cce658d 1021 kflags |= NOTE_EXTEND;
355d67fc
MD
1022 } else {
1023 new_eof = old_eof;
8cce658d 1024 }
355d67fc 1025
3ac6a319
MD
1026 /*
1027 * UIO write loop
db71f61f
MD
1028 */
1029 while (uio->uio_resid > 0) {
8cce658d 1030 hammer2_key_t lbase;
db71f61f 1031 int trivial;
d7bfb2cb 1032 int endofblk;
8cce658d
MD
1033 int lblksize;
1034 int loff;
1035 int n;
db71f61f
MD
1036
1037 /*
1038 * Don't allow the buffer build to blow out the buffer
1039 * cache.
1040 */
355d67fc 1041 if ((ioflag & IO_RECURSE) == 0)
01eabad4 1042 bwillwrite(HAMMER2_PBUFSIZE);
8cce658d 1043
db71f61f 1044 /*
8cce658d
MD
1045 * This nominally tells us how much we can cluster and
1046 * what the logical buffer size needs to be. Currently
1047 * we don't try to cluster the write and just handle one
1048 * block at a time.
db71f61f 1049 */
8cce658d 1050 lblksize = hammer2_calc_logical(ip, uio->uio_offset,
355d67fc 1051 &lbase, NULL);
8cce658d 1052 loff = (int)(uio->uio_offset - lbase);
355d67fc 1053
355d67fc 1054 KKASSERT(lblksize <= 65536);
8cce658d
MD
1055
1056 /*
1057 * Calculate bytes to copy this transfer and whether the
1058 * copy completely covers the buffer or not.
1059 */
1060 trivial = 0;
1061 n = lblksize - loff;
1062 if (n > uio->uio_resid) {
1063 n = uio->uio_resid;
355d67fc 1064 if (loff == lbase && uio->uio_offset + n == new_eof)
db71f61f 1065 trivial = 1;
d7bfb2cb
MD
1066 endofblk = 0;
1067 } else {
1068 if (loff == 0)
1069 trivial = 1;
1070 endofblk = 1;
db71f61f
MD
1071 }
1072
8cce658d
MD
1073 /*
1074 * Get the buffer
1075 */
db71f61f
MD
1076 if (uio->uio_segflg == UIO_NOCOPY) {
1077 /*
1078 * Issuing a write with the same data backing the
1079 * buffer. Instantiate the buffer to collect the
1080 * backing vm pages, then read-in any missing bits.
1081 *
1082 * This case is used by vop_stdputpages().
1083 */
8cce658d 1084 bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0);
db71f61f
MD
1085 if ((bp->b_flags & B_CACHE) == 0) {
1086 bqrelse(bp);
8cce658d 1087 error = bread(ip->vp, lbase, lblksize, &bp);
db71f61f 1088 }
8cce658d 1089 } else if (trivial) {
db71f61f
MD
1090 /*
1091 * Even though we are entirely overwriting the buffer
1092 * we may still have to zero it out to avoid a
1093 * mmap/write visibility issue.
1094 */
8cce658d 1095 bp = getblk(ip->vp, lbase, lblksize, GETBLK_BHEAVY, 0);
db71f61f
MD
1096 if ((bp->b_flags & B_CACHE) == 0)
1097 vfs_bio_clrbuf(bp);
db71f61f
MD
1098 } else {
1099 /*
1100 * Partial overwrite, read in any missing bits then
1101 * replace the portion being written.
8cce658d
MD
1102 *
1103 * (The strategy code will detect zero-fill physical
1104 * blocks for this case).
db71f61f 1105 */
8cce658d 1106 error = bread(ip->vp, lbase, lblksize, &bp);
db71f61f
MD
1107 if (error == 0)
1108 bheavy(bp);
1109 }
1110
8cce658d
MD
1111 if (error) {
1112 brelse(bp);
1113 break;
db71f61f
MD
1114 }
1115
8cce658d
MD
1116 /*
1117 * Ok, copy the data in
1118 */
8cce658d 1119 error = uiomove(bp->b_data + loff, n, uio);
db71f61f 1120 kflags |= NOTE_WRITE;
b2b78aaa 1121 modified = 1;
a5913bdf
MD
1122 if (error) {
1123 brelse(bp);
1124 break;
1125 }
d7bfb2cb
MD
1126
1127 /*
1128 * WARNING: Pageout daemon will issue UIO_NOCOPY writes
1129 * with IO_SYNC or IO_ASYNC set. These writes
1130 * must be handled as the pageout daemon expects.
1131 */
065f4046 1132 if (ioflag & IO_SYNC) {
d7bfb2cb 1133 bwrite(bp);
065f4046 1134 } else if ((ioflag & IO_DIRECT) && endofblk) {
d7bfb2cb 1135 bawrite(bp);
065f4046 1136 } else if (ioflag & IO_ASYNC) {
d7bfb2cb
MD
1137 bawrite(bp);
1138 } else {
1139 bdwrite(bp);
1140 }
db71f61f 1141 }
8cce658d
MD
1142
1143 /*
1144 * Cleanup. If we extended the file EOF but failed to write through
1145 * the entire write is a failure and we have to back-up.
1146 */
355d67fc
MD
1147 if (error && new_eof != old_eof) {
1148 hammer2_truncate_file(ip, old_eof);
b2b78aaa 1149 } else if (modified) {
355d67fc
MD
1150 ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE);
1151 hammer2_update_time(&ip->mtime);
1152 atomic_set_int(&ip->flags, HAMMER2_INODE_MTIME);
1153 ccms_thread_unlock(&ip->topo_cst);
b2b78aaa 1154 }
355d67fc 1155 atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED);
b2b78aaa 1156 hammer2_knote(ip->vp, kflags);
eddc656a 1157 vsetisdirty(ip->vp);
ea155208 1158
4e2004ea 1159 return error;
703720e4
MD
1160}
1161
a5913bdf 1162/*
355d67fc 1163 * Truncate the size of a file. The inode must not be locked.
eddc656a 1164 *
850687d2
MD
1165 * NOTE: Caller handles setting HAMMER2_INODE_MODIFIED
1166 *
1167 * WARNING: nvtruncbuf() can only be safely called without the inode lock
1168 * held due to the way our write thread works.
a5913bdf
MD
1169 */
1170static
1171void
355d67fc 1172hammer2_truncate_file(hammer2_inode_t *ip, hammer2_key_t nsize)
8cce658d 1173{
8cce658d 1174 hammer2_key_t lbase;
8cce658d
MD
1175 int nblksize;
1176
05dd26e4 1177 LOCKSTART;
8cce658d 1178 if (ip->vp) {
355d67fc 1179 nblksize = hammer2_calc_logical(ip, nsize, &lbase, NULL);
8cce658d
MD
1180 nvtruncbuf(ip->vp, nsize,
1181 nblksize, (int)nsize & (nblksize - 1),
355d67fc 1182 0);
3ac6a319 1183 }
355d67fc
MD
1184 ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE);
1185 ip->size = nsize;
1186 atomic_set_int(&ip->flags, HAMMER2_INODE_RESIZED);
1187 ccms_thread_unlock(&ip->topo_cst);
05dd26e4 1188 LOCKSTOP;
3ac6a319
MD
1189}
1190
1191/*
355d67fc 1192 * Extend the size of a file. The inode must not be locked.
eddc656a
MD
1193 *
1194 * NOTE: Caller handles setting HAMMER2_INODE_MODIFIED
3ac6a319
MD
1195 */
1196static
1197void
355d67fc 1198hammer2_extend_file(hammer2_inode_t *ip, hammer2_key_t nsize)
3ac6a319 1199{
355d67fc 1200 hammer2_key_t lbase;
8cce658d 1201 hammer2_key_t osize;
8cce658d
MD
1202 int oblksize;
1203 int nblksize;
3ac6a319 1204
05dd26e4 1205 LOCKSTART;
355d67fc
MD
1206 ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE);
1207 osize = ip->size;
1208 ip->size = nsize;
1209 ccms_thread_unlock(&ip->topo_cst);
8cce658d 1210
355d67fc
MD
1211 if (ip->vp) {
1212 oblksize = hammer2_calc_logical(ip, osize, &lbase, NULL);
1213 nblksize = hammer2_calc_logical(ip, nsize, &lbase, NULL);
004f88b4 1214 nvextendbuf(ip->vp,
355d67fc
MD
1215 osize, nsize,
1216 oblksize, nblksize,
1217 -1, -1, 0);
3ac6a319 1218 }
355d67fc 1219 atomic_set_int(&ip->flags, HAMMER2_INODE_RESIZED);
05dd26e4 1220 LOCKSTOP;
3ac6a319
MD
1221}
1222
e118c14f 1223static
703720e4 1224int
e118c14f 1225hammer2_vop_nresolve(struct vop_nresolve_args *ap)
703720e4 1226{
e2e9e2db 1227 hammer2_inode_t *ip;
37494cab 1228 hammer2_inode_t *dip;
278ab2b2
MD
1229 hammer2_cluster_t *cparent;
1230 hammer2_cluster_t *cluster;
bca9f8e6 1231 const hammer2_inode_data_t *ripdata;
1897c66e
MD
1232 hammer2_key_t key_next;
1233 hammer2_key_t lhc;
37494cab
MD
1234 struct namecache *ncp;
1235 const uint8_t *name;
1236 size_t name_len;
37494cab 1237 int error = 0;
278ab2b2 1238 int ddflag;
37494cab
MD
1239 struct vnode *vp;
1240
05dd26e4 1241 LOCKSTART;
37494cab 1242 dip = VTOI(ap->a_dvp);
37494cab
MD
1243 ncp = ap->a_nch->ncp;
1244 name = ncp->nc_name;
1245 name_len = ncp->nc_nlen;
1246 lhc = hammer2_dirhash(name, name_len);
1247
1248 /*
1249 * Note: In DragonFly the kernel handles '.' and '..'.
1250 */
278ab2b2
MD
1251 cparent = hammer2_inode_lock_sh(dip);
1252 cluster = hammer2_cluster_lookup(cparent, &key_next,
1253 lhc, lhc + HAMMER2_DIRHASH_LOMASK,
1254 HAMMER2_LOOKUP_SHARED, &ddflag);
1255 while (cluster) {
1256 if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE) {
bca9f8e6
MD
1257 ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1258 if (ripdata->name_len == name_len &&
1259 bcmp(ripdata->filename, name, name_len) == 0) {
278ab2b2
MD
1260 break;
1261 }
37494cab 1262 }
278ab2b2
MD
1263 cluster = hammer2_cluster_next(cparent, cluster, &key_next,
1264 key_next,
1265 lhc + HAMMER2_DIRHASH_LOMASK,
1266 HAMMER2_LOOKUP_SHARED);
37494cab 1267 }
278ab2b2 1268 hammer2_inode_unlock_sh(dip, cparent);
37494cab 1269
84e47819
MD
1270 /*
1271 * Resolve hardlink entries before acquiring the inode.
1272 */
1273 if (cluster) {
bca9f8e6
MD
1274 ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1275 if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK) {
1276 hammer2_tid_t inum = ripdata->inum;
da6f36f4 1277 error = hammer2_hardlink_find(dip, NULL, cluster);
84e47819
MD
1278 if (error) {
1279 kprintf("hammer2: unable to find hardlink "
1280 "0x%016jx\n", inum);
1281 hammer2_cluster_unlock(cluster);
05dd26e4 1282 LOCKSTOP;
84e47819
MD
1283 return error;
1284 }
1285 }
1286 }
1287
e708f8b9 1288 /*
278ab2b2
MD
1289 * nresolve needs to resolve hardlinks, the original cluster is not
1290 * sufficient.
e708f8b9 1291 */
278ab2b2
MD
1292 if (cluster) {
1293 ip = hammer2_inode_get(dip->pmp, dip, cluster);
bca9f8e6
MD
1294 ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
1295 if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK) {
278ab2b2
MD
1296 kprintf("nresolve: fixup hardlink\n");
1297 hammer2_inode_ref(ip);
1298 hammer2_inode_unlock_ex(ip, NULL);
1299 hammer2_cluster_unlock(cluster);
1300 cluster = hammer2_inode_lock_ex(ip);
bca9f8e6 1301 ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
850687d2 1302 hammer2_inode_drop(ip);
bca9f8e6
MD
1303 kprintf("nresolve: fixup to type %02x\n",
1304 ripdata->type);
e708f8b9 1305 }
278ab2b2
MD
1306 } else {
1307 ip = NULL;
e708f8b9
MD
1308 }
1309
278ab2b2 1310#if 0
e708f8b9
MD
1311 /*
1312 * Deconsolidate any hardlink whos nlinks == 1. Ignore errors.
1313 * If an error occurs chain and ip are left alone.
a0ed3c24
MD
1314 *
1315 * XXX upgrade shared lock?
e708f8b9 1316 */
a5913bdf
MD
1317 if (ochain && chain &&
1318 chain->data->ipdata.nlinks == 1 && !dip->pmp->ronly) {
e708f8b9 1319 kprintf("hammer2: need to unconsolidate hardlink for %s\n",
476d2aad
MD
1320 chain->data->ipdata.filename);
1321 /* XXX retain shared lock on dip? (currently not held) */
50456506 1322 hammer2_trans_init(&trans, dip->pmp, 0);
0dea3156
MD
1323 hammer2_hardlink_deconsolidate(&trans, dip, &chain, &ochain);
1324 hammer2_trans_done(&trans);
e708f8b9 1325 }
278ab2b2 1326#endif
e708f8b9
MD
1327
1328 /*
1329 * Acquire the related vnode
9bab8c22
MD
1330 *
1331 * NOTE: For error processing, only ENOENT resolves the namecache
1332 * entry to NULL, otherwise we just return the error and
1333 * leave the namecache unresolved.
10252dc7
MD
1334 *
1335 * NOTE: multiple hammer2_inode structures can be aliased to the
1336 * same chain element, for example for hardlinks. This
1337 * use case does not 'reattach' inode associations that
1338 * might already exist, but always allocates a new one.
0dea3156
MD
1339 *
1340 * WARNING: inode structure is locked exclusively via inode_get
1341 * but chain was locked shared. inode_unlock_ex()
1342 * will handle it properly.
e708f8b9 1343 */
278ab2b2 1344 if (cluster) {
84e47819 1345 vp = hammer2_igetv(ip, cluster, &error);
37494cab
MD
1346 if (error == 0) {
1347 vn_unlock(vp);
1348 cache_setvp(ap->a_nch, vp);
9bab8c22 1349 } else if (error == ENOENT) {
f3843dc2 1350 cache_setvp(ap->a_nch, NULL);
37494cab 1351 }
278ab2b2 1352 hammer2_inode_unlock_ex(ip, cluster);
10252dc7
MD
1353
1354 /*
1355 * The vp should not be released until after we've disposed
1356 * of our locks, because it might cause vop_inactive() to
1357 * be called.
1358 */
1359 if (vp)
1360 vrele(vp);
37494cab
MD
1361 } else {
1362 error = ENOENT;
1363 cache_setvp(ap->a_nch, NULL);
1364 }
9bab8c22 1365 KASSERT(error || ap->a_nch->ncp->nc_vp != NULL,
278ab2b2
MD
1366 ("resolve error %d/%p ap %p\n",
1367 error, ap->a_nch->ncp->nc_vp, ap));
05dd26e4 1368 LOCKSTOP;
37494cab
MD
1369 return error;
1370}
1371
1372static
1373int
1374hammer2_vop_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
1375{
1376 hammer2_inode_t *dip;
1377 hammer2_inode_t *ip;
278ab2b2 1378 hammer2_cluster_t *cparent;
37494cab
MD
1379 int error;
1380
05dd26e4 1381 LOCKSTART;
37494cab 1382 dip = VTOI(ap->a_dvp);
37494cab
MD
1383
1384 if ((ip = dip->pip) == NULL) {
1385 *ap->a_vpp = NULL;
05dd26e4 1386 LOCKSTOP;
37494cab
MD
1387 return ENOENT;
1388 }
278ab2b2 1389 cparent = hammer2_inode_lock_ex(ip);
84e47819 1390 *ap->a_vpp = hammer2_igetv(ip, cparent, &error);
278ab2b2 1391 hammer2_inode_unlock_ex(ip, cparent);
37494cab 1392
05dd26e4 1393 LOCKSTOP;
37494cab
MD
1394 return error;
1395}
1396
1397static
1398int
1399hammer2_vop_nmkdir(struct vop_nmkdir_args *ap)
1400{
37494cab
MD
1401 hammer2_inode_t *dip;
1402 hammer2_inode_t *nip;
0dea3156 1403 hammer2_trans_t trans;
278ab2b2 1404 hammer2_cluster_t *cluster;
37494cab
MD
1405 struct namecache *ncp;
1406 const uint8_t *name;
1407 size_t name_len;
1408 int error;
1409
05dd26e4 1410 LOCKSTART;
37494cab 1411 dip = VTOI(ap->a_dvp);
05dd26e4
MD
1412 if (dip->pmp->ronly) {
1413 LOCKSTOP;
db71f61f 1414 return (EROFS);
05dd26e4 1415 }
db71f61f 1416
37494cab
MD
1417 ncp = ap->a_nch->ncp;
1418 name = ncp->nc_name;
1419 name_len = ncp->nc_nlen;
278ab2b2 1420 cluster = NULL;
37494cab 1421
278ab2b2 1422 hammer2_pfs_memory_wait(dip->pmp);
50456506 1423 hammer2_trans_init(&trans, dip->pmp, HAMMER2_TRANS_NEWINODE);
0dea3156 1424 nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred,
278ab2b2 1425 name, name_len, &cluster, &error);
37494cab
MD
1426 if (error) {
1427 KKASSERT(nip == NULL);
1428 *ap->a_vpp = NULL;
0dea3156 1429 } else {
84e47819 1430 *ap->a_vpp = hammer2_igetv(nip, cluster, &error);
278ab2b2 1431 hammer2_inode_unlock_ex(nip, cluster);
37494cab 1432 }
0dea3156 1433 hammer2_trans_done(&trans);
37494cab
MD
1434
1435 if (error == 0) {
1436 cache_setunresolved(ap->a_nch);
1437 cache_setvp(ap->a_nch, *ap->a_vpp);
1438 }
05dd26e4 1439 LOCKSTOP;
37494cab 1440 return error;
703720e4
MD
1441}
1442
db71f61f
MD
1443/*
1444 * Return the largest contiguous physical disk range for the logical
512beabd 1445 * request, in bytes.
db71f61f
MD
1446 *
1447 * (struct vnode *vp, off_t loffset, off_t *doffsetp, int *runp, int *runb)
d7bfb2cb
MD
1448 *
1449 * Basically disabled, the logical buffer write thread has to deal with
1450 * buffers one-at-a-time.
db71f61f 1451 */
e118c14f 1452static
703720e4 1453int
e118c14f 1454hammer2_vop_bmap(struct vop_bmap_args *ap)
703720e4 1455{
a5913bdf
MD
1456 *ap->a_doffsetp = NOOFFSET;
1457 if (ap->a_runp)
1458 *ap->a_runp = 0;
1459 if (ap->a_runb)
1460 *ap->a_runb = 0;
1461 return (EOPNOTSUPP);
703720e4
MD
1462}
1463
/*
 * Open a vnode.  Entirely delegated to vop_stdopen().
 */
static
int
hammer2_vop_open(struct vop_open_args *ap)
{
	int error;

	error = vop_stdopen(ap);
	return error;
}
1470
37aa19df 1471/*
db0c2eb3 1472 * hammer2_vop_advlock { vp, id, op, fl, flags }
37aa19df
MD
1473 */
1474static
1475int
1476hammer2_vop_advlock(struct vop_advlock_args *ap)
1477{
1478 hammer2_inode_t *ip = VTOI(ap->a_vp);
bca9f8e6 1479 const hammer2_inode_data_t *ripdata;
278ab2b2 1480 hammer2_cluster_t *cparent;
476d2aad 1481 hammer2_off_t size;
37aa19df 1482
278ab2b2 1483 cparent = hammer2_inode_lock_sh(ip);
bca9f8e6
MD
1484 ripdata = &hammer2_cluster_rdata(cparent)->ipdata;
1485 size = ripdata->size;
278ab2b2 1486 hammer2_inode_unlock_sh(ip, cparent);
476d2aad 1487 return (lf_advlock(ap, &ip->advlock, size));
37aa19df
MD
1488}
1489
1490
c667909f
MD
/*
 * Close a vnode.  Entirely delegated to vop_stdclose().
 */
static
int
hammer2_vop_close(struct vop_close_args *ap)
{
	int error;

	error = vop_stdclose(ap);
	return error;
}
1497
1498/*
db0c2eb3
MD
1499 * hammer2_vop_nlink { nch, dvp, vp, cred }
1500 *
e708f8b9 1501 * Create a hardlink from (vp) to {dvp, nch}.
db0c2eb3
MD
1502 */
1503static
1504int
1505hammer2_vop_nlink(struct vop_nlink_args *ap)
1506{
9b21452a
MD
1507 hammer2_inode_t *fdip; /* target directory to create link in */
1508 hammer2_inode_t *tdip; /* target directory to create link in */
1509 hammer2_inode_t *cdip; /* common parent directory */
db0c2eb3 1510 hammer2_inode_t *ip; /* inode we are hardlinking to */
278ab2b2
MD
1511 hammer2_cluster_t *cluster;
1512 hammer2_cluster_t *fdcluster;
1513 hammer2_cluster_t *tdcluster;
1514 hammer2_cluster_t *cdcluster;
0dea3156 1515 hammer2_trans_t trans;
db0c2eb3
MD
1516 struct namecache *ncp;
1517 const uint8_t *name;
1518 size_t name_len;
1519 int error;
1520
05dd26e4 1521 LOCKSTART;
9b21452a 1522 tdip = VTOI(ap->a_dvp);
05dd26e4
MD
1523 if (tdip->pmp->ronly) {
1524 LOCKSTOP;
db0c2eb3 1525 return (EROFS);
05dd26e4 1526 }
db0c2eb3 1527
db0c2eb3
MD
1528 ncp = ap->a_nch->ncp;
1529 name = ncp->nc_name;
1530 name_len = ncp->nc_nlen;
1531
e708f8b9 1532 /*
51bf8e9b
MD
1533 * ip represents the file being hardlinked. The file could be a
1534 * normal file or a hardlink target if it has already been hardlinked.
1535 * If ip is a hardlinked target then ip->pip represents the location
1536 * of the hardlinked target, NOT the location of the hardlink pointer.
1537 *
1538 * Bump nlinks and potentially also create or move the hardlink
9b21452a 1539 * target in the parent directory common to (ip) and (tdip). The
278ab2b2
MD
1540 * consolidation code can modify ip->cluster and ip->pip. The
1541 * returned cluster is locked.
e708f8b9 1542 */
51bf8e9b 1543 ip = VTOI(ap->a_vp);
278ab2b2 1544 hammer2_pfs_memory_wait(ip->pmp);
50456506 1545 hammer2_trans_init(&trans, ip->pmp, HAMMER2_TRANS_NEWINODE);
1a7cfe5a 1546
9b21452a
MD
1547 /*
1548 * The common parent directory must be locked first to avoid deadlocks.
1549 * Also note that fdip and/or tdip might match cdip.
1550 */
1551 fdip = ip->pip;
1552 cdip = hammer2_inode_common_parent(fdip, tdip);
278ab2b2
MD
1553 cdcluster = hammer2_inode_lock_ex(cdip);
1554 fdcluster = hammer2_inode_lock_ex(fdip);
1555 tdcluster = hammer2_inode_lock_ex(tdip);
1556 cluster = hammer2_inode_lock_ex(ip);
1557 error = hammer2_hardlink_consolidate(&trans, ip, &cluster,
1558 cdip, cdcluster, 1);
e708f8b9
MD
1559 if (error)
1560 goto done;
1561
1562 /*
278ab2b2 1563 * Create a directory entry connected to the specified cluster.
0924b3f8
MD
1564 *
1565 * WARNING! chain can get moved by the connect (indirectly due to
1566 * potential indirect block creation).
e708f8b9 1567 */
278ab2b2
MD
1568 error = hammer2_inode_connect(&trans, &cluster, 1,
1569 tdip, tdcluster,
044541cd 1570 name, name_len, 0);
db0c2eb3
MD
1571 if (error == 0) {
1572 cache_setunresolved(ap->a_nch);
1573 cache_setvp(ap->a_nch, ap->a_vp);
1574 }
e708f8b9 1575done:
278ab2b2
MD
1576 hammer2_inode_unlock_ex(ip, cluster);
1577 hammer2_inode_unlock_ex(tdip, tdcluster);
1578 hammer2_inode_unlock_ex(fdip, fdcluster);
1579 hammer2_inode_unlock_ex(cdip, cdcluster);
850687d2 1580 hammer2_inode_drop(cdip);
0dea3156
MD
1581 hammer2_trans_done(&trans);
1582
05dd26e4 1583 LOCKSTOP;
db0c2eb3
MD
1584 return error;
1585}
1586
1587/*
1588 * hammer2_vop_ncreate { nch, dvp, vpp, cred, vap }
c667909f
MD
1589 *
1590 * The operating system has already ensured that the directory entry
1591 * does not exist and done all appropriate namespace locking.
1592 */
1593static
1594int
1595hammer2_vop_ncreate(struct vop_ncreate_args *ap)
1596{
c667909f
MD
1597 hammer2_inode_t *dip;
1598 hammer2_inode_t *nip;
0dea3156 1599 hammer2_trans_t trans;
278ab2b2 1600 hammer2_cluster_t *ncluster;
c667909f
MD
1601 struct namecache *ncp;
1602 const uint8_t *name;
1603 size_t name_len;
1604 int error;
1605
05dd26e4 1606 LOCKSTART;
c667909f 1607 dip = VTOI(ap->a_dvp);
05dd26e4
MD
1608 if (dip->pmp->ronly) {
1609 LOCKSTOP;
c667909f 1610 return (EROFS);
05dd26e4 1611 }
c667909f
MD
1612
1613 ncp = ap->a_nch->ncp;
1614 name = ncp->nc_name;
1615 name_len = ncp->nc_nlen;
278ab2b2 1616 hammer2_pfs_memory_wait(dip->pmp);
50456506 1617 hammer2_trans_init(&trans, dip->pmp, HAMMER2_TRANS_NEWINODE);
278ab2b2 1618 ncluster = NULL;
c667909f 1619
0dea3156 1620 nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred,
278ab2b2 1621 name, name_len, &ncluster, &error);
c667909f
MD
1622 if (error) {
1623 KKASSERT(nip == NULL);
1624 *ap->a_vpp = NULL;
0dea3156 1625 } else {
84e47819 1626 *ap->a_vpp = hammer2_igetv(nip, ncluster, &error);
278ab2b2 1627 hammer2_inode_unlock_ex(nip, ncluster);
c667909f 1628 }
0dea3156 1629 hammer2_trans_done(&trans);
c667909f
MD
1630
1631 if (error == 0) {
1632 cache_setunresolved(ap->a_nch);
1633 cache_setvp(ap->a_nch, *ap->a_vpp);
1634 }
05dd26e4 1635 LOCKSTOP;
c667909f
MD
1636 return error;
1637}
1638
41c34a6d 1639/*
278ab2b2 1640 * Make a device node (typically a fifo)
41c34a6d
MD
1641 */
1642static
1643int
1644hammer2_vop_nmknod(struct vop_nmknod_args *ap)
1645{
1646 hammer2_inode_t *dip;
1647 hammer2_inode_t *nip;
1648 hammer2_trans_t trans;
278ab2b2 1649 hammer2_cluster_t *ncluster;
41c34a6d
MD
1650 struct namecache *ncp;
1651 const uint8_t *name;
1652 size_t name_len;
1653 int error;
1654
05dd26e4 1655 LOCKSTART;
41c34a6d 1656 dip = VTOI(ap->a_dvp);
05dd26e4
MD
1657 if (dip->pmp->ronly) {
1658 LOCKSTOP;
41c34a6d 1659 return (EROFS);
05dd26e4 1660 }
41c34a6d
MD
1661
1662 ncp = ap->a_nch->ncp;
1663 name = ncp->nc_name;
1664 name_len = ncp->nc_nlen;
278ab2b2 1665 hammer2_pfs_memory_wait(dip->pmp);
50456506 1666 hammer2_trans_init(&trans, dip->pmp, HAMMER2_TRANS_NEWINODE);
278ab2b2 1667 ncluster = NULL;
41c34a6d
MD
1668
1669 nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred,
278ab2b2 1670 name, name_len, &ncluster, &error);
41c34a6d
MD
1671 if (error) {
1672 KKASSERT(nip == NULL);
1673 *ap->a_vpp = NULL;
1674 } else {
84e47819 1675 *ap->a_vpp = hammer2_igetv(nip, ncluster, &error);
278ab2b2 1676 hammer2_inode_unlock_ex(nip, ncluster);
41c34a6d
MD
1677 }
1678 hammer2_trans_done(&trans);
1679
1680 if (error == 0) {
1681 cache_setunresolved(ap->a_nch);
1682 cache_setvp(ap->a_nch, *ap->a_vpp);
1683 }
05dd26e4 1684 LOCKSTOP;
41c34a6d
MD
1685 return error;
1686}
1687
4e2004ea
MD
1688/*
1689 * hammer2_vop_nsymlink { nch, dvp, vpp, cred, vap, target }
1690 */
1691static
1692int
1693hammer2_vop_nsymlink(struct vop_nsymlink_args *ap)
1694{
4e2004ea
MD
1695 hammer2_inode_t *dip;
1696 hammer2_inode_t *nip;
278ab2b2 1697 hammer2_cluster_t *ncparent;
0dea3156 1698 hammer2_trans_t trans;
4e2004ea
MD
1699 struct namecache *ncp;
1700 const uint8_t *name;
1701 size_t name_len;
1702 int error;
355d67fc 1703
4e2004ea 1704 dip = VTOI(ap->a_dvp);
5c88f2c7 1705 if (dip->pmp->ronly)
4e2004ea
MD
1706 return (EROFS);
1707
1708 ncp = ap->a_nch->ncp;
1709 name = ncp->nc_name;
1710 name_len = ncp->nc_nlen;
278ab2b2 1711 hammer2_pfs_memory_wait(dip->pmp);
50456506 1712 hammer2_trans_init(&trans, dip->pmp, HAMMER2_TRANS_NEWINODE);
278ab2b2 1713 ncparent = NULL;
4e2004ea
MD
1714
1715 ap->a_vap->va_type = VLNK; /* enforce type */
1716
0dea3156 1717 nip = hammer2_inode_create(&trans, dip, ap->a_vap, ap->a_cred,
278ab2b2 1718 name, name_len, &ncparent, &error);
4e2004ea
MD
1719 if (error) {
1720 KKASSERT(nip == NULL);
1721 *ap->a_vpp = NULL;
0dea3156 1722 hammer2_trans_done(&trans);
4e2004ea
MD
1723 return error;
1724 }
84e47819 1725 *ap->a_vpp = hammer2_igetv(nip, ncparent, &error);
4e2004ea
MD
1726
1727 /*
1728 * Build the softlink (~like file data) and finalize the namecache.
1729 */
1730 if (error == 0) {
1731 size_t bytes;
1732 struct uio auio;
1733 struct iovec aiov;
476d2aad 1734 hammer2_inode_data_t *nipdata;
4e2004ea 1735
6a5f4fe6 1736 nipdata = &hammer2_cluster_wdata(ncparent)->ipdata;
278ab2b2 1737 /* nipdata = &nip->chain->data->ipdata;XXX */
4e2004ea
MD
1738 bytes = strlen(ap->a_target);
1739
1740 if (bytes <= HAMMER2_EMBEDDED_BYTES) {
476d2aad 1741 KKASSERT(nipdata->op_flags &
4e2004ea 1742 HAMMER2_OPFLAG_DIRECTDATA);
476d2aad
MD
1743 bcopy(ap->a_target, nipdata->u.data, bytes);
1744 nipdata->size = bytes;
355d67fc 1745 nip->size = bytes;
03188ed8 1746 hammer2_cluster_modsync(ncparent);
278ab2b2
MD
1747 hammer2_inode_unlock_ex(nip, ncparent);
1748 /* nipdata = NULL; not needed */
4e2004ea 1749 } else {
278ab2b2
MD
1750 hammer2_inode_unlock_ex(nip, ncparent);
1751 /* nipdata = NULL; not needed */
4e2004ea
MD
1752 bzero(&auio, sizeof(auio));
1753 bzero(&aiov, sizeof(aiov));
1754 auio.uio_iov = &aiov;
1755 auio.uio_segflg = UIO_SYSSPACE;
1756 auio.uio_rw = UIO_WRITE;
1757 auio.uio_resid = bytes;
1758 auio.uio_iovcnt = 1;
1759 auio.uio_td = curthread;
1760 aiov.iov_base = ap->a_target;
1761 aiov.iov_len = bytes;
355d67fc 1762 error = hammer2_write_file(nip, &auio, IO_APPEND, 0);
4e2004ea
MD
1763 /* XXX handle error */
1764 error = 0;
1765 }
355d67fc 1766 } else {
278ab2b2 1767 hammer2_inode_unlock_ex(nip, ncparent);
4e2004ea 1768 }
0dea3156 1769 hammer2_trans_done(&trans);
4e2004ea
MD
1770
1771 /*
1772 * Finalize namecache
1773 */
1774 if (error == 0) {
1775 cache_setunresolved(ap->a_nch);
1776 cache_setvp(ap->a_nch, *ap->a_vpp);
1777 /* hammer2_knote(ap->a_dvp, NOTE_WRITE); */
1778 }
1779 return error;
1780}
1781
1782/*
1783 * hammer2_vop_nremove { nch, dvp, cred }
1784 */
1785static
1786int
1787hammer2_vop_nremove(struct vop_nremove_args *ap)
1788{
1789 hammer2_inode_t *dip;
0dea3156 1790 hammer2_trans_t trans;
4e2004ea
MD
1791 struct namecache *ncp;
1792 const uint8_t *name;
1793 size_t name_len;
1794 int error;
1795
05dd26e4 1796 LOCKSTART;
4e2004ea 1797 dip = VTOI(ap->a_dvp);
05dd26e4
MD
1798 if (dip->pmp->ronly) {
1799 LOCKSTOP;
4e2004ea 1800 return(EROFS);
05dd26e4 1801 }
4e2004ea
MD
1802
1803 ncp = ap->a_nch->ncp;
1804 name = ncp->nc_name;
1805 name_len = ncp->nc_nlen;
044541cd 1806
278ab2b2 1807 hammer2_pfs_memory_wait(dip->pmp);
50456506 1808 hammer2_trans_init(&trans, dip->pmp, 0);
044541cd 1809 error = hammer2_unlink_file(&trans, dip, name, name_len,
da6f36f4 1810 0, NULL, ap->a_nch, -1);
850687d2 1811 hammer2_run_unlinkq(&trans, dip->pmp);
0dea3156 1812 hammer2_trans_done(&trans);
9b21452a 1813 if (error == 0)
3fc4c63d 1814 cache_unlink(ap->a_nch);
05dd26e4 1815 LOCKSTOP;
4e2004ea
MD
1816 return (error);
1817}
1818
1819/*
1820 * hammer2_vop_nrmdir { nch, dvp, cred }
1821 */
1822static
1823int
1824hammer2_vop_nrmdir(struct vop_nrmdir_args *ap)
1825{
1826 hammer2_inode_t *dip;
0dea3156 1827 hammer2_trans_t trans;
4e2004ea
MD
1828 struct namecache *ncp;
1829 const uint8_t *name;
1830 size_t name_len;
1831 int error;
1832
05dd26e4 1833 LOCKSTART;
4e2004ea 1834 dip = VTOI(ap->a_dvp);
05dd26e4
MD
1835 if (dip->pmp->ronly) {
1836 LOCKSTOP;
4e2004ea 1837 return(EROFS);
05dd26e4 1838 }
4e2004ea
MD
1839
1840 ncp = ap->a_nch->ncp;
1841 name = ncp->nc_name;
1842 name_len = ncp->nc_nlen;
1843
278ab2b2 1844 hammer2_pfs_memory_wait(dip->pmp);
50456506 1845 hammer2_trans_init(&trans, dip->pmp, 0);
eae2ed61 1846 hammer2_run_unlinkq(&trans, dip->pmp);
044541cd 1847 error = hammer2_unlink_file(&trans, dip, name, name_len,
da6f36f4 1848 1, NULL, ap->a_nch, -1);
0dea3156 1849 hammer2_trans_done(&trans);
9b21452a 1850 if (error == 0)
3fc4c63d 1851 cache_unlink(ap->a_nch);
05dd26e4 1852 LOCKSTOP;
4e2004ea
MD
1853 return (error);
1854}
1855
6934ae32
MD
1856/*
1857 * hammer2_vop_nrename { fnch, tnch, fdvp, tdvp, cred }
1858 */
4e2004ea
MD
1859static
1860int
1861hammer2_vop_nrename(struct vop_nrename_args *ap)
1862{
6934ae32
MD
1863 struct namecache *fncp;
1864 struct namecache *tncp;
9b21452a 1865 hammer2_inode_t *cdip;
6934ae32
MD
1866 hammer2_inode_t *fdip;
1867 hammer2_inode_t *tdip;
1868 hammer2_inode_t *ip;
278ab2b2
MD
1869 hammer2_cluster_t *cluster;
1870 hammer2_cluster_t *fdcluster;
1871 hammer2_cluster_t *tdcluster;
1872 hammer2_cluster_t *cdcluster;
0dea3156 1873 hammer2_trans_t trans;
6934ae32
MD
1874 const uint8_t *fname;
1875 size_t fname_len;
1876 const uint8_t *tname;
1877 size_t tname_len;
1878 int error;
850687d2 1879 int tnch_error;
9797e933 1880 int hlink;
6934ae32
MD
1881
1882 if (ap->a_fdvp->v_mount != ap->a_tdvp->v_mount)
1883 return(EXDEV);
1884 if (ap->a_fdvp->v_mount != ap->a_fnch->ncp->nc_vp->v_mount)
1885 return(EXDEV);
1886
1887 fdip = VTOI(ap->a_fdvp); /* source directory */
1888 tdip = VTOI(ap->a_tdvp); /* target directory */
1889
a5913bdf 1890 if (fdip->pmp->ronly)
6934ae32
MD
1891 return(EROFS);
1892
05dd26e4 1893 LOCKSTART;
6934ae32
MD
1894 fncp = ap->a_fnch->ncp; /* entry name in source */
1895 fname = fncp->nc_name;
1896 fname_len = fncp->nc_nlen;
1897
1898 tncp = ap->a_tnch->ncp; /* entry name in target */
1899 tname = tncp->nc_name;
1900 tname_len = tncp->nc_nlen;
1901
278ab2b2 1902 hammer2_pfs_memory_wait(tdip->pmp);
50456506 1903 hammer2_trans_init(&trans, tdip->pmp, 0);
0dea3156 1904
e708f8b9 1905 /*
d5fabb70 1906 * ip is the inode being renamed. If this is a hardlink then
e708f8b9
MD
1907 * ip represents the actual file and not the hardlink marker.
1908 */
1909 ip = VTOI(fncp->nc_vp);
278ab2b2 1910 cluster = NULL;
6934ae32 1911
9b21452a
MD
1912
1913 /*
1914 * The common parent directory must be locked first to avoid deadlocks.
1915 * Also note that fdip and/or tdip might match cdip.
1916 *
1917 * WARNING! fdip may not match ip->pip. That is, if the source file
1918 * is already a hardlink then what we are renaming is the
1919 * hardlink pointer, not the hardlink itself. The hardlink
1920 * directory (ip->pip) will already be at a common parent
1921 * of fdrip.
1922 *
1923 * Be sure to use ip->pip when finding the common parent
1924 * against tdip or we might accidently move the hardlink
1925 * target into a subdirectory that makes it inaccessible to
1926 * other pointers.
1927 */
1928 cdip = hammer2_inode_common_parent(ip->pip, tdip);
278ab2b2
MD
1929 cdcluster = hammer2_inode_lock_ex(cdip);
1930 fdcluster = hammer2_inode_lock_ex(fdip);
1931 tdcluster = hammer2_inode_lock_ex(tdip);
9b21452a 1932
6934ae32 1933 /*
51bf8e9b
MD
1934 * Keep a tight grip on the inode so the temporary unlinking from
1935 * the source location prior to linking to the target location
278ab2b2 1936 * does not cause the cluster to be destroyed.
222d9e22
MD
1937 *
1938 * NOTE: To avoid deadlocks we cannot lock (ip) while we are
e708f8b9
MD
1939 * unlinking elements from their directories. Locking
1940 * the nlinks field does not lock the whole inode.
6934ae32 1941 */
476d2aad 1942 hammer2_inode_ref(ip);
6934ae32
MD
1943
1944 /*
850687d2 1945 * Remove target if it exists.
6934ae32 1946 */
044541cd 1947 error = hammer2_unlink_file(&trans, tdip, tname, tname_len,
da6f36f4 1948 -1, NULL, ap->a_tnch, -1);
850687d2 1949 tnch_error = error;
6934ae32
MD
1950 if (error && error != ENOENT)
1951 goto done;
6934ae32
MD
1952
1953 /*
51bf8e9b 1954 * When renaming a hardlinked file we may have to re-consolidate
da6f36f4 1955 * the location of the hardlink target.
e708f8b9 1956 *
51bf8e9b 1957 * If ip represents a regular file the consolidation code essentially
278ab2b2 1958 * does nothing other than return the same locked cluster that was
731b2a84 1959 * passed in.
e708f8b9 1960 *
278ab2b2 1961 * The returned cluster will be locked.
ea155208
MD
1962 *
1963 * WARNING! We do not currently have a local copy of ipdata but
1964 * we do use one later remember that it must be reloaded
1965 * on any modification to the inode, including connects.
6934ae32 1966 */
278ab2b2
MD
1967 cluster = hammer2_inode_lock_ex(ip);
1968 error = hammer2_hardlink_consolidate(&trans, ip, &cluster,
da6f36f4 1969 cdip, cdcluster, 0);
51bf8e9b
MD
1970 if (error)
1971 goto done;
5f6853df
MD
1972
1973 /*
51bf8e9b
MD
1974 * Disconnect (fdip, fname) from the source directory. This will
1975 * disconnect (ip) if it represents a direct file. If (ip) represents
1976 * a hardlink the HARDLINK pointer object will be removed but the
1977 * hardlink will stay intact.
1978 *
044541cd
MD
1979 * Always pass nch as NULL because we intend to reconnect the inode,
1980 * so we don't want hammer2_unlink_file() to rename it to the hidden
1981 * open-but-unlinked directory.
1982 *
278ab2b2
MD
1983 * The target cluster may be marked DELETED but will not be destroyed
1984 * since we retain our hold on ip and cluster.
da6f36f4
MD
1985 *
1986 * NOTE: We pass nlinks as 0 (not -1) in order to retain the file's
1987 * link count.
5f6853df 1988 */
044541cd 1989 error = hammer2_unlink_file(&trans, fdip, fname, fname_len,
da6f36f4 1990 -1, &hlink, NULL, 0);
0dea3156 1991 KKASSERT(error != EAGAIN);
6934ae32
MD
1992 if (error)
1993 goto done;
1994
6934ae32 1995 /*
278ab2b2
MD
1996 * Reconnect ip to target directory using cluster. Chains cannot
1997 * actually be moved, so this will duplicate the cluster in the new
1998 * spot and assign it to the ip, replacing the old cluster.
1c9f601e 1999 *
41c34a6d 2000 * WARNING: Because recursive locks are allowed and we unlinked the
278ab2b2
MD
2001 * file that we have a cluster-in-hand for just above, the
2002 * cluster might have been delete-duplicated. We must
2003 * refactor the cluster.
41c34a6d
MD
2004 *
2005 * WARNING: Chain locks can lock buffer cache buffers, to avoid
1c9f601e 2006 * deadlocks we want to unlock before issuing a cache_*()
0dea3156 2007 * op (that might have to lock a vnode).
da6f36f4
MD
2008 *
2009 * NOTE: Pass nlinks as 0 because we retained the link count from
2010 * the unlink, so we do not have to modify it.
6934ae32 2011 */
278ab2b2
MD
2012 error = hammer2_inode_connect(&trans, &cluster, hlink,
2013 tdip, tdcluster,
044541cd 2014 tname, tname_len, 0);
0dea3156 2015 if (error == 0) {
278ab2b2
MD
2016 KKASSERT(cluster != NULL);
2017 hammer2_inode_repoint(ip, (hlink ? ip->pip : tdip), cluster);
0dea3156 2018 }
222d9e22 2019done:
278ab2b2
MD
2020 hammer2_inode_unlock_ex(ip, cluster);
2021 hammer2_inode_unlock_ex(tdip, tdcluster);
2022 hammer2_inode_unlock_ex(fdip, fdcluster);
2023 hammer2_inode_unlock_ex(cdip, cdcluster);
476d2aad 2024 hammer2_inode_drop(ip);
850687d2
MD
2025 hammer2_inode_drop(cdip);
2026 hammer2_run_unlinkq(&trans, fdip->pmp);
0dea3156 2027 hammer2_trans_done(&trans);
6934ae32 2028
9b21452a
MD
2029 /*
2030 * Issue the namecache update after unlocking all the internal
2031 * hammer structures, otherwise we might deadlock.
2032 */
850687d2
MD
2033 if (tnch_error == 0) {
2034 cache_unlink(ap->a_tnch);
2035 cache_setunresolved(ap->a_tnch);
2036 }
9b21452a
MD
2037 if (error == 0)
2038 cache_rename(ap->a_fnch, ap->a_tnch);
2039
05dd26e4 2040 LOCKSTOP;
6934ae32 2041 return (error);
4e2004ea
MD
2042}
2043
d001f460 2044/*
bca9f8e6 2045 * Strategy code (async logical file buffer I/O from system)
d001f460
MD
2046 *
2047 * WARNING: The strategy code cannot safely use hammer2 transactions
2048 * as this can deadlock against vfs_sync's vfsync() call
bca9f8e6
MD
2049 * if multiple flushes are queued. All H2 structures must
2050 * already be present and ready for the DIO.
2051 *
2052 * Reads can be initiated asynchronously, writes have to be
2053 * spooled to a separate thread for action to avoid deadlocks.
d001f460 2054 */
db71f61f
MD
2055static int hammer2_strategy_read(struct vop_strategy_args *ap);
2056static int hammer2_strategy_write(struct vop_strategy_args *ap);
bca9f8e6 2057static void hammer2_strategy_read_callback(hammer2_iocb_t *iocb);
db71f61f 2058
e118c14f 2059static
703720e4 2060int
e118c14f 2061hammer2_vop_strategy(struct vop_strategy_args *ap)
703720e4 2062{
703720e4
MD
2063 struct bio *biop;
2064 struct buf *bp;
703720e4
MD
2065 int error;
2066
703720e4
MD
2067 biop = ap->a_bio;
2068 bp = biop->bio_buf;
703720e4
MD
2069
2070 switch(bp->b_cmd) {
9c2e0de0 2071 case BUF_CMD_READ:
db71f61f 2072 error = hammer2_strategy_read(ap);
01eabad4 2073 ++hammer2_iod_file_read;
db71f61f 2074 break;
9c2e0de0 2075 case BUF_CMD_WRITE:
db71f61f 2076 error = hammer2_strategy_write(ap);
01eabad4 2077 ++hammer2_iod_file_write;
db71f61f 2078 break;
703720e4
MD
2079 default:
2080 bp->b_error = error = EINVAL;
2081 bp->b_flags |= B_ERROR;
2082 biodone(biop);
2083 break;
2084 }
703720e4
MD
2085 return (error);
2086}
2087
bca9f8e6
MD
2088/*
2089 * Logical buffer I/O, async read.
2090 */
db71f61f
MD
2091static
2092int
2093hammer2_strategy_read(struct vop_strategy_args *ap)
2094{
2095 struct buf *bp;
2096 struct bio *bio;
2097 struct bio *nbio;
db71f61f 2098 hammer2_inode_t *ip;
278ab2b2
MD
2099 hammer2_cluster_t *cparent;
2100 hammer2_cluster_t *cluster;
1897c66e 2101 hammer2_key_t key_dummy;
8cce658d 2102 hammer2_key_t lbase;
278ab2b2
MD
2103 int ddflag;
2104 uint8_t btype;
db71f61f
MD
2105
2106 bio = ap->a_bio;
2107 bp = bio->bio_buf;
2108 ip = VTOI(ap->a_vp);
db71f61f
MD
2109 nbio = push_bio(bio);
2110
8cce658d 2111 lbase = bio->bio_offset;
8cce658d 2112 KKASSERT(((int)lbase & HAMMER2_PBUFMASK) == 0);
db71f61f 2113
bca9f8e6
MD
2114 /*
2115 * Lookup the file offset.
2116 */
278ab2b2
MD
2117 cparent = hammer2_inode_lock_sh(ip);
2118 cluster = hammer2_cluster_lookup(cparent, &key_dummy,
2119 lbase, lbase,
2120 HAMMER2_LOOKUP_NODATA |
2121 HAMMER2_LOOKUP_SHARED,
2122 &ddflag);
2123 hammer2_inode_unlock_sh(ip, cparent);
8e12e3c9 2124
278ab2b2
MD
2125 /*
2126 * Data is zero-fill if no cluster could be found
2127 * (XXX or EIO on a cluster failure).
2128 */
2129 if (cluster == NULL) {
3ac6a319
MD
2130 bp->b_resid = 0;
2131 bp->b_error = 0;
8cce658d 2132 bzero(bp->b_data, bp->b_bcount);
3ac6a319 2133 biodone(nbio);
278ab2b2
MD
2134 return(0);
2135 }
2136
2137 /*
2138 * Cluster elements must be type INODE or type DATA, but the
2139 * compression mode (or not) for DATA chains can be different for
2140 * each chain. This will be handled by the callback.
bca9f8e6
MD
2141 *
2142 * If the cluster already has valid data the callback will be made
2143 * immediately/synchronously.
278ab2b2
MD
2144 */
2145 btype = hammer2_cluster_type(cluster);
2146 if (btype != HAMMER2_BREF_TYPE_INODE &&
2147 btype != HAMMER2_BREF_TYPE_DATA) {
355d67fc 2148 panic("READ PATH: hammer2_strategy_read: unknown bref type");
a5913bdf 2149 }
bca9f8e6
MD
2150 hammer2_cluster_load_async(cluster, hammer2_strategy_read_callback,
2151 nbio);
278ab2b2 2152 return(0);
a5913bdf
MD
2153}
2154
355d67fc 2155/*
bca9f8e6
MD
2156 * Read callback for hammer2_cluster_load_async(). The load function may
2157 * start several actual I/Os but will only make one callback, typically with
2158 * the first valid I/O XXX
355d67fc 2159 */
a5913bdf
MD
2160static
2161void
bca9f8e6 2162hammer2_strategy_read_callback(hammer2_iocb_t *iocb)
a5913bdf 2163{
bca9f8e6
MD
2164 struct bio *bio = iocb->ptr; /* original logical buffer */
2165 struct buf *bp = bio->bio_buf; /* original logical buffer */
2166 hammer2_chain_t *chain;
2167 hammer2_cluster_t *cluster;
2168 hammer2_io_t *dio;
fdf62707 2169 char *data;
278ab2b2 2170 int i;
fdf62707 2171
278ab2b2 2172 /*
bca9f8e6
MD
2173 * Extract data and handle iteration on I/O failure. iocb->off
2174 * is the cluster index for iteration.
2175 */
2176 cluster = iocb->cluster;
2177 dio = iocb->dio; /* can be NULL */
2178
2179 /*
2180 * Work to do if INPROG set, else data already available.
278ab2b2 2181 */
bca9f8e6
MD
2182 if (iocb->flags & HAMMER2_IOCB_INPROG) {
2183 /*
2184 * read not issued yet, chain the iocb to execute the
2185 * read operation.
2186 */
2187 if ((iocb->flags & HAMMER2_IOCB_READ) == 0) {
2188 iocb->flags |= HAMMER2_IOCB_READ;
2189 breadcb(dio->hmp->devvp, dio->pbase, dio->psize,
2190 hammer2_io_callback, iocb);
2191 return;
2192 }
2193
2194 /*
2195 * check results.
2196 */
278ab2b2 2197 if (dio->bp->b_flags & B_ERROR) {
bca9f8e6 2198 i = (int)iocb->lbase + 1;
278ab2b2
MD
2199 if (i >= cluster->nchains) {
2200 bp->b_flags |= B_ERROR;
2201 bp->b_error = dio->bp->b_error;
bca9f8e6 2202 hammer2_io_complete(iocb);
278ab2b2 2203 biodone(bio);
84e47819 2204 hammer2_cluster_unlock(cluster);
278ab2b2 2205 } else {
bca9f8e6 2206 hammer2_io_complete(iocb);
278ab2b2
MD
2207 chain = cluster->array[i];
2208 kprintf("hammer2: IO CHAIN-%d %p\n", i, chain);
2209 hammer2_adjreadcounter(&chain->bref,
2210 chain->bytes);
bca9f8e6
MD
2211 iocb->chain = chain;
2212 iocb->lbase = (off_t)i;
2213 iocb->flags = 0;
2214 iocb->error = 0;
2215 hammer2_io_getblk(chain->hmp,
2216 chain->bref.data_off,
2217 chain->bytes,
2218 iocb);
278ab2b2
MD
2219 }
2220 return;
2221 }
bca9f8e6 2222 chain = iocb->chain;
fdf62707 2223 data = hammer2_io_data(dio, chain->bref.data_off);
278ab2b2 2224 } else {
bca9f8e6 2225 chain = iocb->chain;
fdf62707 2226 data = (void *)chain->data;
278ab2b2 2227 }
a5913bdf
MD
2228
2229 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
866d5273 2230 /*
a5913bdf 2231 * Data is embedded in the inode (copy from inode).
866d5273 2232 */
a5913bdf
MD
2233 bcopy(((hammer2_inode_data_t *)data)->u.data,
2234 bp->b_data, HAMMER2_EMBEDDED_BYTES);
8cce658d
MD
2235 bzero(bp->b_data + HAMMER2_EMBEDDED_BYTES,
2236 bp->b_bcount - HAMMER2_EMBEDDED_BYTES);
db71f61f
MD
2237 bp->b_resid = 0;
2238 bp->b_error = 0;
a5913bdf
MD
2239 } else if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
2240 /*
2241 * Data is on-media, issue device I/O and copy.
2242 *
2243 * XXX direct-IO shortcut could go here XXX.
2244 */
278ab2b2
MD
2245 switch (HAMMER2_DEC_COMP(chain->bref.methods)) {
2246 case HAMMER2_COMP_LZ4:
2247 hammer2_decompress_LZ4_callback(data, chain->bytes,
2248 bio);
2249 break;
2250 case HAMMER2_COMP_ZLIB:
2251 hammer2_decompress_ZLIB_callback(data, chain->bytes,
2252 bio);
2253 break;
2254 case HAMMER2_COMP_NONE:
2255 KKASSERT(chain->bytes <= bp->b_bcount);
2256 bcopy(data, bp->b_data, chain->bytes);
2257 if (chain->bytes < bp->b_bcount) {
2258 bzero(bp->b_data + chain->bytes,
2259 bp->b_bcount - chain->bytes);
2260 }
2261 bp->b_flags |= B_NOTMETA;
2262 bp->b_resid = 0;
2263 bp->b_error = 0;
278ab2b2
MD
2264 break;
2265 default:
2266 panic("hammer2_strategy_read: "
2267 "unknown compression type");
355d67fc 2268 }
a5913bdf 2269 } else {
fdf62707
MD
2270 /* bqrelse the dio to help stabilize the call to panic() */
2271 if (dio)
2272 hammer2_io_bqrelse(&dio);
a5913bdf 2273 panic("hammer2_strategy_read: unknown bref type");
db71f61f 2274 }
bca9f8e6 2275 hammer2_io_complete(iocb);
278ab2b2
MD
2276 hammer2_cluster_unlock(cluster);
2277 biodone(bio);
db71f61f
MD
2278}
2279
2280static
2281int
2282hammer2_strategy_write(struct vop_strategy_args *ap)
355d67fc 2283{
065f4046 2284 hammer2_pfsmount_t *pmp;
355d67fc
MD
2285 struct bio *bio;
2286 struct buf *bp;
db71f61f 2287 hammer2_inode_t *ip;
355d67fc 2288
db71f61f
MD
2289 bio = ap->a_bio;
2290 bp = bio->bio_buf;
2291 ip = VTOI(ap->a_vp);
065f4046 2292 pmp = ip->pmp;
355d67fc 2293
3f5b8b3b 2294 hammer2_lwinprog_ref(pmp);
065f4046 2295 mtx_lock(&pmp->wthread_mtx);
a7720be7
MD
2296 if (TAILQ_EMPTY(&pmp->wthread_bioq.queue)) {
2297 bioq_insert_tail(&pmp->wthread_bioq, ap->a_bio);
3f5b8b3b 2298 mtx_unlock(&pmp->wthread_mtx);
a7720be7
MD
2299 wakeup(&pmp->wthread_bioq);
2300 } else {
2301 bioq_insert_tail(&pmp->wthread_bioq, ap->a_bio);
3f5b8b3b 2302 mtx_unlock(&pmp->wthread_mtx);
a7720be7 2303 }
3f5b8b3b 2304 hammer2_lwinprog_wait(pmp);
065f4046 2305
355d67fc 2306 return(0);
db71f61f
MD
2307}
2308
2910a90c
MD
2309/*
2310 * hammer2_vop_ioctl { vp, command, data, fflag, cred }
2311 */
2312static
2313int
2314hammer2_vop_ioctl(struct vop_ioctl_args *ap)
2315{
2910a90c
MD
2316 hammer2_inode_t *ip;
2317 int error;
2318
05dd26e4 2319 LOCKSTART;
2910a90c 2320 ip = VTOI(ap->a_vp);
2910a90c
MD
2321
2322 error = hammer2_ioctl(ip, ap->a_command, (void *)ap->a_data,
2323 ap->a_fflag, ap->a_cred);
05dd26e4 2324 LOCKSTOP;
2910a90c
MD
2325 return (error);
2326}
2327
e118c14f 2328static
f0206a67 2329int
e118c14f 2330hammer2_vop_mountctl(struct vop_mountctl_args *ap)
f0206a67
VS
2331{
2332 struct mount *mp;
e4e20f48 2333 hammer2_pfsmount_t *pmp;
f0206a67
VS
2334 int rc;
2335
05dd26e4 2336 LOCKSTART;
f0206a67
VS
2337 switch (ap->a_op) {
2338 case (MOUNTCTL_SET_EXPORT):
2339 mp = ap->a_head.a_ops->head.vv_mount;
e4e20f48 2340 pmp = MPTOPMP(mp);
f0206a67
VS
2341
2342 if (ap->a_ctllen != sizeof(struct export_args))
2343 rc = (EINVAL);
2344 else
e4e20f48 2345 rc = vfs_export(mp, &pmp->export,
10c5dee0 2346 (const struct export_args *)ap->a_ctl);
f0206a67
VS
2347 break;
2348 default:
2349 rc = vop_stdmountctl(ap);
2350 break;
2351 }
05dd26e4 2352 LOCKSTOP;
f0206a67
VS
2353 return (rc);
2354}
2355
eae2ed61 2356/*
da6f36f4 2357 * This handles unlinked open files after the vnode is finally dereferenced.
850687d2
MD
2358 * To avoid deadlocks it cannot be called from the normal vnode recycling
2359 * path, so we call it (1) after a unlink, rmdir, or rename, (2) on every
2360 * flush, and (3) on umount.
eae2ed61
MD
2361 */
2362void
2363hammer2_run_unlinkq(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp)
2364{
da6f36f4 2365 const hammer2_inode_data_t *ripdata;
eae2ed61
MD
2366 hammer2_inode_unlink_t *ipul;
2367 hammer2_inode_t *ip;
278ab2b2 2368 hammer2_cluster_t *cluster;
da6f36f4 2369 hammer2_cluster_t *cparent;
eae2ed61
MD
2370
2371 if (TAILQ_EMPTY(&pmp->unlinkq))
2372 return;
2373
05dd26e4 2374 LOCKSTART;
da6f36f4 2375 spin_lock(&pmp->list_spin);
eae2ed61
MD
2376 while ((ipul = TAILQ_FIRST(&pmp->unlinkq)) != NULL) {
2377 TAILQ_REMOVE(&pmp->unlinkq, ipul, entry);
da6f36f4 2378 spin_unlock(&pmp->list_spin);
eae2ed61
MD
2379 ip = ipul->ip;
2380 kfree(ipul, pmp->minode);
2381
278ab2b2 2382 cluster = hammer2_inode_lock_ex(ip);
bca9f8e6 2383 ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
850687d2
MD
2384 if (hammer2_debug & 0x400) {
2385 kprintf("hammer2: unlink on reclaim: %s refs=%d\n",
bca9f8e6 2386 ripdata->filename, ip->refs);
850687d2 2387 }
da6f36f4
MD
2388 KKASSERT(ripdata->nlinks == 0);
2389
2390 cparent = hammer2_cluster_parent(cluster);
2391 hammer2_cluster_delete(trans, cparent, cluster,
2392 HAMMER2_DELETE_PERMANENT);
850687d2 2393 hammer2_cluster_unlock(cparent);
278ab2b2 2394 hammer2_inode_unlock_ex(ip, cluster); /* inode lock */
eae2ed61
MD
2395 hammer2_inode_drop(ip); /* ipul ref */
2396
da6f36f4 2397 spin_lock(&pmp->list_spin);
eae2ed61 2398 }
da6f36f4 2399 spin_unlock(&pmp->list_spin);
05dd26e4 2400 LOCKSTOP;
eae2ed61
MD
2401}
2402
2403
41c34a6d
MD
2404/*
2405 * KQFILTER
2406 */
2407static void filt_hammer2detach(struct knote *kn);
2408static int filt_hammer2read(struct knote *kn, long hint);
2409static int filt_hammer2write(struct knote *kn, long hint);
2410static int filt_hammer2vnode(struct knote *kn, long hint);
2411
2412static struct filterops hammer2read_filtops =
2413 { FILTEROP_ISFD | FILTEROP_MPSAFE,
2414 NULL, filt_hammer2detach, filt_hammer2read };
2415static struct filterops hammer2write_filtops =
2416 { FILTEROP_ISFD | FILTEROP_MPSAFE,
2417 NULL, filt_hammer2detach, filt_hammer2write };
2418static struct filterops hammer2vnode_filtops =
2419 { FILTEROP_ISFD | FILTEROP_MPSAFE,
2420 NULL, filt_hammer2detach, filt_hammer2vnode };
2421
2422static
2423int
2424hammer2_vop_kqfilter(struct vop_kqfilter_args *ap)
2425{
2426 struct vnode *vp = ap->a_vp;
2427 struct knote *kn = ap->a_kn;
2428
2429 switch (kn->kn_filter) {
2430 case EVFILT_READ:
2431 kn->kn_fop = &hammer2read_filtops;
2432 break;
2433 case EVFILT_WRITE:
2434 kn->kn_fop = &hammer2write_filtops;
2435 break;
2436 case EVFILT_VNODE:
2437 kn->kn_fop = &hammer2vnode_filtops;
2438 break;
2439 default:
2440 return (EOPNOTSUPP);
2441 }
2442
2443 kn->kn_hook = (caddr_t)vp;
2444
2445 knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
2446
2447 return(0);
2448}
2449
2450static void
2451filt_hammer2detach(struct knote *kn)
2452{
2453 struct vnode *vp = (void *)kn->kn_hook;
2454
2455 knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
2456}
2457
2458static int
2459filt_hammer2read(struct knote *kn, long hint)
2460{
2461 struct vnode *vp = (void *)kn->kn_hook;
2462 hammer2_inode_t *ip = VTOI(vp);
2463 off_t off;
2464
2465 if (hint == NOTE_REVOKE) {
2466 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
2467 return(1);
2468 }
2469 off = ip->size - kn->kn_fp->f_offset;
2470 kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
2471 if (kn->kn_sfflags & NOTE_OLDAPI)
2472 return(1);
2473 return (kn->kn_data != 0);
2474}
2475
2476
2477static int
2478filt_hammer2write(struct knote *kn, long hint)
2479{
2480 if (hint == NOTE_REVOKE)
2481 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
2482 kn->kn_data = 0;
2483 return (1);
2484}
2485
2486static int
2487filt_hammer2vnode(struct knote *kn, long hint)
2488{
2489 if (kn->kn_sfflags & hint)
2490 kn->kn_fflags |= hint;
2491 if (hint == NOTE_REVOKE) {
2492 kn->kn_flags |= (EV_EOF | EV_NODATA);
2493 return (1);
2494 }
2495 return (kn->kn_fflags != 0);
2496}
2497
2498/*
2499 * FIFO VOPS
2500 */
2501static
2502int
2503hammer2_vop_markatime(struct vop_markatime_args *ap)
2504{
2505 hammer2_inode_t *ip;
2506 struct vnode *vp;
2507
2508 vp = ap->a_vp;
2509 ip = VTOI(vp);
2510
2511 if (ip->pmp->ronly)
2512 return(EROFS);
2513 return(0);
2514}
2515
2516static
2517int
2518hammer2_vop_fifokqfilter(struct vop_kqfilter_args *ap)
2519{
2520 int error;
2521
2522 error = VOCALL(&fifo_vnode_vops, &ap->a_head);
2523 if (error)
2524 error = hammer2_vop_kqfilter(ap);
2525 return(error);
2526}
2527
2528/*
2529 * VOPS vector
2530 */
703720e4
MD
2531struct vop_ops hammer2_vnode_vops = {
2532 .vop_default = vop_defaultop,
e118c14f 2533 .vop_fsync = hammer2_vop_fsync,
703720e4
MD
2534 .vop_getpages = vop_stdgetpages,
2535 .vop_putpages = vop_stdputpages,
e118c14f 2536 .vop_access = hammer2_vop_access,
37aa19df 2537 .vop_advlock = hammer2_vop_advlock,
c667909f 2538 .vop_close = hammer2_vop_close,
db0c2eb3 2539 .vop_nlink = hammer2_vop_nlink,
c667909f 2540 .vop_ncreate = hammer2_vop_ncreate,
4e2004ea
MD
2541 .vop_nsymlink = hammer2_vop_nsymlink,
2542 .vop_nremove = hammer2_vop_nremove,
2543 .vop_nrmdir = hammer2_vop_nrmdir,
2544 .vop_nrename = hammer2_vop_nrename,
e118c14f 2545 .vop_getattr = hammer2_vop_getattr,
3ac6a319 2546 .vop_setattr = hammer2_vop_setattr,
e118c14f 2547 .vop_readdir = hammer2_vop_readdir,
4e2004ea 2548 .vop_readlink = hammer2_vop_readlink,
5b4a2132
MD
2549 .vop_getpages = vop_stdgetpages,
2550 .vop_putpages = vop_stdputpages,
e118c14f
MD
2551 .vop_read = hammer2_vop_read,
2552 .vop_write = hammer2_vop_write,
2553 .vop_open = hammer2_vop_open,
2554 .vop_inactive = hammer2_vop_inactive,
2555 .vop_reclaim = hammer2_vop_reclaim,
2556 .vop_nresolve = hammer2_vop_nresolve,
37494cab
MD
2557 .vop_nlookupdotdot = hammer2_vop_nlookupdotdot,
2558 .vop_nmkdir = hammer2_vop_nmkdir,
41c34a6d 2559 .vop_nmknod = hammer2_vop_nmknod,
2910a90c 2560 .vop_ioctl = hammer2_vop_ioctl,
e118c14f
MD
2561 .vop_mountctl = hammer2_vop_mountctl,
2562 .vop_bmap = hammer2_vop_bmap,
2563 .vop_strategy = hammer2_vop_strategy,
41c34a6d 2564 .vop_kqfilter = hammer2_vop_kqfilter
703720e4
MD
2565};
2566
2567struct vop_ops hammer2_spec_vops = {
41c34a6d
MD
2568 .vop_default = vop_defaultop,
2569 .vop_fsync = hammer2_vop_fsync,
2570 .vop_read = vop_stdnoread,
2571 .vop_write = vop_stdnowrite,
2572 .vop_access = hammer2_vop_access,
2573 .vop_close = hammer2_vop_close,
2574 .vop_markatime = hammer2_vop_markatime,
2575 .vop_getattr = hammer2_vop_getattr,
2576 .vop_inactive = hammer2_vop_inactive,
2577 .vop_reclaim = hammer2_vop_reclaim,
2578 .vop_setattr = hammer2_vop_setattr
703720e4
MD
2579};
2580
2581struct vop_ops hammer2_fifo_vops = {
41c34a6d
MD
2582 .vop_default = fifo_vnoperate,
2583 .vop_fsync = hammer2_vop_fsync,
2584#if 0
2585 .vop_read = hammer2_vop_fiforead,
2586 .vop_write = hammer2_vop_fifowrite,
2587#endif
2588 .vop_access = hammer2_vop_access,
2589#if 0
2590 .vop_close = hammer2_vop_fifoclose,
2591#endif
2592 .vop_markatime = hammer2_vop_markatime,
2593 .vop_getattr = hammer2_vop_getattr,
2594 .vop_inactive = hammer2_vop_inactive,
2595 .vop_reclaim = hammer2_vop_reclaim,
2596 .vop_setattr = hammer2_vop_setattr,
2597 .vop_kqfilter = hammer2_vop_fifokqfilter
703720e4 2598};
355d67fc 2599