HAMMER 56C/Many: Performance tuning - MEDIA STRUCTURES CHANGED!
sys/vfs/hammer/hammer_inode.c

/*
 * Copyright (c) 2007-2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.78 2008/06/20 05:38:26 dillon Exp $
 */

#include "hammer.h"
#include <vm/vm_extern.h>
#include <sys/buf.h>
#include <sys/buf2.h>

static int hammer_unload_inode(struct hammer_inode *ip);
static void hammer_flush_inode_core(hammer_inode_t ip, int flags);
static int hammer_setup_child_callback(hammer_record_t rec, void *data);
static int hammer_setup_parent_inodes(hammer_inode_t ip);
static int hammer_setup_parent_inodes_helper(hammer_record_t record);
static void hammer_inode_wakereclaims(hammer_inode_t ip);

#ifdef DEBUG_TRUNCATE
extern struct hammer_inode *HammerTruncIp;
#endif

/*
 * The kernel is not actively referencing this vnode but is still holding
 * it cached.
 *
 * This is called from the frontend.
 */
int
hammer_vop_inactive(struct vop_inactive_args *ap)
{
	struct hammer_inode *ip = VTOI(ap->a_vp);

	/*
	 * Degenerate case
	 */
	if (ip == NULL) {
		vrecycle(ap->a_vp);
		return(0);
	}

	/*
	 * If the inode no longer has visibility in the filesystem try to
	 * recycle it immediately, even if the inode is dirty.  Recycling
	 * it quickly allows the system to reclaim buffer cache and VM
	 * resources which can matter a lot in a heavily loaded system.
	 *
	 * This can deadlock in vfsync() if we aren't careful.
	 *
	 * Do not queue the inode to the flusher if we still have visibility,
	 * otherwise namespace calls such as chmod will unnecessarily generate
	 * multiple inode updates.
	 */
	hammer_inode_unloadable_check(ip, 0);
	if (ip->ino_data.nlinks == 0) {
		if (ip->flags & HAMMER_INODE_MODMASK)
			hammer_flush_inode(ip, 0);
		vrecycle(ap->a_vp);
	}
	return(0);
}

/*
 * Release the vnode association.  This is typically (but not always)
 * the last reference on the inode.
 *
 * Once the association is lost we are on our own with regard to
 * flushing the inode.
 */
int
hammer_vop_reclaim(struct vop_reclaim_args *ap)
{
	struct hammer_inode *ip;
	hammer_mount_t hmp;
	struct vnode *vp;

	vp = ap->a_vp;

	if ((ip = vp->v_data) != NULL) {
		hmp = ip->hmp;
		vp->v_data = NULL;
		ip->vp = NULL;

		if ((ip->flags & HAMMER_INODE_RECLAIM) == 0) {
			++hammer_count_reclaiming;
			++hmp->inode_reclaims;
			ip->flags |= HAMMER_INODE_RECLAIM;
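			/*
			 * Throttling note (added): as coded below, once the
			 * number of inodes pending reclamation exceeds
			 * HAMMER_RECLAIM_FLUSH, the flusher is poked
			 * asynchronously on every 256th reclaim, keeping the
			 * backlog draining without signaling on every single
			 * reclaim.
			 */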
			if (hmp->inode_reclaims > HAMMER_RECLAIM_FLUSH &&
			    (hmp->inode_reclaims & 255) == 0) {
				hammer_flusher_async(hmp);
			}
		}
		hammer_rel_inode(ip, 1);
	}
	return(0);
}

/*
 * Return a locked vnode for the specified inode.  The inode must be
 * referenced but NOT LOCKED on entry and will remain referenced on
 * return.
 *
 * Called from the frontend.
 */
int
hammer_get_vnode(struct hammer_inode *ip, struct vnode **vpp)
{
	hammer_mount_t hmp;
	struct vnode *vp;
	int error = 0;

	hmp = ip->hmp;

	for (;;) {
		if ((vp = ip->vp) == NULL) {
			error = getnewvnode(VT_HAMMER, hmp->mp, vpp, 0, 0);
			if (error)
				break;
			hammer_lock_ex(&ip->lock);
			if (ip->vp != NULL) {
				hammer_unlock(&ip->lock);
				vp->v_type = VBAD;
				vx_put(vp);
				continue;
			}
			hammer_ref(&ip->lock);
			vp = *vpp;
			ip->vp = vp;
			vp->v_type =
				hammer_get_vnode_type(ip->ino_data.obj_type);

			hammer_inode_wakereclaims(ip);

			switch(ip->ino_data.obj_type) {
			case HAMMER_OBJTYPE_CDEV:
			case HAMMER_OBJTYPE_BDEV:
				vp->v_ops = &hmp->mp->mnt_vn_spec_ops;
				addaliasu(vp, ip->ino_data.rmajor,
					  ip->ino_data.rminor);
				break;
			case HAMMER_OBJTYPE_FIFO:
				vp->v_ops = &hmp->mp->mnt_vn_fifo_ops;
				break;
			default:
				break;
			}

			/*
			 * Only mark as the root vnode if the ip is not
			 * historical, otherwise the VFS cache will get
			 * confused.  The other half of the special handling
			 * is in hammer_vop_nlookupdotdot().
			 */
			if (ip->obj_id == HAMMER_OBJID_ROOT &&
			    ip->obj_asof == hmp->asof) {
				vp->v_flag |= VROOT;
			}

			vp->v_data = (void *)ip;
			/* vnode locked by getnewvnode() */
			/* make related vnode dirty if inode dirty? */
			hammer_unlock(&ip->lock);
			if (vp->v_type == VREG)
				vinitvmio(vp, ip->ino_data.size);
			break;
		}

		/*
		 * loop if the vget fails (aka races), or if the vp
		 * no longer matches ip->vp.
		 */
		if (vget(vp, LK_EXCLUSIVE) == 0) {
			if (vp == ip->vp)
				break;
			vput(vp);
		}
	}
	*vpp = vp;
	return(error);
}

/*
 * Acquire a HAMMER inode.  The returned inode is not locked.  These functions
 * do not attach or detach the related vnode (use hammer_get_vnode() for
 * that).
 *
 * The flags argument is only applied for newly created inodes, and only
 * certain flags are inherited.
 *
 * Called from the frontend.
 */
struct hammer_inode *
hammer_get_inode(hammer_transaction_t trans, hammer_inode_t dip,
		 u_int64_t obj_id, hammer_tid_t asof, int flags, int *errorp)
{
	hammer_mount_t hmp = trans->hmp;
	struct hammer_inode_info iinfo;
	struct hammer_cursor cursor;
	struct hammer_inode *ip;

	/*
	 * Determine if we already have an inode cached.  If we do then
	 * we are golden.
	 */
	iinfo.obj_id = obj_id;
	iinfo.obj_asof = asof;
loop:
	ip = hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, &iinfo);
	if (ip) {
		hammer_ref(&ip->lock);
		*errorp = 0;
		return(ip);
	}

	/*
	 * Allocate a new inode structure and deal with races later.
	 */
	ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
	++hammer_count_inodes;
	++hmp->count_inodes;
	ip->obj_id = obj_id;
	ip->obj_asof = iinfo.obj_asof;
	ip->hmp = hmp;
	ip->flags = flags & HAMMER_INODE_RO;
	ip->cache[0].ip = ip;
	ip->cache[1].ip = ip;
	if (hmp->ronly)
		ip->flags |= HAMMER_INODE_RO;
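	/*
	 * Added note: 0x7FFFFFFFFFFFFFFFLL is this file's "no truncation
	 * pending" sentinel -- the largest positive 64-bit offset, so any
	 * real truncation offset compares smaller.  The same convention
	 * appears in hammer_create_inode() and hammer_flush_inode_core().
	 */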
	ip->sync_trunc_off = ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
	RB_INIT(&ip->rec_tree);
	TAILQ_INIT(&ip->target_list);

	/*
	 * Locate the on-disk inode.
	 */
retry:
	hammer_init_cursor(trans, &cursor, (dip ? &dip->cache[0] : NULL), NULL);
	cursor.key_beg.localization = HAMMER_LOCALIZE_INODE;
	cursor.key_beg.obj_id = ip->obj_id;
	cursor.key_beg.key = 0;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
	cursor.key_beg.obj_type = 0;
	cursor.asof = iinfo.obj_asof;
	cursor.flags = HAMMER_CURSOR_GET_LEAF | HAMMER_CURSOR_GET_DATA |
		       HAMMER_CURSOR_ASOF;

	*errorp = hammer_btree_lookup(&cursor);
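	/*
	 * Added note on the retry convention used throughout this file:
	 * a B-Tree lookup that returns EDEADLK cannot simply be retried
	 * in place; the cursor must be torn down with hammer_done_cursor()
	 * and re-initialized before the operation is attempted again, as
	 * the loop below does.
	 */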
	if (*errorp == EDEADLK) {
		hammer_done_cursor(&cursor);
		goto retry;
	}

	/*
	 * On success the B-Tree lookup will hold the appropriate
	 * buffer cache buffers and provide a pointer to the requested
	 * information.  Copy the information to the in-memory inode
	 * and cache the B-Tree node to improve future operations.
	 */
	if (*errorp == 0) {
		ip->ino_leaf = cursor.node->ondisk->elms[cursor.index].leaf;
		ip->ino_data = cursor.data->inode;

		/*
		 * cache[0] tries to cache the location of the object inode.
		 * The assumption is that it is near the directory inode.
		 *
		 * cache[1] tries to cache the location of the object data.
		 * The assumption is that it is near the directory data.
		 */
		hammer_cache_node(&ip->cache[0], cursor.node);
		if (dip && dip->cache[1].node)
			hammer_cache_node(&ip->cache[1], dip->cache[1].node);

		/*
		 * The file should not contain any data past the file size
		 * stored in the inode.  Setting sync_trunc_off to the
		 * file size instead of max reduces B-Tree lookup overheads
		 * on append by allowing the flusher to avoid checking for
		 * record overwrites.
		 */
		ip->sync_trunc_off = ip->ino_data.size;
	}

	/*
	 * The inode is placed on the red-black tree and will be synced to
	 * the media when flushed or by the filesystem sync.  If this races
	 * another instantiation/lookup the insertion will fail.
	 */
	if (*errorp == 0) {
		hammer_ref(&ip->lock);
		if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
			hammer_uncache_node(&ip->cache[0]);
			hammer_uncache_node(&ip->cache[1]);
			KKASSERT(ip->lock.refs == 1);
			--hammer_count_inodes;
			--hmp->count_inodes;
			kfree(ip, M_HAMMER);
			hammer_done_cursor(&cursor);
			goto loop;
		}
		ip->flags |= HAMMER_INODE_ONDISK;
	} else {
		/*
		 * Do not panic on read-only accesses which fail, particularly
		 * historical accesses where the snapshot might not have
		 * complete connectivity.
		 */
		if ((flags & HAMMER_INODE_RO) == 0) {
			kprintf("hammer_get_inode: failed ip %p obj_id %016llx cursor %p error %d\n",
				ip, ip->obj_id, &cursor, *errorp);
			Debugger("x");
		}
		if (ip->flags & HAMMER_INODE_RSV_INODES) {
			ip->flags &= ~HAMMER_INODE_RSV_INODES; /* sanity */
			--hmp->rsv_inodes;
		}
		hmp->rsv_databufs -= ip->rsv_databufs;
		ip->rsv_databufs = 0;	/* sanity */

		--hammer_count_inodes;
		--hmp->count_inodes;
		kfree(ip, M_HAMMER);
		ip = NULL;
	}
	hammer_done_cursor(&cursor);
	return (ip);
}

/*
 * Create a new filesystem object, returning the inode in *ipp.  The
 * returned inode will be referenced.
 *
 * The inode is created in-memory.
 */
int
hammer_create_inode(hammer_transaction_t trans, struct vattr *vap,
		    struct ucred *cred, hammer_inode_t dip,
		    struct hammer_inode **ipp)
{
	hammer_mount_t hmp;
	hammer_inode_t ip;
	uid_t xuid;

	hmp = trans->hmp;
	ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
	++hammer_count_inodes;
	++hmp->count_inodes;
	ip->obj_id = hammer_alloc_objid(trans, dip);
	KKASSERT(ip->obj_id != 0);
	ip->obj_asof = hmp->asof;
	ip->hmp = hmp;
	ip->flush_state = HAMMER_FST_IDLE;
	ip->flags = HAMMER_INODE_DDIRTY | HAMMER_INODE_ITIMES;
	ip->cache[0].ip = ip;
	ip->cache[1].ip = ip;

	ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
	RB_INIT(&ip->rec_tree);
	TAILQ_INIT(&ip->target_list);

	ip->ino_data.atime = trans->time;
	ip->ino_data.mtime = trans->time;
	ip->ino_data.size = 0;
	ip->ino_data.nlinks = 0;

	/*
	 * A nohistory designator on the parent directory is inherited by
	 * the child.
	 */
	ip->ino_data.uflags = dip->ino_data.uflags &
			      (SF_NOHISTORY|UF_NOHISTORY|UF_NODUMP);

	ip->ino_leaf.base.btype = HAMMER_BTREE_TYPE_RECORD;
	ip->ino_leaf.base.localization = HAMMER_LOCALIZE_INODE;
	ip->ino_leaf.base.obj_id = ip->obj_id;
	ip->ino_leaf.base.key = 0;
	ip->ino_leaf.base.create_tid = 0;
	ip->ino_leaf.base.delete_tid = 0;
	ip->ino_leaf.base.rec_type = HAMMER_RECTYPE_INODE;
	ip->ino_leaf.base.obj_type = hammer_get_obj_type(vap->va_type);

	ip->ino_data.obj_type = ip->ino_leaf.base.obj_type;
	ip->ino_data.version = HAMMER_INODE_DATA_VERSION;
	ip->ino_data.mode = vap->va_mode;
	ip->ino_data.ctime = trans->time;
	ip->ino_data.parent_obj_id = (dip) ? dip->ino_leaf.base.obj_id : 0;

	switch(ip->ino_leaf.base.obj_type) {
	case HAMMER_OBJTYPE_CDEV:
	case HAMMER_OBJTYPE_BDEV:
		ip->ino_data.rmajor = vap->va_rmajor;
		ip->ino_data.rminor = vap->va_rminor;
		break;
	default:
		break;
	}

	/*
	 * Calculate default uid/gid and overwrite with information from
	 * the vap.
	 */
	xuid = hammer_to_unix_xid(&dip->ino_data.uid);
	xuid = vop_helper_create_uid(hmp->mp, dip->ino_data.mode, xuid, cred,
				     &vap->va_mode);
	ip->ino_data.mode = vap->va_mode;

	if (vap->va_vaflags & VA_UID_UUID_VALID)
		ip->ino_data.uid = vap->va_uid_uuid;
	else if (vap->va_uid != (uid_t)VNOVAL)
		hammer_guid_to_uuid(&ip->ino_data.uid, vap->va_uid);
	else
		hammer_guid_to_uuid(&ip->ino_data.uid, xuid);

	if (vap->va_vaflags & VA_GID_UUID_VALID)
		ip->ino_data.gid = vap->va_gid_uuid;
	else if (vap->va_gid != (gid_t)VNOVAL)
		hammer_guid_to_uuid(&ip->ino_data.gid, vap->va_gid);
	else
		ip->ino_data.gid = dip->ino_data.gid;

	hammer_ref(&ip->lock);
	if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
		hammer_unref(&ip->lock);
		panic("hammer_create_inode: duplicate obj_id %llx", ip->obj_id);
	}
	*ipp = ip;
	return(0);
}

/*
 * Called by hammer_sync_inode().
 */
static int
hammer_update_inode(hammer_cursor_t cursor, hammer_inode_t ip)
{
	hammer_transaction_t trans = cursor->trans;
	hammer_record_t record;
	int error;

retry:
	error = 0;

	/*
	 * If the inode has a presence on-disk then locate it and mark
	 * it deleted, setting DELONDISK.
	 *
	 * The record may or may not be physically deleted, depending on
	 * the retention policy.
	 */
	if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
	    HAMMER_INODE_ONDISK) {
		hammer_normalize_cursor(cursor);
		cursor->key_beg.localization = HAMMER_LOCALIZE_INODE;
		cursor->key_beg.obj_id = ip->obj_id;
		cursor->key_beg.key = 0;
		cursor->key_beg.create_tid = 0;
		cursor->key_beg.delete_tid = 0;
		cursor->key_beg.rec_type = HAMMER_RECTYPE_INODE;
		cursor->key_beg.obj_type = 0;
		cursor->asof = ip->obj_asof;
		cursor->flags &= ~HAMMER_CURSOR_INITMASK;
		cursor->flags |= HAMMER_CURSOR_GET_LEAF | HAMMER_CURSOR_ASOF;
		cursor->flags |= HAMMER_CURSOR_BACKEND;

		error = hammer_btree_lookup(cursor);
		if (hammer_debug_inode)
			kprintf("IPDEL %p %08x %d", ip, ip->flags, error);
		if (error) {
			kprintf("error %d\n", error);
			Debugger("hammer_update_inode");
		}

		if (error == 0) {
			error = hammer_ip_delete_record(cursor, ip, trans->tid);
			if (hammer_debug_inode)
				kprintf(" error %d\n", error);
			if (error && error != EDEADLK) {
				kprintf("error %d\n", error);
				Debugger("hammer_update_inode2");
			}
			if (error == 0) {
				ip->flags |= HAMMER_INODE_DELONDISK;
			}
			if (cursor->node)
				hammer_cache_node(&ip->cache[0], cursor->node);
		}
		if (error == EDEADLK) {
			hammer_done_cursor(cursor);
			error = hammer_init_cursor(trans, cursor,
						   &ip->cache[0], ip);
			if (hammer_debug_inode)
				kprintf("IPDED %p %d\n", ip, error);
			if (error == 0)
				goto retry;
		}
	}

	/*
	 * Ok, write out the initial record or a new record (after deleting
	 * the old one), unless the DELETED flag is set.  This routine will
	 * clear DELONDISK if it writes out a record.
	 *
	 * Update our inode statistics if this is the first application of
	 * the inode on-disk.
	 */
	if (error == 0 && (ip->flags & HAMMER_INODE_DELETED) == 0) {
		/*
		 * Generate a record and write it to the media
		 */
		record = hammer_alloc_mem_record(ip, 0);
		record->type = HAMMER_MEM_RECORD_INODE;
		record->flush_state = HAMMER_FST_FLUSH;
		record->leaf = ip->sync_ino_leaf;
		record->leaf.base.create_tid = trans->tid;
		record->leaf.data_len = sizeof(ip->sync_ino_data);
		record->data = (void *)&ip->sync_ino_data;
		record->flags |= HAMMER_RECF_INTERLOCK_BE;
		for (;;) {
			error = hammer_ip_sync_record_cursor(cursor, record);
			if (hammer_debug_inode)
				kprintf("GENREC %p rec %08x %d\n",
					ip, record->flags, error);
			if (error != EDEADLK)
				break;
			hammer_done_cursor(cursor);
			error = hammer_init_cursor(trans, cursor,
						   &ip->cache[0], ip);
			if (hammer_debug_inode)
				kprintf("GENREC reinit %d\n", error);
			if (error)
				break;
		}
		if (error) {
			kprintf("error %d\n", error);
			Debugger("hammer_update_inode3");
		}

		/*
		 * The record isn't managed by the inode's record tree,
		 * destroy it whether we succeed or fail.
		 */
		record->flags &= ~HAMMER_RECF_INTERLOCK_BE;
		record->flags |= HAMMER_RECF_DELETED_FE;
		record->flush_state = HAMMER_FST_IDLE;
		hammer_rel_mem_record(record);

		/*
		 * Finish up.
		 */
		if (error == 0) {
			if (hammer_debug_inode)
				kprintf("CLEANDELOND %p %08x\n", ip, ip->flags);
			ip->sync_flags &= ~(HAMMER_INODE_DDIRTY |
					    HAMMER_INODE_ITIMES);
			ip->flags &= ~HAMMER_INODE_DELONDISK;

			/*
			 * Root volume count of inodes
			 */
			if ((ip->flags & HAMMER_INODE_ONDISK) == 0) {
				hammer_modify_volume_field(trans,
							   trans->rootvol,
							   vol0_stat_inodes);
				++ip->hmp->rootvol->ondisk->vol0_stat_inodes;
				hammer_modify_volume_done(trans->rootvol);
				ip->flags |= HAMMER_INODE_ONDISK;
				if (hammer_debug_inode)
					kprintf("NOWONDISK %p\n", ip);
			}
		}
	}

	/*
	 * If the inode has been destroyed, clean out any left-over flags
	 * that may have been set by the frontend.
	 */
	if (error == 0 && (ip->flags & HAMMER_INODE_DELETED)) {
		ip->sync_flags &= ~(HAMMER_INODE_DDIRTY |
				    HAMMER_INODE_ITIMES);
	}
	return(error);
}
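
/*
 * Added note: as coded above, updating an inode's on-disk record is a
 * delete-then-reinsert sequence (mark the old record deleted, setting
 * DELONDISK, then write a fresh record with a new create_tid), which
 * preserves history.  The one in-place exception is the atime/mtime
 * patch done by hammer_update_itimes() below.
 */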

/*
 * Update only the itimes fields.  This is done non-historically.  The
 * record is updated in-place on the disk.
 */
static int
hammer_update_itimes(hammer_cursor_t cursor, hammer_inode_t ip)
{
	hammer_transaction_t trans = cursor->trans;
	struct hammer_btree_leaf_elm *leaf;
	int error;

retry:
	error = 0;
	if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
	    HAMMER_INODE_ONDISK) {
		hammer_normalize_cursor(cursor);
		cursor->key_beg.localization = HAMMER_LOCALIZE_INODE;
		cursor->key_beg.obj_id = ip->obj_id;
		cursor->key_beg.key = 0;
		cursor->key_beg.create_tid = 0;
		cursor->key_beg.delete_tid = 0;
		cursor->key_beg.rec_type = HAMMER_RECTYPE_INODE;
		cursor->key_beg.obj_type = 0;
		cursor->asof = ip->obj_asof;
		cursor->flags &= ~HAMMER_CURSOR_INITMASK;
		cursor->flags |= HAMMER_CURSOR_ASOF;
		cursor->flags |= HAMMER_CURSOR_GET_LEAF;
		cursor->flags |= HAMMER_CURSOR_GET_DATA;
		cursor->flags |= HAMMER_CURSOR_BACKEND;

		error = hammer_btree_lookup(cursor);
		if (error) {
			kprintf("error %d\n", error);
			Debugger("hammer_update_itimes1");
		}
		if (error == 0) {
			/*
			 * atime/mtime updates can be done in place, but
			 * they are nasty because we also have to update the
			 * data_crc in the B-Tree leaf, which means we
			 * ALSO have to generate UNDO records.
			 */
			hammer_modify_buffer(trans, cursor->data_buffer,
				     HAMMER_ITIMES_BASE(&cursor->data->inode),
				     HAMMER_ITIMES_BYTES);
			cursor->data->inode.atime = ip->sync_ino_data.atime;
			cursor->data->inode.mtime = ip->sync_ino_data.mtime;
			hammer_modify_buffer_done(cursor->data_buffer);

			leaf = cursor->leaf;
			hammer_modify_node(trans, cursor->node,
					   &leaf->data_crc,
					   sizeof(leaf->data_crc));
			leaf->data_crc = crc32(cursor->data, leaf->data_len);
			hammer_modify_node_done(cursor->node);

			ip->sync_flags &= ~HAMMER_INODE_ITIMES;
			/* XXX recalculate crc */
			hammer_cache_node(&ip->cache[0], cursor->node);
		}
		if (error == EDEADLK) {
			hammer_done_cursor(cursor);
			error = hammer_init_cursor(trans, cursor,
						   &ip->cache[0], ip);
			if (error == 0)
				goto retry;
		}
	}
	return(error);
}

/*
 * Release a reference on an inode, flush as requested.
 *
 * On the last reference we queue the inode to the flusher for its final
 * disposition.
 */
void
hammer_rel_inode(struct hammer_inode *ip, int flush)
{
	hammer_mount_t hmp = ip->hmp;

	/*
	 * Handle disposition when dropping the last ref.
	 */
	for (;;) {
		if (ip->lock.refs == 1) {
			/*
			 * Determine whether on-disk action is needed for
			 * the inode's final disposition.
			 */
			KKASSERT(ip->vp == NULL);
			hammer_inode_unloadable_check(ip, 0);
			if (ip->flags & HAMMER_INODE_MODMASK) {
				if (hmp->rsv_inodes > desiredvnodes) {
					hammer_flush_inode(ip,
							   HAMMER_FLUSH_SIGNAL);
				} else {
					hammer_flush_inode(ip, 0);
				}
			} else if (ip->lock.refs == 1) {
				hammer_unload_inode(ip);
				break;
			}
		} else {
			if (flush)
				hammer_flush_inode(ip, 0);

			/*
			 * The inode still has multiple refs, try to drop
			 * one ref.
			 */
			KKASSERT(ip->lock.refs >= 1);
			if (ip->lock.refs > 1) {
				hammer_unref(&ip->lock);
				break;
			}
		}
	}
}

/*
 * Unload and destroy the specified inode.  Must be called with one remaining
 * reference.  The reference is disposed of.
 *
 * This can only be called in the context of the flusher.
 */
static int
hammer_unload_inode(struct hammer_inode *ip)
{
	hammer_mount_t hmp = ip->hmp;

	KASSERT(ip->lock.refs == 1,
		("hammer_unload_inode: %d refs\n", ip->lock.refs));
	KKASSERT(ip->vp == NULL);
	KKASSERT(ip->flush_state == HAMMER_FST_IDLE);
	KKASSERT(ip->cursor_ip_refs == 0);
	KKASSERT(ip->lock.lockcount == 0);
	KKASSERT((ip->flags & HAMMER_INODE_MODMASK) == 0);

	KKASSERT(RB_EMPTY(&ip->rec_tree));
	KKASSERT(TAILQ_EMPTY(&ip->target_list));

	RB_REMOVE(hammer_ino_rb_tree, &hmp->rb_inos_root, ip);

	hammer_uncache_node(&ip->cache[0]);
	hammer_uncache_node(&ip->cache[1]);
	if (ip->objid_cache)
		hammer_clear_objid(ip);
	--hammer_count_inodes;
	--hmp->count_inodes;

	hammer_inode_wakereclaims(ip);
	kfree(ip, M_HAMMER);

	return(0);
}

/*
 * Called on mount -u when switching from RW to RO or vice-versa.  Adjust
 * the read-only flag for cached inodes.
 *
 * This routine is called from a RB_SCAN().
 */
int
hammer_reload_inode(hammer_inode_t ip, void *arg __unused)
{
	hammer_mount_t hmp = ip->hmp;

	if (hmp->ronly || hmp->asof != HAMMER_MAX_TID)
		ip->flags |= HAMMER_INODE_RO;
	else
		ip->flags &= ~HAMMER_INODE_RO;
	return(0);
}

/*
 * A transaction has modified an inode, requiring updates as specified by
 * the passed flags.
 *
 * HAMMER_INODE_DDIRTY:	Inode data has been updated
 * HAMMER_INODE_XDIRTY:	Dirty in-memory records
 * HAMMER_INODE_BUFS:	Dirty buffer cache buffers
 * HAMMER_INODE_DELETED: Inode record/data must be deleted
 * HAMMER_INODE_ITIMES:	mtime/atime has been updated
 */
void
hammer_modify_inode(hammer_inode_t ip, int flags)
{
	KKASSERT((ip->flags & HAMMER_INODE_RO) == 0 ||
		 (flags & (HAMMER_INODE_DDIRTY |
			   HAMMER_INODE_XDIRTY | HAMMER_INODE_BUFS |
			   HAMMER_INODE_DELETED | HAMMER_INODE_ITIMES)) == 0);
	if ((ip->flags & HAMMER_INODE_RSV_INODES) == 0) {
		ip->flags |= HAMMER_INODE_RSV_INODES;
		++ip->hmp->rsv_inodes;
	}

	ip->flags |= flags;
}

/*
 * Request that an inode be flushed.  This whole mess cannot block and may
 * recurse (if not synchronous).  Once requested HAMMER will attempt to
 * actively flush the inode until the flush can be done.
 *
 * The inode may already be flushing, or may be in a setup state.  We can
 * place the inode in a flushing state if it is currently idle and flag it
 * to reflush if it is currently flushing.
 *
 * If the HAMMER_FLUSH_SYNCHRONOUS flag is specified we will attempt to
 * flush the inode synchronously using the caller's context.
 */
void
hammer_flush_inode(hammer_inode_t ip, int flags)
{
	int good;

	/*
	 * Trivial 'nothing to flush' case.  If the inode is in a SETUP
	 * state we have to put it back into an IDLE state so we can
	 * drop the extra ref.
	 */
	if ((ip->flags & HAMMER_INODE_MODMASK) == 0) {
		if (ip->flush_state == HAMMER_FST_SETUP) {
			ip->flush_state = HAMMER_FST_IDLE;
			hammer_rel_inode(ip, 0);
		}
		return;
	}

	/*
	 * Our flush action will depend on the current state.
	 */
	switch(ip->flush_state) {
	case HAMMER_FST_IDLE:
		/*
		 * We have no dependencies and can flush immediately.  Some
		 * of our children may not be flushable so we have to re-test
		 * with that additional knowledge.
		 */
		hammer_flush_inode_core(ip, flags);
		break;
	case HAMMER_FST_SETUP:
		/*
		 * Recurse upwards through dependencies via target_list
		 * and start their flusher actions going if possible.
		 *
		 * 'good' is our connectivity.  -1 means we have none and
		 * can't flush, 0 means there weren't any dependencies, and
		 * 1 means we have good connectivity.
		 */
		good = hammer_setup_parent_inodes(ip);

		/*
		 * We can continue if good >= 0.  Determine how many records
		 * under our inode can be flushed (and mark them).
		 */
		if (good >= 0) {
			hammer_flush_inode_core(ip, flags);
		} else {
			ip->flags |= HAMMER_INODE_REFLUSH;
			if (flags & HAMMER_FLUSH_SIGNAL) {
				ip->flags |= HAMMER_INODE_RESIGNAL;
				hammer_flusher_async(ip->hmp);
			}
		}
		break;
	default:
		/*
		 * We are already flushing, flag the inode to reflush
		 * if needed after it completes its current flush.
		 */
		if ((ip->flags & HAMMER_INODE_REFLUSH) == 0)
			ip->flags |= HAMMER_INODE_REFLUSH;
		if (flags & HAMMER_FLUSH_SIGNAL) {
			ip->flags |= HAMMER_INODE_RESIGNAL;
			hammer_flusher_async(ip->hmp);
		}
		break;
	}
}
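
/*
 * Added summary of the flush state machine implemented above:
 *
 *	IDLE  -> FLUSH		no dependencies, flush immediately
 *	SETUP -> FLUSH		parent connectivity resolved (good >= 0)
 *	SETUP -> SETUP+REFLUSH	parents unresolved, retry on a later flush
 *	FLUSH -> FLUSH+REFLUSH	already flushing, queue another pass
 *
 * The reverse transitions (FLUSH -> SETUP or IDLE) are made by
 * hammer_flush_inode_done() when the backend completes.
 */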

/*
 * Scan ip->target_list, which is a list of records owned by PARENT inodes
 * which reference our ip.
 *
 * XXX This is a huge mess of recursive code, but not one bit of it blocks
 *     so for now do not ref/deref the structures.  Note that if we use the
 *     ref/rel code later, the rel CAN block.
 */
static int
hammer_setup_parent_inodes(hammer_inode_t ip)
{
	hammer_record_t depend;
#if 0
	hammer_record_t next;
	hammer_inode_t pip;
#endif
	int good;
	int r;

	good = 0;
	TAILQ_FOREACH(depend, &ip->target_list, target_entry) {
		r = hammer_setup_parent_inodes_helper(depend);
		KKASSERT(depend->target_ip == ip);
		if (r < 0 && good == 0)
			good = -1;
		if (r > 0)
			good = 1;
	}
	return(good);

#if 0
retry:
	good = 0;
	next = TAILQ_FIRST(&ip->target_list);
	if (next) {
		hammer_ref(&next->lock);
		hammer_ref(&next->ip->lock);
	}
	while ((depend = next) != NULL) {
		if (depend->target_ip == NULL) {
			pip = depend->ip;
			hammer_rel_mem_record(depend);
			hammer_rel_inode(pip, 0);
			goto retry;
		}
		KKASSERT(depend->target_ip == ip);
		next = TAILQ_NEXT(depend, target_entry);
		if (next) {
			hammer_ref(&next->lock);
			hammer_ref(&next->ip->lock);
		}
		r = hammer_setup_parent_inodes_helper(depend);
		if (r < 0 && good == 0)
			good = -1;
		if (r > 0)
			good = 1;
		pip = depend->ip;
		hammer_rel_mem_record(depend);
		hammer_rel_inode(pip, 0);
	}
	return(good);
#endif
}

/*
 * This helper function takes a record representing the dependency between
 * the parent inode and child inode.
 *
 * record->ip		= parent inode
 * record->target_ip	= child inode
 *
 * We are asked to recurse upwards and convert the record from SETUP
 * to FLUSH if possible.
 *
 * Return 1 if the record gives us connectivity
 *
 * Return 0 if the record is not relevant
 *
 * Return -1 if we can't resolve the dependency and there is no connectivity.
 */
static int
hammer_setup_parent_inodes_helper(hammer_record_t record)
{
	hammer_mount_t hmp;
	hammer_inode_t pip;
	int good;

	KKASSERT(record->flush_state != HAMMER_FST_IDLE);
	pip = record->ip;
	hmp = pip->hmp;

	/*
	 * If the record is already flushing, is it in our flush group?
	 *
	 * If it is in our flush group but it is a general record or a
	 * delete-on-disk, it does not improve our connectivity (return 0),
	 * and if the target inode is not trying to destroy itself we can't
	 * allow the operation yet anyway (the second return -1).
	 */
	if (record->flush_state == HAMMER_FST_FLUSH) {
		if (record->flush_group != hmp->flusher.next) {
			pip->flags |= HAMMER_INODE_REFLUSH;
			return(-1);
		}
		if (record->type == HAMMER_MEM_RECORD_ADD)
			return(1);
		/* GENERAL or DEL */
		return(0);
	}

	/*
	 * It must be a setup record.  Try to resolve the setup dependencies
	 * by recursing upwards so we can place ip on the flush list.
	 */
	KKASSERT(record->flush_state == HAMMER_FST_SETUP);

	good = hammer_setup_parent_inodes(pip);

	/*
	 * We can't flush ip because it has no connectivity (XXX also check
	 * nlinks for pre-existing connectivity!).  Flag it so any resolution
	 * recurses back down.
	 */
	if (good < 0) {
		pip->flags |= HAMMER_INODE_REFLUSH;
		return(good);
	}

	/*
	 * We are go, place the parent inode in a flushing state so we can
	 * place its record in a flushing state.  Note that the parent
	 * may already be flushing.  The record must be in the same flush
	 * group as the parent.
	 */
	if (pip->flush_state != HAMMER_FST_FLUSH)
		hammer_flush_inode_core(pip, HAMMER_FLUSH_RECURSION);
	KKASSERT(pip->flush_state == HAMMER_FST_FLUSH);
	KKASSERT(record->flush_state == HAMMER_FST_SETUP);

#if 0
	if (record->type == HAMMER_MEM_RECORD_DEL &&
	    (record->target_ip->flags & (HAMMER_INODE_DELETED|HAMMER_INODE_DELONDISK)) == 0) {
		/*
		 * Regardless of flushing state we cannot sync this path if the
		 * record represents a delete-on-disk but the target inode
		 * is not ready to sync its own deletion.
		 *
		 * XXX need to count effective nlinks to determine whether
		 * the flush is ok, otherwise removing a hardlink will
		 * just leave the DEL record to rot.
		 */
		record->target_ip->flags |= HAMMER_INODE_REFLUSH;
		return(-1);
	} else
#endif
	if (pip->flush_group == pip->hmp->flusher.next) {
		/*
		 * This is the record we wanted to synchronize.  If the
		 * record went into a flush state while we blocked it
		 * had better be in the correct flush group.
		 */
		if (record->flush_state != HAMMER_FST_FLUSH) {
			record->flush_state = HAMMER_FST_FLUSH;
			record->flush_group = pip->flush_group;
			hammer_ref(&record->lock);
		} else {
			KKASSERT(record->flush_group == pip->flush_group);
		}
		if (record->type == HAMMER_MEM_RECORD_ADD)
			return(1);

		/*
		 * A general or delete-on-disk record does not contribute
		 * to our visibility.  We can still flush it, however.
		 */
		return(0);
	} else {
		/*
		 * We couldn't resolve the dependencies, request that the
		 * inode be flushed when the dependencies can be resolved.
		 */
		pip->flags |= HAMMER_INODE_REFLUSH;
		return(-1);
	}
}

/*
 * This is the core routine placing an inode into the FST_FLUSH state.
 */
static void
hammer_flush_inode_core(hammer_inode_t ip, int flags)
{
	int go_count;

	/*
	 * Set flush state and prevent the flusher from cycling into
	 * the next flush group.  Do not place the ip on the list yet.
	 * Inodes not in the idle state get an extra reference.
	 */
	KKASSERT(ip->flush_state != HAMMER_FST_FLUSH);
	if (ip->flush_state == HAMMER_FST_IDLE)
		hammer_ref(&ip->lock);
	ip->flush_state = HAMMER_FST_FLUSH;
	ip->flush_group = ip->hmp->flusher.next;
	++ip->hmp->flusher.group_lock;
	++ip->hmp->count_iqueued;
	++hammer_count_iqueued;

	/*
	 * We need to be able to vfsync/truncate from the backend.
	 */
	KKASSERT((ip->flags & HAMMER_INODE_VHELD) == 0);
	if (ip->vp && (ip->vp->v_flag & VINACTIVE) == 0) {
		ip->flags |= HAMMER_INODE_VHELD;
		vref(ip->vp);
	}

	/*
	 * Figure out how many in-memory records we can actually flush
	 * (not including inode meta-data, buffers, etc).
	 */
	if (flags & HAMMER_FLUSH_RECURSION) {
		go_count = 1;
	} else {
		go_count = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL,
				   hammer_setup_child_callback, NULL);
	}

	/*
	 * This is a more involved test that includes go_count.  If we
	 * can't flush, flag the inode and return.  If go_count is 0 we
	 * are unable to flush any records in our rec_tree and must
	 * ignore the XDIRTY flag.
	 */
	if (go_count == 0) {
		if ((ip->flags & HAMMER_INODE_MODMASK_NOXDIRTY) == 0) {
			ip->flags |= HAMMER_INODE_REFLUSH;

			--ip->hmp->count_iqueued;
			--hammer_count_iqueued;

			ip->flush_state = HAMMER_FST_SETUP;
			if (ip->flags & HAMMER_INODE_VHELD) {
				ip->flags &= ~HAMMER_INODE_VHELD;
				vrele(ip->vp);
			}
			if (flags & HAMMER_FLUSH_SIGNAL) {
				ip->flags |= HAMMER_INODE_RESIGNAL;
				hammer_flusher_async(ip->hmp);
			}
			if (--ip->hmp->flusher.group_lock == 0)
				wakeup(&ip->hmp->flusher.group_lock);
			return;
		}
	}

	/*
	 * Snapshot the state of the inode for the backend flusher.
	 *
	 * The truncation must be retained in the frontend until after
	 * we've actually performed the record deletion.
	 *
	 * We continue to retain sync_trunc_off even when all truncations
	 * have been resolved as an optimization to determine if we can
	 * skip the B-Tree lookup for overwrite deletions.
	 *
	 * NOTE: The DELETING flag is a mod flag, but it is also sticky,
	 * and stays in ip->flags.  Once set, it stays set until the
	 * inode is destroyed.
	 */
	ip->sync_flags = (ip->flags & HAMMER_INODE_MODMASK);
	if (ip->sync_flags & HAMMER_INODE_TRUNCATED)
		ip->sync_trunc_off = ip->trunc_off;
	ip->sync_ino_leaf = ip->ino_leaf;
	ip->sync_ino_data = ip->ino_data;
	ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
	ip->flags &= ~HAMMER_INODE_MODMASK;
#ifdef DEBUG_TRUNCATE
	if ((ip->sync_flags & HAMMER_INODE_TRUNCATED) && ip == HammerTruncIp)
		kprintf("truncateS %016llx\n", ip->sync_trunc_off);
#endif

	/*
	 * The flusher list inherits our inode and reference.
	 */
	TAILQ_INSERT_TAIL(&ip->hmp->flush_list, ip, flush_entry);
	if (--ip->hmp->flusher.group_lock == 0)
		wakeup(&ip->hmp->flusher.group_lock);

	if (flags & HAMMER_FLUSH_SIGNAL) {
		hammer_flusher_async(ip->hmp);
	}
}
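
/*
 * Added note: the sync_flags/sync_ino_leaf/sync_ino_data/sync_trunc_off
 * fields snapshotted above are the backend's frozen view of the inode.
 * The frontend immediately resumes dirtying ip->flags and ip->ino_data,
 * so the flusher works from a consistent point-in-time image while new
 * modifications accumulate for a later flush group.
 */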

/*
 * Callback for scan of ip->rec_tree.  Try to include each record in our
 * flush.  ip->flush_group has been set but the inode has not yet been
 * moved into a flushing state.
 *
 * If we get stuck on a record we have to set HAMMER_INODE_REFLUSH on
 * both inodes.
 *
 * We return 1 for any record placed or found in FST_FLUSH, which prevents
 * the caller from shortcutting the flush.
 */
static int
hammer_setup_child_callback(hammer_record_t rec, void *data)
{
	hammer_inode_t target_ip;
	hammer_inode_t ip;
	int r;

	/*
	 * Deleted records are ignored.  Note that the flush detects deleted
	 * front-end records at multiple points to deal with races.  This is
	 * just the first line of defense.  The only time DELETED_FE cannot
	 * be set is when HAMMER_RECF_INTERLOCK_BE is set.
	 *
	 * Don't get confused between record deletion and, say, directory
	 * entry deletion.  The deletion of a directory entry that is on
	 * the media has nothing to do with the record deletion flags.
	 */
	if (rec->flags & (HAMMER_RECF_DELETED_FE|HAMMER_RECF_DELETED_BE))
		return(0);

	/*
	 * If the record is in an idle state it has no dependencies and
	 * can be flushed.
	 */
	ip = rec->ip;
	r = 0;

	switch(rec->flush_state) {
	case HAMMER_FST_IDLE:
		/*
		 * Record has no setup dependency, we can flush it.
		 */
		KKASSERT(rec->target_ip == NULL);
		rec->flush_state = HAMMER_FST_FLUSH;
		rec->flush_group = ip->flush_group;
		hammer_ref(&rec->lock);
		r = 1;
		break;
	case HAMMER_FST_SETUP:
		/*
		 * Record has a setup dependency.  Try to include the
		 * target ip in the flush.
		 *
		 * We have to be careful here, if we do not do the right
		 * thing we can lose track of dirty inodes and the system
		 * will lockup trying to allocate buffers.
		 */
		target_ip = rec->target_ip;
		KKASSERT(target_ip != NULL);
		KKASSERT(target_ip->flush_state != HAMMER_FST_IDLE);
		if (target_ip->flush_state == HAMMER_FST_FLUSH) {
			/*
			 * If the target IP is already flushing in our group
			 * we are golden, otherwise make sure the target
			 * reflushes.
			 */
			if (target_ip->flush_group == ip->flush_group) {
				rec->flush_state = HAMMER_FST_FLUSH;
				rec->flush_group = ip->flush_group;
				hammer_ref(&rec->lock);
				r = 1;
			} else {
				target_ip->flags |= HAMMER_INODE_REFLUSH;
			}
		} else if (rec->type == HAMMER_MEM_RECORD_ADD) {
			/*
			 * If the target IP is not flushing we can force
			 * it to flush, even if it is unable to write out
			 * any of its own records we have at least one in
			 * hand that we CAN deal with.
			 */
			rec->flush_state = HAMMER_FST_FLUSH;
			rec->flush_group = ip->flush_group;
			hammer_ref(&rec->lock);
			hammer_flush_inode_core(target_ip,
						HAMMER_FLUSH_RECURSION);
			r = 1;
		} else {
			/*
			 * General or delete-on-disk record.
			 *
			 * XXX this needs help.  If a delete-on-disk we could
			 * disconnect the target.  If the target has its own
			 * dependencies they really need to be flushed.
			 *
			 * XXX
			 */
			rec->flush_state = HAMMER_FST_FLUSH;
			rec->flush_group = ip->flush_group;
			hammer_ref(&rec->lock);
			hammer_flush_inode_core(target_ip,
						HAMMER_FLUSH_RECURSION);
			r = 1;
		}
		break;
	case HAMMER_FST_FLUSH:
		/*
		 * Record already associated with a flush group.  It had
		 * better be ours.
		 */
		KKASSERT(rec->flush_group == ip->flush_group);
		r = 1;
		break;
	}
	return(r);
}

/*
 * Wait for a previously queued flush to complete
 */
void
hammer_wait_inode(hammer_inode_t ip)
{
	while (ip->flush_state != HAMMER_FST_IDLE) {
		if (ip->flush_state == HAMMER_FST_SETUP) {
			hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
		} else {
			ip->flags |= HAMMER_INODE_FLUSHW;
			tsleep(&ip->flags, 0, "hmrwin", 0);
		}
	}
}
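
/*
 * Added usage sketch: a caller wanting synchronous semantics would
 * typically pair the two calls, e.g.
 *
 *	hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
 *	hammer_wait_inode(ip);
 *
 * The SETUP case above re-signals the flush, so the wait loop makes
 * forward progress even when the first attempt could not include every
 * record.
 */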

/*
 * Called by the backend code when a flush has been completed.
 * The inode has already been removed from the flush list.
 *
 * A pipelined flush can occur, in which case we must re-enter the
 * inode on the list and re-copy its fields.
 */
void
hammer_flush_inode_done(hammer_inode_t ip)
{
	hammer_mount_t hmp;
	int dorel;

	KKASSERT(ip->flush_state == HAMMER_FST_FLUSH);

	hmp = ip->hmp;

	/*
	 * Merge left-over flags back into the frontend and fix the state.
	 */
	ip->flags |= ip->sync_flags;

	/*
	 * The backend may have adjusted nlinks, so if the adjusted nlinks
	 * does not match the frontend set the frontend's DDIRTY flag again.
	 */
	if (ip->ino_data.nlinks != ip->sync_ino_data.nlinks)
		ip->flags |= HAMMER_INODE_DDIRTY;

	/*
	 * Fix up the dirty buffer status.  IO completions will also
	 * try to clean up rsv_databufs.
	 */
	if (ip->vp && RB_ROOT(&ip->vp->v_rbdirty_tree)) {
		ip->flags |= HAMMER_INODE_BUFS;
	} else {
		hmp->rsv_databufs -= ip->rsv_databufs;
		ip->rsv_databufs = 0;
	}

	/*
	 * Re-set the XDIRTY flag if some of the inode's in-memory records
	 * could not be flushed.
	 */
	KKASSERT((RB_EMPTY(&ip->rec_tree) &&
		  (ip->flags & HAMMER_INODE_XDIRTY) == 0) ||
		 (!RB_EMPTY(&ip->rec_tree) &&
		  (ip->flags & HAMMER_INODE_XDIRTY) != 0));

	/*
	 * Do not lose track of inodes which no longer have vnode
	 * associations, otherwise they may never get flushed again.
	 */
	if ((ip->flags & HAMMER_INODE_MODMASK) && ip->vp == NULL)
		ip->flags |= HAMMER_INODE_REFLUSH;

	/*
	 * Adjust flush_state.  The target state (idle or setup) shouldn't
	 * be terribly important since we will reflush if we really need
	 * to do anything.  XXX
	 */
	if (TAILQ_EMPTY(&ip->target_list) && RB_EMPTY(&ip->rec_tree)) {
		ip->flush_state = HAMMER_FST_IDLE;
		dorel = 1;
	} else {
		ip->flush_state = HAMMER_FST_SETUP;
		dorel = 0;
	}

	--hmp->count_iqueued;
	--hammer_count_iqueued;

	/*
	 * Clean up the vnode ref
	 */
	if (ip->flags & HAMMER_INODE_VHELD) {
		ip->flags &= ~HAMMER_INODE_VHELD;
		vrele(ip->vp);
	}

	/*
	 * If the frontend made more changes and requested another flush,
	 * then try to get it running.
	 */
	if (ip->flags & HAMMER_INODE_REFLUSH) {
		ip->flags &= ~HAMMER_INODE_REFLUSH;
		if (ip->flags & HAMMER_INODE_RESIGNAL) {
			ip->flags &= ~HAMMER_INODE_RESIGNAL;
			hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
		} else {
			hammer_flush_inode(ip, 0);
		}
	}

	/*
	 * If the inode is now clean drop the space reservation.
	 */
	if ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
	    (ip->flags & HAMMER_INODE_RSV_INODES)) {
		ip->flags &= ~HAMMER_INODE_RSV_INODES;
		--hmp->rsv_inodes;
	}

	/*
	 * Finally, if the frontend is waiting for a flush to complete,
	 * wake it up.
	 */
	if (ip->flush_state != HAMMER_FST_FLUSH) {
		if (ip->flags & HAMMER_INODE_FLUSHW) {
			ip->flags &= ~HAMMER_INODE_FLUSHW;
			wakeup(&ip->flags);
		}
	}
	if (dorel)
		hammer_rel_inode(ip, 0);
}

/*
 * Called from hammer_sync_inode() to synchronize in-memory records
 * to the media.
 */
static int
hammer_sync_record_callback(hammer_record_t record, void *data)
{
	hammer_cursor_t cursor = data;
	hammer_transaction_t trans = cursor->trans;
	int error;

	/*
	 * Skip records that do not belong to the current flush.
	 */
	++hammer_stats_record_iterations;
	if (record->flush_state != HAMMER_FST_FLUSH)
		return(0);

#if 1
	if (record->flush_group != record->ip->flush_group) {
		kprintf("sync_record %p ip %p bad flush group %d %d\n",
			record, record->ip, record->flush_group,
			record->ip->flush_group);
		Debugger("blah2");
		return(0);
	}
#endif
	KKASSERT(record->flush_group == record->ip->flush_group);

	/*
	 * Interlock the record using the BE flag.  Once BE is set the
	 * frontend cannot change the state of FE.
	 *
	 * NOTE: If FE is set prior to us setting BE we still sync the
	 * record out, but the flush completion code converts it to
	 * a delete-on-disk record instead of destroying it.
	 */
	KKASSERT((record->flags & HAMMER_RECF_INTERLOCK_BE) == 0);
	record->flags |= HAMMER_RECF_INTERLOCK_BE;

	/*
	 * The backend may have already disposed of the record.
	 */
	if (record->flags & HAMMER_RECF_DELETED_BE) {
		error = 0;
		goto done;
	}

	/*
	 * If the whole inode is being deleted all on-disk records will
	 * be deleted very soon, we can't sync any new records to disk
	 * because they will be deleted in the same transaction they were
	 * created in (delete_tid == create_tid), which will assert.
	 *
	 * XXX There may be a case with RECORD_ADD with DELETED_FE set
	 * that we currently panic on.
	 */
	if (record->ip->sync_flags & HAMMER_INODE_DELETING) {
		switch(record->type) {
		case HAMMER_MEM_RECORD_DATA:
			/*
			 * We don't have to do anything, if the record was
			 * committed the space will have been accounted for
			 * in the blockmap.
			 */
			/* fall through */
		case HAMMER_MEM_RECORD_GENERAL:
			record->flags |= HAMMER_RECF_DELETED_FE;
			record->flags |= HAMMER_RECF_DELETED_BE;
			error = 0;
			goto done;
		case HAMMER_MEM_RECORD_ADD:
			panic("hammer_sync_record_callback: illegal add "
			      "during inode deletion record %p", record);
			break; /* NOT REACHED */
		case HAMMER_MEM_RECORD_INODE:
			panic("hammer_sync_record_callback: attempt to "
			      "sync inode record %p?", record);
			break; /* NOT REACHED */
		case HAMMER_MEM_RECORD_DEL:
			/*
			 * Follow through and issue the on-disk deletion
			 */
			break;
		}
	}

	/*
	 * If DELETED_FE is set special handling is needed for directory
	 * entries.  Dependent pieces related to the directory entry may
	 * have already been synced to disk.  If this occurs we have to
	 * sync the directory entry and then change the in-memory record
	 * from an ADD to a DELETE to cover the fact that it's been
	 * deleted by the frontend.
	 *
	 * A directory delete covering record (MEM_RECORD_DEL) can never
	 * be deleted by the frontend.
	 *
	 * Any other record type (aka DATA) can be deleted by the frontend.
	 * XXX At the moment the flusher must skip it because there may
	 * be another data record in the flush group for the same block,
	 * meaning that some frontend data changes can leak into the backend's
	 * synchronization point.
	 */
	if (record->flags & HAMMER_RECF_DELETED_FE) {
		if (record->type == HAMMER_MEM_RECORD_ADD) {
			record->flags |= HAMMER_RECF_CONVERT_DELETE;
		} else {
			KKASSERT(record->type != HAMMER_MEM_RECORD_DEL);
			record->flags |= HAMMER_RECF_DELETED_BE;
			error = 0;
			goto done;
		}
	}

	/*
	 * Assign the create_tid for new records.  Deletions already
	 * have the record's entire key properly set up.
	 */
	if (record->type != HAMMER_MEM_RECORD_DEL)
		record->leaf.base.create_tid = trans->tid;
	for (;;) {
		error = hammer_ip_sync_record_cursor(cursor, record);
		if (error != EDEADLK)
			break;
		hammer_done_cursor(cursor);
		error = hammer_init_cursor(trans, cursor, &record->ip->cache[0],
					   record->ip);
		if (error)
			break;
	}
	record->flags &= ~HAMMER_RECF_CONVERT_DELETE;

	if (error) {
		error = -error;
		if (error != -ENOSPC) {
			kprintf("hammer_sync_record_callback: sync failed rec "
				"%p, error %d\n", record, error);
			Debugger("sync failed rec");
		}
	}
done:
	hammer_flush_record_done(record, error);
	return(error);
}
1587
1588/*
1589 * XXX error handling
1590 */
1591int
1f07f686 1592hammer_sync_inode(hammer_inode_t ip)
c0ade690
MD
1593{
1594 struct hammer_transaction trans;
4e17f465 1595 struct hammer_cursor cursor;
cb51be26 1596 hammer_node_t tmp_node;
1f07f686
MD
1597 hammer_record_t depend;
1598 hammer_record_t next;
ec4e8497 1599 int error, tmp_error;
1f07f686 1600 u_int64_t nlinks;
c0ade690 1601
1f07f686 1602 if ((ip->sync_flags & HAMMER_INODE_MODMASK) == 0)
d113fda1 1603 return(0);
d113fda1 1604
b84de5af 1605 hammer_start_transaction_fls(&trans, ip->hmp);
cb51be26 1606 error = hammer_init_cursor(&trans, &cursor, &ip->cache[1], ip);
4e17f465
MD
1607 if (error)
1608 goto done;
c0ade690 1609
	/*
	 * Any directory records referencing this inode which are not in
	 * our current flush group must adjust our nlink count for the
	 * purposes of synchronization to disk.
	 *
	 * Records which are in our flush group can be unlinked from our
	 * inode now, potentially allowing the inode to be physically
	 * deleted.
	 *
	 * This cannot block.
	 */
	nlinks = ip->ino_data.nlinks;
	next = TAILQ_FIRST(&ip->target_list);
	while ((depend = next) != NULL) {
		next = TAILQ_NEXT(depend, target_entry);
		if (depend->flush_state == HAMMER_FST_FLUSH &&
		    depend->flush_group == ip->hmp->flusher.act) {
			/*
			 * If this is an ADD that was deleted by the frontend
			 * the frontend nlinks count will have already been
			 * decremented, but the backend is going to sync its
			 * directory entry and must account for it.  The
			 * record will be converted to a delete-on-disk when
			 * it gets synced.
			 *
			 * If the ADD was not deleted by the frontend we
			 * can remove the dependency from our target_list.
			 */
			if (depend->flags & HAMMER_RECF_DELETED_FE) {
				++nlinks;
			} else {
				TAILQ_REMOVE(&ip->target_list, depend,
					     target_entry);
				depend->target_ip = NULL;
			}
		} else if ((depend->flags & HAMMER_RECF_DELETED_FE) == 0) {
			/*
			 * Not part of our flush group
			 */
			KKASSERT((depend->flags & HAMMER_RECF_DELETED_BE) == 0);
			switch(depend->type) {
			case HAMMER_MEM_RECORD_ADD:
				--nlinks;
				break;
			case HAMMER_MEM_RECORD_DEL:
				++nlinks;
				break;
			default:
				break;
			}
		}
	}
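
	/*
	 * Example of the adjustment (numbers hypothetical): if
	 * ino_data.nlinks is 3 but one directory ADD record referencing
	 * this inode is queued to a later flush group, that link does
	 * not yet exist on-media, so this pass syncs nlinks = 2.  A DEL
	 * queued to a later group adjusts the other way, since the
	 * on-media directory entry it removes is still present.
	 */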

	/*
	 * Set dirty if we had to modify the link count.
	 */
	if (ip->sync_ino_data.nlinks != nlinks) {
		KKASSERT((int64_t)nlinks >= 0);
		ip->sync_ino_data.nlinks = nlinks;
		ip->sync_flags |= HAMMER_INODE_DDIRTY;
	}

	/*
	 * If there is a truncation queued destroy any data past the
	 * (aligned) truncation point.  Userland will have dealt with the
	 * buffer containing the truncation point for us.
	 *
	 * We don't flush pending frontend data buffers until after we've
	 * dealt with the truncation.
	 */
	if (ip->sync_flags & HAMMER_INODE_TRUNCATED) {
		/*
		 * Interlock trunc_off.  The VOP front-end may continue to
		 * make adjustments to it while we are blocked.
		 */
		off_t trunc_off;
		off_t aligned_trunc_off;
		int blkmask;

		trunc_off = ip->sync_trunc_off;
		blkmask = hammer_blocksize(trunc_off) - 1;
		aligned_trunc_off = (trunc_off + blkmask) & ~(int64_t)blkmask;
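
		/*
		 * Worked example (block size illustrative): with a 16KB
		 * block, blkmask is 0x3fff, so a trunc_off of 0x5003
		 * rounds up to an aligned_trunc_off of 0x8000.  Only
		 * whole blocks from 0x8000 on are deleted on-media; the
		 * straddling block was already handled by the frontend.
		 */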

		/*
		 * Delete any whole blocks on-media.  The front-end has
		 * already cleaned out any partial block and made it
		 * pending.  The front-end may have updated trunc_off
		 * while we were blocked so we only use sync_trunc_off.
		 */
		error = hammer_ip_delete_range(&cursor, ip,
					       aligned_trunc_off,
					       0x7FFFFFFFFFFFFFFFLL, 1);
		if (error)
			Debugger("hammer_ip_delete_range errored");

		/*
		 * Clear the truncation flag on the backend after we have
		 * completed the deletions.  Backend data is now good again
		 * (including new records we are about to sync, below).
		 *
		 * Leave sync_trunc_off intact.  As we write additional
		 * records the backend will update sync_trunc_off.  This
		 * tells the backend whether it can skip the overwrite
		 * test.  This should work properly even when the backend
		 * writes full blocks where the truncation point straddles
		 * the block because the comparison is against the base
		 * offset of the record.
		 */
		ip->sync_flags &= ~HAMMER_INODE_TRUNCATED;
		/* ip->sync_trunc_off = 0x7FFFFFFFFFFFFFFFLL; */
	} else {
		error = 0;
	}

	/*
	 * Now sync related records.  These will typically be directory
	 * entries or delete-on-disk records.
	 *
	 * Not all records will be flushed, but clear XDIRTY anyway.  We
	 * will set it again in the frontend hammer_flush_inode_done()
	 * if records remain.
	 */
	if (error == 0) {
		tmp_error = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL,
				    hammer_sync_record_callback, &cursor);
		if (tmp_error < 0)
			tmp_error = -tmp_error;
		if (tmp_error)
			error = tmp_error;
	}
	hammer_cache_node(&ip->cache[1], cursor.node);

	/*
	 * Re-seek for inode update.
	 */
	if (error == 0) {
		tmp_node = hammer_ref_node_safe(ip->hmp, &ip->cache[0], &error);
		if (tmp_node) {
			hammer_cursor_seek(&cursor, tmp_node, 0);
			hammer_rel_node(tmp_node);
		}
		error = 0;
	}
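
	/*
	 * A note on the two cache hints as used by this function:
	 * cache[1] remembers the B-Tree position of the inode's record
	 * scan and was refreshed by hammer_cache_node() above, while
	 * cache[0] points at the inode record itself and gives the
	 * re-seek above a cheap starting position for the inode update
	 * below.
	 */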

	/*
	 * If we are deleting the inode the frontend had better not have
	 * any active references on elements making up the inode.
	 */
	if (error == 0 && ip->sync_ino_data.nlinks == 0 &&
	    RB_EMPTY(&ip->rec_tree) &&
	    (ip->sync_flags & HAMMER_INODE_DELETING) &&
	    (ip->flags & HAMMER_INODE_DELETED) == 0) {
		int count1 = 0;

		ip->flags |= HAMMER_INODE_DELETED;
		error = hammer_ip_delete_range_all(&cursor, ip, &count1);
		if (error == 0) {
			ip->sync_flags &= ~HAMMER_INODE_DELETING;
			ip->sync_flags &= ~HAMMER_INODE_TRUNCATED;
			KKASSERT(RB_EMPTY(&ip->rec_tree));

			/*
			 * Set delete_tid in both the frontend and backend
			 * copy of the inode record.  The DELETED flag handles
			 * this, do not set RDIRTY.
			 */
			ip->ino_leaf.base.delete_tid = trans.tid;
			ip->sync_ino_leaf.base.delete_tid = trans.tid;

			/*
			 * Adjust the inode count in the volume header
			 */
			if (ip->flags & HAMMER_INODE_ONDISK) {
				hammer_modify_volume_field(&trans,
							   trans.rootvol,
							   vol0_stat_inodes);
				--ip->hmp->rootvol->ondisk->vol0_stat_inodes;
				hammer_modify_volume_done(trans.rootvol);
			}
		} else {
			ip->flags &= ~HAMMER_INODE_DELETED;
			Debugger("hammer_ip_delete_range_all errored");
		}
	}

	ip->sync_flags &= ~HAMMER_INODE_BUFS;

	if (error)
		Debugger("RB_SCAN errored");

	/*
	 * Now update the inode's on-disk inode-data and/or on-disk record.
	 * DELETED and ONDISK are managed only in ip->flags.
	 */
	switch(ip->flags & (HAMMER_INODE_DELETED | HAMMER_INODE_ONDISK)) {
	case HAMMER_INODE_DELETED|HAMMER_INODE_ONDISK:
		/*
		 * If deleted and on-disk, don't set any additional flags.
		 * The delete flag takes care of things.
		 *
		 * Clear flags which may have been set by the frontend.
		 */
		ip->sync_flags &= ~(HAMMER_INODE_DDIRTY|
				    HAMMER_INODE_XDIRTY|HAMMER_INODE_ITIMES|
				    HAMMER_INODE_DELETING);
		break;
	case HAMMER_INODE_DELETED:
		/*
		 * Take care of the case where a deleted inode was never
		 * flushed to the disk in the first place.
		 *
		 * Clear flags which may have been set by the frontend.
		 */
		ip->sync_flags &= ~(HAMMER_INODE_DDIRTY|
				    HAMMER_INODE_XDIRTY|HAMMER_INODE_ITIMES|
				    HAMMER_INODE_DELETING);
		while (RB_ROOT(&ip->rec_tree)) {
			hammer_record_t record = RB_ROOT(&ip->rec_tree);
			hammer_ref(&record->lock);
			KKASSERT(record->lock.refs == 1);
			record->flags |= HAMMER_RECF_DELETED_FE;
			record->flags |= HAMMER_RECF_DELETED_BE;
			hammer_rel_mem_record(record);
		}
		break;
	case HAMMER_INODE_ONDISK:
		/*
		 * If already on-disk, do not set any additional flags.
		 */
		break;
	default:
		/*
		 * If not on-disk and not deleted, set both dirty flags
		 * to force an initial record to be written.  Also set
		 * the create_tid for the inode.
		 *
		 * Set create_tid in both the frontend and backend
		 * copy of the inode record.
		 */
		ip->ino_leaf.base.create_tid = trans.tid;
		ip->sync_ino_leaf.base.create_tid = trans.tid;
		ip->sync_flags |= HAMMER_INODE_DDIRTY;
		break;
	}
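
	/*
	 * Recap of the four cases above:
	 *
	 *	DELETED+ONDISK	- scrub frontend sync flags; the delete
	 *			  path handles the rest
	 *	DELETED only	- scrub flags and throw away any remaining
	 *			  in-memory records
	 *	ONDISK only	- plain update, nothing extra needed
	 *	neither		- first sync; stamp create_tid and force
	 *			  DDIRTY so an initial record is written
	 */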

	/*
	 * If DDIRTY is set, write out a new record.  If the inode
	 * is already on-disk the old record is marked as deleted.
	 *
	 * If DELETED is set hammer_update_inode() will delete the existing
	 * record without writing out a new one.
	 *
	 * If *ONLY* the ITIMES flag is set we can update the record in-place.
	 */
	if (ip->flags & HAMMER_INODE_DELETED) {
		error = hammer_update_inode(&cursor, ip);
	} else
	if ((ip->sync_flags & (HAMMER_INODE_DDIRTY | HAMMER_INODE_ITIMES)) ==
	    HAMMER_INODE_ITIMES) {
		error = hammer_update_itimes(&cursor, ip);
	} else
	if (ip->sync_flags & (HAMMER_INODE_DDIRTY | HAMMER_INODE_ITIMES)) {
		error = hammer_update_inode(&cursor, ip);
	}
	if (error)
		Debugger("hammer_update_itimes/inode errored");
done:
	/*
	 * Save the TID we used to sync the inode with to make sure we
	 * do not improperly reuse it.
	 */
	hammer_done_cursor(&cursor);
	hammer_done_transaction(&trans);
	return(error);
}

/*
 * This routine is called when the OS is no longer actively referencing
 * the inode (but might still be keeping it cached), or when releasing
 * the last reference to an inode.
 *
 * At this point if the inode's nlinks count is zero we want to destroy
 * it, which may mean destroying it on-media too.
 */
void
hammer_inode_unloadable_check(hammer_inode_t ip, int getvp)
{
	struct vnode *vp;

	/*
	 * Set the DELETING flag when the link count drops to 0 and the
	 * OS no longer has any opens on the inode.
	 *
	 * The backend will clear DELETING (a mod flag) and set DELETED
	 * (a state flag) when it is actually able to perform the
	 * operation.
	 */
	if (ip->ino_data.nlinks == 0 &&
	    (ip->flags & (HAMMER_INODE_DELETING|HAMMER_INODE_DELETED)) == 0) {
		ip->flags |= HAMMER_INODE_DELETING;
		ip->flags |= HAMMER_INODE_TRUNCATED;
		ip->trunc_off = 0;
		vp = NULL;
		if (getvp) {
			if (hammer_get_vnode(ip, &vp) != 0)
				return;
		}

		/*
		 * Final cleanup
		 */
		if (ip->vp) {
			vtruncbuf(ip->vp, 0, HAMMER_BUFSIZE);
			vnode_pager_setsize(ip->vp, 0);
		}
		if (getvp) {
			vput(vp);
		}
	}
}
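
/*
 * The getvp argument above exists for callers which may not already
 * have a vnode in hand: a non-zero getvp acquires one via
 * hammer_get_vnode() so the vtruncbuf()/vnode_pager_setsize() cleanup
 * can run, and releases it with vput() afterwards.  Callers already
 * operating at the vnode level can pass 0.
 */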

/*
 * Re-test an inode when a dependency has gone away to see if we
 * can chain flush it.
 */
void
hammer_test_inode(hammer_inode_t ip)
{
	if (ip->flags & HAMMER_INODE_REFLUSH) {
		ip->flags &= ~HAMMER_INODE_REFLUSH;
		hammer_ref(&ip->lock);
		if (ip->flags & HAMMER_INODE_RESIGNAL) {
			ip->flags &= ~HAMMER_INODE_RESIGNAL;
			hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
		} else {
			hammer_flush_inode(ip, 0);
		}
		hammer_rel_inode(ip, 0);
	}
}

/*
 * Clear the RECLAIM flag on an inode.  This occurs when the inode is
 * reassociated with a vp or just before it gets freed.
 *
 * Wake up one thread blocked waiting on reclaims to complete.  Note that
 * the inode the thread is waiting on behalf of is a different inode than
 * the inode we are called with.  This is to create a pipeline.
 */
static void
hammer_inode_wakereclaims(hammer_inode_t ip)
{
	struct hammer_reclaim *reclaim;
	hammer_mount_t hmp = ip->hmp;

	if ((ip->flags & HAMMER_INODE_RECLAIM) == 0)
		return;

	--hammer_count_reclaiming;
	--hmp->inode_reclaims;
	ip->flags &= ~HAMMER_INODE_RECLAIM;

	if ((reclaim = TAILQ_FIRST(&hmp->reclaim_list)) != NULL) {
		TAILQ_REMOVE(&hmp->reclaim_list, reclaim, entry);
		reclaim->okydoky = 1;
		wakeup(reclaim);
	}
}

/*
 * Set up our reclaim pipeline.  We only let so many detached (and dirty)
 * inodes build up before we start blocking.
 *
 * When we block we don't care *which* inode has finished reclaiming,
 * as long as one does.  This is somewhat heuristic... we also put a
 * cap on how long we are willing to wait.
 */
void
hammer_inode_waitreclaims(hammer_mount_t hmp)
{
	struct hammer_reclaim reclaim;
	int delay;

	if (hmp->inode_reclaims > HAMMER_RECLAIM_WAIT) {
		reclaim.okydoky = 0;
		TAILQ_INSERT_TAIL(&hmp->reclaim_list,
				  &reclaim, entry);
	} else {
		reclaim.okydoky = 1;
	}

	if (reclaim.okydoky == 0) {
		delay = (hmp->inode_reclaims - HAMMER_RECLAIM_WAIT) * hz /
			HAMMER_RECLAIM_WAIT;
		if (delay >= 0)
			tsleep(&reclaim, 0, "hmrrcm", delay + 1);
		if (reclaim.okydoky == 0)
			TAILQ_REMOVE(&hmp->reclaim_list, &reclaim, entry);
	}
}
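/*
 * Worked example of the backoff computation above (constants
 * hypothetical): with HAMMER_RECLAIM_WAIT at 4000 and hz at 100, a
 * backlog of 6000 reclaiming inodes yields
 * delay = (6000 - 4000) * 100 / 4000 = 50 ticks, about half a second.
 * The tsleep() is cut short by the wakeup() in
 * hammer_inode_wakereclaims() as soon as any one inode finishes, so
 * the computed delay is only an upper bound that scales linearly with
 * the backlog.
 */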