427e5fc6 1/*
b84de5af 2 * Copyright (c) 2007-2008 The DragonFly Project. All rights reserved.
427e5fc6
MD
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
da2da375 34 * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.69 2008/06/10 08:51:01 dillon Exp $
427e5fc6
MD
35 */
36
37#include "hammer.h"
869e8f55 38#include <vm/vm_extern.h>
427e5fc6
MD
39#include <sys/buf.h>
40#include <sys/buf2.h>
41
ec4e8497 42static int hammer_unload_inode(struct hammer_inode *ip);
1f07f686
MD
43static void hammer_flush_inode_core(hammer_inode_t ip, int flags);
44static int hammer_setup_child_callback(hammer_record_t rec, void *data);
1f07f686 45static int hammer_setup_parent_inodes(hammer_record_t record);
b84de5af 46
0832c9bb
MD
47#ifdef DEBUG_TRUNCATE
48extern struct hammer_inode *HammerTruncIp;
49#endif
50
d113fda1
MD
51/*
52 * The kernel is not actively referencing this vnode but is still holding
53 * it cached.
b84de5af
MD
54 *
55 * This is called from the frontend.
d113fda1 56 */
427e5fc6
MD
57int
58hammer_vop_inactive(struct vop_inactive_args *ap)
59{
66325755 60 struct hammer_inode *ip = VTOI(ap->a_vp);
27ea2398 61
c0ade690
MD
62 /*
63 * Degenerate case
64 */
65 if (ip == NULL) {
66325755 66 vrecycle(ap->a_vp);
c0ade690
MD
67 return(0);
68 }
69
70 /*
1f07f686
MD
71 * If the inode no longer has visibility in the filesystem and is
72 * fairly clean, try to recycle it immediately. This can deadlock
73 * in vfsync() if we aren't careful.
4e97774c
MD
74 *
75 * Do not queue the inode to the flusher if we still have visibility,
76 * otherwise namespace calls such as chmod will unnecessarily generate
77 * multiple inode updates.
c0ade690 78 */
e8599db1 79 hammer_inode_unloadable_check(ip, 0);
4e97774c
MD
80 if (ip->ino_data.nlinks == 0) {
81 if (ip->flags & HAMMER_INODE_MODMASK)
82 hammer_flush_inode(ip, 0);
83 else
84 vrecycle(ap->a_vp);
85 }
427e5fc6
MD
86 return(0);
87}
88
d113fda1
MD
89/*
90 * Release the vnode association. This is typically (but not always)
1f07f686 91 * the last reference on the inode.
d113fda1 92 *
1f07f686
MD
93 * Once the association is lost we are on our own with regards to
94 * flushing the inode.
d113fda1 95 */
427e5fc6
MD
96int
97hammer_vop_reclaim(struct vop_reclaim_args *ap)
98{
da2da375 99 hammer_mount_t hmp;
427e5fc6
MD
100 struct hammer_inode *ip;
101 struct vnode *vp;
102
103 vp = ap->a_vp;
c0ade690 104
a89aec1b 105 if ((ip = vp->v_data) != NULL) {
da2da375 106 hmp = ip->hmp;
a89aec1b
MD
107 vp->v_data = NULL;
108 ip->vp = NULL;
9f5097dc
MD
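		/*
		 * Account for the inode entering the reclaim-pending state
		 * so the reclaim throttle below can see the backlog.
		 */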
109 if ((ip->flags & HAMMER_INODE_RECLAIM) == 0) {
110 ++hammer_count_reclaiming;
da2da375 111 ++hmp->inode_reclaims;
9f5097dc
MD
112 ip->flags |= HAMMER_INODE_RECLAIM;
113 }
ec4e8497 114 hammer_rel_inode(ip, 1);
da2da375
MD
115
116 /*
117 * Do not let too many reclaimed inodes build up.
118 *
119 */
120 hammer_inode_waitreclaims(hmp);
a89aec1b 121 }
427e5fc6
MD
122 return(0);
123}
124
66325755
MD
125/*
126 * Return a locked vnode for the specified inode. The inode must be
127 * referenced but NOT LOCKED on entry and will remain referenced on
128 * return.
b84de5af
MD
129 *
130 * Called from the frontend.
66325755
MD
131 */
132int
e8599db1 133hammer_get_vnode(struct hammer_inode *ip, struct vnode **vpp)
66325755 134{
9f5097dc 135 hammer_mount_t hmp;
66325755
MD
136 struct vnode *vp;
137 int error = 0;
138
9f5097dc
MD
139 hmp = ip->hmp;
140
66325755
MD
141 for (;;) {
142 if ((vp = ip->vp) == NULL) {
9f5097dc 143 error = getnewvnode(VT_HAMMER, hmp->mp, vpp, 0, 0);
66325755
MD
144 if (error)
145 break;
8cd0a023
MD
146 hammer_lock_ex(&ip->lock);
147 if (ip->vp != NULL) {
148 hammer_unlock(&ip->lock);
149 vp->v_type = VBAD;
150 vx_put(vp);
151 continue;
66325755 152 }
8cd0a023
MD
153 hammer_ref(&ip->lock);
154 vp = *vpp;
155 ip->vp = vp;
11ad5ade
MD
156 vp->v_type =
157 hammer_get_vnode_type(ip->ino_data.obj_type);
7a04d74f 158
9f5097dc
MD
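			/*
			 * Reattaching a vnode takes the inode back out of
			 * the reclaim-pending state.
			 */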
159 if (ip->flags & HAMMER_INODE_RECLAIM) {
160 --hammer_count_reclaiming;
161 --hmp->inode_reclaims;
162 ip->flags &= ~HAMMER_INODE_RECLAIM;
163 if (hmp->flags & HAMMER_MOUNT_WAITIMAX)
164 hammer_inode_wakereclaims(hmp);
165 }
166
11ad5ade 167 switch(ip->ino_data.obj_type) {
7a04d74f
MD
168 case HAMMER_OBJTYPE_CDEV:
169 case HAMMER_OBJTYPE_BDEV:
9f5097dc 170 vp->v_ops = &hmp->mp->mnt_vn_spec_ops;
7a04d74f
MD
171 addaliasu(vp, ip->ino_data.rmajor,
172 ip->ino_data.rminor);
173 break;
174 case HAMMER_OBJTYPE_FIFO:
9f5097dc 175 vp->v_ops = &hmp->mp->mnt_vn_fifo_ops;
7a04d74f
MD
176 break;
177 default:
178 break;
179 }
42c7d26b
MD
180
181 /*
182 * Only mark as the root vnode if the ip is not
183 * historical, otherwise the VFS cache will get
184 * confused. The other half of the special handling
185 * is in hammer_vop_nlookupdotdot().
186 */
187 if (ip->obj_id == HAMMER_OBJID_ROOT &&
9f5097dc 188 ip->obj_asof == hmp->asof) {
7a04d74f 189 vp->v_flag |= VROOT;
42c7d26b 190 }
7a04d74f 191
8cd0a023
MD
192 vp->v_data = (void *)ip;
193 /* vnode locked by getnewvnode() */
194 /* make related vnode dirty if inode dirty? */
195 hammer_unlock(&ip->lock);
a89aec1b 196 if (vp->v_type == VREG)
11ad5ade 197 vinitvmio(vp, ip->ino_data.size);
8cd0a023
MD
198 break;
199 }
200
201 /*
202 * loop if the vget fails (aka races), or if the vp
203 * no longer matches ip->vp.
204 */
205 if (vget(vp, LK_EXCLUSIVE) == 0) {
206 if (vp == ip->vp)
207 break;
208 vput(vp);
66325755
MD
209 }
210 }
a89aec1b 211 *vpp = vp;
66325755
MD
212 return(error);
213}
214
215/*
8cd0a023
MD
216 * Acquire a HAMMER inode. The returned inode is not locked. These functions
217 * do not attach or detach the related vnode (use hammer_get_vnode() for
218 * that).
d113fda1
MD
219 *
220 * The flags argument is only applied for newly created inodes, and only
221 * certain flags are inherited.
b84de5af
MD
222 *
223 * Called from the frontend.
66325755
MD
224 */
225struct hammer_inode *
36f82b23 226hammer_get_inode(hammer_transaction_t trans, struct hammer_node **cache,
61aeeb33 227 u_int64_t obj_id, hammer_tid_t asof, int flags, int *errorp)
66325755 228{
36f82b23 229 hammer_mount_t hmp = trans->hmp;
427e5fc6 230 struct hammer_inode_info iinfo;
8cd0a023 231 struct hammer_cursor cursor;
427e5fc6 232 struct hammer_inode *ip;
427e5fc6
MD
233
234 /*
235 * Determine if we already have an inode cached. If we do then
236 * we are golden.
237 */
66325755 238 iinfo.obj_id = obj_id;
7f7c1f84 239 iinfo.obj_asof = asof;
427e5fc6
MD
240loop:
241 ip = hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, &iinfo);
242 if (ip) {
8cd0a023 243 hammer_ref(&ip->lock);
66325755
MD
244 *errorp = 0;
245 return(ip);
427e5fc6
MD
246 }
247
da2da375 248#if 0
3897d7e9
MD
249 /*
250 * Impose a slow-down if HAMMER is heavily backlogged on cleaning
251 * out reclaimed inodes.
252 */
253 if (hmp->inode_reclaims > HAMMER_RECLAIM_MIN &&
da2da375 254 trans->type != HAMMER_TRANS_FLS) {
3897d7e9 255 hammer_inode_waitreclaims(hmp);
da2da375
MD
256 }
257#endif
3897d7e9
MD
258
259 /*
260 * Allocate a new inode structure and deal with races later.
261 */
427e5fc6 262 ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
b3deaf57 263 ++hammer_count_inodes;
9f5097dc 264 ++hmp->count_inodes;
66325755 265 ip->obj_id = obj_id;
27ea2398 266 ip->obj_asof = iinfo.obj_asof;
66325755 267 ip->hmp = hmp;
d113fda1
MD
268 ip->flags = flags & HAMMER_INODE_RO;
269 if (hmp->ronly)
270 ip->flags |= HAMMER_INODE_RO;
a5fddc16 271 ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
8cd0a023 272 RB_INIT(&ip->rec_tree);
1f07f686 273 TAILQ_INIT(&ip->target_list);
427e5fc6
MD
274
275 /*
8cd0a023 276 * Locate the on-disk inode.
427e5fc6 277 */
6a37e7e4 278retry:
4e17f465 279 hammer_init_cursor(trans, &cursor, cache, NULL);
2f85fa4d 280 cursor.key_beg.localization = HAMMER_LOCALIZE_INODE;
8cd0a023
MD
281 cursor.key_beg.obj_id = ip->obj_id;
282 cursor.key_beg.key = 0;
d5530d22 283 cursor.key_beg.create_tid = 0;
8cd0a023
MD
284 cursor.key_beg.delete_tid = 0;
285 cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
286 cursor.key_beg.obj_type = 0;
d5530d22 287 cursor.asof = iinfo.obj_asof;
11ad5ade 288 cursor.flags = HAMMER_CURSOR_GET_LEAF | HAMMER_CURSOR_GET_DATA |
d5530d22 289 HAMMER_CURSOR_ASOF;
8cd0a023
MD
290
291 *errorp = hammer_btree_lookup(&cursor);
6a37e7e4
MD
292 if (*errorp == EDEADLK) {
293 hammer_done_cursor(&cursor);
294 goto retry;
295 }
427e5fc6
MD
296
297 /*
298 * On success the B-Tree lookup will hold the appropriate
299 * buffer cache buffers and provide a pointer to the requested
d113fda1
MD
300 * information. Copy the information to the in-memory inode
301 * and cache the B-Tree node to improve future operations.
427e5fc6 302 */
66325755 303 if (*errorp == 0) {
11ad5ade 304 ip->ino_leaf = cursor.node->ondisk->elms[cursor.index].leaf;
40043e7f 305 ip->ino_data = cursor.data->inode;
61aeeb33
MD
306 hammer_cache_node(cursor.node, &ip->cache[0]);
307 if (cache)
308 hammer_cache_node(cursor.node, cache);
427e5fc6 309 }
427e5fc6
MD
310
311 /*
312 * On success load the inode's record and data and insert the
313 * inode into the B-Tree. It is possible to race another lookup
314 * insertion of the same inode so deal with that condition too.
b3deaf57
MD
315 *
316 * The cursor's locked node interlocks against others creating and
317 * destroying ip while we were blocked.
427e5fc6 318 */
66325755 319 if (*errorp == 0) {
8cd0a023 320 hammer_ref(&ip->lock);
427e5fc6 321 if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
61aeeb33
MD
322 hammer_uncache_node(&ip->cache[0]);
323 hammer_uncache_node(&ip->cache[1]);
b84de5af 324 KKASSERT(ip->lock.refs == 1);
b3deaf57 325 --hammer_count_inodes;
9f5097dc 326 --hmp->count_inodes;
427e5fc6 327 kfree(ip, M_HAMMER);
b3deaf57 328 hammer_done_cursor(&cursor);
427e5fc6
MD
329 goto loop;
330 }
c0ade690 331 ip->flags |= HAMMER_INODE_ONDISK;
427e5fc6 332 } else {
19619882
MD
333 /*
334 * Do not panic on read-only accesses which fail, particularly
335 * historical accesses where the snapshot might not have
336 * complete connectivity.
337 */
338 if ((flags & HAMMER_INODE_RO) == 0) {
339 kprintf("hammer_get_inode: failed ip %p obj_id %016llx cursor %p error %d\n",
340 ip, ip->obj_id, &cursor, *errorp);
77062c8a 341 Debugger("x");
19619882 342 }
e63644f0
MD
343 if (ip->flags & HAMMER_INODE_RSV_INODES) {
344 ip->flags &= ~HAMMER_INODE_RSV_INODES; /* sanity */
9f5097dc 345 --hmp->rsv_inodes;
e63644f0 346 }
9f5097dc 347 hmp->rsv_databufs -= ip->rsv_databufs;
e63644f0
MD
348 ip->rsv_databufs = 0; /* sanity */
349
b3deaf57 350 --hammer_count_inodes;
9f5097dc 351 --hmp->count_inodes;
66325755
MD
352 kfree(ip, M_HAMMER);
353 ip = NULL;
427e5fc6 354 }
b3deaf57 355 hammer_done_cursor(&cursor);
66325755
MD
356 return (ip);
357}
358
8cd0a023
MD
359/*
360 * Create a new filesystem object, returning the inode in *ipp. The
1f07f686 361 * returned inode will be referenced.
8cd0a023 362 *
b84de5af 363 * The inode is created in-memory.
8cd0a023
MD
364 */
365int
a89aec1b
MD
366hammer_create_inode(hammer_transaction_t trans, struct vattr *vap,
367 struct ucred *cred, hammer_inode_t dip,
8cd0a023 368 struct hammer_inode **ipp)
66325755 369{
a89aec1b
MD
370 hammer_mount_t hmp;
371 hammer_inode_t ip;
6b4f890b 372 uid_t xuid;
66325755 373
8cd0a023
MD
374 hmp = trans->hmp;
375 ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
b3deaf57 376 ++hammer_count_inodes;
9f5097dc 377 ++hmp->count_inodes;
0729c8c8 378 ip->obj_id = hammer_alloc_objid(trans, dip);
8cd0a023 379 KKASSERT(ip->obj_id != 0);
7f7c1f84 380 ip->obj_asof = hmp->asof;
8cd0a023 381 ip->hmp = hmp;
b84de5af 382 ip->flush_state = HAMMER_FST_IDLE;
11ad5ade 383 ip->flags = HAMMER_INODE_DDIRTY | HAMMER_INODE_ITIMES;
8cd0a023 384
a5fddc16 385 ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
8cd0a023 386 RB_INIT(&ip->rec_tree);
1f07f686 387 TAILQ_INIT(&ip->target_list);
8cd0a023 388
11ad5ade
MD
389 ip->ino_leaf.atime = trans->time;
390 ip->ino_data.mtime = trans->time;
391 ip->ino_data.size = 0;
392 ip->ino_data.nlinks = 0;
e63644f0
MD
393
394 /*
395 * A nohistory designator on the parent directory is inherited by
396 * the child.
397 */
398 ip->ino_data.uflags = dip->ino_data.uflags &
399 (SF_NOHISTORY|UF_NOHISTORY|UF_NODUMP);
400
11ad5ade 401 ip->ino_leaf.base.btype = HAMMER_BTREE_TYPE_RECORD;
2f85fa4d 402 ip->ino_leaf.base.localization = HAMMER_LOCALIZE_INODE;
11ad5ade
MD
403 ip->ino_leaf.base.obj_id = ip->obj_id;
404 ip->ino_leaf.base.key = 0;
405 ip->ino_leaf.base.create_tid = 0;
406 ip->ino_leaf.base.delete_tid = 0;
407 ip->ino_leaf.base.rec_type = HAMMER_RECTYPE_INODE;
408 ip->ino_leaf.base.obj_type = hammer_get_obj_type(vap->va_type);
409
410 ip->ino_data.obj_type = ip->ino_leaf.base.obj_type;
8cd0a023
MD
411 ip->ino_data.version = HAMMER_INODE_DATA_VERSION;
412 ip->ino_data.mode = vap->va_mode;
b84de5af 413 ip->ino_data.ctime = trans->time;
11ad5ade 414 ip->ino_data.parent_obj_id = (dip) ? dip->ino_leaf.base.obj_id : 0;
6b4f890b 415
11ad5ade 416 switch(ip->ino_leaf.base.obj_type) {
7a04d74f
MD
417 case HAMMER_OBJTYPE_CDEV:
418 case HAMMER_OBJTYPE_BDEV:
419 ip->ino_data.rmajor = vap->va_rmajor;
420 ip->ino_data.rminor = vap->va_rminor;
421 break;
422 default:
423 break;
424 }
425
6b4f890b
MD
426 /*
427 * Calculate default uid/gid and overwrite with information from
428 * the vap.
429 */
430 xuid = hammer_to_unix_xid(&dip->ino_data.uid);
6b4f890b
MD
431 xuid = vop_helper_create_uid(hmp->mp, dip->ino_data.mode, xuid, cred,
432 &vap->va_mode);
433 ip->ino_data.mode = vap->va_mode;
434
8cd0a023
MD
435 if (vap->va_vaflags & VA_UID_UUID_VALID)
436 ip->ino_data.uid = vap->va_uid_uuid;
6b4f890b 437 else if (vap->va_uid != (uid_t)VNOVAL)
7538695e
MD
438 hammer_guid_to_uuid(&ip->ino_data.uid, vap->va_uid);
439 else
6b4f890b 440 hammer_guid_to_uuid(&ip->ino_data.uid, xuid);
7538695e 441
8cd0a023
MD
442 if (vap->va_vaflags & VA_GID_UUID_VALID)
443 ip->ino_data.gid = vap->va_gid_uuid;
6b4f890b 444 else if (vap->va_gid != (gid_t)VNOVAL)
8cd0a023 445 hammer_guid_to_uuid(&ip->ino_data.gid, vap->va_gid);
7538695e
MD
446 else
447 ip->ino_data.gid = dip->ino_data.gid;
8cd0a023
MD
448
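	/*
	 * Insert the new in-memory inode into the inode tree.  A collision
	 * means the objid allocator handed out a duplicate and is fatal.
	 */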
449 hammer_ref(&ip->lock);
450 if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
451 hammer_unref(&ip->lock);
a89aec1b 452 panic("hammer_create_inode: duplicate obj_id %llx", ip->obj_id);
8cd0a023
MD
453 }
454 *ipp = ip;
455 return(0);
66325755
MD
456}
457
d113fda1
MD
458/*
459 * Called by hammer_sync_inode().
460 */
461static int
4e17f465 462hammer_update_inode(hammer_cursor_t cursor, hammer_inode_t ip)
c0ade690 463{
4e17f465 464 hammer_transaction_t trans = cursor->trans;
c0ade690
MD
465 hammer_record_t record;
466 int error;
467
d26d0ae9 468retry:
c0ade690
MD
469 error = 0;
470
869e8f55
MD
471 /*
472 * If the inode has a presence on-disk then locate it and mark
473 * it deleted, setting DELONDISK.
474 *
475 * The record may or may not be physically deleted, depending on
476 * the retention policy.
477 */
76376933
MD
478 if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
479 HAMMER_INODE_ONDISK) {
4e17f465 480 hammer_normalize_cursor(cursor);
2f85fa4d 481 cursor->key_beg.localization = HAMMER_LOCALIZE_INODE;
4e17f465
MD
482 cursor->key_beg.obj_id = ip->obj_id;
483 cursor->key_beg.key = 0;
484 cursor->key_beg.create_tid = 0;
485 cursor->key_beg.delete_tid = 0;
486 cursor->key_beg.rec_type = HAMMER_RECTYPE_INODE;
487 cursor->key_beg.obj_type = 0;
488 cursor->asof = ip->obj_asof;
489 cursor->flags &= ~HAMMER_CURSOR_INITMASK;
11ad5ade 490 cursor->flags |= HAMMER_CURSOR_GET_LEAF | HAMMER_CURSOR_ASOF;
4e17f465
MD
491 cursor->flags |= HAMMER_CURSOR_BACKEND;
492
493 error = hammer_btree_lookup(cursor);
e8599db1
MD
494 if (hammer_debug_inode)
495 kprintf("IPDEL %p %08x %d", ip, ip->flags, error);
b84de5af
MD
496 if (error) {
497 kprintf("error %d\n", error);
498 Debugger("hammer_update_inode");
499 }
500
c0ade690 501 if (error == 0) {
e63644f0 502 error = hammer_ip_delete_record(cursor, ip, trans->tid);
e8599db1
MD
503 if (hammer_debug_inode)
504 kprintf(" error %d\n", error);
f90dde4c 505 if (error && error != EDEADLK) {
b84de5af
MD
506 kprintf("error %d\n", error);
507 Debugger("hammer_update_inode2");
508 }
1f07f686 509 if (error == 0) {
195c19a1 510 ip->flags |= HAMMER_INODE_DELONDISK;
1f07f686 511 }
e8599db1
MD
512 if (cursor->node)
513 hammer_cache_node(cursor->node, &ip->cache[0]);
4e17f465
MD
514 }
515 if (error == EDEADLK) {
516 hammer_done_cursor(cursor);
517 error = hammer_init_cursor(trans, cursor,
518 &ip->cache[0], ip);
e8599db1
MD
519 if (hammer_debug_inode)
520 kprintf("IPDED %p %d\n", ip, error);
4e17f465
MD
521 if (error == 0)
522 goto retry;
c0ade690 523 }
c0ade690
MD
524 }
525
526 /*
869e8f55
MD
527 * Ok, write out the initial record or a new record (after deleting
528 * the old one), unless the DELETED flag is set. This routine will
529 * clear DELONDISK if it writes out a record.
76376933 530 *
869e8f55
MD
531 * Update our inode statistics if this is the first application of
532 * the inode on-disk.
c0ade690 533 */
869e8f55
MD
534 if (error == 0 && (ip->flags & HAMMER_INODE_DELETED) == 0) {
535 /*
536 * Generate a record and write it to the media
537 */
11ad5ade 538 record = hammer_alloc_mem_record(ip, 0);
930bf163 539 record->type = HAMMER_MEM_RECORD_INODE;
1f07f686 540 record->flush_state = HAMMER_FST_FLUSH;
11ad5ade
MD
541 record->leaf = ip->sync_ino_leaf;
542 record->leaf.base.create_tid = trans->tid;
543 record->leaf.data_len = sizeof(ip->sync_ino_data);
b84de5af 544 record->data = (void *)&ip->sync_ino_data;
d36ec43b 545 record->flags |= HAMMER_RECF_INTERLOCK_BE;
4e17f465
MD
546 for (;;) {
547 error = hammer_ip_sync_record_cursor(cursor, record);
e8599db1
MD
548 if (hammer_debug_inode)
549 kprintf("GENREC %p rec %08x %d\n",
550 ip, record->flags, error);
4e17f465
MD
551 if (error != EDEADLK)
552 break;
553 hammer_done_cursor(cursor);
554 error = hammer_init_cursor(trans, cursor,
555 &ip->cache[0], ip);
e8599db1
MD
556 if (hammer_debug_inode)
557 kprintf("GENREC reinit %d\n", error);
4e17f465
MD
558 if (error)
559 break;
560 }
b84de5af
MD
561 if (error) {
562 kprintf("error %d\n", error);
563 Debugger("hammer_update_inode3");
564 }
d36ec43b
MD
565
566 /*
567 * The record isn't managed by the inode's record tree,
568 * destroy it whether we succeed or fail.
569 */
570 record->flags &= ~HAMMER_RECF_INTERLOCK_BE;
571 record->flags |= HAMMER_RECF_DELETED_FE;
1f07f686 572 record->flush_state = HAMMER_FST_IDLE;
b3deaf57 573 hammer_rel_mem_record(record);
d36ec43b 574
869e8f55
MD
575 /*
576 * Finish up.
577 */
d26d0ae9 578 if (error == 0) {
e8599db1
MD
579 if (hammer_debug_inode)
580 kprintf("CLEANDELOND %p %08x\n", ip, ip->flags);
11ad5ade 581 ip->sync_flags &= ~(HAMMER_INODE_DDIRTY |
b84de5af
MD
582 HAMMER_INODE_ITIMES);
583 ip->flags &= ~HAMMER_INODE_DELONDISK;
1f07f686
MD
584
585 /*
586 * Root volume count of inodes
587 */
d26d0ae9 588 if ((ip->flags & HAMMER_INODE_ONDISK) == 0) {
e8599db1
MD
589 hammer_modify_volume_field(trans,
590 trans->rootvol,
591 vol0_stat_inodes);
0b075555 592 ++ip->hmp->rootvol->ondisk->vol0_stat_inodes;
10a5d1ba 593 hammer_modify_volume_done(trans->rootvol);
d26d0ae9 594 ip->flags |= HAMMER_INODE_ONDISK;
e8599db1
MD
595 if (hammer_debug_inode)
596 kprintf("NOWONDISK %p\n", ip);
d26d0ae9 597 }
fbc6e32a 598 }
c0ade690 599 }
869e8f55
MD
600
601 /*
602 * If the inode has been destroyed, clean out any left-over flags
603 * that may have been set by the frontend.
604 */
f90dde4c 605 if (error == 0 && (ip->flags & HAMMER_INODE_DELETED)) {
11ad5ade 606 ip->sync_flags &= ~(HAMMER_INODE_DDIRTY |
f90dde4c
MD
607 HAMMER_INODE_ITIMES);
608 }
c0ade690
MD
609 return(error);
610}
611
a89aec1b 612/*
d113fda1
MD
613 * Update only the itimes fields. This is done no-historically. The
614 * record is updated in-place on the disk.
615 */
616static int
4e17f465 617hammer_update_itimes(hammer_cursor_t cursor, hammer_inode_t ip)
d113fda1 618{
4e17f465 619 hammer_transaction_t trans = cursor->trans;
11ad5ade 620 struct hammer_btree_leaf_elm *leaf;
d113fda1
MD
621 int error;
622
6a37e7e4 623retry:
d113fda1
MD
624 error = 0;
625 if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
626 HAMMER_INODE_ONDISK) {
4e17f465 627 hammer_normalize_cursor(cursor);
2f85fa4d 628 cursor->key_beg.localization = HAMMER_LOCALIZE_INODE;
4e17f465
MD
629 cursor->key_beg.obj_id = ip->obj_id;
630 cursor->key_beg.key = 0;
631 cursor->key_beg.create_tid = 0;
632 cursor->key_beg.delete_tid = 0;
633 cursor->key_beg.rec_type = HAMMER_RECTYPE_INODE;
634 cursor->key_beg.obj_type = 0;
635 cursor->asof = ip->obj_asof;
636 cursor->flags &= ~HAMMER_CURSOR_INITMASK;
11ad5ade 637 cursor->flags |= HAMMER_CURSOR_GET_LEAF | HAMMER_CURSOR_ASOF;
4e17f465
MD
638 cursor->flags |= HAMMER_CURSOR_BACKEND;
639
640 error = hammer_btree_lookup(cursor);
b84de5af
MD
641 if (error) {
642 kprintf("error %d\n", error);
643 Debugger("hammer_update_itimes1");
644 }
d113fda1 645 if (error == 0) {
10a5d1ba 646 /*
11ad5ade 647 * Do not generate UNDO records for atime updates.
10a5d1ba 648 */
11ad5ade
MD
649 leaf = cursor->leaf;
650 hammer_modify_node(trans, cursor->node,
651 &leaf->atime, sizeof(leaf->atime));
652 leaf->atime = ip->sync_ino_leaf.atime;
653 hammer_modify_node_done(cursor->node);
654 /*rec->ino_mtime = ip->sync_ino_rec.ino_mtime;*/
b84de5af 655 ip->sync_flags &= ~HAMMER_INODE_ITIMES;
d113fda1 656 /* XXX recalculate crc */
4e17f465
MD
657 hammer_cache_node(cursor->node, &ip->cache[0]);
658 }
659 if (error == EDEADLK) {
660 hammer_done_cursor(cursor);
661 error = hammer_init_cursor(trans, cursor,
662 &ip->cache[0], ip);
663 if (error == 0)
664 goto retry;
d113fda1 665 }
d113fda1
MD
666 }
667 return(error);
668}
669
670/*
1f07f686 671 * Release a reference on an inode, flush as requested.
b84de5af
MD
672 *
673 * On the last reference we queue the inode to the flusher for its final
674 * disposition.
a89aec1b 675 */
66325755 676void
a89aec1b 677hammer_rel_inode(struct hammer_inode *ip, int flush)
66325755 678{
1f07f686
MD
679 hammer_mount_t hmp = ip->hmp;
680
f90dde4c
MD
681 /*
682 * Handle disposition when dropping the last ref.
683 */
1f07f686
MD
684 for (;;) {
685 if (ip->lock.refs == 1) {
686 /*
687 * Determine whether on-disk action is needed for
688 * the inode's final disposition.
689 */
e8599db1
MD
690 KKASSERT(ip->vp == NULL);
691 hammer_inode_unloadable_check(ip, 0);
4e17f465 692 if (ip->flags & HAMMER_INODE_MODMASK) {
0832c9bb
MD
693 if (hmp->rsv_inodes > desiredvnodes) {
694 hammer_flush_inode(ip,
695 HAMMER_FLUSH_SIGNAL);
696 } else {
697 hammer_flush_inode(ip, 0);
698 }
4e17f465 699 } else if (ip->lock.refs == 1) {
1f07f686
MD
700 hammer_unload_inode(ip);
701 break;
702 }
b84de5af 703 } else {
4e17f465 704 if (flush)
1f07f686 705 hammer_flush_inode(ip, 0);
4e17f465 706
1f07f686
MD
707 /*
708 * The inode still has multiple refs, try to drop
709 * one ref.
710 */
711 KKASSERT(ip->lock.refs >= 1);
712 if (ip->lock.refs > 1) {
713 hammer_unref(&ip->lock);
714 break;
715 }
b84de5af 716 }
f90dde4c 717 }
427e5fc6
MD
718}
719
27ea2398 720/*
b84de5af
MD
721 * Unload and destroy the specified inode. Must be called with one remaining
722 * reference. The reference is disposed of.
8cd0a023 723 *
b84de5af 724 * This can only be called in the context of the flusher.
27ea2398 725 */
b84de5af 726static int
ec4e8497 727hammer_unload_inode(struct hammer_inode *ip)
27ea2398 728{
9f5097dc
MD
729 hammer_mount_t hmp = ip->hmp;
730
b84de5af 731 KASSERT(ip->lock.refs == 1,
a89aec1b 732 ("hammer_unload_inode: %d refs\n", ip->lock.refs));
8cd0a023 733 KKASSERT(ip->vp == NULL);
f90dde4c
MD
734 KKASSERT(ip->flush_state == HAMMER_FST_IDLE);
735 KKASSERT(ip->cursor_ip_refs == 0);
45a014dc 736 KKASSERT(ip->lock.lockcount == 0);
f90dde4c
MD
737 KKASSERT((ip->flags & HAMMER_INODE_MODMASK) == 0);
738
739 KKASSERT(RB_EMPTY(&ip->rec_tree));
1f07f686 740 KKASSERT(TAILQ_EMPTY(&ip->target_list));
f90dde4c 741
9f5097dc 742 RB_REMOVE(hammer_ino_rb_tree, &hmp->rb_inos_root, ip);
f90dde4c
MD
743
744 hammer_uncache_node(&ip->cache[0]);
745 hammer_uncache_node(&ip->cache[1]);
0729c8c8
MD
746 if (ip->objid_cache)
747 hammer_clear_objid(ip);
f90dde4c 748 --hammer_count_inodes;
9f5097dc
MD
749 --hmp->count_inodes;
750 if (hmp->flags & HAMMER_MOUNT_WAITIMAX)
751 hammer_inode_wakereclaims(hmp);
752
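	/*
	 * If the inode was counted as reclaim-pending, remove it from the
	 * reclaim accounting before it is freed.
	 */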
753 if (ip->flags & HAMMER_INODE_RECLAIM) {
754 --hammer_count_reclaiming;
755 --hmp->inode_reclaims;
756 ip->flags &= ~HAMMER_INODE_RECLAIM;
757 }
f90dde4c 758 kfree(ip, M_HAMMER);
6b4f890b 759
27ea2398
MD
760 return(0);
761}
762
51c35492
MD
763/*
764 * Called on mount -u when switching from RW to RO or vice-versa. Adjust
765 * the read-only flag for cached inodes.
766 *
767 * This routine is called from a RB_SCAN().
768 */
769int
770hammer_reload_inode(hammer_inode_t ip, void *arg __unused)
771{
772 hammer_mount_t hmp = ip->hmp;
773
774 if (hmp->ronly || hmp->asof != HAMMER_MAX_TID)
775 ip->flags |= HAMMER_INODE_RO;
776 else
777 ip->flags &= ~HAMMER_INODE_RO;
778 return(0);
779}
780
427e5fc6 781/*
d113fda1
MD
782 * A transaction has modified an inode, requiring updates as specified by
783 * the passed flags.
7f7c1f84 784 *
d113fda1 785 * HAMMER_INODE_DDIRTY: Inode data has been updated
1f07f686 786 * HAMMER_INODE_XDIRTY: Dirty in-memory records
4e17f465 787 * HAMMER_INODE_BUFS: Dirty buffer cache buffers
d113fda1
MD
788 * HAMMER_INODE_DELETED: Inode record/data must be deleted
789 * HAMMER_INODE_ITIMES: mtime/atime has been updated
427e5fc6 790 */
66325755 791void
47637bff 792hammer_modify_inode(hammer_inode_t ip, int flags)
427e5fc6 793{
d113fda1 794 KKASSERT ((ip->flags & HAMMER_INODE_RO) == 0 ||
11ad5ade
MD
795 (flags & (HAMMER_INODE_DDIRTY |
796 HAMMER_INODE_XDIRTY | HAMMER_INODE_BUFS |
797 HAMMER_INODE_DELETED | HAMMER_INODE_ITIMES)) == 0);
e63644f0
MD
798 if ((ip->flags & HAMMER_INODE_RSV_INODES) == 0) {
799 ip->flags |= HAMMER_INODE_RSV_INODES;
800 ++ip->hmp->rsv_inodes;
801 }
b84de5af
MD
802
803 ip->flags |= flags;
804}
805
806/*
1f07f686
MD
807 * Request that an inode be flushed. This whole mess cannot block and may
808 * recurse. Once requested HAMMER will attempt to actively flush it until
809 * the flush can be done.
b84de5af 810 *
1f07f686
MD
811 * The inode may already be flushing, or may be in a setup state. We can
812 * place the inode in a flushing state if it is currently idle and flag it
813 * to reflush if it is currently flushing.
b84de5af
MD
814 */
815void
f90dde4c 816hammer_flush_inode(hammer_inode_t ip, int flags)
b84de5af 817{
1f07f686
MD
818 hammer_record_t depend;
819 int r, good;
820
821 /*
822 * Trivial 'nothing to flush' case. If the inode is in a SETUP
823 * state we have to put it back into an IDLE state so we can
824 * drop the extra ref.
825 */
4e17f465 826 if ((ip->flags & HAMMER_INODE_MODMASK) == 0) {
1f07f686
MD
827 if (ip->flush_state == HAMMER_FST_SETUP) {
828 ip->flush_state = HAMMER_FST_IDLE;
829 hammer_rel_inode(ip, 0);
ec4e8497 830 }
b84de5af
MD
831 return;
832 }
42c7d26b 833
1f07f686
MD
834 /*
835 * Our flush action will depend on the current state.
836 */
837 switch(ip->flush_state) {
838 case HAMMER_FST_IDLE:
839 /*
840 * We have no dependancies and can flush immediately. Some of
841 * our children may not be flushable so we have to re-test
842 * with that additional knowledge.
843 */
844 hammer_flush_inode_core(ip, flags);
845 break;
846 case HAMMER_FST_SETUP:
847 /*
848 * Recurse upwards through dependancies via target_list
849 * and start their flusher actions going if possible.
850 *
851 * 'good' is our connectivity. -1 means we have none and
852 * can't flush, 0 means there weren't any dependancies, and
853 * 1 means we have good connectivity.
854 */
855 good = 0;
856 TAILQ_FOREACH(depend, &ip->target_list, target_entry) {
857 r = hammer_setup_parent_inodes(depend);
858 if (r < 0 && good == 0)
859 good = -1;
860 if (r > 0)
861 good = 1;
862 }
863
864 /*
865 * We can continue if good >= 0. Determine how many records
866 * under our inode can be flushed (and mark them).
867 */
1f07f686
MD
868 if (good >= 0) {
869 hammer_flush_inode_core(ip, flags);
870 } else {
871 ip->flags |= HAMMER_INODE_REFLUSH;
4e17f465
MD
872 if (flags & HAMMER_FLUSH_SIGNAL) {
873 ip->flags |= HAMMER_INODE_RESIGNAL;
874 hammer_flusher_async(ip->hmp);
875 }
1f07f686
MD
876 }
877 break;
878 default:
879 /*
880 * We are already flushing, flag the inode to reflush
881 * if needed after it completes its current flush.
882 */
883 if ((ip->flags & HAMMER_INODE_REFLUSH) == 0)
884 ip->flags |= HAMMER_INODE_REFLUSH;
4e17f465
MD
885 if (flags & HAMMER_FLUSH_SIGNAL) {
886 ip->flags |= HAMMER_INODE_RESIGNAL;
887 hammer_flusher_async(ip->hmp);
888 }
1f07f686
MD
889 break;
890 }
891}
892
893/*
894 * We are asked to recurse upwards and convert the record from SETUP
895 * to FLUSH if possible. record->ip is a parent of the caller's inode,
896 * and record->target_ip is the caller's inode.
897 *
898 * Return 1 if the record gives us connectivity
899 *
900 * Return 0 if the record is not relevant
901 *
902 * Return -1 if we can't resolve the dependancy and there is no connectivity.
903 */
904static int
905hammer_setup_parent_inodes(hammer_record_t record)
906{
907 hammer_mount_t hmp = record->ip->hmp;
908 hammer_record_t depend;
909 hammer_inode_t ip;
910 int r, good;
911
912 KKASSERT(record->flush_state != HAMMER_FST_IDLE);
913 ip = record->ip;
914
915 /*
916 * If the record is already flushing, is it in our flush group?
917 *
e8599db1
MD
918 * If it is in our flush group but it is a general record or a
919 * delete-on-disk, it does not improve our connectivity (return 0),
920 * and if the target inode is not trying to destroy itself we can't
921 * allow the operation yet anyway (the second return -1).
1f07f686
MD
922 */
923 if (record->flush_state == HAMMER_FST_FLUSH) {
da2da375 924 if (record->flush_group != hmp->flusher.next) {
1f07f686
MD
925 ip->flags |= HAMMER_INODE_REFLUSH;
926 return(-1);
f90dde4c 927 }
1f07f686
MD
928 if (record->type == HAMMER_MEM_RECORD_ADD)
929 return(1);
e8599db1 930 /* GENERAL or DEL */
1f07f686
MD
931 return(0);
932 }
933
934 /*
935 * It must be a setup record. Try to resolve the setup dependancies
936 * by recursing upwards so we can place ip on the flush list.
937 */
938 KKASSERT(record->flush_state == HAMMER_FST_SETUP);
939
940 good = 0;
941 TAILQ_FOREACH(depend, &ip->target_list, target_entry) {
942 r = hammer_setup_parent_inodes(depend);
943 if (r < 0 && good == 0)
944 good = -1;
945 if (r > 0)
946 good = 1;
947 }
948
949 /*
950 * We can't flush ip because it has no connectivity (XXX also check
951 * nlinks for pre-existing connectivity!). Flag it so any resolution
952 * recurses back down.
953 */
954 if (good < 0) {
955 ip->flags |= HAMMER_INODE_REFLUSH;
956 return(good);
957 }
958
959 /*
960 * We are go, place the parent inode in a flushing state so we can
961 * place its record in a flushing state. Note that the parent
962 * may already be flushing. The record must be in the same flush
963 * group as the parent.
964 */
965 if (ip->flush_state != HAMMER_FST_FLUSH)
966 hammer_flush_inode_core(ip, HAMMER_FLUSH_RECURSION);
967 KKASSERT(ip->flush_state == HAMMER_FST_FLUSH);
968 KKASSERT(record->flush_state == HAMMER_FST_SETUP);
969
970#if 0
971 if (record->type == HAMMER_MEM_RECORD_DEL &&
869e8f55 972 (record->target_ip->flags & (HAMMER_INODE_DELETED|HAMMER_INODE_DELONDISK)) == 0) {
1f07f686
MD
973 /*
974 * Regardless of flushing state we cannot sync this path if the
975 * record represents a delete-on-disk but the target inode
976 * is not ready to sync its own deletion.
977 *
978 * XXX need to count effective nlinks to determine whether
979 * the flush is ok, otherwise removing a hardlink will
980 * just leave the DEL record to rot.
981 */
982 record->target_ip->flags |= HAMMER_INODE_REFLUSH;
983 return(-1);
984 } else
985#endif
da2da375 986 if (ip->flush_group == ip->hmp->flusher.next) {
1f07f686
MD
987 /*
988 * This is the record we wanted to synchronize.
989 */
990 record->flush_state = HAMMER_FST_FLUSH;
991 record->flush_group = ip->flush_group;
992 hammer_ref(&record->lock);
993 if (record->type == HAMMER_MEM_RECORD_ADD)
994 return(1);
995
996 /*
e8599db1
MD
997 * A general or delete-on-disk record does not contribute
998 * to our visibility. We can still flush it, however.
1f07f686
MD
999 */
1000 return(0);
1001 } else {
1002 /*
1003 * We couldn't resolve the dependancies, request that the
1004 * inode be flushed when the dependancies can be resolved.
1005 */
1006 ip->flags |= HAMMER_INODE_REFLUSH;
1007 return(-1);
7f7c1f84 1008 }
c0ade690
MD
1009}
1010
1011/*
1f07f686 1012 * This is the core routine placing an inode into the FST_FLUSH state.
c0ade690 1013 */
b84de5af 1014static void
1f07f686 1015hammer_flush_inode_core(hammer_inode_t ip, int flags)
b84de5af 1016{
1f07f686 1017 int go_count;
1f07f686 1018
4e17f465
MD
1019 /*
1020 * Set flush state and prevent the flusher from cycling into
1021 * the next flush group. Do not place the ip on the list yet.
1022 * Inodes not in the idle state get an extra reference.
1023 */
1f07f686
MD
1024 KKASSERT(ip->flush_state != HAMMER_FST_FLUSH);
1025 if (ip->flush_state == HAMMER_FST_IDLE)
1026 hammer_ref(&ip->lock);
1027 ip->flush_state = HAMMER_FST_FLUSH;
da2da375
MD
1028 ip->flush_group = ip->hmp->flusher.next;
1029 ++ip->hmp->flusher.group_lock;
b84de5af 1030
e8599db1
MD
1031 /*
1032 * We need to be able to vfsync/truncate from the backend.
1033 */
1034 KKASSERT((ip->flags & HAMMER_INODE_VHELD) == 0);
1035 if (ip->vp && (ip->vp->v_flag & VINACTIVE) == 0) {
1036 ip->flags |= HAMMER_INODE_VHELD;
1037 vref(ip->vp);
1038 }
1039
ec4e8497 1040 /*
1f07f686
MD
1041 * Figure out how many in-memory records we can actually flush
1042 * (not including inode meta-data, buffers, etc).
ec4e8497 1043 */
1f07f686
MD
1044 if (flags & HAMMER_FLUSH_RECURSION) {
1045 go_count = 1;
1046 } else {
1047 go_count = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL,
1048 hammer_setup_child_callback, NULL);
1049 }
b84de5af
MD
1050
1051 /*
1f07f686
MD
1052 * This is a more involved test that includes go_count. If we
1053 * can't flush, flag the inode and return. If go_count is 0 we
1054 * were unable to flush any records in our rec_tree and
1055 * must ignore the XDIRTY flag.
b84de5af 1056 */
1f07f686
MD
1057 if (go_count == 0) {
1058 if ((ip->flags & HAMMER_INODE_MODMASK_NOXDIRTY) == 0) {
1059 ip->flags |= HAMMER_INODE_REFLUSH;
1060 ip->flush_state = HAMMER_FST_SETUP;
e8599db1
MD
1061 if (ip->flags & HAMMER_INODE_VHELD) {
1062 ip->flags &= ~HAMMER_INODE_VHELD;
1063 vrele(ip->vp);
1064 }
4e17f465
MD
1065 if (flags & HAMMER_FLUSH_SIGNAL) {
1066 ip->flags |= HAMMER_INODE_RESIGNAL;
1067 hammer_flusher_async(ip->hmp);
1068 }
da2da375
MD
1069 if (--ip->hmp->flusher.group_lock == 0)
1070 wakeup(&ip->hmp->flusher.group_lock);
1f07f686
MD
1071 return;
1072 }
1073 }
b84de5af 1074
b84de5af
MD
1075 /*
1076 * Snapshot the state of the inode for the backend flusher.
1077 *
1078 * The truncation must be retained in the frontend until after
1079 * we've actually performed the record deletion.
1f07f686
MD
1080 *
1081 * NOTE: The DELETING flag is a mod flag, but it is also sticky,
1082 * and stays in ip->flags. Once set, it stays set until the
1083 * inode is destroyed.
b84de5af
MD
1084 */
1085 ip->sync_flags = (ip->flags & HAMMER_INODE_MODMASK);
1086 ip->sync_trunc_off = ip->trunc_off;
11ad5ade 1087 ip->sync_ino_leaf = ip->ino_leaf;
b84de5af 1088 ip->sync_ino_data = ip->ino_data;
47637bff
MD
1089 ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
1090 ip->flags &= ~HAMMER_INODE_MODMASK;
0832c9bb
MD
1091#ifdef DEBUG_TRUNCATE
1092 if ((ip->sync_flags & HAMMER_INODE_TRUNCATED) && ip == HammerTruncIp)
1093 kprintf("truncateS %016llx\n", ip->sync_trunc_off);
1094#endif
b84de5af
MD
1095
1096 /*
4e17f465 1097 * The flusher list inherits our inode and reference.
b84de5af 1098 */
1f07f686 1099 TAILQ_INSERT_TAIL(&ip->hmp->flush_list, ip, flush_entry);
da2da375
MD
1100 if (--ip->hmp->flusher.group_lock == 0)
1101 wakeup(&ip->hmp->flusher.group_lock);
1f07f686 1102
0832c9bb 1103 if (flags & HAMMER_FLUSH_SIGNAL) {
1f07f686 1104 hammer_flusher_async(ip->hmp);
0832c9bb 1105 }
b84de5af
MD
1106}
1107
ec4e8497 1108/*
1f07f686
MD
1109 * Callback for scan of ip->rec_tree. Try to include each record in our
1110 * flush. ip->flush_group has been set but the inode has not yet been
1111 * moved into a flushing state.
1112 *
1113 * If we get stuck on a record we have to set HAMMER_INODE_REFLUSH on
1114 * both inodes.
1115 *
1116 * We return 1 for any record placed or found in FST_FLUSH, which prevents
1117 * the caller from shortcutting the flush.
ec4e8497 1118 */
c0ade690 1119static int
1f07f686 1120hammer_setup_child_callback(hammer_record_t rec, void *data)
b84de5af 1121{
1f07f686
MD
1122 hammer_inode_t target_ip;
1123 hammer_inode_t ip;
1124 int r;
1125
1126 /*
1127 * If the record has been deleted by the backend (it's being held
1128 * by the frontend in a race), just ignore it.
1129 */
1130 if (rec->flags & HAMMER_RECF_DELETED_BE)
ec4e8497 1131 return(0);
1f07f686
MD
1132
1133 /*
1134 * If the record is in an idle state it has no dependancies and
1135 * can be flushed.
1136 */
1137 ip = rec->ip;
1138 r = 0;
1139
1140 switch(rec->flush_state) {
1141 case HAMMER_FST_IDLE:
1142 /*
1143 * Record has no setup dependancy, we can flush it.
1144 */
1145 KKASSERT(rec->target_ip == NULL);
1146 rec->flush_state = HAMMER_FST_FLUSH;
1147 rec->flush_group = ip->flush_group;
b84de5af 1148 hammer_ref(&rec->lock);
1f07f686
MD
1149 r = 1;
1150 break;
1151 case HAMMER_FST_SETUP:
1152 /*
1153 * Record has a setup dependancy. Try to include the
1154 * target ip in the flush.
1155 *
1156 * We have to be careful here, if we do not do the right
1157 * thing we can lose track of dirty inodes and the system
1158 * will lockup trying to allocate buffers.
1159 */
1160 target_ip = rec->target_ip;
1161 KKASSERT(target_ip != NULL);
1162 KKASSERT(target_ip->flush_state != HAMMER_FST_IDLE);
1163 if (target_ip->flush_state == HAMMER_FST_FLUSH) {
1164 /*
1165 * If the target IP is already flushing in our group
1166 * we are golden, otherwise make sure the target
1167 * reflushes.
1168 */
1169 if (target_ip->flush_group == ip->flush_group) {
1170 rec->flush_state = HAMMER_FST_FLUSH;
1171 rec->flush_group = ip->flush_group;
1172 hammer_ref(&rec->lock);
1173 r = 1;
1174 } else {
1175 target_ip->flags |= HAMMER_INODE_REFLUSH;
1176 }
1177 } else if (rec->type == HAMMER_MEM_RECORD_ADD) {
1178 /*
1179 * If the target IP is not flushing we can force
1180 * it to flush, even if it is unable to write out
1181 * any of its own records we have at least one in
1182 * hand that we CAN deal with.
1183 */
1184 rec->flush_state = HAMMER_FST_FLUSH;
1185 rec->flush_group = ip->flush_group;
1186 hammer_ref(&rec->lock);
1187 hammer_flush_inode_core(target_ip,
1188 HAMMER_FLUSH_RECURSION);
1189 r = 1;
1190 } else {
1191 /*
e8599db1
MD
1192 * General or delete-on-disk record.
1193 *
1194 * XXX this needs help. If a delete-on-disk we could
1195 * disconnect the target. If the target has its own
1196 * dependancies they really need to be flushed.
1f07f686
MD
1197 *
1198 * XXX
1199 */
1200 rec->flush_state = HAMMER_FST_FLUSH;
1201 rec->flush_group = ip->flush_group;
1202 hammer_ref(&rec->lock);
1203 hammer_flush_inode_core(target_ip,
1204 HAMMER_FLUSH_RECURSION);
1205 r = 1;
1206 }
1207 break;
1208 case HAMMER_FST_FLUSH:
1209 /*
1210 * Record already associated with a flush group. It had
1211 * better be ours.
1212 */
1213 KKASSERT(rec->flush_group == ip->flush_group);
1214 r = 1;
1215 break;
b84de5af 1216 }
1f07f686 1217 return(r);
b84de5af
MD
1218}
1219
b84de5af
MD
1220/*
1221 * Wait for a previously queued flush to complete
1222 */
1223void
1224hammer_wait_inode(hammer_inode_t ip)
1225{
e8599db1 1226 while (ip->flush_state != HAMMER_FST_IDLE) {
0832c9bb
MD
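		/*
		 * A SETUP state means the flush was never actually queued;
		 * signal one.  Otherwise sleep until the flusher finishes
		 * with the inode.
		 */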
1227 if (ip->flush_state == HAMMER_FST_SETUP) {
1228 hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
1229 } else {
1230 ip->flags |= HAMMER_INODE_FLUSHW;
1231 tsleep(&ip->flags, 0, "hmrwin", 0);
1232 }
b84de5af
MD
1233 }
1234}
1235
1236/*
1237 * Called by the backend code when a flush has been completed.
1238 * The inode has already been removed from the flush list.
1239 *
1240 * A pipelined flush can occur, in which case we must re-enter the
1241 * inode on the list and re-copy its fields.
1242 */
1243void
1244hammer_flush_inode_done(hammer_inode_t ip)
1245{
1f07f686 1246 int dorel = 0;
1955afa7 1247
b84de5af
MD
1248 KKASSERT(ip->flush_state == HAMMER_FST_FLUSH);
1249
1f07f686
MD
1250 /*
1251 * Merge left-over flags back into the frontend and fix the state.
1252 */
b84de5af 1253 ip->flags |= ip->sync_flags;
1f07f686
MD
1254
1255 /*
1256 * The backend may have adjusted nlinks, so if the adjusted nlinks
1257 * does not match the frontend, set the frontend's DDIRTY flag again.
1258 */
11ad5ade
MD
1259 if (ip->ino_data.nlinks != ip->sync_ino_data.nlinks)
1260 ip->flags |= HAMMER_INODE_DDIRTY;
b84de5af 1261
4e17f465 1262 /*
e63644f0
MD
1263 * Fix up the dirty buffer status. IO completions will also
1264 * try to clean up rsv_databufs.
4e17f465 1265 */
0832c9bb 1266 if (ip->vp && RB_ROOT(&ip->vp->v_rbdirty_tree)) {
1f07f686 1267 ip->flags |= HAMMER_INODE_BUFS;
e63644f0
MD
1268 } else {
1269 ip->hmp->rsv_databufs -= ip->rsv_databufs;
1270 ip->rsv_databufs = 0;
1f07f686
MD
1271 }
1272
1273 /*
1274 * Re-set the XDIRTY flag if some of the inode's in-memory records
1275 * could not be flushed.
1276 */
0832c9bb
MD
1277 KKASSERT((RB_EMPTY(&ip->rec_tree) &&
1278 (ip->flags & HAMMER_INODE_XDIRTY) == 0) ||
1279 (!RB_EMPTY(&ip->rec_tree) &&
1280 (ip->flags & HAMMER_INODE_XDIRTY) != 0));
4e17f465
MD
1281
1282 /*
1283 * Do not lose track of inodes which no longer have vnode
1284 * associations, otherwise they may never get flushed again.
1285 */
1286 if ((ip->flags & HAMMER_INODE_MODMASK) && ip->vp == NULL)
b84de5af 1287 ip->flags |= HAMMER_INODE_REFLUSH;
4e17f465
MD
1288
1289 /*
1290 * Adjust flush_state. The target state (idle or setup) shouldn't
1291 * be terribly important since we will reflush if we really need
1292 * to do anything. XXX
1293 */
1294 if (TAILQ_EMPTY(&ip->target_list) && RB_EMPTY(&ip->rec_tree)) {
1295 ip->flush_state = HAMMER_FST_IDLE;
1296 dorel = 1;
1297 } else {
1298 ip->flush_state = HAMMER_FST_SETUP;
b84de5af 1299 }
b84de5af 1300
e8599db1
MD
1301 /*
1302 * Clean up the vnode ref
1303 */
1304 if (ip->flags & HAMMER_INODE_VHELD) {
1305 ip->flags &= ~HAMMER_INODE_VHELD;
1306 vrele(ip->vp);
1307 }
1308
b84de5af
MD
1309 /*
1310 * If the frontend made more changes and requested another flush,
4e17f465 1311 * then try to get it running.
b84de5af
MD
1312 */
1313 if (ip->flags & HAMMER_INODE_REFLUSH) {
1314 ip->flags &= ~HAMMER_INODE_REFLUSH;
4e17f465
MD
1315 if (ip->flags & HAMMER_INODE_RESIGNAL) {
1316 ip->flags &= ~HAMMER_INODE_RESIGNAL;
1317 hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
1318 } else {
1319 hammer_flush_inode(ip, 0);
0729c8c8 1320 }
4e17f465
MD
1321 }
1322
e63644f0
MD
1323 /*
1324 * If the inode is now clean drop the space reservation.
1325 */
1326 if ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
1327 (ip->flags & HAMMER_INODE_RSV_INODES)) {
1328 ip->flags &= ~HAMMER_INODE_RSV_INODES;
1329 --ip->hmp->rsv_inodes;
1330 }
1331
4e17f465
MD
1332 /*
1333 * Finally, if the frontend is waiting for a flush to complete,
1334 * wake it up.
1335 */
1336 if (ip->flush_state != HAMMER_FST_FLUSH) {
b84de5af
MD
1337 if (ip->flags & HAMMER_INODE_FLUSHW) {
1338 ip->flags &= ~HAMMER_INODE_FLUSHW;
1339 wakeup(&ip->flags);
1340 }
1341 }
1f07f686
MD
1342 if (dorel)
1343 hammer_rel_inode(ip, 0);
b84de5af
MD
1344}
1345
1346/*
1347 * Called from hammer_sync_inode() to synchronize in-memory records
1348 * to the media.
1349 */
1350static int
1351hammer_sync_record_callback(hammer_record_t record, void *data)
c0ade690 1352{
4e17f465
MD
1353 hammer_cursor_t cursor = data;
1354 hammer_transaction_t trans = cursor->trans;
c0ade690
MD
1355 int error;
1356
b84de5af 1357 /*
1f07f686 1358 * Skip records that do not belong to the current flush.
b84de5af 1359 */
47637bff 1360 ++hammer_stats_record_iterations;
1f07f686 1361 if (record->flush_state != HAMMER_FST_FLUSH)
b84de5af 1362 return(0);
47637bff 1363
1f07f686
MD
1364#if 1
1365 if (record->flush_group != record->ip->flush_group) {
1366 kprintf("sync_record %p ip %p bad flush group %d %d\n", record, record->ip, record->flush_group ,record->ip->flush_group);
1367 Debugger("blah2");
1368 return(0);
1369 }
1370#endif
1371 KKASSERT(record->flush_group == record->ip->flush_group);
d36ec43b
MD
1372
1373 /*
1374 * Interlock the record using the BE flag. Once BE is set the
1375 * frontend cannot change the state of FE.
1376 *
1377 * NOTE: If FE is set prior to us setting BE we still sync the
1378 * record out, but the flush completion code converts it to
1379 * a delete-on-disk record instead of destroying it.
1380 */
4e17f465 1381 KKASSERT((record->flags & HAMMER_RECF_INTERLOCK_BE) == 0);
d36ec43b
MD
1382 record->flags |= HAMMER_RECF_INTERLOCK_BE;
1383
47637bff
MD
1384 /*
1385 * The backend may have already disposed of the record.
1386 */
1387 if (record->flags & HAMMER_RECF_DELETED_BE) {
1388 error = 0;
1389 goto done;
1390 }
1391
98f7132d
MD
1392 /*
1393 * If the whole inode is being deleted, all on-disk records will
930bf163
MD
1394 * be deleted very soon, we can't sync any new records to disk
1395 * because they will be deleted in the same transaction they were
1396 * created in (delete_tid == create_tid), which will assert.
1397 *
1398 * XXX There may be a case with RECORD_ADD with DELETED_FE set
1399 * that we currently panic on.
98f7132d
MD
1400 */
1401 if (record->ip->sync_flags & HAMMER_INODE_DELETING) {
930bf163 1402 switch(record->type) {
47637bff
MD
1403 case HAMMER_MEM_RECORD_DATA:
1404 /*
1405 * We don't have to do anything, if the record was
1406 * committed the space will have been accounted for
1407 * in the blockmap.
1408 */
1409 /* fall through */
930bf163 1410 case HAMMER_MEM_RECORD_GENERAL:
98f7132d
MD
1411 record->flags |= HAMMER_RECF_DELETED_FE;
1412 record->flags |= HAMMER_RECF_DELETED_BE;
930bf163
MD
1413 error = 0;
1414 goto done;
1415 case HAMMER_MEM_RECORD_ADD:
1416 panic("hammer_sync_record_callback: illegal add "
1417 "during inode deletion record %p", record);
1418 break; /* NOT REACHED */
1419 case HAMMER_MEM_RECORD_INODE:
1420 panic("hammer_sync_record_callback: attempt to "
1421 "sync inode record %p?", record);
1422 break; /* NOT REACHED */
1423 case HAMMER_MEM_RECORD_DEL:
1424 /*
1425 * Follow through and issue the on-disk deletion
98f7132d 1426 */
930bf163 1427 break;
98f7132d 1428 }
98f7132d
MD
1429 }
1430
d36ec43b
MD
1431 /*
1432 * If DELETED_FE is set we may have already sent dependant pieces
1433 * to the disk and we must flush the record as if it hadn't been
1434 * deleted. This creates a bit of a mess because we have to
1f07f686 1435 * have ip_sync_record convert the record to MEM_RECORD_DEL before
d36ec43b
MD
1436 * it inserts the B-Tree record. Otherwise the media sync might
1437 * be visible to the frontend.
1438 */
1f07f686 1439 if (record->flags & HAMMER_RECF_DELETED_FE) {
e8599db1
MD
1440 if (record->type == HAMMER_MEM_RECORD_ADD) {
1441 record->flags |= HAMMER_RECF_CONVERT_DELETE;
1442 } else {
1443 KKASSERT(record->type != HAMMER_MEM_RECORD_DEL);
1444 return(0);
1445 }
1f07f686 1446 }
b84de5af
MD
1447
1448 /*
1449 * Assign the create_tid for new records. Deletions already
1450 * have the record's entire key properly set up.
1451 */
1f07f686 1452 if (record->type != HAMMER_MEM_RECORD_DEL)
11ad5ade 1453 record->leaf.base.create_tid = trans->tid;
4e17f465
MD
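	/*
	 * Write the record to the media.  A deadlock against another
	 * B-Tree operation forces us to reinitialize the cursor and retry.
	 */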
1454 for (;;) {
1455 error = hammer_ip_sync_record_cursor(cursor, record);
1456 if (error != EDEADLK)
1457 break;
1458 hammer_done_cursor(cursor);
1459 error = hammer_init_cursor(trans, cursor, &record->ip->cache[0],
1460 record->ip);
1461 if (error)
1462 break;
1463 }
1464 record->flags &= ~HAMMER_RECF_CONVERT_DELETE;
c0ade690
MD
1465
1466 if (error) {
b3deaf57
MD
1467 error = -error;
1468 if (error != -ENOSPC) {
b84de5af
MD
1469 kprintf("hammer_sync_record_callback: sync failed rec "
1470 "%p, error %d\n", record, error);
1471 Debugger("sync failed rec");
b3deaf57 1472 }
c0ade690 1473 }
98f7132d 1474done:
d36ec43b 1475 hammer_flush_record_done(record, error);
b3deaf57 1476 return(error);
c0ade690
MD
1477}
1478
1479/*
1480 * XXX error handling
1481 */
1482int
1f07f686 1483hammer_sync_inode(hammer_inode_t ip)
c0ade690
MD
1484{
1485 struct hammer_transaction trans;
4e17f465 1486 struct hammer_cursor cursor;
1f07f686
MD
1487 hammer_record_t depend;
1488 hammer_record_t next;
ec4e8497 1489 int error, tmp_error;
1f07f686 1490 u_int64_t nlinks;
c0ade690 1491
1f07f686 1492 if ((ip->sync_flags & HAMMER_INODE_MODMASK) == 0)
d113fda1 1493 return(0);
d113fda1 1494
b84de5af 1495 hammer_start_transaction_fls(&trans, ip->hmp);
4e17f465
MD
1496 error = hammer_init_cursor(&trans, &cursor, &ip->cache[0], ip);
1497 if (error)
1498 goto done;
c0ade690 1499
ec4e8497 1500 /*
1f07f686
MD
1501 * Any directory records referencing this inode which are not in
1502 * our current flush group must adjust our nlink count for the
1503 * purposes of synchronization to disk.
1504 *
1505 * Records which are in our flush group can be unlinked from our
c4bae5fd
MD
1506 * inode now, potentially allowing the inode to be physically
1507 * deleted.
ec4e8497 1508 */
11ad5ade 1509 nlinks = ip->ino_data.nlinks;
1f07f686
MD
1510 next = TAILQ_FIRST(&ip->target_list);
1511 while ((depend = next) != NULL) {
1512 next = TAILQ_NEXT(depend, target_entry);
1513 if (depend->flush_state == HAMMER_FST_FLUSH &&
da2da375 1514 depend->flush_group == ip->hmp->flusher.act) {
c4bae5fd
MD
1515 /*
1516 * If this is an ADD that was deleted by the frontend
1517 * the frontend nlinks count will have already been
1518 * decremented, but the backend is going to sync its
1519 * directory entry and must account for it. The
1520 * record will be converted to a delete-on-disk when
1521 * it gets synced.
1522 *
1523 * If the ADD was not deleted by the frontend we
1524 * can remove the dependancy from our target_list.
1525 */
1526 if (depend->flags & HAMMER_RECF_DELETED_FE) {
1527 ++nlinks;
1528 } else {
1529 TAILQ_REMOVE(&ip->target_list, depend,
1530 target_entry);
1531 depend->target_ip = NULL;
1532 }
1f07f686 1533 } else if ((depend->flags & HAMMER_RECF_DELETED_FE) == 0) {
c4bae5fd
MD
1534 /*
1535 * Not part of our flush group
1536 */
1537 KKASSERT((depend->flags & HAMMER_RECF_DELETED_BE) == 0);
1f07f686
MD
1538 switch(depend->type) {
1539 case HAMMER_MEM_RECORD_ADD:
1540 --nlinks;
1541 break;
1542 case HAMMER_MEM_RECORD_DEL:
1543 ++nlinks;
1544 break;
e8599db1
MD
1545 default:
1546 break;
1f07f686 1547 }
ec4e8497 1548 }
ec4e8497
MD
1549 }
1550
c0ade690 1551 /*
1f07f686 1552 * Set dirty if we had to modify the link count.
c0ade690 1553 */
11ad5ade 1554 if (ip->sync_ino_data.nlinks != nlinks) {
1f07f686 1555 KKASSERT((int64_t)nlinks >= 0);
11ad5ade
MD
1556 ip->sync_ino_data.nlinks = nlinks;
1557 ip->sync_flags |= HAMMER_INODE_DDIRTY;
1f07f686 1558 }
b84de5af 1559
47637bff
MD
1560#if 0
1561 /*
1562 * XXX DISABLED FOR NOW. With the new reservation support
1563 * we cannot resync pending data without confusing the hell
1564 * out of the in-memory record tree.
1565 */
4e17f465 1566 /*
e8599db1
MD
1567 * Queue up as many dirty buffers as we can then set a flag to
1568 * cause any further BIOs to go to the alternative queue.
4e17f465 1569 */
e8599db1 1570 if (ip->flags & HAMMER_INODE_VHELD)
4e17f465
MD
1571 error = vfsync(ip->vp, MNT_NOWAIT, 1, NULL, NULL);
1572 ip->flags |= HAMMER_INODE_WRITE_ALT;
1573
1574 /*
1575 * The buffer cache may contain dirty buffers beyond the inode
1576 * state we copied from the frontend to the backend. Because
1577 * we are syncing our buffer cache on the backend, resync
1578 * the truncation point and the file size so we don't wipe out
1579 * any data.
1580 *
1581 * Syncing the buffer cache on the frontend has serious problems
1582 * because it prevents us from passively queueing dirty inodes
1583 * to the backend (the BIO's could stall indefinitely).
1584 */
1585 if (ip->flags & HAMMER_INODE_TRUNCATED) {
1586 ip->sync_trunc_off = ip->trunc_off;
1587 ip->sync_flags |= HAMMER_INODE_TRUNCATED;
1588 }
11ad5ade
MD
1589 if (ip->sync_ino_data.size != ip->ino_data.size) {
1590 ip->sync_ino_data.size = ip->ino_data.size;
1591 ip->sync_flags |= HAMMER_INODE_DDIRTY;
4e17f465 1592 }
47637bff 1593#endif
4e17f465 1594
1f07f686 1595 /*
869e8f55
MD
1596 * If there is a truncation queued, destroy any data past the (aligned)
1597 * truncation point. Userland will have dealt with the buffer
1598 * containing the truncation point for us.
1599 *
1600 * We don't flush pending frontend data buffers until after we've
1601 * dealt with the truncation.
1f07f686 1602 *
869e8f55 1603 * Don't bother if the inode is or has been deleted.
1f07f686 1604 */
869e8f55 1605 if (ip->sync_flags & HAMMER_INODE_TRUNCATED) {
b84de5af
MD
1606 /*
1607 * Interlock trunc_off. The VOP front-end may continue to
1608 * make adjustments to it while we are blocked.
1609 */
1610 off_t trunc_off;
1611 off_t aligned_trunc_off;
c0ade690 1612
b84de5af
MD
1613 trunc_off = ip->sync_trunc_off;
1614 aligned_trunc_off = (trunc_off + HAMMER_BUFMASK) &
1615 ~HAMMER_BUFMASK64;
1616
1617 /*
1618 * Delete any whole blocks on-media. The front-end has
1619 * already cleaned out any partial block and made it
1620 * pending. The front-end may have updated trunc_off
47637bff 1621 * while we were blocked so we only use sync_trunc_off.
b84de5af 1622 */
4e17f465 1623 error = hammer_ip_delete_range(&cursor, ip,
b84de5af 1624 aligned_trunc_off,
47637bff 1625 0x7FFFFFFFFFFFFFFFLL, 1);
b84de5af
MD
1626 if (error)
1627 Debugger("hammer_ip_delete_range errored");
47637bff
MD
1628
1629 /*
1630 * Clear the truncation flag on the backend after we have
1631 * completed the deletions. Backend data is now good again
1632 * (including new records we are about to sync, below).
1633 */
b84de5af 1634 ip->sync_flags &= ~HAMMER_INODE_TRUNCATED;
47637bff 1635 ip->sync_trunc_off = 0x7FFFFFFFFFFFFFFFLL;
1f07f686
MD
1636 } else {
1637 error = 0;
f3b0f382
MD
1638 }
1639
1f07f686
MD
1640 /*
1641 * Now sync related records. These will typically be directory
1642 * entries or delete-on-disk records.
869e8f55
MD
1643 *
1644 * Not all records will be flushed, but clear XDIRTY anyway. We
1645 * will set it again in the frontend hammer_flush_inode_done()
1646 * if records remain.
1f07f686
MD
1647 */
1648 if (error == 0) {
1649 tmp_error = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL,
4e17f465 1650 hammer_sync_record_callback, &cursor);
1f07f686
MD
1651 if (tmp_error < 0)
1652 tmp_error = -error;
1653 if (tmp_error)
1654 error = tmp_error;
1655 }
1656
1657 /*
869e8f55
MD
1658 * If we are deleting the inode the frontend had better not have
1659 * any active references on elements making up the inode.
1f07f686 1660 */
11ad5ade 1661 if (error == 0 && ip->sync_ino_data.nlinks == 0 &&
869e8f55
MD
1662 RB_EMPTY(&ip->rec_tree) &&
1663 (ip->sync_flags & HAMMER_INODE_DELETING) &&
1664 (ip->flags & HAMMER_INODE_DELETED) == 0) {
1665 int count1 = 0;
1f07f686 1666
869e8f55 1667 ip->flags |= HAMMER_INODE_DELETED;
4e17f465 1668 error = hammer_ip_delete_range_all(&cursor, ip, &count1);
869e8f55
MD
1669 if (error == 0) {
1670 ip->sync_flags &= ~HAMMER_INODE_DELETING;
1671 ip->sync_flags &= ~HAMMER_INODE_TRUNCATED;
1672 KKASSERT(RB_EMPTY(&ip->rec_tree));
1f07f686 1673
869e8f55
MD
1674 /*
1675 * Set delete_tid in both the frontend and backend
1676 * copy of the inode record. The DELETED flag handles
1677 * this; do not set RDIRTY.
1678 */
11ad5ade
MD
1679 ip->ino_leaf.base.delete_tid = trans.tid;
1680 ip->sync_ino_leaf.base.delete_tid = trans.tid;
1f07f686 1681
869e8f55
MD
1682 /*
1683 * Adjust the inode count in the volume header
1684 */
f36a9737
MD
1685 if (ip->flags & HAMMER_INODE_ONDISK) {
1686 hammer_modify_volume_field(&trans,
1687 trans.rootvol,
1688 vol0_stat_inodes);
1689 --ip->hmp->rootvol->ondisk->vol0_stat_inodes;
1690 hammer_modify_volume_done(trans.rootvol);
1691 }
869e8f55
MD
1692 } else {
1693 ip->flags &= ~HAMMER_INODE_DELETED;
1694 Debugger("hammer_ip_delete_range_all errored");
1695 }
1f07f686 1696 }
b84de5af 1697
b84de5af 1698 ip->sync_flags &= ~HAMMER_INODE_BUFS;
c0ade690 1699
b84de5af
MD
1700 if (error)
1701 Debugger("RB_SCAN errored");
c0ade690
MD
1702
1703 /*
1704 * Now update the inode's on-disk inode-data and/or on-disk record.
b84de5af 1705 * DELETED and ONDISK are managed only in ip->flags.
c0ade690 1706 */
b84de5af 1707 switch(ip->flags & (HAMMER_INODE_DELETED | HAMMER_INODE_ONDISK)) {
c0ade690
MD
1708 case HAMMER_INODE_DELETED|HAMMER_INODE_ONDISK:
1709 /*
1710 * If deleted and on-disk, don't set any additional flags.
1711 * The delete flag takes care of things.
869e8f55
MD
1712 *
1713 * Clear flags which may have been set by the frontend.
c0ade690 1714 */
11ad5ade 1715 ip->sync_flags &= ~(HAMMER_INODE_DDIRTY|
869e8f55
MD
1716 HAMMER_INODE_XDIRTY|HAMMER_INODE_ITIMES|
1717 HAMMER_INODE_DELETING);
c0ade690
MD
1718 break;
1719 case HAMMER_INODE_DELETED:
1720 /*
1721 * Take care of the case where a deleted inode was never
1722 * flushed to the disk in the first place.
869e8f55
MD
1723 *
1724 * Clear flags which may have been set by the frontend.
c0ade690 1725 */
11ad5ade 1726 ip->sync_flags &= ~(HAMMER_INODE_DDIRTY|
869e8f55
MD
1727 HAMMER_INODE_XDIRTY|HAMMER_INODE_ITIMES|
1728 HAMMER_INODE_DELETING);
d26d0ae9 1729 while (RB_ROOT(&ip->rec_tree)) {
d36ec43b
MD
1730 hammer_record_t record = RB_ROOT(&ip->rec_tree);
1731 hammer_ref(&record->lock);
1732 KKASSERT(record->lock.refs == 1);
1733 record->flags |= HAMMER_RECF_DELETED_FE;
1734 record->flags |= HAMMER_RECF_DELETED_BE;
d36ec43b 1735 hammer_rel_mem_record(record);
d26d0ae9 1736 }
c0ade690
MD
1737 break;
1738 case HAMMER_INODE_ONDISK:
1739 /*
1740 * If already on-disk, do not set any additional flags.
1741 */
1742 break;
1743 default:
1744 /*
1745 * If not on-disk and not deleted, set both dirty flags
b84de5af
MD
1746 * to force an initial record to be written. Also set
1747 * the create_tid for the inode.
1748 *
1749 * Set create_tid in both the frontend and backend
1750 * copy of the inode record.
c0ade690 1751 */
11ad5ade
MD
1752 ip->ino_leaf.base.create_tid = trans.tid;
1753 ip->sync_ino_leaf.base.create_tid = trans.tid;
1754 ip->sync_flags |= HAMMER_INODE_DDIRTY;
c0ade690
MD
1755 break;
1756 }
1757
1758 /*
d113fda1
MD
1759 * If DDIRTY is set, write out a new record. If the inode
1760 * is already on-disk the old record is marked as deleted.
1761 *
1762 * If DELETED is set hammer_update_inode() will delete the existing
1763 * record without writing out a new one.
1764 *
1765 * If *ONLY* the ITIMES flag is set we can update the record in-place.
c0ade690 1766 */
b84de5af 1767 if (ip->flags & HAMMER_INODE_DELETED) {
4e17f465 1768 error = hammer_update_inode(&cursor, ip);
b84de5af 1769 } else
11ad5ade
MD
1770 if ((ip->sync_flags & (HAMMER_INODE_DDIRTY | HAMMER_INODE_ITIMES)) ==
1771 HAMMER_INODE_ITIMES) {
4e17f465 1772 error = hammer_update_itimes(&cursor, ip);
d113fda1 1773 } else
11ad5ade 1774 if (ip->sync_flags & (HAMMER_INODE_DDIRTY | HAMMER_INODE_ITIMES)) {
4e17f465 1775 error = hammer_update_inode(&cursor, ip);
c0ade690 1776 }
b84de5af
MD
1777 if (error)
1778 Debugger("hammer_update_itimes/inode errored");
4e17f465 1779done:
b84de5af
MD
1780 /*
1781 * Save the TID we used to sync the inode with to make sure we
1782 * do not improperly reuse it.
1783 */
4e17f465 1784 hammer_done_cursor(&cursor);
b84de5af 1785 hammer_done_transaction(&trans);
c0ade690 1786 return(error);
8cd0a023
MD
1787}
1788
1f07f686
MD
1789/*
1790 * This routine is called when the OS is no longer actively referencing
1791 * the inode (but might still be keeping it cached), or when releasing
1792 * the last reference to an inode.
1793 *
1794 * At this point if the inode's nlinks count is zero we want to destroy
1795 * it, which may mean destroying it on-media too.
1796 */
3bf2d80a 1797void
e8599db1 1798hammer_inode_unloadable_check(hammer_inode_t ip, int getvp)
1f07f686 1799{
e8599db1
MD
1800 struct vnode *vp;
1801
1f07f686 1802 /*
c4bae5fd
MD
1803 * Set the DELETING flag when the link count drops to 0 and the
1804 * OS no longer has any opens on the inode.
1805 *
1806 * The backend will clear DELETING (a mod flag) and set DELETED
1807 * (a state flag) when it is actually able to perform the
1808 * operation.
1f07f686 1809 */
11ad5ade 1810 if (ip->ino_data.nlinks == 0 &&
869e8f55 1811 (ip->flags & (HAMMER_INODE_DELETING|HAMMER_INODE_DELETED)) == 0) {
e8599db1
MD
1812 ip->flags |= HAMMER_INODE_DELETING;
1813 ip->flags |= HAMMER_INODE_TRUNCATED;
1814 ip->trunc_off = 0;
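		/*
		 * Queueing a truncation to offset 0 makes the backend
		 * destroy all of the file's on-media data when it
		 * processes the delete.
		 */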
1815 vp = NULL;
1816 if (getvp) {
1817 if (hammer_get_vnode(ip, &vp) != 0)
1818 return;
1819 }
29ce0677 1820
29ce0677
MD
1821 /*
1822 * Final cleanup
1823 */
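		/*
		 * Toss any remaining buffer cache buffers and tell the
		 * VM system the vnode is now zero length.
		 */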
869e8f55
MD
1824 if (ip->vp) {
1825 vtruncbuf(ip->vp, 0, HAMMER_BUFSIZE);
1826 vnode_pager_setsize(ip->vp, 0);
1827 }
e8599db1
MD
1828 if (getvp) {
1829 vput(vp);
1830 }
1f07f686 1831 }
1f07f686
MD
1832}
1833
3bf2d80a
MD
1834/*
1835 * Re-test an inode when a dependency has gone away to see if we
1836 * can chain flush it.
1837 */
1f07f686
MD
1838void
1839hammer_test_inode(hammer_inode_t ip)
1840{
1841 if (ip->flags & HAMMER_INODE_REFLUSH) {
1842 ip->flags &= ~HAMMER_INODE_REFLUSH;
1843 hammer_ref(&ip->lock);
3bf2d80a
MD
1844 if (ip->flags & HAMMER_INODE_RESIGNAL) {
1845 ip->flags &= ~HAMMER_INODE_RESIGNAL;
1846 hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
1847 } else {
1848 hammer_flush_inode(ip, 0);
1849 }
1f07f686
MD
1850 hammer_rel_inode(ip, 0);
1851 }
1852}
1853
9f5097dc
MD
1854/*
1855 * When a HAMMER inode is reclaimed it may have to be queued to the backend
1856 * for its final sync to disk. Programs like blogbench can cause the backlog
1857 * to grow indefinitely. Put a cap on the number of inodes we allow to be
1858 * in this state by giving the flusher time to drain.
1859 */
1860void
1861hammer_inode_waitreclaims(hammer_mount_t hmp)
1862{
3897d7e9
MD
1863 int count;
1864 int delay;
1865 int minpt;
1866 int maxpt;
1867
1868 while (hmp->inode_reclaims > HAMMER_RECLAIM_MIN) {
da2da375
MD
1869 count = hmp->count_inodes - hmp->inode_reclaims;
1870 if (count < 100)
1871 count = 100;
3897d7e9
MD
1872 minpt = count * HAMMER_RECLAIM_SLOPCT / 100;
1873 maxpt = count * HAMMER_RECLAIM_MAXPCT / 100;
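		/*
		 * minpt and maxpt act as low and high watermarks on the
		 * reclaim backlog, scaled to the number of non-reclaim
		 * inodes (floored at 100 above).
		 */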
1874
1875 if (hmp->inode_reclaims < minpt)
1876 break;
1877 if (hmp->inode_reclaims < maxpt) {
1878 delay = (hmp->inode_reclaims - minpt) * hz /
1879 (maxpt - minpt);
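			/*
			 * The delay ramps linearly with the backlog: for
			 * example, with minpt 1000, maxpt 5000, and 3000
			 * inodes in reclaim the sleep works out to hz/2,
			 * i.e. half a second.
			 */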
1880 if (delay == 0)
1881 delay = 1;
1882 hammer_flusher_async(hmp);
1883 tsleep(&count, 0, "hmitik", delay);
1884 break;
1885 }
9f5097dc
MD
1886 hmp->flags |= HAMMER_MOUNT_WAITIMAX;
1887 hammer_flusher_async(hmp);
3897d7e9 1888 tsleep(&hmp->inode_reclaims, 0, "hmimax", hz / 10);
9f5097dc
MD
1889 }
1890}
1891
1892void
1893hammer_inode_wakereclaims(hammer_mount_t hmp)
1894{
3897d7e9
MD
1895 int maxpt;
1896
1897 if ((hmp->flags & HAMMER_MOUNT_WAITIMAX) == 0)
1898 return;
1899 maxpt = hmp->count_inodes * HAMMER_RECLAIM_MAXPCT / 100;
9f5097dc 1900 if (hmp->inode_reclaims <= HAMMER_RECLAIM_MIN ||
3897d7e9 1901 hmp->inode_reclaims < maxpt) {
9f5097dc 1902 hmp->flags &= ~HAMMER_MOUNT_WAITIMAX;
3897d7e9 1903 wakeup(&hmp->inode_reclaims);
9f5097dc
MD
1904 }
1905}
1906