/*
 * Copyright (c) 2007-2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.45 2008/05/02 01:00:42 dillon Exp $
 */

#include "hammer.h"
#include <sys/buf.h>
#include <sys/buf2.h>

static int hammer_unload_inode(struct hammer_inode *ip);
static void hammer_flush_inode_core(hammer_inode_t ip, int flags);
static int hammer_setup_child_callback(hammer_record_t rec, void *data);
static int hammer_inode_unloadable_check(hammer_inode_t ip);
static int hammer_setup_parent_inodes(hammer_record_t record);

/*
 * The kernel is not actively referencing this vnode but is still holding
 * it cached.
 *
 * This is called from the frontend.
 */
int
hammer_vop_inactive(struct vop_inactive_args *ap)
{
	struct hammer_inode *ip = VTOI(ap->a_vp);

	/*
	 * Degenerate case
	 */
	if (ip == NULL) {
		vrecycle(ap->a_vp);
		return(0);
	}

	/*
	 * If the inode no longer has visibility in the filesystem and is
	 * fairly clean, try to recycle it immediately.  This can deadlock
	 * in vfsync() if we aren't careful.
	 */
	if (hammer_inode_unloadable_check(ip) && ip->ino_rec.ino_nlinks == 0)
		vrecycle(ap->a_vp);
	return(0);
}

/*
 * Release the vnode association.  This is typically (but not always)
 * the last reference on the inode.
 *
 * Once the association is lost we are on our own with regards to
 * flushing the inode.
 */
int
hammer_vop_reclaim(struct vop_reclaim_args *ap)
{
	struct hammer_inode *ip;
	struct vnode *vp;

	vp = ap->a_vp;

	if ((ip = vp->v_data) != NULL) {
		vp->v_data = NULL;
		ip->vp = NULL;
		hammer_rel_inode(ip, 1);
	}
	return(0);
}

/*
 * Return a locked vnode for the specified inode.  The inode must be
 * referenced but NOT LOCKED on entry and will remain referenced on
 * return.
 *
 * Called from the frontend.
 */
int
hammer_get_vnode(struct hammer_inode *ip, int lktype, struct vnode **vpp)
{
	struct vnode *vp;
	int error = 0;

	for (;;) {
		if ((vp = ip->vp) == NULL) {
			error = getnewvnode(VT_HAMMER, ip->hmp->mp, vpp, 0, 0);
			if (error)
				break;
			hammer_lock_ex(&ip->lock);
			if (ip->vp != NULL) {
				hammer_unlock(&ip->lock);
				vp->v_type = VBAD;
				vx_put(vp);
				continue;
			}
			hammer_ref(&ip->lock);
			vp = *vpp;
			ip->vp = vp;
			vp->v_type = hammer_get_vnode_type(
					ip->ino_rec.base.base.obj_type);

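			/*
			 * Device and fifo objects need their own vop vector,
			 * and device nodes also need a device alias, before
			 * the vnode can be used.
			 */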
			switch(ip->ino_rec.base.base.obj_type) {
			case HAMMER_OBJTYPE_CDEV:
			case HAMMER_OBJTYPE_BDEV:
				vp->v_ops = &ip->hmp->mp->mnt_vn_spec_ops;
				addaliasu(vp, ip->ino_data.rmajor,
					  ip->ino_data.rminor);
				break;
			case HAMMER_OBJTYPE_FIFO:
				vp->v_ops = &ip->hmp->mp->mnt_vn_fifo_ops;
				break;
			default:
				break;
			}

			/*
			 * Only mark as the root vnode if the ip is not
			 * historical, otherwise the VFS cache will get
			 * confused.  The other half of the special handling
			 * is in hammer_vop_nlookupdotdot().
			 */
			if (ip->obj_id == HAMMER_OBJID_ROOT &&
			    ip->obj_asof == ip->hmp->asof) {
				vp->v_flag |= VROOT;
			}

			vp->v_data = (void *)ip;
			/* vnode locked by getnewvnode() */
			/* make related vnode dirty if inode dirty? */
			hammer_unlock(&ip->lock);
			if (vp->v_type == VREG)
				vinitvmio(vp, ip->ino_rec.ino_size);
			break;
		}

		/*
		 * loop if the vget fails (aka races), or if the vp
		 * no longer matches ip->vp.
		 */
		if (vget(vp, LK_EXCLUSIVE) == 0) {
			if (vp == ip->vp)
				break;
			vput(vp);
		}
	}
	*vpp = vp;
	return(error);
}

/*
 * Acquire a HAMMER inode.  The returned inode is not locked.  These functions
 * do not attach or detach the related vnode (use hammer_get_vnode() for
 * that).
 *
 * The flags argument is only applied for newly created inodes, and only
 * certain flags are inherited.
 *
 * Called from the frontend.
 */
struct hammer_inode *
hammer_get_inode(hammer_transaction_t trans, struct hammer_node **cache,
		 u_int64_t obj_id, hammer_tid_t asof, int flags, int *errorp)
{
	hammer_mount_t hmp = trans->hmp;
	struct hammer_inode_info iinfo;
	struct hammer_cursor cursor;
	struct hammer_inode *ip;

	/*
	 * Determine if we already have an inode cached.  If we do then
	 * we are golden.
	 */
	iinfo.obj_id = obj_id;
	iinfo.obj_asof = asof;
loop:
	ip = hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, &iinfo);
	if (ip) {
		hammer_ref(&ip->lock);
		*errorp = 0;
		return(ip);
	}

	ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
	++hammer_count_inodes;
	ip->obj_id = obj_id;
	ip->obj_asof = iinfo.obj_asof;
	ip->hmp = hmp;
	ip->flags = flags & HAMMER_INODE_RO;
	if (hmp->ronly)
		ip->flags |= HAMMER_INODE_RO;
	ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
	RB_INIT(&ip->rec_tree);
	TAILQ_INIT(&ip->bio_list);
	TAILQ_INIT(&ip->bio_alt_list);
	TAILQ_INIT(&ip->target_list);

	/*
	 * Locate the on-disk inode.
	 */
retry:
	hammer_init_cursor(trans, &cursor, cache);
	cursor.key_beg.obj_id = ip->obj_id;
	cursor.key_beg.key = 0;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
	cursor.key_beg.obj_type = 0;
	cursor.asof = iinfo.obj_asof;
	cursor.flags = HAMMER_CURSOR_GET_RECORD | HAMMER_CURSOR_GET_DATA |
		       HAMMER_CURSOR_ASOF;

	*errorp = hammer_btree_lookup(&cursor);
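	/*
	 * A deadlocked cursor (EDEADLK) must be thrown away and the
	 * B-Tree lookup retried from scratch.
	 */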
	if (*errorp == EDEADLK) {
		hammer_done_cursor(&cursor);
		goto retry;
	}

	/*
	 * On success the B-Tree lookup will hold the appropriate
	 * buffer cache buffers and provide a pointer to the requested
	 * information.  Copy the information to the in-memory inode
	 * and cache the B-Tree node to improve future operations.
	 */
	if (*errorp == 0) {
		ip->ino_rec = cursor.record->inode;
		ip->ino_data = cursor.data->inode;
		hammer_cache_node(cursor.node, &ip->cache[0]);
		if (cache)
			hammer_cache_node(cursor.node, cache);
	}

	/*
	 * On success load the inode's record and data and insert the
	 * inode into the B-Tree.  It is possible to race another lookup
	 * insertion of the same inode so deal with that condition too.
	 *
	 * The cursor's locked node interlocks against others creating and
	 * destroying ip while we were blocked.
	 */
	if (*errorp == 0) {
		hammer_ref(&ip->lock);
		if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
			hammer_uncache_node(&ip->cache[0]);
			hammer_uncache_node(&ip->cache[1]);
			KKASSERT(ip->lock.refs == 1);
			--hammer_count_inodes;
			kfree(ip, M_HAMMER);
			hammer_done_cursor(&cursor);
			goto loop;
		}
		ip->flags |= HAMMER_INODE_ONDISK;
	} else {
		--hammer_count_inodes;
		kfree(ip, M_HAMMER);
		ip = NULL;
	}
	hammer_done_cursor(&cursor);
	return (ip);
}

/*
 * Create a new filesystem object, returning the inode in *ipp.  The
 * returned inode will be referenced.
 *
 * The inode is created in-memory.
 */
int
hammer_create_inode(hammer_transaction_t trans, struct vattr *vap,
		    struct ucred *cred, hammer_inode_t dip,
		    struct hammer_inode **ipp)
{
	hammer_mount_t hmp;
	hammer_inode_t ip;
	uid_t xuid;

	hmp = trans->hmp;
	ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
	++hammer_count_inodes;
	ip->obj_id = hammer_alloc_objid(trans, dip);
	KKASSERT(ip->obj_id != 0);
	ip->obj_asof = hmp->asof;
	ip->hmp = hmp;
	ip->flush_state = HAMMER_FST_IDLE;
	ip->flags = HAMMER_INODE_DDIRTY | HAMMER_INODE_RDIRTY |
		    HAMMER_INODE_ITIMES;

	ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
	RB_INIT(&ip->rec_tree);
	TAILQ_INIT(&ip->bio_list);
	TAILQ_INIT(&ip->bio_alt_list);
	TAILQ_INIT(&ip->target_list);

	ip->ino_rec.ino_atime = trans->time;
	ip->ino_rec.ino_mtime = trans->time;
	ip->ino_rec.ino_size = 0;
	ip->ino_rec.ino_nlinks = 0;
	/* XXX */
	ip->ino_rec.base.base.btype = HAMMER_BTREE_TYPE_RECORD;
	ip->ino_rec.base.base.obj_id = ip->obj_id;
	ip->ino_rec.base.base.key = 0;
	ip->ino_rec.base.base.create_tid = 0;
	ip->ino_rec.base.base.delete_tid = 0;
	ip->ino_rec.base.base.rec_type = HAMMER_RECTYPE_INODE;
	ip->ino_rec.base.base.obj_type = hammer_get_obj_type(vap->va_type);

	ip->ino_data.version = HAMMER_INODE_DATA_VERSION;
	ip->ino_data.mode = vap->va_mode;
	ip->ino_data.ctime = trans->time;
	ip->ino_data.parent_obj_id = (dip) ? dip->ino_rec.base.base.obj_id : 0;

	switch(ip->ino_rec.base.base.obj_type) {
	case HAMMER_OBJTYPE_CDEV:
	case HAMMER_OBJTYPE_BDEV:
		ip->ino_data.rmajor = vap->va_rmajor;
		ip->ino_data.rminor = vap->va_rminor;
		break;
	default:
		break;
	}

	/*
	 * Calculate default uid/gid and overwrite with information from
	 * the vap.
	 */
	xuid = hammer_to_unix_xid(&dip->ino_data.uid);
	ip->ino_data.gid = dip->ino_data.gid;
	xuid = vop_helper_create_uid(hmp->mp, dip->ino_data.mode, xuid, cred,
				     &vap->va_mode);
	ip->ino_data.mode = vap->va_mode;

	if (vap->va_vaflags & VA_UID_UUID_VALID)
		ip->ino_data.uid = vap->va_uid_uuid;
	else if (vap->va_uid != (uid_t)VNOVAL)
		hammer_guid_to_uuid(&ip->ino_data.uid, xuid);
	if (vap->va_vaflags & VA_GID_UUID_VALID)
		ip->ino_data.gid = vap->va_gid_uuid;
	else if (vap->va_gid != (gid_t)VNOVAL)
		hammer_guid_to_uuid(&ip->ino_data.gid, vap->va_gid);

	hammer_ref(&ip->lock);
	if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
		hammer_unref(&ip->lock);
		panic("hammer_create_inode: duplicate obj_id %llx", ip->obj_id);
	}
	*ipp = ip;
	return(0);
}

/*
 * Called by hammer_sync_inode().
 */
static int
hammer_update_inode(hammer_transaction_t trans, hammer_inode_t ip)
{
	struct hammer_cursor cursor;
	hammer_record_t record;
	int error;

	/*
	 * Locate the record on-disk and mark it as deleted.  Both the B-Tree
	 * node and the record must be marked deleted.  The record may or
	 * may not be physically deleted, depending on the retention policy.
	 *
	 * If the inode has already been deleted on-disk we have nothing
	 * to do.
	 *
	 * XXX Update the inode record and data in-place if the retention
	 * policy allows it.
	 */
retry:
	error = 0;

	if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
	    HAMMER_INODE_ONDISK) {
		hammer_init_cursor(trans, &cursor, &ip->cache[0]);
		cursor.key_beg.obj_id = ip->obj_id;
		cursor.key_beg.key = 0;
		cursor.key_beg.create_tid = 0;
		cursor.key_beg.delete_tid = 0;
		cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
		cursor.key_beg.obj_type = 0;
		cursor.asof = ip->obj_asof;
		cursor.flags |= HAMMER_CURSOR_GET_RECORD | HAMMER_CURSOR_ASOF;
		cursor.flags |= HAMMER_CURSOR_BACKEND;

		error = hammer_btree_lookup(&cursor);
		if (error) {
			kprintf("error %d\n", error);
			Debugger("hammer_update_inode");
		}

		if (error == 0) {
			error = hammer_ip_delete_record(&cursor, trans->tid);
			if (error && error != EDEADLK) {
				kprintf("error %d\n", error);
				Debugger("hammer_update_inode2");
			}
			if (error == 0) {
				ip->flags |= HAMMER_INODE_DELONDISK;
				ip->sync_flags &= ~HAMMER_INODE_DELETING;
			}
			hammer_cache_node(cursor.node, &ip->cache[0]);
		}
		hammer_done_cursor(&cursor);
		if (error == EDEADLK)
			goto retry;
	}

	/*
	 * Write out a new record if the in-memory inode is not marked
	 * as having been deleted.  Update our inode statistics if this
	 * is the first application of the inode on-disk.
	 *
	 * If the inode has been deleted permanently, HAMMER_INODE_DELONDISK
	 * will remain set and prevent further updates.
	 */
	if (error == 0 && (ip->flags & HAMMER_INODE_DELETED) == 0) {
		record = hammer_alloc_mem_record(ip);
		record->flush_state = HAMMER_FST_FLUSH;
		record->rec.inode = ip->sync_ino_rec;
		record->rec.inode.base.base.create_tid = trans->tid;
		record->rec.inode.base.data_len = sizeof(ip->sync_ino_data);
		record->data = (void *)&ip->sync_ino_data;
		record->flags |= HAMMER_RECF_INTERLOCK_BE;
		error = hammer_ip_sync_record(trans, record);
		if (error) {
			kprintf("error %d\n", error);
			Debugger("hammer_update_inode3");
		}

		/*
		 * The record isn't managed by the inode's record tree,
		 * destroy it whether we succeed or fail.
		 */
		record->flags &= ~HAMMER_RECF_INTERLOCK_BE;
		record->flags |= HAMMER_RECF_DELETED_FE;
		record->flush_state = HAMMER_FST_IDLE;
		hammer_rel_mem_record(record);

		if (error == 0) {
			ip->sync_flags &= ~(HAMMER_INODE_RDIRTY |
					    HAMMER_INODE_DDIRTY |
					    HAMMER_INODE_ITIMES);
			ip->flags &= ~HAMMER_INODE_DELONDISK;

			/*
			 * Root volume count of inodes
			 */
			if ((ip->flags & HAMMER_INODE_ONDISK) == 0) {
				hammer_modify_volume(trans, trans->rootvol,
						     NULL, 0);
				++ip->hmp->rootvol->ondisk->vol0_stat_inodes;
				hammer_modify_volume_done(trans->rootvol);
				ip->flags |= HAMMER_INODE_ONDISK;
			}
		}
	}
	if (error == 0 && (ip->flags & HAMMER_INODE_DELETED)) {
		/*
		 * Clean out any left-over flags if the inode has been
		 * destroyed.
		 */
		ip->sync_flags &= ~(HAMMER_INODE_RDIRTY |
				    HAMMER_INODE_DDIRTY |
				    HAMMER_INODE_ITIMES);
	}
	return(error);
}

/*
 * Update only the itimes fields.  This is done non-historically.  The
 * record is updated in-place on the disk.
 */
static int
hammer_update_itimes(hammer_transaction_t trans, hammer_inode_t ip)
{
	struct hammer_cursor cursor;
	struct hammer_inode_record *rec;
	int error;

retry:
	error = 0;
	if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
	    HAMMER_INODE_ONDISK) {
		hammer_init_cursor(trans, &cursor, &ip->cache[0]);
		cursor.key_beg.obj_id = ip->obj_id;
		cursor.key_beg.key = 0;
		cursor.key_beg.create_tid = 0;
		cursor.key_beg.delete_tid = 0;
		cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
		cursor.key_beg.obj_type = 0;
		cursor.asof = ip->obj_asof;
		cursor.flags |= HAMMER_CURSOR_GET_RECORD | HAMMER_CURSOR_ASOF;
		cursor.flags |= HAMMER_CURSOR_BACKEND;

		error = hammer_btree_lookup(&cursor);
		if (error) {
			kprintf("error %d\n", error);
			Debugger("hammer_update_itimes1");
		}
		if (error == 0) {
			/*
			 * Do not generate UNDO records for atime/mtime
			 * updates.
			 */
			rec = &cursor.record->inode;
			hammer_modify_buffer(cursor.trans, cursor.record_buffer,
					     NULL, 0);
			rec->ino_atime = ip->sync_ino_rec.ino_atime;
			rec->ino_mtime = ip->sync_ino_rec.ino_mtime;
			hammer_modify_buffer_done(cursor.record_buffer);
			ip->sync_flags &= ~HAMMER_INODE_ITIMES;
			/* XXX recalculate crc */
			hammer_cache_node(cursor.node, &ip->cache[0]);
		}
		hammer_done_cursor(&cursor);
		if (error == EDEADLK)
			goto retry;
	}
	return(error);
}

/*
 * Release a reference on an inode, flush as requested.
 *
 * On the last reference we queue the inode to the flusher for its final
 * disposition.
 */
void
hammer_rel_inode(struct hammer_inode *ip, int flush)
{
	hammer_mount_t hmp = ip->hmp;

	/*
	 * Handle disposition when dropping the last ref.
	 */
	for (;;) {
		if (ip->lock.refs == 1) {
			/*
			 * Determine whether on-disk action is needed for
			 * the inode's final disposition.
			 */
			if (hammer_inode_unloadable_check(ip)) {
				hammer_unload_inode(ip);
				break;
			}
			hammer_flush_inode(ip, 0);
		} else {
			/*
			 * We gotta flush inodes which do not have vnode
			 * associations.
			 */
#if 0
			if (ip->vp == NULL) {
				kprintf("v%d:%04x\n", ip->flush_state, ip->flags);
				hammer_flush_inode(ip, 0);
			} else
#endif
			if (flush) {
				hammer_flush_inode(ip, 0);
			}
			/*
			 * The inode still has multiple refs, try to drop
			 * one ref.
			 */
			KKASSERT(ip->lock.refs >= 1);
			if (ip->lock.refs > 1) {
				hammer_unref(&ip->lock);
				break;
			}
		}
	}

	/*
	 * XXX bad hack until I add code to track inodes in SETUP.  We
	 * can queue a lot of inodes to the syncer but if we don't wake
	 * it up the undo sets will be too large or too many unflushed
	 * records will build up and blow our malloc limit.
	 */
	if (++hmp->reclaim_count > 256) {
		hmp->reclaim_count = 0;
		hammer_flusher_async(hmp);
	}
}

/*
 * Unload and destroy the specified inode.  Must be called with one remaining
 * reference.  The reference is disposed of.
 *
 * This can only be called in the context of the flusher.
 */
static int
hammer_unload_inode(struct hammer_inode *ip)
{
	KASSERT(ip->lock.refs == 1,
		("hammer_unload_inode: %d refs\n", ip->lock.refs));
	KKASSERT(ip->vp == NULL);
	KKASSERT(ip->flush_state == HAMMER_FST_IDLE);
	KKASSERT(ip->cursor_ip_refs == 0);
	KKASSERT((ip->flags & HAMMER_INODE_MODMASK) == 0);

	KKASSERT(RB_EMPTY(&ip->rec_tree));
	KKASSERT(TAILQ_EMPTY(&ip->target_list));
	KKASSERT(TAILQ_EMPTY(&ip->bio_list));
	KKASSERT(TAILQ_EMPTY(&ip->bio_alt_list));

	RB_REMOVE(hammer_ino_rb_tree, &ip->hmp->rb_inos_root, ip);

	hammer_uncache_node(&ip->cache[0]);
	hammer_uncache_node(&ip->cache[1]);
	if (ip->objid_cache)
		hammer_clear_objid(ip);
	--hammer_count_inodes;
	kfree(ip, M_HAMMER);

	return(0);
}

/*
 * A transaction has modified an inode, requiring updates as specified by
 * the passed flags.
 *
 * HAMMER_INODE_RDIRTY:	Inode record has been updated
 * HAMMER_INODE_DDIRTY:	Inode data has been updated
 * HAMMER_INODE_XDIRTY:	Dirty in-memory records
 * HAMMER_INODE_BUFS:	Dirty front-end buffer cache buffers
 * HAMMER_INODE_DELETED: Inode record/data must be deleted
 * HAMMER_INODE_ITIMES:	mtime/atime has been updated
 */
void
hammer_modify_inode(hammer_transaction_t trans, hammer_inode_t ip, int flags)
{
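	/*
	 * Read-only inodes (e.g. from a read-only mount or an as-of
	 * historical lookup) must never be dirtied with flags that would
	 * require a media update.
	 */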
	KKASSERT ((ip->flags & HAMMER_INODE_RO) == 0 ||
		  (flags & (HAMMER_INODE_RDIRTY|HAMMER_INODE_DDIRTY|
			    HAMMER_INODE_XDIRTY|HAMMER_INODE_BUFS|
			    HAMMER_INODE_DELETED|HAMMER_INODE_ITIMES)) == 0);

	ip->flags |= flags;
}

/*
 * Request that an inode be flushed.  This whole mess cannot block and may
 * recurse.  Once requested HAMMER will attempt to actively flush it until
 * the flush can be done.
 *
 * The inode may already be flushing, or may be in a setup state.  We can
 * place the inode in a flushing state if it is currently idle and flag it
 * to reflush if it is currently flushing.
 */
void
hammer_flush_inode(hammer_inode_t ip, int flags)
{
	hammer_record_t depend;
	int r, good;

	/*
	 * Trivial 'nothing to flush' case.  If the inode is in a SETUP
	 * state we have to put it back into an IDLE state so we can
	 * drop the extra ref.
	 */
	if ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
	    (flags & HAMMER_FLUSH_FORCE) == 0) {
		if (ip->flush_state == HAMMER_FST_SETUP) {
			ip->flush_state = HAMMER_FST_IDLE;
			hammer_rel_inode(ip, 0);
		}
		return;
	}

	/*
	 * Our flush action will depend on the current state.
	 */
	switch(ip->flush_state) {
	case HAMMER_FST_IDLE:
		/*
		 * We have no dependancies and can flush immediately.  Some
		 * of our children may not be flushable so we have to re-test
		 * with that additional knowledge.
		 */
		hammer_flush_inode_core(ip, flags);
		break;
	case HAMMER_FST_SETUP:
		/*
		 * Recurse upwards through dependancies via target_list
		 * and start their flusher actions going if possible.
		 *
		 * 'good' is our connectivity.  -1 means we have none and
		 * can't flush, 0 means there weren't any dependancies, and
		 * 1 means we have good connectivity.
		 */
		good = 0;
		TAILQ_FOREACH(depend, &ip->target_list, target_entry) {
			r = hammer_setup_parent_inodes(depend);
			if (r < 0 && good == 0)
				good = -1;
			if (r > 0)
				good = 1;
		}

		/*
		 * We can continue if good >= 0.  Determine how many records
		 * under our inode can be flushed (and mark them).
		 */
		kprintf("g%d", good);
		if (good >= 0) {
			hammer_flush_inode_core(ip, flags);
		} else {
			ip->flags |= HAMMER_INODE_REFLUSH;
		}
		break;
	default:
		/*
		 * We are already flushing, flag the inode to reflush
		 * if needed after it completes its current flush.
		 */
		if ((ip->flags & HAMMER_INODE_REFLUSH) == 0)
			ip->flags |= HAMMER_INODE_REFLUSH;
		break;
	}
}

/*
 * We are asked to recurse upwards and convert the record from SETUP
 * to FLUSH if possible.  record->ip is a parent of the caller's inode,
 * and record->target_ip is the caller's inode.
 *
 * Return 1 if the record gives us connectivity
 *
 * Return 0 if the record is not relevant
 *
 * Return -1 if we can't resolve the dependancy and there is no connectivity.
 */
static int
hammer_setup_parent_inodes(hammer_record_t record)
{
	hammer_mount_t hmp = record->ip->hmp;
	hammer_record_t depend;
	hammer_inode_t ip;
	int r, good;

	KKASSERT(record->flush_state != HAMMER_FST_IDLE);
	ip = record->ip;

	/*
	 * If the record is already flushing, is it in our flush group?
	 *
	 * If it is in our flush group but it is a delete-on-disk, it
	 * does not improve our connectivity (return 0), and if the
	 * target inode is not trying to destroy itself we can't allow
	 * the operation yet anyway (the second return -1).
	 */
	if (record->flush_state == HAMMER_FST_FLUSH) {
		if (record->flush_group != hmp->flusher_next) {
			ip->flags |= HAMMER_INODE_REFLUSH;
			return(-1);
		}
		if (record->type == HAMMER_MEM_RECORD_ADD)
			return(1);
		return(0);
	}

	/*
	 * It must be a setup record.  Try to resolve the setup dependancies
	 * by recursing upwards so we can place ip on the flush list.
	 */
	KKASSERT(record->flush_state == HAMMER_FST_SETUP);

	good = 0;
	TAILQ_FOREACH(depend, &ip->target_list, target_entry) {
		r = hammer_setup_parent_inodes(depend);
		if (r < 0 && good == 0)
			good = -1;
		if (r > 0)
			good = 1;
	}

	/*
	 * We can't flush ip because it has no connectivity (XXX also check
	 * nlinks for pre-existing connectivity!).  Flag it so any resolution
	 * recurses back down.
	 */
	if (good < 0) {
		ip->flags |= HAMMER_INODE_REFLUSH;
		return(good);
	}

	/*
	 * We are go, place the parent inode in a flushing state so we can
	 * place its record in a flushing state.  Note that the parent
	 * may already be flushing.  The record must be in the same flush
	 * group as the parent.
	 */
	if (ip->flush_state != HAMMER_FST_FLUSH)
		hammer_flush_inode_core(ip, HAMMER_FLUSH_RECURSION);
	KKASSERT(ip->flush_state == HAMMER_FST_FLUSH);
	KKASSERT(record->flush_state == HAMMER_FST_SETUP);

#if 0
	if (record->type == HAMMER_MEM_RECORD_DEL &&
	    (record->target_ip->flags & (HAMMER_INODE_DELETING|HAMMER_INODE_DELONDISK)) == 0) {
		/*
		 * Regardless of flushing state we cannot sync this path if the
		 * record represents a delete-on-disk but the target inode
		 * is not ready to sync its own deletion.
		 *
		 * XXX need to count effective nlinks to determine whether
		 * the flush is ok, otherwise removing a hardlink will
		 * just leave the DEL record to rot.
		 */
		record->target_ip->flags |= HAMMER_INODE_REFLUSH;
		return(-1);
	} else
#endif
	if (ip->flush_group == ip->hmp->flusher_next) {
		/*
		 * This is the record we wanted to synchronize.
		 */
		record->flush_state = HAMMER_FST_FLUSH;
		record->flush_group = ip->flush_group;
		hammer_ref(&record->lock);
		if (record->type == HAMMER_MEM_RECORD_ADD)
			return(1);

		/*
		 * The record is a delete-on-disk.  It does not contribute
		 * to our visibility.  We can still flush it.
		 */
		return(0);
	} else {
		/*
		 * We couldn't resolve the dependancies, request that the
		 * inode be flushed when the dependancies can be resolved.
		 */
		ip->flags |= HAMMER_INODE_REFLUSH;
		return(-1);
	}
}

/*
 * This is the core routine placing an inode into the FST_FLUSH state.
 */
static void
hammer_flush_inode_core(hammer_inode_t ip, int flags)
{
	int go_count;
	int error;

	KKASSERT(ip->flush_state != HAMMER_FST_FLUSH);
	if (ip->flush_state == HAMMER_FST_IDLE)
		hammer_ref(&ip->lock);
	ip->flush_state = HAMMER_FST_FLUSH;
	ip->flush_group = ip->hmp->flusher_next;

	/*
	 * Figure out how many in-memory records we can actually flush
	 * (not including inode meta-data, buffers, etc).
	 */
	if (flags & HAMMER_FLUSH_RECURSION) {
		go_count = 1;
	} else {
		go_count = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL,
				   hammer_setup_child_callback, NULL);
	}

	/*
	 * This is a more involved test that includes go_count.  If we
	 * can't flush, flag the inode and return.  If go_count is 0 we
	 * are unable to flush any records in our rec_tree and must
	 * ignore the XDIRTY flag.
	 */
	if (go_count == 0) {
		if ((ip->flags & HAMMER_INODE_MODMASK_NOXDIRTY) == 0) {
			ip->flags |= HAMMER_INODE_REFLUSH;
			ip->flush_state = HAMMER_FST_SETUP;
			return;
		}
	}

	/*
	 * Inodes not in an IDLE state get an extra reference.
	 *
	 * Place the inode in a flush state and sync all frontend
	 * information to the backend.
	 */

	if ((flags & HAMMER_FLUSH_RECURSION) == 0) {
		if (ip->vp != NULL)
			error = vfsync(ip->vp, MNT_NOWAIT, 1, NULL, NULL);
		else
			error = 0;
	}

	/*
	 * Any further strategy calls will go into the inode's alternative
	 * bioq.
	 */
	ip->flags |= HAMMER_INODE_WRITE_ALT;

	/*
	 * Snapshot the state of the inode for the backend flusher.
	 *
	 * The truncation must be retained in the frontend until after
	 * we've actually performed the record deletion.
	 *
	 * NOTE: The DELETING flag is a mod flag, but it is also sticky,
	 * and stays in ip->flags.  Once set, it stays set until the
	 * inode is destroyed.
	 */
	ip->sync_flags = (ip->flags & HAMMER_INODE_MODMASK);
	ip->sync_flags &= ~HAMMER_INODE_DELETING;
	ip->sync_trunc_off = ip->trunc_off;
	ip->sync_ino_rec = ip->ino_rec;
	ip->sync_ino_data = ip->ino_data;
	ip->flags &= ~HAMMER_INODE_MODMASK |
		     HAMMER_INODE_TRUNCATED | HAMMER_INODE_BUFS |
		     HAMMER_INODE_DELETING;

	/*
	 * Fix up the dirty buffer status.
	 */
	if (ip->vp == NULL || RB_ROOT(&ip->vp->v_rbdirty_tree) == NULL) {
		if (TAILQ_FIRST(&ip->bio_alt_list) == NULL)
			ip->flags &= ~HAMMER_INODE_BUFS;
	}
	if (TAILQ_FIRST(&ip->bio_list))
		ip->sync_flags |= HAMMER_INODE_BUFS;
	else
		ip->sync_flags &= ~HAMMER_INODE_BUFS;

	/*
	 * The flusher inherits our inode and reference.
	 */
	TAILQ_INSERT_TAIL(&ip->hmp->flush_list, ip, flush_entry);

	if (flags & HAMMER_FLUSH_SIGNAL)
		hammer_flusher_async(ip->hmp);
}

/*
 * Callback for scan of ip->rec_tree.  Try to include each record in our
 * flush.  ip->flush_group has been set but the inode has not yet been
 * moved into a flushing state.
 *
 * If we get stuck on a record we have to set HAMMER_INODE_REFLUSH on
 * both inodes.
 *
 * We return 1 for any record placed or found in FST_FLUSH, which prevents
 * the caller from shortcutting the flush.
 */
static int
hammer_setup_child_callback(hammer_record_t rec, void *data)
{
	hammer_inode_t target_ip;
	hammer_inode_t ip;
	int r;

	/*
	 * If the record has been deleted by the backend (it's being held
	 * by the frontend in a race), just ignore it.
	 */
	if (rec->flags & HAMMER_RECF_DELETED_BE)
		return(0);

	/*
	 * If the record is in an idle state it has no dependancies and
	 * can be flushed.
	 */
	ip = rec->ip;
	r = 0;

	switch(rec->flush_state) {
	case HAMMER_FST_IDLE:
		/*
		 * Record has no setup dependancy, we can flush it.
		 */
		KKASSERT(rec->target_ip == NULL);
		rec->flush_state = HAMMER_FST_FLUSH;
		rec->flush_group = ip->flush_group;
		hammer_ref(&rec->lock);
		r = 1;
		break;
	case HAMMER_FST_SETUP:
		/*
		 * Record has a setup dependancy.  Try to include the
		 * target ip in the flush.
		 *
		 * We have to be careful here, if we do not do the right
		 * thing we can lose track of dirty inodes and the system
		 * will lockup trying to allocate buffers.
		 */
		target_ip = rec->target_ip;
		KKASSERT(target_ip != NULL);
		KKASSERT(target_ip->flush_state != HAMMER_FST_IDLE);
		if (target_ip->flush_state == HAMMER_FST_FLUSH) {
			/*
			 * If the target IP is already flushing in our group
			 * we are golden, otherwise make sure the target
			 * reflushes.
			 */
			if (target_ip->flush_group == ip->flush_group) {
				rec->flush_state = HAMMER_FST_FLUSH;
				rec->flush_group = ip->flush_group;
				hammer_ref(&rec->lock);
				r = 1;
			} else {
				target_ip->flags |= HAMMER_INODE_REFLUSH;
			}
		} else if (rec->type == HAMMER_MEM_RECORD_ADD) {
			/*
			 * If the target IP is not flushing we can force
			 * it to flush, even if it is unable to write out
			 * any of its own records we have at least one in
			 * hand that we CAN deal with.
			 */
			rec->flush_state = HAMMER_FST_FLUSH;
			rec->flush_group = ip->flush_group;
			hammer_ref(&rec->lock);
			hammer_flush_inode_core(target_ip,
						HAMMER_FLUSH_RECURSION);
			r = 1;
		} else {
			/*
			 * XXX this needs help.  We have a delete-on-disk
			 * which could disconnect the target.  If the target
			 * has its own dependancies they really need to
			 * be flushed.
			 *
			 * XXX
			 */
			rec->flush_state = HAMMER_FST_FLUSH;
			rec->flush_group = ip->flush_group;
			hammer_ref(&rec->lock);
			hammer_flush_inode_core(target_ip,
						HAMMER_FLUSH_RECURSION);
			r = 1;
		}
		break;
	case HAMMER_FST_FLUSH:
		/*
		 * Record already associated with a flush group.  It had
		 * better be ours.
		 */
		KKASSERT(rec->flush_group == ip->flush_group);
		r = 1;
		break;
	}
	return(r);
}

/*
 * Wait for a previously queued flush to complete
 */
void
hammer_wait_inode(hammer_inode_t ip)
{
	while (ip->flush_state == HAMMER_FST_FLUSH) {
		ip->flags |= HAMMER_INODE_FLUSHW;
		tsleep(&ip->flags, 0, "hmrwin", 0);
	}
}

/*
 * Called by the backend code when a flush has been completed.
 * The inode has already been removed from the flush list.
 *
 * A pipelined flush can occur, in which case we must re-enter the
 * inode on the list and re-copy its fields.
 */
void
hammer_flush_inode_done(hammer_inode_t ip)
{
	struct bio *bio;
	int dorel = 0;

	KKASSERT(ip->flush_state == HAMMER_FST_FLUSH);

	/*
	 * Allow BIOs to queue to the inode's primary bioq again.
	 */
	ip->flags &= ~HAMMER_INODE_WRITE_ALT;

	/*
	 * Merge left-over flags back into the frontend and fix the state.
	 */
	ip->flags |= ip->sync_flags;
	if (TAILQ_EMPTY(&ip->target_list) && RB_EMPTY(&ip->rec_tree)) {
		ip->flush_state = HAMMER_FST_IDLE;
		dorel = 1;
	} else {
		ip->flush_state = HAMMER_FST_SETUP;
	}

	/*
	 * The backend may have adjusted nlinks, so if the adjusted nlinks
	 * does not match the frontend set the frontend's RDIRTY flag again.
	 */
	if (ip->ino_rec.ino_nlinks != ip->sync_ino_rec.ino_nlinks)
		ip->flags |= HAMMER_INODE_RDIRTY;

	/*
	 * Reflush any BIOs that wound up in the alt list.  Our inode will
	 * also wind up at the end of the flusher's list.
	 */
	while ((bio = TAILQ_FIRST(&ip->bio_alt_list)) != NULL) {
		TAILQ_REMOVE(&ip->bio_alt_list, bio, bio_act);
		TAILQ_INSERT_TAIL(&ip->bio_list, bio, bio_act);
		kprintf("d");
		ip->flags |= HAMMER_INODE_BUFS;
		ip->flags |= HAMMER_INODE_REFLUSH;
	}

	/*
	 * Re-set the XDIRTY flag if some of the inode's in-memory records
	 * could not be flushed.
	 */
	if (RB_ROOT(&ip->rec_tree)) {
		ip->flags |= HAMMER_INODE_XDIRTY;
		ip->flags |= HAMMER_INODE_REFLUSH;
		kprintf("e");
	}

	/*
	 * If the frontend made more changes and requested another flush,
	 * do it.
	 */
	if (ip->flags & HAMMER_INODE_REFLUSH) {
		ip->flags &= ~HAMMER_INODE_REFLUSH;
		hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
		if (ip->flush_state == HAMMER_FST_IDLE) {
			if (ip->flags & HAMMER_INODE_FLUSHW) {
				ip->flags &= ~HAMMER_INODE_FLUSHW;
				wakeup(&ip->flags);
			}
		}
	} else {
		if (ip->flags & HAMMER_INODE_FLUSHW) {
			ip->flags &= ~HAMMER_INODE_FLUSHW;
			wakeup(&ip->flags);
		}
	}
	if (dorel)
		hammer_rel_inode(ip, 0);
}

/*
 * Called from hammer_sync_inode() to synchronize in-memory records
 * to the media.
 */
static int
hammer_sync_record_callback(hammer_record_t record, void *data)
{
	hammer_transaction_t trans = data;
	int error;

	/*
	 * Skip records that do not belong to the current flush.
	 */
	if (record->flush_state != HAMMER_FST_FLUSH)
		return(0);
	KKASSERT((record->flags & HAMMER_RECF_DELETED_BE) == 0);
#if 1
	if (record->flush_group != record->ip->flush_group) {
		kprintf("sync_record %p ip %p bad flush group %d %d\n",
			record, record->ip, record->flush_group,
			record->ip->flush_group);
		Debugger("blah2");
		return(0);
	}
#endif
	KKASSERT(record->flush_group == record->ip->flush_group);

	/*
	 * Interlock the record using the BE flag.  Once BE is set the
	 * frontend cannot change the state of FE.
	 *
	 * NOTE: If FE is set prior to us setting BE we still sync the
	 * record out, but the flush completion code converts it to
	 * a delete-on-disk record instead of destroying it.
	 */
	if (record->flags & HAMMER_RECF_INTERLOCK_BE) {
		hammer_flush_record_done(record, 0);
		return(0);
	}
	record->flags |= HAMMER_RECF_INTERLOCK_BE;

	/*
	 * If DELETED_FE is set we may have already sent dependant pieces
	 * to the disk and we must flush the record as if it hadn't been
	 * deleted.  This creates a bit of a mess because we have to
	 * have ip_sync_record convert the record to MEM_RECORD_DEL before
	 * it inserts the B-Tree record.  Otherwise the media sync might
	 * be visible to the frontend.
	 */
	if (record->flags & HAMMER_RECF_DELETED_FE) {
		KKASSERT(record->type == HAMMER_MEM_RECORD_ADD);
		record->flags |= HAMMER_RECF_CONVERT_DELETE;
	}

	/*
	 * Assign the create_tid for new records.  Deletions already
	 * have the record's entire key properly set up.
	 */
	if (record->type != HAMMER_MEM_RECORD_DEL)
		record->rec.inode.base.base.create_tid = trans->tid;
	error = hammer_ip_sync_record(trans, record);

	if (error) {
		error = -error;
		if (error != -ENOSPC) {
			kprintf("hammer_sync_record_callback: sync failed rec "
				"%p, error %d\n", record, error);
			Debugger("sync failed rec");
		}
	}
	hammer_flush_record_done(record, error);
	return(error);
}

/*
 * XXX error handling
 */
int
hammer_sync_inode(hammer_inode_t ip)
{
	struct hammer_transaction trans;
	struct bio *bio;
	hammer_record_t depend;
	hammer_record_t next;
	int error, tmp_error;
	u_int64_t nlinks;

	if ((ip->sync_flags & HAMMER_INODE_MODMASK) == 0)
		return(0);

	hammer_start_transaction_fls(&trans, ip->hmp);

	/*
	 * Any directory records referencing this inode which are not in
	 * our current flush group must adjust our nlink count for the
	 * purposes of synchronization to disk.
	 *
	 * Records which are in our flush group can be unlinked from our
	 * inode now, allowing the inode to be physically deleted.
	 */
	nlinks = ip->ino_rec.ino_nlinks;
	next = TAILQ_FIRST(&ip->target_list);
	while ((depend = next) != NULL) {
		next = TAILQ_NEXT(depend, target_entry);
		if (depend->flush_state == HAMMER_FST_FLUSH &&
		    depend->flush_group == ip->hmp->flusher_act) {
			TAILQ_REMOVE(&ip->target_list, depend, target_entry);
			depend->target_ip = NULL;
			/* no need to signal target_ip, it is us */
		} else if ((depend->flags & HAMMER_RECF_DELETED_FE) == 0) {
			switch(depend->type) {
			case HAMMER_MEM_RECORD_ADD:
				--nlinks;
				break;
			case HAMMER_MEM_RECORD_DEL:
				++nlinks;
				break;
			}
		}
	}

	/*
	 * Set dirty if we had to modify the link count.
	 */
	if (ip->sync_ino_rec.ino_nlinks != nlinks) {
		KKASSERT((int64_t)nlinks >= 0);
		ip->sync_ino_rec.ino_nlinks = nlinks;
		ip->sync_flags |= HAMMER_INODE_RDIRTY;
	}

	/*
	 * If the inode has been unlinked and no longer has a vnode
	 * ref, destroy its data.
	 *
	 * Otherwise if there is a truncation queued destroy any data past
	 * the (aligned) truncation point.  Userland will have dealt with
	 * the buffer containing the truncation point for us.
	 */
	if (ip->sync_ino_rec.ino_nlinks == 0 && ip->vp == NULL) {
		error = hammer_ip_delete_range_all(&trans, ip);
		if (error)
			Debugger("hammer_ip_delete_range_all errored");
	} else if (ip->sync_flags & HAMMER_INODE_TRUNCATED) {
		/*
		 * Interlock trunc_off.  The VOP front-end may continue to
		 * make adjustments to it while we are blocked.
		 */
		off_t trunc_off;
		off_t aligned_trunc_off;

		trunc_off = ip->sync_trunc_off;
		aligned_trunc_off = (trunc_off + HAMMER_BUFMASK) &
				    ~HAMMER_BUFMASK64;

		/*
		 * Delete any whole blocks on-media.  The front-end has
		 * already cleaned out any partial block and made it
		 * pending.  The front-end may have updated trunc_off
		 * while we were blocked so do not just unconditionally
		 * set it to the maximum offset.
		 */
		error = hammer_ip_delete_range(&trans, ip,
						aligned_trunc_off,
						0x7FFFFFFFFFFFFFFFLL);
		if (error)
			Debugger("hammer_ip_delete_range errored");
		ip->sync_flags &= ~HAMMER_INODE_TRUNCATED;
		if (ip->trunc_off >= trunc_off) {
			ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
			ip->flags &= ~HAMMER_INODE_TRUNCATED;
		}
	} else {
		error = 0;
	}

	/*
	 * Now sync related records.  These will typically be directory
	 * entries or delete-on-disk records.
	 */
	if (error == 0) {
		tmp_error = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL,
				    hammer_sync_record_callback, &trans);
		if (tmp_error < 0)
			tmp_error = -error;
		if (tmp_error)
			error = tmp_error;
	}

	/*
	 * Sync inode deletions, with certain restrictions.
	 *
	 * - Nlinks must be 0 for both the frontend and the backend.
	 * - All related directory entries and our own records must
	 *   be synchronized.
	 *
	 * In the latter case a directory containing numerous directory
	 * entries may not be able to sync those entries due to topological
	 * recursion.  If this is the case those records would not have
	 * been marked for flush action and ip->rec_tree will not be empty.
	 */
	if (ip->sync_ino_rec.ino_nlinks == 0 &&
	    ip->ino_rec.ino_nlinks == 0 &&
	    TAILQ_FIRST(&ip->target_list) == NULL &&
	    RB_ROOT(&ip->rec_tree) == NULL &&
	    (ip->flags & HAMMER_INODE_GONE) == 0) {
		/*
		 * Handle the case where the inode has been completely deleted
		 * and is no longer referenceable from the filesystem
		 * namespace.
		 *
		 * NOTE: We do not set the RDIRTY flag when updating the
		 * delete_tid, setting HAMMER_INODE_DELETED takes care of it.
		 */
		kprintf("Y");

		ip->flags |= HAMMER_INODE_GONE | HAMMER_INODE_DELETED;
		ip->flags &= ~HAMMER_INODE_TRUNCATED;
		ip->sync_flags &= ~HAMMER_INODE_TRUNCATED;
		if (ip->vp)
			vtruncbuf(ip->vp, 0, HAMMER_BUFSIZE);

		/*
		 * Set delete_tid in both the frontend and backend
		 * copy of the inode record.
		 */
		ip->ino_rec.base.base.delete_tid = trans.tid;
		ip->sync_ino_rec.base.base.delete_tid = trans.tid;

		/*
		 * Indicate that the inode has/is-being deleted.
		 */
		ip->flags |= HAMMER_NODE_DELETED;
		hammer_modify_inode(&trans, ip, HAMMER_INODE_RDIRTY);
		hammer_modify_volume(&trans, trans.rootvol, NULL, 0);
		--ip->hmp->rootvol->ondisk->vol0_stat_inodes;
		hammer_modify_volume_done(trans.rootvol);
	}

	/*
	 * Flush any queued BIOs.
	 */
	while ((bio = TAILQ_FIRST(&ip->bio_list)) != NULL) {
		KKASSERT((ip->flags & HAMMER_INODE_DELETED) == 0);
		TAILQ_REMOVE(&ip->bio_list, bio, bio_act);
#if 0
		kprintf("dowrite %016llx ip %p bio %p @ %016llx\n", trans.tid, ip, bio, bio->bio_offset);
#endif
		tmp_error = hammer_dowrite(&trans, ip, bio);
		if (tmp_error)
			error = tmp_error;
	}
	ip->sync_flags &= ~HAMMER_INODE_BUFS;

	/*
	 * We better have nothing left if the inode has been deleted.  If it
	 * hasn't the frontend may have queued more stuff, which would be ok.
	 */
	KKASSERT((ip->flags & HAMMER_INODE_DELETED) == 0 ||
		 RB_ROOT(&ip->rec_tree) == NULL);

	/*
	 * XDIRTY represents rec_tree and bio_list.  However, rec_tree may
	 * contain new front-end records so short of scanning it we can't
	 * just test whether it is empty or not.
	 *
	 * If no error occured assume we succeeded.
	 */
	if (error == 0)
		ip->sync_flags &= ~HAMMER_INODE_XDIRTY;

	if (error)
		Debugger("RB_SCAN errored");

	/*
	 * Now update the inode's on-disk inode-data and/or on-disk record.
	 * DELETED and ONDISK are managed only in ip->flags.
	 */
	switch(ip->flags & (HAMMER_INODE_DELETED | HAMMER_INODE_ONDISK)) {
	case HAMMER_INODE_DELETED|HAMMER_INODE_ONDISK:
		/*
		 * If deleted and on-disk, don't set any additional flags.
		 * the delete flag takes care of things.
		 */
		break;
	case HAMMER_INODE_DELETED:
		/*
		 * Take care of the case where a deleted inode was never
		 * flushed to the disk in the first place.
		 */
		ip->sync_flags &= ~(HAMMER_INODE_RDIRTY|HAMMER_INODE_DDIRTY|
				    HAMMER_INODE_XDIRTY|HAMMER_INODE_ITIMES);
		while (RB_ROOT(&ip->rec_tree)) {
			hammer_record_t record = RB_ROOT(&ip->rec_tree);
			hammer_ref(&record->lock);
			KKASSERT(record->lock.refs == 1);
			record->flags |= HAMMER_RECF_DELETED_FE;
			record->flags |= HAMMER_RECF_DELETED_BE;
			hammer_rel_mem_record(record);
		}
		break;
	case HAMMER_INODE_ONDISK:
		/*
		 * If already on-disk, do not set any additional flags.
		 */
		break;
	default:
		/*
		 * If not on-disk and not deleted, set both dirty flags
		 * to force an initial record to be written.  Also set
		 * the create_tid for the inode.
		 *
		 * Set create_tid in both the frontend and backend
		 * copy of the inode record.
		 */
		ip->ino_rec.base.base.create_tid = trans.tid;
		ip->sync_ino_rec.base.base.create_tid = trans.tid;
		ip->sync_flags |= HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY;
		break;
	}

	/*
	 * If RDIRTY or DDIRTY is set, write out a new record.  If the inode
	 * is already on-disk the old record is marked as deleted.
	 *
	 * If DELETED is set hammer_update_inode() will delete the existing
	 * record without writing out a new one.
	 *
	 * If *ONLY* the ITIMES flag is set we can update the record in-place.
	 */
	if (ip->flags & HAMMER_INODE_DELETED) {
		error = hammer_update_inode(&trans, ip);
	} else
	if ((ip->sync_flags & (HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY |
			       HAMMER_INODE_ITIMES)) == HAMMER_INODE_ITIMES) {
		error = hammer_update_itimes(&trans, ip);
	} else
	if (ip->sync_flags & (HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY |
			      HAMMER_INODE_ITIMES)) {
		error = hammer_update_inode(&trans, ip);
	}
	if (error)
		Debugger("hammer_update_itimes/inode errored");

	/*
	 * Save the TID we used to sync the inode with to make sure we
	 * do not improperly reuse it.
	 */
	hammer_done_transaction(&trans);
	return(error);
}

/*
 * This routine is called when the OS is no longer actively referencing
 * the inode (but might still be keeping it cached), or when releasing
 * the last reference to an inode.
 *
 * At this point if the inode's nlinks count is zero we want to destroy
 * it, which may mean destroying it on-media too.
 */
static int
hammer_inode_unloadable_check(hammer_inode_t ip)
{
	/*
	 * If the inode is on-media and the link count is 0 we MUST delete
	 * it on-media.
	 */
	if (ip->ino_rec.ino_nlinks == 0 &&
	    (ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
	     HAMMER_INODE_ONDISK) {
		ip->flags |= HAMMER_INODE_DELETING;
	} else {
		ip->flags &= ~HAMMER_INODE_DELETING;
	}

	/*
	 * If only one ref remains and the inode is not dirty, tell the
	 * caller that it can dispose of the inode.
	 */
	if (ip->lock.refs == 1 && (ip->flags & HAMMER_INODE_MODMASK) == 0)
		return(1);
	return(0);
}

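/*
 * Kick off a deferred flush:  if a re-flush was requested while the inode
 * was busy flushing, clear the flag and queue the inode to the flusher
 * again.
 */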
void
hammer_test_inode(hammer_inode_t ip)
{
	if (ip->flags & HAMMER_INODE_REFLUSH) {
		ip->flags &= ~HAMMER_INODE_REFLUSH;
		hammer_ref(&ip->lock);
		hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
		hammer_rel_inode(ip, 0);
	}
}