Properly yield to userland processes.
sys/vfs/hammer/hammer_inode.c
/*
 * Copyright (c) 2007-2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.44 2008/04/29 04:43:08 dillon Exp $
 */

#include "hammer.h"
#include <sys/buf.h>
#include <sys/buf2.h>

static int hammer_unload_inode(struct hammer_inode *ip);
static void hammer_flush_inode_copysync(hammer_inode_t ip);
static int hammer_mark_record_callback(hammer_record_t rec, void *data);

/*
 * The kernel is not actively referencing this vnode but is still holding
 * it cached.
 *
 * This is called from the frontend.
 */
int
hammer_vop_inactive(struct vop_inactive_args *ap)
{
	struct hammer_inode *ip = VTOI(ap->a_vp);

	/*
	 * Degenerate case
	 */
	if (ip == NULL) {
		vrecycle(ap->a_vp);
		return(0);
	}

	/*
	 * If the inode no longer has any references we recover its
	 * in-memory resources immediately.
	 *
	 * NOTE: called from frontend, use ino_rec instead of sync_ino_rec.
	 */
	if (ip->ino_rec.ino_nlinks == 0)
		vrecycle(ap->a_vp);
	return(0);
}

/*
 * Release the vnode association.  This is typically (but not always)
 * the last reference on the inode and will flush the inode to the
 * buffer cache.
 *
 * XXX Currently our sync code only runs through inodes with vnode
 * associations, so we depend on hammer_rel_inode() to sync any inode
 * record data to the block device prior to losing the association.
 * Otherwise transactions that the user expected to be distinct by
 * doing a manual sync may be merged.
 */
int
hammer_vop_reclaim(struct vop_reclaim_args *ap)
{
	struct hammer_inode *ip;
	struct vnode *vp;

	vp = ap->a_vp;

	if ((ip = vp->v_data) != NULL) {
		vp->v_data = NULL;
		ip->vp = NULL;

		/*
		 * Don't let too many dependencies build up on unreferenced
		 * inodes or we could run ourselves out of memory.
		 */
		if (TAILQ_FIRST(&ip->depend_list)) {
			ip->hmp->reclaim_count += ip->depend_count;
			if (ip->hmp->reclaim_count > 256) {
				ip->hmp->reclaim_count = 0;
				hammer_flusher_async(ip->hmp);
			}
		}
		hammer_rel_inode(ip, 1);
	}
	return(0);
}

/*
 * Return a locked vnode for the specified inode.  The inode must be
 * referenced but NOT LOCKED on entry and will remain referenced on
 * return.
 *
 * Called from the frontend.
 */
int
hammer_get_vnode(struct hammer_inode *ip, int lktype, struct vnode **vpp)
{
	struct vnode *vp;
	int error = 0;

	for (;;) {
		if ((vp = ip->vp) == NULL) {
			error = getnewvnode(VT_HAMMER, ip->hmp->mp, vpp, 0, 0);
			if (error)
				break;
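			/*
			 * Another thread may have associated a vnode with
			 * this inode while we were in getnewvnode().  If so,
			 * throw our freshly allocated vnode away and retry
			 * with the winner's vnode.
			 */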
			hammer_lock_ex(&ip->lock);
			if (ip->vp != NULL) {
				hammer_unlock(&ip->lock);
				vp->v_type = VBAD;
				vx_put(vp);
				continue;
			}
			hammer_ref(&ip->lock);
			vp = *vpp;
			ip->vp = vp;
			vp->v_type = hammer_get_vnode_type(
					ip->ino_rec.base.base.obj_type);

			switch(ip->ino_rec.base.base.obj_type) {
			case HAMMER_OBJTYPE_CDEV:
			case HAMMER_OBJTYPE_BDEV:
				vp->v_ops = &ip->hmp->mp->mnt_vn_spec_ops;
				addaliasu(vp, ip->ino_data.rmajor,
					  ip->ino_data.rminor);
				break;
			case HAMMER_OBJTYPE_FIFO:
				vp->v_ops = &ip->hmp->mp->mnt_vn_fifo_ops;
				break;
			default:
				break;
			}

			/*
			 * Only mark as the root vnode if the ip is not
			 * historical, otherwise the VFS cache will get
			 * confused.  The other half of the special handling
			 * is in hammer_vop_nlookupdotdot().
			 */
			if (ip->obj_id == HAMMER_OBJID_ROOT &&
			    ip->obj_asof == ip->hmp->asof) {
				vp->v_flag |= VROOT;
			}

			vp->v_data = (void *)ip;
			/* vnode locked by getnewvnode() */
			/* make related vnode dirty if inode dirty? */
			hammer_unlock(&ip->lock);
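			/*
			 * Regular files need a VM object so the buffer
			 * cache can back them; size it to the current
			 * inode size.
			 */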
			if (vp->v_type == VREG)
				vinitvmio(vp, ip->ino_rec.ino_size);
			break;
		}

		/*
		 * loop if the vget fails (aka races), or if the vp
		 * no longer matches ip->vp.
		 */
		if (vget(vp, LK_EXCLUSIVE) == 0) {
			if (vp == ip->vp)
				break;
			vput(vp);
		}
	}
	*vpp = vp;
	return(error);
}

/*
 * Acquire a HAMMER inode.  The returned inode is not locked.  These functions
 * do not attach or detach the related vnode (use hammer_get_vnode() for
 * that).
 *
 * The flags argument is only applied for newly created inodes, and only
 * certain flags are inherited.
 *
 * Called from the frontend.
 */
struct hammer_inode *
hammer_get_inode(hammer_transaction_t trans, struct hammer_node **cache,
		 u_int64_t obj_id, hammer_tid_t asof, int flags, int *errorp)
{
	hammer_mount_t hmp = trans->hmp;
	struct hammer_inode_info iinfo;
	struct hammer_cursor cursor;
	struct hammer_inode *ip;

	/*
	 * Determine if we already have an inode cached.  If we do then
	 * we are golden.
	 */
	iinfo.obj_id = obj_id;
	iinfo.obj_asof = asof;
loop:
	ip = hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, &iinfo);
	if (ip) {
		hammer_ref(&ip->lock);
		*errorp = 0;
		return(ip);
	}

	ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
	++hammer_count_inodes;
	ip->obj_id = obj_id;
	ip->obj_asof = iinfo.obj_asof;
	ip->hmp = hmp;
	ip->flags = flags & HAMMER_INODE_RO;
	if (hmp->ronly)
		ip->flags |= HAMMER_INODE_RO;
	ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
	RB_INIT(&ip->rec_tree);
	TAILQ_INIT(&ip->bio_list);
	TAILQ_INIT(&ip->bio_alt_list);
	TAILQ_INIT(&ip->depend_list);

	/*
	 * Locate the on-disk inode.
	 */
retry:
	hammer_init_cursor(trans, &cursor, cache);
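	/*
	 * Key the lookup to the inode's own INODE record.  create_tid and
	 * delete_tid are left zero; the as-of search (HAMMER_CURSOR_ASOF
	 * with cursor.asof set below) selects the visible version of the
	 * record rather than an exact tid match.
	 */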
	cursor.key_beg.obj_id = ip->obj_id;
	cursor.key_beg.key = 0;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
	cursor.key_beg.obj_type = 0;
	cursor.asof = iinfo.obj_asof;
	cursor.flags = HAMMER_CURSOR_GET_RECORD | HAMMER_CURSOR_GET_DATA |
		       HAMMER_CURSOR_ASOF;

	*errorp = hammer_btree_lookup(&cursor);
	if (*errorp == EDEADLK) {
		hammer_done_cursor(&cursor);
		goto retry;
	}

	/*
	 * On success the B-Tree lookup will hold the appropriate
	 * buffer cache buffers and provide a pointer to the requested
	 * information.  Copy the information to the in-memory inode
	 * and cache the B-Tree node to improve future operations.
	 */
	if (*errorp == 0) {
		ip->ino_rec = cursor.record->inode;
		ip->ino_data = cursor.data->inode;
		hammer_cache_node(cursor.node, &ip->cache[0]);
		if (cache)
			hammer_cache_node(cursor.node, cache);
	}

	/*
	 * On success load the inode's record and data and insert the
	 * inode into the B-Tree.  It is possible to race another lookup
	 * insertion of the same inode so deal with that condition too.
	 *
	 * The cursor's locked node interlocks against others creating and
	 * destroying ip while we were blocked.
	 */
	if (*errorp == 0) {
		hammer_ref(&ip->lock);
		if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
			hammer_uncache_node(&ip->cache[0]);
			hammer_uncache_node(&ip->cache[1]);
			KKASSERT(ip->lock.refs == 1);
			--hammer_count_inodes;
			kfree(ip, M_HAMMER);
			hammer_done_cursor(&cursor);
			goto loop;
		}
		ip->flags |= HAMMER_INODE_ONDISK;
	} else {
		--hammer_count_inodes;
		kfree(ip, M_HAMMER);
		ip = NULL;
	}
	hammer_done_cursor(&cursor);
	return (ip);
}

/*
 * Create a new filesystem object, returning the inode in *ipp.  The
 * returned inode will be referenced and also marked HAMMER_INODE_NEW,
 * preventing it from being synchronized too early.  The caller must
 * call hammer_finalize_inode() to make it available for media sync.
 *
 * The inode is created in-memory.
 */
int
hammer_create_inode(hammer_transaction_t trans, struct vattr *vap,
		    struct ucred *cred, hammer_inode_t dip,
		    struct hammer_inode **ipp)
{
	hammer_mount_t hmp;
	hammer_inode_t ip;
	uid_t xuid;

	hmp = trans->hmp;
	ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
	++hammer_count_inodes;
	ip->obj_id = hammer_alloc_objid(trans, dip);
	KKASSERT(ip->obj_id != 0);
	ip->obj_asof = hmp->asof;
	ip->hmp = hmp;
	ip->flush_state = HAMMER_FST_IDLE;
	ip->flags = HAMMER_INODE_DDIRTY | HAMMER_INODE_RDIRTY |
		    HAMMER_INODE_ITIMES;
	ip->flags |= HAMMER_INODE_NEW;

	ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
	RB_INIT(&ip->rec_tree);
	TAILQ_INIT(&ip->bio_list);
	TAILQ_INIT(&ip->bio_alt_list);
	TAILQ_INIT(&ip->depend_list);

	ip->ino_rec.ino_atime = trans->time;
	ip->ino_rec.ino_mtime = trans->time;
	ip->ino_rec.ino_size = 0;
	ip->ino_rec.ino_nlinks = 0;
	/* XXX */
	ip->ino_rec.base.base.btype = HAMMER_BTREE_TYPE_RECORD;
	ip->ino_rec.base.base.obj_id = ip->obj_id;
	ip->ino_rec.base.base.key = 0;
	ip->ino_rec.base.base.create_tid = 0;
	ip->ino_rec.base.base.delete_tid = 0;
	ip->ino_rec.base.base.rec_type = HAMMER_RECTYPE_INODE;
	ip->ino_rec.base.base.obj_type = hammer_get_obj_type(vap->va_type);

	ip->ino_data.version = HAMMER_INODE_DATA_VERSION;
	ip->ino_data.mode = vap->va_mode;
	ip->ino_data.ctime = trans->time;
	ip->ino_data.parent_obj_id = (dip) ? dip->ino_rec.base.base.obj_id : 0;

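	/*
	 * For device nodes, record the major/minor numbers in the inode
	 * data so hammer_get_vnode() can re-create the device alias when
	 * the vnode is instantiated.
	 */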
	switch(ip->ino_rec.base.base.obj_type) {
	case HAMMER_OBJTYPE_CDEV:
	case HAMMER_OBJTYPE_BDEV:
		ip->ino_data.rmajor = vap->va_rmajor;
		ip->ino_data.rminor = vap->va_rminor;
		break;
	default:
		break;
	}

	/*
	 * Calculate default uid/gid and overwrite with information from
	 * the vap.
	 */
	xuid = hammer_to_unix_xid(&dip->ino_data.uid);
	ip->ino_data.gid = dip->ino_data.gid;
	xuid = vop_helper_create_uid(hmp->mp, dip->ino_data.mode, xuid, cred,
				     &vap->va_mode);
	ip->ino_data.mode = vap->va_mode;

	if (vap->va_vaflags & VA_UID_UUID_VALID)
		ip->ino_data.uid = vap->va_uid_uuid;
	else if (vap->va_uid != (uid_t)VNOVAL)
		hammer_guid_to_uuid(&ip->ino_data.uid, xuid);
	if (vap->va_vaflags & VA_GID_UUID_VALID)
		ip->ino_data.gid = vap->va_gid_uuid;
	else if (vap->va_gid != (gid_t)VNOVAL)
		hammer_guid_to_uuid(&ip->ino_data.gid, vap->va_gid);

	hammer_ref(&ip->lock);
	if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
		hammer_unref(&ip->lock);
		panic("hammer_create_inode: duplicate obj_id %llx", ip->obj_id);
	}
	*ipp = ip;
	return(0);
}

/*
 * Finalize a newly created inode, allowing it to be synchronized to the
 * media.  If an error occurred make sure the inode has been cleaned up and
 * will not be synchronized to the media.
 */
void
hammer_finalize_inode(hammer_transaction_t trans, hammer_inode_t ip, int error)
{
	if (error) {
		ip->flags &= ~HAMMER_INODE_MODMASK;

		KASSERT(ip->lock.refs == 1,
			("hammer_finalize_inode: %d refs\n", ip->lock.refs));
		KKASSERT(ip->vp == NULL);
		KKASSERT(ip->flush_state == HAMMER_FST_IDLE);
		KKASSERT(ip->cursor_ip_refs == 0);
		KKASSERT((ip->flags & HAMMER_INODE_MODMASK) == 0);

		KKASSERT(RB_EMPTY(&ip->rec_tree));
		KKASSERT(TAILQ_EMPTY(&ip->bio_list));
		KKASSERT(TAILQ_EMPTY(&ip->bio_alt_list));
	}
	ip->flags &= ~HAMMER_INODE_NEW;
}

/*
 * Called by hammer_sync_inode().
 */
static int
hammer_update_inode(hammer_transaction_t trans, hammer_inode_t ip)
{
	struct hammer_cursor cursor;
	hammer_record_t record;
	int error;

	/*
	 * Locate the record on-disk and mark it as deleted.  Both the B-Tree
	 * node and the record must be marked deleted.  The record may or
	 * may not be physically deleted, depending on the retention policy.
	 *
	 * If the inode has already been deleted on-disk we have nothing
	 * to do.
	 *
	 * XXX Update the inode record and data in-place if the retention
	 * policy allows it.
	 */
retry:
	error = 0;

	if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
	    HAMMER_INODE_ONDISK) {
		hammer_init_cursor(trans, &cursor, &ip->cache[0]);
		cursor.key_beg.obj_id = ip->obj_id;
		cursor.key_beg.key = 0;
		cursor.key_beg.create_tid = 0;
		cursor.key_beg.delete_tid = 0;
		cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
		cursor.key_beg.obj_type = 0;
		cursor.asof = ip->obj_asof;
		cursor.flags |= HAMMER_CURSOR_GET_RECORD | HAMMER_CURSOR_ASOF;
		cursor.flags |= HAMMER_CURSOR_BACKEND;

		error = hammer_btree_lookup(&cursor);
		if (error) {
			kprintf("error %d\n", error);
			Debugger("hammer_update_inode");
		}

		if (error == 0) {
			error = hammer_ip_delete_record(&cursor, trans->tid);
			if (error && error != EDEADLK) {
				kprintf("error %d\n", error);
				Debugger("hammer_update_inode2");
			}
			if (error == 0)
				ip->flags |= HAMMER_INODE_DELONDISK;
			hammer_cache_node(cursor.node, &ip->cache[0]);
		}
		hammer_done_cursor(&cursor);
		if (error == EDEADLK)
			goto retry;
	}

	/*
	 * Write out a new record if the in-memory inode is not marked
	 * as having been deleted.  Update our inode statistics if this
	 * is the first application of the inode on-disk.
	 *
	 * If the inode has been deleted permanently, HAMMER_INODE_DELONDISK
	 * will remain set and prevent further updates.
	 */
	if (error == 0 && (ip->flags & HAMMER_INODE_DELETED) == 0) {
		record = hammer_alloc_mem_record(ip);
		record->state = HAMMER_FST_FLUSH;
		record->rec.inode = ip->sync_ino_rec;
		record->rec.inode.base.base.create_tid = trans->tid;
		record->rec.inode.base.data_len = sizeof(ip->sync_ino_data);
		record->data = (void *)&ip->sync_ino_data;
		record->flags |= HAMMER_RECF_INTERLOCK_BE;
		error = hammer_ip_sync_record(trans, record);
		if (error) {
			kprintf("error %d\n", error);
			Debugger("hammer_update_inode3");
		}

		/*
		 * The record isn't managed by the inode's record tree,
		 * destroy it whether we succeed or fail.
		 */
		record->flags &= ~HAMMER_RECF_INTERLOCK_BE;
		record->flags |= HAMMER_RECF_DELETED_FE;
		record->state = HAMMER_FST_IDLE;
		KKASSERT(TAILQ_FIRST(&record->depend_list) == NULL);
		hammer_rel_mem_record(record);

		if (error == 0) {
			ip->sync_flags &= ~(HAMMER_INODE_RDIRTY |
					    HAMMER_INODE_DDIRTY |
					    HAMMER_INODE_ITIMES);
			ip->flags &= ~HAMMER_INODE_DELONDISK;
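			/*
			 * First time this inode has reached the media:
			 * account for it in the volume header.  The
			 * matching decrement is in the inode deletion
			 * path in hammer_sync_inode().
			 */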
			if ((ip->flags & HAMMER_INODE_ONDISK) == 0) {
				hammer_modify_volume(trans, trans->rootvol,
						     NULL, 0);
				++ip->hmp->rootvol->ondisk->vol0_stat_inodes;
				hammer_modify_volume_done(trans->rootvol);
				ip->flags |= HAMMER_INODE_ONDISK;
			}
		}
	}
	if (error == 0 && (ip->flags & HAMMER_INODE_DELETED)) {
		/*
		 * Clean out any left-over flags if the inode has been
		 * destroyed.
		 */
		ip->sync_flags &= ~(HAMMER_INODE_RDIRTY |
				    HAMMER_INODE_DDIRTY |
				    HAMMER_INODE_ITIMES);
	}
	return(error);
}

/*
 * Update only the itimes fields.  This is done non-historically.  The
 * record is updated in-place on the disk.
 */
static int
hammer_update_itimes(hammer_transaction_t trans, hammer_inode_t ip)
{
	struct hammer_cursor cursor;
	struct hammer_inode_record *rec;
	int error;

retry:
	error = 0;
	if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
	    HAMMER_INODE_ONDISK) {
		hammer_init_cursor(trans, &cursor, &ip->cache[0]);
		cursor.key_beg.obj_id = ip->obj_id;
		cursor.key_beg.key = 0;
		cursor.key_beg.create_tid = 0;
		cursor.key_beg.delete_tid = 0;
		cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
		cursor.key_beg.obj_type = 0;
		cursor.asof = ip->obj_asof;
		cursor.flags |= HAMMER_CURSOR_GET_RECORD | HAMMER_CURSOR_ASOF;
		cursor.flags |= HAMMER_CURSOR_BACKEND;

		error = hammer_btree_lookup(&cursor);
		if (error) {
			kprintf("error %d\n", error);
			Debugger("hammer_update_itimes1");
		}
		if (error == 0) {
			/*
			 * Do not generate UNDO records for atime/mtime
			 * updates.
			 */
			rec = &cursor.record->inode;
			hammer_modify_buffer(cursor.trans, cursor.record_buffer,
					     NULL, 0);
			rec->ino_atime = ip->sync_ino_rec.ino_atime;
			rec->ino_mtime = ip->sync_ino_rec.ino_mtime;
			hammer_modify_buffer_done(cursor.record_buffer);
			ip->sync_flags &= ~HAMMER_INODE_ITIMES;
			/* XXX recalculate crc */
			hammer_cache_node(cursor.node, &ip->cache[0]);
		}
		hammer_done_cursor(&cursor);
		if (error == EDEADLK)
			goto retry;
	}
	return(error);
}

/*
 * Release a reference on an inode.  If asked to flush the last release
 * will flush the inode.
 *
 * On the last reference we queue the inode to the flusher for its final
 * disposition.
 */
void
hammer_rel_inode(struct hammer_inode *ip, int flush)
{
	/*
	 * Handle disposition when dropping the last ref.
	 */
	while (ip->lock.refs == 1) {
		if ((ip->flags & HAMMER_INODE_MODMASK) == 0) {
			hammer_unload_inode(ip);
			return;
		}

		/*
		 * Hand the inode over to the flusher, which will
		 * add another ref to it.
		 */
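		/*
		 * Signal the flusher on every 256th reclaim as well, so
		 * dirty, unreferenced inodes handed to it do not pile up
		 * (see the similar heuristic in hammer_vop_reclaim()).
		 */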
		if (++ip->hmp->reclaim_count > 256) {
			ip->hmp->reclaim_count = 0;
			hammer_flush_inode(ip, HAMMER_FLUSH_FORCE |
					       HAMMER_FLUSH_SIGNAL);
		} else {
			hammer_flush_inode(ip, HAMMER_FLUSH_FORCE);
		}
		/* retry */
	}

	/*
	 * The inode still has multiple refs, drop one ref.  If a flush was
	 * requested make sure the flusher sees it.  New inodes which have
	 * not been finalized cannot be flushed.
	 */
	if (flush && ip->flush_state == HAMMER_FST_IDLE &&
	    (ip->flags & HAMMER_INODE_NEW) == 0) {
		hammer_flush_inode(ip, HAMMER_FLUSH_RELEASE);
	} else {
		hammer_unref(&ip->lock);
	}
}

/*
 * Unload and destroy the specified inode.  Must be called with one remaining
 * reference.  The reference is disposed of.
 *
 * This can only be called in the context of the flusher.
 */
static int
hammer_unload_inode(struct hammer_inode *ip)
{
	KASSERT(ip->lock.refs == 1,
		("hammer_unload_inode: %d refs\n", ip->lock.refs));
	KKASSERT(ip->vp == NULL);
	KKASSERT(ip->flush_state == HAMMER_FST_IDLE);
	KKASSERT(ip->cursor_ip_refs == 0);
	KKASSERT((ip->flags & HAMMER_INODE_MODMASK) == 0);

	KKASSERT(RB_EMPTY(&ip->rec_tree));
	KKASSERT(TAILQ_EMPTY(&ip->bio_list));
	KKASSERT(TAILQ_EMPTY(&ip->bio_alt_list));

	RB_REMOVE(hammer_ino_rb_tree, &ip->hmp->rb_inos_root, ip);

	hammer_uncache_node(&ip->cache[0]);
	hammer_uncache_node(&ip->cache[1]);
	if (ip->objid_cache)
		hammer_clear_objid(ip);
	--hammer_count_inodes;
	kfree(ip, M_HAMMER);

	return(0);
}

/*
 * A transaction has modified an inode, requiring updates as specified by
 * the passed flags.
 *
 * HAMMER_INODE_RDIRTY: Inode record has been updated
 * HAMMER_INODE_DDIRTY: Inode data has been updated
 * HAMMER_INODE_XDIRTY: Dirty frontend buffer cache buffer strategized
 * HAMMER_INODE_DELETED: Inode record/data must be deleted
 * HAMMER_INODE_ITIMES: mtime/atime has been updated
 */
void
hammer_modify_inode(hammer_transaction_t trans, hammer_inode_t ip, int flags)
{
	KKASSERT ((ip->flags & HAMMER_INODE_RO) == 0 ||
		  (flags & (HAMMER_INODE_RDIRTY|HAMMER_INODE_DDIRTY|
			    HAMMER_INODE_XDIRTY|
			    HAMMER_INODE_DELETED|HAMMER_INODE_ITIMES)) == 0);

	ip->flags |= flags;
}

/*
 * Flush an inode.  If the inode is already being flushed wait for
 * it to complete, then flush it again.  The interlock is against
 * front-end transactions, the backend flusher does not hold the lock.
 *
 * The flusher must distinguish between the records that are part of the
 * flush and any new records created in parallel with the flush.  The
 * inode data and truncation fields are also copied.  BIOs are a bit more
 * troublesome because some dirty buffers may not have been queued yet.
 */
void
hammer_flush_inode(hammer_inode_t ip, int flags)
{
	KKASSERT((ip->flags & HAMMER_INODE_NEW) == 0);
	if (ip->flush_state != HAMMER_FST_IDLE &&
	    (ip->flags & HAMMER_INODE_MODMASK)) {
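		/*
		 * A flush is already in progress.  Flag the inode so
		 * hammer_flush_inode_done() starts another pass once the
		 * current flush finishes, and drop/signal as requested.
		 */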
		if ((ip->flags & HAMMER_INODE_REFLUSH) == 0) {
			ip->flags |= HAMMER_INODE_REFLUSH;
			if (flags & HAMMER_FLUSH_RELEASE) {
				hammer_unref(&ip->lock);
				KKASSERT(ip->lock.refs > 0);
			}
			if (flags & HAMMER_FLUSH_SIGNAL)
				hammer_flusher_async(ip->hmp);
		}
		return;
	}
	if (ip->flush_state == HAMMER_FST_IDLE) {
		if ((ip->flags & HAMMER_INODE_MODMASK) ||
		    (flags & HAMMER_FLUSH_FORCE)) {
			/*
			 * Add a reference to represent the inode being queued
			 * to the flusher.  If the caller wants us to
			 * release a reference the two cancel each other out.
			 */
			if ((flags & HAMMER_FLUSH_RELEASE) == 0)
				hammer_ref(&ip->lock);

			hammer_flush_inode_copysync(ip);
			/*
			 * Move the inode to the flush list and add a ref to
			 * it representing it on the list.
			 */
			TAILQ_INSERT_TAIL(&ip->hmp->flush_list, ip, flush_entry);
			if (flags & HAMMER_FLUSH_SIGNAL)
				hammer_flusher_async(ip->hmp);
		}
	}
}

/*
 * Helper routine to copy the frontend synchronization state to the backend.
 * This routine may be called by either the frontend or the backend.
 */
static void
hammer_flush_inode_copysync(hammer_inode_t ip)
{
	int error;
	int count;

	/*
	 * Prevent anyone else from trying to do the same thing.
	 */
	ip->flush_state = HAMMER_FST_SETUP;

	/*
	 * Sync the buffer cache.  This will queue the BIOs.  If called
	 * from the context of the flusher the BIO's are thrown into bio_list
	 * regardless of ip->flush_state.
	 */
	if (ip->vp != NULL)
		error = vfsync(ip->vp, MNT_NOWAIT, 1, NULL, NULL);
	else
		error = 0;

	/*
	 * This freezes strategy writes, any further BIOs will be
	 * queued to alt_bio (unless we are
	 */
	ip->flush_state = HAMMER_FST_FLUSH;

	/*
	 * Snapshot the state of the inode for the backend flusher.
	 *
	 * The truncation must be retained in the frontend until after
	 * we've actually performed the record deletion.
	 */
	ip->sync_flags = (ip->flags & HAMMER_INODE_MODMASK);
	ip->sync_trunc_off = ip->trunc_off;
	ip->sync_ino_rec = ip->ino_rec;
	ip->sync_ino_data = ip->ino_data;
	ip->flags &= ~HAMMER_INODE_MODMASK |
		     HAMMER_INODE_TRUNCATED | HAMMER_INODE_BUFS;

	/*
	 * Fix up the dirty buffer status.
	 */
	if (ip->vp == NULL || RB_ROOT(&ip->vp->v_rbdirty_tree) == NULL)
		ip->flags &= ~HAMMER_INODE_BUFS;
	if (TAILQ_FIRST(&ip->bio_list))
		ip->sync_flags |= HAMMER_INODE_BUFS;
	else
		ip->sync_flags &= ~HAMMER_INODE_BUFS;

	/*
	 * Set the state for the inode's in-memory records.  If some records
	 * could not be marked for backend flush (i.e. deleted records),
	 * re-set the XDIRTY flag.
	 */
	count = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL,
			hammer_mark_record_callback, NULL);
	if (count)
		ip->flags |= HAMMER_INODE_XDIRTY;
}

/*
 * Mark records for backend flush, accumulate a count of the number of
 * records which could not be marked.  Records marked for deletion
 * by the frontend never make it to the media.  It is possible for
 * a record queued to the backend to wind up with FE set after the
 * fact, as long as BE has not yet been set.  The backend deals with
 * this race by syncing the record as if FE had not been set, and
 * then converting the record to a delete-on-disk record.
 */
static int
hammer_mark_record_callback(hammer_record_t rec, void *data)
{
	if (rec->state == HAMMER_FST_FLUSH) {
		return(0);
	} else if ((rec->flags & HAMMER_RECF_DELETED_FE) == 0) {
		rec->state = HAMMER_FST_FLUSH;
		hammer_ref(&rec->lock);
		return(0);
	} else {
		return(1);
	}
}


/*
 * Wait for a previously queued flush to complete
 */
void
hammer_wait_inode(hammer_inode_t ip)
{
	while (ip->flush_state == HAMMER_FST_FLUSH) {
		ip->flags |= HAMMER_INODE_FLUSHW;
		tsleep(&ip->flags, 0, "hmrwin", 0);
	}
}

/*
 * Called by the backend code when a flush has been completed.
 * The inode has already been removed from the flush list.
 *
 * A pipelined flush can occur, in which case we must re-enter the
 * inode on the list and re-copy its fields.
 */
void
hammer_flush_inode_done(hammer_inode_t ip)
{
	struct bio *bio;

	KKASSERT(ip->flush_state == HAMMER_FST_FLUSH);

	if (ip->sync_flags)
		kprintf("ip %p leftover sync_flags %08x\n", ip, ip->sync_flags);
	ip->flags |= ip->sync_flags;
	ip->flush_state = HAMMER_FST_IDLE;

	/*
	 * Reflush any BIOs that wound up in the alt list.  Our inode will
	 * also wind up at the end of the flusher's list.
	 */
	while ((bio = TAILQ_FIRST(&ip->bio_alt_list)) != NULL) {
		TAILQ_REMOVE(&ip->bio_alt_list, bio, bio_act);
		TAILQ_INSERT_TAIL(&ip->bio_list, bio, bio_act);
		ip->flags |= HAMMER_INODE_XDIRTY;
		ip->flags |= HAMMER_INODE_REFLUSH;
		kprintf("rebio %p ip %p @%016llx,%d\n", bio, ip, bio->bio_offset, bio->bio_buf->b_bufsize);
	}

	/*
	 * If the frontend made more changes and requested another flush,
	 * do it.
	 */
	if (ip->flags & HAMMER_INODE_REFLUSH) {
		ip->flags &= ~HAMMER_INODE_REFLUSH;
		hammer_flush_inode(ip, 0);
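		/*
		 * If the re-flush completed synchronously (or was a no-op)
		 * wake up anyone sleeping in hammer_wait_inode().
		 */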
		if (ip->flush_state == HAMMER_FST_IDLE) {
			if (ip->flags & HAMMER_INODE_FLUSHW) {
				ip->flags &= ~HAMMER_INODE_FLUSHW;
				wakeup(&ip->flags);
			}
		}
	} else {
		if (ip->flags & HAMMER_INODE_FLUSHW) {
			ip->flags &= ~HAMMER_INODE_FLUSHW;
			wakeup(&ip->flags);
		}
	}
	hammer_rel_inode(ip, 0);
}

/*
 * Called from hammer_sync_inode() to synchronize in-memory records
 * to the media.
 */
static int
hammer_sync_record_callback(hammer_record_t record, void *data)
{
	hammer_transaction_t trans = data;
	int error;

	/*
	 * Skip records that do not belong to the current flush.  Records
	 * belonging to the flush will have been referenced for us.
	 */
	if (record->state != HAMMER_FST_FLUSH)
		return(0);

	/*
	 * Interlock the record using the BE flag.  Once BE is set the
	 * frontend cannot change the state of FE.
	 *
	 * NOTE: If FE is set prior to us setting BE we still sync the
	 * record out, but the flush completion code converts it to
	 * a delete-on-disk record instead of destroying it.
	 */
	hammer_lock_ex(&record->lock);
	if (record->flags & HAMMER_RECF_INTERLOCK_BE) {
		hammer_unlock(&record->lock);
		return(0);
	}
	record->flags |= HAMMER_RECF_INTERLOCK_BE;

	/*
	 * If DELETED_FE is set we may have already sent dependent pieces
	 * to the disk and we must flush the record as if it hadn't been
	 * deleted.  This creates a bit of a mess because we have to
	 * have ip_sync_record convert the record to DELETE_ONDISK before
	 * it inserts the B-Tree record.  Otherwise the media sync might
	 * be visible to the frontend.
	 */
	if (record->flags & HAMMER_RECF_DELETED_FE)
		record->flags |= HAMMER_RECF_CONVERT_DELETE_ONDISK;

	/*
	 * Assign the create_tid for new records.  Deletions already
	 * have the record's entire key properly set up.
	 */
	if ((record->flags & HAMMER_RECF_DELETE_ONDISK) == 0)
		record->rec.inode.base.base.create_tid = trans->tid;
	error = hammer_ip_sync_record(trans, record);

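	/*
	 * Any error is handed back negated; the RB_SCAN loop in
	 * hammer_sync_inode() checks for a negative return and converts
	 * it back to a positive errno.
	 */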
	if (error) {
		error = -error;
		if (error != -ENOSPC) {
			kprintf("hammer_sync_record_callback: sync failed rec "
				"%p, error %d\n", record, error);
			Debugger("sync failed rec");
		}
	}
	hammer_flush_record_done(record, error);
	return(error);
}

/*
 * XXX error handling
 */
int
hammer_sync_inode(hammer_inode_t ip, int handle_delete)
{
	struct hammer_transaction trans;
	struct bio *bio;
	hammer_depend_t depend;
	int error, tmp_error;

	if ((ip->sync_flags & HAMMER_INODE_MODMASK) == 0 &&
	    handle_delete == 0) {
		return(0);
	}

	hammer_start_transaction_fls(&trans, ip->hmp);

	/*
	 * Any (directory) records this inode depends on must also be
	 * synchronized.  The directory itself only needs to be flushed
	 * if its inode is not already on-disk.
	 */
	while ((depend = TAILQ_FIRST(&ip->depend_list)) != NULL) {
		hammer_record_t record;

		record = depend->record;
		TAILQ_REMOVE(&depend->record->depend_list, depend, rec_entry);
		TAILQ_REMOVE(&ip->depend_list, depend, ip_entry);
		--ip->depend_count;
		if (record->state != HAMMER_FST_FLUSH) {
			record->state = HAMMER_FST_FLUSH;
			/* add ref (steal ref from dependency) */
		} else {
			/* remove ref related to dependency */
			/* record still has at least one ref from state */
			hammer_unref(&record->lock);
			KKASSERT(record->lock.refs > 0);
		}
		if (record->ip->flags & HAMMER_INODE_ONDISK) {
			kprintf("I");
			hammer_sync_record_callback(record, &trans);
		} else {
			kprintf("J");
			KKASSERT((record->ip->flags & HAMMER_INODE_NEW) == 0);
			hammer_flush_inode(record->ip, 0);
		}
		hammer_unref(&ip->lock);
		KKASSERT(ip->lock.refs > 0);
		kfree(depend, M_HAMMER);
	}


	/*
	 * Sync inode deletions and truncations.
	 */
	if (ip->sync_ino_rec.ino_nlinks == 0 && handle_delete &&
	    (ip->flags & HAMMER_INODE_GONE) == 0) {
		/*
		 * Handle the case where the inode has been completely deleted
		 * and is no longer referenceable from the filesystem
		 * namespace.
		 *
		 * NOTE: We do not set the RDIRTY flag when updating the
		 * delete_tid, setting HAMMER_INODE_DELETED takes care of it.
		 */

		ip->flags |= HAMMER_INODE_GONE | HAMMER_INODE_DELETED;
		ip->flags &= ~HAMMER_INODE_TRUNCATED;
		ip->sync_flags &= ~HAMMER_INODE_TRUNCATED;
		if (ip->vp)
			vtruncbuf(ip->vp, 0, HAMMER_BUFSIZE);
		error = hammer_ip_delete_range_all(&trans, ip);
		if (error)
			Debugger("hammer_ip_delete_range_all errored");

		/*
		 * Sanity check.  The only records that remain should be
		 * marked for back-end deletion.
		 */
		{
			hammer_record_t rec;

			RB_FOREACH(rec, hammer_rec_rb_tree, &ip->rec_tree) {
				KKASSERT(rec->state == HAMMER_FST_FLUSH);
			}
		}

		/*
		 * Set delete_tid in both the frontend and backend
		 * copy of the inode record.
		 */
		ip->ino_rec.base.base.delete_tid = trans.tid;
		ip->sync_ino_rec.base.base.delete_tid = trans.tid;

		/*
		 * Indicate that the inode has/is-being deleted.
		 */
		ip->flags |= HAMMER_INODE_DELETED;
		hammer_modify_inode(&trans, ip, HAMMER_INODE_RDIRTY);
		hammer_modify_volume(&trans, trans.rootvol, NULL, 0);
		--ip->hmp->rootvol->ondisk->vol0_stat_inodes;
		hammer_modify_volume_done(trans.rootvol);
	} else if (ip->sync_flags & HAMMER_INODE_TRUNCATED) {
		/*
		 * Interlock trunc_off.  The VOP front-end may continue to
		 * make adjustments to it while we are blocked.
		 */
		off_t trunc_off;
		off_t aligned_trunc_off;

		trunc_off = ip->sync_trunc_off;
		aligned_trunc_off = (trunc_off + HAMMER_BUFMASK) &
				    ~HAMMER_BUFMASK64;

		/*
		 * Delete any whole blocks on-media.  The front-end has
		 * already cleaned out any partial block and made it
		 * pending.  The front-end may have updated trunc_off
		 * while we were blocked so do not just unconditionally
		 * set it to the maximum offset.
		 */
		kprintf("sync truncation range @ %016llx\n", aligned_trunc_off);
		error = hammer_ip_delete_range(&trans, ip,
					       aligned_trunc_off,
					       0x7FFFFFFFFFFFFFFFLL);
		if (error)
			Debugger("hammer_ip_delete_range errored");
		ip->sync_flags &= ~HAMMER_INODE_TRUNCATED;
		if (ip->trunc_off >= trunc_off) {
			ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
			ip->flags &= ~HAMMER_INODE_TRUNCATED;
		}
	}

	error = 0;	/* XXX vfsync used to be here */

	/*
	 * Flush any queued BIOs.
	 */
	while ((bio = TAILQ_FIRST(&ip->bio_list)) != NULL) {
		TAILQ_REMOVE(&ip->bio_list, bio, bio_act);
#if 0
		kprintf("dowrite %016llx ip %p bio %p @ %016llx\n", trans.tid, ip, bio, bio->bio_offset);
#endif
		tmp_error = hammer_dowrite(&trans, ip, bio);
		if (tmp_error)
			error = tmp_error;
	}
	ip->sync_flags &= ~HAMMER_INODE_BUFS;

	/*
	 * Now sync related records.
	 */
	for (;;) {
		tmp_error = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL,
				    hammer_sync_record_callback, &trans);
		KKASSERT(tmp_error <= 0);
		if (tmp_error < 0)
			tmp_error = -tmp_error;
		if (tmp_error)
			error = tmp_error;
		break;
	}

	/*
	 * XDIRTY represents rec_tree and bio_list.  However, rec_tree may
	 * contain new front-end records so short of scanning it we can't
	 * just test whether it is empty or not.
	 *
	 * If no error occurred assume we succeeded.
	 */
	if (error == 0)
		ip->sync_flags &= ~HAMMER_INODE_XDIRTY;

	if (error)
		Debugger("RB_SCAN errored");

	/*
	 * Now update the inode's on-disk inode-data and/or on-disk record.
	 * DELETED and ONDISK are managed only in ip->flags.
	 */
	switch(ip->flags & (HAMMER_INODE_DELETED | HAMMER_INODE_ONDISK)) {
	case HAMMER_INODE_DELETED|HAMMER_INODE_ONDISK:
		/*
		 * If deleted and on-disk, don't set any additional flags.
		 * the delete flag takes care of things.
		 */
		break;
	case HAMMER_INODE_DELETED:
		/*
		 * Take care of the case where a deleted inode was never
		 * flushed to the disk in the first place.
		 */
		ip->sync_flags &= ~(HAMMER_INODE_RDIRTY|HAMMER_INODE_DDIRTY|
				    HAMMER_INODE_XDIRTY|HAMMER_INODE_ITIMES);
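		/*
		 * Toss any remaining in-memory records.  They were never
		 * written to the media and the inode itself will never be
		 * written either, so mark them deleted on both the frontend
		 * and backend and release them.
		 */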
		while (RB_ROOT(&ip->rec_tree)) {
			hammer_record_t record = RB_ROOT(&ip->rec_tree);
			hammer_ref(&record->lock);
			KKASSERT(record->lock.refs == 1);
			record->flags |= HAMMER_RECF_DELETED_FE;
			record->flags |= HAMMER_RECF_DELETED_BE;
			hammer_cleardep_mem_record(record);
			hammer_rel_mem_record(record);
		}
		break;
	case HAMMER_INODE_ONDISK:
		/*
		 * If already on-disk, do not set any additional flags.
		 */
		break;
	default:
		/*
		 * If not on-disk and not deleted, set both dirty flags
		 * to force an initial record to be written.  Also set
		 * the create_tid for the inode.
		 *
		 * Set create_tid in both the frontend and backend
		 * copy of the inode record.
		 */
		ip->ino_rec.base.base.create_tid = trans.tid;
		ip->sync_ino_rec.base.base.create_tid = trans.tid;
		ip->sync_flags |= HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY;
		break;
	}

	/*
	 * If RDIRTY or DDIRTY is set, write out a new record.  If the inode
	 * is already on-disk the old record is marked as deleted.
	 *
	 * If DELETED is set hammer_update_inode() will delete the existing
	 * record without writing out a new one.
	 *
	 * If *ONLY* the ITIMES flag is set we can update the record in-place.
	 */
	if (ip->flags & HAMMER_INODE_DELETED) {
		error = hammer_update_inode(&trans, ip);
	} else
	if ((ip->sync_flags & (HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY |
			       HAMMER_INODE_ITIMES)) == HAMMER_INODE_ITIMES) {
		error = hammer_update_itimes(&trans, ip);
	} else
	if (ip->sync_flags & (HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY |
			      HAMMER_INODE_ITIMES)) {
		error = hammer_update_inode(&trans, ip);
	}
	if (error)
		Debugger("hammer_update_itimes/inode errored");

	/*
	 * Save the TID we used to sync the inode with to make sure we
	 * do not improperly reuse it.
	 */
	hammer_done_transaction(&trans);
	return(error);
}