sys/vfs/hammer/hammer_inode.c
1/*
2 * Copyright (c) 2007-2008 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.39 2008/04/26 08:02:17 dillon Exp $
35 */
36
37#include "hammer.h"
38#include <sys/buf.h>
39#include <sys/buf2.h>
40
41static int hammer_unload_inode(struct hammer_inode *ip);
42static void hammer_flush_inode_copysync(hammer_inode_t ip);
43static int hammer_mark_record_callback(hammer_record_t rec, void *data);
44
45/*
46 * The kernel is not actively referencing this vnode but is still holding
47 * it cached.
48 *
49 * This is called from the frontend.
50 */
51int
52hammer_vop_inactive(struct vop_inactive_args *ap)
53{
54 struct hammer_inode *ip = VTOI(ap->a_vp);
55
56 /*
57 * Degenerate case
58 */
59 if (ip == NULL) {
60 vrecycle(ap->a_vp);
61 return(0);
62 }
63
64 /*
65 * If the inode no longer has any references we recover its
66 * in-memory resources immediately.
67 *
68 * NOTE: called from frontend, use ino_rec instead of sync_ino_rec.
69 */
70 if (ip->ino_rec.ino_nlinks == 0)
71 vrecycle(ap->a_vp);
72 return(0);
73}
74
75/*
76 * Release the vnode association. This is typically (but not always)
77 * the last reference on the inode and will flush the inode to the
78 * buffer cache.
79 *
80 * XXX Currently our sync code only runs through inodes with vnode
81 * associations, so we depend on hammer_rel_inode() to sync any inode
82 * record data to the block device prior to losing the association.
83 * Otherwise transactions that the user expected to be distinct by
84 * doing a manual sync may be merged.
85 */
86int
87hammer_vop_reclaim(struct vop_reclaim_args *ap)
88{
89 struct hammer_inode *ip;
90 struct vnode *vp;
91
92 vp = ap->a_vp;
93
94 if ((ip = vp->v_data) != NULL) {
95 vp->v_data = NULL;
96 ip->vp = NULL;
97
98 /*
99 * Don't let too many dependencies build up on unreferenced
100 * inodes or we could run ourselves out of memory.
101 */
102 if (TAILQ_FIRST(&ip->depend_list)) {
103 ip->hmp->reclaim_count += ip->depend_count;
104 if (ip->hmp->reclaim_count > 256) {
105 ip->hmp->reclaim_count = 0;
106 hammer_flusher_async(ip->hmp);
107 }
108 }
109 hammer_rel_inode(ip, 1);
110 }
111 return(0);
112}
113
114/*
115 * Return a locked vnode for the specified inode. The inode must be
116 * referenced but NOT LOCKED on entry and will remain referenced on
117 * return.
118 *
119 * Called from the frontend.
120 */
121int
122hammer_get_vnode(struct hammer_inode *ip, int lktype, struct vnode **vpp)
123{
124 struct vnode *vp;
125 int error = 0;
126
127 for (;;) {
128 if ((vp = ip->vp) == NULL) {
129 error = getnewvnode(VT_HAMMER, ip->hmp->mp, vpp, 0, 0);
130 if (error)
131 break;
132 hammer_lock_ex(&ip->lock);
133 if (ip->vp != NULL) {
134 hammer_unlock(&ip->lock);
135 vp->v_type = VBAD;
136 vx_put(vp);
137 continue;
138 }
139 hammer_ref(&ip->lock);
140 vp = *vpp;
141 ip->vp = vp;
142 vp->v_type = hammer_get_vnode_type(
143 ip->ino_rec.base.base.obj_type);
144
145 switch(ip->ino_rec.base.base.obj_type) {
146 case HAMMER_OBJTYPE_CDEV:
147 case HAMMER_OBJTYPE_BDEV:
148 vp->v_ops = &ip->hmp->mp->mnt_vn_spec_ops;
149 addaliasu(vp, ip->ino_data.rmajor,
150 ip->ino_data.rminor);
151 break;
152 case HAMMER_OBJTYPE_FIFO:
153 vp->v_ops = &ip->hmp->mp->mnt_vn_fifo_ops;
154 break;
155 default:
156 break;
157 }
158
159 /*
160 * Only mark as the root vnode if the ip is not
161 * historical, otherwise the VFS cache will get
162 * confused. The other half of the special handling
163 * is in hammer_vop_nlookupdotdot().
164 */
165 if (ip->obj_id == HAMMER_OBJID_ROOT &&
166 ip->obj_asof == ip->hmp->asof) {
167 vp->v_flag |= VROOT;
168 }
169
170 vp->v_data = (void *)ip;
171 /* vnode locked by getnewvnode() */
172 /* make related vnode dirty if inode dirty? */
173 hammer_unlock(&ip->lock);
174 if (vp->v_type == VREG)
175 vinitvmio(vp, ip->ino_rec.ino_size);
176 break;
177 }
178
179 /*
180 * loop if the vget fails (aka races), or if the vp
181 * no longer matches ip->vp.
182 */
183 if (vget(vp, LK_EXCLUSIVE) == 0) {
184 if (vp == ip->vp)
185 break;
186 vput(vp);
187 }
188 }
189 *vpp = vp;
190 return(error);
191}
192
193/*
194 * Acquire a HAMMER inode. The returned inode is not locked. These functions
195 * do not attach or detach the related vnode (use hammer_get_vnode() for
196 * that).
197 *
198 * The flags argument is only applied for newly created inodes, and only
199 * certain flags are inherited.
200 *
201 * Called from the frontend.
202 */
203struct hammer_inode *
204hammer_get_inode(hammer_transaction_t trans, struct hammer_node **cache,
205 u_int64_t obj_id, hammer_tid_t asof, int flags, int *errorp)
206{
207 hammer_mount_t hmp = trans->hmp;
208 struct hammer_inode_info iinfo;
209 struct hammer_cursor cursor;
210 struct hammer_inode *ip;
211
212 /*
213 * Determine if we already have an inode cached. If we do then
214 * we are golden.
215 */
216 iinfo.obj_id = obj_id;
217 iinfo.obj_asof = asof;
218loop:
219 ip = hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, &iinfo);
220 if (ip) {
221 hammer_ref(&ip->lock);
222 *errorp = 0;
223 return(ip);
224 }
225
226 ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
227 ++hammer_count_inodes;
228 ip->obj_id = obj_id;
229 ip->obj_asof = iinfo.obj_asof;
230 ip->hmp = hmp;
231 ip->flags = flags & HAMMER_INODE_RO;
232 ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
233 if (hmp->ronly)
234 ip->flags |= HAMMER_INODE_RO;
235 RB_INIT(&ip->rec_tree);
236 TAILQ_INIT(&ip->bio_list);
237 TAILQ_INIT(&ip->bio_alt_list);
238 TAILQ_INIT(&ip->depend_list);
239
240 /*
241 * Locate the on-disk inode.
242 */
243retry:
244 hammer_init_cursor(trans, &cursor, cache);
245 cursor.key_beg.obj_id = ip->obj_id;
246 cursor.key_beg.key = 0;
247 cursor.key_beg.create_tid = 0;
248 cursor.key_beg.delete_tid = 0;
249 cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
250 cursor.key_beg.obj_type = 0;
251 cursor.asof = iinfo.obj_asof;
252 cursor.flags = HAMMER_CURSOR_GET_RECORD | HAMMER_CURSOR_GET_DATA |
253 HAMMER_CURSOR_ASOF;
254
255 *errorp = hammer_btree_lookup(&cursor);
256 if (*errorp == EDEADLK) {
257 hammer_done_cursor(&cursor);
258 goto retry;
259 }
260
261 /*
262 * On success the B-Tree lookup will hold the appropriate
263 * buffer cache buffers and provide a pointer to the requested
264 * information. Copy the information to the in-memory inode
265 * and cache the B-Tree node to improve future operations.
266 */
267 if (*errorp == 0) {
268 ip->ino_rec = cursor.record->inode;
269 ip->ino_data = cursor.data->inode;
270 hammer_cache_node(cursor.node, &ip->cache[0]);
271 if (cache)
272 hammer_cache_node(cursor.node, cache);
273 }
274
275 /*
276 * On success the record and data have been loaded, so insert the
277 * inode into the in-memory inode RB tree. It is possible to race another
278 * lookup's insertion of the same inode, so deal with that condition too.
279 *
280 * The cursor's locked node interlocks against others creating and
281 * destroying ip while we were blocked.
282 */
283 if (*errorp == 0) {
284 hammer_ref(&ip->lock);
285 if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
286 hammer_uncache_node(&ip->cache[0]);
287 hammer_uncache_node(&ip->cache[1]);
288 KKASSERT(ip->lock.refs == 1);
289 --hammer_count_inodes;
290 kfree(ip, M_HAMMER);
291 hammer_done_cursor(&cursor);
292 goto loop;
293 }
294 ip->flags |= HAMMER_INODE_ONDISK;
295 } else {
296 --hammer_count_inodes;
297 kfree(ip, M_HAMMER);
298 ip = NULL;
299 }
300 hammer_done_cursor(&cursor);
301 return (ip);
302}
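
/*
 * An illustrative sketch of how a front-end caller typically pairs
 * hammer_get_inode() with hammer_get_vnode() above (hypothetical locals,
 * abbreviated error handling):
 *
 *	struct hammer_inode *ip;
 *	struct vnode *vp;
 *	int error;
 *
 *	ip = hammer_get_inode(trans, NULL, obj_id, asof, 0, &error);
 *	if (ip == NULL)
 *		return (error);
 *	error = hammer_get_vnode(ip, LK_EXCLUSIVE, &vp);
 *	hammer_rel_inode(ip, 0);
 *	if (error == 0) {
 *		... operate on the locked vnode ...
 *		vput(vp);
 *	}
 *
 * hammer_get_vnode() takes its own inode reference when it attaches the
 * vnode, so the lookup reference can be dropped right after the call.
 */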
303
304/*
305 * Create a new filesystem object, returning the inode in *ipp. The
306 * returned inode will be referenced and shared-locked. The caller
307 * must unlock and release it when finished.
308 *
309 * The inode is created in-memory.
310 */
311int
312hammer_create_inode(hammer_transaction_t trans, struct vattr *vap,
313 struct ucred *cred, hammer_inode_t dip,
314 struct hammer_inode **ipp)
315{
316 hammer_mount_t hmp;
317 hammer_inode_t ip;
318 uid_t xuid;
319
320 hmp = trans->hmp;
321 ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
322 ++hammer_count_inodes;
323 ip->obj_id = hammer_alloc_tid(trans);
324 KKASSERT(ip->obj_id != 0);
325 ip->obj_asof = hmp->asof;
326 ip->hmp = hmp;
327 ip->flush_state = HAMMER_FST_IDLE;
328 ip->flags = HAMMER_INODE_DDIRTY | HAMMER_INODE_RDIRTY |
329 HAMMER_INODE_ITIMES;
330
331 RB_INIT(&ip->rec_tree);
332 TAILQ_INIT(&ip->bio_list);
333 TAILQ_INIT(&ip->bio_alt_list);
334 TAILQ_INIT(&ip->depend_list);
335
336 ip->ino_rec.ino_atime = trans->time;
337 ip->ino_rec.ino_mtime = trans->time;
338 ip->ino_rec.ino_size = 0;
339 ip->ino_rec.ino_nlinks = 0;
340 /* XXX */
341 ip->ino_rec.base.base.btype = HAMMER_BTREE_TYPE_RECORD;
342 ip->ino_rec.base.base.obj_id = ip->obj_id;
343 ip->ino_rec.base.base.key = 0;
344 ip->ino_rec.base.base.create_tid = 0;
345 ip->ino_rec.base.base.delete_tid = 0;
346 ip->ino_rec.base.base.rec_type = HAMMER_RECTYPE_INODE;
347 ip->ino_rec.base.base.obj_type = hammer_get_obj_type(vap->va_type);
348
349 ip->ino_data.version = HAMMER_INODE_DATA_VERSION;
350 ip->ino_data.mode = vap->va_mode;
351 ip->ino_data.ctime = trans->time;
352 ip->ino_data.parent_obj_id = (dip) ? dip->ino_rec.base.base.obj_id : 0;
353
354 switch(ip->ino_rec.base.base.obj_type) {
355 case HAMMER_OBJTYPE_CDEV:
356 case HAMMER_OBJTYPE_BDEV:
357 ip->ino_data.rmajor = vap->va_rmajor;
358 ip->ino_data.rminor = vap->va_rminor;
359 break;
360 default:
361 break;
362 }
363
364 /*
365 * Calculate default uid/gid and overwrite with information from
366 * the vap.
367 */
368 xuid = hammer_to_unix_xid(&dip->ino_data.uid);
369 ip->ino_data.gid = dip->ino_data.gid;
370 xuid = vop_helper_create_uid(hmp->mp, dip->ino_data.mode, xuid, cred,
371 &vap->va_mode);
372 ip->ino_data.mode = vap->va_mode;
373
374 if (vap->va_vaflags & VA_UID_UUID_VALID)
375 ip->ino_data.uid = vap->va_uid_uuid;
376 else if (vap->va_uid != (uid_t)VNOVAL)
377 hammer_guid_to_uuid(&ip->ino_data.uid, xuid);
378 if (vap->va_vaflags & VA_GID_UUID_VALID)
379 ip->ino_data.gid = vap->va_gid_uuid;
380 else if (vap->va_gid != (gid_t)VNOVAL)
381 hammer_guid_to_uuid(&ip->ino_data.gid, vap->va_gid);
382
383 hammer_ref(&ip->lock);
384 hammer_lock_sh(&ip->lock);
385 if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
386 hammer_unref(&ip->lock);
387 panic("hammer_create_inode: duplicate obj_id %llx", ip->obj_id);
388 }
389 *ipp = ip;
390 return(0);
391}
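
/*
 * Sketch of the caller contract described above (illustrative only,
 * hypothetical locals): the new inode comes back referenced and
 * shared-locked, so a creating VOP ends up doing something like
 *
 *	struct hammer_inode *nip;
 *	int error;
 *
 *	error = hammer_create_inode(trans, vap, cred, dip, &nip);
 *	if (error == 0) {
 *		... add the directory entry, obtain a vnode, etc ...
 *		hammer_unlock(&nip->lock);
 *		hammer_rel_inode(nip, 0);
 *	}
 */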
392
393/*
394 * Called by hammer_sync_inode().
395 */
396static int
397hammer_update_inode(hammer_transaction_t trans, hammer_inode_t ip)
398{
399 struct hammer_cursor cursor;
400 hammer_record_t record;
401 int error;
402
403 /*
404 * Locate the record on-disk and mark it as deleted. Both the B-Tree
405 * node and the record must be marked deleted. The record may or
406 * may not be physically deleted, depending on the retention policy.
407 *
408 * If the inode has already been deleted on-disk we have nothing
409 * to do.
410 *
411 * XXX Update the inode record and data in-place if the retention
412 * policy allows it.
413 */
414retry:
415 error = 0;
416
417 if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
418 HAMMER_INODE_ONDISK) {
419 hammer_init_cursor(trans, &cursor, &ip->cache[0]);
420 cursor.key_beg.obj_id = ip->obj_id;
421 cursor.key_beg.key = 0;
422 cursor.key_beg.create_tid = 0;
423 cursor.key_beg.delete_tid = 0;
424 cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
425 cursor.key_beg.obj_type = 0;
426 cursor.asof = ip->obj_asof;
427 cursor.flags |= HAMMER_CURSOR_GET_RECORD | HAMMER_CURSOR_ASOF;
428 cursor.flags |= HAMMER_CURSOR_BACKEND;
429
430 error = hammer_btree_lookup(&cursor);
431 if (error) {
432 kprintf("error %d\n", error);
433 Debugger("hammer_update_inode");
434 }
435
436
437 if (error == 0) {
438 error = hammer_ip_delete_record(&cursor, trans->tid);
439 if (error && error != EDEADLK) {
440 kprintf("error %d\n", error);
441 Debugger("hammer_update_inode2");
442 }
443 if (error == 0)
444 ip->flags |= HAMMER_INODE_DELONDISK;
445 hammer_cache_node(cursor.node, &ip->cache[0]);
446 }
447 hammer_done_cursor(&cursor);
448 if (error == EDEADLK)
449 goto retry;
450 }
451
452 /*
453 * Write out a new record if the in-memory inode is not marked
454 * as having been deleted. Update our inode statistics if this
455 * is the first time the inode is laid down on-disk.
456 *
457 * If the inode has been deleted permanently, HAMMER_INODE_DELONDISK
458 * will remain set and prevent further updates.
459 */
460 if (error == 0 && (ip->flags & HAMMER_INODE_DELETED) == 0) {
461 record = hammer_alloc_mem_record(ip);
462 record->state = HAMMER_FST_FLUSH;
463 record->rec.inode = ip->sync_ino_rec;
464 record->rec.inode.base.base.create_tid = trans->tid;
465 record->rec.inode.base.data_len = sizeof(ip->sync_ino_data);
466 record->data = (void *)&ip->sync_ino_data;
467 error = hammer_ip_sync_record(trans, record);
468 if (error) {
469 kprintf("error %d\n", error);
470 Debugger("hammer_update_inode3");
471 }
472 hammer_delete_mem_record(record);
473 hammer_rel_mem_record(record);
474 if (error == 0) {
475 ip->sync_flags &= ~(HAMMER_INODE_RDIRTY |
476 HAMMER_INODE_DDIRTY |
477 HAMMER_INODE_ITIMES);
478 ip->flags &= ~HAMMER_INODE_DELONDISK;
479 if ((ip->flags & HAMMER_INODE_ONDISK) == 0) {
480 hammer_modify_volume(trans, trans->rootvol,
481 NULL, 0);
482 ++ip->hmp->rootvol->ondisk->vol0_stat_inodes;
483 hammer_modify_volume_done(trans->rootvol);
484 ip->flags |= HAMMER_INODE_ONDISK;
485 }
486 }
487 }
488 if (error == 0 && (ip->flags & HAMMER_INODE_DELETED)) {
489 /*
490 * Clean out any left-over flags if the inode has been
491 * destroyed.
492 */
493 ip->sync_flags &= ~(HAMMER_INODE_RDIRTY |
494 HAMMER_INODE_DDIRTY |
495 HAMMER_INODE_ITIMES);
496 }
497 return(error);
498}
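
/*
 * The net effect of the update path above on the media, sketched for two
 * successive syncs at tid1 < tid2 (illustrative; the old record survives
 * only as far as the retention policy allows):
 *
 *	old inode record: create_tid = tid1, delete_tid = tid2
 *	new inode record: create_tid = tid2, delete_tid = 0
 *
 * An as-of lookup with an asof earlier than tid2 still matches the old
 * record, which is what makes historical access work.
 */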
499
500/*
501 * Update only the itimes fields. This is done non-historically. The
502 * record is updated in-place on the disk.
503 */
504static int
505hammer_update_itimes(hammer_transaction_t trans, hammer_inode_t ip)
506{
507 struct hammer_cursor cursor;
508 struct hammer_inode_record *rec;
509 int error;
510
511retry:
512 error = 0;
513 if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
514 HAMMER_INODE_ONDISK) {
515 hammer_init_cursor(trans, &cursor, &ip->cache[0]);
516 cursor.key_beg.obj_id = ip->obj_id;
517 cursor.key_beg.key = 0;
518 cursor.key_beg.create_tid = 0;
519 cursor.key_beg.delete_tid = 0;
520 cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
521 cursor.key_beg.obj_type = 0;
522 cursor.asof = ip->obj_asof;
523 cursor.flags |= HAMMER_CURSOR_GET_RECORD | HAMMER_CURSOR_ASOF;
524 cursor.flags |= HAMMER_CURSOR_BACKEND;
525
526 error = hammer_btree_lookup(&cursor);
527 if (error) {
528 kprintf("error %d\n", error);
529 Debugger("hammer_update_itimes1");
530 }
531 if (error == 0) {
532 /*
533 * Do not generate UNDO records for atime/mtime
534 * updates.
535 */
536 rec = &cursor.record->inode;
537 hammer_modify_buffer(cursor.trans, cursor.record_buffer,
538 NULL, 0);
539 rec->ino_atime = ip->sync_ino_rec.ino_atime;
540 rec->ino_mtime = ip->sync_ino_rec.ino_mtime;
541 hammer_modify_buffer_done(cursor.record_buffer);
542 ip->sync_flags &= ~HAMMER_INODE_ITIMES;
543 /* XXX recalculate crc */
544 hammer_cache_node(cursor.node, &ip->cache[0]);
545 }
546 hammer_done_cursor(&cursor);
547 if (error == EDEADLK)
548 goto retry;
549 }
550 return(error);
551}
552
553/*
554 * Release a reference on an inode. If asked to flush, the last
555 * release will flush the inode.
556 *
557 * On the last reference we queue the inode to the flusher for its final
558 * disposition.
559 */
560void
561hammer_rel_inode(struct hammer_inode *ip, int flush)
562{
563 /*
564 * Handle disposition when dropping the last ref.
565 */
566 while (ip->lock.refs == 1) {
567#if 0
568 /*
569 * XXX this can create a deep stack recursion
570 */
571 if (curthread == ip->hmp->flusher_td) {
572 /*
573 * We are the flusher, do any required flushes
574 * before unloading the inode.
575 */
576 int error = 0;
577
578 KKASSERT(ip->flush_state == HAMMER_FST_IDLE);
579 while (error == 0 &&
580 (ip->flags & HAMMER_INODE_MODMASK)) {
581 hammer_ref(&ip->lock);
582 hammer_flush_inode_copysync(ip);
583 error = hammer_sync_inode(ip, 1);
584 hammer_flush_inode_done(ip);
585 }
586 if (error)
587 kprintf("hammer_sync_inode failed error %d\n",
588 error);
589 if (ip->lock.refs > 1)
590 continue;
591 hammer_unload_inode(ip);
592 return;
593 }
594#endif
595 if ((ip->flags & HAMMER_INODE_MODMASK) == 0) {
596 hammer_unload_inode(ip);
597 return;
598 }
599
600 /*
601 * Hand the inode over to the flusher, which will
602 * add another ref to it.
603 */
604 if (++ip->hmp->reclaim_count > 256) {
605 ip->hmp->reclaim_count = 0;
606 hammer_flush_inode(ip, HAMMER_FLUSH_FORCE |
607 HAMMER_FLUSH_SIGNAL);
608 } else {
609 hammer_flush_inode(ip, HAMMER_FLUSH_FORCE);
610 }
611 /* retry */
612 }
613
614 /*
615 * The inode still has multiple refs; drop one ref. If a flush was
616 * requested make sure the flusher sees it.
617 */
618 if (flush && ip->flush_state == HAMMER_FST_IDLE)
619 hammer_flush_inode(ip, HAMMER_FLUSH_RELEASE);
620 else
621 hammer_unref(&ip->lock);
622}
623
624/*
625 * Unload and destroy the specified inode. Must be called with one remaining
626 * reference. The reference is disposed of.
627 *
628 * This can only be called in the context of the flusher.
629 */
630static int
631hammer_unload_inode(struct hammer_inode *ip)
632{
633
634 KASSERT(ip->lock.refs == 1,
635 ("hammer_unload_inode: %d refs\n", ip->lock.refs));
636 KKASSERT(ip->vp == NULL);
637 KKASSERT(ip->flush_state == HAMMER_FST_IDLE);
638 KKASSERT(ip->cursor_ip_refs == 0);
639 KKASSERT((ip->flags & HAMMER_INODE_MODMASK) == 0);
640
641 KKASSERT(RB_EMPTY(&ip->rec_tree));
642 KKASSERT(TAILQ_EMPTY(&ip->bio_list));
643 KKASSERT(TAILQ_EMPTY(&ip->bio_alt_list));
644
645 RB_REMOVE(hammer_ino_rb_tree, &ip->hmp->rb_inos_root, ip);
646
647 hammer_uncache_node(&ip->cache[0]);
648 hammer_uncache_node(&ip->cache[1]);
649 --hammer_count_inodes;
650 kfree(ip, M_HAMMER);
651
652 return(0);
653}
654
655/*
656 * A transaction has modified an inode, requiring updates as specified by
657 * the passed flags.
658 *
659 * HAMMER_INODE_RDIRTY: Inode record has been updated
660 * HAMMER_INODE_DDIRTY: Inode data has been updated
661 * HAMMER_INODE_XDIRTY: Dirty frontend buffer cache buffer strategized
662 * HAMMER_INODE_DELETED: Inode record/data must be deleted
663 * HAMMER_INODE_ITIMES: mtime/atime has been updated
664 */
665void
666hammer_modify_inode(hammer_transaction_t trans, hammer_inode_t ip, int flags)
667{
668 KKASSERT ((ip->flags & HAMMER_INODE_RO) == 0 ||
669 (flags & (HAMMER_INODE_RDIRTY|HAMMER_INODE_DDIRTY|
670 HAMMER_INODE_XDIRTY|
671 HAMMER_INODE_DELETED|HAMMER_INODE_ITIMES)) == 0);
672
673 ip->flags |= flags;
674}
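
/*
 * Illustrative use of the flags listed above (hypothetical example, not
 * taken from any particular caller): a front-end operation that changes
 * the inode's size and mtime would do roughly
 *
 *	ip->ino_rec.ino_size = new_size;
 *	ip->ino_rec.ino_mtime = trans->time;
 *	hammer_modify_inode(trans, ip, HAMMER_INODE_RDIRTY |
 *					HAMMER_INODE_ITIMES);
 *
 * so the flusher knows a new inode record must be written out.
 */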
675
676/*
677 * Flush an inode. If the inode is already being flushed it is flagged
678 * for another flush once the current one completes. The interlock is
679 * against front-end transactions; the backend flusher does not hold the lock.
680 *
681 * The flusher must distinguish between the records that are part of the
682 * flush and any new records created in parallel with the flush. The
683 * inode data and truncation fields are also copied. BIOs are a bit more
684 * troublesome because some dirty buffers may not have been queued yet.
685 */
686void
687hammer_flush_inode(hammer_inode_t ip, int flags)
688{
689 if (ip->flush_state != HAMMER_FST_IDLE &&
690 (ip->flags & HAMMER_INODE_MODMASK)) {
691 ip->flags |= HAMMER_INODE_REFLUSH;
692 if (flags & HAMMER_FLUSH_RELEASE) {
693 hammer_unref(&ip->lock);
694 KKASSERT(ip->lock.refs > 0);
695 }
696 return;
697 }
698 if (ip->flush_state == HAMMER_FST_IDLE) {
699 if ((ip->flags & HAMMER_INODE_MODMASK) ||
700 (flags & HAMMER_FLUSH_FORCE)) {
701 /*
702 * Add a reference to represent the inode being queued
703 * to the flusher. If the caller wants us to
704 * release a reference the two cancel each other out.
705 */
706 if ((flags & HAMMER_FLUSH_RELEASE) == 0)
707 hammer_ref(&ip->lock);
708
709 hammer_flush_inode_copysync(ip);
710 /*
711 * Move the inode to the flush list and add a ref to
712 * it representing it on the list.
713 */
714 TAILQ_INSERT_TAIL(&ip->hmp->flush_list, ip, flush_entry);
715 if (flags & HAMMER_FLUSH_SIGNAL)
716 hammer_flusher_async(ip->hmp);
717 }
718 }
719}
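
/*
 * Informal summary of the flush state transitions driven by the routines
 * above and below:
 *
 *	HAMMER_FST_IDLE  -> HAMMER_FST_SETUP   hammer_flush_inode_copysync() starts
 *	HAMMER_FST_SETUP -> HAMMER_FST_FLUSH   state snapshotted, new BIOs diverted
 *	HAMMER_FST_FLUSH -> HAMMER_FST_IDLE    hammer_flush_inode_done()
 *
 * When a flush is already in progress HAMMER_INODE_REFLUSH is set instead
 * of re-entering the state machine.
 */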
720
721/*
722 * Helper routine to copy the frontend synchronization state to the backend.
723 * This routine may be called by either the frontend or the backend.
724 */
725static void
726hammer_flush_inode_copysync(hammer_inode_t ip)
727{
728 int error;
729 int count;
730
731 /*
732 * Prevent anyone else from trying to do the same thing.
733 */
734 ip->flush_state = HAMMER_FST_SETUP;
735
736 /*
737 * Sync the buffer cache. This will queue the BIOs. If called
738 * from the context of the flusher the BIOs are thrown into bio_list
739 * regardless of ip->flush_state.
740 */
741 if (ip->vp != NULL)
742 error = vfsync(ip->vp, MNT_NOWAIT, 1, NULL, NULL);
743 else
744 error = 0;
745
746 /*
747 * This freezes strategy writes; any further BIOs will be queued
748 * to bio_alt_list (unless we are the flusher itself, see above).
749 */
750 ip->flush_state = HAMMER_FST_FLUSH;
751
752 /*
753 * Snapshot the state of the inode for the backend flusher.
754 *
755 * The truncation must be retained in the frontend until after
756 * we've actually performed the record deletion.
757 */
758 ip->sync_flags = (ip->flags & HAMMER_INODE_MODMASK);
759 ip->sync_trunc_off = ip->trunc_off;
760 ip->sync_ino_rec = ip->ino_rec;
761 ip->sync_ino_data = ip->ino_data;
762 ip->flags &= ~HAMMER_INODE_MODMASK |
763 HAMMER_INODE_TRUNCATED | HAMMER_INODE_BUFS;
764
765 /*
766 * Fix up the dirty buffer status.
767 */
768 if (ip->vp == NULL || RB_ROOT(&ip->vp->v_rbdirty_tree) == NULL)
769 ip->flags &= ~HAMMER_INODE_BUFS;
770 if (TAILQ_FIRST(&ip->bio_list))
771 ip->sync_flags |= HAMMER_INODE_BUFS;
772 else
773 ip->sync_flags &= ~HAMMER_INODE_BUFS;
774
775 /*
776 * Set the state for the inode's in-memory records. If some records
777 * could not be marked for backend flush (i.e. deleted records),
778 * re-set the XDIRTY flag.
779 */
780 count = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL,
781 hammer_mark_record_callback, NULL);
782 if (count)
783 ip->flags |= HAMMER_INODE_XDIRTY;
784}
785
786/*
787 * Mark records for backend flush, accumulate a count of the number of
788 * records which could not be marked.
789 */
790static int
791hammer_mark_record_callback(hammer_record_t rec, void *data)
792{
793 if (rec->state == HAMMER_FST_FLUSH) {
794 return(0);
795 } else if ((rec->flags & HAMMER_RECF_DELETED_FE) == 0) {
796 rec->state = HAMMER_FST_FLUSH;
797 hammer_ref(&rec->lock);
798 return(0);
799 } else {
800 return(1);
801 }
802}
803
804
805
806/*
807 * Wait for a previously queued flush to complete
808 */
809void
810hammer_wait_inode(hammer_inode_t ip)
811{
812 while (ip->flush_state == HAMMER_FST_FLUSH) {
813 ip->flags |= HAMMER_INODE_FLUSHW;
814 tsleep(&ip->flags, 0, "hmrwin", 0);
815 }
816}
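
/*
 * A synchronous flush, e.g. for an fsync-style operation, can be built
 * from the two routines above (illustrative sketch only):
 *
 *	hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL);
 *	hammer_wait_inode(ip);
 */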
817
818/*
819 * Called by the backend code when a flush has been completed.
820 * The inode has already been removed from the flush list.
821 *
822 * A pipelined flush can occur, in which case we must re-enter the
823 * inode on the list and re-copy its fields.
824 */
825void
826hammer_flush_inode_done(hammer_inode_t ip)
827{
828 struct bio *bio;
829
830 KKASSERT(ip->flush_state == HAMMER_FST_FLUSH);
831
832 if (ip->sync_flags)
833 kprintf("ip %p leftover sync_flags %08x\n", ip, ip->sync_flags);
834 ip->flags |= ip->sync_flags;
835 ip->flush_state = HAMMER_FST_IDLE;
836
837 /*
838 * Reflush any BIOs that wound up in the alt list. Our inode will
839 * also wind up at the end of the flusher's list.
840 */
841 while ((bio = TAILQ_FIRST(&ip->bio_alt_list)) != NULL) {
842 TAILQ_REMOVE(&ip->bio_alt_list, bio, bio_act);
843 TAILQ_INSERT_TAIL(&ip->bio_list, bio, bio_act);
844 ip->flags |= HAMMER_INODE_XDIRTY;
845 ip->flags |= HAMMER_INODE_REFLUSH;
846 kprintf("rebio %p ip %p @%016llx,%d\n", bio, ip, bio->bio_offset, bio->bio_buf->b_bufsize);
847 }
848
849 /*
850 * If the frontend made more changes and requested another flush,
851 * do it.
852 */
853 if (ip->flags & HAMMER_INODE_REFLUSH) {
854 ip->flags &= ~HAMMER_INODE_REFLUSH;
855 hammer_flush_inode(ip, 0);
856 } else {
857 if (ip->flags & HAMMER_INODE_FLUSHW) {
858 ip->flags &= ~HAMMER_INODE_FLUSHW;
859 wakeup(&ip->flags);
860 }
861 }
862 hammer_rel_inode(ip, 0);
863}
864
865/*
866 * Called from hammer_sync_inode() to synchronize in-memory records
867 * to the media.
868 */
869static int
870hammer_sync_record_callback(hammer_record_t record, void *data)
871{
872 hammer_transaction_t trans = data;
873 int error;
874
875 /*
876 * Skip records that do not belong to the current flush. Records
877 * belonging to the flush will have been referenced for us.
878 *
879 * Skip records that were deleted by the backend itself. Records
880 * deleted by the frontend after their state has changed to FLUSH
881 * are not considered to be deleted by the backend.
882 *
883 * XXX special delete-on-disk records can be deleted by the backend
884 * prior to the sync due to a truncation operation. This is kinda
885 * a hack to deal with it.
886 */
887 if (record->state != HAMMER_FST_FLUSH)
888 return(0);
889 if (record->flags & HAMMER_RECF_DELETED_BE) {
890 hammer_flush_record_done(record);
891 return(0);
892 }
893
894 /*
895 * Assign the create_tid for new records. Deletions already
896 * have the record's entire key properly set up.
897 */
898 if ((record->flags & HAMMER_RECF_DELETE_ONDISK) == 0)
899 record->rec.inode.base.base.create_tid = trans->tid;
900 error = hammer_ip_sync_record(trans, record);
901
902 if (error) {
903 error = -error;
904 if (error != -ENOSPC) {
905 kprintf("hammer_sync_record_callback: sync failed rec "
906 "%p, error %d\n", record, error);
907 Debugger("sync failed rec");
908 }
909 }
910 hammer_flush_record_done(record);
911 return(error);
912}
913
914/*
915 * XXX error handling
916 */
917int
918hammer_sync_inode(hammer_inode_t ip, int handle_delete)
919{
920 struct hammer_transaction trans;
921 struct bio *bio;
922 hammer_depend_t depend;
923 int error, tmp_error;
924
925 if ((ip->sync_flags & HAMMER_INODE_MODMASK) == 0 &&
926 handle_delete == 0) {
927 return(0);
928 }
929
930
931 hammer_lock_ex(&ip->lock);
932
933 hammer_start_transaction_fls(&trans, ip->hmp);
934
935 /*
936 * Any (directory) records this inode depends on must also be
937 * synchronized. The directory itself only needs to be flushed
938 * if its inode is not already on-disk.
939 */
940 while ((depend = TAILQ_FIRST(&ip->depend_list)) != NULL) {
941 hammer_record_t record;
942
943 record = depend->record;
944 TAILQ_REMOVE(&depend->record->depend_list, depend, rec_entry);
945 TAILQ_REMOVE(&ip->depend_list, depend, ip_entry);
946 --ip->depend_count;
947 if (record->state != HAMMER_FST_FLUSH) {
948 record->state = HAMMER_FST_FLUSH;
949 /* add ref (steal ref from dependency) */
950 } else {
951 /* remove ref related to dependency */
952 /* record still has at least one ref from state */
953 hammer_unref(&record->lock);
954 KKASSERT(record->lock.refs > 0);
955 }
956 if (record->ip->flags & HAMMER_INODE_ONDISK) {
957 kprintf("I");
958 hammer_sync_record_callback(record, &trans);
959 } else {
960 kprintf("J");
961 hammer_flush_inode(record->ip, 0);
962 }
963 hammer_unref(&ip->lock);
964 KKASSERT(ip->lock.refs > 0);
965 kfree(depend, M_HAMMER);
966 }
967
968
969 /*
970 * Sync inode deletions and truncations.
971 */
972 if (ip->sync_ino_rec.ino_nlinks == 0 && handle_delete &&
973 (ip->flags & HAMMER_INODE_GONE) == 0) {
974 /*
975 * Handle the case where the inode has been completely deleted
976 * and is no longer referenceable from the filesystem
977 * namespace.
978 *
979 * NOTE: We do not set the RDIRTY flag when updating the
980 * delete_tid, setting HAMMER_INODE_DELETED takes care of it.
981 */
982
983 ip->flags |= HAMMER_INODE_GONE | HAMMER_INODE_DELETED;
984 ip->flags &= ~HAMMER_INODE_TRUNCATED;
985 ip->sync_flags &= ~HAMMER_INODE_TRUNCATED;
986 if (ip->vp)
987 vtruncbuf(ip->vp, 0, HAMMER_BUFSIZE);
988 error = hammer_ip_delete_range_all(&trans, ip);
989 if (error)
990 Debugger("hammer_ip_delete_range_all errored");
991
992 /*
993 * Sanity check. The only records that remain should be
994 * marked for back-end deletion.
995 */
996 {
997 hammer_record_t rec;
998
999 RB_FOREACH(rec, hammer_rec_rb_tree, &ip->rec_tree) {
1000 KKASSERT(rec->flags & HAMMER_RECF_DELETED_BE);
1001 }
1002 }
1003
1004 /*
1005 * Set delete_tid in both the frontend and backend
1006 * copy of the inode record.
1007 */
1008 ip->ino_rec.base.base.delete_tid = trans.tid;
1009 ip->sync_ino_rec.base.base.delete_tid = trans.tid;
1010
1011 /*
1012 * Indicate that the inode has/is-being deleted.
1013 */
1014 ip->flags |= HAMMER_INODE_DELETED;
1015 hammer_modify_inode(&trans, ip, HAMMER_INODE_RDIRTY);
1016 hammer_modify_volume(&trans, trans.rootvol, NULL, 0);
1017 --ip->hmp->rootvol->ondisk->vol0_stat_inodes;
1018 hammer_modify_volume_done(trans.rootvol);
1019 } else if (ip->sync_flags & HAMMER_INODE_TRUNCATED) {
1020 /*
1021 * Interlock trunc_off. The VOP front-end may continue to
1022 * make adjustments to it while we are blocked.
1023 */
1024 off_t trunc_off;
1025 off_t aligned_trunc_off;
1026
1027 trunc_off = ip->sync_trunc_off;
1028 aligned_trunc_off = (trunc_off + HAMMER_BUFMASK) &
1029 ~HAMMER_BUFMASK64;
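		/*
		 * Worked example (assuming the usual 16K HAMMER_BUFSIZE):
		 * a truncation to offset 10000 yields an aligned_trunc_off
		 * of 16384.  The partial buffer below that point was already
		 * handled by the front-end; only whole buffers at and beyond
		 * aligned_trunc_off are deleted below.
		 */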
1030
1031 /*
1032 * Delete any whole blocks on-media. The front-end has
1033 * already cleaned out any partial block and made it
1034 * pending. The front-end may have updated trunc_off
1035 * while we were blocked so do not just unconditionally
1036 * set it to the maximum offset.
1037 */
1038 kprintf("sync truncation range @ %016llx\n", aligned_trunc_off);
1039 error = hammer_ip_delete_range(&trans, ip,
1040 aligned_trunc_off,
1041 0x7FFFFFFFFFFFFFFFLL);
1042 if (error)
1043 Debugger("hammer_ip_delete_range errored");
1044 ip->sync_flags &= ~HAMMER_INODE_TRUNCATED;
1045 if (ip->trunc_off >= trunc_off) {
1046 ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL;
1047 ip->flags &= ~HAMMER_INODE_TRUNCATED;
1048 }
1049 }
1050
1051 error = 0; /* XXX vfsync used to be here */
1052
1053 /*
1054 * Flush any queued BIOs.
1055 */
1056 while ((bio = TAILQ_FIRST(&ip->bio_list)) != NULL) {
1057 TAILQ_REMOVE(&ip->bio_list, bio, bio_act);
1058#if 0
1059 kprintf("dowrite %016llx ip %p bio %p @ %016llx\n", trans.tid, ip, bio, bio->bio_offset);
1060#endif
1061 tmp_error = hammer_dowrite(&trans, ip, bio);
1062 if (tmp_error)
1063 error = tmp_error;
1064 }
1065 ip->sync_flags &= ~HAMMER_INODE_BUFS;
1066
1067 /*
1068 * Now sync related records.
1069 */
1070 for (;;) {
1071 tmp_error = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL,
1072 hammer_sync_record_callback, &trans);
1073 KKASSERT(tmp_error <= 0);
1074 if (tmp_error < 0)
1075 tmp_error = -tmp_error;
1076 if (tmp_error)
1077 error = tmp_error;
1078 break;
1079 }
1080
1081 /*
1082 * XDIRTY represents rec_tree and bio_list. However, rec_tree may
1083 * contain new front-end records so short of scanning it we can't
1084 * just test whether it is empty or not.
1085 *
1086 * If no error occurred assume we succeeded.
1087 */
1088 if (error == 0)
1089 ip->sync_flags &= ~HAMMER_INODE_XDIRTY;
1090
1091 if (error)
1092 Debugger("RB_SCAN errored");
1093
1094 /*
1095 * Now update the inode's on-disk inode-data and/or on-disk record.
1096 * DELETED and ONDISK are managed only in ip->flags.
1097 */
1098 switch(ip->flags & (HAMMER_INODE_DELETED | HAMMER_INODE_ONDISK)) {
1099 case HAMMER_INODE_DELETED|HAMMER_INODE_ONDISK:
1100 /*
1101 * If deleted and on-disk, don't set any additional flags.
1102 * The delete flag takes care of things.
1103 */
1104 break;
1105 case HAMMER_INODE_DELETED:
1106 /*
1107 * Take care of the case where a deleted inode was never
1108 * flushed to the disk in the first place.
1109 */
1110 ip->sync_flags &= ~(HAMMER_INODE_RDIRTY|HAMMER_INODE_DDIRTY|
1111 HAMMER_INODE_XDIRTY|HAMMER_INODE_ITIMES);
1112 while (RB_ROOT(&ip->rec_tree)) {
1113 hammer_record_t rec = RB_ROOT(&ip->rec_tree);
1114 hammer_ref(&rec->lock);
1115 KKASSERT(rec->lock.refs == 1);
1116 hammer_delete_mem_record(rec);
1117 rec->flags |= HAMMER_RECF_DELETED_BE;
1118 hammer_rel_mem_record(rec);
1119 }
1120 break;
1121 case HAMMER_INODE_ONDISK:
1122 /*
1123 * If already on-disk, do not set any additional flags.
1124 */
1125 break;
1126 default:
1127 /*
1128 * If not on-disk and not deleted, set both dirty flags
1129 * to force an initial record to be written. Also set
1130 * the create_tid for the inode.
1131 *
1132 * Set create_tid in both the frontend and backend
1133 * copy of the inode record.
1134 */
1135 ip->ino_rec.base.base.create_tid = trans.tid;
1136 ip->sync_ino_rec.base.base.create_tid = trans.tid;
1137 ip->sync_flags |= HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY;
1138 break;
1139 }
1140
1141 /*
1142 * If RDIRTY or DDIRTY is set, write out a new record. If the inode
1143 * is already on-disk the old record is marked as deleted.
1144 *
1145 * If DELETED is set hammer_update_inode() will delete the existing
1146 * record without writing out a new one.
1147 *
1148 * If *ONLY* the ITIMES flag is set we can update the record in-place.
1149 */
1150 if (ip->flags & HAMMER_INODE_DELETED) {
1151 error = hammer_update_inode(&trans, ip);
1152 } else
1153 if ((ip->sync_flags & (HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY |
1154 HAMMER_INODE_ITIMES)) == HAMMER_INODE_ITIMES) {
1155 error = hammer_update_itimes(&trans, ip);
1156 } else
1157 if (ip->sync_flags & (HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY |
1158 HAMMER_INODE_ITIMES)) {
1159 error = hammer_update_inode(&trans, ip);
1160 }
1161 if (error)
1162 Debugger("hammer_update_itimes/inode errored");
1163
1164 /*
1165 * Save the TID we used to sync the inode with to make sure we
1166 * do not improperly reuse it.
1167 */
1168 hammer_unlock(&ip->lock);
1169 hammer_done_transaction(&trans);
1170 return(error);
1171}
1172