2 * Copyright (c) 2007 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.10 2007/11/30 00:16:56 dillon Exp $
/*
 * hammer_vop_inactive() - presumably the VOP_INACTIVE handler (inferred
 * from the name and the vop_inactive_args argument; TODO confirm).
 *
 * Fetches the HAMMER inode behind ap->a_vp with VTOI().  When the inode's
 * link count is zero and the mount is not read-only, the inode is synced
 * with MNT_NOWAIT and handle_delete = 1.
 *
 * NOTE(review): the embedded line numbers jump from 44 to 55 and no
 * return statement is part of this listing, so any earlier validation
 * of ip and the value returned cannot be described from here.
 */
42 hammer_vop_inactive(struct vop_inactive_args *ap)
44 struct hammer_inode *ip = VTOI(ap->a_vp);
55 * If the inode no longer has any references we recover its
56 * in-memory resources immediately.
/* Skip the sync on read-only mounts (MNT_RDONLY set in mnt_flag). */
58 if (ip->ino_rec.ino_nlinks == 0 &&
59 (ip->hmp->mp->mnt_flag & MNT_RDONLY) == 0) {
60 hammer_sync_inode(ip, MNT_NOWAIT, 1);
/*
 * hammer_vop_reclaim() - presumably the VOP_RECLAIM handler (inferred
 * from the name and the vop_reclaim_args argument; TODO confirm).
 *
 * If the vnode's v_data points at a HAMMER inode, one inode reference is
 * released through hammer_rel_inode() with flush = 1.
 *
 * NOTE(review): the declaration/assignment of vp and source lines 77-78
 * are not part of this listing; they presumably break the vnode<->inode
 * links before the release -- verify against the complete file.
 */
66 hammer_vop_reclaim(struct vop_reclaim_args *ap)
68 struct hammer_inode *ip;
74 * Release the vnode association and ask that the inode be flushed.
76 if ((ip = vp->v_data) != NULL) {
/* flush = 1: hammer_rel_inode() marks the inode HAMMER_INODE_FLUSH. */
79 hammer_rel_inode(ip, 1);
85 * Obtain a vnode for the specified inode number. An exclusively locked
/*
 * hammer_vfs_vget() - obtain a vnode for inode number ino on mount mp.
 *
 * mp   - mount point; mnt_data is used as the struct hammer_mount.
 * ino  - inode number, passed to hammer_get_inode() as the object id.
 * vpp  - receives the vnode from hammer_get_vnode(), requested with
 *        LK_EXCLUSIVE.
 *
 * The inode is looked up as-of hmp->asof.  The reference returned by
 * hammer_get_inode() is dropped again (flush = 0) once the vnode has
 * been requested.
 *
 * NOTE(review): source lines 101-104 (presumably the failure path of
 * hammer_get_inode()) and the return statement are not part of this
 * listing.
 */
89 hammer_vfs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
91 struct hammer_mount *hmp = (void *)mp->mnt_data;
92 struct hammer_inode *ip;
96 * Get/allocate the hammer_inode structure. The structure must be
97 * unlocked while we manipulate the related vnode to avoid a
100 ip = hammer_get_inode(hmp, ino, hmp->asof, &error);
105 error = hammer_get_vnode(ip, LK_EXCLUSIVE, vpp);
/* Drop the reference taken by hammer_get_inode(); no forced flush. */
106 hammer_rel_inode(ip, 0);
111 * Return a locked vnode for the specified inode. The inode must be
112 * referenced but NOT LOCKED on entry and will remain referenced on
/*
 * hammer_get_vnode() - return a vnode for inode ip in *vpp.
 *
 * ip     - HAMMER inode; ip->vp is its currently attached vnode, if any.
 * lktype - requested lock type.  NOTE(review): none of the visible lines
 *          use it; the vget() below hard-codes LK_EXCLUSIVE -- confirm
 *          whether that is intended.
 * vpp    - receives the vnode.
 *
 * No vnode attached: a new one is allocated with getnewvnode(), then
 * ip->vp is re-checked under the exclusive inode lock (a non-NULL value
 * at that point presumably means another thread attached a vnode first).
 * Otherwise the inode gains a reference, the vnode type is derived from
 * the inode's obj_type, v_data is pointed at the inode, and regular
 * files get vinitvmio() with the inode's size.
 *
 * Vnode already attached: it is acquired with vget().
 *
 * NOTE(review): the enclosing retry loop, the ip->vp assignment, the
 * error paths and the return are not part of this listing.
 */
116 hammer_get_vnode(struct hammer_inode *ip, int lktype, struct vnode **vpp)
122 if ((vp = ip->vp) == NULL) {
123 error = getnewvnode(VT_HAMMER, ip->hmp->mp, vpp, 0, 0);
126 hammer_lock_ex(&ip->lock);
/* Re-check under the inode lock: ip->vp may have been set meanwhile. */
127 if (ip->vp != NULL) {
128 hammer_unlock(&ip->lock);
/* The attached vnode holds its own reference on the inode. */
133 hammer_ref(&ip->lock);
136 vp->v_type = hammer_get_vnode_type(
137 ip->ino_rec.base.base.obj_type);
138 vp->v_data = (void *)ip;
139 /* vnode locked by getnewvnode() */
140 /* make related vnode dirty if inode dirty? */
141 hammer_unlock(&ip->lock);
142 if (vp->v_type == VREG)
143 vinitvmio(vp, ip->ino_rec.ino_size);
148 * loop if the vget fails (aka races), or if the vp
149 * no longer matches ip->vp.
151 if (vget(vp, LK_EXCLUSIVE) == 0) {
162 * Acquire a HAMMER inode. The returned inode is not locked. These functions
163 * do not attach or detach the related vnode (use hammer_get_vnode() for
/*
 * hammer_get_inode() - look up or load the in-memory inode for
 * (obj_id, asof) on mount hmp.
 *
 * The in-memory red-black tree hmp->rb_inos_root is searched first; a
 * hit gains a reference.  On a miss a zeroed hammer_inode is allocated
 * (M_WAITOK), the on-disk inode record and data are located through a
 * B-Tree cursor keyed on (obj_id, key 0, create_tid = asof,
 * HAMMER_RECTYPE_INODE) and copied into the new structure, which is then
 * referenced and inserted into hmp->rb_inos_root.  The lookup result is
 * stored through errorp.
 *
 * A failed RB_INSERT means an inode with the same key is already in the
 * tree; the new inode's node cache and reference are then released.
 *
 * NOTE(review): the remainder of the parameter list, the early return on
 * a cache hit, the error checks after hammer_btree_lookup(), the retry
 * and free paths, and the final return are not part of this listing.
 * The remnant comment below says "insert the inode into the B-Tree",
 * but the visible insert targets the in-memory RB tree.
 */
166 struct hammer_inode *
167 hammer_get_inode(struct hammer_mount *hmp, u_int64_t obj_id, hammer_tid_t asof,
170 struct hammer_inode_info iinfo;
171 struct hammer_cursor cursor;
172 struct hammer_inode *ip;
175 * Determine if we already have an inode cached. If we do then
178 iinfo.obj_id = obj_id;
179 iinfo.obj_asof = asof;
181 ip = hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, &iinfo);
183 hammer_ref(&ip->lock);
/* M_ZERO: every field not set explicitly below starts out as zero. */
188 ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
190 ip->obj_asof = iinfo.obj_asof;
192 RB_INIT(&ip->rec_tree);
195 * Locate the on-disk inode.
196 * If we do not have an inode cached search the HAMMER on-disk B-Tree
200 hammer_init_cursor_hmp(&cursor, hmp);
201 cursor.key_beg.obj_id = ip->obj_id;
202 cursor.key_beg.key = 0;
203 cursor.key_beg.create_tid = iinfo.obj_asof;
204 cursor.key_beg.delete_tid = 0;
205 cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
206 cursor.key_beg.obj_type = 0;
/* Request both the record and its data from the lookup. */
207 cursor.flags = HAMMER_CURSOR_GET_RECORD | HAMMER_CURSOR_GET_DATA;
209 *errorp = hammer_btree_lookup(&cursor);
212 * On success the B-Tree lookup will hold the appropriate
213 * buffer cache buffers and provide a pointer to the requested
214 * information. Copy the information to the in-memory inode.
217 ip->ino_rec = cursor.record->inode;
218 ip->ino_data = cursor.data->inode;
/* Remember the B-Tree node in ip->cache before releasing the cursor. */
220 hammer_cache_node(cursor.node, &ip->cache);
221 hammer_done_cursor(&cursor);
224 * On success load the inode's record and data and insert the
225 * inode into the B-Tree. It is possible to race another lookup
226 * insertion of the same inode so deal with that condition too.
229 hammer_ref(&ip->lock);
/* Non-zero RB_INSERT: an entry with the same key already exists. */
230 if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
231 hammer_uncache_node(&ip->cache);
232 hammer_unref(&ip->lock);
236 ip->flags |= HAMMER_INODE_ONDISK;
245 * Create a new filesystem object, returning the inode in *ipp. The
246 * returned inode will be referenced but not locked.
248 * The inode is created in-memory and will be delay-synchronized to the
/*
 * hammer_create_inode() - build a new in-memory inode.
 *
 * trans - transaction; supplies the object id (hammer_alloc_tid), the
 *         record id (hammer_alloc_recid) and trans->tid, which is used
 *         for atime, mtime, ctime, create_tid and last_tid.
 * vap   - attributes of the new object: va_type, va_mode and optional
 *         uid/gid overrides.
 * cred  - credentials handed to vop_helper_create_uid().
 * dip   - parent directory inode.
 * ipp   - result pointer (its assignment is not part of this listing).
 *
 * The inode is allocated zeroed, flagged dirty (DDIRTY | RDIRTY | ...),
 * starts with size 0 and nlinks 0, and is referenced and inserted into
 * hmp->rb_inos_root.  A key collision on insert panics.
 *
 * NOTE(review):
 *  - parent_obj_id tolerates dip == NULL, but the uid/gid defaulting
 *    that follows dereferences dip unconditionally in the visible lines.
 *  - ino_data.uid is only written when VA_UID_UUID_VALID is set or
 *    va_uid != VNOVAL; otherwise the computed xuid appears unused and
 *    uid stays as zeroed by M_ZERO -- confirm this is intended.
 *  - the kprintf() of rootvol looks like leftover debugging output.
 *  - local declarations (ip, hmp, xuid), the tail of the flags
 *    expression, the last argument of vop_helper_create_uid() and the
 *    return are not part of this listing.
 */
252 hammer_create_inode(hammer_transaction_t trans, struct vattr *vap,
253 struct ucred *cred, hammer_inode_t dip,
254 struct hammer_inode **ipp)
261 ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
262 ip->obj_id = hammer_alloc_tid(trans);
263 KKASSERT(ip->obj_id != 0);
264 ip->obj_asof = hmp->asof;
266 ip->flags = HAMMER_INODE_DDIRTY | HAMMER_INODE_RDIRTY |
268 ip->last_tid = trans->tid;
270 RB_INIT(&ip->rec_tree);
272 ip->ino_rec.ino_atime = trans->tid;
273 ip->ino_rec.ino_mtime = trans->tid;
274 ip->ino_rec.ino_size = 0;
275 ip->ino_rec.ino_nlinks = 0;
277 kprintf("rootvol %p ondisk %p\n", hmp->rootvol, hmp->rootvol->ondisk);
278 ip->ino_rec.base.rec_id = hammer_alloc_recid(trans);
279 KKASSERT(ip->ino_rec.base.rec_id != 0);
/* Same key layout that hammer_get_inode() uses for its B-Tree lookup. */
280 ip->ino_rec.base.base.obj_id = ip->obj_id;
281 ip->ino_rec.base.base.key = 0;
282 ip->ino_rec.base.base.create_tid = trans->tid;
283 ip->ino_rec.base.base.delete_tid = 0;
284 ip->ino_rec.base.base.rec_type = HAMMER_RECTYPE_INODE;
285 ip->ino_rec.base.base.obj_type = hammer_get_obj_type(vap->va_type);
287 ip->ino_data.version = HAMMER_INODE_DATA_VERSION;
288 ip->ino_data.mode = vap->va_mode;
289 ip->ino_data.ctime = trans->tid;
290 ip->ino_data.parent_obj_id = (dip) ? dip->ino_rec.base.base.obj_id : 0;
293 * Calculate default uid/gid and overwrite with information from
/* NOTE(review): dip is dereferenced here without the NULL test above. */
296 xuid = hammer_to_unix_xid(&dip->ino_data.uid);
297 ip->ino_data.gid = dip->ino_data.gid;
298 xuid = vop_helper_create_uid(hmp->mp, dip->ino_data.mode, xuid, cred,
/* Second assignment of mode, after vop_helper_create_uid() has run. */
300 ip->ino_data.mode = vap->va_mode;
302 if (vap->va_vaflags & VA_UID_UUID_VALID)
303 ip->ino_data.uid = vap->va_uid_uuid;
304 else if (vap->va_uid != (uid_t)VNOVAL)
305 hammer_guid_to_uuid(&ip->ino_data.uid, xuid);
306 if (vap->va_vaflags & VA_GID_UUID_VALID)
307 ip->ino_data.gid = vap->va_gid_uuid;
308 else if (vap->va_gid != (gid_t)VNOVAL)
309 hammer_guid_to_uuid(&ip->ino_data.gid, vap->va_gid);
311 hammer_ref(&ip->lock);
/* obj_id was freshly allocated, so a duplicate key is treated as fatal. */
312 if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
313 hammer_unref(&ip->lock);
314 panic("hammer_create_inode: duplicate obj_id %llx", ip->obj_id);
/*
 * hammer_update_inode() - replace the on-disk inode record with the
 * current in-memory copy.
 *
 * Step 1: if the inode is ONDISK and not yet DELONDISK, the existing
 * record is located through a cursor keyed on (obj_id, key 0,
 * create_tid = obj_asof, HAMMER_RECTYPE_INODE) and deleted with
 * ip->last_tid; DELONDISK is then set.
 *
 * Step 2: if no error occurred and the inode is not DELETED, a memory
 * record is built from ip->ino_rec with create_tid = ip->last_tid and
 * ip->ino_data as its data, synced with hammer_ip_sync_record() and
 * freed.  RDIRTY, DDIRTY and DELONDISK are cleared and ONDISK is set.
 *
 * HAMMER_INODE_TID is cleared at the end of the visible code.
 *
 * NOTE(review): the dirty flags are cleared immediately after
 * hammer_ip_sync_record() with no visible check of its result (embedded
 * lines 375-379 are consecutive) -- confirm that a failed sync should
 * still mark the inode clean.  The declaration/initialisation of error,
 * the conditions around the lookup result and the return are not part
 * of this listing.
 */
321 hammer_update_inode(hammer_inode_t ip)
323 struct hammer_cursor cursor;
324 hammer_record_t record;
328 * Locate the record on-disk and mark it as deleted. Both the B-Tree
329 * node and the record must be marked deleted. The record may or
330 * may not be physically deleted, depending on the retention policy.
332 * If the inode has already been deleted on-disk we have nothing
335 * XXX Update the inode record and data in-place if the retention
/* True only when ONDISK is set and DELONDISK is clear. */
340 if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
341 HAMMER_INODE_ONDISK) {
342 hammer_init_cursor_ip(&cursor, ip);
343 cursor.key_beg.obj_id = ip->obj_id;
344 cursor.key_beg.key = 0;
345 cursor.key_beg.create_tid = ip->obj_asof;
346 cursor.key_beg.delete_tid = 0;
347 cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
348 cursor.key_beg.obj_type = 0;
349 cursor.flags = HAMMER_CURSOR_GET_RECORD;
351 error = hammer_btree_lookup(&cursor);
354 error = hammer_ip_delete_record(&cursor, ip->last_tid);
356 ip->flags |= HAMMER_INODE_DELONDISK;
358 hammer_cache_node(cursor.node, &ip->cache);
359 hammer_done_cursor(&cursor);
363 * Write out a new record if the in-memory inode is not marked
364 * as having been deleted.
366 * If the inode has been deleted permanently, HAMMER_INODE_DELONDISK
367 * will remain set and prevent further updates.
369 if (error == 0 && (ip->flags & HAMMER_INODE_DELETED) == 0) {
370 record = hammer_alloc_mem_record(ip);
371 record->rec.inode = ip->ino_rec;
/* The replacement record is stamped with the inode's last_tid. */
372 record->rec.inode.base.base.create_tid = ip->last_tid;
373 record->rec.inode.base.data_len = sizeof(ip->ino_data);
/* The record's data points into the inode itself; it is not copied. */
374 record->data = (void *)&ip->ino_data;
375 error = hammer_ip_sync_record(record);
376 hammer_free_mem_record(record);
377 ip->flags &= ~(HAMMER_INODE_RDIRTY|HAMMER_INODE_DDIRTY|
378 HAMMER_INODE_DELONDISK);
379 ip->flags |= HAMMER_INODE_ONDISK;
381 ip->flags &= ~HAMMER_INODE_TID;
386 * Release a reference on an inode and unload it if told to flush.
/*
 * hammer_rel_inode() - drop one reference on ip.
 *
 * ip    - inode whose reference count (ip->lock.refs) is decremented.
 * flush - non-zero requests that the inode be unloaded once unreferenced.
 *
 * HAMMER_INODE_FLUSH is set when flush is non-zero or when the inode has
 * no links left.  If the reference count is now zero and the FLUSH flag
 * is set (possibly from an earlier call), the inode is unloaded through
 * hammer_unload_inode().
 */
389 hammer_rel_inode(struct hammer_inode *ip, int flush)
391 hammer_unref(&ip->lock);
392 if (flush || ip->ino_rec.ino_nlinks == 0)
393 ip->flags |= HAMMER_INODE_FLUSH;
/* The flag is sticky: a later release that reaches zero also unloads. */
394 if (ip->lock.refs == 0 && (ip->flags & HAMMER_INODE_FLUSH))
395 hammer_unload_inode(ip, NULL);
399 * Unload and destroy the specified inode.
401 * (called via RB_SCAN)
/*
 * hammer_unload_inode() - sync an unreferenced inode and remove it from
 * the mount's in-memory inode tree.
 *
 * ip   - inode to unload; asserted to have zero references and no
 *        attached vnode.
 * data - unused.
 *
 * A temporary reference is taken, the inode is synced with MNT_WAIT and
 * handle_delete = 1, a failure is reported with kprintf(), and the inode
 * is removed from hmp->rb_inos_root and its node cache released.
 *
 * NOTE(review): the condition guarding the kprintf(), the release of
 * the temporary reference, the freeing of ip and the return value are
 * not part of this listing.
 */
404 hammer_unload_inode(struct hammer_inode *ip, void *data __unused)
408 KASSERT(ip->lock.refs == 0,
409 ("hammer_unload_inode: %d refs\n", ip->lock.refs));
410 KKASSERT(ip->vp == NULL);
/* Take a reference for the duration of the sync below. */
411 hammer_ref(&ip->lock);
413 error = hammer_sync_inode(ip, MNT_WAIT, 1);
415 kprintf("hammer_sync_inode failed error %d\n", error);
417 RB_REMOVE(hammer_ino_rb_tree, &ip->hmp->rb_inos_root, ip);
419 hammer_uncache_node(&ip->cache);
425 * A transaction has modified an inode, requiring a new record and possibly
426 * also data to be written out.
428 * last_tid is the TID to use for the disk sync.
/*
 * hammer_modify_inode() - note that transaction trans modified ip.
 *
 * trans - transaction performing the modification.
 * ip    - inode being modified.
 * flags - HAMMER_INODE_* bits describing the modification.
 *
 * If the caller passes HAMMER_INODE_TID and the inode does not have
 * that flag yet, ip->last_tid is set to trans->tid; an inode that
 * already has the flag keeps its existing last_tid.
 *
 * NOTE(review): the rest of the body (presumably merging flags into
 * ip->flags) is not part of this listing.
 */
431 hammer_modify_inode(struct hammer_transaction *trans,
432 struct hammer_inode *ip, int flags)
434 if ((flags & HAMMER_INODE_TID) && (ip->flags & HAMMER_INODE_TID) == 0) {
435 ip->last_tid = trans->tid;
441 * Sync any dirty buffers and records associated with an inode. The
442 * inode's last_tid field is used as the transaction id for the sync,
443 * overriding any intermediate TIDs that were used for records. Note
444 * that the dirty buffer cache buffers do not have any knowledge of
445 * the transaction id they were modified under.
/*
 * hammer_sync_inode_callback() - per-record callback passed to RB_SCAN
 * over an inode's rec_tree by hammer_sync_inode().
 *
 * rec  - in-memory record being visited.
 * data - unused.
 *
 * Records not flagged HAMMER_RECF_DELETED are written out with
 * hammer_ip_sync_record(); a failure is reported with kprintf().  The
 * record is released with hammer_free_mem_record().
 *
 * NOTE(review): the initialisation of error, the condition guarding the
 * kprintf(), its remaining arguments and the callback's return value
 * are not part of this listing.
 */
448 hammer_sync_inode_callback(hammer_record_t rec, void *data __unused)
453 if ((rec->flags & HAMMER_RECF_DELETED) == 0)
454 error = hammer_ip_sync_record(rec);
457 kprintf("hammer_sync_inode_callback: sync failed rec %p\n",
461 hammer_free_mem_record(rec);
/*
 * hammer_sync_inode() - write an inode's buffers, records and inode
 * record to disk inside a single transaction.
 *
 * ip            - inode to sync; locked exclusively for the duration.
 * waitfor       - passed through to vfsync() (callers in this file use
 *                 MNT_WAIT or MNT_NOWAIT).
 * handle_delete - non-zero allows an inode with nlinks == 0 to be
 *                 destroyed.
 *
 * Order of operations in the visible code:
 *  1. Delete path (nlinks == 0 && handle_delete): truncate the vnode's
 *     buffers, delete the whole key range, stamp delete_tid with this
 *     transaction's tid and mark the inode DELETED.
 *  2. vfsync() the vnode.
 *  3. RB_SCAN the record tree with hammer_sync_inode_callback().
 *  4. Adjust flags by (DELETED, ONDISK) state; a deleted inode that was
 *     never on disk has its dirty flags cleared and remaining in-memory
 *     records freed; an inode that is neither gets both dirty flags.
 *  5. If RDIRTY, DDIRTY or DELETED is set, call hammer_update_inode().
 *  6. Commit the transaction and unlock the inode.
 *
 * NOTE(review): declarations of error and r, the guards around the
 * vtruncbuf()/vfsync() calls (ip->vp may be NULL per
 * hammer_unload_inode()'s assertion), the break/default statements of
 * the switch, the error checks and the return are not part of this
 * listing.
 */
469 hammer_sync_inode(hammer_inode_t ip, int waitfor, int handle_delete)
471 struct hammer_transaction trans;
475 hammer_lock_ex(&ip->lock);
476 hammer_start_transaction(&trans, ip->hmp);
479 * If the inode has been deleted (nlinks == 0), and the OS no longer
480 * has any references to it (handle_delete != 0), clean up in-memory
483 * NOTE: We do not set the RDIRTY flag when updating the delete_tid,
484 * setting HAMMER_INODE_DELETED takes care of it.
486 * NOTE: Because we may sync records within this new transaction,
487 * force the inode update later on to use our transaction id or
488 * the delete_tid of the inode may be less then the create_tid of
489 * the inode update. XXX shouldn't happen but don't take the chance.
491 if (ip->ino_rec.ino_nlinks == 0 && handle_delete) {
493 vtruncbuf(ip->vp, 0, HAMMER_BUFSIZE);
494 error = hammer_ip_delete_range(&trans, ip,
495 HAMMER_MIN_KEY, HAMMER_MAX_KEY);
496 KKASSERT(RB_EMPTY(&ip->rec_tree));
/*
 * Clearing HAMMER_INODE_TID first makes hammer_modify_inode() below
 * store trans.tid in ip->last_tid.
 */
497 ip->flags &= ~HAMMER_INODE_TID;
498 ip->ino_rec.base.base.delete_tid = trans.tid;
499 hammer_modify_inode(&trans, ip,
500 HAMMER_INODE_DELETED | HAMMER_INODE_TID);
504 * Sync the buffer cache
507 error = vfsync(ip->vp, waitfor, 1, NULL, NULL);
512 * Now sync related records
515 r = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL,
516 hammer_sync_inode_callback, NULL);
522 * Now update the inode's on-disk inode-data and/or on-disk record.
524 switch(ip->flags & (HAMMER_INODE_DELETED|HAMMER_INODE_ONDISK)) {
525 case HAMMER_INODE_DELETED|HAMMER_INODE_ONDISK:
527 * If deleted and on-disk, don't set any additional flags.
528 * the delete flag takes care of things.
531 case HAMMER_INODE_DELETED:
533 * Take care of the case where a deleted inode was never
534 * flushed to the disk in the first place.
536 ip->flags &= ~(HAMMER_INODE_RDIRTY|HAMMER_INODE_DDIRTY);
/* Free every in-memory record still attached to the inode. */
537 while (RB_ROOT(&ip->rec_tree))
538 hammer_free_mem_record(RB_ROOT(&ip->rec_tree));
540 case HAMMER_INODE_ONDISK:
542 * If already on-disk, do not set any additional flags.
547 * If not on-disk and not deleted, set both dirty flags
548 * to force an initial record to be written.
550 ip->flags |= HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY;
555 * If RDIRTY or DDIRTY is set, write out a new record. If the
556 * inode is already on-disk, the old record is marked as deleted.
558 if (ip->flags & (HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY |
559 HAMMER_INODE_DELETED)) {
560 error = hammer_update_inode(ip);
562 hammer_commit_transaction(&trans);
563 hammer_unlock(&ip->lock);
568 * Access the filesystem buffer containing the cluster-relative byte
569 * offset, validate the buffer type, load *bufferp and return a
570 * pointer to the requested data. The buffer is referenced and locked on
573 * If buf_type is 0 the buffer is assumed to be a pure-data buffer and
574 * no type or crc check is performed.
576 * If *bufferp is not NULL on entry it is assumed to contain a locked
577 * and referenced buffer which will then be replaced.
579 * If the caller is holding another unrelated buffer locked it must be
580 * passed in reorderbuf so we can properly order buffer locks.
582 * XXX add a flag for the buffer type and check the CRC here XXX
/*
 * hammer_bread() - return a pointer to the data at cluster-relative
 * byte offset cloff.
 *
 * cluster  - cluster the offset is relative to.
 * cloff    - byte offset; cloff / HAMMER_BUFSIZE selects the buffer and
 *            cloff & HAMMER_BUFMASK is the offset within it.
 * buf_type - expected buffer type, compared with the buffer head's
 *            buf_type; a mismatch is reported with kprintf().
 * errorp   - receives the error from hammer_get_buffer().
 * bufferp  - in/out buffer pointer; a buffer for a different
 *            cluster or buffer number is released and replaced.
 *
 * Returns (char *)buffer->ondisk + buf_off.
 *
 * NOTE(review): both buffer lock/unlock calls are commented out in the
 * visible code.  The declarations of buf_no and buf_off, the load of
 * buffer from *bufferp, the NULL guards, the buf_type == 0 bypass, the
 * actions taken after the two kprintf() diagnostics and the store back
 * to *bufferp are not part of this listing.
 */
585 hammer_bread(hammer_cluster_t cluster, int32_t cloff,
586 u_int64_t buf_type, int *errorp,
587 struct hammer_buffer **bufferp)
589 hammer_buffer_t buffer;
594 * Load the correct filesystem buffer, replacing *bufferp.
596 buf_no = cloff / HAMMER_BUFSIZE;
/* Reuse the caller's buffer only if it is exactly the one needed. */
598 if (buffer == NULL || buffer->cluster != cluster ||
599 buffer->buf_no != buf_no) {
601 /*hammer_unlock(&buffer->io.lock);*/
602 hammer_rel_buffer(buffer, 0);
604 buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
608 /*hammer_lock_ex(&buffer->io.lock);*/
612 * Validate the buffer type
614 buf_off = cloff & HAMMER_BUFMASK;
616 if (buf_type != buffer->ondisk->head.buf_type) {
617 kprintf("BUFFER HEAD TYPE MISMATCH %llx %llx\n",
618 buf_type, buffer->ondisk->head.buf_type);
/* An offset inside the buffer header cannot address payload data. */
622 if (buf_off < sizeof(buffer->ondisk->head)) {
623 kprintf("BUFFER OFFSET TOO LOW %d\n", buf_off);
630 * Return a pointer to the buffer data.
633 return((char *)buffer->ondisk + buf_off);