Fix some NFS related bugs which cause the mount point's mnt_refs counter
[dragonfly.git] / sys / vfs / hammer / hammer_inode.c
CommitLineData
427e5fc6
MD
1/*
2 * Copyright (c) 2007 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
513ca7d7 34 * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.27 2008/02/05 20:52:01 dillon Exp $
427e5fc6
MD
35 */
36
37#include "hammer.h"
38#include <sys/buf.h>
39#include <sys/buf2.h>
40
d113fda1
MD
41/*
42 * The kernel is not actively referencing this vnode but is still holding
43 * it cached.
44 */
427e5fc6
MD
45int
46hammer_vop_inactive(struct vop_inactive_args *ap)
47{
66325755 48 struct hammer_inode *ip = VTOI(ap->a_vp);
27ea2398 49
c0ade690
MD
50 /*
51 * Degenerate case
52 */
53 if (ip == NULL) {
66325755 54 vrecycle(ap->a_vp);
c0ade690
MD
55 return(0);
56 }
57
58 /*
59 * If the inode no longer has any references we recover its
60 * in-memory resources immediately.
61 */
d113fda1
MD
62 if (ip->ino_rec.ino_nlinks == 0)
63 vrecycle(ap->a_vp);
427e5fc6
MD
64 return(0);
65}
66
d113fda1
MD
67/*
68 * Release the vnode association. This is typically (but not always)
69 * the last reference on the inode and will flush the inode to the
70 * buffer cache.
71 *
72 * XXX Currently our sync code only runs through inodes with vnode
73 * associations, so we depend on hammer_rel_inode() to sync any inode
74 * record data to the block device prior to losing the association.
75 * Otherwise transactions that the user expected to be distinct by
76 * doing a manual sync may be merged.
77 */
427e5fc6
MD
78int
79hammer_vop_reclaim(struct vop_reclaim_args *ap)
80{
427e5fc6
MD
81 struct hammer_inode *ip;
82 struct vnode *vp;
83
84 vp = ap->a_vp;
c0ade690 85
a89aec1b
MD
86 if ((ip = vp->v_data) != NULL) {
87 vp->v_data = NULL;
88 ip->vp = NULL;
d113fda1 89 hammer_rel_inode(ip, 0);
a89aec1b 90 }
427e5fc6
MD
91 return(0);
92}
93
66325755
MD
94/*
95 * Return a locked vnode for the specified inode. The inode must be
96 * referenced but NOT LOCKED on entry and will remain referenced on
97 * return.
98 */
99int
100hammer_get_vnode(struct hammer_inode *ip, int lktype, struct vnode **vpp)
101{
102 struct vnode *vp;
103 int error = 0;
104
105 for (;;) {
106 if ((vp = ip->vp) == NULL) {
107 error = getnewvnode(VT_HAMMER, ip->hmp->mp, vpp, 0, 0);
108 if (error)
109 break;
8cd0a023
MD
110 hammer_lock_ex(&ip->lock);
111 if (ip->vp != NULL) {
112 hammer_unlock(&ip->lock);
113 vp->v_type = VBAD;
114 vx_put(vp);
115 continue;
66325755 116 }
8cd0a023
MD
117 hammer_ref(&ip->lock);
118 vp = *vpp;
119 ip->vp = vp;
120 vp->v_type = hammer_get_vnode_type(
121 ip->ino_rec.base.base.obj_type);
7a04d74f
MD
122
123 switch(ip->ino_rec.base.base.obj_type) {
124 case HAMMER_OBJTYPE_CDEV:
125 case HAMMER_OBJTYPE_BDEV:
126 vp->v_ops = &ip->hmp->mp->mnt_vn_spec_ops;
127 addaliasu(vp, ip->ino_data.rmajor,
128 ip->ino_data.rminor);
129 break;
130 case HAMMER_OBJTYPE_FIFO:
131 vp->v_ops = &ip->hmp->mp->mnt_vn_fifo_ops;
132 break;
133 default:
134 break;
135 }
136 if (ip->obj_id == HAMMER_OBJID_ROOT)
137 vp->v_flag |= VROOT;
138
8cd0a023
MD
139 vp->v_data = (void *)ip;
140 /* vnode locked by getnewvnode() */
141 /* make related vnode dirty if inode dirty? */
142 hammer_unlock(&ip->lock);
a89aec1b
MD
143 if (vp->v_type == VREG)
144 vinitvmio(vp, ip->ino_rec.ino_size);
8cd0a023
MD
145 break;
146 }
147
148 /*
149 * loop if the vget fails (aka races), or if the vp
150 * no longer matches ip->vp.
151 */
152 if (vget(vp, LK_EXCLUSIVE) == 0) {
153 if (vp == ip->vp)
154 break;
155 vput(vp);
66325755
MD
156 }
157 }
a89aec1b 158 *vpp = vp;
66325755
MD
159 return(error);
160}
161
162/*
8cd0a023
MD
163 * Acquire a HAMMER inode. The returned inode is not locked. These functions
164 * do not attach or detach the related vnode (use hammer_get_vnode() for
165 * that).
d113fda1
MD
166 *
167 * The flags argument is only applied for newly created inodes, and only
168 * certain flags are inherited.
66325755
MD
169 */
170struct hammer_inode *
61aeeb33
MD
171hammer_get_inode(struct hammer_mount *hmp, struct hammer_node **cache,
172 u_int64_t obj_id, hammer_tid_t asof, int flags, int *errorp)
66325755 173{
427e5fc6 174 struct hammer_inode_info iinfo;
8cd0a023 175 struct hammer_cursor cursor;
427e5fc6 176 struct hammer_inode *ip;
427e5fc6
MD
177
178 /*
179 * Determine if we already have an inode cached. If we do then
180 * we are golden.
181 */
66325755 182 iinfo.obj_id = obj_id;
7f7c1f84 183 iinfo.obj_asof = asof;
427e5fc6
MD
184loop:
185 ip = hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, &iinfo);
186 if (ip) {
8cd0a023 187 hammer_ref(&ip->lock);
66325755
MD
188 *errorp = 0;
189 return(ip);
427e5fc6
MD
190 }
191
427e5fc6 192 ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
b3deaf57 193 ++hammer_count_inodes;
66325755 194 ip->obj_id = obj_id;
27ea2398 195 ip->obj_asof = iinfo.obj_asof;
66325755 196 ip->hmp = hmp;
d113fda1
MD
197 ip->flags = flags & HAMMER_INODE_RO;
198 if (hmp->ronly)
199 ip->flags |= HAMMER_INODE_RO;
8cd0a023 200 RB_INIT(&ip->rec_tree);
427e5fc6
MD
201
202 /*
8cd0a023 203 * Locate the on-disk inode.
427e5fc6 204 */
6a37e7e4 205retry:
61aeeb33 206 hammer_init_cursor_hmp(&cursor, cache, hmp);
8cd0a023
MD
207 cursor.key_beg.obj_id = ip->obj_id;
208 cursor.key_beg.key = 0;
d5530d22 209 cursor.key_beg.create_tid = 0;
8cd0a023
MD
210 cursor.key_beg.delete_tid = 0;
211 cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
212 cursor.key_beg.obj_type = 0;
d5530d22
MD
213 cursor.asof = iinfo.obj_asof;
214 cursor.flags = HAMMER_CURSOR_GET_RECORD | HAMMER_CURSOR_GET_DATA |
215 HAMMER_CURSOR_ASOF;
8cd0a023
MD
216
217 *errorp = hammer_btree_lookup(&cursor);
6a37e7e4
MD
218 if (*errorp == EDEADLK) {
219 hammer_done_cursor(&cursor);
220 goto retry;
221 }
427e5fc6
MD
222
223 /*
224 * On success the B-Tree lookup will hold the appropriate
225 * buffer cache buffers and provide a pointer to the requested
d113fda1
MD
226 * information. Copy the information to the in-memory inode
227 * and cache the B-Tree node to improve future operations.
427e5fc6 228 */
66325755 229 if (*errorp == 0) {
8cd0a023
MD
230 ip->ino_rec = cursor.record->inode;
231 ip->ino_data = cursor.data->inode;
61aeeb33
MD
232 hammer_cache_node(cursor.node, &ip->cache[0]);
233 if (cache)
234 hammer_cache_node(cursor.node, cache);
427e5fc6 235 }
427e5fc6
MD
236
237 /*
238 * On success load the inode's record and data and insert the
239 * inode into the B-Tree. It is possible to race another lookup
240 * insertion of the same inode so deal with that condition too.
b3deaf57
MD
241 *
242 * The cursor's locked node interlocks against others creating and
243 * destroying ip while we were blocked.
427e5fc6 244 */
66325755 245 if (*errorp == 0) {
8cd0a023 246 hammer_ref(&ip->lock);
427e5fc6 247 if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
61aeeb33
MD
248 hammer_uncache_node(&ip->cache[0]);
249 hammer_uncache_node(&ip->cache[1]);
8cd0a023 250 hammer_unref(&ip->lock);
b3deaf57 251 --hammer_count_inodes;
427e5fc6 252 kfree(ip, M_HAMMER);
b3deaf57 253 hammer_done_cursor(&cursor);
427e5fc6
MD
254 goto loop;
255 }
c0ade690 256 ip->flags |= HAMMER_INODE_ONDISK;
427e5fc6 257 } else {
b3deaf57 258 --hammer_count_inodes;
66325755
MD
259 kfree(ip, M_HAMMER);
260 ip = NULL;
427e5fc6 261 }
b3deaf57 262 hammer_done_cursor(&cursor);
66325755
MD
263 return (ip);
264}
265
8cd0a023
MD
266/*
267 * Create a new filesystem object, returning the inode in *ipp. The
268 * returned inode will be referenced but not locked.
269 *
270 * The inode is created in-memory and will be delay-synchronized to the
271 * disk.
272 */
273int
a89aec1b
MD
274hammer_create_inode(hammer_transaction_t trans, struct vattr *vap,
275 struct ucred *cred, hammer_inode_t dip,
8cd0a023 276 struct hammer_inode **ipp)
66325755 277{
a89aec1b
MD
278 hammer_mount_t hmp;
279 hammer_inode_t ip;
6b4f890b 280 uid_t xuid;
66325755 281
8cd0a023
MD
282 hmp = trans->hmp;
283 ip = kmalloc(sizeof(*ip), M_HAMMER, M_WAITOK|M_ZERO);
b3deaf57 284 ++hammer_count_inodes;
a89aec1b 285 ip->obj_id = hammer_alloc_tid(trans);
8cd0a023 286 KKASSERT(ip->obj_id != 0);
7f7c1f84 287 ip->obj_asof = hmp->asof;
8cd0a023
MD
288 ip->hmp = hmp;
289 ip->flags = HAMMER_INODE_DDIRTY | HAMMER_INODE_RDIRTY |
290 HAMMER_INODE_ITIMES;
291 ip->last_tid = trans->tid;
292
293 RB_INIT(&ip->rec_tree);
294
295 ip->ino_rec.ino_atime = trans->tid;
296 ip->ino_rec.ino_mtime = trans->tid;
297 ip->ino_rec.ino_size = 0;
298 ip->ino_rec.ino_nlinks = 0;
299 /* XXX */
fe7678ee 300 ip->ino_rec.base.base.btype = HAMMER_BTREE_TYPE_RECORD;
8cd0a023
MD
301 ip->ino_rec.base.base.obj_id = ip->obj_id;
302 ip->ino_rec.base.base.key = 0;
303 ip->ino_rec.base.base.create_tid = trans->tid;
304 ip->ino_rec.base.base.delete_tid = 0;
305 ip->ino_rec.base.base.rec_type = HAMMER_RECTYPE_INODE;
306 ip->ino_rec.base.base.obj_type = hammer_get_obj_type(vap->va_type);
307
308 ip->ino_data.version = HAMMER_INODE_DATA_VERSION;
309 ip->ino_data.mode = vap->va_mode;
310 ip->ino_data.ctime = trans->tid;
311 ip->ino_data.parent_obj_id = (dip) ? dip->ino_rec.base.base.obj_id : 0;
6b4f890b 312
7a04d74f
MD
313 switch(ip->ino_rec.base.base.obj_type) {
314 case HAMMER_OBJTYPE_CDEV:
315 case HAMMER_OBJTYPE_BDEV:
316 ip->ino_data.rmajor = vap->va_rmajor;
317 ip->ino_data.rminor = vap->va_rminor;
318 break;
319 default:
320 break;
321 }
322
6b4f890b
MD
323 /*
324 * Calculate default uid/gid and overwrite with information from
325 * the vap.
326 */
327 xuid = hammer_to_unix_xid(&dip->ino_data.uid);
328 ip->ino_data.gid = dip->ino_data.gid;
329 xuid = vop_helper_create_uid(hmp->mp, dip->ino_data.mode, xuid, cred,
330 &vap->va_mode);
331 ip->ino_data.mode = vap->va_mode;
332
8cd0a023
MD
333 if (vap->va_vaflags & VA_UID_UUID_VALID)
334 ip->ino_data.uid = vap->va_uid_uuid;
6b4f890b
MD
335 else if (vap->va_uid != (uid_t)VNOVAL)
336 hammer_guid_to_uuid(&ip->ino_data.uid, xuid);
8cd0a023
MD
337 if (vap->va_vaflags & VA_GID_UUID_VALID)
338 ip->ino_data.gid = vap->va_gid_uuid;
6b4f890b 339 else if (vap->va_gid != (gid_t)VNOVAL)
8cd0a023
MD
340 hammer_guid_to_uuid(&ip->ino_data.gid, vap->va_gid);
341
342 hammer_ref(&ip->lock);
343 if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
344 hammer_unref(&ip->lock);
a89aec1b 345 panic("hammer_create_inode: duplicate obj_id %llx", ip->obj_id);
8cd0a023
MD
346 }
347 *ipp = ip;
348 return(0);
66325755
MD
349}
350
d113fda1
MD
351/*
352 * Called by hammer_sync_inode().
353 */
354static int
7f7c1f84 355hammer_update_inode(hammer_inode_t ip)
c0ade690
MD
356{
357 struct hammer_cursor cursor;
d26d0ae9 358 struct hammer_cursor *spike = NULL;
c0ade690
MD
359 hammer_record_t record;
360 int error;
d113fda1 361 hammer_tid_t last_tid;
c0ade690
MD
362
363 /*
76376933 364 * Locate the record on-disk and mark it as deleted. Both the B-Tree
195c19a1
MD
365 * node and the record must be marked deleted. The record may or
366 * may not be physically deleted, depending on the retention policy.
76376933 367 *
195c19a1
MD
368 * If the inode has already been deleted on-disk we have nothing
369 * to do.
c0ade690
MD
370 *
371 * XXX Update the inode record and data in-place if the retention
372 * policy allows it.
373 */
d113fda1 374 last_tid = ip->last_tid;
d26d0ae9 375retry:
c0ade690
MD
376 error = 0;
377
76376933
MD
378 if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
379 HAMMER_INODE_ONDISK) {
61aeeb33 380 hammer_init_cursor_hmp(&cursor, &ip->cache[0], ip->hmp);
c0ade690
MD
381 cursor.key_beg.obj_id = ip->obj_id;
382 cursor.key_beg.key = 0;
d5530d22 383 cursor.key_beg.create_tid = 0;
c0ade690
MD
384 cursor.key_beg.delete_tid = 0;
385 cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
386 cursor.key_beg.obj_type = 0;
d5530d22
MD
387 cursor.asof = ip->obj_asof;
388 cursor.flags |= HAMMER_CURSOR_GET_RECORD | HAMMER_CURSOR_ASOF;
c0ade690
MD
389
390 error = hammer_btree_lookup(&cursor);
391
392 if (error == 0) {
d113fda1 393 error = hammer_ip_delete_record(&cursor, last_tid);
195c19a1
MD
394 if (error == 0)
395 ip->flags |= HAMMER_INODE_DELONDISK;
6a37e7e4 396 hammer_cache_node(cursor.node, &ip->cache[0]);
c0ade690 397 }
c0ade690 398 hammer_done_cursor(&cursor);
6a37e7e4
MD
399 if (error == EDEADLK)
400 goto retry;
c0ade690
MD
401 }
402
403 /*
404 * Write out a new record if the in-memory inode is not marked
fbc6e32a
MD
405 * as having been deleted. Update our inode statistics if this
406 * is the first application of the inode on-disk.
76376933
MD
407 *
408 * If the inode has been deleted permanently, HAMMER_INODE_DELONDISK
409 * will remain set and prevent further updates.
c0ade690
MD
410 */
411 if (error == 0 && (ip->flags & HAMMER_INODE_DELETED) == 0) {
7f7c1f84 412 record = hammer_alloc_mem_record(ip);
c0ade690 413 record->rec.inode = ip->ino_rec;
d113fda1 414 record->rec.inode.base.base.create_tid = last_tid;
c0ade690
MD
415 record->rec.inode.base.data_len = sizeof(ip->ino_data);
416 record->data = (void *)&ip->ino_data;
d26d0ae9 417 error = hammer_ip_sync_record(record, &spike);
b3deaf57
MD
418 record->flags |= HAMMER_RECF_DELETED;
419 hammer_rel_mem_record(record);
d26d0ae9
MD
420 if (error == ENOSPC) {
421 error = hammer_spike(&spike);
422 if (error == 0)
423 goto retry;
424 }
425 KKASSERT(spike == NULL);
426 if (error == 0) {
d113fda1
MD
427 ip->flags &= ~(HAMMER_INODE_RDIRTY |
428 HAMMER_INODE_DDIRTY |
429 HAMMER_INODE_DELONDISK |
430 HAMMER_INODE_ITIMES);
d26d0ae9 431 if ((ip->flags & HAMMER_INODE_ONDISK) == 0) {
d26d0ae9 432 hammer_modify_volume(ip->hmp->rootvol);
0b075555 433 ++ip->hmp->rootvol->ondisk->vol0_stat_inodes;
d26d0ae9
MD
434 ip->flags |= HAMMER_INODE_ONDISK;
435 }
fbc6e32a 436 }
c0ade690
MD
437 }
438 return(error);
439}
440
a89aec1b 441/*
d113fda1
MD
442 * Update only the itimes fields. This is done no-historically. The
443 * record is updated in-place on the disk.
444 */
445static int
446hammer_update_itimes(hammer_inode_t ip)
447{
448 struct hammer_cursor cursor;
449 struct hammer_inode_record *rec;
450 int error;
451
6a37e7e4 452retry:
d113fda1
MD
453 error = 0;
454 if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) ==
455 HAMMER_INODE_ONDISK) {
61aeeb33 456 hammer_init_cursor_hmp(&cursor, &ip->cache[0], ip->hmp);
d113fda1
MD
457 cursor.key_beg.obj_id = ip->obj_id;
458 cursor.key_beg.key = 0;
d5530d22 459 cursor.key_beg.create_tid = 0;
d113fda1
MD
460 cursor.key_beg.delete_tid = 0;
461 cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
462 cursor.key_beg.obj_type = 0;
d5530d22
MD
463 cursor.asof = ip->obj_asof;
464 cursor.flags |= HAMMER_CURSOR_GET_RECORD | HAMMER_CURSOR_ASOF;
d113fda1
MD
465
466 error = hammer_btree_lookup(&cursor);
d113fda1
MD
467 if (error == 0) {
468 rec = &cursor.record->inode;
46fe7ae1 469 hammer_modify_buffer_nodep(cursor.record_buffer);
d113fda1
MD
470 rec->ino_atime = ip->ino_rec.ino_atime;
471 rec->ino_mtime = ip->ino_rec.ino_mtime;
d113fda1
MD
472 ip->flags &= ~HAMMER_INODE_ITIMES;
473 /* XXX recalculate crc */
6a37e7e4 474 hammer_cache_node(cursor.node, &ip->cache[0]);
d113fda1 475 }
d113fda1 476 hammer_done_cursor(&cursor);
6a37e7e4
MD
477 if (error == EDEADLK)
478 goto retry;
d113fda1
MD
479 }
480 return(error);
481}
482
483/*
484 * Release a reference on an inode. If asked to flush the last release
485 * will flush the inode.
a89aec1b 486 */
66325755 487void
a89aec1b 488hammer_rel_inode(struct hammer_inode *ip, int flush)
66325755
MD
489{
490 hammer_unref(&ip->lock);
d113fda1 491 if (flush)
a89aec1b 492 ip->flags |= HAMMER_INODE_FLUSH;
d113fda1
MD
493 if (ip->lock.refs == 0) {
494 if (ip->flags & HAMMER_INODE_FLUSH)
495 hammer_unload_inode(ip, (void *)MNT_WAIT);
496 else
497 hammer_unload_inode(ip, (void *)MNT_NOWAIT);
498 }
427e5fc6
MD
499}
500
27ea2398 501/*
8cd0a023
MD
502 * Unload and destroy the specified inode.
503 *
b33e2cc0 504 * (typically called via RB_SCAN)
27ea2398
MD
505 */
506int
d113fda1 507hammer_unload_inode(struct hammer_inode *ip, void *data)
27ea2398 508{
c0ade690
MD
509 int error;
510
a89aec1b
MD
511 KASSERT(ip->lock.refs == 0,
512 ("hammer_unload_inode: %d refs\n", ip->lock.refs));
8cd0a023
MD
513 KKASSERT(ip->vp == NULL);
514 hammer_ref(&ip->lock);
6b4f890b 515
d113fda1 516 error = hammer_sync_inode(ip, (int)data, 1);
c0ade690
MD
517 if (error)
518 kprintf("hammer_sync_inode failed error %d\n", error);
d113fda1
MD
519 if (ip->lock.refs == 1) {
520 KKASSERT(RB_EMPTY(&ip->rec_tree));
521 RB_REMOVE(hammer_ino_rb_tree, &ip->hmp->rb_inos_root, ip);
8cd0a023 522
61aeeb33
MD
523 hammer_uncache_node(&ip->cache[0]);
524 hammer_uncache_node(&ip->cache[1]);
d113fda1
MD
525 --hammer_count_inodes;
526 kfree(ip, M_HAMMER);
527 } else {
528 hammer_unref(&ip->lock);
529 }
27ea2398
MD
530 return(0);
531}
532
427e5fc6 533/*
d113fda1
MD
534 * A transaction has modified an inode, requiring updates as specified by
535 * the passed flags.
7f7c1f84 536 *
d113fda1
MD
537 * HAMMER_INODE_RDIRTY: Inode record has been updated
538 * HAMMER_INODE_DDIRTY: Inode data has been updated
539 * HAMMER_INODE_DELETED: Inode record/data must be deleted
540 * HAMMER_INODE_ITIMES: mtime/atime has been updated
541 *
542 * last_tid is the TID to use to generate the correct TID when the inode
32c90105
MD
543 * is synced to disk. The first inode record laid out on disk must match
544 * the transaction id of the related directory entry so only update last_tid
545 * if that has already occured.
427e5fc6 546 */
66325755
MD
547void
548hammer_modify_inode(struct hammer_transaction *trans,
549 struct hammer_inode *ip, int flags)
427e5fc6 550{
d113fda1
MD
551 KKASSERT ((ip->flags & HAMMER_INODE_RO) == 0 ||
552 (HAMMER_INODE_RDIRTY|HAMMER_INODE_DDIRTY|
553 HAMMER_INODE_DELETED|HAMMER_INODE_ITIMES) == 0);
554
555 if (flags &
556 (HAMMER_INODE_RDIRTY|HAMMER_INODE_DDIRTY|HAMMER_INODE_DELETED)) {
557 if (hammer_debug_tid) {
558 kprintf("hammer_modify_inode: %016llx (%08x)\n",
559 trans->tid, (int)(trans->tid / 1000000000LL));
560 }
32c90105
MD
561 if (ip->flags & HAMMER_INODE_ONDISK)
562 ip->last_tid = trans->tid;
7f7c1f84
MD
563 }
564 ip->flags |= flags;
c0ade690
MD
565}
566
567/*
568 * Sync any dirty buffers and records associated with an inode. The
569 * inode's last_tid field is used as the transaction id for the sync,
570 * overriding any intermediate TIDs that were used for records. Note
571 * that the dirty buffer cache buffers do not have any knowledge of
572 * the transaction id they were modified under.
d26d0ae9
MD
573 *
574 * If we can't sync due to a cluster becoming full the spike structure
575 * will be filled in and ENOSPC returned. We must return -ENOSPC to
576 * terminate the RB_SCAN.
c0ade690
MD
577 */
578static int
d26d0ae9 579hammer_sync_inode_callback(hammer_record_t rec, void *data)
c0ade690 580{
d26d0ae9 581 struct hammer_cursor **spike = data;
c0ade690
MD
582 int error;
583
d26d0ae9 584 hammer_ref(&rec->lock);
b3deaf57
MD
585 error = hammer_ip_sync_record(rec, spike);
586 hammer_rel_mem_record(rec);
c0ade690
MD
587
588 if (error) {
b3deaf57
MD
589 error = -error;
590 if (error != -ENOSPC) {
591 kprintf("hammer_sync_inode_callback: sync failed rec "
592 "%p, error %d\n", rec, error);
593 }
c0ade690 594 }
b3deaf57 595 return(error);
c0ade690
MD
596}
597
598/*
599 * XXX error handling
600 */
601int
602hammer_sync_inode(hammer_inode_t ip, int waitfor, int handle_delete)
603{
604 struct hammer_transaction trans;
d26d0ae9 605 struct hammer_cursor *spike = NULL;
c0ade690 606 int error;
c0ade690 607
d113fda1
MD
608 if ((ip->flags & HAMMER_INODE_MODMASK) == 0) {
609 return(0);
610 }
611
c0ade690 612 hammer_lock_ex(&ip->lock);
d113fda1
MD
613
614 /*
615 * Use the transaction id of the last operation to sync.
616 */
617 if (ip->last_tid)
618 hammer_start_transaction_tid(&trans, ip->hmp, ip->last_tid);
619 else
620 hammer_start_transaction(&trans, ip->hmp);
c0ade690
MD
621
622 /*
623 * If the inode has been deleted (nlinks == 0), and the OS no longer
624 * has any references to it (handle_delete != 0), clean up in-memory
625 * data.
626 *
627 * NOTE: We do not set the RDIRTY flag when updating the delete_tid,
628 * setting HAMMER_INODE_DELETED takes care of it.
7f7c1f84
MD
629 *
630 * NOTE: Because we may sync records within this new transaction,
631 * force the inode update later on to use our transaction id or
632 * the delete_tid of the inode may be less then the create_tid of
633 * the inode update. XXX shouldn't happen but don't take the chance.
d26d0ae9
MD
634 *
635 * NOTE: The call to hammer_ip_delete_range() cannot return ENOSPC
636 * so we can pass a NULL spike structure, because no partial data
637 * deletion can occur (yet).
c0ade690 638 */
d113fda1
MD
639 if (ip->ino_rec.ino_nlinks == 0 && handle_delete &&
640 (ip->flags & HAMMER_INODE_GONE) == 0) {
641 ip->flags |= HAMMER_INODE_GONE;
c0ade690
MD
642 if (ip->vp)
643 vtruncbuf(ip->vp, 0, HAMMER_BUFSIZE);
7a04d74f 644 error = hammer_ip_delete_range_all(&trans, ip);
c0ade690
MD
645 KKASSERT(RB_EMPTY(&ip->rec_tree));
646 ip->ino_rec.base.base.delete_tid = trans.tid;
d113fda1 647 hammer_modify_inode(&trans, ip, HAMMER_INODE_DELETED);
fbc6e32a 648 hammer_modify_volume(ip->hmp->rootvol);
0b075555 649 --ip->hmp->rootvol->ondisk->vol0_stat_inodes;
c0ade690
MD
650 }
651
652 /*
f3b0f382 653 * Sync the buffer cache.
c0ade690 654 */
f3b0f382 655 if (ip->vp != NULL) {
c0ade690 656 error = vfsync(ip->vp, waitfor, 1, NULL, NULL);
f3b0f382
MD
657 if (RB_ROOT(&ip->vp->v_rbdirty_tree) == NULL)
658 ip->flags &= ~HAMMER_INODE_BUFS;
659 } else {
c0ade690 660 error = 0;
f3b0f382
MD
661 }
662
c0ade690
MD
663
664 /*
665 * Now sync related records
666 */
d26d0ae9
MD
667 for (;;) {
668 error = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL,
669 hammer_sync_inode_callback, &spike);
670 KKASSERT(error <= 0);
671 if (error < 0)
672 error = -error;
673 if (error == ENOSPC) {
674 error = hammer_spike(&spike);
675 if (error == 0)
676 continue;
677 }
678 break;
c0ade690 679 }
d113fda1
MD
680 if (RB_EMPTY(&ip->rec_tree))
681 ip->flags &= ~HAMMER_INODE_XDIRTY;
c0ade690
MD
682
683 /*
684 * Now update the inode's on-disk inode-data and/or on-disk record.
685 */
686 switch(ip->flags & (HAMMER_INODE_DELETED|HAMMER_INODE_ONDISK)) {
687 case HAMMER_INODE_DELETED|HAMMER_INODE_ONDISK:
688 /*
689 * If deleted and on-disk, don't set any additional flags.
690 * the delete flag takes care of things.
691 */
692 break;
693 case HAMMER_INODE_DELETED:
694 /*
695 * Take care of the case where a deleted inode was never
696 * flushed to the disk in the first place.
697 */
d113fda1
MD
698 ip->flags &= ~(HAMMER_INODE_RDIRTY|HAMMER_INODE_DDIRTY|
699 HAMMER_INODE_XDIRTY|HAMMER_INODE_ITIMES);
d26d0ae9
MD
700 while (RB_ROOT(&ip->rec_tree)) {
701 hammer_record_t rec = RB_ROOT(&ip->rec_tree);
702 hammer_ref(&rec->lock);
b3deaf57
MD
703 rec->flags |= HAMMER_RECF_DELETED;
704 hammer_rel_mem_record(rec);
d26d0ae9 705 }
c0ade690
MD
706 break;
707 case HAMMER_INODE_ONDISK:
708 /*
709 * If already on-disk, do not set any additional flags.
710 */
711 break;
712 default:
713 /*
714 * If not on-disk and not deleted, set both dirty flags
715 * to force an initial record to be written.
716 */
717 ip->flags |= HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY;
718 break;
719 }
720
721 /*
d113fda1
MD
722 * If RDIRTY or DDIRTY is set, write out a new record. If the inode
723 * is already on-disk the old record is marked as deleted.
724 *
725 * If DELETED is set hammer_update_inode() will delete the existing
726 * record without writing out a new one.
727 *
728 * If *ONLY* the ITIMES flag is set we can update the record in-place.
c0ade690 729 */
d113fda1
MD
730 if ((ip->flags & (HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY |
731 HAMMER_INODE_ITIMES | HAMMER_INODE_DELETED)) ==
732 HAMMER_INODE_ITIMES) {
733 error = hammer_update_itimes(ip);
734 } else
c0ade690 735 if (ip->flags & (HAMMER_INODE_RDIRTY | HAMMER_INODE_DDIRTY |
d113fda1 736 HAMMER_INODE_ITIMES | HAMMER_INODE_DELETED)) {
7f7c1f84 737 error = hammer_update_inode(ip);
c0ade690
MD
738 }
739 hammer_commit_transaction(&trans);
740 hammer_unlock(&ip->lock);
741 return(error);
8cd0a023
MD
742}
743
427e5fc6
MD
744/*
745 * Access the filesystem buffer containing the cluster-relative byte
746 * offset, validate the buffer type, load *bufferp and return a
8cd0a023
MD
747 * pointer to the requested data. The buffer is reference and locked on
748 * return.
427e5fc6
MD
749 *
750 * If buf_type is 0 the buffer is assumed to be a pure-data buffer and
751 * no type or crc check is performed.
752 *
8cd0a023
MD
753 * If *bufferp is not NULL on entry it is assumed to contain a locked
754 * and referenced buffer which will then be replaced.
755 *
756 * If the caller is holding another unrelated buffer locked it must be
757 * passed in reorderbuf so we can properly order buffer locks.
758 *
427e5fc6
MD
759 * XXX add a flag for the buffer type and check the CRC here XXX
760 */
761void *
8cd0a023
MD
762hammer_bread(hammer_cluster_t cluster, int32_t cloff,
763 u_int64_t buf_type, int *errorp,
764 struct hammer_buffer **bufferp)
427e5fc6 765{
8cd0a023 766 hammer_buffer_t buffer;
427e5fc6
MD
767 int32_t buf_no;
768 int32_t buf_off;
769
770 /*
771 * Load the correct filesystem buffer, replacing *bufferp.
772 */
773 buf_no = cloff / HAMMER_BUFSIZE;
774 buffer = *bufferp;
775 if (buffer == NULL || buffer->cluster != cluster ||
776 buffer->buf_no != buf_no) {
8cd0a023 777 if (buffer) {
c0ade690 778 /*hammer_unlock(&buffer->io.lock);*/
8cd0a023
MD
779 hammer_rel_buffer(buffer, 0);
780 }
427e5fc6
MD
781 buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
782 *bufferp = buffer;
783 if (buffer == NULL)
784 return(NULL);
c0ade690 785 /*hammer_lock_ex(&buffer->io.lock);*/
427e5fc6
MD
786 }
787
788 /*
8cd0a023 789 * Validate the buffer type
427e5fc6
MD
790 */
791 buf_off = cloff & HAMMER_BUFMASK;
792 if (buf_type) {
793 if (buf_type != buffer->ondisk->head.buf_type) {
27ea2398
MD
794 kprintf("BUFFER HEAD TYPE MISMATCH %llx %llx\n",
795 buf_type, buffer->ondisk->head.buf_type);
b33e2cc0 796 KKASSERT(0);
427e5fc6
MD
797 *errorp = EIO;
798 return(NULL);
799 }
800 if (buf_off < sizeof(buffer->ondisk->head)) {
27ea2398 801 kprintf("BUFFER OFFSET TOO LOW %d\n", buf_off);
427e5fc6 802 *errorp = EIO;
b33e2cc0 803 KKASSERT(0);
427e5fc6
MD
804 return(NULL);
805 }
427e5fc6
MD
806 }
807
808 /*
809 * Return a pointer to the buffer data.
810 */
811 *errorp = 0;
812 return((char *)buffer->ondisk + buf_off);
813}
814