| Commit | Line | Data |
|---|---|---|
| 427e5fc6 | 1 | /* |
| b84de5af | 2 | * Copyright (c) 2007-2008 The DragonFly Project. All rights reserved. |
| 427e5fc6 MD |
3 | * |
| 4 | * This code is derived from software contributed to The DragonFly Project | |
| 5 | * by Matthew Dillon <dillon@backplane.com> | |
| 6 | * | |
| 7 | * Redistribution and use in source and binary forms, with or without | |
| 8 | * modification, are permitted provided that the following conditions | |
| 9 | * are met: | |
| 10 | * | |
| 11 | * 1. Redistributions of source code must retain the above copyright | |
| 12 | * notice, this list of conditions and the following disclaimer. | |
| 13 | * 2. Redistributions in binary form must reproduce the above copyright | |
| 14 | * notice, this list of conditions and the following disclaimer in | |
| 15 | * the documentation and/or other materials provided with the | |
| 16 | * distribution. | |
| 17 | * 3. Neither the name of The DragonFly Project nor the names of its | |
| 18 | * contributors may be used to endorse or promote products derived | |
| 19 | * from this software without specific, prior written permission. | |
| 20 | * | |
| 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 22 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS | |
| 24 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | |
| 25 | * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | |
| 26 | * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, | |
| 27 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
| 28 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED | |
| 29 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 30 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT | |
| 31 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 32 | * SUCH DAMAGE. | |
| 33 | * | |
| 69ce4424 | 34 | * $DragonFly: src/sys/vfs/hammer/hammer_inode.c,v 1.114 2008/09/24 00:53:51 dillon Exp $ |
| 427e5fc6 MD |
35 | */ |
| 36 | ||
| 37 | #include "hammer.h" | |
| 869e8f55 | 38 | #include <vm/vm_extern.h> |
| 427e5fc6 | 39 | |
/*
 * Forward declarations for file-local helpers.  Their definitions are
 * expected further down in this file.
 */
static int hammer_unload_inode(struct hammer_inode *ip);
static void hammer_free_inode(hammer_inode_t ip);
static void hammer_flush_inode_core(hammer_inode_t ip,
				hammer_flush_group_t flg, int flags);
static int hammer_setup_child_callback(hammer_record_t rec, void *data);
/* The syncgrp child callback is currently compiled out. */
#if 0
static int hammer_syncgrp_child_callback(hammer_record_t rec, void *data);
#endif
static int hammer_setup_parent_inodes(hammer_inode_t ip, int depth,
				hammer_flush_group_t flg);
static int hammer_setup_parent_inodes_helper(hammer_record_t record,
				int depth, hammer_flush_group_t flg);
static void hammer_inode_wakereclaims(hammer_inode_t ip);
static struct hammer_inostats *hammer_inode_inostats(hammer_mount_t hmp,
				pid_t pid);

/*
 * Debugging aid, only declared when DEBUG_TRUNCATE is defined.  The
 * variable itself is defined outside of this file.
 */
#ifdef DEBUG_TRUNCATE
extern struct hammer_inode *HammerTruncIp;
#endif
| 59 | ||
| d113fda1 | 60 | /* |
| 5fa5c92f | 61 | * RB-Tree support for inode structures |
| 43c665ae MD |
62 | */ |
| 63 | int | |
| 64 | hammer_ino_rb_compare(hammer_inode_t ip1, hammer_inode_t ip2) | |
| 65 | { | |
| 66 | if (ip1->obj_localization < ip2->obj_localization) | |
| 67 | return(-1); | |
| 68 | if (ip1->obj_localization > ip2->obj_localization) | |
| 69 | return(1); | |
| 70 | if (ip1->obj_id < ip2->obj_id) | |
| 71 | return(-1); | |
| 72 | if (ip1->obj_id > ip2->obj_id) | |
| 73 | return(1); | |
| 74 | if (ip1->obj_asof < ip2->obj_asof) | |
| 75 | return(-1); | |
| 76 | if (ip1->obj_asof > ip2->obj_asof) | |
| 77 | return(1); | |
| 78 | return(0); | |
| 79 | } | |
| 80 | ||
| 73896937 MD |
81 | int |
| 82 | hammer_redo_rb_compare(hammer_inode_t ip1, hammer_inode_t ip2) | |
| 83 | { | |
| 84 | if (ip1->redo_fifo_start < ip2->redo_fifo_start) | |
| 85 | return(-1); | |
| 86 | if (ip1->redo_fifo_start > ip2->redo_fifo_start) | |
| 87 | return(1); | |
| 88 | return(0); | |
| 89 | } | |
| 90 | ||
| 43c665ae | 91 | /* |
| 5fa5c92f | 92 | * RB-Tree support for inode structures / special LOOKUP_INFO |
| 43c665ae MD |
93 | */ |
| 94 | static int | |
| 95 | hammer_inode_info_cmp(hammer_inode_info_t info, hammer_inode_t ip) | |
| 96 | { | |
| 97 | if (info->obj_localization < ip->obj_localization) | |
| 98 | return(-1); | |
| 99 | if (info->obj_localization > ip->obj_localization) | |
| 100 | return(1); | |
| 101 | if (info->obj_id < ip->obj_id) | |
| 102 | return(-1); | |
| 103 | if (info->obj_id > ip->obj_id) | |
| 104 | return(1); | |
| 105 | if (info->obj_asof < ip->obj_asof) | |
| 106 | return(-1); | |
| 107 | if (info->obj_asof > ip->obj_asof) | |
| 108 | return(1); | |
| 109 | return(0); | |
| 110 | } | |
| 111 | ||
| 112 | /* | |
| 113 | * Used by hammer_scan_inode_snapshots() to locate all of an object's | |
| 114 | * snapshots. Note that the asof field is not tested, which we can get | |
| 115 | * away with because it is the lowest-priority field. | |
| 116 | */ | |
| 117 | static int | |
| 118 | hammer_inode_info_cmp_all_history(hammer_inode_t ip, void *data) | |
| 119 | { | |
| 120 | hammer_inode_info_t info = data; | |
| 121 | ||
| 122 | if (ip->obj_localization > info->obj_localization) | |
| 123 | return(1); | |
| 124 | if (ip->obj_localization < info->obj_localization) | |
| 125 | return(-1); | |
| 126 | if (ip->obj_id > info->obj_id) | |
| 127 | return(1); | |
| 128 | if (ip->obj_id < info->obj_id) | |
| 129 | return(-1); | |
| 130 | return(0); | |
| 131 | } | |
| 132 | ||
| 5fa5c92f | 133 | /* |
| 842e7a70 MD |
134 | * Used by hammer_unload_pseudofs() to locate all inodes associated with |
| 135 | * a particular PFS. | |
| 136 | */ | |
| 137 | static int | |
| 138 | hammer_inode_pfs_cmp(hammer_inode_t ip, void *data) | |
| 139 | { | |
| 140 | u_int32_t localization = *(u_int32_t *)data; | |
| 141 | if (ip->obj_localization > localization) | |
| 142 | return(1); | |
| 143 | if (ip->obj_localization < localization) | |
| 144 | return(-1); | |
| 145 | return(0); | |
| 146 | } | |
| 147 | ||
| 148 | /* | |
| 5fa5c92f MD |
149 | * RB-Tree support for pseudofs structures |
| 150 | */ | |
| 151 | static int | |
| 152 | hammer_pfs_rb_compare(hammer_pseudofs_inmem_t p1, hammer_pseudofs_inmem_t p2) | |
| 153 | { | |
| 154 | if (p1->localization < p2->localization) | |
| 155 | return(-1); | |
| 156 | if (p1->localization > p2->localization) | |
| 157 | return(1); | |
| 158 | return(0); | |
| 159 | } | |
| 160 | ||
| 161 | ||
/*
 * Instantiate the red-black tree code used by this file:
 *
 *  - hammer_ino_rb_tree of hammer_inode, ordered by hammer_ino_rb_compare.
 *  - an additional INFO lookup on that tree which takes a
 *    hammer_inode_info_t key and compares with hammer_inode_info_cmp
 *    (called below as hammer_ino_rb_tree_RB_LOOKUP_INFO).
 *  - hammer_pfs_rb_tree of hammer_pseudofs_inmem, ordered by
 *    hammer_pfs_rb_compare; presumably also generates a direct lookup on
 *    the u_int32_t localization field -- confirm against the RB macros.
 */
RB_GENERATE(hammer_ino_rb_tree, hammer_inode, rb_node, hammer_ino_rb_compare);
RB_GENERATE_XLOOKUP(hammer_ino_rb_tree, INFO, hammer_inode, rb_node,
		hammer_inode_info_cmp, hammer_inode_info_t);
RB_GENERATE2(hammer_pfs_rb_tree, hammer_pseudofs_inmem, rb_node,
	     hammer_pfs_rb_compare, u_int32_t, localization);
| 43c665ae MD |
167 | |
| 168 | /* | |
| d113fda1 MD |
169 | * The kernel is not actively referencing this vnode but is still holding |
| 170 | * it cached. | |
| b84de5af MD |
171 | * |
| 172 | * This is called from the frontend. | |
| 2247fe02 MD |
173 | * |
| 174 | * MPALMOSTSAFE | |
| d113fda1 | 175 | */ |
| 427e5fc6 MD |
176 | int |
| 177 | hammer_vop_inactive(struct vop_inactive_args *ap) | |
| 178 | { | |
| 66325755 | 179 | struct hammer_inode *ip = VTOI(ap->a_vp); |
| b0aab9b9 | 180 | hammer_mount_t hmp; |
| 27ea2398 | 181 | |
| c0ade690 MD |
182 | /* |
| 183 | * Degenerate case | |
| 184 | */ | |
| 185 | if (ip == NULL) { | |
| 66325755 | 186 | vrecycle(ap->a_vp); |
| c0ade690 MD |
187 | return(0); |
| 188 | } | |
| 189 | ||
| 190 | /* | |
| 4a2796f3 MD |
191 | * If the inode no longer has visibility in the filesystem try to |
| 192 | * recycle it immediately, even if the inode is dirty. Recycling | |
| 193 | * it quickly allows the system to reclaim buffer cache and VM | |
| 194 | * resources which can matter a lot in a heavily loaded system. | |
| 195 | * | |
| 196 | * This can deadlock in vfsync() if we aren't careful. | |
| 4e97774c MD |
197 | * |
| 198 | * Do not queue the inode to the flusher if we still have visibility, | |
| 199 | * otherwise namespace calls such as chmod will unnecessarily generate | |
| 200 | * multiple inode updates. | |
| c0ade690 | 201 | */ |
| 4e97774c | 202 | if (ip->ino_data.nlinks == 0) { |
| b0aab9b9 MD |
203 | hmp = ip->hmp; |
| 204 | lwkt_gettoken(&hmp->fs_token); | |
| 2247fe02 | 205 | hammer_inode_unloadable_check(ip, 0); |
| 4e97774c MD |
206 | if (ip->flags & HAMMER_INODE_MODMASK) |
| 207 | hammer_flush_inode(ip, 0); | |
| b0aab9b9 | 208 | lwkt_reltoken(&hmp->fs_token); |
| 4a2796f3 | 209 | vrecycle(ap->a_vp); |
| 4e97774c | 210 | } |
| 427e5fc6 MD |
211 | return(0); |
| 212 | } | |
| 213 | ||
| d113fda1 MD |
214 | /* |
| 215 | * Release the vnode association. This is typically (but not always) | |
| 1f07f686 | 216 | * the last reference on the inode. |
| d113fda1 | 217 | * |
| 1f07f686 MD |
218 | * Once the association is lost we are on our own with regards to |
| 219 | * flushing the inode. | |
| e0092341 MD |
220 | * |
| 221 | * We must interlock ip->vp so hammer_get_vnode() can avoid races. | |
| d113fda1 | 222 | */ |
| 427e5fc6 MD |
223 | int |
| 224 | hammer_vop_reclaim(struct vop_reclaim_args *ap) | |
| 225 | { | |
| 427e5fc6 | 226 | struct hammer_inode *ip; |
| 7bc5b8c2 | 227 | hammer_mount_t hmp; |
| 427e5fc6 MD |
228 | struct vnode *vp; |
| 229 | ||
| 230 | vp = ap->a_vp; | |
| c0ade690 | 231 | |
| a89aec1b | 232 | if ((ip = vp->v_data) != NULL) { |
| da2da375 | 233 | hmp = ip->hmp; |
| b0aab9b9 | 234 | lwkt_gettoken(&hmp->fs_token); |
| e0092341 | 235 | hammer_lock_ex(&ip->lock); |
| a89aec1b MD |
236 | vp->v_data = NULL; |
| 237 | ip->vp = NULL; | |
| 7bc5b8c2 | 238 | |
| 4a2796f3 | 239 | if ((ip->flags & HAMMER_INODE_RECLAIM) == 0) { |
| 9f5097dc | 240 | ++hammer_count_reclaiming; |
| da2da375 | 241 | ++hmp->inode_reclaims; |
| 9f5097dc MD |
242 | ip->flags |= HAMMER_INODE_RECLAIM; |
| 243 | } | |
| e0092341 | 244 | hammer_unlock(&ip->lock); |
| ec4e8497 | 245 | hammer_rel_inode(ip, 1); |
| b0aab9b9 | 246 | lwkt_reltoken(&hmp->fs_token); |
| a89aec1b | 247 | } |
| 427e5fc6 MD |
248 | return(0); |
| 249 | } | |
| 250 | ||
/*
 * Return a locked vnode for the specified inode.  The inode must be
 * referenced but NOT LOCKED on entry and will remain referenced on
 * return.
 *
 * If the inode has no vnode yet a new one is allocated and attached,
 * otherwise the existing vnode is acquired with vget().  Either path
 * loops until it wins any race against a concurrent attach or reclaim.
 *
 * ip   - inode to obtain the vnode for.
 * vpp  - receives the vnode.  If getnewvnode() fails, *vpp is set to NULL.
 *
 * Returns 0 on success or the error returned by getnewvnode().
 *
 * Called from the frontend.
 */
int
hammer_get_vnode(struct hammer_inode *ip, struct vnode **vpp)
{
	hammer_mount_t hmp;
	struct vnode *vp;
	int error = 0;
	u_int8_t obj_type;

	hmp = ip->hmp;

	for (;;) {
		if ((vp = ip->vp) == NULL) {
			/*
			 * No vnode is attached.  Allocate one first, then
			 * re-check ip->vp under the inode lock because
			 * another thread may have attached a vnode in the
			 * meantime.
			 */
			error = getnewvnode(VT_HAMMER, hmp->mp, vpp, 0, 0);
			if (error)
				break;
			hammer_lock_ex(&ip->lock);
			if (ip->vp != NULL) {
				/*
				 * Lost the race.  Dispose of the vnode we
				 * just allocated and retry using the one
				 * that is now attached.
				 */
				hammer_unlock(&ip->lock);
				vp = *vpp;
				vp->v_type = VBAD;
				vx_put(vp);
				continue;
			}

			/*
			 * Take an inode reference on behalf of the vnode
			 * association (presumably the one dropped again by
			 * hammer_vop_reclaim()) and link the two.
			 */
			hammer_ref(&ip->lock);
			vp = *vpp;
			ip->vp = vp;

			obj_type = ip->ino_data.obj_type;
			vp->v_type = hammer_get_vnode_type(obj_type);

			hammer_inode_wakereclaims(ip);

			/*
			 * Device nodes and FIFOs use the mount's special
			 * vnode operation vectors.
			 */
			switch(ip->ino_data.obj_type) {
			case HAMMER_OBJTYPE_CDEV:
			case HAMMER_OBJTYPE_BDEV:
				vp->v_ops = &hmp->mp->mnt_vn_spec_ops;
				addaliasu(vp, ip->ino_data.rmajor,
					  ip->ino_data.rminor);
				break;
			case HAMMER_OBJTYPE_FIFO:
				vp->v_ops = &hmp->mp->mnt_vn_fifo_ops;
				break;
			case HAMMER_OBJTYPE_REGFILE:
				break;
			default:
				break;
			}

			/*
			 * Only mark as the root vnode if the ip is not
			 * historical, otherwise the VFS cache will get
			 * confused.  The other half of the special handling
			 * is in hammer_vop_nlookupdotdot().
			 *
			 * Pseudo-filesystem roots can be accessed via
			 * non-root filesystem paths and setting VROOT may
			 * confuse the namecache.  Set VPFSROOT instead.
			 */
			if (ip->obj_id == HAMMER_OBJID_ROOT &&
			    ip->obj_asof == hmp->asof) {
				if (ip->obj_localization == 0)
					vsetflags(vp, VROOT);
				else
					vsetflags(vp, VPFSROOT);
			}

			vp->v_data = (void *)ip;
			/* vnode locked by getnewvnode() */
			/* make related vnode dirty if inode dirty? */
			hammer_unlock(&ip->lock);

			/*
			 * Regular files get their VM backing set up from
			 * the current file size.
			 */
			if (vp->v_type == VREG) {
				vinitvmio(vp, ip->ino_data.size,
					  hammer_blocksize(ip->ino_data.size),
					  hammer_blockoff(ip->ino_data.size));
			}
			break;
		}

		/*
		 * Interlock vnode clearing.  This does not prevent the
		 * vnode from going into a reclaimed state but it does
		 * prevent it from being destroyed or reused so the vget()
		 * will properly fail.
		 */
		hammer_lock_ex(&ip->lock);
		if ((vp = ip->vp) == NULL) {
			hammer_unlock(&ip->lock);
			continue;
		}
		vhold_interlocked(vp);
		hammer_unlock(&ip->lock);

		/*
		 * Loop if the vget fails (aka races), or if the vp
		 * no longer matches ip->vp.  The hold taken above is
		 * dropped on every path.
		 */
		if (vget(vp, LK_EXCLUSIVE) == 0) {
			if (vp == ip->vp) {
				vdrop(vp);
				break;
			}
			vput(vp);
		}
		vdrop(vp);
	}
	*vpp = vp;
	return(error);
}
| 366 | ||
/*
 * Scan the in-memory inode tree for every inode whose obj_localization
 * and obj_id match iinfo (obj_asof is not compared, so all cached
 * snapshots of the object are selected) and issue the call-back for each.
 * This routine is used for direct-io invalidation and does not create
 * any new inodes.
 *
 * NOTE: the scan is handed iinfo as its data pointer, which is what the
 *	 comparator requires.  The separate 'data' argument is not
 *	 forwarded by this function; presumably the call-back therefore
 *	 also receives iinfo -- confirm against the RB_SCAN implementation.
 */
void
hammer_scan_inode_snapshots(hammer_mount_t hmp, hammer_inode_info_t iinfo,
			    int (*callback)(hammer_inode_t ip, void *data),
			    void *data)
{
	hammer_ino_rb_tree_RB_SCAN(&hmp->rb_inos_root,
				   hammer_inode_info_cmp_all_history,
				   callback, iinfo);
}
| 381 | ||
/*
 * Acquire a HAMMER inode.  The returned inode is not locked.  These functions
 * do not attach or detach the related vnode (use hammer_get_vnode() for
 * that).
 *
 * The in-memory inode tree is consulted first.  On a miss a new in-memory
 * inode is allocated, loaded from the on-disk inode record via a B-Tree
 * lookup, and inserted into the tree.
 *
 * trans	- transaction; supplies the mount (trans->hmp).
 * dip		- optional governing directory inode (may be NULL).  Used to
 *		  seed the cursor cache and to share the PFS structure.
 * obj_id, asof, localization
 *		- identify the inode to acquire.
 * flags	- only applied for newly created inodes, and only
 *		  HAMMER_INODE_RO is inherited.
 * errorp	- receives 0 on success or an error code.
 *
 * Returns a referenced inode, or NULL with *errorp set.
 *
 * Called from the frontend.
 */
struct hammer_inode *
hammer_get_inode(hammer_transaction_t trans, hammer_inode_t dip,
		 int64_t obj_id, hammer_tid_t asof, u_int32_t localization,
		 int flags, int *errorp)
{
	hammer_mount_t hmp = trans->hmp;
	struct hammer_node_cache *cachep;
	struct hammer_inode_info iinfo;
	struct hammer_cursor cursor;
	struct hammer_inode *ip;


	/*
	 * Determine if we already have an inode cached.  If we do then
	 * we are golden.
	 *
	 * If we find an inode with no vnode we have to mark the
	 * transaction such that hammer_inode_waitreclaims() is
	 * called later on to avoid building up an infinite number
	 * of inodes.  Otherwise we can continue to add new inodes
	 * faster than they can be disposed of, even with the tsleep
	 * delay.
	 *
	 * If we find a dummy inode we return a failure so dounlink
	 * (which does another lookup) doesn't try to mess with the
	 * link count.  hammer_vop_nresolve() uses hammer_get_dummy_inode()
	 * to ref dummy inodes.
	 */
	iinfo.obj_id = obj_id;
	iinfo.obj_asof = asof;
	iinfo.obj_localization = localization;
loop:
	ip = hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, &iinfo);
	if (ip) {
		if (ip->flags & HAMMER_INODE_DUMMY) {
			*errorp = ENOENT;
			return(NULL);
		}
		hammer_ref(&ip->lock);
		*errorp = 0;
		return(ip);
	}

	/*
	 * Allocate a new inode structure and deal with races later
	 * (a racing instantiation is detected when the tree insertion
	 * below fails).
	 */
	ip = kmalloc(sizeof(*ip), hmp->m_inodes, M_WAITOK|M_ZERO);
	++hammer_count_inodes;
	++hmp->count_inodes;
	ip->obj_id = obj_id;
	ip->obj_asof = iinfo.obj_asof;
	ip->obj_localization = localization;
	ip->hmp = hmp;
	ip->flags = flags & HAMMER_INODE_RO;
	ip->cache[0].ip = ip;
	ip->cache[1].ip = ip;
	ip->cache[2].ip = ip;
	ip->cache[3].ip = ip;
	if (hmp->ronly)
		ip->flags |= HAMMER_INODE_RO;
	ip->sync_trunc_off = ip->trunc_off = ip->save_trunc_off =
		0x7FFFFFFFFFFFFFFFLL;
	RB_INIT(&ip->rec_tree);
	TAILQ_INIT(&ip->target_list);
	hammer_ref(&ip->lock);

	/*
	 * Locate the on-disk inode.  If this is a PFS root we always
	 * access the current version of the root inode and (if it is not
	 * a master) always access information under it with a snapshot
	 * TID.
	 *
	 * We cache recent inode lookups in this directory in dip->cache[2].
	 * If we can't find it we assume the inode we are looking for is
	 * close to the directory inode.
	 */
retry:
	cachep = NULL;
	if (dip) {
		if (dip->cache[2].node)
			cachep = &dip->cache[2];
		else
			cachep = &dip->cache[0];
	}
	hammer_init_cursor(trans, &cursor, cachep, NULL);
	cursor.key_beg.localization = localization + HAMMER_LOCALIZE_INODE;
	cursor.key_beg.obj_id = ip->obj_id;
	cursor.key_beg.key = 0;
	cursor.key_beg.create_tid = 0;
	cursor.key_beg.delete_tid = 0;
	cursor.key_beg.rec_type = HAMMER_RECTYPE_INODE;
	cursor.key_beg.obj_type = 0;

	cursor.asof = iinfo.obj_asof;
	cursor.flags = HAMMER_CURSOR_GET_LEAF | HAMMER_CURSOR_GET_DATA |
		       HAMMER_CURSOR_ASOF;

	/*
	 * A deadlock indication from the lookup is handled by tearing
	 * the cursor down and redoing the lookup with the same ip.
	 */
	*errorp = hammer_btree_lookup(&cursor);
	if (*errorp == EDEADLK) {
		hammer_done_cursor(&cursor);
		goto retry;
	}

	/*
	 * On success the B-Tree lookup will hold the appropriate
	 * buffer cache buffers and provide a pointer to the requested
	 * information.  Copy the information to the in-memory inode
	 * and cache the B-Tree node to improve future operations.
	 */
	if (*errorp == 0) {
		ip->ino_leaf = cursor.node->ondisk->elms[cursor.index].leaf;
		ip->ino_data = cursor.data->inode;

		/*
		 * cache[0] tries to cache the location of the object inode.
		 * The assumption is that it is near the directory inode.
		 *
		 * cache[1] tries to cache the location of the object data.
		 * We might have something in the governing directory from
		 * scan optimizations (see the strategy code in
		 * hammer_vnops.c).
		 *
		 * We update dip->cache[2], if possible, with the location
		 * of the object inode for future directory shortcuts.
		 */
		hammer_cache_node(&ip->cache[0], cursor.node);
		if (dip) {
			if (dip->cache[3].node) {
				hammer_cache_node(&ip->cache[1],
						  dip->cache[3].node);
			}
			hammer_cache_node(&dip->cache[2], cursor.node);
		}

		/*
		 * The file should not contain any data past the file size
		 * stored in the inode.  Setting save_trunc_off to the
		 * file size instead of max reduces B-Tree lookup overheads
		 * on append by allowing the flusher to avoid checking for
		 * record overwrites.
		 */
		ip->save_trunc_off = ip->ino_data.size;

		/*
		 * Locate and assign the pseudofs management structure to
		 * the inode.  When the directory is in the same PFS its
		 * structure is shared (with an extra reference), otherwise
		 * the structure is loaded.
		 *
		 * NOTE(review): *errorp is cleared unconditionally after
		 * the load, so every error from hammer_load_pseudofs() is
		 * discarded, not only ENOENT.
		 */
		if (dip && dip->obj_localization == ip->obj_localization) {
			ip->pfsm = dip->pfsm;
			hammer_ref(&ip->pfsm->lock);
		} else {
			ip->pfsm = hammer_load_pseudofs(trans,
							ip->obj_localization,
							errorp);
			*errorp = 0;	/* ignore ENOENT */
		}
	}

	/*
	 * The inode is placed on the red-black tree and will be synced to
	 * the media when flushed or by the filesystem sync.  If this races
	 * another instantiation/lookup the insertion will fail, in which
	 * case our copy is freed and the lookup is restarted from the top.
	 *
	 * If the on-disk lookup failed the new inode is freed and NULL is
	 * returned with *errorp holding the lookup error.
	 */
	if (*errorp == 0) {
		if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) {
			hammer_free_inode(ip);
			hammer_done_cursor(&cursor);
			goto loop;
		}
		ip->flags |= HAMMER_INODE_ONDISK;
	} else {
		if (ip->flags & HAMMER_INODE_RSV_INODES) {
			ip->flags &= ~HAMMER_INODE_RSV_INODES; /* sanity */
			--hmp->rsv_inodes;
		}

		hammer_free_inode(ip);
		ip = NULL;
	}
	hammer_done_cursor(&cursor);

	/*
	 * NEWINODE is only set if the inode becomes dirty later,
	 * setting it here just leads to unnecessary stalls.
	 *
	 * trans->flags |= HAMMER_TRANSF_NEWINODE;
	 */
	return (ip);
}
| 581 | ||
| 8cd0a023 | 582 | /* |
| 4c286c36 MD |
583 | * Get a dummy inode to placemark a broken directory entry. |
| 584 | */ | |
| 585 | struct hammer_inode * | |
| 586 | hammer_get_dummy_inode(hammer_transaction_t trans, hammer_inode_t dip, | |
| 587 | int64_t obj_id, hammer_tid_t asof, u_int32_t localization, | |
| 588 | int flags, int *errorp) | |
| 589 | { | |
| 590 | hammer_mount_t hmp = trans->hmp; | |
| 591 | struct hammer_inode_info iinfo; | |
| 592 | struct hammer_inode *ip; | |
| 593 | ||
| 594 | /* | |
| 595 | * Determine if we already have an inode cached. If we do then | |
| 596 | * we are golden. | |
| 597 | * | |
| 598 | * If we find an inode with no vnode we have to mark the | |
| 599 | * transaction such that hammer_inode_waitreclaims() is | |
| 600 | * called later on to avoid building up an infinite number | |
| 601 | * of inodes. Otherwise we can continue to * add new inodes | |
| 602 | * faster then they can be disposed of, even with the tsleep | |
| 603 | * delay. | |
| 604 | * | |
| 605 | * If we find a non-fake inode we return an error. Only fake | |
| 606 | * inodes can be returned by this routine. | |
| 607 | */ | |
| 608 | iinfo.obj_id = obj_id; | |
| 609 | iinfo.obj_asof = asof; | |
| 610 | iinfo.obj_localization = localization; | |
| 611 | loop: | |
| 612 | *errorp = 0; | |
| 613 | ip = hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, &iinfo); | |
| 614 | if (ip) { | |
| 615 | if ((ip->flags & HAMMER_INODE_DUMMY) == 0) { | |
| 616 | *errorp = ENOENT; | |
| 617 | return(NULL); | |
| 618 | } | |
| 619 | hammer_ref(&ip->lock); | |
| 620 | return(ip); | |
| 621 | } | |
| 622 | ||
| 623 | /* | |
| 624 | * Allocate a new inode structure and deal with races later. | |
| 625 | */ | |
| 626 | ip = kmalloc(sizeof(*ip), hmp->m_inodes, M_WAITOK|M_ZERO); | |
| 627 | ++hammer_count_inodes; | |
| 628 | ++hmp->count_inodes; | |
| 629 | ip->obj_id = obj_id; | |
| 630 | ip->obj_asof = iinfo.obj_asof; | |
| 631 | ip->obj_localization = localization; | |
| 632 | ip->hmp = hmp; | |
| 633 | ip->flags = flags | HAMMER_INODE_RO | HAMMER_INODE_DUMMY; | |
| 634 | ip->cache[0].ip = ip; | |
| 635 | ip->cache[1].ip = ip; | |
| 39d8fd63 MD |
636 | ip->cache[2].ip = ip; |
| 637 | ip->cache[3].ip = ip; | |
| 4c286c36 MD |
638 | ip->sync_trunc_off = ip->trunc_off = ip->save_trunc_off = |
| 639 | 0x7FFFFFFFFFFFFFFFLL; | |
| 640 | RB_INIT(&ip->rec_tree); | |
| 641 | TAILQ_INIT(&ip->target_list); | |
| 642 | hammer_ref(&ip->lock); | |
| 643 | ||
| 644 | /* | |
| 645 | * Populate the dummy inode. Leave everything zero'd out. | |
| 646 | * | |
| 647 | * (ip->ino_leaf and ip->ino_data) | |
| 648 | * | |
| 649 | * Make the dummy inode a FIFO object which most copy programs | |
| 650 | * will properly ignore. | |
| 651 | */ | |
| 652 | ip->save_trunc_off = ip->ino_data.size; | |
| 653 | ip->ino_data.obj_type = HAMMER_OBJTYPE_FIFO; | |
| 654 | ||
| 655 | /* | |
| 656 | * Locate and assign the pseudofs management structure to | |
| 657 | * the inode. | |
| 658 | */ | |
| 659 | if (dip && dip->obj_localization == ip->obj_localization) { | |
| 660 | ip->pfsm = dip->pfsm; | |
| 661 | hammer_ref(&ip->pfsm->lock); | |
| 662 | } else { | |
| 663 | ip->pfsm = hammer_load_pseudofs(trans, ip->obj_localization, | |
| 664 | errorp); | |
| 665 | *errorp = 0; /* ignore ENOENT */ | |
| 666 | } | |
| 667 | ||
| 668 | /* | |
| 669 | * The inode is placed on the red-black tree and will be synced to | |
| 670 | * the media when flushed or by the filesystem sync. If this races | |
| 671 | * another instantiation/lookup the insertion will fail. | |
| 672 | * | |
| 673 | * NOTE: Do not set HAMMER_INODE_ONDISK. The inode is a fake. | |
| 674 | */ | |
| 675 | if (*errorp == 0) { | |
| 676 | if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) { | |
| 677 | hammer_free_inode(ip); | |
| 678 | goto loop; | |
| 679 | } | |
| 680 | } else { | |
| 681 | if (ip->flags & HAMMER_INODE_RSV_INODES) { | |
| 682 | ip->flags &= ~HAMMER_INODE_RSV_INODES; /* sanity */ | |
| 683 | --hmp->rsv_inodes; | |
| 684 | } | |
| 685 | hammer_free_inode(ip); | |
| 686 | ip = NULL; | |
| 687 | } | |
| 688 | trans->flags |= HAMMER_TRANSF_NEWINODE; | |
| 689 | return (ip); | |
| 690 | } | |
| 691 | ||
| 692 | /* | |
| 39d8fd63 MD |
693 | * Return a referenced inode only if it is in our inode cache. |
| 694 | * | |
| 695 | * Dummy inodes do not count. | |
| 696 | */ | |
| 697 | struct hammer_inode * | |
| 698 | hammer_find_inode(hammer_transaction_t trans, int64_t obj_id, | |
| 699 | hammer_tid_t asof, u_int32_t localization) | |
| 700 | { | |
| 701 | hammer_mount_t hmp = trans->hmp; | |
| 702 | struct hammer_inode_info iinfo; | |
| 703 | struct hammer_inode *ip; | |
| 704 | ||
| 705 | iinfo.obj_id = obj_id; | |
| 706 | iinfo.obj_asof = asof; | |
| 707 | iinfo.obj_localization = localization; | |
| 5a64efa1 | 708 | |
| 39d8fd63 MD |
709 | ip = hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, &iinfo); |
| 710 | if (ip) { | |
| 711 | if (ip->flags & HAMMER_INODE_DUMMY) | |
| 712 | ip = NULL; | |
| 713 | else | |
| 714 | hammer_ref(&ip->lock); | |
| 715 | } | |
| 716 | return(ip); | |
| 717 | } | |
| 718 | ||
| 719 | /* | |
| 8cd0a023 | 720 | * Create a new filesystem object, returning the inode in *ipp. The |
| ea434b6f | 721 | * returned inode will be referenced. The inode is created in-memory. |
| 8cd0a023 | 722 | * |
| ea434b6f MD |
723 | * If pfsm is non-NULL the caller wishes to create the root inode for |
| 724 | * a master PFS. | |
| 8cd0a023 MD |
725 | */ |
| 726 | int | |
| a89aec1b | 727 | hammer_create_inode(hammer_transaction_t trans, struct vattr *vap, |
| 5a64efa1 MD |
728 | struct ucred *cred, |
| 729 | hammer_inode_t dip, const char *name, int namelen, | |
| ea434b6f | 730 | hammer_pseudofs_inmem_t pfsm, struct hammer_inode **ipp) |
| 66325755 | 731 | { |
| a89aec1b MD |
732 | hammer_mount_t hmp; |
| 733 | hammer_inode_t ip; | |
| 6b4f890b | 734 | uid_t xuid; |
| 5a930e66 | 735 | int error; |
| 5a64efa1 MD |
736 | int64_t namekey; |
| 737 | u_int32_t dummy; | |
| 66325755 | 738 | |
| 8cd0a023 | 739 | hmp = trans->hmp; |
| 5a930e66 | 740 | |
| bac808fe | 741 | ip = kmalloc(sizeof(*ip), hmp->m_inodes, M_WAITOK|M_ZERO); |
| b3deaf57 | 742 | ++hammer_count_inodes; |
| 9f5097dc | 743 | ++hmp->count_inodes; |
| 82010f9f | 744 | trans->flags |= HAMMER_TRANSF_NEWINODE; |
| 5a930e66 | 745 | |
| ea434b6f MD |
746 | if (pfsm) { |
| 747 | KKASSERT(pfsm->localization != 0); | |
| 5a930e66 | 748 | ip->obj_id = HAMMER_OBJID_ROOT; |
| ea434b6f MD |
749 | ip->obj_localization = pfsm->localization; |
| 750 | } else { | |
| 751 | KKASSERT(dip != NULL); | |
| 5a64efa1 MD |
752 | namekey = hammer_directory_namekey(dip, name, namelen, &dummy); |
| 753 | ip->obj_id = hammer_alloc_objid(hmp, dip, namekey); | |
| ea434b6f MD |
754 | ip->obj_localization = dip->obj_localization; |
| 755 | } | |
| 5a930e66 | 756 | |
| 8cd0a023 | 757 | KKASSERT(ip->obj_id != 0); |
| 7f7c1f84 | 758 | ip->obj_asof = hmp->asof; |
| 8cd0a023 | 759 | ip->hmp = hmp; |
| b84de5af | 760 | ip->flush_state = HAMMER_FST_IDLE; |
| ddfdf542 MD |
761 | ip->flags = HAMMER_INODE_DDIRTY | |
| 762 | HAMMER_INODE_ATIME | HAMMER_INODE_MTIME; | |
| bcac4bbb MD |
763 | ip->cache[0].ip = ip; |
| 764 | ip->cache[1].ip = ip; | |
| 39d8fd63 MD |
765 | ip->cache[2].ip = ip; |
| 766 | ip->cache[3].ip = ip; | |
| 8cd0a023 | 767 | |
| a5fddc16 | 768 | ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL; |
| a9d52b76 | 769 | /* ip->save_trunc_off = 0; (already zero) */ |
| 8cd0a023 | 770 | RB_INIT(&ip->rec_tree); |
| 1f07f686 | 771 | TAILQ_INIT(&ip->target_list); |
| 8cd0a023 | 772 | |
| bcac4bbb | 773 | ip->ino_data.atime = trans->time; |
| 11ad5ade MD |
774 | ip->ino_data.mtime = trans->time; |
| 775 | ip->ino_data.size = 0; | |
| 776 | ip->ino_data.nlinks = 0; | |
| e63644f0 MD |
777 | |
| 778 | /* | |
| 779 | * A nohistory designator on the parent directory is inherited by | |
| 5a930e66 MD |
780 | * the child. We will do this even for pseudo-fs creation... the |
| 781 | * sysad can turn it off. | |
| e63644f0 | 782 | */ |
| ea434b6f MD |
783 | if (dip) { |
| 784 | ip->ino_data.uflags = dip->ino_data.uflags & | |
| 785 | (SF_NOHISTORY|UF_NOHISTORY|UF_NODUMP); | |
| 786 | } | |
| e63644f0 | 787 | |
| 11ad5ade | 788 | ip->ino_leaf.base.btype = HAMMER_BTREE_TYPE_RECORD; |
| 5a930e66 MD |
789 | ip->ino_leaf.base.localization = ip->obj_localization + |
| 790 | HAMMER_LOCALIZE_INODE; | |
| 11ad5ade MD |
791 | ip->ino_leaf.base.obj_id = ip->obj_id; |
| 792 | ip->ino_leaf.base.key = 0; | |
| 793 | ip->ino_leaf.base.create_tid = 0; | |
| 794 | ip->ino_leaf.base.delete_tid = 0; | |
| 795 | ip->ino_leaf.base.rec_type = HAMMER_RECTYPE_INODE; | |
| 796 | ip->ino_leaf.base.obj_type = hammer_get_obj_type(vap->va_type); | |
| 797 | ||
| 798 | ip->ino_data.obj_type = ip->ino_leaf.base.obj_type; | |
| 8cd0a023 MD |
799 | ip->ino_data.version = HAMMER_INODE_DATA_VERSION; |
| 800 | ip->ino_data.mode = vap->va_mode; | |
| b84de5af | 801 | ip->ino_data.ctime = trans->time; |
| 5a930e66 MD |
802 | |
| 803 | /* | |
| beec5dc4 MD |
804 | * If we are running version 2 or greater directory entries are |
| 805 | * inode-localized instead of data-localized. | |
| 5e435c92 MD |
806 | */ |
| 807 | if (trans->hmp->version >= HAMMER_VOL_VERSION_TWO) { | |
| 808 | if (ip->ino_leaf.base.obj_type == HAMMER_OBJTYPE_DIRECTORY) { | |
| beec5dc4 MD |
809 | ip->ino_data.cap_flags |= |
| 810 | HAMMER_INODE_CAP_DIR_LOCAL_INO; | |
| 5e435c92 MD |
811 | } |
| 812 | } | |
| 89cba4df MD |
813 | if (trans->hmp->version >= HAMMER_VOL_VERSION_SIX) { |
| 814 | if (ip->ino_leaf.base.obj_type == HAMMER_OBJTYPE_DIRECTORY) { | |
| 815 | ip->ino_data.cap_flags |= | |
| 816 | HAMMER_INODE_CAP_DIRHASH_ALG1; | |
| 817 | } | |
| 818 | } | |
| 5e435c92 MD |
819 | |
| 820 | /* | |
| 5a930e66 MD |
821 | * Setup the ".." pointer. This only needs to be done for directories |
| 822 | * but we do it for all objects as a recovery aid. | |
| ea434b6f MD |
823 | */ |
| 824 | if (dip) | |
| 825 | ip->ino_data.parent_obj_id = dip->ino_leaf.base.obj_id; | |
| 826 | #if 0 | |
| 827 | /* | |
| 5a930e66 | 828 | * The parent_obj_localization field only applies to pseudo-fs roots. |
| ea434b6f MD |
829 | * XXX this is no longer applicable, PFSs are no longer directly |
| 830 | * tied into the parent's directory structure. | |
| 5a930e66 | 831 | */ |
| 5a930e66 MD |
832 | if (ip->ino_data.obj_type == HAMMER_OBJTYPE_DIRECTORY && |
| 833 | ip->obj_id == HAMMER_OBJID_ROOT) { | |
| 834 | ip->ino_data.ext.obj.parent_obj_localization = | |
| 835 | dip->obj_localization; | |
| 836 | } | |
| ea434b6f | 837 | #endif |
| 6b4f890b | 838 | |
| 11ad5ade | 839 | switch(ip->ino_leaf.base.obj_type) { |
| 7a04d74f MD |
840 | case HAMMER_OBJTYPE_CDEV: |
| 841 | case HAMMER_OBJTYPE_BDEV: | |
| 842 | ip->ino_data.rmajor = vap->va_rmajor; | |
| 843 | ip->ino_data.rminor = vap->va_rminor; | |
| 844 | break; | |
| 845 | default: | |
| 846 | break; | |
| 847 | } | |
| 848 | ||
| 6b4f890b MD |
849 | /* |
| 850 | * Calculate default uid/gid and overwrite with information from | |
| 851 | * the vap. | |
| 852 | */ | |
| ea434b6f MD |
853 | if (dip) { |
| 854 | xuid = hammer_to_unix_xid(&dip->ino_data.uid); | |
| 855 | xuid = vop_helper_create_uid(hmp->mp, dip->ino_data.mode, | |
| 856 | xuid, cred, &vap->va_mode); | |
| 857 | } else { | |
| 858 | xuid = 0; | |
| 859 | } | |
| 6b4f890b MD |
860 | ip->ino_data.mode = vap->va_mode; |
| 861 | ||
| 8cd0a023 MD |
862 | if (vap->va_vaflags & VA_UID_UUID_VALID) |
| 863 | ip->ino_data.uid = vap->va_uid_uuid; | |
| 6b4f890b | 864 | else if (vap->va_uid != (uid_t)VNOVAL) |
| 7538695e MD |
865 | hammer_guid_to_uuid(&ip->ino_data.uid, vap->va_uid); |
| 866 | else | |
| 6b4f890b | 867 | hammer_guid_to_uuid(&ip->ino_data.uid, xuid); |
| 7538695e | 868 | |
| 8cd0a023 MD |
869 | if (vap->va_vaflags & VA_GID_UUID_VALID) |
| 870 | ip->ino_data.gid = vap->va_gid_uuid; | |
| 6b4f890b | 871 | else if (vap->va_gid != (gid_t)VNOVAL) |
| 8cd0a023 | 872 | hammer_guid_to_uuid(&ip->ino_data.gid, vap->va_gid); |
| ea434b6f | 873 | else if (dip) |
| 7538695e | 874 | ip->ino_data.gid = dip->ino_data.gid; |
| 8cd0a023 MD |
875 | |
| 876 | hammer_ref(&ip->lock); | |
| 5fa5c92f | 877 | |
| ea434b6f MD |
878 | if (pfsm) { |
| 879 | ip->pfsm = pfsm; | |
| 880 | hammer_ref(&pfsm->lock); | |
| 881 | error = 0; | |
| 882 | } else if (dip->obj_localization == ip->obj_localization) { | |
| 5fa5c92f MD |
883 | ip->pfsm = dip->pfsm; |
| 884 | hammer_ref(&ip->pfsm->lock); | |
| 885 | error = 0; | |
| 886 | } else { | |
| ea434b6f MD |
887 | ip->pfsm = hammer_load_pseudofs(trans, |
| 888 | ip->obj_localization, | |
| 889 | &error); | |
| 890 | error = 0; /* ignore ENOENT */ | |
| 5fa5c92f MD |
891 | } |
| 892 | ||
| 893 | if (error) { | |
| 894 | hammer_free_inode(ip); | |
| 895 | ip = NULL; | |
| 896 | } else if (RB_INSERT(hammer_ino_rb_tree, &hmp->rb_inos_root, ip)) { | |
| 973c11b9 MD |
897 | panic("hammer_create_inode: duplicate obj_id %llx", |
| 898 | (long long)ip->obj_id); | |
| 5fa5c92f MD |
899 | /* not reached */ |
| 900 | hammer_free_inode(ip); | |
| 8cd0a023 MD |
901 | } |
| 902 | *ipp = ip; | |
| 5fa5c92f MD |
903 | return(error); |
| 904 | } | |
| 905 | ||
| 906 | /* | |
| 907 | * Final cleanup / freeing of an inode structure | |
| 908 | */ | |
| 909 | static void | |
| 910 | hammer_free_inode(hammer_inode_t ip) | |
| 911 | { | |
| bac808fe MD |
912 | struct hammer_mount *hmp; |
| 913 | ||
| 914 | hmp = ip->hmp; | |
| 250aec18 | 915 | KKASSERT(hammer_oneref(&ip->lock)); |
| 5fa5c92f MD |
916 | hammer_uncache_node(&ip->cache[0]); |
| 917 | hammer_uncache_node(&ip->cache[1]); | |
| 39d8fd63 MD |
918 | hammer_uncache_node(&ip->cache[2]); |
| 919 | hammer_uncache_node(&ip->cache[3]); | |
| ccf6a64d | 920 | hammer_inode_wakereclaims(ip); |
| 5fa5c92f MD |
921 | if (ip->objid_cache) |
| 922 | hammer_clear_objid(ip); | |
| 923 | --hammer_count_inodes; | |
| bac808fe | 924 | --hmp->count_inodes; |
| 5fa5c92f | 925 | if (ip->pfsm) { |
| bac808fe | 926 | hammer_rel_pseudofs(hmp, ip->pfsm); |
| 5fa5c92f MD |
927 | ip->pfsm = NULL; |
| 928 | } | |
| bac808fe | 929 | kfree(ip, hmp->m_inodes); |
| 5fa5c92f MD |
930 | ip = NULL; |
| 931 | } | |
| 932 | ||
| 933 | /* | |
| ea434b6f MD |
934 | * Retrieve pseudo-fs data. NULL will never be returned. |
| 935 | * | |
| 936 | * If an error occurs *errorp will be set and a default template is returned, | |
| 937 | * otherwise *errorp is set to 0. Typically when an error occurs it will | |
| 938 | * be ENOENT. | |
| 5fa5c92f | 939 | */ |
| ea434b6f MD |
940 | hammer_pseudofs_inmem_t |
| 941 | hammer_load_pseudofs(hammer_transaction_t trans, | |
| 942 | u_int32_t localization, int *errorp) | |
| 5fa5c92f MD |
943 | { |
| 944 | hammer_mount_t hmp = trans->hmp; | |
| ea434b6f | 945 | hammer_inode_t ip; |
| 5fa5c92f MD |
946 | hammer_pseudofs_inmem_t pfsm; |
| 947 | struct hammer_cursor cursor; | |
| 5fa5c92f MD |
948 | int bytes; |
| 949 | ||
| 950 | retry: | |
| ea434b6f | 951 | pfsm = RB_LOOKUP(hammer_pfs_rb_tree, &hmp->rb_pfsm_root, localization); |
| 5fa5c92f | 952 | if (pfsm) { |
| 5fa5c92f | 953 | hammer_ref(&pfsm->lock); |
| ea434b6f MD |
954 | *errorp = 0; |
| 955 | return(pfsm); | |
| 956 | } | |
| 957 | ||
| 958 | /* | |
| 959 | * PFS records are stored in the root inode (not the PFS root inode, | |
| 960 | * but the real root). Avoid an infinite recursion if loading | |
| 961 | * the PFS for the real root. | |
| 962 | */ | |
| 963 | if (localization) { | |
| 964 | ip = hammer_get_inode(trans, NULL, HAMMER_OBJID_ROOT, | |
| 965 | HAMMER_MAX_TID, | |
| 966 | HAMMER_DEF_LOCALIZATION, 0, errorp); | |
| 967 | } else { | |
| 968 | ip = NULL; | |
| 5fa5c92f MD |
969 | } |
| 970 | ||
| bac808fe | 971 | pfsm = kmalloc(sizeof(*pfsm), hmp->m_misc, M_WAITOK | M_ZERO); |
| ea434b6f | 972 | pfsm->localization = localization; |
| a56cb012 MD |
973 | pfsm->pfsd.unique_uuid = trans->rootvol->ondisk->vol_fsid; |
| 974 | pfsm->pfsd.shared_uuid = pfsm->pfsd.unique_uuid; | |
| 5fa5c92f | 975 | |
| ea434b6f MD |
976 | hammer_init_cursor(trans, &cursor, (ip ? &ip->cache[1] : NULL), ip); |
| 977 | cursor.key_beg.localization = HAMMER_DEF_LOCALIZATION + | |
| 5fa5c92f MD |
978 | HAMMER_LOCALIZE_MISC; |
| 979 | cursor.key_beg.obj_id = HAMMER_OBJID_ROOT; | |
| 980 | cursor.key_beg.create_tid = 0; | |
| 981 | cursor.key_beg.delete_tid = 0; | |
| ea434b6f | 982 | cursor.key_beg.rec_type = HAMMER_RECTYPE_PFS; |
| 5fa5c92f | 983 | cursor.key_beg.obj_type = 0; |
| ea434b6f | 984 | cursor.key_beg.key = localization; |
| 5fa5c92f MD |
985 | cursor.asof = HAMMER_MAX_TID; |
| 986 | cursor.flags |= HAMMER_CURSOR_ASOF; | |
| 987 | ||
| ea434b6f MD |
988 | if (ip) |
| 989 | *errorp = hammer_ip_lookup(&cursor); | |
| 990 | else | |
| 991 | *errorp = hammer_btree_lookup(&cursor); | |
| 992 | if (*errorp == 0) { | |
| 993 | *errorp = hammer_ip_resolve_data(&cursor); | |
| 994 | if (*errorp == 0) { | |
| 842e7a70 MD |
995 | if (cursor.data->pfsd.mirror_flags & |
| 996 | HAMMER_PFSD_DELETED) { | |
| 997 | *errorp = ENOENT; | |
| 998 | } else { | |
| 999 | bytes = cursor.leaf->data_len; | |
| 1000 | if (bytes > sizeof(pfsm->pfsd)) | |
| 1001 | bytes = sizeof(pfsm->pfsd); | |
| 1002 | bcopy(cursor.data, &pfsm->pfsd, bytes); | |
| 1003 | } | |
| 5fa5c92f | 1004 | } |
| 5fa5c92f | 1005 | } |
| 5fa5c92f MD |
1006 | hammer_done_cursor(&cursor); |
| 1007 | ||
| ea434b6f MD |
1008 | pfsm->fsid_udev = hammer_fsid_to_udev(&pfsm->pfsd.shared_uuid); |
| 1009 | hammer_ref(&pfsm->lock); | |
| 1010 | if (ip) | |
| 1011 | hammer_rel_inode(ip, 0); | |
| 1012 | if (RB_INSERT(hammer_pfs_rb_tree, &hmp->rb_pfsm_root, pfsm)) { | |
| bac808fe | 1013 | kfree(pfsm, hmp->m_misc); |
| ea434b6f | 1014 | goto retry; |
| 5fa5c92f | 1015 | } |
| ea434b6f | 1016 | return(pfsm); |
| 5fa5c92f MD |
1017 | } |
| 1018 | ||
| 1019 | /* | |
| 1020 | * Store pseudo-fs data. The backend will automatically delete any prior | |
| 1021 | * on-disk pseudo-fs data but we have to delete in-memory versions. | |
| 1022 | */ | |
| 1023 | int | |
| ea434b6f | 1024 | hammer_save_pseudofs(hammer_transaction_t trans, hammer_pseudofs_inmem_t pfsm) |
| 5fa5c92f MD |
1025 | { |
| 1026 | struct hammer_cursor cursor; | |
| 5fa5c92f | 1027 | hammer_record_t record; |
| ea434b6f | 1028 | hammer_inode_t ip; |
| 5fa5c92f MD |
1029 | int error; |
| 1030 | ||
| ea434b6f MD |
1031 | ip = hammer_get_inode(trans, NULL, HAMMER_OBJID_ROOT, HAMMER_MAX_TID, |
| 1032 | HAMMER_DEF_LOCALIZATION, 0, &error); | |
| 5fa5c92f | 1033 | retry: |
| a56cb012 | 1034 | pfsm->fsid_udev = hammer_fsid_to_udev(&pfsm->pfsd.shared_uuid); |
| 5fa5c92f MD |
1035 | hammer_init_cursor(trans, &cursor, &ip->cache[1], ip); |
| 1036 | cursor.key_beg.localization = ip->obj_localization + | |
| 1037 | HAMMER_LOCALIZE_MISC; | |
| ea434b6f | 1038 | cursor.key_beg.obj_id = HAMMER_OBJID_ROOT; |
| 5fa5c92f MD |
1039 | cursor.key_beg.create_tid = 0; |
| 1040 | cursor.key_beg.delete_tid = 0; | |
| ea434b6f | 1041 | cursor.key_beg.rec_type = HAMMER_RECTYPE_PFS; |
| 5fa5c92f | 1042 | cursor.key_beg.obj_type = 0; |
| ea434b6f | 1043 | cursor.key_beg.key = pfsm->localization; |
| 5fa5c92f MD |
1044 | cursor.asof = HAMMER_MAX_TID; |
| 1045 | cursor.flags |= HAMMER_CURSOR_ASOF; | |
| 1046 | ||
| 3214ade6 MD |
1047 | /* |
| 1048 | * Replace any in-memory version of the record. | |
| 1049 | */ | |
| 5fa5c92f MD |
1050 | error = hammer_ip_lookup(&cursor); |
| 1051 | if (error == 0 && hammer_cursor_inmem(&cursor)) { | |
| 1052 | record = cursor.iprec; | |
| 1053 | if (record->flags & HAMMER_RECF_INTERLOCK_BE) { | |
| 1054 | KKASSERT(cursor.deadlk_rec == NULL); | |
| 1055 | hammer_ref(&record->lock); | |
| 1056 | cursor.deadlk_rec = record; | |
| 1057 | error = EDEADLK; | |
| 1058 | } else { | |
| 1059 | record->flags |= HAMMER_RECF_DELETED_FE; | |
| 1060 | error = 0; | |
| 1061 | } | |
| 1062 | } | |
| 3214ade6 MD |
1063 | |
| 1064 | /* | |
| 1065 | * Allocate replacement general record. The backend flush will | |
| 1066 | * delete any on-disk version of the record. | |
| 1067 | */ | |
| 5fa5c92f MD |
1068 | if (error == 0 || error == ENOENT) { |
| 1069 | record = hammer_alloc_mem_record(ip, sizeof(pfsm->pfsd)); | |
| 1070 | record->type = HAMMER_MEM_RECORD_GENERAL; | |
| 1071 | ||
| 1072 | record->leaf.base.localization = ip->obj_localization + | |
| 1073 | HAMMER_LOCALIZE_MISC; | |
| ea434b6f MD |
1074 | record->leaf.base.rec_type = HAMMER_RECTYPE_PFS; |
| 1075 | record->leaf.base.key = pfsm->localization; | |
| 5fa5c92f MD |
1076 | record->leaf.data_len = sizeof(pfsm->pfsd); |
| 1077 | bcopy(&pfsm->pfsd, record->data, sizeof(pfsm->pfsd)); | |
| 1078 | error = hammer_ip_add_record(trans, record); | |
| 1079 | } | |
| 1080 | hammer_done_cursor(&cursor); | |
| 1081 | if (error == EDEADLK) | |
| 1082 | goto retry; | |
| ea434b6f MD |
1083 | hammer_rel_inode(ip, 0); |
| 1084 | return(error); | |
| 1085 | } | |
| 1086 | ||
| 1087 | /* | |
| 1088 | * Create a root directory for a PFS if one does not alredy exist. | |
| 4c038e17 MD |
1089 | * |
| 1090 | * The PFS root stands alone so we must also bump the nlinks count | |
| 1091 | * to prevent it from being destroyed on release. | |
| ea434b6f MD |
1092 | */ |
| 1093 | int | |
| 1094 | hammer_mkroot_pseudofs(hammer_transaction_t trans, struct ucred *cred, | |
| 1095 | hammer_pseudofs_inmem_t pfsm) | |
| 1096 | { | |
| 1097 | hammer_inode_t ip; | |
| 1098 | struct vattr vap; | |
| 1099 | int error; | |
| 1100 | ||
| 1101 | ip = hammer_get_inode(trans, NULL, HAMMER_OBJID_ROOT, HAMMER_MAX_TID, | |
| 1102 | pfsm->localization, 0, &error); | |
| 1103 | if (ip == NULL) { | |
| 1104 | vattr_null(&vap); | |
| 1105 | vap.va_mode = 0755; | |
| 1106 | vap.va_type = VDIR; | |
| 5a64efa1 MD |
1107 | error = hammer_create_inode(trans, &vap, cred, |
| 1108 | NULL, NULL, 0, | |
| 1109 | pfsm, &ip); | |
| 4c038e17 MD |
1110 | if (error == 0) { |
| 1111 | ++ip->ino_data.nlinks; | |
| e98f1b96 | 1112 | hammer_modify_inode(trans, ip, HAMMER_INODE_DDIRTY); |
| 4c038e17 | 1113 | } |
| 5fa5c92f | 1114 | } |
| ea434b6f MD |
1115 | if (ip) |
| 1116 | hammer_rel_inode(ip, 0); | |
| 5fa5c92f MD |
1117 | return(error); |
| 1118 | } | |
| 1119 | ||
| ea434b6f | 1120 | /* |
| 842e7a70 MD |
1121 | * Unload any vnodes & inodes associated with a PFS, return ENOTEMPTY |
| 1122 | * if we are unable to disassociate all the inodes. | |
| 1123 | */ | |
| 1124 | static | |
| 1125 | int | |
| 1126 | hammer_unload_pseudofs_callback(hammer_inode_t ip, void *data) | |
| 1127 | { | |
| 1128 | int res; | |
| 1129 | ||
| 1130 | hammer_ref(&ip->lock); | |
| 250aec18 | 1131 | if (hammer_isactive(&ip->lock) == 2 && ip->vp) |
| 842e7a70 | 1132 | vclean_unlocked(ip->vp); |
| 250aec18 | 1133 | if (hammer_isactive(&ip->lock) == 1 && ip->vp == NULL) |
| 842e7a70 MD |
1134 | res = 0; |
| 1135 | else | |
| 1136 | res = -1; /* stop, someone is using the inode */ | |
| 1137 | hammer_rel_inode(ip, 0); | |
| 1138 | return(res); | |
| 1139 | } | |
| 1140 | ||
| 1141 | int | |
| 1142 | hammer_unload_pseudofs(hammer_transaction_t trans, u_int32_t localization) | |
| 1143 | { | |
| 1144 | int res; | |
| 1145 | int try; | |
| 1146 | ||
| 1147 | for (try = res = 0; try < 4; ++try) { | |
| 1148 | res = hammer_ino_rb_tree_RB_SCAN(&trans->hmp->rb_inos_root, | |
| 1149 | hammer_inode_pfs_cmp, | |
| 1150 | hammer_unload_pseudofs_callback, | |
| 1151 | &localization); | |
| 1152 | if (res == 0 && try > 1) | |
| 1153 | break; | |
| 1154 | hammer_flusher_sync(trans->hmp); | |
| 1155 | } | |
| 1156 | if (res != 0) | |
| 1157 | res = ENOTEMPTY; | |
| 1158 | return(res); | |
| 1159 | } | |
| 1160 | ||
| 1161 | ||
| 1162 | /* | |
| ea434b6f MD |
1163 | * Release a reference on a PFS |
| 1164 | */ | |
| 5fa5c92f MD |
1165 | void |
| 1166 | hammer_rel_pseudofs(hammer_mount_t hmp, hammer_pseudofs_inmem_t pfsm) | |
| 1167 | { | |
| 250aec18 MD |
1168 | hammer_rel(&pfsm->lock); |
| 1169 | if (hammer_norefs(&pfsm->lock)) { | |
| 5fa5c92f | 1170 | RB_REMOVE(hammer_pfs_rb_tree, &hmp->rb_pfsm_root, pfsm); |
| bac808fe | 1171 | kfree(pfsm, hmp->m_misc); |
| 5fa5c92f | 1172 | } |
| 66325755 MD |
1173 | } |
| 1174 | ||
| d113fda1 MD |
1175 | /* |
| 1176 | * Called by hammer_sync_inode(). | |
| 1177 | */ | |
| 1178 | static int | |
| 4e17f465 | 1179 | hammer_update_inode(hammer_cursor_t cursor, hammer_inode_t ip) |
| c0ade690 | 1180 | { |
| 4e17f465 | 1181 | hammer_transaction_t trans = cursor->trans; |
| c0ade690 MD |
1182 | hammer_record_t record; |
| 1183 | int error; | |
| 06ad81ff | 1184 | int redirty; |
| c0ade690 | 1185 | |
| d26d0ae9 | 1186 | retry: |
| c0ade690 MD |
1187 | error = 0; |
| 1188 | ||
| 869e8f55 MD |
1189 | /* |
| 1190 | * If the inode has a presence on-disk then locate it and mark | |
| 1191 | * it deleted, setting DELONDISK. | |
| 1192 | * | |
| 1193 | * The record may or may not be physically deleted, depending on | |
| 1194 | * the retention policy. | |
| 1195 | */ | |
| 76376933 MD |
1196 | if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) == |
| 1197 | HAMMER_INODE_ONDISK) { | |
| 4e17f465 | 1198 | hammer_normalize_cursor(cursor); |
| 5a930e66 MD |
1199 | cursor->key_beg.localization = ip->obj_localization + |
| 1200 | HAMMER_LOCALIZE_INODE; | |
| 4e17f465 MD |
1201 | cursor->key_beg.obj_id = ip->obj_id; |
| 1202 | cursor->key_beg.key = 0; | |
| 1203 | cursor->key_beg.create_tid = 0; | |
| 1204 | cursor->key_beg.delete_tid = 0; | |
| 1205 | cursor->key_beg.rec_type = HAMMER_RECTYPE_INODE; | |
| 1206 | cursor->key_beg.obj_type = 0; | |
| 1207 | cursor->asof = ip->obj_asof; | |
| 1208 | cursor->flags &= ~HAMMER_CURSOR_INITMASK; | |
| 11ad5ade | 1209 | cursor->flags |= HAMMER_CURSOR_GET_LEAF | HAMMER_CURSOR_ASOF; |
| 4e17f465 MD |
1210 | cursor->flags |= HAMMER_CURSOR_BACKEND; |
| 1211 | ||
| 1212 | error = hammer_btree_lookup(cursor); | |
| e8599db1 MD |
1213 | if (hammer_debug_inode) |
| 1214 | kprintf("IPDEL %p %08x %d", ip, ip->flags, error); | |
| b84de5af | 1215 | |
| c0ade690 | 1216 | if (error == 0) { |
| e63644f0 | 1217 | error = hammer_ip_delete_record(cursor, ip, trans->tid); |
| e8599db1 MD |
1218 | if (hammer_debug_inode) |
| 1219 | kprintf(" error %d\n", error); | |
| 1f07f686 | 1220 | if (error == 0) { |
| 195c19a1 | 1221 | ip->flags |= HAMMER_INODE_DELONDISK; |
| 1f07f686 | 1222 | } |
| e8599db1 | 1223 | if (cursor->node) |
| bcac4bbb | 1224 | hammer_cache_node(&ip->cache[0], cursor->node); |
| 4e17f465 MD |
1225 | } |
| 1226 | if (error == EDEADLK) { | |
| 1227 | hammer_done_cursor(cursor); | |
| 1228 | error = hammer_init_cursor(trans, cursor, | |
| 1229 | &ip->cache[0], ip); | |
| e8599db1 MD |
1230 | if (hammer_debug_inode) |
| 1231 | kprintf("IPDED %p %d\n", ip, error); | |
| 4e17f465 MD |
1232 | if (error == 0) |
| 1233 | goto retry; | |
| c0ade690 | 1234 | } |
| c0ade690 MD |
1235 | } |
| 1236 | ||
| 1237 | /* | |
| 869e8f55 MD |
1238 | * Ok, write out the initial record or a new record (after deleting |
| 1239 | * the old one), unless the DELETED flag is set. This routine will | |
| 1240 | * clear DELONDISK if it writes out a record. | |
| 76376933 | 1241 | * |
| 869e8f55 MD |
1242 | * Update our inode statistics if this is the first application of |
| 1243 | * the inode on-disk. | |
| c0ade690 | 1244 | */ |
| 869e8f55 MD |
1245 | if (error == 0 && (ip->flags & HAMMER_INODE_DELETED) == 0) { |
| 1246 | /* | |
| 7a61b85d MD |
1247 | * Generate a record and write it to the media. We clean-up |
| 1248 | * the state before releasing so we do not have to set-up | |
| 1249 | * a flush_group. | |
| 869e8f55 | 1250 | */ |
| 11ad5ade | 1251 | record = hammer_alloc_mem_record(ip, 0); |
| 930bf163 | 1252 | record->type = HAMMER_MEM_RECORD_INODE; |
| 1f07f686 | 1253 | record->flush_state = HAMMER_FST_FLUSH; |
| 11ad5ade MD |
1254 | record->leaf = ip->sync_ino_leaf; |
| 1255 | record->leaf.base.create_tid = trans->tid; | |
| 1256 | record->leaf.data_len = sizeof(ip->sync_ino_data); | |
| dd94f1b1 | 1257 | record->leaf.create_ts = trans->time32; |
| b84de5af | 1258 | record->data = (void *)&ip->sync_ino_data; |
| d36ec43b | 1259 | record->flags |= HAMMER_RECF_INTERLOCK_BE; |
| 06ad81ff MD |
1260 | |
| 1261 | /* | |
| 1262 | * If this flag is set we cannot sync the new file size | |
| 1263 | * because we haven't finished related truncations. The | |
| 1264 | * inode will be flushed in another flush group to finish | |
| 1265 | * the job. | |
| 1266 | */ | |
| 1267 | if ((ip->flags & HAMMER_INODE_WOULDBLOCK) && | |
| 1268 | ip->sync_ino_data.size != ip->ino_data.size) { | |
| 1269 | redirty = 1; | |
| 1270 | ip->sync_ino_data.size = ip->ino_data.size; | |
| 1271 | } else { | |
| 1272 | redirty = 0; | |
| 1273 | } | |
| 1274 | ||
| 4e17f465 MD |
1275 | for (;;) { |
| 1276 | error = hammer_ip_sync_record_cursor(cursor, record); | |
| e8599db1 MD |
1277 | if (hammer_debug_inode) |
| 1278 | kprintf("GENREC %p rec %08x %d\n", | |
| 1279 | ip, record->flags, error); | |
| 4e17f465 MD |
1280 | if (error != EDEADLK) |
| 1281 | break; | |
| 1282 | hammer_done_cursor(cursor); | |
| 1283 | error = hammer_init_cursor(trans, cursor, | |
| 1284 | &ip->cache[0], ip); | |
| e8599db1 MD |
1285 | if (hammer_debug_inode) |
| 1286 | kprintf("GENREC reinit %d\n", error); | |
| 4e17f465 MD |
1287 | if (error) |
| 1288 | break; | |
| 1289 | } | |
| d36ec43b MD |
1290 | |
| 1291 | /* | |
| 3214ade6 MD |
1292 | * Note: The record was never on the inode's record tree |
| 1293 | * so just wave our hands importantly and destroy it. | |
| d36ec43b | 1294 | */ |
| 3214ade6 | 1295 | record->flags |= HAMMER_RECF_COMMITTED; |
| d36ec43b | 1296 | record->flags &= ~HAMMER_RECF_INTERLOCK_BE; |
| 1f07f686 | 1297 | record->flush_state = HAMMER_FST_IDLE; |
| 3214ade6 | 1298 | ++ip->rec_generation; |
| b3deaf57 | 1299 | hammer_rel_mem_record(record); |
| d36ec43b | 1300 | |
| 869e8f55 MD |
1301 | /* |
| 1302 | * Finish up. | |
| 1303 | */ | |
| d26d0ae9 | 1304 | if (error == 0) { |
| e8599db1 MD |
1305 | if (hammer_debug_inode) |
| 1306 | kprintf("CLEANDELOND %p %08x\n", ip, ip->flags); | |
| 11ad5ade | 1307 | ip->sync_flags &= ~(HAMMER_INODE_DDIRTY | |
| 9192654c | 1308 | HAMMER_INODE_SDIRTY | |
| ddfdf542 MD |
1309 | HAMMER_INODE_ATIME | |
| 1310 | HAMMER_INODE_MTIME); | |
| b84de5af | 1311 | ip->flags &= ~HAMMER_INODE_DELONDISK; |
| 06ad81ff MD |
1312 | if (redirty) |
| 1313 | ip->sync_flags |= HAMMER_INODE_DDIRTY; | |
| 1f07f686 MD |
1314 | |
| 1315 | /* | |
| 1316 | * Root volume count of inodes | |
| 1317 | */ | |
| 98da6d8c | 1318 | hammer_sync_lock_sh(trans); |
| d26d0ae9 | 1319 | if ((ip->flags & HAMMER_INODE_ONDISK) == 0) { |
| e8599db1 MD |
1320 | hammer_modify_volume_field(trans, |
| 1321 | trans->rootvol, | |
| 1322 | vol0_stat_inodes); | |
| 0b075555 | 1323 | ++ip->hmp->rootvol->ondisk->vol0_stat_inodes; |
| 10a5d1ba | 1324 | hammer_modify_volume_done(trans->rootvol); |
| d26d0ae9 | 1325 | ip->flags |= HAMMER_INODE_ONDISK; |
| e8599db1 MD |
1326 | if (hammer_debug_inode) |
| 1327 | kprintf("NOWONDISK %p\n", ip); | |
| d26d0ae9 | 1328 | } |
| 98da6d8c | 1329 | hammer_sync_unlock(trans); |
| fbc6e32a | 1330 | } |
| c0ade690 | 1331 | } |
| 869e8f55 MD |
1332 | |
| 1333 | /* | |
| 1334 | * If the inode has been destroyed, clean out any left-over flags | |
| 1335 | * that may have been set by the frontend. | |
| 1336 | */ | |
| f90dde4c | 1337 | if (error == 0 && (ip->flags & HAMMER_INODE_DELETED)) { |
| 11ad5ade | 1338 | ip->sync_flags &= ~(HAMMER_INODE_DDIRTY | |
| 9192654c | 1339 | HAMMER_INODE_SDIRTY | |
| ddfdf542 MD |
1340 | HAMMER_INODE_ATIME | |
| 1341 | HAMMER_INODE_MTIME); | |
| f90dde4c | 1342 | } |
| c0ade690 MD |
1343 | return(error); |
| 1344 | } | |
| 1345 | ||
| a89aec1b | 1346 | /* |
| ddfdf542 MD |
1347 | * Update only the itimes fields. |
| 1348 | * | |
| 1349 | * ATIME can be updated without generating any UNDO. MTIME is updated | |
| 1350 | * with UNDO so it is guaranteed to be synchronized properly in case of | |
| 1351 | * a crash. | |
| 1352 | * | |
| 1353 | * Neither field is included in the B-Tree leaf element's CRC, which is how | |
| 1354 | * we can get away with updating ATIME the way we do. | |
| d113fda1 MD |
1355 | */ |
| 1356 | static int | |
| 4e17f465 | 1357 | hammer_update_itimes(hammer_cursor_t cursor, hammer_inode_t ip) |
| d113fda1 | 1358 | { |
| 4e17f465 | 1359 | hammer_transaction_t trans = cursor->trans; |
| d113fda1 MD |
1360 | int error; |
| 1361 | ||
| 6a37e7e4 | 1362 | retry: |
| ddfdf542 | 1363 | if ((ip->flags & (HAMMER_INODE_ONDISK|HAMMER_INODE_DELONDISK)) != |
| d113fda1 | 1364 | HAMMER_INODE_ONDISK) { |
| ddfdf542 MD |
1365 | return(0); |
| 1366 | } | |
| 4e17f465 | 1367 | |
| ddfdf542 | 1368 | hammer_normalize_cursor(cursor); |
| 5a930e66 MD |
1369 | cursor->key_beg.localization = ip->obj_localization + |
| 1370 | HAMMER_LOCALIZE_INODE; | |
| ddfdf542 MD |
1371 | cursor->key_beg.obj_id = ip->obj_id; |
| 1372 | cursor->key_beg.key = 0; | |
| 1373 | cursor->key_beg.create_tid = 0; | |
| 1374 | cursor->key_beg.delete_tid = 0; | |
| 1375 | cursor->key_beg.rec_type = HAMMER_RECTYPE_INODE; | |
| 1376 | cursor->key_beg.obj_type = 0; | |
| 1377 | cursor->asof = ip->obj_asof; | |
| 1378 | cursor->flags &= ~HAMMER_CURSOR_INITMASK; | |
| 1379 | cursor->flags |= HAMMER_CURSOR_ASOF; | |
| 1380 | cursor->flags |= HAMMER_CURSOR_GET_LEAF; | |
| 1381 | cursor->flags |= HAMMER_CURSOR_GET_DATA; | |
| 1382 | cursor->flags |= HAMMER_CURSOR_BACKEND; | |
| 1383 | ||
| 1384 | error = hammer_btree_lookup(cursor); | |
| ddfdf542 MD |
1385 | if (error == 0) { |
| 1386 | hammer_cache_node(&ip->cache[0], cursor->node); | |
| 1387 | if (ip->sync_flags & HAMMER_INODE_MTIME) { | |
| 10a5d1ba | 1388 | /* |
| ddfdf542 MD |
1389 | * Updating MTIME requires an UNDO. Just cover |
| 1390 | * both atime and mtime. | |
| 10a5d1ba | 1391 | */ |
| 98da6d8c | 1392 | hammer_sync_lock_sh(trans); |
| bcac4bbb MD |
1393 | hammer_modify_buffer(trans, cursor->data_buffer, |
| 1394 | HAMMER_ITIMES_BASE(&cursor->data->inode), | |
| 1395 | HAMMER_ITIMES_BYTES); | |
| 1396 | cursor->data->inode.atime = ip->sync_ino_data.atime; | |
| 1397 | cursor->data->inode.mtime = ip->sync_ino_data.mtime; | |
| 1398 | hammer_modify_buffer_done(cursor->data_buffer); | |
| 98da6d8c | 1399 | hammer_sync_unlock(trans); |
| ddfdf542 MD |
1400 | } else if (ip->sync_flags & HAMMER_INODE_ATIME) { |
| 1401 | /* | |
| 1402 | * Updating atime only can be done in-place with | |
| 1403 | * no UNDO. | |
| 1404 | */ | |
| 98da6d8c | 1405 | hammer_sync_lock_sh(trans); |
| ddfdf542 MD |
1406 | hammer_modify_buffer(trans, cursor->data_buffer, |
| 1407 | NULL, 0); | |
| 1408 | cursor->data->inode.atime = ip->sync_ino_data.atime; | |
| 1409 | hammer_modify_buffer_done(cursor->data_buffer); | |
| 98da6d8c | 1410 | hammer_sync_unlock(trans); |
| d113fda1 | 1411 | } |
| ddfdf542 MD |
1412 | ip->sync_flags &= ~(HAMMER_INODE_ATIME | HAMMER_INODE_MTIME); |
| 1413 | } | |
| 1414 | if (error == EDEADLK) { | |
| 1415 | hammer_done_cursor(cursor); | |
| 1416 | error = hammer_init_cursor(trans, cursor, | |
| 1417 | &ip->cache[0], ip); | |
| 1418 | if (error == 0) | |
| 1419 | goto retry; | |
| d113fda1 MD |
1420 | } |
| 1421 | return(error); | |
| 1422 | } | |
| 1423 | ||
| 1424 | /* | |
| 1f07f686 | 1425 | * Release a reference on an inode, flush as requested. |
| b84de5af MD |
1426 | * |
| 1427 | * On the last reference we queue the inode to the flusher for its final | |
| 1428 | * disposition. | |
| a89aec1b | 1429 | */ |
| 66325755 | 1430 | void |
| a89aec1b | 1431 | hammer_rel_inode(struct hammer_inode *ip, int flush) |
| 66325755 | 1432 | { |
| 35a49944 | 1433 | /*hammer_mount_t hmp = ip->hmp;*/ |
| 1f07f686 | 1434 | |
| f90dde4c MD |
1435 | /* |
| 1436 | * Handle disposition when dropping the last ref. | |
| 1437 | */ | |
| 1f07f686 | 1438 | for (;;) { |
| 250aec18 | 1439 | if (hammer_oneref(&ip->lock)) { |
| 1f07f686 MD |
1440 | /* |
| 1441 | * Determine whether on-disk action is needed for | |
| 1442 | * the inode's final disposition. | |
| 1443 | */ | |
| e8599db1 MD |
1444 | KKASSERT(ip->vp == NULL); |
| 1445 | hammer_inode_unloadable_check(ip, 0); | |
| 4e17f465 | 1446 | if (ip->flags & HAMMER_INODE_MODMASK) { |
| 35a49944 | 1447 | hammer_flush_inode(ip, 0); |
| 250aec18 | 1448 | } else if (hammer_oneref(&ip->lock)) { |
| 1f07f686 MD |
1449 | hammer_unload_inode(ip); |
| 1450 | break; | |
| 1451 | } | |
| b84de5af | 1452 | } else { |
| 4e17f465 | 1453 | if (flush) |
| 1f07f686 | 1454 | hammer_flush_inode(ip, 0); |
| 4e17f465 | 1455 | |
| 1f07f686 MD |
1456 | /* |
| 1457 | * The inode still has multiple refs, try to drop | |
| 1458 | * one ref. | |
| 1459 | */ | |
| 250aec18 MD |
1460 | KKASSERT(hammer_isactive(&ip->lock) >= 1); |
| 1461 | if (hammer_isactive(&ip->lock) > 1) { | |
| 1462 | hammer_rel(&ip->lock); | |
| 1f07f686 MD |
1463 | break; |
| 1464 | } | |
| b84de5af | 1465 | } |
| f90dde4c | 1466 | } |
| 427e5fc6 MD |
1467 | } |
| 1468 | ||
| 1469 | /* | |
| b84de5af MD |
1470 | * Unload and destroy the specified inode. Must be called with one remaining |
| 1471 | * reference. The reference is disposed of. | |
| 8cd0a023 | 1472 | * |
| cdb6e4e6 | 1473 | * The inode must be completely clean. |
| 27ea2398 | 1474 | */ |
| b84de5af | 1475 | static int |
| ec4e8497 | 1476 | hammer_unload_inode(struct hammer_inode *ip) |
| 27ea2398 | 1477 | { |
| 9f5097dc MD |
1478 | hammer_mount_t hmp = ip->hmp; |
| 1479 | ||
| 250aec18 MD |
1480 | KASSERT(hammer_oneref(&ip->lock), |
| 1481 | ("hammer_unload_inode: %d refs\n", hammer_isactive(&ip->lock))); | |
| 8cd0a023 | 1482 | KKASSERT(ip->vp == NULL); |
| f90dde4c MD |
1483 | KKASSERT(ip->flush_state == HAMMER_FST_IDLE); |
| 1484 | KKASSERT(ip->cursor_ip_refs == 0); | |
| 899eb297 | 1485 | KKASSERT(hammer_notlocked(&ip->lock)); |
| f90dde4c MD |
1486 | KKASSERT((ip->flags & HAMMER_INODE_MODMASK) == 0); |
| 1487 | ||
| 1488 | KKASSERT(RB_EMPTY(&ip->rec_tree)); | |
| 1f07f686 | 1489 | KKASSERT(TAILQ_EMPTY(&ip->target_list)); |
| f90dde4c | 1490 | |
| 73896937 MD |
1491 | if (ip->flags & HAMMER_INODE_RDIRTY) { |
| 1492 | RB_REMOVE(hammer_redo_rb_tree, &hmp->rb_redo_root, ip); | |
| 1493 | ip->flags &= ~HAMMER_INODE_RDIRTY; | |
| 1494 | } | |
| 9f5097dc | 1495 | RB_REMOVE(hammer_ino_rb_tree, &hmp->rb_inos_root, ip); |
| f90dde4c | 1496 | |
| 5fa5c92f | 1497 | hammer_free_inode(ip); |
| 27ea2398 MD |
1498 | return(0); |
| 1499 | } | |
| 1500 | ||
| 27ea2398 | 1501 | /* |
| cdb6e4e6 MD |
1502 | * Called during unmounting if a critical error occured. The in-memory |
| 1503 | * inode and all related structures are destroyed. | |
| 1504 | * | |
| 1505 | * If a critical error did not occur the unmount code calls the standard | |
| 1506 | * release and asserts that the inode is gone. | |
| 1507 | */ | |
| 1508 | int | |
| 1509 | hammer_destroy_inode_callback(struct hammer_inode *ip, void *data __unused) | |
| 1510 | { | |
| 1511 | hammer_record_t rec; | |
| 1512 | ||
| 1513 | /* | |
| 1514 | * Get rid of the inodes in-memory records, regardless of their | |
| 1515 | * state, and clear the mod-mask. | |
| 1516 | */ | |
| 1517 | while ((rec = TAILQ_FIRST(&ip->target_list)) != NULL) { | |
| 1518 | TAILQ_REMOVE(&ip->target_list, rec, target_entry); | |
| 1519 | rec->target_ip = NULL; | |
| 1520 | if (rec->flush_state == HAMMER_FST_SETUP) | |
| 1521 | rec->flush_state = HAMMER_FST_IDLE; | |
| 1522 | } | |
| 1523 | while ((rec = RB_ROOT(&ip->rec_tree)) != NULL) { | |
| 1524 | if (rec->flush_state == HAMMER_FST_FLUSH) | |
| 1525 | --rec->flush_group->refs; | |
| 1526 | else | |
| 1527 | hammer_ref(&rec->lock); | |
| 250aec18 | 1528 | KKASSERT(hammer_oneref(&rec->lock)); |
| cdb6e4e6 MD |
1529 | rec->flush_state = HAMMER_FST_IDLE; |
| 1530 | rec->flush_group = NULL; | |
| 3214ade6 MD |
1531 | rec->flags |= HAMMER_RECF_DELETED_FE; /* wave hands */ |
| 1532 | rec->flags |= HAMMER_RECF_DELETED_BE; /* wave hands */ | |
| 1533 | ++ip->rec_generation; | |
| cdb6e4e6 MD |
1534 | hammer_rel_mem_record(rec); |
| 1535 | } | |
| 1536 | ip->flags &= ~HAMMER_INODE_MODMASK; | |
| 1537 | ip->sync_flags &= ~HAMMER_INODE_MODMASK; | |
| 1538 | KKASSERT(ip->vp == NULL); | |
| 1539 | ||
| 1540 | /* | |
| 1541 | * Remove the inode from any flush group, force it idle. FLUSH | |
| 1542 | * and SETUP states have an inode ref. | |
| 1543 | */ | |
| 1544 | switch(ip->flush_state) { | |
| 1545 | case HAMMER_FST_FLUSH: | |
| ff003b11 | 1546 | RB_REMOVE(hammer_fls_rb_tree, &ip->flush_group->flush_tree, ip); |
| cdb6e4e6 MD |
1547 | --ip->flush_group->refs; |
| 1548 | ip->flush_group = NULL; | |
| 1549 | /* fall through */ | |
| 1550 | case HAMMER_FST_SETUP: | |
| 250aec18 | 1551 | hammer_rel(&ip->lock); |
| cdb6e4e6 MD |
1552 | ip->flush_state = HAMMER_FST_IDLE; |
| 1553 | /* fall through */ | |
| 1554 | case HAMMER_FST_IDLE: | |
| 1555 | break; | |
| 1556 | } | |
| 1557 | ||
| 1558 | /* | |
| 1559 | * There shouldn't be any associated vnode. The unload needs at | |
| 1560 | * least one ref, if we do have a vp steal its ip ref. | |
| 1561 | */ | |
| 1562 | if (ip->vp) { | |
| 1563 | kprintf("hammer_destroy_inode_callback: Unexpected " | |
| 1564 | "vnode association ip %p vp %p\n", ip, ip->vp); | |
| 1565 | ip->vp->v_data = NULL; | |
| 1566 | ip->vp = NULL; | |
| 1567 | } else { | |
| 1568 | hammer_ref(&ip->lock); | |
| 1569 | } | |
| 1570 | hammer_unload_inode(ip); | |
| 1571 | return(0); | |
| 1572 | } | |
| 1573 | ||
| 1574 | /* | |
| 51c35492 MD |
1575 | * Called on mount -u when switching from RW to RO or vise-versa. Adjust |
| 1576 | * the read-only flag for cached inodes. | |
| 1577 | * | |
| 1578 | * This routine is called from a RB_SCAN(). | |
| 1579 | */ | |
| 1580 | int | |
| 1581 | hammer_reload_inode(hammer_inode_t ip, void *arg __unused) | |
| 1582 | { | |
| 1583 | hammer_mount_t hmp = ip->hmp; | |
| 1584 | ||
| 1585 | if (hmp->ronly || hmp->asof != HAMMER_MAX_TID) | |
| 1586 | ip->flags |= HAMMER_INODE_RO; | |
| 1587 | else | |
| 1588 | ip->flags &= ~HAMMER_INODE_RO; | |
| 1589 | return(0); | |
| 1590 | } | |
| 1591 | ||
| 1592 | /* | |
| d113fda1 MD |
1593 | * A transaction has modified an inode, requiring updates as specified by |
| 1594 | * the passed flags. | |
| 7f7c1f84 | 1595 | * |
| 9192654c MD |
1596 | * HAMMER_INODE_DDIRTY: Inode data has been updated, not incl mtime/atime, |
| 1597 | * and not including size changes due to write-append | |
| 1598 | * (but other size changes are included). | |
| 1599 | * HAMMER_INODE_SDIRTY: Inode data has been updated, size changes due to | |
| 1600 | * write-append. | |
| 1f07f686 | 1601 | * HAMMER_INODE_XDIRTY: Dirty in-memory records |
| 4e17f465 | 1602 | * HAMMER_INODE_BUFS: Dirty buffer cache buffers |
| d113fda1 | 1603 | * HAMMER_INODE_DELETED: Inode record/data must be deleted |
| ddfdf542 | 1604 | * HAMMER_INODE_ATIME/MTIME: mtime/atime has been updated |
| 427e5fc6 | 1605 | */ |
| 66325755 | 1606 | void |
| e98f1b96 | 1607 | hammer_modify_inode(hammer_transaction_t trans, hammer_inode_t ip, int flags) |
| 427e5fc6 | 1608 | { |
| cdb6e4e6 MD |
1609 | /* |
| 1610 | * ronly of 0 or 2 does not trigger assertion. | |
| 1611 | * 2 is a special error state | |
| 1612 | */ | |
| 1613 | KKASSERT(ip->hmp->ronly != 1 || | |
| ddfdf542 | 1614 | (flags & (HAMMER_INODE_DDIRTY | HAMMER_INODE_XDIRTY | |
| 9192654c | 1615 | HAMMER_INODE_SDIRTY | |
| ddfdf542 MD |
1616 | HAMMER_INODE_BUFS | HAMMER_INODE_DELETED | |
| 1617 | HAMMER_INODE_ATIME | HAMMER_INODE_MTIME)) == 0); | |
| e63644f0 MD |
1618 | if ((ip->flags & HAMMER_INODE_RSV_INODES) == 0) { |
| 1619 | ip->flags |= HAMMER_INODE_RSV_INODES; | |
| 1620 | ++ip->hmp->rsv_inodes; | |
| 1621 | } | |
| b84de5af | 1622 | |
| e98f1b96 MD |
1623 | /* |
| 1624 | * Set the NEWINODE flag in the transaction if the inode | |
| 1625 | * transitions to a dirty state. This is used to track | |
| 1626 | * the load on the inode cache. | |
| 1627 | */ | |
| 1628 | if (trans && | |
| 1629 | (ip->flags & HAMMER_INODE_MODMASK) == 0 && | |
| 1630 | (flags & HAMMER_INODE_MODMASK)) { | |
| 1631 | trans->flags |= HAMMER_TRANSF_NEWINODE; | |
| 1632 | } | |
| 1633 | ||
| b84de5af MD |
1634 | ip->flags |= flags; |
| 1635 | } | |
| 1636 | ||
| 1637 | /* | |
| 1f07f686 | 1638 | * Request that an inode be flushed. This whole mess cannot block and may |
| 7bc5b8c2 MD |
1639 | * recurse (if not synchronous). Once requested HAMMER will attempt to |
| 1640 | * actively flush the inode until the flush can be done. | |
| b84de5af | 1641 | * |
| 1f07f686 MD |
1642 | * The inode may already be flushing, or may be in a setup state. We can |
| 1643 | * place the inode in a flushing state if it is currently idle and flag it | |
| 1644 | * to reflush if it is currently flushing. | |
| 7bc5b8c2 | 1645 | * |
| 4889cbd4 MD |
1646 | * Upon return if the inode could not be flushed due to a setup |
| 1647 | * dependancy, then it will be automatically flushed when the dependancy | |
| 1648 | * is satisfied. | |
| b84de5af MD |
1649 | */ |
| 1650 | void | |
| f90dde4c | 1651 | hammer_flush_inode(hammer_inode_t ip, int flags) |
| b84de5af | 1652 | { |
| 7a61b85d MD |
1653 | hammer_mount_t hmp; |
| 1654 | hammer_flush_group_t flg; | |
| bf3b416b | 1655 | int good; |
| 1f07f686 MD |
1656 | |
| 1657 | /* | |
| 37646115 MD |
1658 | * fill_flush_group is the first flush group we may be able to |
| 1659 | * continue filling, it may be open or closed but it will always | |
| 1660 | * be past the currently flushing (running) flg. | |
| 1661 | * | |
| 1662 | * next_flush_group is the next open flush group. | |
| 7a61b85d MD |
1663 | */ |
| 1664 | hmp = ip->hmp; | |
| 37646115 | 1665 | while ((flg = hmp->fill_flush_group) != NULL) { |
| 7b6ccb11 | 1666 | KKASSERT(flg->running == 0); |
| 37646115 MD |
1667 | if (flg->total_count + flg->refs <= ip->hmp->undo_rec_limit && |
| 1668 | flg->total_count <= hammer_autoflush) { | |
| 7b6ccb11 | 1669 | break; |
| 37646115 MD |
1670 | } |
| 1671 | hmp->fill_flush_group = TAILQ_NEXT(flg, flush_entry); | |
| 7b6ccb11 | 1672 | hammer_flusher_async(ip->hmp, flg); |
| 7a61b85d MD |
1673 | } |
| 1674 | if (flg == NULL) { | |
| bac808fe | 1675 | flg = kmalloc(sizeof(*flg), hmp->m_misc, M_WAITOK|M_ZERO); |
| 37646115 MD |
1676 | flg->seq = hmp->flusher.next++; |
| 1677 | if (hmp->next_flush_group == NULL) | |
| 1678 | hmp->next_flush_group = flg; | |
| 1679 | if (hmp->fill_flush_group == NULL) | |
| 1680 | hmp->fill_flush_group = flg; | |
| ff003b11 | 1681 | RB_INIT(&flg->flush_tree); |
| 7a61b85d MD |
1682 | TAILQ_INSERT_TAIL(&hmp->flush_group_list, flg, flush_entry); |
| 1683 | } | |
| 1684 | ||
| 1685 | /* | |
| 1686 | * Trivial 'nothing to flush' case. If the inode is in a SETUP | |
| 1f07f686 MD |
1687 | * state we have to put it back into an IDLE state so we can |
| 1688 | * drop the extra ref. | |
| 7a61b85d MD |
1689 | * |
| 1690 | * If we have a parent dependancy we must still fall through | |
| 1691 | * so we can run it. | |
| 1f07f686 | 1692 | */ |
| 4e17f465 | 1693 | if ((ip->flags & HAMMER_INODE_MODMASK) == 0) { |
| 7a61b85d MD |
1694 | if (ip->flush_state == HAMMER_FST_SETUP && |
| 1695 | TAILQ_EMPTY(&ip->target_list)) { | |
| 1f07f686 MD |
1696 | ip->flush_state = HAMMER_FST_IDLE; |
| 1697 | hammer_rel_inode(ip, 0); | |
| ec4e8497 | 1698 | } |
| 7a61b85d MD |
1699 | if (ip->flush_state == HAMMER_FST_IDLE) |
| 1700 | return; | |
| b84de5af | 1701 | } |
| 42c7d26b | 1702 | |
| 1f07f686 MD |
1703 | /* |
| 1704 | * Our flush action will depend on the current state. | |
| 1705 | */ | |
| 1706 | switch(ip->flush_state) { | |
| 1707 | case HAMMER_FST_IDLE: | |
| 1708 | /* | |
| 1709 | * We have no dependancies and can flush immediately. Some | |
| 1710 | * our children may not be flushable so we have to re-test | |
| 1711 | * with that additional knowledge. | |
| 1712 | */ | |
| 7a61b85d | 1713 | hammer_flush_inode_core(ip, flg, flags); |
| 1f07f686 MD |
1714 | break; |
| 1715 | case HAMMER_FST_SETUP: | |
| 1716 | /* | |
| 1717 | * Recurse upwards through dependancies via target_list | |
| 1718 | * and start their flusher actions going if possible. | |
| 1719 | * | |
| 1720 | * 'good' is our connectivity. -1 means we have none and | |
| 1721 | * can't flush, 0 means there weren't any dependancies, and | |
| 1722 | * 1 means we have good connectivity. | |
| 1723 | */ | |
| cc0758d0 | 1724 | good = hammer_setup_parent_inodes(ip, 0, flg); |
| 1f07f686 | 1725 | |
| 1f07f686 | 1726 | if (good >= 0) { |
| 7b6ccb11 MD |
1727 | /* |
| 1728 | * We can continue if good >= 0. Determine how | |
| 1729 | * many records under our inode can be flushed (and | |
| 1730 | * mark them). | |
| 1731 | */ | |
| 7a61b85d | 1732 | hammer_flush_inode_core(ip, flg, flags); |
| 1f07f686 | 1733 | } else { |
| 7b6ccb11 | 1734 | /* |
| 4889cbd4 | 1735 | * Parent has no connectivity, tell it to flush |
| 7b6ccb11 | 1736 | * us as soon as it does. |
| 4889cbd4 MD |
1737 | * |
| 1738 | * The REFLUSH flag is also needed to trigger | |
| 1739 | * dependancy wakeups. | |
| 7b6ccb11 | 1740 | */ |
| 4889cbd4 MD |
1741 | ip->flags |= HAMMER_INODE_CONN_DOWN | |
| 1742 | HAMMER_INODE_REFLUSH; | |
| 4e17f465 MD |
1743 | if (flags & HAMMER_FLUSH_SIGNAL) { |
| 1744 | ip->flags |= HAMMER_INODE_RESIGNAL; | |
| 7a61b85d | 1745 | hammer_flusher_async(ip->hmp, flg); |
| 4e17f465 | 1746 | } |
| 1f07f686 MD |
1747 | } |
| 1748 | break; | |
| 7b6ccb11 | 1749 | case HAMMER_FST_FLUSH: |
| 1f07f686 MD |
1750 | /* |
| 1751 | * We are already flushing, flag the inode to reflush | |
| 1752 | * if needed after it completes its current flush. | |
| 4889cbd4 MD |
1753 | * |
| 1754 | * The REFLUSH flag is also needed to trigger | |
| 1755 | * dependancy wakeups. | |
| 1f07f686 MD |
1756 | */ |
| 1757 | if ((ip->flags & HAMMER_INODE_REFLUSH) == 0) | |
| 1758 | ip->flags |= HAMMER_INODE_REFLUSH; | |
| 4e17f465 MD |
1759 | if (flags & HAMMER_FLUSH_SIGNAL) { |
| 1760 | ip->flags |= HAMMER_INODE_RESIGNAL; | |
| 7a61b85d | 1761 | hammer_flusher_async(ip->hmp, flg); |
| 4e17f465 | 1762 | } |
| 1f07f686 MD |
1763 | break; |
| 1764 | } | |
| 1765 | } | |
| 1766 | ||
| 1767 | /* | |
| bf3b416b MD |
1768 | * Scan ip->target_list, which is a list of records owned by PARENTS to our |
| 1769 | * ip which reference our ip. | |
| 1770 | * | |
| 1771 | * XXX This is a huge mess of recursive code, but not one bit of it blocks | |
| 1772 | * so for now do not ref/deref the structures. Note that if we use the | |
| 1773 | * ref/rel code later, the rel CAN block. | |
| 1774 | */ | |
| 1775 | static int | |
| cc0758d0 MD |
1776 | hammer_setup_parent_inodes(hammer_inode_t ip, int depth, |
| 1777 | hammer_flush_group_t flg) | |
| bf3b416b MD |
1778 | { |
| 1779 | hammer_record_t depend; | |
| bf3b416b MD |
1780 | int good; |
| 1781 | int r; | |
| 1782 | ||
| cc0758d0 MD |
1783 | /* |
| 1784 | * If we hit our recursion limit and we have parent dependencies | |
| 1785 | * We cannot continue. Returning < 0 will cause us to be flagged | |
| 1786 | * for reflush. Returning -2 cuts off additional dependency checks | |
| 1787 | * because they are likely to also hit the depth limit. | |
| 1788 | * | |
| 1789 | * We cannot return < 0 if there are no dependencies or there might | |
| 1790 | * not be anything to wakeup (ip). | |
| 1791 | */ | |
| 1792 | if (depth == 20 && TAILQ_FIRST(&ip->target_list)) { | |
| 1793 | kprintf("HAMMER Warning: depth limit reached on " | |
| 1794 | "setup recursion, inode %p %016llx\n", | |
| 1795 | ip, (long long)ip->obj_id); | |
| 1796 | return(-2); | |
| 1797 | } | |
| 1798 | ||
| 1799 | /* | |
| 1800 | * Scan dependencies | |
| 1801 | */ | |
| bf3b416b MD |
1802 | good = 0; |
| 1803 | TAILQ_FOREACH(depend, &ip->target_list, target_entry) { | |
| cc0758d0 | 1804 | r = hammer_setup_parent_inodes_helper(depend, depth, flg); |
| bf3b416b | 1805 | KKASSERT(depend->target_ip == ip); |
| bf3b416b MD |
1806 | if (r < 0 && good == 0) |
| 1807 | good = -1; | |
| 1808 | if (r > 0) | |
| 1809 | good = 1; | |
| cc0758d0 MD |
1810 | |
| 1811 | /* | |
| 1812 | * If we failed due to the recursion depth limit then stop | |
| 1813 | * now. | |
| 1814 | */ | |
| 1815 | if (r == -2) | |
| 1816 | break; | |
| bf3b416b MD |
1817 | } |
| 1818 | return(good); | |
| bf3b416b MD |
1819 | } |
| 1820 | ||
| 1821 | /* | |
| 1822 | * This helper function takes a record representing the dependancy between | |
| 1823 | * the parent inode and child inode. | |
| 1824 | * | |
| 1825 | * record->ip = parent inode | |
| 1826 | * record->target_ip = child inode | |
| 1827 | * | |
| 1f07f686 | 1828 | * We are asked to recurse upwards and convert the record from SETUP |
| bf3b416b | 1829 | * to FLUSH if possible. |
| 1f07f686 MD |
1830 | * |
| 1831 | * Return 1 if the record gives us connectivity | |
| 1832 | * | |
| 1833 | * Return 0 if the record is not relevant | |
| 1834 | * | |
| 1835 | * Return -1 if we can't resolve the dependancy and there is no connectivity. | |
| 1836 | */ | |
| 1837 | static int | |
| cc0758d0 | 1838 | hammer_setup_parent_inodes_helper(hammer_record_t record, int depth, |
| 7a61b85d | 1839 | hammer_flush_group_t flg) |
| 1f07f686 | 1840 | { |
| bf3b416b MD |
1841 | hammer_mount_t hmp; |
| 1842 | hammer_inode_t pip; | |
| 1843 | int good; | |
| 1f07f686 MD |
1844 | |
| 1845 | KKASSERT(record->flush_state != HAMMER_FST_IDLE); | |
| bf3b416b MD |
1846 | pip = record->ip; |
| 1847 | hmp = pip->hmp; | |
| 1f07f686 MD |
1848 | |
| 1849 | /* | |
| 1850 | * If the record is already flushing, is it in our flush group? | |
| 1851 | * | |
| e8599db1 MD |
1852 | * If it is in our flush group but it is a general record or a |
| 1853 | * delete-on-disk, it does not improve our connectivity (return 0), | |
| 1854 | * and if the target inode is not trying to destroy itself we can't | |
| 1855 | * allow the operation yet anyway (the second return -1). | |
| 1f07f686 MD |
1856 | */ |
| 1857 | if (record->flush_state == HAMMER_FST_FLUSH) { | |
| 7b6ccb11 MD |
1858 | /* |
| 1859 | * If not in our flush group ask the parent to reflush | |
| 1860 | * us as soon as possible. | |
| 1861 | */ | |
| 7a61b85d | 1862 | if (record->flush_group != flg) { |
| bf3b416b | 1863 | pip->flags |= HAMMER_INODE_REFLUSH; |
| 7b6ccb11 | 1864 | record->target_ip->flags |= HAMMER_INODE_CONN_DOWN; |
| 1f07f686 | 1865 | return(-1); |
| f90dde4c | 1866 | } |
| 7b6ccb11 MD |
1867 | |
| 1868 | /* | |
| 1869 | * If in our flush group everything is already set up, | |
| 1870 | * just return whether the record will improve our | |
| 1871 | * visibility or not. | |
| 1872 | */ | |
| 1f07f686 MD |
1873 | if (record->type == HAMMER_MEM_RECORD_ADD) |
| 1874 | return(1); | |
| 1875 | return(0); | |
| 1876 | } | |
| 1877 | ||
| 1878 | /* | |
| 1879 | * It must be a setup record. Try to resolve the setup dependancies | |
| 1880 | * by recursing upwards so we can place ip on the flush list. | |
| cc0758d0 MD |
1881 | * |
| 1882 | * Limit ourselves to 20 levels of recursion to avoid blowing out | |
| 1883 | * the kernel stack. If we hit the recursion limit we can't flush | |
| 1884 | * until the parent flushes. The parent will flush independantly | |
| 1885 | * on its own and ultimately a deep recursion will be resolved. | |
| 1f07f686 MD |
1886 | */ |
| 1887 | KKASSERT(record->flush_state == HAMMER_FST_SETUP); | |
| 1888 | ||
| cc0758d0 | 1889 | good = hammer_setup_parent_inodes(pip, depth + 1, flg); |
| 1f07f686 MD |
1890 | |
| 1891 | /* | |
| 7b6ccb11 MD |
1892 | * If good < 0 the parent has no connectivity and we cannot safely |
| 1893 | * flush the directory entry, which also means we can't flush our | |
| cc0758d0 MD |
1894 | * ip. Flag us for downward recursion once the parent's |
| 1895 | * connectivity is resolved. Flag the parent for [re]flush or it | |
| 1896 | * may not check for downward recursions. | |
| 1f07f686 MD |
1897 | */ |
| 1898 | if (good < 0) { | |
| cc0758d0 | 1899 | pip->flags |= HAMMER_INODE_REFLUSH; |
| 7b6ccb11 | 1900 | record->target_ip->flags |= HAMMER_INODE_CONN_DOWN; |
| 1f07f686 MD |
1901 | return(good); |
| 1902 | } | |
| 1903 | ||
| 1904 | /* | |
| 1905 | * We are go, place the parent inode in a flushing state so we can | |
| 1906 | * place its record in a flushing state. Note that the parent | |
| 1907 | * may already be flushing. The record must be in the same flush | |
| 1908 | * group as the parent. | |
| 1909 | */ | |
| bf3b416b | 1910 | if (pip->flush_state != HAMMER_FST_FLUSH) |
| 7a61b85d | 1911 | hammer_flush_inode_core(pip, flg, HAMMER_FLUSH_RECURSION); |
| bf3b416b | 1912 | KKASSERT(pip->flush_state == HAMMER_FST_FLUSH); |
| d006ca0c MD |
1913 | |
| 1914 | /* | |
| 1915 | * It is possible for a rename to create a loop in the recursion | |
| 1916 | * and revisit a record. This will result in the record being | |
| 1917 | * placed in a flush state unexpectedly. This check deals with | |
| 1918 | * the case. | |
| 1919 | */ | |
| 1920 | if (record->flush_state == HAMMER_FST_FLUSH) { | |
| 1921 | if (record->type == HAMMER_MEM_RECORD_ADD) | |
| 1922 | return(1); | |
| 1923 | return(0); | |
| 1924 | } | |
| 1925 | ||
| 1f07f686 MD |
1926 | KKASSERT(record->flush_state == HAMMER_FST_SETUP); |
| 1927 | ||
| 1928 | #if 0 | |
| 1929 | if (record->type == HAMMER_MEM_RECORD_DEL && | |
| 869e8f55 | 1930 | (record->target_ip->flags & (HAMMER_INODE_DELETED|HAMMER_INODE_DELONDISK)) == 0) { |
| 1f07f686 MD |
1931 | /* |
| 1932 | * Regardless of flushing state we cannot sync this path if the | |
| 1933 | * record represents a delete-on-disk but the target inode | |
| 1934 | * is not ready to sync its own deletion. | |
| 1935 | * | |
| 1936 | * XXX need to count effective nlinks to determine whether | |
| 1937 | * the flush is ok, otherwise removing a hardlink will | |
| 1938 | * just leave the DEL record to rot. | |
| 1939 | */ | |
| 1940 | record->target_ip->flags |= HAMMER_INODE_REFLUSH; | |
| 1941 | return(-1); | |
| 1942 | } else | |
| 1943 | #endif | |
| 7a61b85d | 1944 | if (pip->flush_group == flg) { |
| 1f07f686 | 1945 | /* |
| d7e278bb MD |
1946 | * Because we have not calculated nlinks yet we can just |
| 1947 | * set records to the flush state if the parent is in | |
| 1948 | * the same flush group as we are. | |
| 1f07f686 | 1949 | */ |
| 7b6ccb11 MD |
1950 | record->flush_state = HAMMER_FST_FLUSH; |
| 1951 | record->flush_group = flg; | |
| 1952 | ++record->flush_group->refs; | |
| 1953 | hammer_ref(&record->lock); | |
| 1f07f686 MD |
1954 | |
| 1955 | /* | |
| 7b6ccb11 MD |
1956 | * A general directory-add contributes to our visibility. |
| 1957 | * | |
| 1958 | * Otherwise it is probably a directory-delete or | |
| 1959 | * delete-on-disk record and does not contribute to our | |
| 1960 | * visbility (but we can still flush it). | |
| 1f07f686 | 1961 | */ |
| 7b6ccb11 MD |
1962 | if (record->type == HAMMER_MEM_RECORD_ADD) |
| 1963 | return(1); | |
| 1f07f686 MD |
1964 | return(0); |
| 1965 | } else { | |
| 1966 | /* | |
| 7b6ccb11 MD |
1967 | * If the parent is not in our flush group we cannot |
| 1968 | * flush this record yet, there is no visibility. | |
| 1969 | * We tell the parent to reflush and mark ourselves | |
| 1970 | * so the parent knows it should flush us too. | |
| 1f07f686 | 1971 | */ |
| bf3b416b | 1972 | pip->flags |= HAMMER_INODE_REFLUSH; |
| 7b6ccb11 | 1973 | record->target_ip->flags |= HAMMER_INODE_CONN_DOWN; |
| 1f07f686 | 1974 | return(-1); |
| 7f7c1f84 | 1975 | } |
| c0ade690 MD |
1976 | } |
| 1977 | ||
| 1978 | /* | |
| 1f07f686 | 1979 | * This is the core routine placing an inode into the FST_FLUSH state. |
| c0ade690 | 1980 | */ |
| b84de5af | 1981 | static void |
| 7a61b85d | 1982 | hammer_flush_inode_core(hammer_inode_t ip, hammer_flush_group_t flg, int flags) |
| b84de5af | 1983 | { |
| 37646115 | 1984 | hammer_mount_t hmp = ip->hmp; |
| 1f07f686 | 1985 | int go_count; |
| 1f07f686 | 1986 | |
| 4e17f465 MD |
1987 | /* |
| 1988 | * Set flush state and prevent the flusher from cycling into | |
| 1989 | * the next flush group. Do not place the ip on the list yet. | |
| 1990 | * Inodes not in the idle state get an extra reference. | |
| 1991 | */ | |
| 1f07f686 MD |
1992 | KKASSERT(ip->flush_state != HAMMER_FST_FLUSH); |
| 1993 | if (ip->flush_state == HAMMER_FST_IDLE) | |
| 1994 | hammer_ref(&ip->lock); | |
| 1995 | ip->flush_state = HAMMER_FST_FLUSH; | |
| 7a61b85d | 1996 | ip->flush_group = flg; |
| 37646115 MD |
1997 | ++hmp->flusher.group_lock; |
| 1998 | ++hmp->count_iqueued; | |
| af209b0f | 1999 | ++hammer_count_iqueued; |
| 7a61b85d | 2000 | ++flg->total_count; |
| 73896937 | 2001 | hammer_redo_fifo_start_flush(ip); |
| b84de5af | 2002 | |
| e0092341 | 2003 | #if 0 |
| 21fde338 | 2004 | /* |
| e8599db1 | 2005 | * We need to be able to vfsync/truncate from the backend. |
| e0092341 MD |
2006 | * |
| 2007 | * XXX Any truncation from the backend will acquire the vnode | |
| 2008 | * independently. | |
| e8599db1 MD |
2009 | */ |
| 2010 | KKASSERT((ip->flags & HAMMER_INODE_VHELD) == 0); | |
| 2011 | if (ip->vp && (ip->vp->v_flag & VINACTIVE) == 0) { | |
| 2012 | ip->flags |= HAMMER_INODE_VHELD; | |
| 2013 | vref(ip->vp); | |
| 2014 | } | |
| e0092341 | 2015 | #endif |
| e8599db1 MD |
2016 | |
| 2017 | /* | |
| 1f07f686 MD |
2018 | * Figure out how many in-memory records we can actually flush |
| 2019 | * (not including inode meta-data, buffers, etc). | |
| ec4e8497 | 2020 | */ |
| d7e278bb | 2021 | KKASSERT((ip->flags & HAMMER_INODE_WOULDBLOCK) == 0); |
| 1f07f686 | 2022 | if (flags & HAMMER_FLUSH_RECURSION) { |
| 7a61b85d MD |
2023 | /* |
| 2024 | * If this is a upwards recursion we do not want to | |
| 2025 | * recurse down again! | |
| 2026 | */ | |
| 1f07f686 | 2027 | go_count = 1; |
| d7e278bb | 2028 | #if 0 |
| 312de84d | 2029 | } else if (ip->flags & HAMMER_INODE_WOULDBLOCK) { |
| 7a61b85d MD |
2030 | /* |
| 2031 | * No new records are added if we must complete a flush | |
| 2032 | * from a previous cycle, but we do have to move the records | |
| 2033 | * from the previous cycle to the current one. | |
| 2034 | */ | |
| 2035 | #if 0 | |
| 525aad3a MD |
2036 | go_count = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL, |
| 2037 | hammer_syncgrp_child_callback, NULL); | |
| 7a61b85d | 2038 | #endif |
| 312de84d | 2039 | go_count = 1; |
| d7e278bb | 2040 | #endif |
| 1f07f686 | 2041 | } else { |
| 7a61b85d MD |
2042 | /* |
| 2043 | * Normal flush, scan records and bring them into the flush. | |
| 2044 | * Directory adds and deletes are usually skipped (they are | |
| 2045 | * grouped with the related inode rather then with the | |
| 2046 | * directory). | |
| 2047 | * | |
| 2048 | * go_count can be negative, which means the scan aborted | |
| 2049 | * due to the flush group being over-full and we should | |
| 2050 | * flush what we have. | |
| 2051 | */ | |
| 1f07f686 MD |
2052 | go_count = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL, |
| 2053 | hammer_setup_child_callback, NULL); | |
| 2054 | } | |
| b84de5af MD |
2055 | |
| 2056 | /* | |
| 1f07f686 MD |
2057 | * This is a more involved test that includes go_count. If we |
| 2058 | * can't flush, flag the inode and return. If go_count is 0 we | |
| 2059 | * were are unable to flush any records in our rec_tree and | |
| 2060 | * must ignore the XDIRTY flag. | |
| b84de5af | 2061 | */ |
| 1f07f686 MD |
2062 | if (go_count == 0) { |
| 2063 | if ((ip->flags & HAMMER_INODE_MODMASK_NOXDIRTY) == 0) { | |
| 37646115 | 2064 | --hmp->count_iqueued; |
| af209b0f MD |
2065 | --hammer_count_iqueued; |
| 2066 | ||
| 4889cbd4 | 2067 | --flg->total_count; |
| 1f07f686 | 2068 | ip->flush_state = HAMMER_FST_SETUP; |
| 7a61b85d | 2069 | ip->flush_group = NULL; |
| 37646115 MD |
2070 | if (flags & HAMMER_FLUSH_SIGNAL) { |
| 2071 | ip->flags |= HAMMER_INODE_REFLUSH | | |
| 2072 | HAMMER_INODE_RESIGNAL; | |
| 2073 | } else { | |
| 2074 | ip->flags |= HAMMER_INODE_REFLUSH; | |
| 2075 | } | |
| e0092341 | 2076 | #if 0 |
| e8599db1 MD |
2077 | if (ip->flags & HAMMER_INODE_VHELD) { |
| 2078 | ip->flags &= ~HAMMER_INODE_VHELD; | |
| 2079 | vrele(ip->vp); | |
| 2080 | } | |
| e0092341 | 2081 | #endif |
| 4889cbd4 MD |
2082 | |
| 2083 | /* | |
| 2084 | * REFLUSH is needed to trigger dependancy wakeups | |
| 2085 | * when an inode is in SETUP. | |
| 2086 | */ | |
| 2087 | ip->flags |= HAMMER_INODE_REFLUSH; | |
| 37646115 MD |
2088 | if (--hmp->flusher.group_lock == 0) |
| 2089 | wakeup(&hmp->flusher.group_lock); | |
| 1f07f686 MD |
2090 | return; |
| 2091 | } | |
| 2092 | } | |
| b84de5af | 2093 | |
| b84de5af MD |
2094 | /* |
| 2095 | * Snapshot the state of the inode for the backend flusher. | |
| 2096 | * | |
| a9d52b76 | 2097 | * We continue to retain save_trunc_off even when all truncations |
| cb51be26 MD |
2098 | * have been resolved as an optimization to determine if we can |
| 2099 | * skip the B-Tree lookup for overwrite deletions. | |
| 2100 | * | |
| 1f07f686 MD |
2101 | * NOTE: The DELETING flag is a mod flag, but it is also sticky, |
| 2102 | * and stays in ip->flags. Once set, it stays set until the | |
| 2103 | * inode is destroyed. | |
| b84de5af | 2104 | */ |
| d7e278bb | 2105 | if (ip->flags & HAMMER_INODE_TRUNCATED) { |
| 312de84d MD |
2106 | KKASSERT((ip->sync_flags & HAMMER_INODE_TRUNCATED) == 0); |
| 2107 | ip->sync_trunc_off = ip->trunc_off; | |
| 2108 | ip->trunc_off = 0x7FFFFFFFFFFFFFFFLL; | |
| 2109 | ip->flags &= ~HAMMER_INODE_TRUNCATED; | |
| 2110 | ip->sync_flags |= HAMMER_INODE_TRUNCATED; | |
| a9d52b76 MD |
2111 | |
| 2112 | /* | |
| 2113 | * The save_trunc_off used to cache whether the B-Tree | |
| 2114 | * holds any records past that point is not used until | |
| 2115 | * after the truncation has succeeded, so we can safely | |
| 2116 | * set it now. | |
| 2117 | */ | |
| 2118 | if (ip->save_trunc_off > ip->sync_trunc_off) | |
| 2119 | ip->save_trunc_off = ip->sync_trunc_off; | |
| 2120 | } | |
| 312de84d MD |
2121 | ip->sync_flags |= (ip->flags & HAMMER_INODE_MODMASK & |
| 2122 | ~HAMMER_INODE_TRUNCATED); | |
| 11ad5ade | 2123 | ip->sync_ino_leaf = ip->ino_leaf; |
| b84de5af | 2124 | ip->sync_ino_data = ip->ino_data; |
| 312de84d | 2125 | ip->flags &= ~HAMMER_INODE_MODMASK | HAMMER_INODE_TRUNCATED; |
| 0832c9bb MD |
2126 | #ifdef DEBUG_TRUNCATE |
| 2127 | if ((ip->sync_flags & HAMMER_INODE_TRUNCATED) && ip == HammerTruncIp) | |
| 2128 | kprintf("truncateS %016llx\n", ip->sync_trunc_off); | |
| 2129 | #endif | |
| b84de5af MD |
2130 | |
| 2131 | /* | |
| 4e17f465 | 2132 | * The flusher list inherits our inode and reference. |
| b84de5af | 2133 | */ |
| 7a61b85d | 2134 | KKASSERT(flg->running == 0); |
| ff003b11 | 2135 | RB_INSERT(hammer_fls_rb_tree, &flg->flush_tree, ip); |
| 37646115 MD |
2136 | if (--hmp->flusher.group_lock == 0) |
| 2137 | wakeup(&hmp->flusher.group_lock); | |
| 1f07f686 | 2138 | |
| 37646115 MD |
2139 | /* |
| 2140 | * Auto-flush the group if it grows too large. Make sure the | |
| 2141 | * inode reclaim wait pipeline continues to work. | |
| 2142 | */ | |
| 2143 | if (flg->total_count >= hammer_autoflush || | |
| 2144 | flg->total_count >= hammer_limit_reclaim / 4) { | |
| 2145 | if (hmp->fill_flush_group == flg) | |
| 2146 | hmp->fill_flush_group = TAILQ_NEXT(flg, flush_entry); | |
| 2147 | hammer_flusher_async(hmp, flg); | |
| 0832c9bb | 2148 | } |
| b84de5af MD |
2149 | } |
| 2150 | ||
| ec4e8497 | 2151 | /* |
| 1f07f686 MD |
2152 | * Callback for scan of ip->rec_tree. Try to include each record in our |
| 2153 | * flush. ip->flush_group has been set but the inode has not yet been | |
| 2154 | * moved into a flushing state. | |
| 2155 | * | |
| 2156 | * If we get stuck on a record we have to set HAMMER_INODE_REFLUSH on | |
| 2157 | * both inodes. | |
| 2158 | * | |
| 2159 | * We return 1 for any record placed or found in FST_FLUSH, which prevents | |
| 2160 | * the caller from shortcutting the flush. | |
| ec4e8497 | 2161 | */ |
| c0ade690 | 2162 | static int |
| 1f07f686 | 2163 | hammer_setup_child_callback(hammer_record_t rec, void *data) |
| b84de5af | 2164 | { |
| 7a61b85d | 2165 | hammer_flush_group_t flg; |
| 1f07f686 MD |
2166 | hammer_inode_t target_ip; |
| 2167 | hammer_inode_t ip; | |
| 2168 | int r; | |
| 2169 | ||
| 2170 | /* | |
| 3214ade6 MD |
2171 | * Records deleted or committed by the backend are ignored. |
| 2172 | * Note that the flush detects deleted frontend records at | |
| 2173 | * multiple points to deal with races. This is just the first | |
| 2174 | * line of defense. The only time HAMMER_RECF_DELETED_FE cannot | |
| 2175 | * be set is when HAMMER_RECF_INTERLOCK_BE is set, because it | |
| 2176 | * messes up link-count calculations. | |
| 7bc5b8c2 | 2177 | * |
| 3214ade6 MD |
2178 | * NOTE: Don't get confused between record deletion and, say, |
| 2179 | * directory entry deletion. The deletion of a directory entry | |
| 2180 | * which is on-media has nothing to do with the record deletion | |
| 2181 | * flags. | |
| 1f07f686 | 2182 | */ |
| 3214ade6 MD |
2183 | if (rec->flags & (HAMMER_RECF_DELETED_FE | HAMMER_RECF_DELETED_BE | |
| 2184 | HAMMER_RECF_COMMITTED)) { | |
| ecca949a | 2185 | if (rec->flush_state == HAMMER_FST_FLUSH) { |
| 7a61b85d | 2186 | KKASSERT(rec->flush_group == rec->ip->flush_group); |
| ecca949a MD |
2187 | r = 1; |
| 2188 | } else { | |
| 2189 | r = 0; | |
| 2190 | } | |
| 2191 | return(r); | |
| 2192 | } | |
| 1f07f686 MD |
2193 | |
| 2194 | /* | |
| 2195 | * If the record is in an idle state it has no dependancies and | |
| 2196 | * can be flushed. | |
| 2197 | */ | |
| 2198 | ip = rec->ip; | |
| 7a61b85d | 2199 | flg = ip->flush_group; |
| 1f07f686 MD |
2200 | r = 0; |
| 2201 | ||
| 2202 | switch(rec->flush_state) { | |
| 2203 | case HAMMER_FST_IDLE: | |
| 2204 | /* | |
| 7a61b85d | 2205 | * The record has no setup dependancy, we can flush it. |
| 1f07f686 MD |
2206 | */ |
| 2207 | KKASSERT(rec->target_ip == NULL); | |
| 2208 | rec->flush_state = HAMMER_FST_FLUSH; | |
| 7a61b85d MD |
2209 | rec->flush_group = flg; |
| 2210 | ++flg->refs; | |
| b84de5af | 2211 | hammer_ref(&rec->lock); |
| 1f07f686 MD |
2212 | r = 1; |
| 2213 | break; | |
| 2214 | case HAMMER_FST_SETUP: | |
| 2215 | /* | |
| 7a61b85d MD |
2216 | * The record has a setup dependancy. These are typically |
| 2217 | * directory entry adds and deletes. Such entries will be | |
| 7b6ccb11 MD |
2218 | * flushed when their inodes are flushed so we do not |
| 2219 | * usually have to add them to the flush here. However, | |
| 2220 | * if the target_ip has set HAMMER_INODE_CONN_DOWN then | |
| 2221 | * it is asking us to flush this record (and it). | |
| 1f07f686 MD |
2222 | */ |
| 2223 | target_ip = rec->target_ip; | |
| 2224 | KKASSERT(target_ip != NULL); | |
| 2225 | KKASSERT(target_ip->flush_state != HAMMER_FST_IDLE); | |
| 7a61b85d MD |
2226 | |
| 2227 | /* | |
| 2228 | * If the target IP is already flushing in our group | |
| 5c8d05e2 MD |
2229 | * we could associate the record, but target_ip has |
| 2230 | * already synced ino_data to sync_ino_data and we | |
| 2231 | * would also have to adjust nlinks. Plus there are | |
| 2232 | * ordering issues for adds and deletes. | |
| 2233 | * | |
| 2234 | * Reflush downward if this is an ADD, and upward if | |
| 2235 | * this is a DEL. | |
| 7a61b85d | 2236 | */ |
| 1f07f686 | 2237 | if (target_ip->flush_state == HAMMER_FST_FLUSH) { |
| 5c8d05e2 MD |
2238 | if (rec->flush_state == HAMMER_MEM_RECORD_ADD) |
| 2239 | ip->flags |= HAMMER_INODE_REFLUSH; | |
| 2240 | else | |
| 1f07f686 | 2241 | target_ip->flags |= HAMMER_INODE_REFLUSH; |
| 7a61b85d MD |
2242 | break; |
| 2243 | } | |
| 2244 | ||
| 2245 | /* | |
| 2246 | * Target IP is not yet flushing. This can get complex | |
| 2247 | * because we have to be careful about the recursion. | |
| 7b6ccb11 MD |
2248 | * |
| 2249 | * Directories create an issue for us in that if a flush | |
| 2250 | * of a directory is requested the expectation is to flush | |
| 2251 | * any pending directory entries, but this will cause the | |
| 2252 | * related inodes to recursively flush as well. We can't | |
| 2253 | * really defer the operation so just get as many as we | |
| 2254 | * can and | |
| 7a61b85d | 2255 | */ |
| 7b6ccb11 | 2256 | #if 0 |
| 7a61b85d | 2257 | if ((target_ip->flags & HAMMER_INODE_RECLAIM) == 0 && |
| 7b6ccb11 | 2258 | (target_ip->flags & HAMMER_INODE_CONN_DOWN) == 0) { |
| 7a61b85d | 2259 | /* |
| 7b6ccb11 MD |
2260 | * We aren't reclaiming and the target ip was not |
| 2261 | * previously prevented from flushing due to this | |
| 2262 | * record dependancy. Do not flush this record. | |
| 7a61b85d MD |
2263 | */ |
| 2264 | /*r = 0;*/ | |
| 7b6ccb11 MD |
2265 | } else |
| 2266 | #endif | |
| 2267 | if (flg->total_count + flg->refs > | |
| 7a61b85d MD |
2268 | ip->hmp->undo_rec_limit) { |
| 2269 | /* | |
| 2270 | * Our flush group is over-full and we risk blowing | |
| 2271 | * out the UNDO FIFO. Stop the scan, flush what we | |
| 2272 | * have, then reflush the directory. | |
| 2273 | * | |
| 2274 | * The directory may be forced through multiple | |
| 2275 | * flush groups before it can be completely | |
| 2276 | * flushed. | |
| 2277 | */ | |
| 4889cbd4 MD |
2278 | ip->flags |= HAMMER_INODE_RESIGNAL | |
| 2279 | HAMMER_INODE_REFLUSH; | |
| 7a61b85d | 2280 | r = -1; |
| 1f07f686 MD |
2281 | } else if (rec->type == HAMMER_MEM_RECORD_ADD) { |
| 2282 | /* | |
| 2283 | * If the target IP is not flushing we can force | |
| 2284 | * it to flush, even if it is unable to write out | |
| 2285 | * any of its own records we have at least one in | |
| 2286 | * hand that we CAN deal with. | |
| 2287 | */ | |
| 2288 | rec->flush_state = HAMMER_FST_FLUSH; | |
| 7a61b85d MD |
2289 | rec->flush_group = flg; |
| 2290 | ++flg->refs; | |
| 1f07f686 | 2291 | hammer_ref(&rec->lock); |
| 7a61b85d | 2292 | hammer_flush_inode_core(target_ip, flg, |
| 1f07f686 MD |
2293 | HAMMER_FLUSH_RECURSION); |
| 2294 | r = 1; | |
| 2295 | } else { | |
| 2296 | /* | |
| e8599db1 MD |
2297 | * General or delete-on-disk record. |
| 2298 | * | |
| 2299 | * XXX this needs help. If a delete-on-disk we could | |
| 2300 | * disconnect the target. If the target has its own | |
| 2301 | * dependancies they really need to be flushed. | |
| 1f07f686 MD |
2302 | * |
| 2303 | * XXX | |
| 2304 | */ | |
| 2305 | rec->flush_state = HAMMER_FST_FLUSH; | |
| 7a61b85d MD |
2306 | rec->flush_group = flg; |
| 2307 | ++flg->refs; | |
| 1f07f686 | 2308 | hammer_ref(&rec->lock); |
| 7a61b85d | 2309 | hammer_flush_inode_core(target_ip, flg, |
| 1f07f686 MD |
2310 | HAMMER_FLUSH_RECURSION); |
| 2311 | r = 1; | |
| 2312 | } | |
| 2313 | break; | |
| 2314 | case HAMMER_FST_FLUSH: | |
| 2315 | /* | |
| e3c8589c MD |
2316 | * The record could be part of a previous flush group if the |
| 2317 | * inode is a directory (the record being a directory entry). | |
| 2318 | * Once the flush group was closed a hammer_test_inode() | |
| 2319 | * function can cause a new flush group to be setup, placing | |
| 2320 | * the directory inode itself in a new flush group. | |
| 2321 | * | |
| 2322 | * When associated with a previous flush group we count it | |
| 2323 | * as if it were in our current flush group, since it will | |
| 2324 | * effectively be flushed by the time we flush our current | |
| 2325 | * flush group. | |
| 1f07f686 | 2326 | */ |
| e3c8589c MD |
2327 | KKASSERT( |
| 2328 | rec->ip->ino_data.obj_type == HAMMER_OBJTYPE_DIRECTORY || | |
| 2329 | rec->flush_group == flg); | |
| 1f07f686 MD |
2330 | r = 1; |
| 2331 | break; | |
| b84de5af | 2332 | } |
| 1f07f686 | 2333 | return(r); |
| b84de5af MD |
2334 | } |
| 2335 | ||
#if 0
/*
 * Disabled record callback.  As written it does not move anything: for
 * a record that is already in the FLUSH state it only asserts that the
 * record and its owning inode agree on the flush group.  Records in any
 * other state are ignored.
 *
 * Always returns 0.  The data argument is unused.
 */
static int
hammer_syncgrp_child_callback(hammer_record_t rec, void *data)
{
	hammer_inode_t owner = rec->ip;

	if (rec->flush_state == HAMMER_FST_FLUSH)
		KKASSERT(rec->flush_group == owner->flush_group);
	return(0);
}
#endif
| 525aad3a MD |
2356 | |
| 2357 | /* | |
| 7a61b85d | 2358 | * Wait for a previously queued flush to complete. |
| cdb6e4e6 MD |
2359 | * |
| 2360 | * If a critical error occured we don't try to wait. | |
| b84de5af MD |
2361 | */ |
| 2362 | void | |
| 2363 | hammer_wait_inode(hammer_inode_t ip) | |
| 2364 | { | |
| 7a61b85d | 2365 | hammer_flush_group_t flg; |
| ddfdf542 | 2366 | |
| 7a61b85d | 2367 | flg = NULL; |
| cdb6e4e6 | 2368 | if ((ip->hmp->flags & HAMMER_MOUNT_CRITICAL_ERROR) == 0) { |
| cdb6e4e6 MD |
2369 | while (ip->flush_state != HAMMER_FST_IDLE && |
| 2370 | (ip->hmp->flags & HAMMER_MOUNT_CRITICAL_ERROR) == 0) { | |
| f153644d MD |
2371 | if (ip->flush_state == HAMMER_FST_SETUP) |
| 2372 | hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL); | |
| 526fce4d MD |
2373 | |
| 2374 | /* | |
| 2375 | * If the inode was already being flushed its flg | |
| 2376 | * may not have been queued to the backend. We have | |
| 2377 | * to make sure it gets queued or we can wind up | |
| 2378 | * blocked or deadlocked (particularly if we are | |
| 2379 | * the vnlru thread). | |
| 2380 | */ | |
| 2381 | KKASSERT(ip->flush_group); | |
| 2382 | if (ip->flush_group->closed == 0) { | |
| 2383 | kprintf("hammer: debug: forcing async " | |
| 2384 | "flush ip %016jx\n", | |
| 2385 | (intmax_t)ip->obj_id); | |
| 2386 | hammer_flusher_async(ip->hmp, ip->flush_group); | |
| 2387 | } | |
| f153644d MD |
2388 | if (ip->flush_state != HAMMER_FST_IDLE) { |
| 2389 | ip->flags |= HAMMER_INODE_FLUSHW; | |
| 2390 | tsleep(&ip->flags, 0, "hmrwin", 0); | |
| 2391 | } | |
| cdb6e4e6 | 2392 | } |
| b84de5af MD |
2393 | } |
| 2394 | } | |
| 2395 | ||
| 2396 | /* | |
| 2397 | * Called by the backend code when a flush has been completed. | |
| 2398 | * The inode has already been removed from the flush list. | |
| 2399 | * | |
| 2400 | * A pipelined flush can occur, in which case we must re-enter the | |
| 2401 | * inode on the list and re-copy its fields. | |
| 2402 | */ | |
| 2403 | void | |
| cdb6e4e6 | 2404 | hammer_flush_inode_done(hammer_inode_t ip, int error) |
| b84de5af | 2405 | { |
| af209b0f MD |
2406 | hammer_mount_t hmp; |
| 2407 | int dorel; | |
| 1955afa7 | 2408 | |
| b84de5af MD |
2409 | KKASSERT(ip->flush_state == HAMMER_FST_FLUSH); |
| 2410 | ||
| af209b0f MD |
2411 | hmp = ip->hmp; |
| 2412 | ||
| 1f07f686 | 2413 | /* |
| 5c667a24 MD |
2414 | * Auto-reflush if the backend could not completely flush |
| 2415 | * the inode. This fixes a case where a deferred buffer flush | |
| 2416 | * could cause fsync to return early. | |
| 2417 | */ | |
| 2418 | if (ip->sync_flags & HAMMER_INODE_MODMASK) | |
| 2419 | ip->flags |= HAMMER_INODE_REFLUSH; | |
| 2420 | ||
| 2421 | /* | |
| 1f07f686 | 2422 | * Merge left-over flags back into the frontend and fix the state. |
| a9d52b76 | 2423 | * Incomplete truncations are retained by the backend. |
| 1f07f686 | 2424 | */ |
| cdb6e4e6 | 2425 | ip->error = error; |
| a9d52b76 MD |
2426 | ip->flags |= ip->sync_flags & ~HAMMER_INODE_TRUNCATED; |
| 2427 | ip->sync_flags &= HAMMER_INODE_TRUNCATED; | |
| 1f07f686 MD |
2428 | |
| 2429 | /* | |
| 2430 | * The backend may have adjusted nlinks, so if the adjusted nlinks | |
| 47f363f1 | 2431 | * does not match the fronttend set the frontend's DDIRTY flag again. |
| 1f07f686 | 2432 | */ |
| 11ad5ade MD |
2433 | if (ip->ino_data.nlinks != ip->sync_ino_data.nlinks) |
| 2434 | ip->flags |= HAMMER_INODE_DDIRTY; | |
| b84de5af | 2435 | |
| b84de5af | 2436 | /* |
| a7e9bef1 | 2437 | * Fix up the dirty buffer status. |
| 4e17f465 | 2438 | */ |
| 0832c9bb | 2439 | if (ip->vp && RB_ROOT(&ip->vp->v_rbdirty_tree)) { |
| 1f07f686 | 2440 | ip->flags |= HAMMER_INODE_BUFS; |
| 1f07f686 | 2441 | } |
| 73896937 | 2442 | hammer_redo_fifo_end_flush(ip); |
| 1f07f686 MD |
2443 | |
| 2444 | /* | |
| 2445 | * Re-set the XDIRTY flag if some of the inode's in-memory records | |
| 2446 | * could not be flushed. | |
| 2447 | */ | |
| 0832c9bb MD |
2448 | KKASSERT((RB_EMPTY(&ip->rec_tree) && |
| 2449 | (ip->flags & HAMMER_INODE_XDIRTY) == 0) || | |
| 2450 | (!RB_EMPTY(&ip->rec_tree) && | |
| 2451 | (ip->flags & HAMMER_INODE_XDIRTY) != 0)); | |
| 4e17f465 MD |
2452 | |
| 2453 | /* | |
| 2454 | * Do not lose track of inodes which no longer have vnode | |
| 2455 | * assocations, otherwise they may never get flushed again. | |
| 35a49944 MD |
2456 | * |
| 2457 | * The reflush flag can be set superfluously, causing extra pain | |
| 2458 | * for no reason. If the inode is no longer modified it no longer | |
| 2459 | * needs to be flushed. | |
| 4e17f465 | 2460 | */ |
| 35a49944 MD |
2461 | if (ip->flags & HAMMER_INODE_MODMASK) { |
| 2462 | if (ip->vp == NULL) | |
| 2463 | ip->flags |= HAMMER_INODE_REFLUSH; | |
| 2464 | } else { | |
| 2465 | ip->flags &= ~HAMMER_INODE_REFLUSH; | |
| 2466 | } | |
| 4e17f465 MD |
2467 | |
| 2468 | /* | |
| 7a61b85d | 2469 | * Adjust the flush state. |
| 4e17f465 | 2470 | */ |
| 06ad81ff | 2471 | if (ip->flags & HAMMER_INODE_WOULDBLOCK) { |
| 7a61b85d MD |
2472 | /* |
| 2473 | * We were unable to flush out all our records, leave the | |
| 2474 | * inode in a flush state and in the current flush group. | |
| d7e278bb | 2475 | * The flush group will be re-run. |
| 7a61b85d | 2476 | * |
| d7e278bb MD |
2477 | * This occurs if the UNDO block gets too full or there is |
| 2478 | * too much dirty meta-data and allows the flusher to | |
| 2479 | * finalize the UNDO block and then re-flush. | |
| 7a61b85d | 2480 | */ |
| 06ad81ff | 2481 | ip->flags &= ~HAMMER_INODE_WOULDBLOCK; |
| af209b0f | 2482 | dorel = 0; |
| 7a61b85d MD |
2483 | } else { |
| 2484 | /* | |
| 2485 | * Remove from the flush_group | |
| 2486 | */ | |
| ff003b11 | 2487 | RB_REMOVE(hammer_fls_rb_tree, &ip->flush_group->flush_tree, ip); |
| 7a61b85d MD |
2488 | ip->flush_group = NULL; |
| 2489 | ||
| e0092341 | 2490 | #if 0 |
| 7a61b85d MD |
2491 | /* |
| 2492 | * Clean up the vnode ref and tracking counts. | |
| 2493 | */ | |
| 2494 | if (ip->flags & HAMMER_INODE_VHELD) { | |
| 2495 | ip->flags &= ~HAMMER_INODE_VHELD; | |
| 2496 | vrele(ip->vp); | |
| 2497 | } | |
| e0092341 | 2498 | #endif |
| 7a61b85d MD |
2499 | --hmp->count_iqueued; |
| 2500 | --hammer_count_iqueued; | |
| 2501 | ||
| 2502 | /* | |
| 2503 | * And adjust the state. | |
| 2504 | */ | |
| 2505 | if (TAILQ_EMPTY(&ip->target_list) && RB_EMPTY(&ip->rec_tree)) { | |
| 2506 | ip->flush_state = HAMMER_FST_IDLE; | |
| 2507 | dorel = 1; | |
| 2508 | } else { | |
| 2509 | ip->flush_state = HAMMER_FST_SETUP; | |
| 2510 | dorel = 0; | |
| 2511 | } | |
| b84de5af | 2512 | |
| 7a61b85d MD |
2513 | /* |
| 2514 | * If the frontend is waiting for a flush to complete, | |
| 2515 | * wake it up. | |
| 2516 | */ | |
| 2517 | if (ip->flags & HAMMER_INODE_FLUSHW) { | |
| 2518 | ip->flags &= ~HAMMER_INODE_FLUSHW; | |
| 2519 | wakeup(&ip->flags); | |
| 2520 | } | |
| af209b0f | 2521 | |
| d7e278bb MD |
2522 | /* |
| 2523 | * If the frontend made more changes and requested another | |
| 2524 | * flush, then try to get it running. | |
| 2525 | * | |
| 2526 | * Reflushes are aborted when the inode is errored out. | |
| 2527 | */ | |
| 2528 | if (ip->flags & HAMMER_INODE_REFLUSH) { | |
| 2529 | ip->flags &= ~HAMMER_INODE_REFLUSH; | |
| 2530 | if (ip->flags & HAMMER_INODE_RESIGNAL) { | |
| 2531 | ip->flags &= ~HAMMER_INODE_RESIGNAL; | |
| 2532 | hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL); | |
| 2533 | } else { | |
| 2534 | hammer_flush_inode(ip, 0); | |
| 2535 | } | |
| 0729c8c8 | 2536 | } |
| 4e17f465 MD |
2537 | } |
| 2538 | ||
| 2539 | /* | |
| 7b6ccb11 MD |
2540 | * If we have no parent dependancies we can clear CONN_DOWN |
| 2541 | */ | |
| 2542 | if (TAILQ_EMPTY(&ip->target_list)) | |
| 2543 | ip->flags &= ~HAMMER_INODE_CONN_DOWN; | |
| 2544 | ||
| 2545 | /* | |
| e63644f0 MD |
2546 | * If the inode is now clean drop the space reservation. |
| 2547 | */ | |
| 2548 | if ((ip->flags & HAMMER_INODE_MODMASK) == 0 && | |
| 2549 | (ip->flags & HAMMER_INODE_RSV_INODES)) { | |
| 2550 | ip->flags &= ~HAMMER_INODE_RSV_INODES; | |
| af209b0f | 2551 | --hmp->rsv_inodes; |
| e63644f0 MD |
2552 | } |
| 2553 | ||
| 1f07f686 MD |
2554 | if (dorel) |
| 2555 | hammer_rel_inode(ip, 0); | |
| b84de5af MD |
2556 | } |
| 2557 | ||
| 2558 | /* | |
| 2559 | * Called from hammer_sync_inode() to synchronize in-memory records | |
| 2560 | * to the media. | |
| 2561 | */ | |
| 2562 | static int | |
| 2563 | hammer_sync_record_callback(hammer_record_t record, void *data) | |
| c0ade690 | 2564 | { |
| 4e17f465 MD |
2565 | hammer_cursor_t cursor = data; |
| 2566 | hammer_transaction_t trans = cursor->trans; | |
| 6c1f89f4 | 2567 | hammer_mount_t hmp = trans->hmp; |
| c0ade690 MD |
2568 | int error; |
| 2569 | ||
| b84de5af | 2570 | /* |
| 1f07f686 | 2571 | * Skip records that do not belong to the current flush. |
| b84de5af | 2572 | */ |
| 47637bff | 2573 | ++hammer_stats_record_iterations; |
| 1f07f686 | 2574 | if (record->flush_state != HAMMER_FST_FLUSH) |
| b84de5af | 2575 | return(0); |
| 47637bff | 2576 | |
| 1f07f686 MD |
2577 | #if 1 |
| 2578 | if (record->flush_group != record->ip->flush_group) { | |
| 7a61b85d | 2579 | kprintf("sync_record %p ip %p bad flush group %p %p\n", record, record->ip, record->flush_group ,record->ip->flush_group); |
| fc73edd8 MD |
2580 | if (hammer_debug_critical) |
| 2581 | Debugger("blah2"); | |
| 1f07f686 MD |
2582 | return(0); |
| 2583 | } | |
| 2584 | #endif | |
| 2585 | KKASSERT(record->flush_group == record->ip->flush_group); | |
| d36ec43b MD |
2586 | |
| 2587 | /* | |
| 2588 | * Interlock the record using the BE flag. Once BE is set the | |
| 2589 | * frontend cannot change the state of FE. | |
| 2590 | * | |
| 2591 | * NOTE: If FE is set prior to us setting BE we still sync the | |
| 2592 | * record out, but the flush completion code converts it to | |
| 2593 | * a delete-on-disk record instead of destroying it. | |
| 2594 | */ | |
| 4e17f465 | 2595 | KKASSERT((record->flags & HAMMER_RECF_INTERLOCK_BE) == 0); |
| d36ec43b MD |
2596 | record->flags |= HAMMER_RECF_INTERLOCK_BE; |
| 2597 | ||
| 2598 | /* | |
| 3214ade6 | 2599 | * The backend has already disposed of the record. |
| 47637bff | 2600 | */ |
| 3214ade6 | 2601 | if (record->flags & (HAMMER_RECF_DELETED_BE | HAMMER_RECF_COMMITTED)) { |
| 47637bff MD |
2602 | error = 0; |
| 2603 | goto done; | |
| 2604 | } | |
| 2605 | ||
| 2606 | /* | |
| 98f7132d | 2607 | * If the whole inode is being deleting all on-disk records will |
| 930bf163 MD |
2608 | * be deleted very soon, we can't sync any new records to disk |
| 2609 | * because they will be deleted in the same transaction they were | |
| 2610 | * created in (delete_tid == create_tid), which will assert. | |
| 2611 | * | |
| 2612 | * XXX There may be a case with RECORD_ADD with DELETED_FE set | |
| 2613 | * that we currently panic on. | |
| 98f7132d MD |
2614 | */ |
| 2615 | if (record->ip->sync_flags & HAMMER_INODE_DELETING) { | |
| 930bf163 | 2616 | switch(record->type) { |
| 47637bff MD |
2617 | case HAMMER_MEM_RECORD_DATA: |
| 2618 | /* | |
| 2619 | * We don't have to do anything, if the record was | |
| 2620 | * committed the space will have been accounted for | |
| 2621 | * in the blockmap. | |
| 2622 | */ | |
| 2623 | /* fall through */ | |
| 930bf163 | 2624 | case HAMMER_MEM_RECORD_GENERAL: |
| 3214ade6 MD |
2625 | /* |
| 2626 | * Set deleted-by-backend flag. Do not set the | |
| 2627 | * backend committed flag, because we are throwing | |
| 2628 | * the record away. | |
| 2629 | */ | |
| 98f7132d | 2630 | record->flags |= HAMMER_RECF_DELETED_BE; |
| 3214ade6 | 2631 | ++record->ip->rec_generation; |
| 930bf163 MD |
2632 | error = 0; |
| 2633 | goto done; | |
| 2634 | case HAMMER_MEM_RECORD_ADD: | |
| 2635 | panic("hammer_sync_record_callback: illegal add " | |
| 2636 | "during inode deletion record %p", record); | |
| 2637 | break; /* NOT REACHED */ | |
| 2638 | case HAMMER_MEM_RECORD_INODE: | |
| 2639 | panic("hammer_sync_record_callback: attempt to " | |
| 2640 | "sync inode record %p?", record); | |
| 2641 | break; /* NOT REACHED */ | |
| 2642 | case HAMMER_MEM_RECORD_DEL: | |
| 2643 | /* | |
| 2644 | * Follow through and issue the on-disk deletion | |
| 98f7132d | 2645 | */ |
| 930bf163 | 2646 | break; |
| 98f7132d | 2647 | } |
| 98f7132d MD |
2648 | } |
| 2649 | ||
| 2650 | /* | |
| 7bc5b8c2 MD |
2651 | * If DELETED_FE is set special handling is needed for directory |
| 2652 | * entries. Dependant pieces related to the directory entry may | |
| 2653 | * have already been synced to disk. If this occurs we have to | |
| 2654 | * sync the directory entry and then change the in-memory record | |
| 2655 | * from an ADD to a DELETE to cover the fact that it's been | |
| 2656 | * deleted by the frontend. | |
| 2657 | * | |
| 2658 | * A directory delete covering record (MEM_RECORD_DEL) can never | |
| 2659 | * be deleted by the frontend. | |
| 2660 | * | |
| 2661 | * Any other record type (aka DATA) can be deleted by the frontend. | |
| 2662 | * XXX At the moment the flusher must skip it because there may | |
| 2663 | * be another data record in the flush group for the same block, | |
| 2664 | * meaning that some frontend data changes can leak into the backend's | |
| 2665 | * synchronization point. | |
| d36ec43b | 2666 | */ |
| 1f07f686 | 2667 | if (record->flags & HAMMER_RECF_DELETED_FE) { |
| e8599db1 | 2668 | if (record->type == HAMMER_MEM_RECORD_ADD) { |
| 3214ade6 MD |
2669 | /* |
| 2670 | * Convert a front-end deleted directory-add to | |
| 2671 | * a directory-delete entry later. | |
| 2672 | */ | |
| e8599db1 MD |
2673 | record->flags |= HAMMER_RECF_CONVERT_DELETE; |
| 2674 | } else { | |
| 3214ade6 MD |
2675 | /* |
| 2676 | * Dispose of the record (race case). Mark as | |
| 2677 | * deleted by backend (and not committed). | |
| 2678 | */ | |
| e8599db1 | 2679 | KKASSERT(record->type != HAMMER_MEM_RECORD_DEL); |
| 7bc5b8c2 | 2680 | record->flags |= HAMMER_RECF_DELETED_BE; |
| 3214ade6 | 2681 | ++record->ip->rec_generation; |
| 7bc5b8c2 MD |
2682 | error = 0; |
| 2683 | goto done; | |
| e8599db1 | 2684 | } |
| 1f07f686 | 2685 | } |
| b84de5af MD |
2686 | |
| 2687 | /* | |
| 2688 | * Assign the create_tid for new records. Deletions already | |
| 2689 | * have the record's entire key properly set up. | |
| 2690 | */ | |
| 3214ade6 | 2691 | if (record->type != HAMMER_MEM_RECORD_DEL) { |
| 11ad5ade | 2692 | record->leaf.base.create_tid = trans->tid; |
| dd94f1b1 | 2693 | record->leaf.create_ts = trans->time32; |
| 3214ade6 | 2694 | } |
| 47f363f1 MD |
2695 | |
| 2696 | /* | |
| 2697 | * This actually moves the record to the on-media B-Tree. We | |
| 2698 | * must also generate REDO_TERM entries in the UNDO/REDO FIFO | |
| 2699 | * indicating that the related REDO_WRITE(s) have been committed. | |
| 2700 | * | |
| 2701 | * During recovery any REDO_TERM's within the nominal recovery span | |
| 2702 | * are ignored since the related meta-data is being undone, causing | |
| 2703 | * any matching REDO_WRITEs to execute. The REDO_TERMs outside | |
| 2704 | * the nominal recovery span will match against REDO_WRITEs and | |
| 2705 | * prevent them from being executed (because the meta-data has | |
| 2706 | * already been synchronized). | |
| 2707 | */ | |
| 2708 | if (record->flags & HAMMER_RECF_REDO) { | |
| 2709 | KKASSERT(record->type == HAMMER_MEM_RECORD_DATA); | |
| 2710 | hammer_generate_redo(trans, record->ip, | |
| 2711 | record->leaf.base.key - | |
| 2712 | record->leaf.data_len, | |
| 2713 | HAMMER_REDO_TERM_WRITE, | |
| 2714 | NULL, | |
| 2715 | record->leaf.data_len); | |
| 2716 | } | |
| 4e17f465 MD |
2717 | for (;;) { |
| 2718 | error = hammer_ip_sync_record_cursor(cursor, record); | |
| 2719 | if (error != EDEADLK) | |
| 2720 | break; | |
| 2721 | hammer_done_cursor(cursor); | |
| 2722 | error = hammer_init_cursor(trans, cursor, &record->ip->cache[0], | |
| 2723 | record->ip); | |
| 2724 | if (error) | |
| 2725 | break; | |
| 2726 | } | |
| 2727 | record->flags &= ~HAMMER_RECF_CONVERT_DELETE; | |
| c0ade690 | 2728 | |
| cdb6e4e6 | 2729 | if (error) |
| b3deaf57 | 2730 | error = -error; |
| 98f7132d | 2731 | done: |
| d36ec43b | 2732 | hammer_flush_record_done(record, error); |
| 6c1f89f4 MD |
2733 | |
| 2734 | /* | |
| 2735 | * Do partial finalization if we have built up too many dirty | |
| 2736 | * buffers. Otherwise a buffer cache deadlock can occur when | |
| 2737 | * doing things like creating tens of thousands of tiny files. | |
| 2738 | * | |
| 842e7a70 MD |
2739 | * We must release our cursor lock to avoid a 3-way deadlock |
| 2740 | * due to the exclusive sync lock the finalizer must get. | |
| c9ce54d6 MD |
2741 | * |
| 2742 | * WARNING: See warnings in hammer_unlock_cursor() function. | |
| 6c1f89f4 | 2743 | */ |
| 842e7a70 | 2744 | if (hammer_flusher_meta_limit(hmp)) { |
| 982be4bf | 2745 | hammer_unlock_cursor(cursor); |
| 6c1f89f4 | 2746 | hammer_flusher_finalize(trans, 0); |
| 982be4bf | 2747 | hammer_lock_cursor(cursor); |
| 842e7a70 | 2748 | } |
| 6c1f89f4 | 2749 | |
| b3deaf57 | 2750 | return(error); |
| c0ade690 MD |
2751 | } |
| 2752 | ||
| 2753 | /* | |
| d7e278bb | 2754 | * Backend function called by the flusher to sync an inode to media. |
| c0ade690 MD |
2755 | */ |
| 2756 | int | |
| 02325004 | 2757 | hammer_sync_inode(hammer_transaction_t trans, hammer_inode_t ip) |
| c0ade690 | 2758 | { |
| 4e17f465 | 2759 | struct hammer_cursor cursor; |
| cb51be26 | 2760 | hammer_node_t tmp_node; |
| 1f07f686 MD |
2761 | hammer_record_t depend; |
| 2762 | hammer_record_t next; | |
| ec4e8497 | 2763 | int error, tmp_error; |
| 1f07f686 | 2764 | u_int64_t nlinks; |
| c0ade690 | 2765 | |
| 1f07f686 | 2766 | if ((ip->sync_flags & HAMMER_INODE_MODMASK) == 0) |
| d113fda1 | 2767 | return(0); |
| d113fda1 | 2768 | |
| 02325004 | 2769 | error = hammer_init_cursor(trans, &cursor, &ip->cache[1], ip); |
| 4e17f465 MD |
2770 | if (error) |
| 2771 | goto done; | |
| c0ade690 MD |
2772 | |
| 2773 | /* | |
| 1f07f686 MD |
2774 | * Any directory records referencing this inode which are not in |
| 2775 | * our current flush group must adjust our nlink count for the | |
| 47f363f1 | 2776 | * purposes of synchronizating to disk. |
| 1f07f686 MD |
2777 | * |
| 2778 | * Records which are in our flush group can be unlinked from our | |
| c4bae5fd MD |
2779 | * inode now, potentially allowing the inode to be physically |
| 2780 | * deleted. | |
| bf3b416b MD |
2781 | * |
| 2782 | * This cannot block. | |
| ec4e8497 | 2783 | */ |
| 11ad5ade | 2784 | nlinks = ip->ino_data.nlinks; |
| 1f07f686 MD |
2785 | next = TAILQ_FIRST(&ip->target_list); |
| 2786 | while ((depend = next) != NULL) { | |
| 2787 | next = TAILQ_NEXT(depend, target_entry); | |
| 2788 | if (depend->flush_state == HAMMER_FST_FLUSH && | |
| 7a61b85d | 2789 | depend->flush_group == ip->flush_group) { |
| c4bae5fd MD |
2790 | /* |
| 2791 | * If this is an ADD that was deleted by the frontend | |
| 2792 | * the frontend nlinks count will have already been | |
| 2793 | * decremented, but the backend is going to sync its | |
| 2794 | * directory entry and must account for it. The | |
| 2795 | * record will be converted to a delete-on-disk when | |
| 2796 | * it gets synced. | |
| 2797 | * | |
| 2798 | * If the ADD was not deleted by the frontend we | |
| 2799 | * can remove the dependancy from our target_list. | |
| 2800 | */ | |
| 2801 | if (depend->flags & HAMMER_RECF_DELETED_FE) { | |
| 2802 | ++nlinks; | |
| 2803 | } else { | |
| 2804 | TAILQ_REMOVE(&ip->target_list, depend, | |
| 2805 | target_entry); | |
| 2806 | depend->target_ip = NULL; | |
| 2807 | } | |
| 1f07f686 | 2808 | } else if ((depend->flags & HAMMER_RECF_DELETED_FE) == 0) { |
| c4bae5fd | 2809 | /* |
| 3214ade6 MD |
2810 | * Not part of our flush group and not deleted by |
| 2811 | * the front-end, adjust the link count synced to | |
| 2812 | * the media (undo what the frontend did when it | |
| 2813 | * queued the record). | |
| c4bae5fd MD |
2814 | */ |
| 2815 | KKASSERT((depend->flags & HAMMER_RECF_DELETED_BE) == 0); | |
| 1f07f686 MD |
2816 | switch(depend->type) { |
| 2817 | case HAMMER_MEM_RECORD_ADD: | |
| 2818 | --nlinks; | |
| 2819 | break; | |
| 2820 | case HAMMER_MEM_RECORD_DEL: | |
| 2821 | ++nlinks; | |
| 2822 | break; | |
| e8599db1 MD |
2823 | default: |
| 2824 | break; | |
| 1f07f686 | 2825 | } |
| ec4e8497 | 2826 | } |
| ec4e8497 MD |
2827 | } |
| 2828 | ||
| ec4e8497 | 2829 | /* |
| 1f07f686 | 2830 | * Set dirty if we had to modify the link count. |
| c0ade690 | 2831 | */ |
| 11ad5ade | 2832 | if (ip->sync_ino_data.nlinks != nlinks) { |
| 1f07f686 | 2833 | KKASSERT((int64_t)nlinks >= 0); |
| 11ad5ade MD |
2834 | ip->sync_ino_data.nlinks = nlinks; |
| 2835 | ip->sync_flags |= HAMMER_INODE_DDIRTY; | |
| 1f07f686 | 2836 | } |
| b84de5af | 2837 | |
| 4e17f465 | 2838 | /* |
| 869e8f55 MD |
2839 | * If there is a trunction queued destroy any data past the (aligned) |
| 2840 | * truncation point. Userland will have dealt with the buffer | |
| 2841 | * containing the truncation point for us. | |
| 2842 | * | |
| 2843 | * We don't flush pending frontend data buffers until after we've | |
| cb51be26 | 2844 | * dealt with the truncation. |
| 1f07f686 | 2845 | */ |
| 869e8f55 | 2846 | if (ip->sync_flags & HAMMER_INODE_TRUNCATED) { |
| b84de5af MD |
2847 | /* |
| 2848 | * Interlock trunc_off. The VOP front-end may continue to | |
| 2849 | * make adjustments to it while we are blocked. | |
| 2850 | */ | |
| 2851 | off_t trunc_off; | |
| 2852 | off_t aligned_trunc_off; | |
| 4a2796f3 | 2853 | int blkmask; |
| c0ade690 | 2854 | |
| b84de5af | 2855 | trunc_off = ip->sync_trunc_off; |
| 4a2796f3 MD |
2856 | blkmask = hammer_blocksize(trunc_off) - 1; |
| 2857 | aligned_trunc_off = (trunc_off + blkmask) & ~(int64_t)blkmask; | |
| b84de5af MD |
2858 | |
| 2859 | /* | |
| 2860 | * Delete any whole blocks on-media. The front-end has | |
| 2861 | * already cleaned out any partial block and made it | |
| 2862 | * pending. The front-end may have updated trunc_off | |
| 47637bff | 2863 | * while we were blocked so we only use sync_trunc_off. |
| 06ad81ff MD |
2864 | * |
| 2865 | * This operation can blow out the buffer cache, EWOULDBLOCK | |
| a9d52b76 MD |
2866 | * means we were unable to complete the deletion. The |
| 2867 | * deletion will update sync_trunc_off in that case. | |
| b84de5af | 2868 | */ |
| 4e17f465 | 2869 | error = hammer_ip_delete_range(&cursor, ip, |
| b84de5af | 2870 | aligned_trunc_off, |
| 06ad81ff MD |
2871 | 0x7FFFFFFFFFFFFFFFLL, 2); |
| 2872 | if (error == EWOULDBLOCK) { | |
| 2873 | ip->flags |= HAMMER_INODE_WOULDBLOCK; | |
| 2874 | error = 0; | |
| 2875 | goto defer_buffer_flush; | |
| 2876 | } | |
| 2877 | ||
| b84de5af | 2878 | if (error) |
| cdb6e4e6 | 2879 | goto done; |
| 47637bff MD |
2880 | |
| 2881 | /* | |
| 47f363f1 MD |
2882 | * Generate a REDO_TERM_TRUNC entry in the UNDO/REDO FIFO. |
| 2883 | * | |
| 2884 | * XXX we do this even if we did not previously generate | |
| 2885 | * a REDO_TRUNC record. This operation may enclosed the | |
| 2886 | * range for multiple prior truncation entries in the REDO | |
| 2887 | * log. | |
| 2888 | */ | |
| 73896937 MD |
2889 | if (trans->hmp->version >= HAMMER_VOL_VERSION_FOUR && |
| 2890 | (ip->flags & HAMMER_INODE_RDIRTY)) { | |
| 47f363f1 MD |
2891 | hammer_generate_redo(trans, ip, aligned_trunc_off, |
| 2892 | HAMMER_REDO_TERM_TRUNC, | |
| 2893 | NULL, 0); | |
| 2894 | } | |
| 2895 | ||
| 2896 | /* | |
| 47637bff | 2897 | * Clear the truncation flag on the backend after we have |
| 47f363f1 | 2898 | * completed the deletions. Backend data is now good again |
| 47637bff | 2899 | * (including new records we are about to sync, below). |
| cb51be26 MD |
2900 | * |
| 2901 | * Leave sync_trunc_off intact. As we write additional | |
| 2902 | * records the backend will update sync_trunc_off. This | |
| 2903 | * tells the backend whether it can skip the overwrite | |
| 2904 | * test. This should work properly even when the backend | |
| 2905 | * writes full blocks where the truncation point straddles | |
| 2906 | * the block because the comparison is against the base | |
| 2907 | * offset of the record. | |
| 47637bff | 2908 | */ |
| b84de5af | 2909 | ip->sync_flags &= ~HAMMER_INODE_TRUNCATED; |
| cb51be26 | 2910 | /* ip->sync_trunc_off = 0x7FFFFFFFFFFFFFFFLL; */ |
| 1f07f686 MD |
2911 | } else { |
| 2912 | error = 0; | |
| f3b0f382 MD |
2913 | } |
| 2914 | ||
| 1f07f686 MD |
2915 | /* |
| 2916 | * Now sync related records. These will typically be directory | |
| 6c1f89f4 | 2917 | * entries, records tracking direct-writes, or delete-on-disk records. |
| 1f07f686 MD |
2918 | */ |
| 2919 | if (error == 0) { | |
| 2920 | tmp_error = RB_SCAN(hammer_rec_rb_tree, &ip->rec_tree, NULL, | |
| 4e17f465 | 2921 | hammer_sync_record_callback, &cursor); |
| 1f07f686 MD |
2922 | if (tmp_error < 0) |
| 2923 | tmp_error = -error; | |
| 2924 | if (tmp_error) | |
| 2925 | error = tmp_error; | |
| 2926 | } | |
| bcac4bbb | 2927 | hammer_cache_node(&ip->cache[1], cursor.node); |
| cb51be26 MD |
2928 | |
| 2929 | /* | |
| 43c665ae MD |
2930 | * Re-seek for inode update, assuming our cache hasn't been ripped |
| 2931 | * out from under us. | |
| cb51be26 MD |
2932 | */ |
| 2933 | if (error == 0) { | |
| 4c286c36 | 2934 | tmp_node = hammer_ref_node_safe(trans, &ip->cache[0], &error); |
| cb51be26 | 2935 | if (tmp_node) { |
| 5fa5c92f MD |
2936 | hammer_cursor_downgrade(&cursor); |
| 2937 | hammer_lock_sh(&tmp_node->lock); | |
| 43c665ae MD |
2938 | if ((tmp_node->flags & HAMMER_NODE_DELETED) == 0) |
| 2939 | hammer_cursor_seek(&cursor, tmp_node, 0); | |
| 5fa5c92f | 2940 | hammer_unlock(&tmp_node->lock); |
| cb51be26 MD |
2941 | hammer_rel_node(tmp_node); |
| 2942 | } | |
| 2943 | error = 0; | |
| 2944 | } | |
| 1f07f686 MD |
2945 | |
| 2946 | /* | |
| 869e8f55 MD |
2947 | * If we are deleting the inode the frontend had better not have |
| 2948 | * any active references on elements making up the inode. | |
| a9d52b76 MD |
2949 | * |
| 2950 | * The call to hammer_ip_delete_clean() cleans up auxillary records | |
| 2951 | * but not DB or DATA records. Those must have already been deleted | |
| 2952 | * by the normal truncation mechanic. | |
| 1f07f686 | 2953 | */ |
| 11ad5ade | 2954 | if (error == 0 && ip->sync_ino_data.nlinks == 0 && |
| 869e8f55 MD |
2955 | RB_EMPTY(&ip->rec_tree) && |
| 2956 | (ip->sync_flags & HAMMER_INODE_DELETING) && | |
| 2957 | (ip->flags & HAMMER_INODE_DELETED) == 0) { | |
| 2958 | int count1 = 0; | |
| 1f07f686 | 2959 | |
| a9d52b76 | 2960 | error = hammer_ip_delete_clean(&cursor, ip, &count1); |
| 869e8f55 | 2961 | if (error == 0) { |
| 06ad81ff | 2962 | ip->flags |= HAMMER_INODE_DELETED; |
| 869e8f55 MD |
2963 | ip->sync_flags &= ~HAMMER_INODE_DELETING; |
| 2964 | ip->sync_flags &= ~HAMMER_INODE_TRUNCATED; | |
| 2965 | KKASSERT(RB_EMPTY(&ip->rec_tree)); | |
| 1f07f686 | 2966 | |
| 869e8f55 MD |
2967 | /* |
| 2968 | * Set delete_tid in both the frontend and backend | |
| 2969 | * copy of the inode record. The DELETED flag handles | |
| 47f363f1 | 2970 | * this, do not set DDIRTY. |
| 869e8f55 | 2971 | */ |
| 02325004 MD |
2972 | ip->ino_leaf.base.delete_tid = trans->tid; |
| 2973 | ip->sync_ino_leaf.base.delete_tid = trans->tid; | |
| 2974 | ip->ino_leaf.delete_ts = trans->time32; | |
| 2975 | ip->sync_ino_leaf.delete_ts = trans->time32; | |
| dd94f1b1 | 2976 | |
| 1f07f686 | 2977 | |
| 869e8f55 MD |
2978 | /* |
| 2979 | * Adjust the inode count in the volume header | |
| 2980 | */ | |
| 02325004 | 2981 | hammer_sync_lock_sh(trans); |
| f36a9737 | 2982 | if (ip->flags & HAMMER_INODE_ONDISK) { |
| 02325004 MD |
2983 | hammer_modify_volume_field(trans, |
| 2984 | trans->rootvol, | |
| f36a9737 MD |
2985 | vol0_stat_inodes); |
| 2986 | --ip->hmp->rootvol->ondisk->vol0_stat_inodes; | |
| 02325004 | 2987 | hammer_modify_volume_done(trans->rootvol); |
| f36a9737 | 2988 | } |
| 02325004 | 2989 | hammer_sync_unlock(trans); |
| 869e8f55 | 2990 | } |
| 1f07f686 | 2991 | } |
| b84de5af | 2992 | |
| b84de5af | 2993 | if (error) |
| cdb6e4e6 MD |
2994 | goto done; |
| 2995 | ip->sync_flags &= ~HAMMER_INODE_BUFS; | |
| c0ade690 | 2996 | |
| 06ad81ff | 2997 | defer_buffer_flush: |
| c0ade690 MD |
2998 | /* |
| 2999 | * Now update the inode's on-disk inode-data and/or on-disk record. | |
| b84de5af | 3000 | * DELETED and ONDISK are managed only in ip->flags. |
| 06ad81ff MD |
3001 | * |
| 3002 | * In the case of a deferred buffer flush we still update the on-disk | |
| 3003 | * inode to satisfy visibility requirements if there happen to be | |
| 3004 | * directory dependencies. | |
| c0ade690 | 3005 | */ |
| b84de5af | 3006 | switch(ip->flags & (HAMMER_INODE_DELETED | HAMMER_INODE_ONDISK)) { |
| c0ade690 MD |
3007 | case HAMMER_INODE_DELETED|HAMMER_INODE_ONDISK: |
| 3008 | /* | |
| 3009 | * If deleted and on-disk, don't set any additional flags. | |
| 3010 | * the delete flag takes care of things. | |
| 869e8f55 MD |
3011 | * |
| 3012 | * Clear flags which may have been set by the frontend. | |
| c0ade690 | 3013 | */ |
| ddfdf542 | 3014 | ip->sync_flags &= ~(HAMMER_INODE_DDIRTY | HAMMER_INODE_XDIRTY | |
| 9192654c | 3015 | HAMMER_INODE_SDIRTY | |
| ddfdf542 | 3016 | HAMMER_INODE_ATIME | HAMMER_INODE_MTIME | |
| 869e8f55 | 3017 | HAMMER_INODE_DELETING); |
| c0ade690 MD |
3018 | break; |
| 3019 | case HAMMER_INODE_DELETED: | |
| 3020 | /* | |
| 3021 | * Take care of the case where a deleted inode was never | |
| 3022 | * flushed to the disk in the first place. | |
| 869e8f55 MD |
3023 | * |
| 3024 | * Clear flags which may have been set by the frontend. | |
| c0ade690 | 3025 | */ |
| ddfdf542 | 3026 | ip->sync_flags &= ~(HAMMER_INODE_DDIRTY | HAMMER_INODE_XDIRTY | |
| 9192654c | 3027 | HAMMER_INODE_SDIRTY | |
| ddfdf542 | 3028 | HAMMER_INODE_ATIME | HAMMER_INODE_MTIME | |
| 869e8f55 | 3029 | HAMMER_INODE_DELETING); |
| d26d0ae9 | 3030 | while (RB_ROOT(&ip->rec_tree)) { |
| d36ec43b MD |
3031 | hammer_record_t record = RB_ROOT(&ip->rec_tree); |
| 3032 | hammer_ref(&record->lock); | |
| 250aec18 | 3033 | KKASSERT(hammer_oneref(&record->lock)); |
| d36ec43b | 3034 | record->flags |= HAMMER_RECF_DELETED_BE; |
| 3214ade6 | 3035 | ++record->ip->rec_generation; |
| d36ec43b | 3036 | hammer_rel_mem_record(record); |
| d26d0ae9 | 3037 | } |
| c0ade690 MD |
3038 | break; |
| 3039 | case HAMMER_INODE_ONDISK: | |
| 3040 | /* | |
| 3041 | * If already on-disk, do not set any additional flags. | |
| 3042 | */ | |
| 3043 | break; | |
| 3044 | default: | |
| 3045 | /* | |
| ddfdf542 MD |
3046 | * If not on-disk and not deleted, set DDIRTY to force |
| 3047 | * an initial record to be written. | |
| b84de5af | 3048 | * |
| ddfdf542 | 3049 | * Also set the create_tid in both the frontend and backend |
| b84de5af | 3050 | * copy of the inode record. |
| c0ade690 | 3051 | */ |
| 02325004 MD |
3052 | ip->ino_leaf.base.create_tid = trans->tid; |
| 3053 | ip->ino_leaf.create_ts = trans->time32; | |
| 3054 | ip->sync_ino_leaf.base.create_tid = trans->tid; | |
| 3055 | ip->sync_ino_leaf.create_ts = trans->time32; | |
| 11ad5ade | 3056 | ip->sync_flags |= HAMMER_INODE_DDIRTY; |
| c0ade690 MD |
3057 | break; |
| 3058 | } | |
| 3059 | ||
| 3060 | /* | |
| 47f363f1 | 3061 | * If DDIRTY or SDIRTY is set, write out a new record. |
| 9192654c MD |
3062 | * If the inode is already on-disk the old record is marked as |
| 3063 | * deleted. | |
| d113fda1 MD |
3064 | * |
| 3065 | * If DELETED is set hammer_update_inode() will delete the existing | |
| 3066 | * record without writing out a new one. | |
| 3067 | * | |
| 3068 | * If *ONLY* the ITIMES flag is set we can update the record in-place. | |
| c0ade690 | 3069 | */ |
| b84de5af | 3070 | if (ip->flags & HAMMER_INODE_DELETED) { |
| 4e17f465 | 3071 | error = hammer_update_inode(&cursor, ip); |
| b84de5af | 3072 | } else |
| 9192654c | 3073 | if (!(ip->sync_flags & (HAMMER_INODE_DDIRTY | HAMMER_INODE_SDIRTY)) && |
| ddfdf542 | 3074 | (ip->sync_flags & (HAMMER_INODE_ATIME | HAMMER_INODE_MTIME))) { |
| 4e17f465 | 3075 | error = hammer_update_itimes(&cursor, ip); |
| d113fda1 | 3076 | } else |
| 9192654c MD |
3077 | if (ip->sync_flags & (HAMMER_INODE_DDIRTY | HAMMER_INODE_SDIRTY | |
| 3078 | HAMMER_INODE_ATIME | HAMMER_INODE_MTIME)) { | |
| 4e17f465 | 3079 | error = hammer_update_inode(&cursor, ip); |
| c0ade690 | 3080 | } |
| 4e17f465 | 3081 | done: |
| cdb6e4e6 MD |
3082 | if (error) { |
| 3083 | hammer_critical_error(ip->hmp, ip, error, | |
| 3084 | "while syncing inode"); | |
| 3085 | } | |
| 4e17f465 | 3086 | hammer_done_cursor(&cursor); |
| c0ade690 | 3087 | return(error); |
| 8cd0a023 MD |
3088 | } |
| 3089 | ||
| 1f07f686 MD |
3090 | /* |
| 3091 | * This routine is called when the OS is no longer actively referencing | |
| 3092 | * the inode (but might still be keeping it cached), or when releasing | |
| 3093 | * the last reference to an inode. | |
| 3094 | * | |
| 3095 | * At this point if the inode's nlinks count is zero we want to destroy | |
| 3096 | * it, which may mean destroying it on-media too. | |
| 3097 | */ | |
| 3bf2d80a | 3098 | void |
| e8599db1 | 3099 | hammer_inode_unloadable_check(hammer_inode_t ip, int getvp) |
| 1f07f686 | 3100 | { |
| e8599db1 MD |
3101 | struct vnode *vp; |
| 3102 | ||
| 1f07f686 | 3103 | /* |
| c4bae5fd MD |
3104 | * Set the DELETING flag when the link count drops to 0 and the |
| 3105 | * OS no longer has any opens on the inode. | |
| 3106 | * | |
| 3107 | * The backend will clear DELETING (a mod flag) and set DELETED | |
| 3108 | * (a state flag) when it is actually able to perform the | |
| 3109 | * operation. | |
| 35a49944 MD |
3110 | * |
| 3111 | * Don't reflag the deletion if the flusher is currently syncing | |
| 3112 | * one that was already flagged. A previously set DELETING flag | |
| 3113 | * may bounce around flags and sync_flags until the operation is | |
| 3114 | * completely done. | |
| ef11b6e1 MD |
3115 | * |
| 3116 | * Do not attempt to modify a snapshot inode (one set to read-only). | |
| 1f07f686 | 3117 | */ |
| 11ad5ade | 3118 | if (ip->ino_data.nlinks == 0 && |
| ef11b6e1 | 3119 | ((ip->flags | ip->sync_flags) & (HAMMER_INODE_RO|HAMMER_INODE_DELETING|HAMMER_INODE_DELETED)) == 0) { |
| e8599db1 MD |
3120 | ip->flags |= HAMMER_INODE_DELETING; |
| 3121 | ip->flags |= HAMMER_INODE_TRUNCATED; | |
| 3122 | ip->trunc_off = 0; | |
| 3123 | vp = NULL; | |
| 3124 | if (getvp) { | |
| 3125 | if (hammer_get_vnode(ip, &vp) != 0) | |
| 3126 | return; | |
| 3127 | } | |
| 29ce0677 MD |
3128 | |
| 3129 | /* | |
| 29ce0677 MD |
3130 | * Final cleanup |
| 3131 | */ | |
| 6362a262 MD |
3132 | if (ip->vp) |
| 3133 | nvtruncbuf(ip->vp, 0, HAMMER_BUFSIZE, 0); | |
| 3134 | if (getvp) | |
| e8599db1 | 3135 | vput(vp); |
| 1f07f686 | 3136 | } |
| 1f07f686 MD |
3137 | } |
| 3138 | ||
| 3bf2d80a | 3139 | /* |
| 7b6ccb11 MD |
3140 | * After potentially resolving a dependancy the inode is tested |
| 3141 | * to determine whether it needs to be reflushed. | |
| 3bf2d80a | 3142 | */ |
| 1f07f686 MD |
3143 | void |
| 3144 | hammer_test_inode(hammer_inode_t ip) | |
| 3145 | { | |
| 3146 | if (ip->flags & HAMMER_INODE_REFLUSH) { | |
| 3147 | ip->flags &= ~HAMMER_INODE_REFLUSH; | |
| 3148 | hammer_ref(&ip->lock); | |
| 3bf2d80a MD |
3149 | if (ip->flags & HAMMER_INODE_RESIGNAL) { |
| 3150 | ip->flags &= ~HAMMER_INODE_RESIGNAL; | |
| 3151 | hammer_flush_inode(ip, HAMMER_FLUSH_SIGNAL); | |
| 3152 | } else { | |
| 3153 | hammer_flush_inode(ip, 0); | |
| 3154 | } | |
| 1f07f686 MD |
3155 | hammer_rel_inode(ip, 0); |
| 3156 | } | |
| 3157 | } | |
| 3158 | ||
| 9f5097dc | 3159 | /* |
| 7bc5b8c2 MD |
3160 | * Clear the RECLAIM flag on an inode. This occurs when the inode is |
| 3161 | * reassociated with a vp or just before it gets freed. | |
| af209b0f | 3162 | * |
| 82010f9f | 3163 | * Pipeline wakeups to threads blocked due to an excessive number of |
| ccf6a64d MD |
3164 | * detached inodes. This typically occurs when atime updates accumulate |
| 3165 | * while scanning a directory tree. | |
| 9f5097dc | 3166 | */ |
| 7bc5b8c2 | 3167 | static void |
| ccf6a64d | 3168 | hammer_inode_wakereclaims(hammer_inode_t ip) |
| 9f5097dc | 3169 | { |
| 7bc5b8c2 | 3170 | struct hammer_reclaim *reclaim; |
| d99d6bf5 | 3171 | hammer_mount_t hmp = ip->hmp; |
| d99d6bf5 | 3172 | |
| 7bc5b8c2 | 3173 | if ((ip->flags & HAMMER_INODE_RECLAIM) == 0) |
| d99d6bf5 | 3174 | return; |
| 3897d7e9 | 3175 | |
| 7bc5b8c2 MD |
3176 | --hammer_count_reclaiming; |
| 3177 | --hmp->inode_reclaims; | |
| 3178 | ip->flags &= ~HAMMER_INODE_RECLAIM; | |
| 9f5097dc | 3179 | |
| 37646115 MD |
3180 | if ((reclaim = TAILQ_FIRST(&hmp->reclaim_list)) != NULL) { |
| 3181 | KKASSERT(reclaim->count > 0); | |
| 3182 | if (--reclaim->count == 0) { | |
| 82010f9f MD |
3183 | TAILQ_REMOVE(&hmp->reclaim_list, reclaim, entry); |
| 3184 | wakeup(reclaim); | |
| 3185 | } | |
| 9f5097dc MD |
3186 | } |
| 3187 | } | |
| 3188 | ||
| 4a2796f3 MD |
3189 | /* |
| 3190 | * Setup our reclaim pipeline. We only let so many detached (and dirty) | |
| ccf6a64d MD |
3191 | * inodes build up before we start blocking. This routine is called |
| 3192 | * if a new inode is created or an inode is loaded from media. | |
| 4a2796f3 MD |
3193 | * |
| 3194 | * When we block we don't care *which* inode has finished reclaiming, | |
| ccf6a64d | 3195 | * as lone as one does. |
| 37646115 MD |
3196 | * |
| 3197 | * The reclaim pipeline is primary governed by the auto-flush which is | |
| 3198 | * 1/4 hammer_limit_reclaim. We don't want to block if the count is | |
| 3199 | * less than 1/2 hammer_limit_reclaim. From 1/2 to full count is | |
| 3200 | * dynamically governed. | |
| 4a2796f3 MD |
3201 | */ |
| 3202 | void | |
| e98f1b96 | 3203 | hammer_inode_waitreclaims(hammer_transaction_t trans) |
| 4a2796f3 | 3204 | { |
| e98f1b96 | 3205 | hammer_mount_t hmp = trans->hmp; |
| 4a2796f3 | 3206 | struct hammer_reclaim reclaim; |
| 37646115 | 3207 | int lower_limit; |
| 4a2796f3 | 3208 | |
| e98f1b96 | 3209 | /* |
| 37646115 MD |
3210 | * Track inode load, delay if the number of reclaiming inodes is |
| 3211 | * between 2/4 and 4/4 hammer_limit_reclaim, depending. | |
| e98f1b96 MD |
3212 | */ |
| 3213 | if (curthread->td_proc) { | |
| 3214 | struct hammer_inostats *stats; | |
| e98f1b96 MD |
3215 | |
| 3216 | stats = hammer_inode_inostats(hmp, curthread->td_proc->p_pid); | |
| 3217 | ++stats->count; | |
| 3218 | ||
| 3219 | if (stats->count > hammer_limit_reclaim / 2) | |
| 3220 | stats->count = hammer_limit_reclaim / 2; | |
| 3221 | lower_limit = hammer_limit_reclaim - stats->count; | |
| 37646115 MD |
3222 | if (hammer_debug_general & 0x10000) { |
| 3223 | kprintf("pid %5d limit %d\n", | |
| 3224 | (int)curthread->td_proc->p_pid, lower_limit); | |
| 3225 | } | |
| e98f1b96 | 3226 | } else { |
| 37646115 MD |
3227 | lower_limit = hammer_limit_reclaim * 3 / 4; |
| 3228 | } | |
| 3229 | if (hmp->inode_reclaims >= lower_limit) { | |
| 3230 | reclaim.count = 1; | |
| 3231 | TAILQ_INSERT_TAIL(&hmp->reclaim_list, &reclaim, entry); | |
| 3232 | tsleep(&reclaim, 0, "hmrrcm", hz); | |
| 3233 | if (reclaim.count > 0) | |
| 3234 | TAILQ_REMOVE(&hmp->reclaim_list, &reclaim, entry); | |
| e98f1b96 | 3235 | } |
| 82010f9f | 3236 | } |
| 4a2796f3 | 3237 | |
| 858cc00a MD |
3238 | /* |
| 3239 | * Keep track of reclaim statistics on a per-pid basis using a loose | |
| 3240 | * 4-way set associative hash table. Collisions inherit the count of | |
| 3241 | * the previous entry. | |
| 3242 | * | |
| 3243 | * NOTE: We want to be careful here to limit the chain size. If the chain | |
| 3244 | * size is too large a pid will spread its stats out over too many | |
| 3245 | * entries under certain types of heavy filesystem activity and | |
| 3246 | * wind up not delaying long enough. | |
| 3247 | */ | |