| Commit | Line | Data |
|---|---|---|
| 7cfa8da5 MD |
1 | /* |
| 2 | * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved. | |
| 3 | * | |
| 4 | * This code is derived from software contributed to The DragonFly Project | |
| 5 | * by Matthew Dillon <dillon@dragonflybsd.org> | |
| 6 | * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org> | |
| 7 | * | |
| 8 | * Redistribution and use in source and binary forms, with or without | |
| 9 | * modification, are permitted provided that the following conditions | |
| 10 | * are met: | |
| 11 | * | |
| 12 | * 1. Redistributions of source code must retain the above copyright | |
| 13 | * notice, this list of conditions and the following disclaimer. | |
| 14 | * 2. Redistributions in binary form must reproduce the above copyright | |
| 15 | * notice, this list of conditions and the following disclaimer in | |
| 16 | * the documentation and/or other materials provided with the | |
| 17 | * distribution. | |
| 18 | * 3. Neither the name of The DragonFly Project nor the names of its | |
| 19 | * contributors may be used to endorse or promote products derived | |
| 20 | * from this software without specific, prior written permission. | |
| 21 | * | |
| 22 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 23 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 24 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS | |
| 25 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE | |
| 26 | * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, | |
| 27 | * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, | |
| 28 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
| 29 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED | |
| 30 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 31 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT | |
| 32 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
| 33 | * SUCH DAMAGE. | |
| 34 | */ | |
| 5c23d7f1 MD |
35 | /* |
| 36 | * This subsystem handles direct and indirect block searches, recursions, | |
| 37 | * creation, and deletion. Chains of blockrefs are tracked and modifications | |
| 38 | * are flag for propagation... eventually all the way back to the volume | |
| 39 | * header. | |
| 40 | */ | |
| 41 | ||
| 7cfa8da5 | 42 | #include <sys/cdefs.h> |
| 7cfa8da5 MD |
43 | #include <sys/param.h> |
| 44 | #include <sys/systm.h> | |
| 45 | #include <sys/types.h> | |
| 46 | #include <sys/lock.h> | |
| 47 | #include <sys/uuid.h> | |
| 48 | ||
| 49 | #include "hammer2.h" | |
| 50 | ||
| 995e78dc MD |
51 | static int hammer2_indirect_optimize; /* XXX SYSCTL */ |
| 52 | ||
| 53 | static hammer2_chain_t *hammer2_chain_create_indirect( | |
| 54 | hammer2_mount_t *hmp, hammer2_chain_t *parent, | |
| 55 | hammer2_key_t key, int keybits); | |
| 56 | ||
| 5c23d7f1 | 57 | /* |
| 01eabad4 | 58 | * Splay tree |
| 5c23d7f1 | 59 | */ |
| 01eabad4 MD |
60 | SPLAY_GENERATE(hammer2_chain_splay, hammer2_chain, snode, hammer2_chain_cmp); |
| 61 | ||
| 5c23d7f1 MD |
62 | int |
| 63 | hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2) | |
| 64 | { | |
| 65 | return(chain2->index - chain1->index); | |
| 66 | } | |
| 67 | ||
| 68 | /* | |
| 01eabad4 MD |
69 | * Recursively mark the parent chain elements so flushes can find |
| 70 | * modified elements. | |
| 71 | * | |
| 72 | * NOTE: The flush code will modify a SUBMODIFIED-flagged chain | |
| 73 | * during the flush recursion after clearing the parent's | |
| 74 | * SUBMODIFIED bit. We don't want to re-set the parent's | |
| 75 | * SUBMODIFIED bit in this case! | |
| 76 | * | |
| 77 | * XXX rename of parent can create a SMP race | |
| 78 | */ | |
| 79 | static void | |
| 80 | hammer2_chain_parent_setsubmod(hammer2_mount_t *hmp, hammer2_chain_t *chain) | |
| 81 | { | |
| 82 | hammer2_chain_t *parent; | |
| 83 | ||
| 84 | if ((chain->flags & HAMMER2_CHAIN_SUBMODIFIED) == 0) { | |
| 85 | parent = chain->parent; | |
| 86 | while (parent && | |
| 87 | (parent->flags & HAMMER2_CHAIN_SUBMODIFIED) == 0) { | |
| 88 | atomic_set_int(&parent->flags, | |
| 89 | HAMMER2_CHAIN_SUBMODIFIED); | |
| 90 | parent = parent->parent; | |
| 91 | } | |
| 92 | } | |
| 93 | } | |
| 94 | ||
| 95 | /* | |
| 5c23d7f1 | 96 | * Allocate a new disconnected chain element representing the specified |
| 37494cab | 97 | * bref. The chain element is locked exclusively and refs is set to 1. |
| 5c23d7f1 MD |
98 | * |
| 99 | * This essentially allocates a system memory structure representing one | |
| 100 | * of the media structure types, including inodes. | |
| 101 | */ | |
| 102 | hammer2_chain_t * | |
| 103 | hammer2_chain_alloc(hammer2_mount_t *hmp, hammer2_blockref_t *bref) | |
| 104 | { | |
| 105 | hammer2_chain_t *chain; | |
| 106 | hammer2_inode_t *ip; | |
| 107 | hammer2_indblock_t *np; | |
| 108 | hammer2_data_t *dp; | |
| 6ba3b984 | 109 | u_int bytes = 1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX); |
| 5c23d7f1 MD |
110 | |
| 111 | /* | |
| 112 | * Construct the appropriate system structure. | |
| 113 | */ | |
| 114 | switch(bref->type) { | |
| 115 | case HAMMER2_BREF_TYPE_INODE: | |
| 116 | ip = kmalloc(sizeof(*ip), hmp->minode, M_WAITOK | M_ZERO); | |
| 117 | chain = &ip->chain; | |
| 118 | chain->u.ip = ip; | |
| 119 | lockinit(&chain->lk, "inode", 0, LK_CANRECURSE); | |
| 120 | ip->hmp = hmp; | |
| 121 | break; | |
| 122 | case HAMMER2_BREF_TYPE_INDIRECT: | |
| 8cce658d | 123 | np = kmalloc(sizeof(*np), hmp->mchain, M_WAITOK | M_ZERO); |
| 5c23d7f1 MD |
124 | chain = &np->chain; |
| 125 | chain->u.np = np; | |
| 126 | lockinit(&chain->lk, "iblk", 0, LK_CANRECURSE); | |
| 127 | break; | |
| 128 | case HAMMER2_BREF_TYPE_DATA: | |
| 129 | dp = kmalloc(sizeof(*dp), hmp->mchain, M_WAITOK | M_ZERO); | |
| 130 | chain = &dp->chain; | |
| 131 | chain->u.dp = dp; | |
| 132 | lockinit(&chain->lk, "dblk", 0, LK_CANRECURSE); | |
| 133 | break; | |
| 134 | case HAMMER2_BREF_TYPE_VOLUME: | |
| 135 | chain = NULL; | |
| 866d5273 | 136 | panic("hammer2_chain_alloc volume type illegal for op"); |
| 5c23d7f1 MD |
137 | default: |
| 138 | chain = NULL; | |
| 866d5273 | 139 | panic("hammer2_chain_alloc: unrecognized blockref type: %d", |
| 5c23d7f1 MD |
140 | bref->type); |
| 141 | } | |
| 142 | chain->bref = *bref; | |
| 995e78dc | 143 | chain->index = -1; /* not yet assigned */ |
| 5c23d7f1 | 144 | chain->refs = 1; |
| 6ba3b984 | 145 | chain->bytes = bytes; |
| 37494cab | 146 | lockmgr(&chain->lk, LK_EXCLUSIVE); |
| 5c23d7f1 MD |
147 | |
| 148 | return (chain); | |
| 149 | } | |
| 150 | ||
| 151 | /* | |
| 152 | * Free a disconnected chain element | |
| 153 | */ | |
| 154 | void | |
| 155 | hammer2_chain_free(hammer2_mount_t *hmp, hammer2_chain_t *chain) | |
| 156 | { | |
| 157 | void *mem; | |
| 158 | ||
| 8cce658d MD |
159 | if (chain->bref.type == HAMMER2_BREF_TYPE_INODE || |
| 160 | chain->bref.type == HAMMER2_BREF_TYPE_VOLUME) { | |
| 161 | chain->data = NULL; | |
| 162 | } | |
| 163 | ||
| 5c23d7f1 MD |
164 | KKASSERT(chain->bp == NULL); |
| 165 | KKASSERT(chain->data == NULL); | |
| 166 | KKASSERT(chain->bref.type != HAMMER2_BREF_TYPE_INODE || | |
| 167 | chain->u.ip->vp == NULL); | |
| 168 | ||
| 169 | if ((mem = chain->u.mem) != NULL) { | |
| 170 | chain->u.mem = NULL; | |
| 171 | if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) | |
| 172 | kfree(mem, hmp->minode); | |
| 173 | else | |
| 174 | kfree(mem, hmp->mchain); | |
| 175 | } | |
| 176 | } | |
| 177 | ||
| 7cfa8da5 MD |
178 | /* |
| 179 | * Add a reference to a chain element (for shared access). The chain | |
| 5c23d7f1 | 180 | * element must already have at least 1 ref controlled by the caller. |
| 7cfa8da5 MD |
181 | */ |
| 182 | void | |
| 183 | hammer2_chain_ref(hammer2_mount_t *hmp, hammer2_chain_t *chain) | |
| 184 | { | |
| 185 | KKASSERT(chain->refs > 0); | |
| 186 | atomic_add_int(&chain->refs, 1); | |
| 187 | } | |
| 188 | ||
| 189 | /* | |
| 190 | * Drop the callers reference to the chain element. If the ref count | |
| 5c23d7f1 MD |
191 | * reaches zero the chain element and its related structure (typically an |
| 192 | * inode or indirect block) will be freed and the parent will be | |
| 193 | * recursively dropped. | |
| 194 | * | |
| 195 | * Modified elements hold an additional reference so it should not be | |
| 196 | * possible for the count on a modified element to drop to 0. | |
| 197 | * | |
| 198 | * The chain element must NOT be locked by the caller. | |
| 7cfa8da5 | 199 | * |
| 5c23d7f1 MD |
200 | * The parent might or might not be locked by the caller but if so it |
| 201 | * will also be referenced so we shouldn't recurse upward. | |
| 7cfa8da5 MD |
202 | */ |
| 203 | void | |
| 204 | hammer2_chain_drop(hammer2_mount_t *hmp, hammer2_chain_t *chain) | |
| 205 | { | |
| 5c23d7f1 MD |
206 | hammer2_chain_t *parent; |
| 207 | u_int refs; | |
| 208 | ||
| 209 | while (chain) { | |
| 210 | refs = chain->refs; | |
| 211 | cpu_ccfence(); | |
| db0c2eb3 | 212 | KKASSERT(refs > 0); |
| 5c23d7f1 MD |
213 | if (refs == 1) { |
| 214 | KKASSERT(chain != &hmp->vchain); | |
| 215 | parent = chain->parent; | |
| 4e2004ea MD |
216 | if (parent) |
| 217 | lockmgr(&parent->lk, LK_EXCLUSIVE); | |
| 5c23d7f1 MD |
218 | if (atomic_cmpset_int(&chain->refs, 1, 0)) { |
| 219 | /* | |
| 220 | * Succeeded, recurse and drop parent | |
| 221 | */ | |
| 3ac6a319 MD |
222 | if (!(chain->flags & HAMMER2_CHAIN_DELETED)) { |
| 223 | SPLAY_REMOVE(hammer2_chain_splay, | |
| 224 | &parent->shead, chain); | |
| 6934ae32 MD |
225 | atomic_set_int(&chain->flags, |
| 226 | HAMMER2_CHAIN_DELETED); | |
| 227 | /* parent refs dropped via recursion */ | |
| 3ac6a319 | 228 | } |
| 5c23d7f1 | 229 | chain->parent = NULL; |
| 6934ae32 MD |
230 | if (parent) |
| 231 | lockmgr(&parent->lk, LK_RELEASE); | |
| 5c23d7f1 MD |
232 | hammer2_chain_free(hmp, chain); |
| 233 | chain = parent; | |
| 6934ae32 MD |
234 | /* recurse on parent */ |
| 235 | } else { | |
| 236 | if (parent) | |
| 237 | lockmgr(&parent->lk, LK_RELEASE); | |
| 238 | /* retry the same chain */ | |
| 5c23d7f1 MD |
239 | } |
| 240 | } else { | |
| 241 | if (atomic_cmpset_int(&chain->refs, refs, refs - 1)) { | |
| 242 | /* | |
| 243 | * Succeeded, count did not reach zero so | |
| 244 | * cut out of the loop. | |
| 245 | */ | |
| 246 | break; | |
| 247 | } | |
| 6934ae32 | 248 | /* retry the same chain */ |
| 7cfa8da5 MD |
249 | } |
| 250 | } | |
| 251 | } | |
| 252 | ||
| 5c23d7f1 | 253 | /* |
| 01eabad4 MD |
254 | * Ref and lock a chain element, acquiring its data with I/O if necessary, |
| 255 | * and specify how you would like the data to be resolved. | |
| 5c23d7f1 MD |
256 | * |
| 257 | * Returns 0 on success or an error code if the data could not be acquired. | |
| 258 | * The chain element is locked either way. | |
| db71f61f | 259 | * |
| 01eabad4 MD |
260 | * The lock is allowed to recurse, multiple locking ops will aggregate |
| 261 | * the requested resolve types. Once data is assigned it will not be | |
| 262 | * removed until the last unlock. | |
| 263 | * | |
| 264 | * HAMMER2_RESOLVE_NEVER - Do not resolve the data element. | |
| 265 | * (typically used to avoid device/logical buffer | |
| 266 | * aliasing for data) | |
| 267 | * | |
| 268 | * HAMMER2_RESOLVE_MAYBE - Do not resolve data elements for chains in | |
| 269 | * the INITIAL-create state (indirect blocks only). | |
| 270 | * | |
| 271 | * Do not resolve data elements for DATA chains. | |
| 272 | * (typically used to avoid device/logical buffer | |
| 273 | * aliasing for data) | |
| 274 | * | |
| 275 | * HAMMER2_RESOLVE_ALWAYS- Always resolve the data element. | |
| 276 | * | |
| 8cce658d | 277 | * |
| 01eabad4 MD |
278 | * NOTE: Embedded elements (volume header, inodes) are always resolved |
| 279 | * regardless. | |
| 280 | * | |
| 281 | * NOTE: Specifying HAMMER2_RESOLVE_ALWAYS on a newly-created non-embedded | |
| 282 | * element will instantiate and zero its buffer, and flush it on | |
| 283 | * release. | |
| 284 | * | |
| 285 | * NOTE: (data) elements are normally locked RESOLVE_NEVER or RESOLVE_MAYBE | |
| 286 | * so as not to instantiate a device buffer, which could alias against | |
| 287 | * a logical file buffer. However, if ALWAYS is specified the | |
| 288 | * device buffer will be instantiated anyway. | |
| 5c23d7f1 MD |
289 | */ |
| 290 | int | |
| 01eabad4 | 291 | hammer2_chain_lock(hammer2_mount_t *hmp, hammer2_chain_t *chain, int how) |
| 232a50f9 | 292 | { |
| 5c23d7f1 | 293 | hammer2_blockref_t *bref; |
| 8cce658d | 294 | hammer2_off_t pbase; |
| 01eabad4 MD |
295 | hammer2_off_t peof; |
| 296 | size_t boff; | |
| 297 | size_t bbytes; | |
| 5c23d7f1 | 298 | int error; |
| 01eabad4 | 299 | char *bdata; |
| 5c23d7f1 MD |
300 | |
| 301 | /* | |
| 302 | * Lock the element. Under certain conditions this might end up | |
| 303 | * being a recursive lock. | |
| 304 | */ | |
| 305 | KKASSERT(chain->refs > 0); | |
| 01eabad4 | 306 | atomic_add_int(&chain->refs, 1); |
| 5c23d7f1 MD |
307 | lockmgr(&chain->lk, LK_EXCLUSIVE); |
| 308 | ||
| 309 | /* | |
| 01eabad4 MD |
310 | * If we already have a valid data pointer no further action is |
| 311 | * necessary. | |
| 5c23d7f1 | 312 | */ |
| db71f61f | 313 | if (chain->data) |
| 5c23d7f1 | 314 | return (0); |
| 5c23d7f1 MD |
315 | |
| 316 | /* | |
| 01eabad4 | 317 | * Do we have to resolve the data? |
| 8cce658d | 318 | */ |
| 01eabad4 MD |
319 | switch(how) { |
| 320 | case HAMMER2_RESOLVE_NEVER: | |
| 8cce658d | 321 | return(0); |
| 01eabad4 MD |
322 | case HAMMER2_RESOLVE_MAYBE: |
| 323 | if (chain->flags & HAMMER2_CHAIN_INITIAL) | |
| 324 | return(0); | |
| 325 | if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) | |
| 326 | return(0); | |
| 327 | /* fall through */ | |
| 328 | case HAMMER2_RESOLVE_ALWAYS: | |
| 329 | break; | |
| 330 | } | |
| 8cce658d MD |
331 | |
| 332 | /* | |
| 01eabad4 MD |
333 | * We must resolve to a device buffer, either by issuing I/O or |
| 334 | * by creating a zero-fill element. We do not mark the buffer | |
| 335 | * dirty when creating a zero-fill element (the hammer2_chain_modify() | |
| 336 | * API must still be used to do that). | |
| db71f61f | 337 | * |
| 01eabad4 MD |
338 | * The device buffer is variable-sized in powers of 2 down |
| 339 | * to HAMMER2_MINALLOCSIZE (typically 1K). A 64K physical storage | |
| 340 | * chunk always contains buffers of the same size. (XXX) | |
| 8cce658d | 341 | * |
| 01eabad4 MD |
342 | * The minimum physical IO size may be larger than the variable |
| 343 | * block size. | |
| 5c23d7f1 | 344 | */ |
| 5c23d7f1 MD |
345 | bref = &chain->bref; |
| 346 | ||
| 01eabad4 MD |
347 | if ((bbytes = chain->bytes) < HAMMER2_MINIOSIZE) |
| 348 | bbytes = HAMMER2_MINIOSIZE; | |
| 349 | pbase = bref->data_off & ~(hammer2_off_t)(bbytes - 1); | |
| 350 | peof = (pbase + HAMMER2_PBUFSIZE64) & ~HAMMER2_PBUFMASK64; | |
| 351 | boff = bref->data_off & HAMMER2_OFF_MASK & (bbytes - 1); | |
| 8cce658d | 352 | KKASSERT(pbase != 0); |
| 01eabad4 MD |
353 | |
| 354 | /* | |
| 355 | * The getblk() optimization can only be used on newly created | |
| 356 | * elements if the physical block size matches the request. | |
| 357 | */ | |
| 358 | if ((chain->flags & HAMMER2_CHAIN_INITIAL) && | |
| 359 | chain->bytes == bbytes) { | |
| 360 | chain->bp = getblk(hmp->devvp, pbase, bbytes, 0, 0); | |
| 361 | error = 0; | |
| 362 | } else if (hammer2_cluster_enable) { | |
| 363 | error = cluster_read(hmp->devvp, peof, pbase, bbytes, | |
| 364 | HAMMER2_PBUFSIZE, HAMMER2_PBUFSIZE, | |
| 365 | &chain->bp); | |
| 366 | } else { | |
| 367 | error = bread(hmp->devvp, pbase, bbytes, &chain->bp); | |
| 368 | } | |
| 866d5273 | 369 | |
| 5c23d7f1 MD |
370 | if (error) { |
| 371 | kprintf("hammer2_chain_get: I/O error %016jx: %d\n", | |
| 8cce658d | 372 | (intmax_t)pbase, error); |
| 6ba3b984 | 373 | bqrelse(chain->bp); |
| 5c23d7f1 MD |
374 | chain->bp = NULL; |
| 375 | return (error); | |
| 376 | } | |
| 377 | ||
| 378 | /* | |
| 01eabad4 MD |
379 | * Zero the data area if the chain is in the INITIAL-create state |
| 380 | */ | |
| 381 | bdata = (char *)chain->bp->b_data + boff; | |
| 382 | if (chain->flags & HAMMER2_CHAIN_INITIAL) | |
| 383 | bzero(bdata, chain->bytes); | |
| 384 | ||
| 385 | /* | |
| db71f61f | 386 | * Setup the data pointer, either pointing it to an embedded data |
| 8cce658d | 387 | * structure and copying the data from the buffer, or pointing it |
| db71f61f | 388 | * into the buffer. |
| 5c23d7f1 | 389 | * |
| db71f61f MD |
390 | * The buffer is not retained when copying to an embedded data |
| 391 | * structure in order to avoid potential deadlocks or recursions | |
| 392 | * on the same physical buffer. | |
| 5c23d7f1 MD |
393 | */ |
| 394 | switch (bref->type) { | |
| b7926f31 MD |
395 | case HAMMER2_BREF_TYPE_VOLUME: |
| 396 | /* | |
| 397 | * Copy data from bp to embedded buffer | |
| 398 | */ | |
| 01eabad4 MD |
399 | panic("hammer2_chain_lock: called on unresolved volume header"); |
| 400 | #if 0 | |
| 401 | /* NOT YET */ | |
| 8cce658d MD |
402 | KKASSERT(pbase == 0); |
| 403 | KKASSERT(chain->bytes == HAMMER2_PBUFSIZE); | |
| 01eabad4 | 404 | bcopy(bdata, &hmp->voldata, chain->bytes); |
| b7926f31 | 405 | chain->data = (void *)&hmp->voldata; |
| 6ba3b984 | 406 | bqrelse(chain->bp); |
| b7926f31 | 407 | chain->bp = NULL; |
| 01eabad4 | 408 | #endif |
| b7926f31 | 409 | break; |
| 5c23d7f1 MD |
410 | case HAMMER2_BREF_TYPE_INODE: |
| 411 | /* | |
| 01eabad4 MD |
412 | * Copy data from bp to embedded buffer, do not retain the |
| 413 | * device buffer. | |
| 5c23d7f1 | 414 | */ |
| 01eabad4 | 415 | bcopy(bdata, &chain->u.ip->ip_data, chain->bytes); |
| 5c23d7f1 | 416 | chain->data = (void *)&chain->u.ip->ip_data; |
| 6ba3b984 | 417 | bqrelse(chain->bp); |
| 5c23d7f1 MD |
418 | chain->bp = NULL; |
| 419 | break; | |
| 6ba3b984 | 420 | case HAMMER2_BREF_TYPE_INDIRECT: |
| 8cce658d | 421 | case HAMMER2_BREF_TYPE_DATA: |
| 5c23d7f1 MD |
422 | default: |
| 423 | /* | |
| 01eabad4 | 424 | * Point data at the device buffer and leave bp intact. |
| 5c23d7f1 | 425 | */ |
| 01eabad4 | 426 | chain->data = (void *)bdata; |
| 5c23d7f1 MD |
427 | break; |
| 428 | } | |
| 429 | return (0); | |
| 232a50f9 MD |
430 | } |
| 431 | ||
| 432 | /* | |
| 01eabad4 | 433 | * Unlock and deref a chain element. |
| 8cce658d | 434 | * |
| 01eabad4 MD |
435 | * On the last lock release any non-embedded data (chain->bp) will be |
| 436 | * retired. | |
| 866d5273 MD |
437 | */ |
| 438 | void | |
| 01eabad4 | 439 | hammer2_chain_unlock(hammer2_mount_t *hmp, hammer2_chain_t *chain) |
| 866d5273 | 440 | { |
| 01eabad4 | 441 | long *counterp; |
| 866d5273 MD |
442 | |
| 443 | /* | |
| 01eabad4 | 444 | * Undo a recursive lock |
| 866d5273 | 445 | */ |
| 01eabad4 MD |
446 | if (lockcountnb(&chain->lk) > 1) { |
| 447 | KKASSERT(chain->refs > 1); | |
| 448 | atomic_add_int(&chain->refs, -1); | |
| 449 | lockmgr(&chain->lk, LK_RELEASE); | |
| 450 | return; | |
| 451 | } | |
| 866d5273 MD |
452 | |
| 453 | /* | |
| 01eabad4 MD |
454 | * Shortcut the case if the data is embedded or not resolved. |
| 455 | * Do NOT null-out pointers to embedded data (e.g. inode). | |
| 866d5273 | 456 | */ |
| 01eabad4 MD |
457 | if (chain->bp == NULL) { |
| 458 | lockmgr(&chain->lk, LK_RELEASE); | |
| 459 | hammer2_chain_drop(hmp, chain); | |
| 866d5273 | 460 | return; |
| 01eabad4 | 461 | } |
| 866d5273 | 462 | |
| 866d5273 | 463 | /* |
| 01eabad4 | 464 | * Statistics |
| 866d5273 | 465 | */ |
| 01eabad4 MD |
466 | if ((chain->flags & HAMMER2_CHAIN_DIRTYBP) == 0) { |
| 467 | ; | |
| 468 | } else if (chain->flags & HAMMER2_CHAIN_IOFLUSH) { | |
| 866d5273 | 469 | switch(chain->bref.type) { |
| 01eabad4 MD |
470 | case HAMMER2_BREF_TYPE_DATA: |
| 471 | counterp = &hammer2_ioa_file_write; | |
| 472 | break; | |
| 473 | case HAMMER2_BREF_TYPE_INODE: | |
| 474 | counterp = &hammer2_ioa_meta_write; | |
| 866d5273 MD |
475 | break; |
| 476 | case HAMMER2_BREF_TYPE_INDIRECT: | |
| 01eabad4 MD |
477 | counterp = &hammer2_ioa_indr_write; |
| 478 | break; | |
| 479 | default: | |
| 480 | counterp = &hammer2_ioa_volu_write; | |
| 6ba3b984 | 481 | break; |
| 01eabad4 MD |
482 | } |
| 483 | ++*counterp; | |
| 484 | } else { | |
| 485 | switch(chain->bref.type) { | |
| 866d5273 | 486 | case HAMMER2_BREF_TYPE_DATA: |
| 01eabad4 MD |
487 | counterp = &hammer2_iod_file_write; |
| 488 | break; | |
| 489 | case HAMMER2_BREF_TYPE_INODE: | |
| 490 | counterp = &hammer2_iod_meta_write; | |
| 491 | break; | |
| 492 | case HAMMER2_BREF_TYPE_INDIRECT: | |
| 493 | counterp = &hammer2_iod_indr_write; | |
| 866d5273 MD |
494 | break; |
| 495 | default: | |
| 01eabad4 | 496 | counterp = &hammer2_iod_volu_write; |
| 866d5273 | 497 | break; |
| 01eabad4 MD |
498 | } |
| 499 | ++*counterp; | |
| 500 | } | |
| 866d5273 | 501 | |
| 01eabad4 MD |
502 | /* |
| 503 | * Clean out the bp. | |
| 504 | * | |
| 505 | * If a device buffer was used for data be sure to destroy the | |
| 506 | * buffer when we are done to avoid aliases (XXX what about the | |
| 507 | * underlying VM pages?). | |
| 508 | */ | |
| 509 | if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) | |
| 510 | chain->bp->b_flags |= B_RELBUF; | |
| 511 | ||
| 512 | chain->data = NULL; | |
| 513 | if (chain->flags & HAMMER2_CHAIN_DIRTYBP) { | |
| 514 | atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DIRTYBP); | |
| 515 | if (chain->flags & HAMMER2_CHAIN_IOFLUSH) { | |
| 516 | atomic_clear_int(&chain->flags, | |
| 517 | HAMMER2_CHAIN_IOFLUSH); | |
| 518 | chain->bp->b_flags |= B_RELBUF; | |
| 1c9f601e | 519 | cluster_awrite(chain->bp); |
| 01eabad4 MD |
520 | } else { |
| 521 | chain->bp->b_flags |= B_CLUSTEROK; | |
| 522 | bdwrite(chain->bp); | |
| 523 | } | |
| 524 | } else { | |
| 525 | if (chain->flags & HAMMER2_CHAIN_IOFLUSH) { | |
| 526 | atomic_clear_int(&chain->flags, | |
| 527 | HAMMER2_CHAIN_IOFLUSH); | |
| 528 | chain->bp->b_flags |= B_RELBUF; | |
| 529 | brelse(chain->bp); | |
| 530 | } else { | |
| 531 | /* bp might still be dirty */ | |
| 532 | bqrelse(chain->bp); | |
| 866d5273 MD |
533 | } |
| 534 | } | |
| 01eabad4 MD |
535 | chain->bp = NULL; |
| 536 | lockmgr(&chain->lk, LK_RELEASE); | |
| 537 | hammer2_chain_drop(hmp, chain); | |
| 866d5273 MD |
538 | } |
| 539 | ||
| 540 | /* | |
| 01eabad4 MD |
541 | * Resize the chain's physical storage allocation. Chains can be resized |
| 542 | * smaller without reallocating the storage. Resizing larger will reallocate | |
| 543 | * the storage. | |
| 544 | * | |
| 545 | * Must be passed a locked chain. If you want the resize to copy the data | |
| 546 | * you should lock the chain with RESOLVE_MAYBE or RESOLVE_ALWAYS, otherwise | |
| 547 | * the resize operation will not copy the data. | |
| 548 | * | |
| 549 | * This function is mostly used with DATA blocks locked RESOLVE_NEVER in order | |
| 550 | * to avoid instantiating a device buffer that conflicts with the vnode | |
| 551 | * data buffer. | |
| 552 | * | |
| 553 | * XXX flags currently ignored, uses chain->bp to detect data/no-data. | |
| 8cce658d MD |
554 | */ |
| 555 | void | |
| 01eabad4 MD |
556 | hammer2_chain_resize(hammer2_mount_t *hmp, hammer2_chain_t *chain, |
| 557 | int nradix, int flags) | |
| 8cce658d | 558 | { |
| 01eabad4 MD |
559 | struct buf *nbp; |
| 560 | hammer2_off_t pbase; | |
| 8cce658d MD |
561 | size_t obytes; |
| 562 | size_t nbytes; | |
| 01eabad4 MD |
563 | size_t bbytes; |
| 564 | int boff; | |
| 565 | char *bdata; | |
| 566 | int error; | |
| 8cce658d MD |
567 | |
| 568 | /* | |
| 569 | * Only data and indirect blocks can be resized for now | |
| 570 | */ | |
| 571 | KKASSERT(chain != &hmp->vchain); | |
| 572 | KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA || | |
| 573 | chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT); | |
| 574 | ||
| 575 | /* | |
| 576 | * Nothing to do if the element is already the proper size | |
| 577 | */ | |
| 578 | obytes = chain->bytes; | |
| 1c9f601e | 579 | nbytes = 1U << nradix; |
| 8cce658d MD |
580 | if (obytes == nbytes) |
| 581 | return; | |
| 582 | ||
| 8cce658d | 583 | /* |
| 2910a90c | 584 | * Set MODIFIED and add a chain ref to prevent destruction. Both |
| 8cce658d MD |
585 | * modified flags share the same ref. |
| 586 | */ | |
| 2910a90c MD |
587 | if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) { |
| 588 | atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); | |
| 8cce658d MD |
589 | hammer2_chain_ref(hmp, chain); |
| 590 | } | |
| 591 | ||
| 01eabad4 MD |
592 | /* |
| 593 | * Relocate the block, even if making it smaller (because different | |
| 594 | * block sizes may be in different regions). | |
| 595 | */ | |
| 596 | chain->bref.data_off = hammer2_freemap_alloc(hmp, chain->bref.type, | |
| 597 | nbytes); | |
| 598 | chain->bytes = nbytes; | |
| 599 | ||
| 600 | /* | |
| 601 | * The device buffer may be larger than the allocation size. | |
| 602 | */ | |
| 603 | if ((bbytes = chain->bytes) < HAMMER2_MINIOSIZE) | |
| 604 | bbytes = HAMMER2_MINIOSIZE; | |
| 605 | pbase = chain->bref.data_off & ~(hammer2_off_t)(bbytes - 1); | |
| 606 | boff = chain->bref.data_off & HAMMER2_OFF_MASK & (bbytes - 1); | |
| 607 | ||
| 608 | /* | |
| 609 | * Only copy the data if resolved, otherwise the caller is | |
| 610 | * responsible. | |
| 611 | */ | |
| 612 | if (chain->bp) { | |
| 613 | KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT || | |
| 614 | chain->bref.type == HAMMER2_BREF_TYPE_DATA); | |
| 615 | KKASSERT(chain != &hmp->vchain); /* safety */ | |
| 616 | ||
| 8cce658d | 617 | /* |
| 01eabad4 MD |
618 | * The getblk() optimization can only be used if the |
| 619 | * physical block size matches the request. | |
| 8cce658d | 620 | */ |
| 01eabad4 MD |
621 | if (nbytes == bbytes) { |
| 622 | nbp = getblk(hmp->devvp, pbase, bbytes, 0, 0); | |
| 623 | error = 0; | |
| 624 | } else { | |
| 625 | error = bread(hmp->devvp, pbase, bbytes, &nbp); | |
| 626 | KKASSERT(error == 0); | |
| 627 | } | |
| 628 | bdata = (char *)nbp->b_data + boff; | |
| 629 | ||
| 630 | if (nbytes < obytes) { | |
| 631 | bcopy(chain->data, bdata, nbytes); | |
| 632 | } else { | |
| 633 | bcopy(chain->data, bdata, obytes); | |
| 634 | bzero(bdata + obytes, nbytes - obytes); | |
| 635 | } | |
| 636 | ||
| 8cce658d | 637 | /* |
| 01eabad4 MD |
638 | * NOTE: The INITIAL state of the chain is left intact. |
| 639 | * | |
| 640 | * NOTE: Because of the reallocation we have to set DIRTYBP | |
| 641 | * if INITIAL is not set. | |
| 8cce658d | 642 | */ |
| 01eabad4 MD |
643 | chain->bp->b_flags |= B_RELBUF; |
| 644 | brelse(chain->bp); | |
| 645 | chain->bp = nbp; | |
| 646 | chain->data = (void *)bdata; | |
| 647 | if ((chain->flags & HAMMER2_CHAIN_INITIAL) == 0) | |
| 648 | atomic_set_int(&chain->flags, HAMMER2_CHAIN_DIRTYBP); | |
| 8cce658d | 649 | } |
| 222d9e22 | 650 | hammer2_chain_parent_setsubmod(hmp, chain); |
| 8cce658d MD |
651 | } |
| 652 | ||
| 653 | /* | |
| 5c23d7f1 | 654 | * Convert a locked chain that was retrieved read-only to read-write. |
| db71f61f MD |
655 | * |
| 656 | * If not already marked modified a new physical block will be allocated | |
| 6ba3b984 | 657 | * and assigned to the bref. |
| db71f61f | 658 | * |
| 01eabad4 MD |
659 | * Non-data blocks - The chain should be locked to at least the RESOLVE_MAYBE |
| 660 | * level or the COW operation will not work. | |
| 8cce658d | 661 | * |
| 01eabad4 MD |
662 | * Data blocks - The chain is usually locked RESOLVE_NEVER so as not to |
| 663 | * run the data through the device buffers. | |
| 232a50f9 MD |
664 | */ |
| 665 | void | |
| 01eabad4 | 666 | hammer2_chain_modify(hammer2_mount_t *hmp, hammer2_chain_t *chain, int flags) |
| 232a50f9 | 667 | { |
| db71f61f | 668 | struct buf *nbp; |
| db71f61f | 669 | int error; |
| 01eabad4 MD |
670 | hammer2_off_t pbase; |
| 671 | size_t bbytes; | |
| 672 | size_t boff; | |
| 673 | void *bdata; | |
| a92f82c4 MD |
674 | |
| 675 | /* | |
| 2910a90c | 676 | * If the chain is already marked MODIFIED we can just return. |
| 5c23d7f1 | 677 | */ |
| 2910a90c | 678 | if (chain->flags & HAMMER2_CHAIN_MODIFIED) { |
| 01eabad4 MD |
679 | if ((flags & HAMMER2_MODIFY_OPTDATA) == 0 && |
| 680 | chain->bp == NULL) { | |
| 681 | goto skip1; | |
| 682 | } | |
| 5c23d7f1 MD |
683 | return; |
| 684 | } | |
| 685 | ||
| db0c2eb3 | 686 | /* |
| 2910a90c | 687 | * Set MODIFIED and add a chain ref to prevent destruction. Both |
| 73e441b9 MD |
688 | * modified flags share the same ref. |
| 689 | */ | |
| 2910a90c | 690 | atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); |
| 8cce658d | 691 | hammer2_chain_ref(hmp, chain); |
| 73e441b9 MD |
692 | |
| 693 | /* | |
| 694 | * We must allocate the copy-on-write block. | |
| db71f61f MD |
695 | * |
| 696 | * If the data is embedded no other action is required. | |
| 697 | * | |
| 698 | * If the data is not embedded we acquire and clear the | |
| 699 | * new block. If chain->data is not NULL we then do the | |
| 700 | * copy-on-write. chain->data will then be repointed to the new | |
| 701 | * buffer and the old buffer will be released. | |
| c667909f MD |
702 | * |
| 703 | * For newly created elements with no prior allocation we go | |
| 704 | * through the copy-on-write steps except without the copying part. | |
| 5c23d7f1 | 705 | */ |
| db71f61f | 706 | if (chain != &hmp->vchain) { |
| 8cce658d | 707 | if ((hammer2_debug & 0x0001) && |
| 01eabad4 | 708 | (chain->bref.data_off & HAMMER2_OFF_MASK)) { |
| 6ba3b984 | 709 | kprintf("Replace %d\n", chain->bytes); |
| 8cce658d | 710 | } |
| 6ba3b984 | 711 | chain->bref.data_off = |
| 8cce658d MD |
712 | hammer2_freemap_alloc(hmp, chain->bref.type, |
| 713 | chain->bytes); | |
| db71f61f MD |
714 | /* XXX failed allocation */ |
| 715 | } | |
| 5c23d7f1 | 716 | |
| 01eabad4 MD |
717 | /* |
| 718 | * If data instantiation is optional and the chain has no current | |
| 719 | * data association (typical for DATA and newly-created INDIRECT | |
| 720 | * elements), don't instantiate the buffer now. | |
| 721 | */ | |
| 722 | if ((flags & HAMMER2_MODIFY_OPTDATA) && chain->bp == NULL) | |
| 723 | goto skip2; | |
| 724 | ||
| 725 | skip1: | |
| 726 | /* | |
| 727 | * Setting the DIRTYBP flag will cause the buffer to be dirtied or | |
| 2910a90c | 728 | * written-out on unlock. This bit is independent of the MODIFIED |
| 01eabad4 | 729 | * bit because the chain may still need meta-data adjustments done |
| 2910a90c | 730 | * by virtue of MODIFIED for its parent, and the buffer can be |
| 01eabad4 MD |
731 | * flushed out (possibly multiple times) by the OS before that. |
| 732 | * | |
| 733 | * Clearing the INITIAL flag (for indirect blocks) indicates that | |
| 734 | * a zero-fill buffer has been instantiated. | |
| 735 | */ | |
| 736 | atomic_set_int(&chain->flags, HAMMER2_CHAIN_DIRTYBP); | |
| 737 | atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL); | |
| 738 | ||
| 739 | /* | |
| 740 | * We currently should never instantiate a device buffer for a | |
| 741 | * data chain. | |
| 742 | */ | |
| 743 | KKASSERT(chain->bref.type != HAMMER2_BREF_TYPE_DATA); | |
| 744 | ||
| 745 | /* | |
| 746 | * Execute COW operation | |
| 747 | */ | |
| db71f61f | 748 | switch(chain->bref.type) { |
| 01eabad4 MD |
749 | case HAMMER2_BREF_TYPE_VOLUME: |
| 750 | case HAMMER2_BREF_TYPE_INODE: | |
| 5c23d7f1 | 751 | /* |
| 01eabad4 MD |
752 | * The data is embedded, no copy-on-write operation is |
| 753 | * needed. | |
| 5c23d7f1 | 754 | */ |
| 01eabad4 | 755 | KKASSERT(chain->bp == NULL); |
| db71f61f | 756 | break; |
| db71f61f | 757 | case HAMMER2_BREF_TYPE_DATA: |
| 01eabad4 | 758 | case HAMMER2_BREF_TYPE_INDIRECT: |
| db71f61f | 759 | /* |
| 01eabad4 | 760 | * Perform the copy-on-write operation |
| db71f61f MD |
761 | */ |
| 762 | KKASSERT(chain != &hmp->vchain); /* safety */ | |
| 01eabad4 MD |
763 | /* |
| 764 | * The device buffer may be larger than the allocation size. | |
| 765 | */ | |
| 766 | if ((bbytes = chain->bytes) < HAMMER2_MINIOSIZE) | |
| 767 | bbytes = HAMMER2_MINIOSIZE; | |
| 768 | pbase = chain->bref.data_off & ~(hammer2_off_t)(bbytes - 1); | |
| 769 | boff = chain->bref.data_off & HAMMER2_OFF_MASK & (bbytes - 1); | |
| 770 | ||
| 771 | /* | |
| 772 | * The getblk() optimization can only be used if the | |
| 773 | * physical block size matches the request. | |
| 774 | */ | |
| 775 | if (chain->bytes == bbytes) { | |
| 776 | nbp = getblk(hmp->devvp, pbase, bbytes, 0, 0); | |
| 777 | error = 0; | |
| 778 | } else { | |
| 779 | error = bread(hmp->devvp, pbase, bbytes, &nbp); | |
| 780 | KKASSERT(error == 0); | |
| 781 | } | |
| 782 | bdata = (char *)nbp->b_data + boff; | |
| 866d5273 MD |
783 | |
| 784 | /* | |
| 6ba3b984 MD |
785 | * Copy or zero-fill on write depending on whether |
| 786 | * chain->data exists or not. | |
| 866d5273 | 787 | */ |
| db71f61f | 788 | if (chain->data) { |
| 01eabad4 | 789 | bcopy(chain->data, bdata, chain->bytes); |
| db71f61f | 790 | KKASSERT(chain->bp != NULL); |
| 6ba3b984 | 791 | } else { |
| 01eabad4 | 792 | bzero(bdata, chain->bytes); |
| db71f61f | 793 | } |
| 8cce658d MD |
794 | if (chain->bp) { |
| 795 | chain->bp->b_flags |= B_RELBUF; | |
| 796 | brelse(chain->bp); | |
| 797 | } | |
| db71f61f | 798 | chain->bp = nbp; |
| 01eabad4 | 799 | chain->data = bdata; |
| db71f61f MD |
800 | break; |
| 801 | default: | |
| 01eabad4 MD |
802 | panic("hammer2_chain_modify: illegal non-embedded type %d", |
| 803 | chain->bref.type); | |
| db71f61f MD |
804 | break; |
| 805 | ||
| 5c23d7f1 | 806 | } |
| 01eabad4 MD |
807 | skip2: |
| 808 | if ((flags & HAMMER2_MODIFY_NOSUB) == 0) | |
| 214f4a77 | 809 | hammer2_chain_parent_setsubmod(hmp, chain); |
| 232a50f9 MD |
810 | } |
| 811 | ||
| 812 | /* | |
| 2910a90c MD |
813 | * Mark the volume as having been modified. This short-cut version |
| 814 | * does not have to lock the volume's chain, which allows the ioctl | |
| 815 | * code to make adjustments to connections without deadlocking. | |
| 816 | */ | |
| 817 | void | |
| 818 | hammer2_modify_volume(hammer2_mount_t *hmp) | |
| 819 | { | |
| 820 | hammer2_voldata_lock(hmp); | |
| 821 | atomic_set_int(&hmp->vchain.flags, HAMMER2_CHAIN_MODIFIED_AUX); | |
| 822 | hammer2_voldata_unlock(hmp); | |
| 823 | } | |
| 824 | ||
| 825 | /* | |
| b7926f31 MD |
826 | * Locate an in-memory chain. The parent must be locked. The in-memory |
| 827 | * chain is returned or NULL if no in-memory chain is present. | |
| 828 | * | |
| 829 | * NOTE: A chain on-media might exist for this index when NULL is returned. | |
| 830 | */ | |
| 831 | hammer2_chain_t * | |
| 832 | hammer2_chain_find(hammer2_mount_t *hmp, hammer2_chain_t *parent, int index) | |
| 833 | { | |
| 834 | hammer2_chain_t dummy; | |
| 835 | hammer2_chain_t *chain; | |
| 836 | ||
| 837 | dummy.index = index; | |
| 838 | chain = SPLAY_FIND(hammer2_chain_splay, &parent->shead, &dummy); | |
| 839 | return (chain); | |
| 840 | } | |
| 841 | ||
| 842 | /* | |
| 5c23d7f1 MD |
843 | * Return a locked chain structure with all associated data acquired. |
| 844 | * | |
| 5c23d7f1 | 845 | * Caller must lock the parent on call, the returned child will be locked. |
| 232a50f9 MD |
846 | */ |
| 847 | hammer2_chain_t * | |
| c667909f MD |
848 | hammer2_chain_get(hammer2_mount_t *hmp, hammer2_chain_t *parent, |
| 849 | int index, int flags) | |
| 232a50f9 | 850 | { |
| 5c23d7f1 | 851 | hammer2_blockref_t *bref; |
| 232a50f9 | 852 | hammer2_chain_t *chain; |
| 5c23d7f1 | 853 | hammer2_chain_t dummy; |
| 01eabad4 MD |
854 | int how; |
| 855 | ||
| 856 | /* | |
| 857 | * Figure out how to lock. MAYBE can be used to optimized | |
| 858 | * the initial-create state for indirect blocks. | |
| 859 | */ | |
| 860 | if (flags & (HAMMER2_LOOKUP_NODATA | HAMMER2_LOOKUP_NOLOCK)) | |
| 861 | how = HAMMER2_RESOLVE_NEVER; | |
| 862 | else | |
| 863 | how = HAMMER2_RESOLVE_MAYBE; | |
| 232a50f9 MD |
864 | |
| 865 | /* | |
| 5c23d7f1 MD |
866 | * First see if we have a (possibly modified) chain element cached |
| 867 | * for this (parent, index). Acquire the data if necessary. | |
| 868 | * | |
| 869 | * If chain->data is non-NULL the chain should already be marked | |
| 870 | * modified. | |
| 232a50f9 | 871 | */ |
| 5c23d7f1 MD |
872 | dummy.index = index; |
| 873 | chain = SPLAY_FIND(hammer2_chain_splay, &parent->shead, &dummy); | |
| 874 | if (chain) { | |
| 01eabad4 MD |
875 | if (flags & HAMMER2_LOOKUP_NOLOCK) |
| 876 | hammer2_chain_ref(hmp, chain); | |
| 877 | else | |
| 878 | hammer2_chain_lock(hmp, chain, how); | |
| 5c23d7f1 | 879 | return(chain); |
| 232a50f9 | 880 | } |
| 232a50f9 MD |
881 | |
| 882 | /* | |
| 01eabad4 MD |
883 | * the get function must always succeed, panic if there's no |
| 884 | * data to index. | |
| 885 | */ | |
| 886 | if (parent->flags & HAMMER2_CHAIN_INITIAL) { | |
| 887 | panic("hammer2_chain_get: Missing bref(1)"); | |
| 888 | /* NOT REACHED */ | |
| 889 | } | |
| 890 | ||
| 891 | /* | |
| db71f61f | 892 | * Otherwise lookup the bref and issue I/O (switch on the parent) |
| 232a50f9 | 893 | */ |
| 5c23d7f1 | 894 | switch(parent->bref.type) { |
| 232a50f9 | 895 | case HAMMER2_BREF_TYPE_INODE: |
| 5c23d7f1 MD |
896 | KKASSERT(index >= 0 && index < HAMMER2_SET_COUNT); |
| 897 | bref = &parent->data->ipdata.u.blockset.blockref[index]; | |
| 232a50f9 MD |
898 | break; |
| 899 | case HAMMER2_BREF_TYPE_INDIRECT: | |
| 01eabad4 | 900 | KKASSERT(parent->data != NULL); |
| 6ba3b984 MD |
901 | KKASSERT(index >= 0 && |
| 902 | index < parent->bytes / sizeof(hammer2_blockref_t)); | |
| 5c23d7f1 | 903 | bref = &parent->data->npdata.blockref[index]; |
| 232a50f9 MD |
904 | break; |
| 905 | case HAMMER2_BREF_TYPE_VOLUME: | |
| 5c23d7f1 MD |
906 | KKASSERT(index >= 0 && index < HAMMER2_SET_COUNT); |
| 907 | bref = &hmp->voldata.sroot_blockset.blockref[index]; | |
| 232a50f9 MD |
908 | break; |
| 909 | default: | |
| 5c23d7f1 | 910 | bref = NULL; |
| 232a50f9 | 911 | panic("hammer2_chain_get: unrecognized blockref type: %d", |
| 5c23d7f1 | 912 | parent->bref.type); |
| 232a50f9 | 913 | } |
| 01eabad4 MD |
914 | if (bref->type == 0) { |
| 915 | panic("hammer2_chain_get: Missing bref(2)"); | |
| 916 | /* NOT REACHED */ | |
| 917 | } | |
| 6934ae32 MD |
918 | |
| 919 | /* | |
| 920 | * Allocate a chain structure representing the existing media | |
| 01eabad4 MD |
921 | * entry. |
| 922 | * | |
| 923 | * The locking operation we do later will issue I/O to read it. | |
| 6934ae32 | 924 | */ |
| 5c23d7f1 | 925 | chain = hammer2_chain_alloc(hmp, bref); |
| 5c23d7f1 MD |
926 | |
| 927 | /* | |
| 928 | * Link the chain into its parent. Caller is expected to hold an | |
| 929 | * exclusive lock on the parent. | |
| 930 | */ | |
| 931 | chain->parent = parent; | |
| 932 | chain->index = index; | |
| 933 | if (SPLAY_INSERT(hammer2_chain_splay, &parent->shead, chain)) | |
| 934 | panic("hammer2_chain_link: collision"); | |
| 8cce658d | 935 | KKASSERT(parent->refs > 0); |
| 5b4a2132 | 936 | atomic_add_int(&parent->refs, 1); /* for splay entry */ |
| 5c23d7f1 MD |
937 | |
| 938 | /* | |
| e028fa74 MD |
939 | * Additional linkage for inodes. Reuse the parent pointer to |
| 940 | * find the parent directory. | |
| 941 | */ | |
| 942 | if (bref->type == HAMMER2_BREF_TYPE_INODE) { | |
| 943 | while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT) | |
| 944 | parent = parent->parent; | |
| 945 | if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) | |
| 946 | chain->u.ip->pip = parent->u.ip; | |
| 947 | } | |
| 948 | ||
| 949 | /* | |
| 5c23d7f1 MD |
950 | * Our new chain structure has already been referenced and locked |
| 951 | * but the lock code handles the I/O so call it to resolve the data. | |
| 952 | * Then release one of our two exclusive locks. | |
| c667909f MD |
953 | * |
| 954 | * If NOLOCK is set the release will release the one-and-only lock. | |
| 5c23d7f1 | 955 | */ |
| 01eabad4 MD |
956 | if ((flags & HAMMER2_LOOKUP_NOLOCK) == 0) { |
| 957 | hammer2_chain_lock(hmp, chain, how); /* recusive lock */ | |
| 958 | hammer2_chain_drop(hmp, chain); /* excess ref */ | |
| 959 | } | |
| 960 | lockmgr(&chain->lk, LK_RELEASE); /* from alloc */ | |
| 232a50f9 MD |
961 | |
| 962 | return (chain); | |
| 963 | } | |
| 964 | ||
| 5c23d7f1 | 965 | /* |
| e028fa74 MD |
966 | * Locate any key between key_beg and key_end inclusive. (*parentp) |
| 967 | * typically points to an inode but can also point to a related indirect | |
| 968 | * block and this function will recurse upwards and find the inode again. | |
| 5c23d7f1 | 969 | * |
| 37aa19df MD |
970 | * WARNING! THIS DOES NOT RETURN KEYS IN LOGICAL KEY ORDER! ANY KEY |
| 971 | * WITHIN THE RANGE CAN BE RETURNED. HOWEVER, AN ITERATION | |
| 972 | * WHICH PICKS UP WHERE WE LEFT OFF WILL CONTINUE THE SCAN. | |
| 973 | * | |
| 5c23d7f1 MD |
974 | * (*parentp) must be exclusively locked and referenced and can be an inode |
| 975 | * or an existing indirect block within the inode. | |
| 976 | * | |
| 977 | * On return (*parentp) will be modified to point at the deepest parent chain | |
| 978 | * element encountered during the search, as a helper for an insertion or | |
| 979 | * deletion. The new (*parentp) will be locked and referenced and the old | |
| 980 | * will be unlocked and dereferenced (no change if they are both the same). | |
| 981 | * | |
| 982 | * The matching chain will be returned exclusively locked and referenced. | |
| 983 | * | |
| 984 | * NULL is returned if no match was found, but (*parentp) will still | |
| 985 | * potentially be adjusted. | |
| 986 | * | |
| 987 | * This function will also recurse up the chain if the key is not within the | |
| 988 | * current parent's range. (*parentp) can never be set to NULL. An iteration | |
| 989 | * can simply allow (*parentp) to float inside the loop. | |
| 7cfa8da5 MD |
990 | */ |
| 991 | hammer2_chain_t * | |
| 5c23d7f1 | 992 | hammer2_chain_lookup(hammer2_mount_t *hmp, hammer2_chain_t **parentp, |
| c667909f MD |
993 | hammer2_key_t key_beg, hammer2_key_t key_end, |
| 994 | int flags) | |
| 7cfa8da5 | 995 | { |
| 5c23d7f1 | 996 | hammer2_chain_t *parent; |
| 232a50f9 | 997 | hammer2_chain_t *chain; |
| b7926f31 | 998 | hammer2_chain_t *tmp; |
| 5c23d7f1 | 999 | hammer2_blockref_t *base; |
| 232a50f9 | 1000 | hammer2_blockref_t *bref; |
| e028fa74 MD |
1001 | hammer2_key_t scan_beg; |
| 1002 | hammer2_key_t scan_end; | |
| 232a50f9 | 1003 | int count = 0; |
| 232a50f9 MD |
1004 | int i; |
| 1005 | ||
| 232a50f9 | 1006 | /* |
| e028fa74 MD |
1007 | * Recurse (*parentp) upward if necessary until the parent completely |
| 1008 | * encloses the key range or we hit the inode. | |
| 5c23d7f1 MD |
1009 | */ |
| 1010 | parent = *parentp; | |
| 1011 | while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT) { | |
| e028fa74 MD |
1012 | scan_beg = parent->bref.key; |
| 1013 | scan_end = scan_beg + | |
| 1014 | ((hammer2_key_t)1 << parent->bref.keybits) - 1; | |
| 1015 | if (key_beg >= scan_beg && key_end <= scan_end) | |
| 5c23d7f1 | 1016 | break; |
| 01eabad4 MD |
1017 | hammer2_chain_ref(hmp, parent); /* ref old parent */ |
| 1018 | hammer2_chain_unlock(hmp, parent); /* unlock old parent */ | |
| 5c23d7f1 | 1019 | parent = parent->parent; |
| 01eabad4 MD |
1020 | /* lock new parent */ |
| 1021 | hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_MAYBE); | |
| 5c23d7f1 MD |
1022 | hammer2_chain_drop(hmp, *parentp); /* drop old parent */ |
| 1023 | *parentp = parent; /* new parent */ | |
| 1024 | } | |
| 1025 | ||
| 1026 | again: | |
| 1027 | /* | |
| 1028 | * Locate the blockref array. Currently we do a fully associative | |
| 1029 | * search through the array. | |
| 232a50f9 MD |
1030 | */ |
| 1031 | switch(parent->bref.type) { | |
| 1032 | case HAMMER2_BREF_TYPE_INODE: | |
| 3ac6a319 MD |
1033 | /* |
| 1034 | * Special shortcut for embedded data returns the inode | |
| 1035 | * itself. Callers must detect this condition and access | |
| 1036 | * the embedded data (the strategy code does this for us). | |
| 1037 | * | |
| 1038 | * This is only applicable to regular files and softlinks. | |
| 1039 | */ | |
| 1040 | if (parent->data->ipdata.op_flags & HAMMER2_OPFLAG_DIRECTDATA) { | |
| 01eabad4 MD |
1041 | if (flags & HAMMER2_LOOKUP_NOLOCK) |
| 1042 | hammer2_chain_ref(hmp, parent); | |
| 1043 | else | |
| 1044 | hammer2_chain_lock(hmp, parent, | |
| 1045 | HAMMER2_RESOLVE_ALWAYS); | |
| 3ac6a319 MD |
1046 | return (parent); |
| 1047 | } | |
| 5c23d7f1 MD |
1048 | base = &parent->data->ipdata.u.blockset.blockref[0]; |
| 1049 | count = HAMMER2_SET_COUNT; | |
| 232a50f9 MD |
1050 | break; |
| 1051 | case HAMMER2_BREF_TYPE_INDIRECT: | |
| 01eabad4 MD |
1052 | /* |
| 1053 | * Optimize indirect blocks in the INITIAL state to avoid | |
| 1054 | * I/O. | |
| 1055 | */ | |
| 1056 | if (parent->flags & HAMMER2_CHAIN_INITIAL) { | |
| 1057 | base = NULL; | |
| 1058 | } else { | |
| 1059 | if (parent->data == NULL) | |
| 1060 | panic("parent->data is NULL"); | |
| 1061 | base = &parent->data->npdata.blockref[0]; | |
| 1062 | } | |
| 6ba3b984 | 1063 | count = parent->bytes / sizeof(hammer2_blockref_t); |
| 232a50f9 MD |
1064 | break; |
| 1065 | case HAMMER2_BREF_TYPE_VOLUME: | |
| 5c23d7f1 MD |
1066 | base = &hmp->voldata.sroot_blockset.blockref[0]; |
| 1067 | count = HAMMER2_SET_COUNT; | |
| 232a50f9 MD |
1068 | break; |
| 1069 | default: | |
| 3ac6a319 | 1070 | panic("hammer2_chain_lookup: unrecognized blockref type: %d", |
| 232a50f9 | 1071 | parent->bref.type); |
| 5c23d7f1 MD |
1072 | base = NULL; /* safety */ |
| 1073 | count = 0; /* safety */ | |
| 232a50f9 MD |
1074 | } |
| 1075 | ||
| 5c23d7f1 | 1076 | /* |
| e028fa74 | 1077 | * If the element and key overlap we use the element. |
| 5c23d7f1 MD |
1078 | */ |
| 1079 | bref = NULL; | |
| 1080 | for (i = 0; i < count; ++i) { | |
| b7926f31 | 1081 | tmp = hammer2_chain_find(hmp, parent, i); |
| 01eabad4 MD |
1082 | if (tmp) { |
| 1083 | bref = &tmp->bref; | |
| 1084 | KKASSERT(bref->type != 0); | |
| 1085 | } else if (base == NULL || base[i].type == 0) { | |
| c667909f | 1086 | continue; |
| 01eabad4 MD |
1087 | } else { |
| 1088 | bref = &base[i]; | |
| 1089 | } | |
| e028fa74 MD |
1090 | scan_beg = bref->key; |
| 1091 | scan_end = scan_beg + ((hammer2_key_t)1 << bref->keybits) - 1; | |
| 1092 | if (key_beg <= scan_end && key_end >= scan_beg) | |
| 232a50f9 | 1093 | break; |
| 232a50f9 | 1094 | } |
| 5c23d7f1 | 1095 | if (i == count) { |
| e028fa74 | 1096 | if (key_beg == key_end) |
| 5c23d7f1 | 1097 | return (NULL); |
| e028fa74 | 1098 | return (hammer2_chain_next(hmp, parentp, NULL, |
| c667909f | 1099 | key_beg, key_end, flags)); |
| 5c23d7f1 MD |
1100 | } |
| 1101 | ||
| 1102 | /* | |
| 1103 | * Acquire the new chain element. If the chain element is an | |
| 1104 | * indirect block we must search recursively. | |
| 1105 | */ | |
| c667909f | 1106 | chain = hammer2_chain_get(hmp, parent, i, flags); |
| 5c23d7f1 MD |
1107 | if (chain == NULL) |
| 1108 | return (NULL); | |
| 1109 | ||
| 1110 | /* | |
| 1111 | * If the chain element is an indirect block it becomes the new | |
| 01eabad4 MD |
1112 | * parent and we loop on it. |
| 1113 | * | |
| 1114 | * The parent always has to be locked with at least RESOLVE_MAYBE, | |
| 1115 | * so it might need a fixup if the caller passed incompatible flags. | |
| 5c23d7f1 MD |
1116 | */ |
| 1117 | if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT) { | |
| 01eabad4 | 1118 | hammer2_chain_unlock(hmp, parent); |
| 5c23d7f1 | 1119 | *parentp = parent = chain; |
| 01eabad4 MD |
1120 | if (flags & HAMMER2_LOOKUP_NOLOCK) { |
| 1121 | hammer2_chain_lock(hmp, chain, HAMMER2_RESOLVE_MAYBE); | |
| 1122 | hammer2_chain_drop(hmp, chain); /* excess ref */ | |
| 1123 | } else if (flags & HAMMER2_LOOKUP_NODATA) { | |
| 1124 | hammer2_chain_lock(hmp, chain, HAMMER2_RESOLVE_MAYBE); | |
| 1125 | hammer2_chain_unlock(hmp, chain); | |
| 1126 | } | |
| 5c23d7f1 MD |
1127 | goto again; |
| 1128 | } | |
| 1129 | ||
| 1130 | /* | |
| 1131 | * All done, return chain | |
| 1132 | */ | |
| 232a50f9 | 1133 | return (chain); |
| 7cfa8da5 MD |
1134 | } |
| 1135 | ||
| 1136 | /* | |
| 5c23d7f1 MD |
1137 | * After having issued a lookup we can iterate all matching keys. |
| 1138 | * | |
| 1139 | * If chain is non-NULL we continue the iteration from just after it's index. | |
| 1140 | * | |
| 1141 | * If chain is NULL we assume the parent was exhausted and continue the | |
| 1142 | * iteration at the next parent. | |
| 8e12e3c9 MD |
1143 | * |
| 1144 | * parent must be locked on entry and remains locked throughout. chain's | |
| 1145 | * lock status must match flags. | |
| 7cfa8da5 MD |
1146 | */ |
| 1147 | hammer2_chain_t * | |
| 5c23d7f1 MD |
1148 | hammer2_chain_next(hammer2_mount_t *hmp, hammer2_chain_t **parentp, |
| 1149 | hammer2_chain_t *chain, | |
| c667909f MD |
1150 | hammer2_key_t key_beg, hammer2_key_t key_end, |
| 1151 | int flags) | |
| 7cfa8da5 | 1152 | { |
| 5c23d7f1 | 1153 | hammer2_chain_t *parent; |
| b7926f31 | 1154 | hammer2_chain_t *tmp; |
| 5c23d7f1 | 1155 | hammer2_blockref_t *base; |
| 232a50f9 | 1156 | hammer2_blockref_t *bref; |
| e028fa74 MD |
1157 | hammer2_key_t scan_beg; |
| 1158 | hammer2_key_t scan_end; | |
| 232a50f9 | 1159 | int i; |
| 5c23d7f1 MD |
1160 | int count; |
| 1161 | ||
| 1162 | parent = *parentp; | |
| 232a50f9 | 1163 | |
| 5c23d7f1 MD |
1164 | again: |
| 1165 | /* | |
| 1166 | * Calculate the next index and recalculate the parent if necessary. | |
| 1167 | */ | |
| 1168 | if (chain) { | |
| 1169 | /* | |
| 3ac6a319 MD |
1170 | * Continue iteration within current parent. If not NULL |
| 1171 | * the passed-in chain may or may not be locked, based on | |
| 1172 | * the LOOKUP_NOLOCK flag (passed in as returned from lookup | |
| 1173 | * or a prior next). | |
| 5c23d7f1 MD |
1174 | */ |
| 1175 | i = chain->index + 1; | |
| 3ac6a319 MD |
1176 | if (flags & HAMMER2_LOOKUP_NOLOCK) |
| 1177 | hammer2_chain_drop(hmp, chain); | |
| 1178 | else | |
| 01eabad4 | 1179 | hammer2_chain_unlock(hmp, chain); |
| 3ac6a319 MD |
1180 | |
| 1181 | /* | |
| 1182 | * Any scan where the lookup returned degenerate data embedded | |
| 1183 | * in the inode has an invalid index and must terminate. | |
| 1184 | */ | |
| 1185 | if (chain == parent) | |
| 1186 | return(NULL); | |
| 5c23d7f1 MD |
1187 | chain = NULL; |
| 1188 | } else if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT) { | |
| 1189 | /* | |
| 1190 | * We reached the end of the iteration. | |
| 1191 | */ | |
| 1192 | return (NULL); | |
| 1193 | } else { | |
| 1194 | /* | |
| 37aa19df MD |
1195 | * Continue iteration with next parent unless the current |
| 1196 | * parent covers the range. | |
| 5c23d7f1 | 1197 | */ |
| 995e78dc MD |
1198 | hammer2_chain_t *nparent; |
| 1199 | ||
| 37aa19df MD |
1200 | scan_beg = parent->bref.key; |
| 1201 | scan_end = scan_beg + | |
| 1202 | ((hammer2_key_t)1 << parent->bref.keybits) - 1; | |
| 1203 | if (key_beg >= scan_beg && key_end <= scan_end) | |
| 1204 | return (NULL); | |
| 1205 | ||
| 5c23d7f1 | 1206 | i = parent->index + 1; |
| 995e78dc MD |
1207 | nparent = parent->parent; |
| 1208 | hammer2_chain_ref(hmp, nparent); /* ref new parent */ | |
| 01eabad4 MD |
1209 | hammer2_chain_unlock(hmp, parent); /* unlock old parent */ |
| 1210 | /* lock new parent */ | |
| 1211 | hammer2_chain_lock(hmp, nparent, HAMMER2_RESOLVE_MAYBE); | |
| 1212 | hammer2_chain_drop(hmp, nparent); /* drop excess ref */ | |
| 5b4a2132 | 1213 | *parentp = parent = nparent; |
| 5c23d7f1 | 1214 | } |
| 232a50f9 | 1215 | |
| 5c23d7f1 | 1216 | again2: |
| 232a50f9 | 1217 | /* |
| 5c23d7f1 MD |
1218 | * Locate the blockref array. Currently we do a fully associative |
| 1219 | * search through the array. | |
| 232a50f9 MD |
1220 | */ |
| 1221 | switch(parent->bref.type) { | |
| 1222 | case HAMMER2_BREF_TYPE_INODE: | |
| 5c23d7f1 MD |
1223 | base = &parent->data->ipdata.u.blockset.blockref[0]; |
| 1224 | count = HAMMER2_SET_COUNT; | |
| 232a50f9 MD |
1225 | break; |
| 1226 | case HAMMER2_BREF_TYPE_INDIRECT: | |
| 01eabad4 MD |
1227 | if (parent->flags & HAMMER2_CHAIN_INITIAL) { |
| 1228 | base = NULL; | |
| 1229 | } else { | |
| 1230 | KKASSERT(parent->data != NULL); | |
| 1231 | base = &parent->data->npdata.blockref[0]; | |
| 1232 | } | |
| 6ba3b984 | 1233 | count = parent->bytes / sizeof(hammer2_blockref_t); |
| 232a50f9 MD |
1234 | break; |
| 1235 | case HAMMER2_BREF_TYPE_VOLUME: | |
| 5c23d7f1 MD |
1236 | base = &hmp->voldata.sroot_blockset.blockref[0]; |
| 1237 | count = HAMMER2_SET_COUNT; | |
| 232a50f9 MD |
1238 | break; |
| 1239 | default: | |
| 3ac6a319 | 1240 | panic("hammer2_chain_next: unrecognized blockref type: %d", |
| 232a50f9 | 1241 | parent->bref.type); |
| 5c23d7f1 MD |
1242 | base = NULL; /* safety */ |
| 1243 | count = 0; /* safety */ | |
| 1244 | break; | |
| 232a50f9 | 1245 | } |
| 5c23d7f1 | 1246 | KKASSERT(i <= count); |
| 232a50f9 | 1247 | |
| 5c23d7f1 MD |
1248 | /* |
| 1249 | * Look for the key. If we are unable to find a match and an exact | |
| 1250 | * match was requested we return NULL. If a range was requested we | |
| 1251 | * run hammer2_chain_next() to iterate. | |
| 1252 | */ | |
| 1253 | bref = NULL; | |
| 1254 | while (i < count) { | |
| b7926f31 | 1255 | tmp = hammer2_chain_find(hmp, parent, i); |
| 01eabad4 MD |
1256 | if (tmp) { |
| 1257 | bref = &tmp->bref; | |
| 1258 | } else if (base == NULL || base[i].type == 0) { | |
| c667909f MD |
1259 | ++i; |
| 1260 | continue; | |
| 01eabad4 MD |
1261 | } else { |
| 1262 | bref = &base[i]; | |
| c667909f | 1263 | } |
| e028fa74 MD |
1264 | scan_beg = bref->key; |
| 1265 | scan_end = scan_beg + ((hammer2_key_t)1 << bref->keybits) - 1; | |
| 1266 | if (key_beg <= scan_end && key_end >= scan_beg) | |
| 232a50f9 | 1267 | break; |
| 5c23d7f1 | 1268 | ++i; |
| 232a50f9 | 1269 | } |
| 5c23d7f1 MD |
1270 | |
| 1271 | /* | |
| 1272 | * If we couldn't find a match recurse up a parent to continue the | |
| 1273 | * search. | |
| 1274 | */ | |
| 1275 | if (i == count) | |
| 1276 | goto again; | |
| 1277 | ||
| 1278 | /* | |
| 1279 | * Acquire the new chain element. If the chain element is an | |
| 1280 | * indirect block we must search recursively. | |
| 1281 | */ | |
| c667909f | 1282 | chain = hammer2_chain_get(hmp, parent, i, flags); |
| 5c23d7f1 MD |
1283 | if (chain == NULL) |
| 1284 | return (NULL); | |
| 1285 | ||
| 1286 | /* | |
| 1287 | * If the chain element is an indirect block it becomes the new | |
| 01eabad4 MD |
1288 | * parent and we loop on it. |
| 1289 | * | |
| 1290 | * The parent always has to be locked with at least RESOLVE_MAYBE, | |
| 1291 | * so it might need a fixup if the caller passed incompatible flags. | |
| 5c23d7f1 MD |
1292 | */ |
| 1293 | if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT) { | |
| 01eabad4 | 1294 | hammer2_chain_unlock(hmp, parent); |
| 5c23d7f1 | 1295 | *parentp = parent = chain; |
| 8e12e3c9 | 1296 | chain = NULL; |
| 01eabad4 | 1297 | if (flags & HAMMER2_LOOKUP_NOLOCK) { |
| 8e12e3c9 MD |
1298 | hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_MAYBE); |
| 1299 | hammer2_chain_drop(hmp, parent); /* excess ref */ | |
| 01eabad4 | 1300 | } else if (flags & HAMMER2_LOOKUP_NODATA) { |
| 8e12e3c9 MD |
1301 | hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_MAYBE); |
| 1302 | hammer2_chain_unlock(hmp, parent); | |
| 01eabad4 | 1303 | } |
| 5c23d7f1 MD |
1304 | i = 0; |
| 1305 | goto again2; | |
| 1306 | } | |
| 1307 | ||
| 1308 | /* | |
| 1309 | * All done, return chain | |
| 1310 | */ | |
| 232a50f9 | 1311 | return (chain); |
| 7cfa8da5 MD |
1312 | } |
| 1313 | ||
| 1314 | /* | |
| 5c23d7f1 | 1315 | * Create and return a new hammer2 system memory structure of the specified |
| 37aa19df MD |
1316 | * key, type and size and insert it RELATIVE TO (PARENT). |
| 1317 | * | |
| 01eabad4 | 1318 | * (parent) is typically either an inode or an indirect block, acquired |
| 37aa19df MD |
1319 | * acquired as a side effect of issuing a prior failed lookup. parent |
| 1320 | * must be locked and held. Do not pass the inode chain to this function | |
| 1321 | * unless that is the chain returned by the failed lookup. | |
| 5c23d7f1 MD |
1322 | * |
| 1323 | * Non-indirect types will automatically allocate indirect blocks as required | |
| 1324 | * if the new item does not fit in the current (parent). | |
| 1325 | * | |
| 1326 | * Indirect types will move a portion of the existing blockref array in | |
| 1327 | * (parent) into the new indirect type and then use one of the free slots | |
| 1328 | * to emplace the new indirect type. | |
| 1329 | * | |
| 1330 | * A new locked, referenced chain element is returned of the specified type. | |
| 01eabad4 MD |
1331 | * The element may or may not have a data area associated with it: |
| 1332 | * | |
| 1333 | * VOLUME not allowed here | |
| 1334 | * INODE embedded data are will be set-up | |
| 1335 | * INDIRECT not allowed here | |
| 1336 | * DATA no data area will be set-up (caller is expected | |
| 1337 | * to have logical buffers, we don't want to alias | |
| 1338 | * the data onto device buffers!). | |
| 7cfa8da5 MD |
1339 | */ |
| 1340 | hammer2_chain_t * | |
| 5c23d7f1 | 1341 | hammer2_chain_create(hammer2_mount_t *hmp, hammer2_chain_t *parent, |
| 6934ae32 | 1342 | hammer2_chain_t *chain, |
| 5c23d7f1 | 1343 | hammer2_key_t key, int keybits, int type, size_t bytes) |
| 7cfa8da5 | 1344 | { |
| 5c23d7f1 MD |
1345 | hammer2_blockref_t dummy; |
| 1346 | hammer2_blockref_t *base; | |
| b7926f31 | 1347 | hammer2_chain_t dummy_chain; |
| 6934ae32 MD |
1348 | int unlock_parent = 0; |
| 1349 | int allocated = 0; | |
| 5c23d7f1 | 1350 | int count; |
| 232a50f9 MD |
1351 | int i; |
| 1352 | ||
| 6934ae32 MD |
1353 | if (chain == NULL) { |
| 1354 | /* | |
| 1355 | * First allocate media space and construct the dummy bref, | |
| 1356 | * then allocate the in-memory chain structure. | |
| 1357 | */ | |
| 1358 | bzero(&dummy, sizeof(dummy)); | |
| 1359 | dummy.type = type; | |
| 1360 | dummy.key = key; | |
| 1361 | dummy.keybits = keybits; | |
| 6ba3b984 | 1362 | dummy.data_off = hammer2_bytes_to_radix(bytes); |
| 6934ae32 MD |
1363 | chain = hammer2_chain_alloc(hmp, &dummy); |
| 1364 | allocated = 1; | |
| 232a50f9 | 1365 | |
| 6934ae32 | 1366 | /* |
| 01eabad4 MD |
1367 | * We do NOT set INITIAL here (yet). INITIAL is only |
| 1368 | * used for indirect blocks. | |
| 8e12e3c9 | 1369 | * |
| 6934ae32 MD |
1370 | * Recalculate bytes to reflect the actual media block |
| 1371 | * allocation. | |
| 1372 | */ | |
| 1373 | bytes = (hammer2_off_t)1 << | |
| 1374 | (int)(chain->bref.data_off & HAMMER2_OFF_MASK_RADIX); | |
| 866d5273 | 1375 | chain->bytes = bytes; |
| 6934ae32 MD |
1376 | |
| 1377 | switch(type) { | |
| 1378 | case HAMMER2_BREF_TYPE_VOLUME: | |
| 1379 | panic("hammer2_chain_create: called with volume type"); | |
| 1380 | break; | |
| 1381 | case HAMMER2_BREF_TYPE_INODE: | |
| 1382 | KKASSERT(bytes == HAMMER2_INODE_BYTES); | |
| 1383 | chain->data = (void *)&chain->u.ip->ip_data; | |
| 1384 | break; | |
| 6ba3b984 | 1385 | case HAMMER2_BREF_TYPE_INDIRECT: |
| 01eabad4 MD |
1386 | panic("hammer2_chain_create: cannot be used to" |
| 1387 | "create indirect block"); | |
| 1388 | break; | |
| 8cce658d | 1389 | case HAMMER2_BREF_TYPE_DATA: |
| 6934ae32 MD |
1390 | default: |
| 1391 | /* leave chain->data NULL */ | |
| 1392 | KKASSERT(chain->data == NULL); | |
| 1393 | break; | |
| 1394 | } | |
| 1395 | } else { | |
| 1396 | /* | |
| 8e12e3c9 | 1397 | * Potentially update the chain's key/keybits. |
| 6934ae32 MD |
1398 | */ |
| 1399 | chain->bref.key = key; | |
| 1400 | chain->bref.keybits = keybits; | |
| 5c23d7f1 | 1401 | } |
| 5c23d7f1 | 1402 | |
| 995e78dc | 1403 | again: |
| 5c23d7f1 MD |
1404 | /* |
| 1405 | * Locate a free blockref in the parent's array | |
| 1406 | */ | |
| 232a50f9 MD |
1407 | switch(parent->bref.type) { |
| 1408 | case HAMMER2_BREF_TYPE_INODE: | |
| 995e78dc | 1409 | KKASSERT(parent->data != NULL); |
| 5c23d7f1 MD |
1410 | base = &parent->data->ipdata.u.blockset.blockref[0]; |
| 1411 | count = HAMMER2_SET_COUNT; | |
| 232a50f9 MD |
1412 | break; |
| 1413 | case HAMMER2_BREF_TYPE_INDIRECT: | |
| 01eabad4 MD |
1414 | if (parent->flags & HAMMER2_CHAIN_INITIAL) { |
| 1415 | base = NULL; | |
| 1416 | } else { | |
| 1417 | KKASSERT(parent->data != NULL); | |
| 1418 | base = &parent->data->npdata.blockref[0]; | |
| 1419 | } | |
| 6ba3b984 | 1420 | count = parent->bytes / sizeof(hammer2_blockref_t); |
| 232a50f9 MD |
1421 | break; |
| 1422 | case HAMMER2_BREF_TYPE_VOLUME: | |
| 995e78dc | 1423 | KKASSERT(parent->data != NULL); |
| 5c23d7f1 MD |
1424 | base = &hmp->voldata.sroot_blockset.blockref[0]; |
| 1425 | count = HAMMER2_SET_COUNT; | |
| 232a50f9 MD |
1426 | break; |
| 1427 | default: | |
| 3ac6a319 | 1428 | panic("hammer2_chain_create: unrecognized blockref type: %d", |
| 232a50f9 | 1429 | parent->bref.type); |
| 5c23d7f1 MD |
1430 | count = 0; |
| 1431 | break; | |
| 232a50f9 MD |
1432 | } |
| 1433 | ||
| b7926f31 | 1434 | /* |
| db71f61f MD |
1435 | * Scan for an unallocated bref, also skipping any slots occupied |
| 1436 | * by in-memory chain elements that may not yet have been updated | |
| 1437 | * in the parent's bref array. | |
| b7926f31 MD |
1438 | */ |
| 1439 | bzero(&dummy_chain, sizeof(dummy_chain)); | |
| 5c23d7f1 | 1440 | for (i = 0; i < count; ++i) { |
| 01eabad4 MD |
1441 | if (base == NULL) { |
| 1442 | dummy_chain.index = i; | |
| 1443 | if (SPLAY_FIND(hammer2_chain_splay, | |
| 1444 | &parent->shead, &dummy_chain) == NULL) { | |
| 1445 | break; | |
| 1446 | } | |
| 1447 | } else if (base[i].type == 0) { | |
| 1448 | dummy_chain.index = i; | |
| 1449 | if (SPLAY_FIND(hammer2_chain_splay, | |
| 1450 | &parent->shead, &dummy_chain) == NULL) { | |
| 1451 | break; | |
| 1452 | } | |
| b7926f31 | 1453 | } |
| 232a50f9 | 1454 | } |
| 5c23d7f1 MD |
1455 | |
| 1456 | /* | |
| 995e78dc MD |
1457 | * If no free blockref count be found we must create an indirect |
| 1458 | * block and move a number of blockrefs into it. With the parent | |
| 1459 | * locked we can safely lock each child in order to move it without | |
| 1460 | * causing a deadlock. | |
| 1461 | * | |
| 1462 | * This may return the new indirect block or the old parent depending | |
| 1463 | * on where the key falls. | |
| 5c23d7f1 MD |
1464 | */ |
| 1465 | if (i == count) { | |
| 995e78dc MD |
1466 | hammer2_chain_t *nparent; |
| 1467 | ||
| 1468 | nparent = hammer2_chain_create_indirect(hmp, parent, | |
| 1469 | key, keybits); | |
| 1470 | if (nparent == NULL) { | |
| 6934ae32 MD |
1471 | if (allocated) |
| 1472 | hammer2_chain_free(hmp, chain); | |
| 995e78dc MD |
1473 | chain = NULL; |
| 1474 | goto done; | |
| 1475 | } | |
| 1476 | if (parent != nparent) { | |
| 1477 | if (unlock_parent) | |
| 01eabad4 | 1478 | hammer2_chain_unlock(hmp, parent); |
| 995e78dc MD |
1479 | parent = nparent; |
| 1480 | unlock_parent = 1; | |
| 1481 | } | |
| 1482 | goto again; | |
| 5c23d7f1 MD |
1483 | } |
| 1484 | ||
| 1485 | /* | |
| b7926f31 | 1486 | * Link the chain into its parent. |
| 5c23d7f1 | 1487 | */ |
| 6934ae32 MD |
1488 | if (chain->parent != NULL) |
| 1489 | panic("hammer2: hammer2_chain_create: chain already connected"); | |
| 1490 | KKASSERT(chain->parent == NULL); | |
| 5c23d7f1 MD |
1491 | chain->parent = parent; |
| 1492 | chain->index = i; | |
| 1493 | if (SPLAY_INSERT(hammer2_chain_splay, &parent->shead, chain)) | |
| 1494 | panic("hammer2_chain_link: collision"); | |
| 6934ae32 | 1495 | atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DELETED); |
| 222d9e22 | 1496 | KKASSERT(parent->refs > 0); |
| 5c23d7f1 | 1497 | atomic_add_int(&parent->refs, 1); |
| e028fa74 MD |
1498 | |
| 1499 | /* | |
| 1500 | * Additional linkage for inodes. Reuse the parent pointer to | |
| 1501 | * find the parent directory. | |
| 1502 | */ | |
| b7926f31 | 1503 | if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) { |
| 995e78dc MD |
1504 | hammer2_chain_t *scan = parent; |
| 1505 | while (scan->bref.type == HAMMER2_BREF_TYPE_INDIRECT) | |
| 1506 | scan = scan->parent; | |
| 1507 | if (scan->bref.type == HAMMER2_BREF_TYPE_INODE) | |
| 1508 | chain->u.ip->pip = scan->u.ip; | |
| e028fa74 | 1509 | } |
| 5c23d7f1 | 1510 | |
| 37494cab | 1511 | /* |
| 8e12e3c9 | 1512 | * (allocated) indicates that this is a newly-created chain element |
| 8cce658d | 1513 | * rather than a renamed chain element. In this situation we want |
| 2910a90c | 1514 | * to place the chain element in the MODIFIED state. |
| 8cce658d | 1515 | * |
| 01eabad4 | 1516 | * The data area will be set up as follows: |
| db71f61f | 1517 | * |
| 01eabad4 | 1518 | * VOLUME not allowed here. |
| db71f61f | 1519 | * |
| 01eabad4 MD |
1520 | * INODE embedded data are will be set-up. |
| 1521 | * | |
| 1522 | * INDIRECT not allowed here. | |
| 1523 | * | |
| 1524 | * DATA no data area will be set-up (caller is expected | |
| 1525 | * to have logical buffers, we don't want to alias | |
| 1526 | * the data onto device buffers!). | |
| 37494cab | 1527 | */ |
| 8e12e3c9 | 1528 | if (allocated) { |
| 01eabad4 MD |
1529 | if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) { |
| 1530 | hammer2_chain_modify(hmp, chain, | |
| 1531 | HAMMER2_MODIFY_OPTDATA); | |
| 1532 | } else if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT) { | |
| 1533 | /* not supported in this function */ | |
| 1534 | panic("hammer2_chain_create: bad type"); | |
| 1535 | atomic_set_int(&chain->flags, HAMMER2_CHAIN_INITIAL); | |
| 1536 | hammer2_chain_modify(hmp, chain, | |
| 1537 | HAMMER2_MODIFY_OPTDATA); | |
| 1538 | } else { | |
| 1539 | hammer2_chain_modify(hmp, chain, 0); | |
| 1540 | } | |
| 8e12e3c9 MD |
1541 | } else { |
| 1542 | /* | |
| 1543 | * When reconnecting inodes we have to call setsubmod() | |
| 1544 | * to ensure that its state propagates up the newly | |
| 1545 | * connected parent. | |
| 1546 | * | |
| 2910a90c | 1547 | * We cannot depend on the chain being in a MODIFIED |
| 8e12e3c9 MD |
1548 | * state, or it might already be in that state, so |
| 1549 | * even if the parent calls hammer2_chain_modify() | |
| 1550 | * MOVED might not get set. Thus we have to set it | |
| 1551 | * here, too. | |
| 1552 | */ | |
| 1553 | if ((chain->flags & HAMMER2_CHAIN_MOVED) == 0) { | |
| 1554 | hammer2_chain_ref(hmp, chain); | |
| 1555 | atomic_set_int(&chain->flags, HAMMER2_CHAIN_MOVED); | |
| 1556 | } | |
| 1557 | hammer2_chain_parent_setsubmod(hmp, chain); | |
| 6934ae32 | 1558 | } |
| 37494cab | 1559 | |
| 995e78dc MD |
1560 | done: |
| 1561 | if (unlock_parent) | |
| 01eabad4 | 1562 | hammer2_chain_unlock(hmp, parent); |
| 232a50f9 | 1563 | return (chain); |
| 7cfa8da5 | 1564 | } |
| 5c23d7f1 MD |
1565 | |
| 1566 | /* | |
| 995e78dc MD |
1567 | * Create an indirect block that covers one or more of the elements in the |
| 1568 | * current parent. Either returns the existing parent with no locking or | |
| 1569 | * ref changes or returns the new indirect block locked and referenced, | |
| 1570 | * depending on what the specified key falls into. | |
| 1571 | * | |
| 1572 | * The key/keybits for the indirect mode only needs to follow three rules: | |
| 1573 | * | |
| 1574 | * (1) That all elements underneath it fit within its key space and | |
| 1575 | * | |
| 1576 | * (2) That all elements outside it are outside its key space. | |
| 1577 | * | |
| 1578 | * (3) When creating the new indirect block any elements in the current | |
| 1579 | * parent that fit within the new indirect block's keyspace must be | |
| 1580 | * moved into the new indirect block. | |
| 1581 | * | |
| 1582 | * (4) The keyspace chosen for the inserted indirect block CAN cover a wider | |
| 1583 | * keyspace the the current parent, but lookup/iteration rules will | |
| 1584 | * ensure (and must ensure) that rule (2) for all parents leading up | |
| 1585 | * to the nearest inode or the root volume header is adhered to. This | |
| 1586 | * is accomplished by always recursing through matching keyspaces in | |
| 1587 | * the hammer2_chain_lookup() and hammer2_chain_next() API. | |
| 1588 | * | |
| 1589 | * The current implementation calculates the current worst-case keyspace by | |
| 1590 | * iterating the current parent and then divides it into two halves, choosing | |
| 1591 | * whichever half has the most elements (not necessarily the half containing | |
| 1592 | * the requested key). | |
| 1593 | * | |
| 1594 | * We can also opt to use the half with the least number of elements. This | |
| 1595 | * causes lower-numbered keys (aka logical file offsets) to recurse through | |
| 1596 | * fewer indirect blocks and higher-numbered keys to recurse through more. | |
| 1597 | * This also has the risk of not moving enough elements to the new indirect | |
| 1598 | * block and being forced to create several indirect blocks before the element | |
| 1599 | * can be inserted. | |
| 1600 | */ | |
| 1601 | static | |
| 1602 | hammer2_chain_t * | |
| 1603 | hammer2_chain_create_indirect(hammer2_mount_t *hmp, hammer2_chain_t *parent, | |
| 1604 | hammer2_key_t create_key, int create_bits) | |
| 1605 | { | |
| 1606 | hammer2_blockref_t *base; | |
| 1607 | hammer2_blockref_t *bref; | |
| 1608 | hammer2_chain_t *chain; | |
| 1609 | hammer2_chain_t *ichain; | |
| 1610 | hammer2_chain_t dummy; | |
| 1611 | hammer2_key_t key = create_key; | |
| 1612 | int keybits = create_bits; | |
| 1613 | int locount = 0; | |
| 1614 | int hicount = 0; | |
| 1615 | int count; | |
| 6ba3b984 | 1616 | int nbytes; |
| 995e78dc MD |
1617 | int i; |
| 1618 | ||
| 995e78dc | 1619 | /* |
| 01eabad4 MD |
1620 | * Calculate the base blockref pointer or NULL if the chain |
| 1621 | * is known to be empty. | |
| 995e78dc | 1622 | */ |
| 01eabad4 MD |
1623 | hammer2_chain_modify(hmp, parent, HAMMER2_MODIFY_OPTDATA); |
| 1624 | if (parent->flags & HAMMER2_CHAIN_INITIAL) { | |
| 1625 | base = NULL; | |
| 995e78dc | 1626 | |
| 01eabad4 MD |
1627 | /* |
| 1628 | * We still need to calculate the count for SPLAY lookups | |
| 1629 | */ | |
| 1630 | switch(parent->bref.type) { | |
| 1631 | case HAMMER2_BREF_TYPE_INODE: | |
| 1632 | count = HAMMER2_SET_COUNT; | |
| 1633 | break; | |
| 1634 | case HAMMER2_BREF_TYPE_INDIRECT: | |
| 1635 | count = parent->bytes / sizeof(hammer2_blockref_t); | |
| 1636 | break; | |
| 1637 | case HAMMER2_BREF_TYPE_VOLUME: | |
| 1638 | count = HAMMER2_SET_COUNT; | |
| 1639 | break; | |
| 1640 | default: | |
| 1641 | panic("hammer2_chain_create_indirect: " | |
| 1642 | "unrecognized blockref type: %d", | |
| 1643 | parent->bref.type); | |
| 1644 | count = 0; | |
| 1645 | break; | |
| 1646 | } | |
| 1647 | } else { | |
| 1648 | /* | |
| 1649 | * Locate a free blockref in the parent's array | |
| 1650 | */ | |
| 1651 | switch(parent->bref.type) { | |
| 1652 | case HAMMER2_BREF_TYPE_INODE: | |
| 1653 | base = &parent->data->ipdata.u.blockset.blockref[0]; | |
| 1654 | count = HAMMER2_SET_COUNT; | |
| 1655 | break; | |
| 1656 | case HAMMER2_BREF_TYPE_INDIRECT: | |
| 1657 | base = &parent->data->npdata.blockref[0]; | |
| 1658 | count = parent->bytes / sizeof(hammer2_blockref_t); | |
| 1659 | break; | |
| 1660 | case HAMMER2_BREF_TYPE_VOLUME: | |
| 1661 | base = &hmp->voldata.sroot_blockset.blockref[0]; | |
| 1662 | count = HAMMER2_SET_COUNT; | |
| 1663 | break; | |
| 1664 | default: | |
| 1665 | panic("hammer2_chain_create_indirect: " | |
| 1666 | "unrecognized blockref type: %d", | |
| 1667 | parent->bref.type); | |
| 1668 | count = 0; | |
| 1669 | break; | |
| 1670 | } | |
| 995e78dc MD |
1671 | } |
| 1672 | ||
| 1673 | /* | |
| 1674 | * Scan for an unallocated bref, also skipping any slots occupied | |
| 1675 | * by in-memory chain elements that may not yet have been updated | |
| 1676 | * in the parent's bref array. | |
| 1677 | */ | |
| 1678 | bzero(&dummy, sizeof(dummy)); | |
| 1679 | for (i = 0; i < count; ++i) { | |
| 1680 | int nkeybits; | |
| 1681 | ||
| 01eabad4 MD |
1682 | /* |
| 1683 | * Optimize the case where the parent is still in its | |
| 1684 | * initially created state. | |
| 1685 | */ | |
| 1686 | if (base == NULL || base[i].type == 0) { | |
| 995e78dc | 1687 | dummy.index = i; |
| 01eabad4 MD |
1688 | chain = SPLAY_FIND(hammer2_chain_splay, |
| 1689 | &parent->shead, &dummy); | |
| 995e78dc MD |
1690 | if (chain == NULL) |
| 1691 | continue; | |
| 1692 | bref = &chain->bref; | |
| 01eabad4 MD |
1693 | } else { |
| 1694 | bref = &base[i]; | |
| 995e78dc MD |
1695 | } |
| 1696 | ||
| 1697 | /* | |
| 37aa19df MD |
1698 | * Expand our calculated key range (key, keybits) to fit |
| 1699 | * the scanned key. nkeybits represents the full range | |
| 1700 | * that we will later cut in half (two halves @ nkeybits - 1). | |
| 995e78dc MD |
1701 | */ |
| 1702 | nkeybits = keybits; | |
| 1703 | if (nkeybits < bref->keybits) | |
| 1704 | nkeybits = bref->keybits; | |
| 1705 | while ((~(((hammer2_key_t)1 << nkeybits) - 1) & | |
| 1706 | (key ^ bref->key)) != 0) { | |
| 1707 | ++nkeybits; | |
| 1708 | } | |
| 1709 | ||
| 1710 | /* | |
| 1711 | * If the new key range is larger we have to determine | |
| 1712 | * which side of the new key range the existing keys fall | |
| 1713 | * under by checking the high bit, then collapsing the | |
| 1714 | * locount into the hicount or vise-versa. | |
| 1715 | */ | |
| 1716 | if (keybits != nkeybits) { | |
| 1717 | if (((hammer2_key_t)1 << (nkeybits - 1)) & key) { | |
| 1718 | hicount += locount; | |
| 1719 | locount = 0; | |
| 1720 | } else { | |
| 1721 | locount += hicount; | |
| 1722 | hicount = 0; | |
| 1723 | } | |
| 1724 | keybits = nkeybits; | |
| 1725 | } | |
| 1726 | ||
| 1727 | /* | |
| 1728 | * The newly scanned key will be in the lower half or the | |
| 1729 | * higher half of the (new) key range. | |
| 1730 | */ | |
| 1731 | if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key) | |
| 1732 | ++hicount; | |
| 1733 | else | |
| 1734 | ++locount; | |
| 995e78dc MD |
1735 | } |
| 1736 | ||
| 1737 | /* | |
| 37aa19df MD |
1738 | * Adjust keybits to represent half of the full range calculated |
| 1739 | * above. | |
| 1740 | */ | |
| 1741 | --keybits; | |
| 1742 | ||
| 1743 | /* | |
| 1744 | * Select whichever half contains the most elements. Theoretically | |
| 1745 | * we can select either side as long as it contains at least one | |
| 1746 | * element (in order to ensure that a free slot is present to hold | |
| 1747 | * the indirect block). | |
| 995e78dc MD |
1748 | */ |
| 1749 | key &= ~(((hammer2_key_t)1 << keybits) - 1); | |
| 1750 | if (hammer2_indirect_optimize) { | |
| 1751 | /* | |
| 37aa19df MD |
1752 | * Insert node for least number of keys, this will arrange |
| 1753 | * the first few blocks of a large file or the first few | |
| 1754 | * inodes in a directory with fewer indirect blocks when | |
| 1755 | * created linearly. | |
| 995e78dc | 1756 | */ |
| 37aa19df MD |
1757 | if (hicount < locount && hicount != 0) |
| 1758 | key |= (hammer2_key_t)1 << keybits; | |
| 1759 | else | |
| 1760 | key &= ~(hammer2_key_t)1 << keybits; | |
| 995e78dc MD |
1761 | } else { |
| 1762 | /* | |
| 1763 | * Insert node for most number of keys, best for heavily | |
| 1764 | * fragmented files. | |
| 1765 | */ | |
| 1766 | if (hicount > locount) | |
| 37aa19df MD |
1767 | key |= (hammer2_key_t)1 << keybits; |
| 1768 | else | |
| 1769 | key &= ~(hammer2_key_t)1 << keybits; | |
| 995e78dc MD |
1770 | } |
| 1771 | ||
| 1772 | /* | |
| 6ba3b984 MD |
1773 | * How big should our new indirect block be? It has to be at least |
| 1774 | * as large as its parent. | |
| 1775 | */ | |
| 1776 | if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) | |
| 1777 | nbytes = HAMMER2_IND_BYTES_MIN; | |
| 1778 | else | |
| 1779 | nbytes = HAMMER2_IND_BYTES_MAX; | |
| 1780 | if (nbytes < count * sizeof(hammer2_blockref_t)) | |
| 1781 | nbytes = count * sizeof(hammer2_blockref_t); | |
| 1782 | ||
| 1783 | /* | |
| 995e78dc MD |
1784 | * Ok, create our new indirect block |
| 1785 | */ | |
| 1786 | dummy.bref.type = HAMMER2_BREF_TYPE_INDIRECT; | |
| 1787 | dummy.bref.key = key; | |
| 37aa19df | 1788 | dummy.bref.keybits = keybits; |
| 6ba3b984 | 1789 | dummy.bref.data_off = hammer2_bytes_to_radix(nbytes); |
| 995e78dc | 1790 | ichain = hammer2_chain_alloc(hmp, &dummy.bref); |
| 01eabad4 | 1791 | atomic_set_int(&ichain->flags, HAMMER2_CHAIN_INITIAL); |
| 995e78dc MD |
1792 | |
| 1793 | /* | |
| 1794 | * Iterate the original parent and move the matching brefs into | |
| 37aa19df | 1795 | * the new indirect block. |
| 995e78dc MD |
1796 | */ |
| 1797 | for (i = 0; i < count; ++i) { | |
| 37aa19df MD |
1798 | /* |
| 1799 | * For keying purposes access the bref from the media or | |
| 1800 | * from our in-memory cache. In cases where the in-memory | |
| 1801 | * cache overrides the media the keyrefs will be the same | |
| 1802 | * anyway so we can avoid checking the cache when the media | |
| 1803 | * has a key. | |
| 1804 | */ | |
| 01eabad4 | 1805 | if (base == NULL || base[i].type == 0) { |
| 995e78dc | 1806 | dummy.index = i; |
| 01eabad4 MD |
1807 | chain = SPLAY_FIND(hammer2_chain_splay, |
| 1808 | &parent->shead, &dummy); | |
| 995e78dc MD |
1809 | if (chain == NULL) { |
| 1810 | /* | |
| 1811 | * Select index indirect block is placed in | |
| 1812 | */ | |
| 1813 | if (ichain->index < 0) | |
| 1814 | ichain->index = i; | |
| 1815 | continue; | |
| 1816 | } | |
| 995e78dc | 1817 | bref = &chain->bref; |
| 01eabad4 MD |
1818 | } else { |
| 1819 | bref = &base[i]; | |
| 995e78dc MD |
1820 | } |
| 1821 | ||
| 1822 | /* | |
| 1823 | * Skip keys not in the chosen half (low or high), only bit | |
| 1824 | * (keybits - 1) needs to be compared but for safety we | |
| 1825 | * will compare all msb bits plus that bit again. | |
| 1826 | */ | |
| 37aa19df | 1827 | if ((~(((hammer2_key_t)1 << keybits) - 1) & |
| 995e78dc MD |
1828 | (key ^ bref->key)) != 0) { |
| 1829 | continue; | |
| 1830 | } | |
| 1831 | ||
| 1832 | /* | |
| 1833 | * This element is being moved, its slot is available | |
| 1834 | * for our indirect block. | |
| 1835 | */ | |
| 995e78dc MD |
1836 | if (ichain->index < 0) |
| 1837 | ichain->index = i; | |
| 995e78dc MD |
1838 | |
| 1839 | /* | |
| 1840 | * Load the new indirect block by acquiring or allocating | |
| 1841 | * the related chain entries, then simply move it to the | |
| 1842 | * new parent (ichain). | |
| 1843 | * | |
| 1844 | * Flagging the new chain entry MOVED will cause a flush | |
| 1845 | * to synchronize its block into the new indirect block. | |
| 5b4a2132 MD |
1846 | * The chain is unlocked after being moved but needs to |
| 1847 | * retain a reference for the MOVED state | |
| 1848 | * | |
| 1849 | * We must still set SUBMODIFIED in the parent but we do | |
| 1850 | * that after the loop. | |
| 1851 | * | |
| 1852 | * XXX we really need a lock here but we don't need the | |
| 1853 | * data. NODATA feature needed. | |
| 995e78dc MD |
1854 | */ |
| 1855 | chain = hammer2_chain_get(hmp, parent, i, | |
| 01eabad4 | 1856 | HAMMER2_LOOKUP_NODATA); |
| 995e78dc MD |
1857 | SPLAY_REMOVE(hammer2_chain_splay, &parent->shead, chain); |
| 1858 | if (SPLAY_INSERT(hammer2_chain_splay, &ichain->shead, chain)) | |
| 1859 | panic("hammer2_chain_create_indirect: collision"); | |
| 1860 | chain->parent = ichain; | |
| 01eabad4 MD |
1861 | if (base) |
| 1862 | bzero(&base[i], sizeof(base[i])); | |
| 995e78dc MD |
1863 | atomic_add_int(&parent->refs, -1); |
| 1864 | atomic_add_int(&ichain->refs, 1); | |
| 01eabad4 MD |
1865 | if ((chain->flags & HAMMER2_CHAIN_MOVED) == 0) { |
| 1866 | hammer2_chain_ref(hmp, chain); | |
| 5b4a2132 MD |
1867 | atomic_set_int(&chain->flags, HAMMER2_CHAIN_MOVED); |
| 1868 | } | |
| 01eabad4 | 1869 | hammer2_chain_unlock(hmp, chain); |
| 995e78dc MD |
1870 | KKASSERT(parent->refs > 0); |
| 1871 | chain = NULL; | |
| 1872 | } | |
| 1873 | ||
| 1874 | /* | |
| 1875 | * Insert the new indirect block into the parent now that we've | |
| 1876 | * cleared out some entries in the parent. We calculated a good | |
| 1877 | * insertion index in the loop above (ichain->index). | |
| 1878 | */ | |
| 1879 | KKASSERT(ichain->index >= 0); | |
| 995e78dc MD |
1880 | if (SPLAY_INSERT(hammer2_chain_splay, &parent->shead, ichain)) |
| 1881 | panic("hammer2_chain_create_indirect: ichain insertion"); | |
| 1882 | ichain->parent = parent; | |
| 1883 | atomic_add_int(&parent->refs, 1); | |
| 1884 | ||
| 1885 | /* | |
| 1886 | * Mark the new indirect block modified after insertion, which | |
| 1887 | * will propagate up through parent all the way to the root and | |
| 6ba3b984 MD |
1888 | * also allocate the physical block in ichain for our caller, |
| 1889 | * and assign ichain->data to a pre-zero'd space (because there | |
| 1890 | * is not prior data to copy into it). | |
| 995e78dc MD |
1891 | * |
| 1892 | * We have to set SUBMODIFIED in ichain's flags manually so the | |
| 1893 | * flusher knows it has to recurse through it to get to all of | |
| 222d9e22 MD |
1894 | * our moved blocks, then call setsubmod() to set the bit |
| 1895 | * recursively. | |
| 995e78dc | 1896 | */ |
| 01eabad4 | 1897 | hammer2_chain_modify(hmp, ichain, HAMMER2_MODIFY_OPTDATA); |
| 995e78dc | 1898 | atomic_set_int(&ichain->flags, HAMMER2_CHAIN_SUBMODIFIED); |
| 222d9e22 | 1899 | hammer2_chain_parent_setsubmod(hmp, ichain); |
| 995e78dc MD |
1900 | |
| 1901 | /* | |
| 1902 | * Figure out what to return. | |
| 1903 | */ | |
| 1904 | if (create_bits >= keybits) { | |
| 1905 | /* | |
| 1906 | * Key being created is way outside the key range, | |
| 1907 | * return the original parent. | |
| 1908 | */ | |
| 01eabad4 | 1909 | hammer2_chain_unlock(hmp, ichain); |
| 37aa19df | 1910 | } else if (~(((hammer2_key_t)1 << keybits) - 1) & |
| 995e78dc MD |
1911 | (create_key ^ key)) { |
| 1912 | /* | |
| 1913 | * Key being created is outside the key range, | |
| 1914 | * return the original parent. | |
| 1915 | */ | |
| 01eabad4 | 1916 | hammer2_chain_unlock(hmp, ichain); |
| 995e78dc MD |
1917 | } else { |
| 1918 | /* | |
| 1919 | * Otherwise its in the range, return the new parent. | |
| 1920 | */ | |
| 1921 | parent = ichain; | |
| 1922 | } | |
| 1923 | ||
| 995e78dc MD |
1924 | return(parent); |
| 1925 | } | |
| 1926 | ||
| 1927 | /* | |
| 5c23d7f1 MD |
1928 | * Physically delete the specified chain element. Note that inodes with |
| 1929 | * open descriptors should not be deleted (as with other filesystems) until | |
| 1930 | * the last open descriptor is closed. | |
| 1931 | * | |
| 1932 | * This routine will remove the chain element from its parent and potentially | |
| 1933 | * also recurse upward and delete indirect blocks which become empty as a | |
| 1934 | * side effect. | |
| 1935 | * | |
| 1936 | * The caller must pass a pointer to the chain's parent, also locked and | |
| 1937 | * referenced. (*parentp) will be modified in a manner similar to a lookup | |
| 1938 | * or iteration when indirect blocks are also deleted as a side effect. | |
| 1939 | */ | |
| 1940 | void | |
| 3ac6a319 | 1941 | hammer2_chain_delete(hammer2_mount_t *hmp, hammer2_chain_t *parent, |
| 5c23d7f1 MD |
1942 | hammer2_chain_t *chain) |
| 1943 | { | |
| 3ac6a319 MD |
1944 | hammer2_blockref_t *base; |
| 1945 | int count; | |
| 1946 | ||
| db0c2eb3 MD |
1947 | if (chain->parent != parent) |
| 1948 | panic("hammer2_chain_delete: parent mismatch"); | |
| 1949 | ||
| 3ac6a319 MD |
1950 | /* |
| 1951 | * Mark the parent modified so our base[] pointer remains valid | |
| 01eabad4 MD |
1952 | * while we move entries. For the optimized indirect block |
| 1953 | * case mark the parent moved instead. | |
| 3ac6a319 MD |
1954 | * |
| 1955 | * Calculate the blockref reference in the parent | |
| 1956 | */ | |
| 3ac6a319 MD |
1957 | switch(parent->bref.type) { |
| 1958 | case HAMMER2_BREF_TYPE_INODE: | |
| 01eabad4 | 1959 | hammer2_chain_modify(hmp, parent, 0); |
| 3ac6a319 MD |
1960 | base = &parent->data->ipdata.u.blockset.blockref[0]; |
| 1961 | count = HAMMER2_SET_COUNT; | |
| 1962 | break; | |
| 1963 | case HAMMER2_BREF_TYPE_INDIRECT: | |
| 01eabad4 MD |
1964 | hammer2_chain_modify(hmp, parent, HAMMER2_MODIFY_OPTDATA); |
| 1965 | if (parent->flags & HAMMER2_CHAIN_INITIAL) | |
| 1966 | base = NULL; | |
| 1967 | else | |
| 1968 | base = &parent->data->npdata.blockref[0]; | |
| 6ba3b984 | 1969 | count = parent->bytes / sizeof(hammer2_blockref_t); |
| 3ac6a319 MD |
1970 | break; |
| 1971 | case HAMMER2_BREF_TYPE_VOLUME: | |
| 01eabad4 | 1972 | hammer2_chain_modify(hmp, parent, 0); |
| 3ac6a319 MD |
1973 | base = &hmp->voldata.sroot_blockset.blockref[0]; |
| 1974 | count = HAMMER2_SET_COUNT; | |
| 1975 | break; | |
| 1976 | default: | |
| 1977 | panic("hammer2_chain_delete: unrecognized blockref type: %d", | |
| 1978 | parent->bref.type); | |
| 1979 | count = 0; | |
| 1980 | break; | |
| 1981 | } | |
| 6934ae32 MD |
1982 | |
| 1983 | /* | |
| 1984 | * Disconnect the bref in the parent, remove the chain, and | |
| 1985 | * disconnect in-memory fields from the parent. | |
| 1986 | */ | |
| 3ac6a319 | 1987 | KKASSERT(chain->index >= 0 && chain->index < count); |
| 01eabad4 MD |
1988 | if (base) |
| 1989 | bzero(&base[chain->index], sizeof(*base)); | |
| 6934ae32 | 1990 | |
| 3ac6a319 MD |
1991 | SPLAY_REMOVE(hammer2_chain_splay, &parent->shead, chain); |
| 1992 | atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED); | |
| 6934ae32 MD |
1993 | atomic_add_int(&parent->refs, -1); /* for splay entry */ |
| 1994 | chain->index = -1; | |
| 4e2004ea | 1995 | chain->parent = NULL; |
| 222d9e22 MD |
1996 | |
| 1997 | /* | |
| 1998 | * If this is an inode clear the pip. | |
| 1999 | */ | |
| 6934ae32 MD |
2000 | if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) |
| 2001 | chain->u.ip->pip = NULL; | |
| 4e2004ea MD |
2002 | |
| 2003 | /* | |
| 222d9e22 MD |
2004 | * The chain is still likely referenced, possibly even by a vnode |
| 2005 | * (if an inode), so defer further action until the chain gets | |
| 2006 | * dropped. | |
| 4e2004ea | 2007 | */ |
| 5c23d7f1 | 2008 | } |
| b7926f31 MD |
2009 | |
| 2010 | /* | |
| 2011 | * Recursively flush the specified chain. The chain is locked and | |
| 1c9f601e MD |
2012 | * referenced by the caller and will remain so on return. The chain |
| 2013 | * will remain referenced throughout but can temporarily lose its | |
| 2014 | * lock during the recursion to avoid unnecessarily stalling user | |
| 2015 | * processes. | |
| b7926f31 | 2016 | * |
| 73e441b9 | 2017 | * |
| b7926f31 | 2018 | */ |
| 1c9f601e MD |
2019 | TAILQ_HEAD(flush_deferral_list, hammer2_chain); |
| 2020 | ||
| 2021 | struct hammer2_flush_info { | |
| 2022 | struct flush_deferral_list flush_list; | |
| 2023 | int depth; | |
| 2024 | }; | |
| 2025 | ||
| 2026 | typedef struct hammer2_flush_info hammer2_flush_info_t; | |
| 2027 | ||
| 73e441b9 | 2028 | static void |
| 1c9f601e MD |
2029 | hammer2_chain_flush_pass1(hammer2_mount_t *hmp, hammer2_chain_t *parent, |
| 2030 | hammer2_flush_info_t *info) | |
| b7926f31 | 2031 | { |
| 8cce658d MD |
2032 | hammer2_blockref_t *bref; |
| 2033 | hammer2_off_t pbase; | |
| 01eabad4 MD |
2034 | size_t bbytes; |
| 2035 | size_t boff; | |
| 2036 | char *bdata; | |
| 8cce658d | 2037 | struct buf *bp; |
| 01eabad4 | 2038 | int error; |
| 8cce658d | 2039 | |
| 1c9f601e MD |
2040 | /* |
| 2041 | * If we hit the stack recursion depth limit defer the operation. | |
| 2042 | * The controller of the info structure will execute the deferral | |
| 2043 | * list and then retry. | |
| 2044 | * | |
| 2045 | * This is only applicable if SUBMODIFIED is set. After a reflush | |
| 2046 | * SUBMODIFIED will probably be cleared and we want to drop through | |
| 2047 | * to finish processing the current element so our direct parent | |
| 2048 | * can process the results. | |
| 2049 | */ | |
| 2050 | if (info->depth == HAMMER2_FLUSH_DEPTH_LIMIT && | |
| 2051 | (parent->flags & HAMMER2_CHAIN_SUBMODIFIED)) { | |
| 2052 | if ((parent->flags & HAMMER2_CHAIN_DEFERRED) == 0 && | |
| 2053 | ((parent->flags & (HAMMER2_CHAIN_SUBMODIFIED | | |
| 2910a90c MD |
2054 | HAMMER2_CHAIN_MODIFIED | |
| 2055 | HAMMER2_CHAIN_MODIFIED_AUX | | |
| 1c9f601e MD |
2056 | HAMMER2_CHAIN_MOVED)) != 0)) { |
| 2057 | hammer2_chain_ref(hmp, parent); | |
| 2058 | TAILQ_INSERT_TAIL(&info->flush_list, | |
| 2059 | parent, flush_node); | |
| 2060 | atomic_set_int(&parent->flags, HAMMER2_CHAIN_DEFERRED); | |
| 2061 | } | |
| 2062 | return; | |
| 2063 | } | |
| 2064 | ||
| 222d9e22 MD |
2065 | if (hammer2_debug & 0x0008) |
| 2066 | kprintf("%*.*sCHAIN type=%d@%08jx %p/%d %04x {\n", | |
| 1c9f601e MD |
2067 | info->depth, info->depth, "", |
| 2068 | parent->bref.type, parent->bref.data_off, | |
| 2069 | parent, parent->refs, parent->flags); | |
| 222d9e22 | 2070 | |
| b7926f31 | 2071 | /* |
| 1c9f601e | 2072 | * Flush any children of this parent. |
| 222d9e22 MD |
2073 | * |
| 2074 | * NOTE: If we use a while() here an active filesystem can | |
| 2075 | * prevent the flush from ever finishing. | |
| b7926f31 | 2076 | */ |
| 1c9f601e | 2077 | if (parent->flags & HAMMER2_CHAIN_SUBMODIFIED) { |
| b7926f31 | 2078 | hammer2_blockref_t *base; |
| 1c9f601e | 2079 | hammer2_chain_t *chain; |
| c667909f | 2080 | hammer2_chain_t *next; |
| b7926f31 MD |
2081 | int count; |
| 2082 | int submodified = 0; | |
| 222d9e22 | 2083 | int submoved = 0; |
| b7926f31 MD |
2084 | |
| 2085 | /* | |
| 222d9e22 MD |
2086 | * Clear SUBMODIFIED now. Flag any races during the flush |
| 2087 | * with the (submodified) local variable and re-arm it | |
| 2088 | * as necessary after the loop is done. | |
| 73e441b9 | 2089 | * |
| 1c9f601e | 2090 | * Delaying the setting of the parent to MODIFIED can reduce |
| 222d9e22 | 2091 | * unnecessary I/O. |
| b7926f31 | 2092 | * |
| 222d9e22 MD |
2093 | * Modifications to the children will propagate up, forcing |
| 2094 | * us to become modified and copy-on-write too. Be sure | |
| 1c9f601e | 2095 | * to modify parent (as a side effect of the recursive |
| 222d9e22 MD |
2096 | * flush) ONLY if it is actually being modified by the |
| 2097 | * recursive flush. | |
| b7926f31 | 2098 | */ |
| 1c9f601e | 2099 | atomic_clear_int(&parent->flags, HAMMER2_CHAIN_SUBMODIFIED); |
| b7926f31 MD |
2100 | |
| 2101 | /* | |
| c667909f MD |
2102 | * Flush the children and update the blockrefs in the parent. |
| 2103 | * Be careful of ripouts during the loop. | |
| b7926f31 | 2104 | */ |
| 1c9f601e MD |
2105 | next = SPLAY_MIN(hammer2_chain_splay, &parent->shead); |
| 2106 | while ((chain = next) != NULL) { | |
| 222d9e22 | 2107 | next = SPLAY_NEXT(hammer2_chain_splay, |
| 1c9f601e MD |
2108 | &parent->shead, chain); |
| 2109 | /* | |
| 2110 | * We only recurse if SUBMODIFIED (internal node) | |
| 2910a90c | 2111 | * or MODIFIED (internal node or leaf) is set. |
| 1c9f601e MD |
2112 | * However, we must still track whether any MOVED |
| 2113 | * entries are present to determine if the parent's | |
| 2114 | * blockref's need updating or not. | |
| 2115 | */ | |
| 2116 | if (chain->flags & HAMMER2_CHAIN_MOVED) | |
| 2117 | submoved = 1; | |
| 2118 | if ((chain->flags & (HAMMER2_CHAIN_SUBMODIFIED | | |
| 2910a90c MD |
2119 | HAMMER2_CHAIN_MODIFIED | |
| 2120 | HAMMER2_CHAIN_MODIFIED_AUX)) == 0) { | |
| 8cce658d MD |
2121 | continue; |
| 2122 | } | |
| 222d9e22 MD |
2123 | |
| 2124 | /* | |
| 1c9f601e MD |
2125 | * Propagate the DESTROYED flag if found set, then |
| 2126 | * recurse the flush. | |
| 222d9e22 | 2127 | */ |
| 1c9f601e MD |
2128 | hammer2_chain_lock(hmp, chain, HAMMER2_RESOLVE_MAYBE); |
| 2129 | if ((parent->flags & HAMMER2_CHAIN_DESTROYED) && | |
| 2130 | (chain->flags & HAMMER2_CHAIN_DESTROYED) == 0) { | |
| 2131 | atomic_set_int(&chain->flags, | |
| 2132 | HAMMER2_CHAIN_DESTROYED | | |
| 2133 | HAMMER2_CHAIN_SUBMODIFIED); | |
| 222d9e22 | 2134 | } |
| 1c9f601e MD |
2135 | ++info->depth; |
| 2136 | hammer2_chain_flush_pass1(hmp, chain, info); | |
| 2137 | --info->depth; | |
| 222d9e22 MD |
2138 | |
| 2139 | /* | |
| 2140 | * No point loading blockrefs yet if the | |
| 2141 | * child (recursively) is still dirty. | |
| 2142 | */ | |
| 1c9f601e | 2143 | if (chain->flags & (HAMMER2_CHAIN_SUBMODIFIED | |
| 2910a90c MD |
2144 | HAMMER2_CHAIN_MODIFIED | |
| 2145 | HAMMER2_CHAIN_MODIFIED_AUX)) { | |
| 8cce658d | 2146 | submodified = 1; |
| 222d9e22 MD |
2147 | if (hammer2_debug & 0x0008) |
| 2148 | kprintf("s"); | |
| 2149 | } | |
| 1c9f601e | 2150 | if (chain->flags & HAMMER2_CHAIN_MOVED) { |
| 222d9e22 MD |
2151 | if (hammer2_debug & 0x0008) |
| 2152 | kprintf("m"); | |
| 2153 | submoved = 1; | |
| 2154 | } | |
| 2155 | if (hammer2_debug & 0x0008) | |
| 2156 | kprintf("\n"); | |
| 1c9f601e | 2157 | hammer2_chain_unlock(hmp, chain); |
| 222d9e22 MD |
2158 | } |
| 2159 | ||
| 1c9f601e MD |
2160 | if (submodified || |
| 2161 | (parent->flags & HAMMER2_CHAIN_SUBMODIFIED)) { | |
| 222d9e22 MD |
2162 | /* |
| 2163 | * No point loading up the blockrefs if submodified | |
| 2164 | * got re-set. | |
| 214f4a77 MD |
2165 | * |
| 2166 | * NOTE: Even though we cleared the SUBMODIFIED flag | |
| 2167 | * it can still get re-set by operations | |
| 2168 | * occuring under our chain, so check both. | |
| 222d9e22 | 2169 | */ |
| 1c9f601e | 2170 | atomic_set_int(&parent->flags, |
| 222d9e22 MD |
2171 | HAMMER2_CHAIN_SUBMODIFIED); |
| 2172 | } else if (submoved) { | |
| 2173 | /* | |
| 1c9f601e | 2174 | * Ok, we can modify the blockrefs in this parent |
| 222d9e22 MD |
2175 | * entry. Mark it modified. Calculate the |
| 2176 | * blockref array after marking it modified (since | |
| 2177 | * that may change the underlying data ptr). | |
| 2178 | * | |
| 2179 | * NOTE: We only do this if submoved != 0, otherwise | |
| 2180 | * there may not be any changes and setting | |
| 1c9f601e | 2181 | * the parent modified will re-arm the MOVED |
| 222d9e22 MD |
2182 | * bit recursively, resulting in O(N^2) |
| 2183 | * flushes. | |
| 214f4a77 MD |
2184 | * |
| 2185 | * NOTE: We don't want hammer2_chain_modify() to | |
| 2186 | * recursively set the SUBMODIFIED flag | |
| 2187 | * upward in this case! | |
| 222d9e22 | 2188 | */ |
| 1c9f601e | 2189 | hammer2_chain_modify(hmp, parent, HAMMER2_MODIFY_NOSUB); |
| 222d9e22 | 2190 | |
| 1c9f601e | 2191 | switch(parent->bref.type) { |
| 222d9e22 | 2192 | case HAMMER2_BREF_TYPE_INODE: |
| 1c9f601e | 2193 | base = &parent->data->ipdata.u.blockset. |
| 222d9e22 MD |
2194 | blockref[0]; |
| 2195 | count = HAMMER2_SET_COUNT; | |
| 2196 | break; | |
| 2197 | case HAMMER2_BREF_TYPE_INDIRECT: | |
| 1c9f601e MD |
2198 | base = &parent->data->npdata.blockref[0]; |
| 2199 | count = parent->bytes / | |
| 222d9e22 MD |
2200 | sizeof(hammer2_blockref_t); |
| 2201 | break; | |
| 2202 | case HAMMER2_BREF_TYPE_VOLUME: | |
| 2203 | base = &hmp->voldata.sroot_blockset.blockref[0]; | |
| 2204 | count = HAMMER2_SET_COUNT; | |
| 2205 | break; | |
| 2206 | default: | |
| 2207 | base = NULL; | |
| 2208 | panic("hammer2_chain_get: " | |
| 2209 | "unrecognized blockref type: %d", | |
| 1c9f601e | 2210 | parent->bref.type); |
| 222d9e22 MD |
2211 | } |
| 2212 | ||
| 2213 | /* | |
| 2214 | * Update the blockrefs. | |
| 2215 | */ | |
| 1c9f601e MD |
2216 | next = SPLAY_MIN(hammer2_chain_splay, &parent->shead); |
| 2217 | while ((chain = next) != NULL) { | |
| 222d9e22 | 2218 | next = SPLAY_NEXT(hammer2_chain_splay, |
| 1c9f601e MD |
2219 | &parent->shead, chain); |
| 2220 | KKASSERT(chain->index >= 0 && | |
| 2221 | chain->index < count); | |
| 2222 | hammer2_chain_lock(hmp, chain, | |
| 01eabad4 | 2223 | HAMMER2_RESOLVE_NEVER); |
| 1c9f601e MD |
2224 | base[chain->index] = chain->bref; |
| 2225 | if (chain->flags & HAMMER2_CHAIN_MOVED) { | |
| 2226 | atomic_clear_int(&chain->flags, | |
| 8cce658d | 2227 | HAMMER2_CHAIN_MOVED); |
| 1c9f601e | 2228 | hammer2_chain_drop(hmp, chain); |
| b7926f31 | 2229 | } |
| 1c9f601e | 2230 | hammer2_chain_unlock(hmp, chain); |
| b7926f31 MD |
2231 | } |
| 2232 | } | |
| 222d9e22 MD |
2233 | } |
| 2234 | ||
| 2235 | /* | |
| 2910a90c | 2236 | * If destroying the object we unconditonally clear the MODIFIED |
| 222d9e22 MD |
2237 | * and MOVED bits, and we destroy the buffer without writing it |
| 2238 | * out. | |
| 2239 | * | |
| 2240 | * We don't bother updating the hash/crc or the parent bref. | |
| 2241 | * | |
| 2242 | * XXX allocations for unflushed data can be returned to the | |
| 2243 | * free pool. | |
| 2244 | */ | |
| 1c9f601e | 2245 | if (parent->flags & HAMMER2_CHAIN_DESTROYED) { |
| 2910a90c | 2246 | if (parent->flags & HAMMER2_CHAIN_MODIFIED) { |
| 1c9f601e MD |
2247 | if (parent->bp) { |
| 2248 | parent->bp->b_flags |= B_INVAL|B_RELBUF; | |
| 222d9e22 | 2249 | } |
| 1c9f601e | 2250 | atomic_clear_int(&parent->flags, |
| 2910a90c | 2251 | HAMMER2_CHAIN_MODIFIED); |
| 1c9f601e | 2252 | hammer2_chain_drop(hmp, parent); |
| 222d9e22 | 2253 | } |
| 2910a90c MD |
2254 | if (parent->flags & HAMMER2_CHAIN_MODIFIED_AUX) { |
| 2255 | atomic_clear_int(&parent->flags, | |
| 2256 | HAMMER2_CHAIN_MODIFIED_AUX); | |
| 2257 | } | |
| 1c9f601e MD |
2258 | if (parent->flags & HAMMER2_CHAIN_MOVED) { |
| 2259 | atomic_clear_int(&parent->flags, | |
| 222d9e22 | 2260 | HAMMER2_CHAIN_MOVED); |
| 1c9f601e | 2261 | hammer2_chain_drop(hmp, parent); |
| b7926f31 | 2262 | } |
| 222d9e22 | 2263 | return; |
| b7926f31 MD |
2264 | } |
| 2265 | ||
| 2266 | /* | |
| db71f61f | 2267 | * Flush this chain entry only if it is marked modified. |
| b7926f31 | 2268 | */ |
| 2910a90c MD |
2269 | if ((parent->flags & (HAMMER2_CHAIN_MODIFIED | |
| 2270 | HAMMER2_CHAIN_MODIFIED_AUX)) == 0) { | |
| 222d9e22 | 2271 | goto done; |
| 222d9e22 | 2272 | } |
| 73e441b9 MD |
2273 | |
| 2274 | /* | |
| 2910a90c | 2275 | * Clear MODIFIED and set HAMMER2_CHAIN_MOVED. The caller |
| 222d9e22 MD |
2276 | * will re-test the MOVED bit. |
| 2277 | * | |
| 73e441b9 MD |
2278 | * bits own a single parent ref and the MOVED bit owns its own |
| 2279 | * parent ref. | |
| 2280 | */ | |
| 2910a90c MD |
2281 | if (parent->flags & HAMMER2_CHAIN_MODIFIED) { |
| 2282 | atomic_clear_int(&parent->flags, HAMMER2_CHAIN_MODIFIED); | |
| 2283 | if (parent->flags & HAMMER2_CHAIN_MOVED) { | |
| 2284 | hammer2_chain_drop(hmp, parent); | |
| 2285 | } else { | |
| 2286 | /* inherit ref from the MODIFIED we cleared */ | |
| 2287 | atomic_set_int(&parent->flags, HAMMER2_CHAIN_MOVED); | |
| 2288 | } | |
| 995e78dc | 2289 | } |
| 2910a90c | 2290 | atomic_clear_int(&parent->flags, HAMMER2_CHAIN_MODIFIED_AUX); |
| b7926f31 MD |
2291 | |
| 2292 | /* | |
| 2293 | * If this is part of a recursive flush we can go ahead and write | |
| 2294 | * out the buffer cache buffer and pass a new bref back up the chain. | |
| 2295 | * | |
| 2296 | * This will never be a volume header. | |
| 2297 | */ | |
| 1c9f601e | 2298 | switch(parent->bref.type) { |
| 8cce658d MD |
2299 | case HAMMER2_BREF_TYPE_VOLUME: |
| 2300 | /* | |
| 2301 | * The volume header is flushed manually by the syncer, not | |
| 2302 | * here. | |
| 2303 | */ | |
| 2304 | break; | |
| 2305 | case HAMMER2_BREF_TYPE_DATA: | |
| 2306 | /* | |
| 2307 | * Data elements have already been flushed via the logical | |
| 2308 | * file buffer cache. Their hash was set in the bref by | |
| 2309 | * the vop_write code. | |
| 1c9f601e MD |
2310 | * |
| 2311 | * Make sure the buffer(s) have been flushed out here. | |
| 8cce658d | 2312 | */ |
| 1c9f601e MD |
2313 | #if 1 |
| 2314 | bbytes = parent->bytes; | |
| 2315 | pbase = parent->bref.data_off & ~(hammer2_off_t)(bbytes - 1); | |
| 2316 | boff = parent->bref.data_off & HAMMER2_OFF_MASK & (bbytes - 1); | |
| 2317 | ||
| 2318 | bp = getblk(hmp->devvp, pbase, bbytes, GETBLK_NOWAIT, 0); | |
| 2319 | if (bp) { | |
| 2320 | if ((bp->b_flags & (B_CACHE | B_DIRTY)) == | |
| 2321 | (B_CACHE | B_DIRTY)) { | |
| 2322 | kprintf("x"); | |
| 2323 | cluster_awrite(bp); | |
| 2324 | } else { | |
| 2325 | bp->b_flags |= B_RELBUF; | |
| 2326 | brelse(bp); | |
| 2327 | } | |
| 2328 | } | |
| 2329 | #endif | |
| 8cce658d | 2330 | break; |
| 01eabad4 MD |
2331 | case HAMMER2_BREF_TYPE_INDIRECT: |
| 2332 | /* | |
| 2333 | * Indirect blocks may be in an INITIAL state. | |
| 2334 | */ | |
| 2335 | break; | |
| 8cce658d | 2336 | default: |
| 01eabad4 MD |
2337 | /* |
| 2338 | * Embedded elements have to be flushed out. | |
| 2339 | */ | |
| 1c9f601e MD |
2340 | KKASSERT(parent->data != NULL); |
| 2341 | bref = &parent->bref; | |
| b7926f31 | 2342 | |
| 01eabad4 | 2343 | KKASSERT((bref->data_off & HAMMER2_OFF_MASK) != 0); |
| db71f61f | 2344 | |
| 1c9f601e | 2345 | if (parent->bp == NULL) { |
| db71f61f MD |
2346 | /* |
| 2347 | * The data is embedded, we have to acquire the | |
| 2348 | * buffer cache buffer and copy the data into it. | |
| 2349 | */ | |
| 1c9f601e | 2350 | if ((bbytes = parent->bytes) < HAMMER2_MINIOSIZE) |
| 01eabad4 MD |
2351 | bbytes = HAMMER2_MINIOSIZE; |
| 2352 | pbase = bref->data_off & ~(hammer2_off_t)(bbytes - 1); | |
| 2353 | boff = bref->data_off & HAMMER2_OFF_MASK & (bbytes - 1); | |
| 2354 | ||
| 2355 | /* | |
| 2356 | * The getblk() optimization can only be used if the | |
| 2357 | * physical block size matches the request. | |
| 2358 | */ | |
| 1c9f601e | 2359 | if (parent->bytes == bbytes) { |
| 01eabad4 MD |
2360 | bp = getblk(hmp->devvp, pbase, bbytes, 0, 0); |
| 2361 | error = 0; | |
| 2362 | } else { | |
| 2363 | error = bread(hmp->devvp, pbase, bbytes, &bp); | |
| 2364 | KKASSERT(error == 0); | |
| 2365 | } | |
| 2366 | bdata = (char *)bp->b_data + boff; | |
| db71f61f | 2367 | |
| b7926f31 MD |
2368 | /* |
| 2369 | * Copy the data to the buffer, mark the buffer | |
| 1c9f601e | 2370 | * dirty, and convert the parent to unmodified. |
| b7926f31 | 2371 | */ |
| 1c9f601e | 2372 | bcopy(parent->data, bdata, parent->bytes); |
| 8cce658d | 2373 | bp->b_flags |= B_CLUSTEROK; |
| db71f61f MD |
2374 | bdwrite(bp); |
| 2375 | bp = NULL; | |
| 1c9f601e MD |
2376 | parent->bref.check.iscsi32.value = |
| 2377 | hammer2_icrc32(parent->data, parent->bytes); | |
| 2378 | if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) | |
| 01eabad4 MD |
2379 | ++hammer2_iod_meta_write; |
| 2380 | else | |
| 2381 | ++hammer2_iod_indr_write; | |
| 8cce658d | 2382 | } else { |
| 1c9f601e MD |
2383 | parent->bref.check.iscsi32.value = |
| 2384 | hammer2_icrc32(parent->data, parent->bytes); | |
| b7926f31 | 2385 | } |
| 73e441b9 | 2386 | } |
| b7926f31 | 2387 | |
| 8cce658d MD |
2388 | /* |
| 2389 | * Special handling | |
| 2390 | */ | |
| 1c9f601e | 2391 | bref = &parent->bref; |
| b7926f31 | 2392 | |
| 8cce658d MD |
2393 | switch(bref->type) { |
| 2394 | case HAMMER2_BREF_TYPE_VOLUME: | |
| 1c9f601e MD |
2395 | KKASSERT(parent->data != NULL); |
| 2396 | KKASSERT(parent->bp == NULL); | |
| 8cce658d MD |
2397 | |
| 2398 | hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT1]= | |
| 2399 | hammer2_icrc32( | |
| 2400 | (char *)&hmp->voldata + | |
| 2401 | HAMMER2_VOLUME_ICRC1_OFF, | |
| 2402 | HAMMER2_VOLUME_ICRC1_SIZE); | |
| 2403 | hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT0]= | |
| 2404 | hammer2_icrc32( | |
| 2405 | (char *)&hmp->voldata + | |
| 2406 | HAMMER2_VOLUME_ICRC0_OFF, | |
| 2407 | HAMMER2_VOLUME_ICRC0_SIZE); | |
| 2408 | hmp->voldata.icrc_volheader = | |
| 2409 | hammer2_icrc32( | |
| 2410 | (char *)&hmp->voldata + | |
| 2411 | HAMMER2_VOLUME_ICRCVH_OFF, | |
| 2412 | HAMMER2_VOLUME_ICRCVH_SIZE); | |
| 2413 | break; | |
| b7926f31 | 2414 | } |
| 222d9e22 | 2415 | done: |
| 1c9f601e | 2416 | if (hammer2_debug & 0x0008) { |
| 222d9e22 | 2417 | kprintf("%*.*s} %p/%d %04x ", |
| 1c9f601e MD |
2418 | info->depth, info->depth, "", |
| 2419 | parent, parent->refs, parent->flags); | |
| 2420 | } | |
| b7926f31 | 2421 | } |
| 73e441b9 MD |
2422 | |
| 2423 | #if 0 | |
| 2424 | /* | |
| 2425 | * PASS2 - not yet implemented (should be called only with the root chain?) | |
| 2426 | */ | |
| 2427 | static void | |
| 2428 | hammer2_chain_flush_pass2(hammer2_mount_t *hmp, hammer2_chain_t *chain) | |
| 2429 | { | |
| 2430 | } | |
| 2431 | #endif | |
| 2432 | ||
| 222d9e22 MD |
2433 | /* |
| 2434 | * Stand-alone flush. If the chain is unable to completely flush we have | |
| 2435 | * to be sure that SUBMODIFIED propagates up the parent chain. | |
| 2436 | * | |
| 2437 | * This routine can be called from several places but the most important | |
| 2438 | * is from the hammer2_vop_reclaim() function. We want to try to completely | |
| 2439 | * clean out the inode structure to prevent disconnected inodes from | |
| 2440 | * building up and blowing out the kmalloc pool. | |
| 2441 | */ | |
| 73e441b9 MD |
2442 | void |
| 2443 | hammer2_chain_flush(hammer2_mount_t *hmp, hammer2_chain_t *chain) | |
| 2444 | { | |
| 222d9e22 | 2445 | hammer2_chain_t *parent; |
| 1c9f601e | 2446 | hammer2_chain_t *scan; |
| 222d9e22 | 2447 | hammer2_blockref_t *base; |
| 1c9f601e | 2448 | hammer2_flush_info_t info; |
| 222d9e22 | 2449 | int count; |
| 1c9f601e | 2450 | int reflush; |
| 222d9e22 MD |
2451 | |
| 2452 | /* | |
| 1c9f601e MD |
2453 | * Execute the recursive flush and handle deferrals. |
| 2454 | * | |
| 2455 | * Chains can be ridiculously long (thousands deep), so to | |
| 2456 | * avoid blowing out the kernel stack the recursive flush has a | |
| 2457 | * depth limit. Elements at the limit are placed on a list | |
| 2458 | * for re-execution after the stack has been popped. | |
| 222d9e22 | 2459 | */ |
| 1c9f601e MD |
2460 | bzero(&info, sizeof(info)); |
| 2461 | TAILQ_INIT(&info.flush_list); | |
| 2462 | reflush = 1; | |
| 2463 | ||
| 2464 | while (reflush) { | |
| 2465 | /* | |
| 2466 | * Primary recursion | |
| 2467 | */ | |
| 2468 | hammer2_chain_flush_pass1(hmp, chain, &info); | |
| 2469 | reflush = 0; | |
| 2470 | ||
| 2471 | while ((scan = TAILQ_FIRST(&info.flush_list)) != NULL) { | |
| 2472 | /* | |
| 2473 | * Secondary recursion. Note that a reference is | |
| 2474 | * retained from the element's presence on the | |
| 2475 | * deferral list. | |
| 2476 | */ | |
| 2477 | KKASSERT(scan->flags & HAMMER2_CHAIN_DEFERRED); | |
| 2478 | TAILQ_REMOVE(&info.flush_list, scan, flush_node); | |
| 2479 | atomic_clear_int(&scan->flags, HAMMER2_CHAIN_DEFERRED); | |
| 2480 | ||
| 2481 | /* | |
| 2482 | * Now that we've popped back up we can do a secondary | |
| 2483 | * recursion on the deferred elements. | |
| 2484 | */ | |
| 2485 | if (hammer2_debug & 0x0040) | |
| 2486 | kprintf("defered flush %p\n", scan); | |
| 2487 | hammer2_chain_lock(hmp, scan, HAMMER2_RESOLVE_MAYBE); | |
| 2488 | hammer2_chain_flush(hmp, scan); | |
| 2489 | hammer2_chain_unlock(hmp, scan); | |
| 2490 | ||
| 2491 | /* | |
| 2492 | * Only flag a reflush if SUBMODIFIED is no longer | |
| 2493 | * set. If SUBMODIFIED is set the element will just | |
| 2494 | * wind up on our flush_list again. | |
| 2495 | */ | |
| 2496 | if ((scan->flags & (HAMMER2_CHAIN_SUBMODIFIED | | |
| 2910a90c MD |
2497 | HAMMER2_CHAIN_MODIFIED | |
| 2498 | HAMMER2_CHAIN_MODIFIED_AUX)) == 0) { | |
| 1c9f601e | 2499 | reflush = 1; |
| 2910a90c | 2500 | } |
| 1c9f601e MD |
2501 | hammer2_chain_drop(hmp, scan); |
| 2502 | } | |
| 2503 | if ((hammer2_debug & 0x0040) && reflush) | |
| 2504 | kprintf("reflush %p\n", chain); | |
| 2505 | } | |
| 222d9e22 MD |
2506 | |
| 2507 | /* | |
| 2508 | * The SUBMODIFIED bit must propagate upward if the chain could not | |
| 2509 | * be completely flushed. | |
| 2510 | */ | |
| 2511 | if (chain->flags & (HAMMER2_CHAIN_SUBMODIFIED | | |
| 2910a90c MD |
2512 | HAMMER2_CHAIN_MODIFIED | |
| 2513 | HAMMER2_CHAIN_MODIFIED_AUX | | |
| 222d9e22 MD |
2514 | HAMMER2_CHAIN_MOVED)) { |
| 2515 | hammer2_chain_parent_setsubmod(hmp, chain); | |
| 2516 | } | |
| 2517 | ||
| 2518 | /* | |
| 2519 | * If the only thing left is a simple bref update try to | |
| 2520 | * pro-actively update the parent, otherwise return early. | |
| 2521 | */ | |
| 2522 | parent = chain->parent; | |
| 2523 | if (parent == NULL || | |
| 2524 | chain->bref.type != HAMMER2_BREF_TYPE_INODE || | |
| 2525 | (chain->flags & (HAMMER2_CHAIN_SUBMODIFIED | | |
| 2910a90c MD |
2526 | HAMMER2_CHAIN_MODIFIED | |
| 2527 | HAMMER2_CHAIN_MODIFIED_AUX | | |
| 222d9e22 MD |
2528 | HAMMER2_CHAIN_MOVED)) != HAMMER2_CHAIN_MOVED) { |
| 2529 | return; | |
| 2530 | } | |
| 2531 | ||
| 2532 | /* | |
| 2533 | * We are locking backwards so allow the lock to fail | |
| 2534 | */ | |
| 2535 | if (lockmgr(&parent->lk, LK_EXCLUSIVE | LK_NOWAIT) != 0) { | |
| 2536 | return; | |
| 2537 | } | |
| 214f4a77 MD |
2538 | |
| 2539 | /* | |
| 01eabad4 | 2540 | * We are updating brefs but we have to call chain_modify() w/ |
| 214f4a77 MD |
2541 | * setsubmod = TRUE because our caller is not a recursive |
| 2542 | * flush. | |
| 2543 | */ | |
| 01eabad4 MD |
2544 | hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_MAYBE); |
| 2545 | hammer2_chain_modify(hmp, parent, 0); | |
| 222d9e22 MD |
2546 | |
| 2547 | switch(parent->bref.type) { | |
| 2548 | case HAMMER2_BREF_TYPE_INODE: | |
| 2549 | base = &parent->data->ipdata.u.blockset. | |
| 2550 | blockref[0]; | |
| 2551 | count = HAMMER2_SET_COUNT; | |
| 2552 | break; | |
| 2553 | case HAMMER2_BREF_TYPE_INDIRECT: | |
| 2554 | base = &parent->data->npdata.blockref[0]; | |
| 2555 | count = parent->bytes / | |
| 2556 | sizeof(hammer2_blockref_t); | |
| 2557 | break; | |
| 2558 | case HAMMER2_BREF_TYPE_VOLUME: | |
| 2559 | base = &hmp->voldata.sroot_blockset.blockref[0]; | |
| 2560 | count = HAMMER2_SET_COUNT; | |
| 2561 | break; | |
| 2562 | default: | |
| 2563 | base = NULL; | |
| 2564 | panic("hammer2_chain_flush: " | |
| 2565 | "unrecognized blockref type: %d", | |
| 2566 | parent->bref.type); | |
| 2567 | } | |
| 2568 | ||
| 2569 | /* | |
| 2570 | * Update the blockref in the parent | |
| 2571 | */ | |
| 2572 | KKASSERT(chain->index >= 0 && | |
| 2573 | chain->index < count); | |
| 2574 | base[chain->index] = chain->bref; | |
| 2575 | if (chain->flags & HAMMER2_CHAIN_MOVED) { | |
| 2576 | atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MOVED); | |
| 2577 | hammer2_chain_drop(hmp, chain); | |
| 2578 | } | |
| 2579 | ||
| 01eabad4 MD |
2580 | lockmgr(&parent->lk, LK_RELEASE); /* release manual lockmgr op */ |
| 2581 | hammer2_chain_unlock(hmp, parent); | |
| 73e441b9 | 2582 | } |