| 1 | /* |
| 2 | * Copyright (c) 2008 The DragonFly Project. All rights reserved. |
| 3 | * |
| 4 | * This code is derived from software contributed to The DragonFly Project |
| 5 | * by Matthew Dillon <dillon@backplane.com> |
| 6 | * |
| 7 | * Redistribution and use in source and binary forms, with or without |
| 8 | * modification, are permitted provided that the following conditions |
| 9 | * are met: |
| 10 | * |
| 11 | * 1. Redistributions of source code must retain the above copyright |
| 12 | * notice, this list of conditions and the following disclaimer. |
| 13 | * 2. Redistributions in binary form must reproduce the above copyright |
| 14 | * notice, this list of conditions and the following disclaimer in |
| 15 | * the documentation and/or other materials provided with the |
| 16 | * distribution. |
| 17 | * 3. Neither the name of The DragonFly Project nor the names of its |
| 18 | * contributors may be used to endorse or promote products derived |
| 19 | * from this software without specific, prior written permission. |
| 20 | * |
| 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 22 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
| 24 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
| 25 | * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
| 26 | * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, |
| 27 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 28 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED |
| 29 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| 30 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
| 31 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 32 | * SUCH DAMAGE. |
| 33 | * |
| 34 | * $DragonFly: src/sys/vfs/hammer/hammer_reblock.c,v 1.11 2008/05/05 20:34:48 dillon Exp $ |
| 35 | */ |
| 36 | /* |
| 37 | * HAMMER reblocker - This code frees up fragmented physical space |
| 38 | * |
| 39 | * HAMMER only keeps track of free space on a big-block basis. A big-block |
| 40 | * containing holes can only be freed by migrating the remaining data in |
| 41 | * that big-block into a new big-block, then freeing the big-block. |
| 42 | * |
| 43 | * This function is called from an ioctl or via the hammer support thread. |
| 44 | */ |
| 45 | |
| 46 | #include "hammer.h" |
| 47 | |
| 48 | static int hammer_reblock_helper(struct hammer_ioc_reblock *reblock, |
| 49 | hammer_cursor_t cursor, |
| 50 | hammer_btree_elm_t elm); |
| 51 | static int hammer_reblock_data(struct hammer_ioc_reblock *reblock, |
| 52 | hammer_cursor_t cursor, hammer_btree_elm_t elm); |
| 53 | static int hammer_reblock_record(struct hammer_ioc_reblock *reblock, |
| 54 | hammer_cursor_t cursor, hammer_btree_elm_t elm); |
| 55 | static int hammer_reblock_node(struct hammer_ioc_reblock *reblock, |
| 56 | hammer_cursor_t cursor, hammer_btree_elm_t elm); |
| 57 | |
| 58 | int |
| 59 | hammer_ioc_reblock(hammer_transaction_t trans, hammer_inode_t ip, |
| 60 | struct hammer_ioc_reblock *reblock) |
| 61 | { |
| 62 | struct hammer_cursor cursor; |
| 63 | hammer_btree_elm_t elm; |
| 64 | int error; |
| 65 | |
| 66 | if (reblock->beg_obj_id >= reblock->end_obj_id) |
| 67 | return(EINVAL); |
| 68 | if (reblock->free_level < 0) |
| 69 | return(EINVAL); |
| 70 | |
| 71 | retry: |
| 72 | error = hammer_init_cursor(trans, &cursor, NULL, NULL); |
| 73 | if (error) { |
| 74 | hammer_done_cursor(&cursor); |
| 75 | return(error); |
| 76 | } |
| 77 | cursor.key_beg.obj_id = reblock->cur_obj_id; |
| 78 | cursor.key_beg.key = HAMMER_MIN_KEY; |
| 79 | cursor.key_beg.create_tid = 1; |
| 80 | cursor.key_beg.delete_tid = 0; |
| 81 | cursor.key_beg.rec_type = HAMMER_MIN_RECTYPE; |
| 82 | cursor.key_beg.obj_type = 0; |
| 83 | |
| 84 | cursor.key_end.obj_id = reblock->end_obj_id; |
| 85 | cursor.key_end.key = HAMMER_MAX_KEY; |
| 86 | cursor.key_end.create_tid = HAMMER_MAX_TID - 1; |
| 87 | cursor.key_end.delete_tid = 0; |
| 88 | cursor.key_end.rec_type = HAMMER_MAX_RECTYPE; |
| 89 | cursor.key_end.obj_type = 0; |
| 90 | |
| 91 | cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE; |
| 92 | cursor.flags |= HAMMER_CURSOR_BACKEND; |
| 93 | |
| 94 | error = hammer_btree_first(&cursor); |
| 95 | while (error == 0) { |
| 96 | elm = &cursor.node->ondisk->elms[cursor.index]; |
| 97 | reblock->cur_obj_id = elm->base.obj_id; |
| 98 | |
| 99 | /* |
| 100 | * Acquiring the sync_lock prevents the operation from |
| 101 | * crossing a synchronization boundary. |
| 102 | */ |
| 103 | hammer_lock_ex(&trans->hmp->sync_lock); |
| 104 | error = hammer_reblock_helper(reblock, &cursor, elm); |
| 105 | hammer_unlock(&trans->hmp->sync_lock); |
| 106 | if (error == 0) { |
| 107 | cursor.flags |= HAMMER_CURSOR_ATEDISK; |
| 108 | error = hammer_btree_iterate(&cursor); |
| 109 | } |
| 110 | |
| 111 | /* |
| 112 | * Bad hack for now, don't blow out the kernel's buffer |
| 113 | * cache. |
| 114 | */ |
| 115 | if (trans->hmp->locked_dirty_count > hammer_limit_dirtybufs) |
| 116 | hammer_flusher_sync(trans->hmp); |
| 117 | if (error == 0) |
| 118 | error = hammer_signal_check(trans->hmp); |
| 119 | } |
| 120 | if (error == ENOENT) |
| 121 | error = 0; |
| 122 | hammer_done_cursor(&cursor); |
| 123 | if (error == EDEADLK) |
| 124 | goto retry; |
| 125 | if (error == EINTR) { |
| 126 | reblock->head.flags |= HAMMER_IOC_HEAD_INTR; |
| 127 | error = 0; |
| 128 | } |
| 129 | return(error); |
| 130 | } |
| 131 | |
| 132 | /* |
| 133 | * Reblock the B-Tree (leaf) node, record, and/or data if necessary. |
| 134 | * |
| 135 | * XXX We have no visibility into internal B-Tree nodes at the moment, |
| 136 | * only leaf nodes. |
| 137 | */ |
| 138 | static int |
| 139 | hammer_reblock_helper(struct hammer_ioc_reblock *reblock, |
| 140 | hammer_cursor_t cursor, hammer_btree_elm_t elm) |
| 141 | { |
| 142 | hammer_off_t tmp_offset; |
| 143 | int error; |
| 144 | int zone; |
| 145 | int bytes; |
| 146 | int cur; |
| 147 | |
| 148 | if (elm->leaf.base.btype != HAMMER_BTREE_TYPE_RECORD) |
| 149 | return(0); |
| 150 | error = 0; |
| 151 | |
| 152 | /* |
| 153 | * Reblock data. Note that data embedded in a record is reblocked |
| 154 | * by the record reblock code. |
| 155 | */ |
| 156 | tmp_offset = elm->leaf.data_offset; |
| 157 | zone = HAMMER_ZONE_DECODE(tmp_offset); /* can be 0 */ |
| 158 | if ((zone == HAMMER_ZONE_SMALL_DATA_INDEX || |
| 159 | zone == HAMMER_ZONE_LARGE_DATA_INDEX) && error == 0) { |
| 160 | ++reblock->data_count; |
| 161 | reblock->data_byte_count += elm->leaf.data_len; |
| 162 | bytes = hammer_blockmap_getfree(cursor->trans->hmp, tmp_offset, |
| 163 | &cur, &error); |
| 164 | if (error == 0 && cur == 0 && bytes > reblock->free_level) { |
| 165 | if (hammer_debug_general & 0x4000) |
| 166 | kprintf("%6d ", bytes); |
| 167 | error = hammer_cursor_upgrade(cursor); |
| 168 | if (error == 0) { |
| 169 | error = hammer_reblock_data(reblock, |
| 170 | cursor, elm); |
| 171 | } |
| 172 | if (error == 0) { |
| 173 | ++reblock->data_moves; |
| 174 | reblock->data_byte_moves += elm->leaf.data_len; |
| 175 | } |
| 176 | } |
| 177 | } |
| 178 | |
| 179 | /* |
| 180 | * Reblock a record |
| 181 | */ |
| 182 | tmp_offset = elm->leaf.rec_offset; |
| 183 | zone = HAMMER_ZONE_DECODE(tmp_offset); |
| 184 | if (zone == HAMMER_ZONE_RECORD_INDEX && error == 0) { |
| 185 | ++reblock->record_count; |
| 186 | bytes = hammer_blockmap_getfree(cursor->trans->hmp, tmp_offset, |
| 187 | &cur, &error); |
| 188 | if (error == 0 && cur == 0 && bytes > reblock->free_level) { |
| 189 | if (hammer_debug_general & 0x4000) |
| 190 | kprintf("%6d ", bytes); |
| 191 | error = hammer_cursor_upgrade(cursor); |
| 192 | if (error == 0) { |
| 193 | error = hammer_reblock_record(reblock, |
| 194 | cursor, elm); |
| 195 | } |
| 196 | if (error == 0) { |
| 197 | ++reblock->record_moves; |
| 198 | } |
| 199 | } |
| 200 | } |
| 201 | |
| 202 | /* |
| 203 | * Reblock a B-Tree node. Adjust elm to point at the parent's |
| 204 | * leaf entry. |
| 205 | */ |
| 206 | tmp_offset = cursor->node->node_offset; |
| 207 | zone = HAMMER_ZONE_DECODE(tmp_offset); |
| 208 | if (zone == HAMMER_ZONE_BTREE_INDEX && error == 0 && |
| 209 | cursor->index == 0) { |
| 210 | ++reblock->btree_count; |
| 211 | bytes = hammer_blockmap_getfree(cursor->trans->hmp, tmp_offset, |
| 212 | &cur, &error); |
| 213 | if (error == 0 && cur == 0 && bytes > reblock->free_level) { |
| 214 | if (hammer_debug_general & 0x4000) |
| 215 | kprintf("%6d ", bytes); |
| 216 | error = hammer_cursor_upgrade(cursor); |
| 217 | if (error == 0) { |
| 218 | if (cursor->parent) |
| 219 | elm = &cursor->parent->ondisk->elms[cursor->parent_index]; |
| 220 | else |
| 221 | elm = NULL; |
| 222 | error = hammer_reblock_node(reblock, |
| 223 | cursor, elm); |
| 224 | } |
| 225 | if (error == 0) { |
| 226 | ++reblock->btree_moves; |
| 227 | } |
| 228 | } |
| 229 | } |
| 230 | |
| 231 | hammer_cursor_downgrade(cursor); |
| 232 | return(error); |
| 233 | } |
| 234 | |
| 235 | /* |
| 236 | * Reblock a record's data. Both the B-Tree element and record pointers |
| 237 | * to the data must be adjusted. |
| 238 | */ |
| 239 | static int |
| 240 | hammer_reblock_data(struct hammer_ioc_reblock *reblock, |
| 241 | hammer_cursor_t cursor, hammer_btree_elm_t elm) |
| 242 | { |
| 243 | struct hammer_buffer *data_buffer = NULL; |
| 244 | hammer_off_t ndata_offset; |
| 245 | int error; |
| 246 | void *ndata; |
| 247 | |
| 248 | error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_DATA | |
| 249 | HAMMER_CURSOR_GET_RECORD); |
| 250 | if (error) |
| 251 | return (error); |
| 252 | ndata = hammer_alloc_data(cursor->trans, elm->leaf.data_len, |
| 253 | &ndata_offset, &data_buffer, &error); |
| 254 | if (error) |
| 255 | goto done; |
| 256 | |
| 257 | /* |
| 258 | * Move the data |
| 259 | */ |
| 260 | hammer_modify_buffer(cursor->trans, data_buffer, NULL, 0); |
| 261 | bcopy(cursor->data, ndata, elm->leaf.data_len); |
| 262 | hammer_modify_buffer_done(data_buffer); |
| 263 | |
| 264 | hammer_blockmap_free(cursor->trans, |
| 265 | elm->leaf.data_offset, elm->leaf.data_len); |
| 266 | |
| 267 | hammer_modify_record_field(cursor->trans, cursor->record_buffer, |
| 268 | cursor->record, base.data_off, 0); |
| 269 | cursor->record->base.data_off = ndata_offset; |
| 270 | hammer_modify_record_done(cursor->record_buffer, cursor->record); |
| 271 | |
| 272 | hammer_modify_node(cursor->trans, cursor->node, |
| 273 | &elm->leaf.data_offset, sizeof(hammer_off_t)); |
| 274 | elm->leaf.data_offset = ndata_offset; |
| 275 | hammer_modify_node_done(cursor->node); |
| 276 | |
| 277 | done: |
| 278 | if (data_buffer) |
| 279 | hammer_rel_buffer(data_buffer, 0); |
| 280 | return (error); |
| 281 | } |
| 282 | |
| 283 | /* |
| 284 | * Reblock a record. The B-Tree must be adjusted to point to the new record |
| 285 | * and the existing record must be physically destroyed so a FS rebuild |
| 286 | * does not see two versions of the same record. |
| 287 | */ |
| 288 | static int |
| 289 | hammer_reblock_record(struct hammer_ioc_reblock *reblock, |
| 290 | hammer_cursor_t cursor, hammer_btree_elm_t elm) |
| 291 | { |
| 292 | struct hammer_buffer *rec_buffer = NULL; |
| 293 | hammer_off_t nrec_offset; |
| 294 | hammer_off_t ndata_offset; |
| 295 | hammer_record_ondisk_t orec; |
| 296 | hammer_record_ondisk_t nrec; |
| 297 | int error; |
| 298 | int inline_data; |
| 299 | |
| 300 | error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_RECORD); |
| 301 | if (error) |
| 302 | return (error); |
| 303 | |
| 304 | nrec = hammer_alloc_record(cursor->trans, &nrec_offset, |
| 305 | elm->leaf.base.rec_type, &rec_buffer, |
| 306 | 0, NULL, NULL, NULL, &error); |
| 307 | if (error) |
| 308 | goto done; |
| 309 | |
| 310 | /* |
| 311 | * Move the record. Check for an inline data reference and move that |
| 312 | * too if necessary. |
| 313 | */ |
| 314 | orec = cursor->record; |
| 315 | hammer_modify_buffer(cursor->trans, rec_buffer, NULL, 0); |
| 316 | bcopy(orec, nrec, sizeof(*nrec)); |
| 317 | |
| 318 | if ((orec->base.data_off & HAMMER_OFF_ZONE_MASK) == HAMMER_ZONE_RECORD) { |
| 319 | ndata_offset = orec->base.data_off - elm->leaf.rec_offset; |
| 320 | KKASSERT(ndata_offset < sizeof(*nrec)); |
| 321 | ndata_offset += nrec_offset; |
| 322 | inline_data = 1; |
| 323 | } else { |
| 324 | ndata_offset = 0; |
| 325 | inline_data = 0; |
| 326 | } |
| 327 | hammer_modify_record_field(cursor->trans, cursor->record_buffer, |
| 328 | orec, base.base.rec_type, 1); |
| 329 | orec->base.base.rec_type |= HAMMER_RECTYPE_MOVED; |
| 330 | orec->base.signature = HAMMER_RECORD_SIGNATURE_DESTROYED; |
| 331 | hammer_modify_record_done(cursor->record_buffer, orec); |
| 332 | |
| 333 | hammer_blockmap_free(cursor->trans, |
| 334 | elm->leaf.rec_offset, sizeof(*nrec)); |
| 335 | |
| 336 | if (hammer_debug_general & 0x4000) { |
| 337 | kprintf("REBLOCK RECD %016llx -> %016llx\n", |
| 338 | elm->leaf.rec_offset, nrec_offset); |
| 339 | } |
| 340 | |
| 341 | hammer_modify_node(cursor->trans, cursor->node, |
| 342 | &elm->leaf.rec_offset, sizeof(hammer_off_t)); |
| 343 | elm->leaf.rec_offset = nrec_offset; |
| 344 | hammer_modify_node_done(cursor->node); |
| 345 | if (inline_data) { |
| 346 | hammer_modify_node(cursor->trans, cursor->node, |
| 347 | &elm->leaf.data_offset, sizeof(hammer_off_t)); |
| 348 | elm->leaf.data_offset = ndata_offset; |
| 349 | hammer_modify_node_done(cursor->node); |
| 350 | nrec->base.data_off = ndata_offset; |
| 351 | } |
| 352 | hammer_modify_buffer_done(rec_buffer); |
| 353 | |
| 354 | done: |
| 355 | if (rec_buffer) |
| 356 | hammer_rel_buffer(rec_buffer, 0); |
| 357 | return (error); |
| 358 | } |
| 359 | |
| 360 | /* |
| 361 | * Reblock a B-Tree (leaf) node. The parent must be adjusted to point to |
| 362 | * the new copy of the leaf node. elm is a pointer to the parent element |
| 363 | * pointing at cursor.node. |
| 364 | * |
| 365 | * XXX reblock internal nodes too. |
| 366 | */ |
| 367 | static int |
| 368 | hammer_reblock_node(struct hammer_ioc_reblock *reblock, |
| 369 | hammer_cursor_t cursor, hammer_btree_elm_t elm) |
| 370 | { |
| 371 | hammer_node_t onode; |
| 372 | hammer_node_t nnode; |
| 373 | int error; |
| 374 | |
| 375 | onode = cursor->node; |
| 376 | nnode = hammer_alloc_btree(cursor->trans, &error); |
| 377 | hammer_lock_ex(&nnode->lock); |
| 378 | |
| 379 | if (nnode == NULL) |
| 380 | return (error); |
| 381 | |
| 382 | /* |
| 383 | * Move the node |
| 384 | */ |
| 385 | bcopy(onode->ondisk, nnode->ondisk, sizeof(*nnode->ondisk)); |
| 386 | |
| 387 | if (elm) { |
| 388 | /* |
| 389 | * We are not the root of the B-Tree |
| 390 | */ |
| 391 | hammer_modify_node(cursor->trans, cursor->parent, |
| 392 | &elm->internal.subtree_offset, |
| 393 | sizeof(elm->internal.subtree_offset)); |
| 394 | elm->internal.subtree_offset = nnode->node_offset; |
| 395 | hammer_modify_node_done(cursor->parent); |
| 396 | } else { |
| 397 | /* |
| 398 | * We are the root of the B-Tree |
| 399 | */ |
| 400 | hammer_volume_t volume; |
| 401 | |
| 402 | volume = hammer_get_root_volume(cursor->trans->hmp, &error); |
| 403 | KKASSERT(error == 0); |
| 404 | |
| 405 | hammer_modify_volume_field(cursor->trans, volume, |
| 406 | vol0_btree_root); |
| 407 | volume->ondisk->vol0_btree_root = nnode->node_offset; |
| 408 | hammer_modify_volume_done(volume); |
| 409 | hammer_rel_volume(volume, 0); |
| 410 | } |
| 411 | |
| 412 | hammer_delete_node(cursor->trans, onode); |
| 413 | |
| 414 | if (hammer_debug_general & 0x4000) { |
| 415 | kprintf("REBLOCK NODE %016llx -> %016llx\n", |
| 416 | onode->node_offset, nnode->node_offset); |
| 417 | } |
| 418 | |
| 419 | cursor->node = nnode; |
| 420 | hammer_rel_node(onode); |
| 421 | |
| 422 | return (error); |
| 423 | } |
| 424 | |