HAMMER 46/Many: Performance pass, media changes, bug fixes.
[dragonfly.git] / sys / vfs / hammer / hammer_reblock.c
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_reblock.c,v 1.16 2008/05/18 01:48:50 dillon Exp $
35  */
36 /*
37  * HAMMER reblocker - This code frees up fragmented physical space
38  *
39  * HAMMER only keeps track of free space on a big-block basis.  A big-block
40  * containing holes can only be freed by migrating the remaining data in
41  * that big-block into a new big-block, then freeing the big-block.
42  *
43  * This function is called from an ioctl or via the hammer support thread.
44  */
45
46 #include "hammer.h"
47
48 static int hammer_reblock_helper(struct hammer_ioc_reblock *reblock,
49                                  hammer_cursor_t cursor,
50                                  hammer_btree_elm_t elm);
51 static int hammer_reblock_data(struct hammer_ioc_reblock *reblock,
52                                 hammer_cursor_t cursor, hammer_btree_elm_t elm);
53 static int hammer_reblock_leaf_node(struct hammer_ioc_reblock *reblock,
54                                 hammer_cursor_t cursor, hammer_btree_elm_t elm);
55 static int hammer_reblock_int_node(struct hammer_ioc_reblock *reblock,
56                                 hammer_cursor_t cursor, hammer_btree_elm_t elm);
57
58 int
59 hammer_ioc_reblock(hammer_transaction_t trans, hammer_inode_t ip,
60                struct hammer_ioc_reblock *reblock)
61 {
62         struct hammer_cursor cursor;
63         hammer_btree_elm_t elm;
64         int error;
65
66         if (reblock->beg_obj_id >= reblock->end_obj_id)
67                 return(EINVAL);
68         if (reblock->free_level < 0)
69                 return(EINVAL);
70
71         reblock->cur_obj_id = reblock->beg_obj_id;
72         reblock->cur_localization = reblock->beg_localization;
73
74 retry:
75         error = hammer_init_cursor(trans, &cursor, NULL, NULL);
76         if (error) {
77                 hammer_done_cursor(&cursor);
78                 return(error);
79         }
80         cursor.key_beg.localization = reblock->cur_localization;
81         cursor.key_beg.obj_id = reblock->cur_obj_id;
82         cursor.key_beg.key = HAMMER_MIN_KEY;
83         cursor.key_beg.create_tid = 1;
84         cursor.key_beg.delete_tid = 0;
85         cursor.key_beg.rec_type = HAMMER_MIN_RECTYPE;
86         cursor.key_beg.obj_type = 0;
87
88         cursor.key_end.localization = reblock->end_localization;
89         cursor.key_end.obj_id = reblock->end_obj_id;
90         cursor.key_end.key = HAMMER_MAX_KEY;
91         cursor.key_end.create_tid = HAMMER_MAX_TID - 1;
92         cursor.key_end.delete_tid = 0;
93         cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
94         cursor.key_end.obj_type = 0;
95
96         cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
97         cursor.flags |= HAMMER_CURSOR_BACKEND;
98
99         /*
100          * This flag allows the btree scan code to return internal nodes,
101          * so we can reblock them in addition to the leafs.  Only specify it
102          * if we intend to reblock B-Tree nodes.
103          */
104         if (reblock->head.flags & HAMMER_IOC_DO_BTREE)
105                 cursor.flags |= HAMMER_CURSOR_REBLOCKING;
106
107         error = hammer_btree_first(&cursor);
108         while (error == 0) {
109                 /*
110                  * Internal or Leaf node
111                  */
112                 elm = &cursor.node->ondisk->elms[cursor.index];
113                 reblock->cur_obj_id = elm->base.obj_id;
114                 reblock->cur_localization = elm->base.localization;
115
116                 /*
117                  * Acquiring the sync_lock prevents the operation from
118                  * crossing a synchronization boundary.
119                  *
120                  * NOTE: cursor.node may have changed on return.
121                  */
122                 hammer_sync_lock_sh(trans);
123                 error = hammer_reblock_helper(reblock, &cursor, elm);
124                 hammer_sync_unlock(trans);
125                 if (error == 0) {
126                         cursor.flags |= HAMMER_CURSOR_ATEDISK;
127                         error = hammer_btree_iterate(&cursor);
128                 }
129
130                 /*
131                  * Bad hack for now, don't blow out the kernel's buffer
132                  * cache.  NOTE: We still hold locks on the cursor, we
133                  * cannot call the flusher synchronously.
134                  */
135                 if (trans->hmp->locked_dirty_count +
136                     trans->hmp->io_running_count > hammer_limit_dirtybufs) {
137                         hammer_flusher_async(trans->hmp);
138                         tsleep(trans, 0, "hmrslo", hz / 10);
139                 }
140                 if (error == 0)
141                         error = hammer_signal_check(trans->hmp);
142         }
143         if (error == ENOENT)
144                 error = 0;
145         hammer_done_cursor(&cursor);
146         if (error == EDEADLK)
147                 goto retry;
148         if (error == EINTR) {
149                 reblock->head.flags |= HAMMER_IOC_HEAD_INTR;
150                 error = 0;
151         }
152         return(error);
153 }
154
155 /*
156  * Reblock the B-Tree (leaf) node, record, and/or data if necessary.
157  *
158  * XXX We have no visibility into internal B-Tree nodes at the moment,
159  * only leaf nodes.
160  */
161 static int
162 hammer_reblock_helper(struct hammer_ioc_reblock *reblock,
163                       hammer_cursor_t cursor, hammer_btree_elm_t elm)
164 {
165         hammer_off_t tmp_offset;
166         int error;
167         int zone;
168         int bytes;
169         int cur;
170
171         error = 0;
172
173         /*
174          * Reblock data.  Note that data embedded in a record is reblocked
175          * by the record reblock code.  Data processing only occurs at leaf
176          * nodes and for RECORD element types.
177          */
178         if (cursor->node->ondisk->type != HAMMER_BTREE_TYPE_LEAF)
179                 goto skip;
180         if (elm->leaf.base.btype != HAMMER_BTREE_TYPE_RECORD)
181                 return(0);
182         tmp_offset = elm->leaf.data_offset;
183         zone = HAMMER_ZONE_DECODE(tmp_offset);          /* can be 0 */
184         if ((zone == HAMMER_ZONE_SMALL_DATA_INDEX ||
185              zone == HAMMER_ZONE_LARGE_DATA_INDEX) &&
186             error == 0 && (reblock->head.flags & (HAMMER_IOC_DO_DATA | HAMMER_IOC_DO_INODES))) {
187                 ++reblock->data_count;
188                 reblock->data_byte_count += elm->leaf.data_len;
189                 bytes = hammer_blockmap_getfree(cursor->trans->hmp, tmp_offset,
190                                                 &cur, &error);
191                 if (hammer_debug_general & 0x4000)
192                         kprintf("D %6d/%d\n", bytes, reblock->free_level);
193                 if (error == 0 && cur == 0 && bytes >= reblock->free_level) {
194                         error = hammer_cursor_upgrade(cursor);
195                         if (error == 0) {
196                                 error = hammer_reblock_data(reblock,
197                                                             cursor, elm);
198                         }
199                         if (error == 0) {
200                                 ++reblock->data_moves;
201                                 reblock->data_byte_moves += elm->leaf.data_len;
202                         }
203                 }
204         }
205
206 skip:
207         /*
208          * Reblock a B-Tree internal or leaf node.
209          */
210         tmp_offset = cursor->node->node_offset;
211         zone = HAMMER_ZONE_DECODE(tmp_offset);
212         if (zone == HAMMER_ZONE_BTREE_INDEX && cursor->index == 0 &&
213             error == 0 && (reblock->head.flags & HAMMER_IOC_DO_BTREE)) {
214                 ++reblock->btree_count;
215                 bytes = hammer_blockmap_getfree(cursor->trans->hmp, tmp_offset,
216                                                 &cur, &error);
217                 if (hammer_debug_general & 0x4000)
218                         kprintf("B %6d/%d\n", bytes, reblock->free_level);
219                 if (error == 0 && cur == 0 && bytes >= reblock->free_level) {
220                         error = hammer_cursor_upgrade(cursor);
221                         if (error == 0) {
222                                 if (cursor->parent)
223                                         elm = &cursor->parent->ondisk->elms[cursor->parent_index];
224                                 else
225                                         elm = NULL;
226                                 switch(cursor->node->ondisk->type) {
227                                 case HAMMER_BTREE_TYPE_LEAF:
228                                         error = hammer_reblock_leaf_node(
229                                                         reblock, cursor, elm);
230                                         break;
231                                 case HAMMER_BTREE_TYPE_INTERNAL:
232                                         error = hammer_reblock_int_node(
233                                                         reblock, cursor, elm);
234                                         break;
235                                 default:
236                                         panic("Illegal B-Tree node type");
237                                 }
238                         }
239                         if (error == 0) {
240                                 ++reblock->btree_moves;
241                         }
242                 }
243         }
244
245         hammer_cursor_downgrade(cursor);
246         return(error);
247 }
248
249 /*
250  * Reblock a record's data.  Both the B-Tree element and record pointers
251  * to the data must be adjusted.
252  */
253 static int
254 hammer_reblock_data(struct hammer_ioc_reblock *reblock,
255                     hammer_cursor_t cursor, hammer_btree_elm_t elm)
256 {
257         struct hammer_buffer *data_buffer = NULL;
258         hammer_off_t ndata_offset;
259         int error;
260         void *ndata;
261
262         error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_DATA |
263                                              HAMMER_CURSOR_GET_LEAF);
264         if (error)
265                 return (error);
266         ndata = hammer_alloc_data(cursor->trans, elm->leaf.data_len,
267                                   &ndata_offset, &data_buffer, &error);
268         if (error)
269                 goto done;
270
271         /*
272          * Move the data
273          */
274         hammer_modify_buffer(cursor->trans, data_buffer, NULL, 0);
275         bcopy(cursor->data, ndata, elm->leaf.data_len);
276         hammer_modify_buffer_done(data_buffer);
277
278         hammer_blockmap_free(cursor->trans,
279                              elm->leaf.data_offset, elm->leaf.data_len);
280
281         hammer_modify_node(cursor->trans, cursor->node,
282                            &elm->leaf.data_offset, sizeof(hammer_off_t));
283         elm->leaf.data_offset = ndata_offset;
284         hammer_modify_node_done(cursor->node);
285
286 done:
287         if (data_buffer)
288                 hammer_rel_buffer(data_buffer, 0);
289         return (error);
290 }
291
292 /*
293  * Reblock a B-Tree leaf node.  The parent must be adjusted to point to
294  * the new copy of the leaf node.
295  *
296  * elm is a pointer to the parent element pointing at cursor.node.
297  */
298 static int
299 hammer_reblock_leaf_node(struct hammer_ioc_reblock *reblock,
300                          hammer_cursor_t cursor, hammer_btree_elm_t elm)
301 {
302         hammer_node_t onode;
303         hammer_node_t nnode;
304         int error;
305
306         onode = cursor->node;
307         nnode = hammer_alloc_btree(cursor->trans, &error);
308
309         if (nnode == NULL)
310                 return (error);
311
312         /*
313          * Move the node
314          */
315         hammer_lock_ex(&nnode->lock);
316         hammer_modify_node_noundo(cursor->trans, nnode);
317         bcopy(onode->ondisk, nnode->ondisk, sizeof(*nnode->ondisk));
318
319         if (elm) {
320                 /*
321                  * We are not the root of the B-Tree 
322                  */
323                 hammer_modify_node(cursor->trans, cursor->parent,
324                                    &elm->internal.subtree_offset,
325                                    sizeof(elm->internal.subtree_offset));
326                 elm->internal.subtree_offset = nnode->node_offset;
327                 hammer_modify_node_done(cursor->parent);
328         } else {
329                 /*
330                  * We are the root of the B-Tree
331                  */
332                 hammer_volume_t volume;
333                         
334                 volume = hammer_get_root_volume(cursor->trans->hmp, &error);
335                 KKASSERT(error == 0);
336
337                 hammer_modify_volume_field(cursor->trans, volume,
338                                            vol0_btree_root);
339                 volume->ondisk->vol0_btree_root = nnode->node_offset;
340                 hammer_modify_volume_done(volume);
341                 hammer_rel_volume(volume, 0);
342         }
343
344         hammer_delete_node(cursor->trans, onode);
345
346         if (hammer_debug_general & 0x4000) {
347                 kprintf("REBLOCK LNODE %016llx -> %016llx\n",
348                         onode->node_offset, nnode->node_offset);
349         }
350         hammer_modify_node_done(nnode);
351         cursor->node = nnode;
352
353         hammer_unlock(&onode->lock);
354         hammer_rel_node(onode);
355
356         return (error);
357 }
358
359 /*
360  * Reblock a B-Tree internal node.  The parent must be adjusted to point to
361  * the new copy of the internal node, and the node's children's parent
362  * pointers must also be adjusted to point to the new copy.
363  *
364  * elm is a pointer to the parent element pointing at cursor.node.
365  */
366 static int
367 hammer_reblock_int_node(struct hammer_ioc_reblock *reblock,
368                          hammer_cursor_t cursor, hammer_btree_elm_t elm)
369 {
370         hammer_node_locklist_t locklist = NULL;
371         hammer_node_t onode;
372         hammer_node_t nnode;
373         int error;
374         int i;
375
376         error = hammer_btree_lock_children(cursor, &locklist);
377         if (error)
378                 goto done;
379
380         onode = cursor->node;
381         nnode = hammer_alloc_btree(cursor->trans, &error);
382
383         if (nnode == NULL)
384                 goto done;
385
386         /*
387          * Move the node.  Adjust the parent's pointer to us first.
388          */
389         hammer_lock_ex(&nnode->lock);
390         hammer_modify_node_noundo(cursor->trans, nnode);
391         bcopy(onode->ondisk, nnode->ondisk, sizeof(*nnode->ondisk));
392
393         if (elm) {
394                 /*
395                  * We are not the root of the B-Tree 
396                  */
397                 hammer_modify_node(cursor->trans, cursor->parent,
398                                    &elm->internal.subtree_offset,
399                                    sizeof(elm->internal.subtree_offset));
400                 elm->internal.subtree_offset = nnode->node_offset;
401                 hammer_modify_node_done(cursor->parent);
402         } else {
403                 /*
404                  * We are the root of the B-Tree
405                  */
406                 hammer_volume_t volume;
407                         
408                 volume = hammer_get_root_volume(cursor->trans->hmp, &error);
409                 KKASSERT(error == 0);
410
411                 hammer_modify_volume_field(cursor->trans, volume,
412                                            vol0_btree_root);
413                 volume->ondisk->vol0_btree_root = nnode->node_offset;
414                 hammer_modify_volume_done(volume);
415                 hammer_rel_volume(volume, 0);
416         }
417
418         /*
419          * Now adjust our children's pointers to us.
420          */
421         for (i = 0; i < nnode->ondisk->count; ++i) {
422                 elm = &nnode->ondisk->elms[i];
423                 error = btree_set_parent(cursor->trans, nnode, elm);
424                 if (error)
425                         panic("reblock internal node: fixup problem");
426         }
427
428         /*
429          * Clean up.
430          *
431          * The new node replaces the current node in the cursor.  The cursor
432          * expects it to be locked so leave it locked.  Discard onode.
433          */
434         hammer_delete_node(cursor->trans, onode);
435
436         if (hammer_debug_general & 0x4000) {
437                 kprintf("REBLOCK INODE %016llx -> %016llx\n",
438                         onode->node_offset, nnode->node_offset);
439         }
440         hammer_modify_node_done(nnode);
441         cursor->node = nnode;
442
443         hammer_unlock(&onode->lock);
444         hammer_rel_node(onode);
445
446 done:
447         hammer_btree_unlock_children(&locklist);
448         return (error);
449 }
450