HAMMER 32/many: Record holes, initial undo API, initial reblocking code
[dragonfly.git] / sys / vfs / hammer / hammer_reblock.c
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_reblock.c,v 1.1 2008/03/18 05:19:16 dillon Exp $
35  */
36 /*
37  * HAMMER reblocker - This code frees up fragmented physical space
38  *
39  * HAMMER only keeps track of free space on a big-block basis.  A big-block
40  * containing holes can only be freed by migrating the remaining data in
41  * that big-block into a new big-block, then freeing the big-block.
42  *
43  * This function is called from an ioctl or via the hammer support thread.
44  */
45
46 #include "hammer.h"
47
48 static int hammer_reblock_helper(hammer_mount_t hmp,
49                                  struct hammer_ioc_reblock *reblock,
50                                  hammer_cursor_t cursor,
51                                  hammer_btree_elm_t elm);
52 static int hammer_reblock_data(hammer_mount_t hmp,
53                                 struct hammer_ioc_reblock *reblock,
54                                 hammer_cursor_t cursor, hammer_btree_elm_t elm);
55 static int hammer_reblock_record(hammer_mount_t hmp,
56                                 struct hammer_ioc_reblock *reblock,
57                                 hammer_cursor_t cursor, hammer_btree_elm_t elm);
58 static int hammer_reblock_node(hammer_mount_t hmp,
59                                 struct hammer_ioc_reblock *reblock,
60                                 hammer_cursor_t cursor, hammer_btree_elm_t elm);
61
62 int
63 hammer_reblock(hammer_inode_t ip, struct hammer_ioc_reblock *reblock)
64 {
65         struct hammer_cursor cursor;
66         hammer_btree_elm_t elm;
67         int error;
68
69         if (reblock->beg_obj_id >= reblock->end_obj_id)
70                 return(EINVAL);
71         if (reblock->free_level < 0)
72                 return(EINVAL);
73
74 retry:
75         error = hammer_init_cursor_hmp(&cursor, NULL, ip->hmp);
76         if (error) {
77                 hammer_done_cursor(&cursor);
78                 return(error);
79         }
80         cursor.key_beg.obj_id = reblock->cur_obj_id;
81         cursor.key_beg.key = HAMMER_MIN_KEY;
82         cursor.key_beg.create_tid = 1;
83         cursor.key_beg.delete_tid = 0;
84         cursor.key_beg.rec_type = HAMMER_MIN_RECTYPE;
85         cursor.key_beg.obj_type = 0;
86
87         cursor.key_end.obj_id = reblock->end_obj_id;
88         cursor.key_end.key = HAMMER_MAX_KEY;
89         cursor.key_end.create_tid = HAMMER_MAX_TID - 1;
90         cursor.key_end.delete_tid = 0;
91         cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
92         cursor.key_end.obj_type = 0;
93
94         cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
95
96         error = hammer_btree_first(&cursor);
97         while (error == 0) {
98                 elm = &cursor.node->ondisk->elms[cursor.index];
99                 reblock->cur_obj_id = elm->base.obj_id;
100
101                 error = hammer_reblock_helper(ip->hmp, reblock, &cursor, elm);
102                 if (error == 0) {
103                         cursor.flags |= HAMMER_CURSOR_ATEDISK;
104                         error = hammer_btree_iterate(&cursor);
105                 }
106         }
107         if (error == ENOENT)
108                 error = 0;
109         hammer_done_cursor(&cursor);
110         if (error == EDEADLK)
111                 goto retry;
112         return(error);
113 }
114
115 /*
116  * Reblock the B-Tree (leaf) node, record, and/or data if necessary.
117  *
118  * XXX We have no visibility into internal B-Tree nodes at the moment.
119  */
120 static int
121 hammer_reblock_helper(hammer_mount_t hmp, struct hammer_ioc_reblock *reblock,
122                       hammer_cursor_t cursor, hammer_btree_elm_t elm)
123 {
124         hammer_off_t tmp_offset;
125         int error;
126         int zone;
127         int bytes;
128         int cur;
129
130         if (elm->leaf.base.btype != HAMMER_BTREE_TYPE_RECORD)
131                 return(0);
132         error = 0;
133
134         /*
135          * Reblock data.  Note that data embedded in a record is reblocked
136          * by the record reblock code.
137          */
138         tmp_offset = elm->leaf.data_offset;
139         zone = HAMMER_ZONE_DECODE(tmp_offset);          /* can be 0 */
140         if ((zone == HAMMER_ZONE_SMALL_DATA_INDEX ||
141              zone == HAMMER_ZONE_LARGE_DATA_INDEX) && error == 0) {
142                 ++reblock->data_count;
143                 reblock->data_byte_count += elm->leaf.data_len;
144                 bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error);
145                 if (error == 0 && cur == 0 && bytes > reblock->free_level) {
146                         kprintf("%6d ", bytes);
147                         error = hammer_cursor_upgrade(cursor);
148                         if (error == 0) {
149                                 error = hammer_reblock_data(hmp, reblock,
150                                                             cursor, elm);
151                         }
152                         if (error == 0) {
153                                 ++reblock->data_moves;
154                                 reblock->data_byte_moves += elm->leaf.data_len;
155                         }
156                 }
157         }
158
159         /*
160          * Reblock a record
161          */
162         tmp_offset = elm->leaf.rec_offset;
163         zone = HAMMER_ZONE_DECODE(tmp_offset);
164         if (zone == HAMMER_ZONE_RECORD_INDEX && error == 0) {
165                 ++reblock->record_count;
166                 bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error);
167                 if (error == 0 && cur == 0 && bytes > reblock->free_level) {
168                         kprintf("%6d ", bytes);
169                         error = hammer_cursor_upgrade(cursor);
170                         if (error == 0) {
171                                 error = hammer_reblock_record(hmp, reblock,
172                                                               cursor, elm);
173                         }
174                         if (error == 0) {
175                                 ++reblock->record_moves;
176                         }
177                 }
178         }
179
180         /*
181          * Reblock a B-Tree node.  Adjust elm to point at the parent's
182          * leaf entry.
183          */
184         tmp_offset = cursor->node->node_offset;
185         zone = HAMMER_ZONE_DECODE(tmp_offset);
186         if (zone == HAMMER_ZONE_BTREE_INDEX && error == 0 &&
187             cursor->index == 0) {
188                 ++reblock->btree_count;
189                 bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error);
190                 if (error == 0 && cur == 0 && bytes > reblock->free_level) {
191                         kprintf("%6d ", bytes);
192                         error = hammer_cursor_upgrade(cursor);
193                         if (error == 0) {
194                                 if (cursor->parent)
195                                         elm = &cursor->parent->ondisk->elms[cursor->parent_index];
196                                 else
197                                         elm = NULL;
198                                 error = hammer_reblock_node(hmp, reblock,
199                                                             cursor, elm);
200                         }
201                         if (error == 0) {
202                                 ++reblock->btree_moves;
203                         }
204                 }
205         }
206
207         hammer_cursor_downgrade(cursor);
208         return(error);
209 }
210
211 /*
212  * Reblock a record's data.  Both the B-Tree element and record pointers
213  * to the data must be adjusted.
214  */
215 static int
216 hammer_reblock_data(hammer_mount_t hmp, struct hammer_ioc_reblock *reblock,
217                     hammer_cursor_t cursor, hammer_btree_elm_t elm)
218 {
219         struct hammer_buffer *data_buffer = NULL;
220         hammer_off_t ndata_offset;
221         int error;
222         void *ndata;
223
224         error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_DATA |
225                                              HAMMER_CURSOR_GET_RECORD);
226         if (error)
227                 return (error);
228         ndata = hammer_alloc_data(hmp, elm->leaf.data_len, &ndata_offset,
229                                   &data_buffer, &error);
230         if (error)
231                 goto done;
232
233         /*
234          * Move the data
235          */
236         bcopy(cursor->data, ndata, elm->leaf.data_len);
237         hammer_modify_node(cursor->node,
238                 &elm->leaf.data_offset, sizeof(hammer_off_t));
239         hammer_modify_record(cursor->record_buffer,
240                 &cursor->record->base.data_off, sizeof(hammer_off_t));
241
242         hammer_blockmap_free(hmp, elm->leaf.data_offset, elm->leaf.data_len);
243
244         kprintf("REBLOCK DATA %016llx -> %016llx\n",
245                 elm->leaf.data_offset, ndata_offset);
246         cursor->record->base.data_off = ndata_offset;
247         elm->leaf.data_offset = ndata_offset;
248
249 done:
250         if (data_buffer)
251                 hammer_rel_buffer(data_buffer, 0);
252         return (error);
253 }
254
255 /*
256  * Reblock a record.  The B-Tree must be adjusted to point to the new record
257  * and the existing record must be physically destroyed so a FS rebuild
258  * does not see two versions of the same record.
259  */
260 static int
261 hammer_reblock_record(hammer_mount_t hmp, struct hammer_ioc_reblock *reblock,
262                       hammer_cursor_t cursor, hammer_btree_elm_t elm)
263 {
264         struct hammer_buffer *rec_buffer = NULL;
265         hammer_off_t nrec_offset;
266         hammer_record_ondisk_t nrec;
267         int error;
268
269         error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_RECORD);
270         if (error)
271                 return (error);
272
273         nrec = hammer_alloc_record(hmp, &nrec_offset, elm->leaf.base.rec_type,
274                                   &rec_buffer, 0, NULL, NULL, &error);
275         if (error)
276                 goto done;
277
278         /*
279          * Move the record
280          */
281         bcopy(cursor->record, nrec, sizeof(*nrec));
282
283         hammer_modify_node(cursor->node,
284                 &elm->leaf.rec_offset, sizeof(hammer_off_t));
285         hammer_modify_record(cursor->record_buffer,
286                 &cursor->record->base.base.rec_type,
287                 sizeof(cursor->record->base.base.rec_type));
288
289         hammer_blockmap_free(hmp, elm->leaf.rec_offset, sizeof(*nrec));
290
291         kprintf("REBLOCK RECD %016llx -> %016llx\n",
292                 elm->leaf.rec_offset, nrec_offset);
293         elm->leaf.rec_offset = nrec_offset;
294
295 done:
296         if (rec_buffer)
297                 hammer_rel_buffer(rec_buffer, 0);
298         return (error);
299 }
300
301 /*
302  * Reblock a B-Tree (leaf) node.  The parent must be adjusted to point to
303  * the new copy of the leaf node.  elm is a pointer to the parent element
304  * pointing at cursor.node.
305  *
306  * XXX reblock internal nodes too.
307  */
308 static int
309 hammer_reblock_node(hammer_mount_t hmp, struct hammer_ioc_reblock *reblock,
310                     hammer_cursor_t cursor, hammer_btree_elm_t elm)
311 {
312         hammer_node_t onode;
313         hammer_node_t nnode;
314         int error;
315
316         onode = cursor->node;
317         nnode = hammer_alloc_btree(hmp, &error);
318         hammer_lock_ex(&nnode->lock);
319
320         if (nnode == NULL)
321                 return (error);
322
323         /*
324          * Move the node
325          */
326         bcopy(onode->ondisk, nnode->ondisk, sizeof(*nnode->ondisk));
327
328         if (elm) {
329                 /*
330                  * We are not the root of the B-Tree 
331                  */
332                 hammer_modify_node(cursor->parent,
333                                    &elm->internal.subtree_offset,
334                                    sizeof(elm->internal.subtree_offset));
335                 elm->internal.subtree_offset = nnode->node_offset;
336         } else {
337                 /*
338                  * We are the root of the B-Tree
339                  */
340                 hammer_volume_t volume;
341                         
342                 volume = hammer_get_root_volume(hmp, &error);
343                 KKASSERT(error == 0);
344
345                 hammer_modify_volume(volume, &volume->ondisk->vol0_btree_root,
346                                      sizeof(hammer_off_t));
347                 volume->ondisk->vol0_btree_root = nnode->node_offset;
348                 hammer_rel_volume(volume, 0);
349         }
350
351         onode->flags |= HAMMER_NODE_DELETED;
352
353         kprintf("REBLOCK NODE %016llx -> %016llx\n",
354                 onode->node_offset, nnode->node_offset);
355
356         cursor->node = nnode;
357         hammer_rel_node(onode);
358
359         return (error);
360 }
361