HAMMER 32/many: Record holes, initial undo API, initial reblocking code
[dragonfly.git] / sys / vfs / hammer / hammer_reblock.c
CommitLineData
bf686dbe
MD
1/*
2 * Copyright (c) 2008 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * $DragonFly: src/sys/vfs/hammer/hammer_reblock.c,v 1.1 2008/03/18 05:19:16 dillon Exp $
35 */
36/*
37 * HAMMER reblocker - This code frees up fragmented physical space
38 *
39 * HAMMER only keeps track of free space on a big-block basis. A big-block
40 * containing holes can only be freed by migrating the remaining data in
41 * that big-block into a new big-block, then freeing the big-block.
42 *
43 * This function is called from an ioctl or via the hammer support thread.
44 */
45
46#include "hammer.h"
47
48static int hammer_reblock_helper(hammer_mount_t hmp,
49 struct hammer_ioc_reblock *reblock,
50 hammer_cursor_t cursor,
51 hammer_btree_elm_t elm);
52static int hammer_reblock_data(hammer_mount_t hmp,
53 struct hammer_ioc_reblock *reblock,
54 hammer_cursor_t cursor, hammer_btree_elm_t elm);
55static int hammer_reblock_record(hammer_mount_t hmp,
56 struct hammer_ioc_reblock *reblock,
57 hammer_cursor_t cursor, hammer_btree_elm_t elm);
58static int hammer_reblock_node(hammer_mount_t hmp,
59 struct hammer_ioc_reblock *reblock,
60 hammer_cursor_t cursor, hammer_btree_elm_t elm);
61
62int
63hammer_reblock(hammer_inode_t ip, struct hammer_ioc_reblock *reblock)
64{
65 struct hammer_cursor cursor;
66 hammer_btree_elm_t elm;
67 int error;
68
69 if (reblock->beg_obj_id >= reblock->end_obj_id)
70 return(EINVAL);
71 if (reblock->free_level < 0)
72 return(EINVAL);
73
74retry:
75 error = hammer_init_cursor_hmp(&cursor, NULL, ip->hmp);
76 if (error) {
77 hammer_done_cursor(&cursor);
78 return(error);
79 }
80 cursor.key_beg.obj_id = reblock->cur_obj_id;
81 cursor.key_beg.key = HAMMER_MIN_KEY;
82 cursor.key_beg.create_tid = 1;
83 cursor.key_beg.delete_tid = 0;
84 cursor.key_beg.rec_type = HAMMER_MIN_RECTYPE;
85 cursor.key_beg.obj_type = 0;
86
87 cursor.key_end.obj_id = reblock->end_obj_id;
88 cursor.key_end.key = HAMMER_MAX_KEY;
89 cursor.key_end.create_tid = HAMMER_MAX_TID - 1;
90 cursor.key_end.delete_tid = 0;
91 cursor.key_end.rec_type = HAMMER_MAX_RECTYPE;
92 cursor.key_end.obj_type = 0;
93
94 cursor.flags |= HAMMER_CURSOR_END_INCLUSIVE;
95
96 error = hammer_btree_first(&cursor);
97 while (error == 0) {
98 elm = &cursor.node->ondisk->elms[cursor.index];
99 reblock->cur_obj_id = elm->base.obj_id;
100
101 error = hammer_reblock_helper(ip->hmp, reblock, &cursor, elm);
102 if (error == 0) {
103 cursor.flags |= HAMMER_CURSOR_ATEDISK;
104 error = hammer_btree_iterate(&cursor);
105 }
106 }
107 if (error == ENOENT)
108 error = 0;
109 hammer_done_cursor(&cursor);
110 if (error == EDEADLK)
111 goto retry;
112 return(error);
113}
114
115/*
116 * Reblock the B-Tree (leaf) node, record, and/or data if necessary.
117 *
118 * XXX We have no visibility into internal B-Tree nodes at the moment.
119 */
120static int
121hammer_reblock_helper(hammer_mount_t hmp, struct hammer_ioc_reblock *reblock,
122 hammer_cursor_t cursor, hammer_btree_elm_t elm)
123{
124 hammer_off_t tmp_offset;
125 int error;
126 int zone;
127 int bytes;
128 int cur;
129
130 if (elm->leaf.base.btype != HAMMER_BTREE_TYPE_RECORD)
131 return(0);
132 error = 0;
133
134 /*
135 * Reblock data. Note that data embedded in a record is reblocked
136 * by the record reblock code.
137 */
138 tmp_offset = elm->leaf.data_offset;
139 zone = HAMMER_ZONE_DECODE(tmp_offset); /* can be 0 */
140 if ((zone == HAMMER_ZONE_SMALL_DATA_INDEX ||
141 zone == HAMMER_ZONE_LARGE_DATA_INDEX) && error == 0) {
142 ++reblock->data_count;
143 reblock->data_byte_count += elm->leaf.data_len;
144 bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error);
145 if (error == 0 && cur == 0 && bytes > reblock->free_level) {
146 kprintf("%6d ", bytes);
147 error = hammer_cursor_upgrade(cursor);
148 if (error == 0) {
149 error = hammer_reblock_data(hmp, reblock,
150 cursor, elm);
151 }
152 if (error == 0) {
153 ++reblock->data_moves;
154 reblock->data_byte_moves += elm->leaf.data_len;
155 }
156 }
157 }
158
159 /*
160 * Reblock a record
161 */
162 tmp_offset = elm->leaf.rec_offset;
163 zone = HAMMER_ZONE_DECODE(tmp_offset);
164 if (zone == HAMMER_ZONE_RECORD_INDEX && error == 0) {
165 ++reblock->record_count;
166 bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error);
167 if (error == 0 && cur == 0 && bytes > reblock->free_level) {
168 kprintf("%6d ", bytes);
169 error = hammer_cursor_upgrade(cursor);
170 if (error == 0) {
171 error = hammer_reblock_record(hmp, reblock,
172 cursor, elm);
173 }
174 if (error == 0) {
175 ++reblock->record_moves;
176 }
177 }
178 }
179
180 /*
181 * Reblock a B-Tree node. Adjust elm to point at the parent's
182 * leaf entry.
183 */
184 tmp_offset = cursor->node->node_offset;
185 zone = HAMMER_ZONE_DECODE(tmp_offset);
186 if (zone == HAMMER_ZONE_BTREE_INDEX && error == 0 &&
187 cursor->index == 0) {
188 ++reblock->btree_count;
189 bytes = hammer_blockmap_getfree(hmp, tmp_offset, &cur, &error);
190 if (error == 0 && cur == 0 && bytes > reblock->free_level) {
191 kprintf("%6d ", bytes);
192 error = hammer_cursor_upgrade(cursor);
193 if (error == 0) {
194 if (cursor->parent)
195 elm = &cursor->parent->ondisk->elms[cursor->parent_index];
196 else
197 elm = NULL;
198 error = hammer_reblock_node(hmp, reblock,
199 cursor, elm);
200 }
201 if (error == 0) {
202 ++reblock->btree_moves;
203 }
204 }
205 }
206
207 hammer_cursor_downgrade(cursor);
208 return(error);
209}
210
211/*
212 * Reblock a record's data. Both the B-Tree element and record pointers
213 * to the data must be adjusted.
214 */
215static int
216hammer_reblock_data(hammer_mount_t hmp, struct hammer_ioc_reblock *reblock,
217 hammer_cursor_t cursor, hammer_btree_elm_t elm)
218{
219 struct hammer_buffer *data_buffer = NULL;
220 hammer_off_t ndata_offset;
221 int error;
222 void *ndata;
223
224 error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_DATA |
225 HAMMER_CURSOR_GET_RECORD);
226 if (error)
227 return (error);
228 ndata = hammer_alloc_data(hmp, elm->leaf.data_len, &ndata_offset,
229 &data_buffer, &error);
230 if (error)
231 goto done;
232
233 /*
234 * Move the data
235 */
236 bcopy(cursor->data, ndata, elm->leaf.data_len);
237 hammer_modify_node(cursor->node,
238 &elm->leaf.data_offset, sizeof(hammer_off_t));
239 hammer_modify_record(cursor->record_buffer,
240 &cursor->record->base.data_off, sizeof(hammer_off_t));
241
242 hammer_blockmap_free(hmp, elm->leaf.data_offset, elm->leaf.data_len);
243
244 kprintf("REBLOCK DATA %016llx -> %016llx\n",
245 elm->leaf.data_offset, ndata_offset);
246 cursor->record->base.data_off = ndata_offset;
247 elm->leaf.data_offset = ndata_offset;
248
249done:
250 if (data_buffer)
251 hammer_rel_buffer(data_buffer, 0);
252 return (error);
253}
254
255/*
256 * Reblock a record. The B-Tree must be adjusted to point to the new record
257 * and the existing record must be physically destroyed so a FS rebuild
258 * does not see two versions of the same record.
259 */
260static int
261hammer_reblock_record(hammer_mount_t hmp, struct hammer_ioc_reblock *reblock,
262 hammer_cursor_t cursor, hammer_btree_elm_t elm)
263{
264 struct hammer_buffer *rec_buffer = NULL;
265 hammer_off_t nrec_offset;
266 hammer_record_ondisk_t nrec;
267 int error;
268
269 error = hammer_btree_extract(cursor, HAMMER_CURSOR_GET_RECORD);
270 if (error)
271 return (error);
272
273 nrec = hammer_alloc_record(hmp, &nrec_offset, elm->leaf.base.rec_type,
274 &rec_buffer, 0, NULL, NULL, &error);
275 if (error)
276 goto done;
277
278 /*
279 * Move the record
280 */
281 bcopy(cursor->record, nrec, sizeof(*nrec));
282
283 hammer_modify_node(cursor->node,
284 &elm->leaf.rec_offset, sizeof(hammer_off_t));
285 hammer_modify_record(cursor->record_buffer,
286 &cursor->record->base.base.rec_type,
287 sizeof(cursor->record->base.base.rec_type));
288
289 hammer_blockmap_free(hmp, elm->leaf.rec_offset, sizeof(*nrec));
290
291 kprintf("REBLOCK RECD %016llx -> %016llx\n",
292 elm->leaf.rec_offset, nrec_offset);
293 elm->leaf.rec_offset = nrec_offset;
294
295done:
296 if (rec_buffer)
297 hammer_rel_buffer(rec_buffer, 0);
298 return (error);
299}
300
301/*
302 * Reblock a B-Tree (leaf) node. The parent must be adjusted to point to
303 * the new copy of the leaf node. elm is a pointer to the parent element
304 * pointing at cursor.node.
305 *
306 * XXX reblock internal nodes too.
307 */
308static int
309hammer_reblock_node(hammer_mount_t hmp, struct hammer_ioc_reblock *reblock,
310 hammer_cursor_t cursor, hammer_btree_elm_t elm)
311{
312 hammer_node_t onode;
313 hammer_node_t nnode;
314 int error;
315
316 onode = cursor->node;
317 nnode = hammer_alloc_btree(hmp, &error);
318 hammer_lock_ex(&nnode->lock);
319
320 if (nnode == NULL)
321 return (error);
322
323 /*
324 * Move the node
325 */
326 bcopy(onode->ondisk, nnode->ondisk, sizeof(*nnode->ondisk));
327
328 if (elm) {
329 /*
330 * We are not the root of the B-Tree
331 */
332 hammer_modify_node(cursor->parent,
333 &elm->internal.subtree_offset,
334 sizeof(elm->internal.subtree_offset));
335 elm->internal.subtree_offset = nnode->node_offset;
336 } else {
337 /*
338 * We are the root of the B-Tree
339 */
340 hammer_volume_t volume;
341
342 volume = hammer_get_root_volume(hmp, &error);
343 KKASSERT(error == 0);
344
345 hammer_modify_volume(volume, &volume->ondisk->vol0_btree_root,
346 sizeof(hammer_off_t));
347 volume->ondisk->vol0_btree_root = nnode->node_offset;
348 hammer_rel_volume(volume, 0);
349 }
350
351 onode->flags |= HAMMER_NODE_DELETED;
352
353 kprintf("REBLOCK NODE %016llx -> %016llx\n",
354 onode->node_offset, nnode->node_offset);
355
356 cursor->node = nnode;
357 hammer_rel_node(onode);
358
359 return (error);
360}
361