HAMMER 27/many: Major surgery - change allocation model
[dragonfly.git] / sys / vfs / hammer / hammer_cursor.c
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.17 2008/02/08 08:30:59 dillon Exp $
35  */
36
37 /*
38  * HAMMER B-Tree index - cursor support routines
39  */
40 #include "hammer.h"
41
42 static int hammer_load_cursor_parent(hammer_cursor_t cursor);
43
44 /*
45  * Initialize a fresh cursor using the B-Tree node cache.  If the cache
46  * is not available initialize a fresh cursor at the root of the filesystem.
47  */
48 int
49 hammer_init_cursor_hmp(hammer_cursor_t cursor, struct hammer_node **cache,
50                        hammer_mount_t hmp)
51 {
52         hammer_volume_t volume;
53         hammer_node_t node;
54         int error;
55
56         bzero(cursor, sizeof(*cursor));
57
58         /*
59          * Step 1 - acquire a locked node from the cache if possible
60          */
61         if (cache && *cache) {
62                 node = hammer_ref_node_safe(hmp, cache, &error);
63                 if (error == 0) {
64                         hammer_lock_sh(&node->lock);
65                         if (node->flags & HAMMER_NODE_DELETED) {
66                                 hammer_unlock(&node->lock);
67                                 hammer_rel_node(node);
68                                 node = NULL;
69                         }
70                 }
71         } else {
72                 node = NULL;
73         }
74
75         /*
76          * Step 2 - If we couldn't get a node from the cache, get
77          * the one from the root of the filesystem.
78          */
79         while (node == NULL) {
80                 volume = hammer_get_root_volume(hmp, &error);
81                 if (error)
82                         break;
83                 node = hammer_get_node(hmp, volume->ondisk->vol0_btree_root,
84                                        &error);
85                 hammer_rel_volume(volume, 0);
86                 if (error)
87                         break;
88                 hammer_lock_sh(&node->lock);
89                 if (node->flags & HAMMER_NODE_DELETED) {
90                         hammer_unlock(&node->lock);
91                         hammer_rel_node(node);
92                         node = NULL;
93                 }
94         }
95
96         /*
97          * Step 3 - finish initializing the cursor by acquiring the parent
98          */
99         cursor->node = node;
100         if (error == 0)
101                 error = hammer_load_cursor_parent(cursor);
102         KKASSERT(error == 0);
103         return(error);
104 }
105
106 /*
107  * We are finished with a cursor.  We NULL out various fields as sanity
108  * check, in case the structure is inappropriately used afterwords.
109  */
110 void
111 hammer_done_cursor(hammer_cursor_t cursor)
112 {
113         if (cursor->parent) {
114                 hammer_unlock(&cursor->parent->lock);
115                 hammer_rel_node(cursor->parent);
116                 cursor->parent = NULL;
117         }
118         if (cursor->node) {
119                 hammer_unlock(&cursor->node->lock);
120                 hammer_rel_node(cursor->node);
121                 cursor->node = NULL;
122         }
123         if (cursor->data_buffer) {
124                 hammer_rel_buffer(cursor->data_buffer, 0);
125                 cursor->data_buffer = NULL;
126         }
127         if (cursor->record_buffer) {
128                 hammer_rel_buffer(cursor->record_buffer, 0);
129                 cursor->record_buffer = NULL;
130         }
131         if (cursor->ip)
132                 hammer_mem_done(cursor);
133
134         /*
135          * If we deadlocked this node will be referenced.  Do a quick
136          * lock/unlock to wait for the deadlock condition to clear.
137          */
138         if (cursor->deadlk_node) {
139                 hammer_lock_ex(&cursor->deadlk_node->lock);
140                 hammer_unlock(&cursor->deadlk_node->lock);
141                 hammer_rel_node(cursor->deadlk_node);
142                 cursor->deadlk_node = NULL;
143         }
144
145         cursor->data1 = NULL;
146         cursor->data2 = NULL;
147         cursor->data_split = 0;
148         cursor->record = NULL;
149         cursor->left_bound = NULL;
150         cursor->right_bound = NULL;
151 }
152
153 /*
154  * Upgrade cursor->node and cursor->parent to exclusive locks.  This
155  * function can return EDEADLK.
156  *
157  * If we fail to upgrade the lock and cursor->deadlk_node is NULL, 
158  * we add another reference to the node that failed and set
159  * cursor->deadlk_node so hammer_done_cursor() can block on it.
160  */
161 int
162 hammer_cursor_upgrade(hammer_cursor_t cursor)
163 {
164         int error;
165
166         if (hammer_lock_held(&cursor->node->lock) < 0) {
167                 error = hammer_lock_upgrade(&cursor->node->lock);
168                 if (error && cursor->deadlk_node == NULL) {
169                         cursor->deadlk_node = cursor->node;
170                         hammer_ref_node(cursor->deadlk_node);
171                 }
172         } else {
173                 error = 0;
174         }
175         if (cursor->parent && hammer_lock_held(&cursor->parent->lock) < 0) {
176                 error = hammer_lock_upgrade(&cursor->parent->lock);
177                 if (error && cursor->deadlk_node == NULL) {
178                         cursor->deadlk_node = cursor->parent;
179                         hammer_ref_node(cursor->deadlk_node);
180                 }
181         } else {
182                 error = 0;
183         }
184         return(error);
185 }
186
187 /*
188  * Downgrade cursor->node and cursor->parent to shared locks.  This
189  * function can return EDEADLK.
190  */
191 void
192 hammer_cursor_downgrade(hammer_cursor_t cursor)
193 {
194         if (hammer_lock_held(&cursor->node->lock) > 0)
195                 hammer_lock_downgrade(&cursor->node->lock);
196         if (cursor->parent && hammer_lock_held(&cursor->parent->lock) > 0)
197                 hammer_lock_downgrade(&cursor->parent->lock);
198 }
199
200 /*
201  * Seek the cursor to the specified node and index.
202  */
203 int
204 hammer_cursor_seek(hammer_cursor_t cursor, hammer_node_t node, int index)
205 {
206         int error;
207
208         hammer_cursor_downgrade(cursor);
209         error = 0;
210
211         if (cursor->node != node) {
212                 hammer_unlock(&cursor->node->lock);
213                 hammer_rel_node(cursor->node);
214                 cursor->node = node;
215                 cursor->index = index;
216                 hammer_ref_node(node);
217                 hammer_lock_sh(&node->lock);
218
219                 if (cursor->parent) {
220                         hammer_unlock(&cursor->parent->lock);
221                         hammer_rel_node(cursor->parent);
222                         cursor->parent = NULL;
223                         cursor->parent_index = 0;
224                 }
225                 error = hammer_load_cursor_parent(cursor);
226         }
227         return (error);
228 }
229
230 /*
231  * Load the parent of cursor->node into cursor->parent.
232  */
233 static
234 int
235 hammer_load_cursor_parent(hammer_cursor_t cursor)
236 {
237         hammer_mount_t hmp;
238         hammer_node_t parent;
239         hammer_node_t node;
240         hammer_btree_elm_t elm;
241         int error;
242         int i;
243
244         hmp = cursor->node->volume->hmp;
245
246         if (cursor->node->ondisk->parent) {
247                 node = cursor->node;
248                 parent = hammer_get_node(node->volume->hmp,
249                                          node->ondisk->parent, &error);
250                 if (error)
251                         return(error);
252                 elm = NULL;
253                 for (i = 0; i < parent->ondisk->count; ++i) {
254                         elm = &parent->ondisk->elms[i];
255                         if (parent->ondisk->elms[i].internal.subtree_offset ==
256                             node->node_offset) {
257                                 break;
258                         }
259                 }
260                 if (i == parent->ondisk->count)
261                         panic("Bad B-Tree link: parent %p node %p\n", parent, node);
262                 KKASSERT(i != parent->ondisk->count);
263                 cursor->parent = parent;
264                 cursor->parent_index = i;
265                 cursor->left_bound = &elm[0].internal.base;
266                 cursor->right_bound = &elm[1].internal.base;
267
268                 hammer_lock_sh(&parent->lock);
269                 return(error);
270         } else {
271                 cursor->parent = NULL;
272                 cursor->parent_index = 0;
273                 cursor->left_bound = &hmp->root_btree_beg;
274                 cursor->right_bound = &hmp->root_btree_end;
275                 error = 0;
276         }
277         return(error);
278 }
279
280 /*
281  * Cursor up to our parent node.  Return ENOENT if we are at the root of
282  * the filesystem.
283  *
284  * If doing a nonblocking cursor-up and we are unable to acquire the lock,
285  * the cursor remains unchanged.
286  */
287 int
288 hammer_cursor_up(hammer_cursor_t cursor)
289 {
290         int error;
291
292         hammer_cursor_downgrade(cursor);
293
294         /*
295          * Set the node to its parent.  If the parent is NULL we are at
296          * the root of the filesystem and return ENOENT.
297          */
298         hammer_unlock(&cursor->node->lock);
299         hammer_rel_node(cursor->node);
300         cursor->node = cursor->parent;
301         cursor->index = cursor->parent_index;
302         cursor->parent = NULL;
303         cursor->parent_index = 0;
304
305         if (cursor->node == NULL) {
306                 error = ENOENT;
307         } else {
308                 error = hammer_load_cursor_parent(cursor);
309         }
310         return(error);
311 }
312
313 /*
314  * Cursor down through the current node, which must be an internal node.
315  *
316  * This routine adjusts the cursor and sets index to 0.
317  */
318 int
319 hammer_cursor_down(hammer_cursor_t cursor)
320 {
321         hammer_node_t node;
322         hammer_btree_elm_t elm;
323         int error;
324
325         /*
326          * The current node becomes the current parent
327          */
328         hammer_cursor_downgrade(cursor);
329         node = cursor->node;
330         KKASSERT(cursor->index >= 0 && cursor->index < node->ondisk->count);
331         if (cursor->parent) {
332                 hammer_unlock(&cursor->parent->lock);
333                 hammer_rel_node(cursor->parent);
334         }
335         cursor->parent = node;
336         cursor->parent_index = cursor->index;
337         cursor->node = NULL;
338         cursor->index = 0;
339
340         /*
341          * Extract element to push into at (node,index), set bounds.
342          */
343         elm = &node->ondisk->elms[cursor->parent_index];
344
345         /*
346          * Ok, push down into elm.  If elm specifies an internal or leaf
347          * node the current node must be an internal node.  If elm specifies
348          * a spike then the current node must be a leaf node.
349          */
350         switch(elm->base.btype) {
351         case HAMMER_BTREE_TYPE_INTERNAL:
352         case HAMMER_BTREE_TYPE_LEAF:
353                 KKASSERT(node->ondisk->type == HAMMER_BTREE_TYPE_INTERNAL);
354                 KKASSERT(elm->internal.subtree_offset != 0);
355                 cursor->left_bound = &elm[0].internal.base;
356                 cursor->right_bound = &elm[1].internal.base;
357                 node = hammer_get_node(node->volume->hmp,
358                                        elm->internal.subtree_offset,
359                                        &error);
360                 if (error == 0) {
361                         KKASSERT(elm->base.btype == node->ondisk->type);
362                         if (node->ondisk->parent != cursor->parent->node_offset)
363                                 panic("node %p %016llx vs %016llx\n", node, node->ondisk->parent, cursor->parent->node_offset);
364                         KKASSERT(node->ondisk->parent == cursor->parent->node_offset);
365                 }
366                 break;
367         default:
368                 panic("hammer_cursor_down: illegal btype %02x (%c)\n",
369                       elm->base.btype,
370                       (elm->base.btype ? elm->base.btype : '?'));
371                 break;
372         }
373         if (error == 0) {
374                 hammer_lock_sh(&node->lock);
375                 cursor->node = node;
376                 cursor->index = 0;
377         }
378         return(error);
379 }
380