HAMMER 28/many: Implement zoned blockmap
[dragonfly.git] / sys / vfs / hammer / hammer_cursor.c
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.18 2008/02/10 09:51:01 dillon Exp $
35  */
36
37 /*
38  * HAMMER B-Tree index - cursor support routines
39  */
40 #include "hammer.h"
41
42 static int hammer_load_cursor_parent(hammer_cursor_t cursor);
43
44 /*
45  * Initialize a fresh cursor using the B-Tree node cache.  If the cache
46  * is not available initialize a fresh cursor at the root of the filesystem.
47  */
48 int
49 hammer_init_cursor_hmp(hammer_cursor_t cursor, struct hammer_node **cache,
50                        hammer_mount_t hmp)
51 {
52         hammer_volume_t volume;
53         hammer_node_t node;
54         int error;
55
56         bzero(cursor, sizeof(*cursor));
57
58         /*
59          * Step 1 - acquire a locked node from the cache if possible
60          */
61         if (cache && *cache) {
62                 node = hammer_ref_node_safe(hmp, cache, &error);
63                 if (error == 0) {
64                         hammer_lock_sh(&node->lock);
65                         if (node->flags & HAMMER_NODE_DELETED) {
66                                 hammer_unlock(&node->lock);
67                                 hammer_rel_node(node);
68                                 node = NULL;
69                         }
70                 }
71         } else {
72                 node = NULL;
73         }
74
75         /*
76          * Step 2 - If we couldn't get a node from the cache, get
77          * the one from the root of the filesystem.
78          */
79         while (node == NULL) {
80                 volume = hammer_get_root_volume(hmp, &error);
81                 if (error)
82                         break;
83                 node = hammer_get_node(hmp, volume->ondisk->vol0_btree_root,
84                                        &error);
85                 hammer_rel_volume(volume, 0);
86                 if (error)
87                         break;
88                 hammer_lock_sh(&node->lock);
89                 if (node->flags & HAMMER_NODE_DELETED) {
90                         hammer_unlock(&node->lock);
91                         hammer_rel_node(node);
92                         node = NULL;
93                 }
94         }
95
96         /*
97          * Step 3 - finish initializing the cursor by acquiring the parent
98          */
99         cursor->node = node;
100         if (error == 0)
101                 error = hammer_load_cursor_parent(cursor);
102         KKASSERT(error == 0);
103         return(error);
104 }
105
106 /*
107  * We are finished with a cursor.  We NULL out various fields as sanity
108  * check, in case the structure is inappropriately used afterwords.
109  */
110 void
111 hammer_done_cursor(hammer_cursor_t cursor)
112 {
113         if (cursor->parent) {
114                 hammer_unlock(&cursor->parent->lock);
115                 hammer_rel_node(cursor->parent);
116                 cursor->parent = NULL;
117         }
118         if (cursor->node) {
119                 hammer_unlock(&cursor->node->lock);
120                 hammer_rel_node(cursor->node);
121                 cursor->node = NULL;
122         }
123         if (cursor->data_buffer) {
124                 hammer_rel_buffer(cursor->data_buffer, 0);
125                 cursor->data_buffer = NULL;
126         }
127         if (cursor->record_buffer) {
128                 hammer_rel_buffer(cursor->record_buffer, 0);
129                 cursor->record_buffer = NULL;
130         }
131         if (cursor->ip)
132                 hammer_mem_done(cursor);
133
134         /*
135          * If we deadlocked this node will be referenced.  Do a quick
136          * lock/unlock to wait for the deadlock condition to clear.
137          */
138         if (cursor->deadlk_node) {
139                 hammer_lock_ex(&cursor->deadlk_node->lock);
140                 hammer_unlock(&cursor->deadlk_node->lock);
141                 hammer_rel_node(cursor->deadlk_node);
142                 cursor->deadlk_node = NULL;
143         }
144
145         cursor->data = NULL;
146         cursor->record = NULL;
147         cursor->left_bound = NULL;
148         cursor->right_bound = NULL;
149 }
150
151 /*
152  * Upgrade cursor->node and cursor->parent to exclusive locks.  This
153  * function can return EDEADLK.
154  *
155  * If we fail to upgrade the lock and cursor->deadlk_node is NULL, 
156  * we add another reference to the node that failed and set
157  * cursor->deadlk_node so hammer_done_cursor() can block on it.
158  */
159 int
160 hammer_cursor_upgrade(hammer_cursor_t cursor)
161 {
162         int error;
163
164         if (hammer_lock_held(&cursor->node->lock) < 0) {
165                 error = hammer_lock_upgrade(&cursor->node->lock);
166                 if (error && cursor->deadlk_node == NULL) {
167                         cursor->deadlk_node = cursor->node;
168                         hammer_ref_node(cursor->deadlk_node);
169                 }
170         } else {
171                 error = 0;
172         }
173         if (cursor->parent && hammer_lock_held(&cursor->parent->lock) < 0) {
174                 error = hammer_lock_upgrade(&cursor->parent->lock);
175                 if (error && cursor->deadlk_node == NULL) {
176                         cursor->deadlk_node = cursor->parent;
177                         hammer_ref_node(cursor->deadlk_node);
178                 }
179         } else {
180                 error = 0;
181         }
182         return(error);
183 }
184
185 /*
186  * Downgrade cursor->node and cursor->parent to shared locks.  This
187  * function can return EDEADLK.
188  */
189 void
190 hammer_cursor_downgrade(hammer_cursor_t cursor)
191 {
192         if (hammer_lock_held(&cursor->node->lock) > 0)
193                 hammer_lock_downgrade(&cursor->node->lock);
194         if (cursor->parent && hammer_lock_held(&cursor->parent->lock) > 0)
195                 hammer_lock_downgrade(&cursor->parent->lock);
196 }
197
198 /*
199  * Seek the cursor to the specified node and index.
200  */
201 int
202 hammer_cursor_seek(hammer_cursor_t cursor, hammer_node_t node, int index)
203 {
204         int error;
205
206         hammer_cursor_downgrade(cursor);
207         error = 0;
208
209         if (cursor->node != node) {
210                 hammer_unlock(&cursor->node->lock);
211                 hammer_rel_node(cursor->node);
212                 cursor->node = node;
213                 cursor->index = index;
214                 hammer_ref_node(node);
215                 hammer_lock_sh(&node->lock);
216
217                 if (cursor->parent) {
218                         hammer_unlock(&cursor->parent->lock);
219                         hammer_rel_node(cursor->parent);
220                         cursor->parent = NULL;
221                         cursor->parent_index = 0;
222                 }
223                 error = hammer_load_cursor_parent(cursor);
224         }
225         return (error);
226 }
227
228 /*
229  * Load the parent of cursor->node into cursor->parent.
230  */
231 static
232 int
233 hammer_load_cursor_parent(hammer_cursor_t cursor)
234 {
235         hammer_mount_t hmp;
236         hammer_node_t parent;
237         hammer_node_t node;
238         hammer_btree_elm_t elm;
239         int error;
240         int i;
241
242         hmp = cursor->node->hmp;
243
244         if (cursor->node->ondisk->parent) {
245                 node = cursor->node;
246                 parent = hammer_get_node(node->hmp,
247                                          node->ondisk->parent, &error);
248                 if (error)
249                         return(error);
250                 elm = NULL;
251                 for (i = 0; i < parent->ondisk->count; ++i) {
252                         elm = &parent->ondisk->elms[i];
253                         if (parent->ondisk->elms[i].internal.subtree_offset ==
254                             node->node_offset) {
255                                 break;
256                         }
257                 }
258                 if (i == parent->ondisk->count)
259                         panic("Bad B-Tree link: parent %p node %p\n", parent, node);
260                 KKASSERT(i != parent->ondisk->count);
261                 cursor->parent = parent;
262                 cursor->parent_index = i;
263                 cursor->left_bound = &elm[0].internal.base;
264                 cursor->right_bound = &elm[1].internal.base;
265
266                 hammer_lock_sh(&parent->lock);
267                 return(error);
268         } else {
269                 cursor->parent = NULL;
270                 cursor->parent_index = 0;
271                 cursor->left_bound = &hmp->root_btree_beg;
272                 cursor->right_bound = &hmp->root_btree_end;
273                 error = 0;
274         }
275         return(error);
276 }
277
278 /*
279  * Cursor up to our parent node.  Return ENOENT if we are at the root of
280  * the filesystem.
281  *
282  * If doing a nonblocking cursor-up and we are unable to acquire the lock,
283  * the cursor remains unchanged.
284  */
285 int
286 hammer_cursor_up(hammer_cursor_t cursor)
287 {
288         int error;
289
290         hammer_cursor_downgrade(cursor);
291
292         /*
293          * Set the node to its parent.  If the parent is NULL we are at
294          * the root of the filesystem and return ENOENT.
295          */
296         hammer_unlock(&cursor->node->lock);
297         hammer_rel_node(cursor->node);
298         cursor->node = cursor->parent;
299         cursor->index = cursor->parent_index;
300         cursor->parent = NULL;
301         cursor->parent_index = 0;
302
303         if (cursor->node == NULL) {
304                 error = ENOENT;
305         } else {
306                 error = hammer_load_cursor_parent(cursor);
307         }
308         return(error);
309 }
310
311 /*
312  * Cursor down through the current node, which must be an internal node.
313  *
314  * This routine adjusts the cursor and sets index to 0.
315  */
316 int
317 hammer_cursor_down(hammer_cursor_t cursor)
318 {
319         hammer_node_t node;
320         hammer_btree_elm_t elm;
321         int error;
322
323         /*
324          * The current node becomes the current parent
325          */
326         hammer_cursor_downgrade(cursor);
327         node = cursor->node;
328         KKASSERT(cursor->index >= 0 && cursor->index < node->ondisk->count);
329         if (cursor->parent) {
330                 hammer_unlock(&cursor->parent->lock);
331                 hammer_rel_node(cursor->parent);
332         }
333         cursor->parent = node;
334         cursor->parent_index = cursor->index;
335         cursor->node = NULL;
336         cursor->index = 0;
337
338         /*
339          * Extract element to push into at (node,index), set bounds.
340          */
341         elm = &node->ondisk->elms[cursor->parent_index];
342
343         /*
344          * Ok, push down into elm.  If elm specifies an internal or leaf
345          * node the current node must be an internal node.  If elm specifies
346          * a spike then the current node must be a leaf node.
347          */
348         switch(elm->base.btype) {
349         case HAMMER_BTREE_TYPE_INTERNAL:
350         case HAMMER_BTREE_TYPE_LEAF:
351                 KKASSERT(node->ondisk->type == HAMMER_BTREE_TYPE_INTERNAL);
352                 KKASSERT(elm->internal.subtree_offset != 0);
353                 cursor->left_bound = &elm[0].internal.base;
354                 cursor->right_bound = &elm[1].internal.base;
355                 node = hammer_get_node(node->hmp, elm->internal.subtree_offset,
356                                        &error);
357                 if (error == 0) {
358                         KASSERT(elm->base.btype == node->ondisk->type, ("BTYPE MISMATCH %c %c NODE %p\n", elm->base.btype, node->ondisk->type, node));
359                         if (node->ondisk->parent != cursor->parent->node_offset)
360                                 panic("node %p %016llx vs %016llx\n", node, node->ondisk->parent, cursor->parent->node_offset);
361                         KKASSERT(node->ondisk->parent == cursor->parent->node_offset);
362                 }
363                 break;
364         default:
365                 panic("hammer_cursor_down: illegal btype %02x (%c)\n",
366                       elm->base.btype,
367                       (elm->base.btype ? elm->base.btype : '?'));
368                 break;
369         }
370         if (error == 0) {
371                 hammer_lock_sh(&node->lock);
372                 cursor->node = node;
373                 cursor->index = 0;
374         }
375         return(error);
376 }
377