HAMMER 33/many: Expand transaction processing, fix bug in B-Tree
[dragonfly.git] / sys / vfs / hammer / hammer_cursor.c
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.19 2008/03/19 20:18:17 dillon Exp $
35  */
36
37 /*
38  * HAMMER B-Tree index - cursor support routines
39  */
40 #include "hammer.h"
41
42 static int hammer_load_cursor_parent(hammer_cursor_t cursor);
43
44 /*
45  * Initialize a fresh cursor using the B-Tree node cache.  If the cache
46  * is not available initialize a fresh cursor at the root of the filesystem.
47  */
48 int
49 hammer_init_cursor(hammer_transaction_t trans, hammer_cursor_t cursor,
50                    struct hammer_node **cache)
51 {
52         hammer_volume_t volume;
53         hammer_node_t node;
54         int error;
55
56         bzero(cursor, sizeof(*cursor));
57
58         cursor->trans = trans;
59
60         /*
61          * Step 1 - acquire a locked node from the cache if possible
62          */
63         if (cache && *cache) {
64                 node = hammer_ref_node_safe(trans->hmp, cache, &error);
65                 if (error == 0) {
66                         hammer_lock_sh(&node->lock);
67                         if (node->flags & HAMMER_NODE_DELETED) {
68                                 hammer_unlock(&node->lock);
69                                 hammer_rel_node(node);
70                                 node = NULL;
71                         }
72                 }
73         } else {
74                 node = NULL;
75         }
76
77         /*
78          * Step 2 - If we couldn't get a node from the cache, get
79          * the one from the root of the filesystem.
80          */
81         while (node == NULL) {
82                 volume = hammer_get_root_volume(trans->hmp, &error);
83                 if (error)
84                         break;
85                 node = hammer_get_node(trans->hmp,
86                                        volume->ondisk->vol0_btree_root, &error);
87                 hammer_rel_volume(volume, 0);
88                 if (error)
89                         break;
90                 hammer_lock_sh(&node->lock);
91                 if (node->flags & HAMMER_NODE_DELETED) {
92                         hammer_unlock(&node->lock);
93                         hammer_rel_node(node);
94                         node = NULL;
95                 }
96         }
97
98         /*
99          * Step 3 - finish initializing the cursor by acquiring the parent
100          */
101         cursor->node = node;
102         if (error == 0)
103                 error = hammer_load_cursor_parent(cursor);
104         KKASSERT(error == 0);
105         return(error);
106 }
107
108 /*
109  * We are finished with a cursor.  We NULL out various fields as sanity
110  * check, in case the structure is inappropriately used afterwords.
111  */
112 void
113 hammer_done_cursor(hammer_cursor_t cursor)
114 {
115         if (cursor->parent) {
116                 hammer_unlock(&cursor->parent->lock);
117                 hammer_rel_node(cursor->parent);
118                 cursor->parent = NULL;
119         }
120         if (cursor->node) {
121                 hammer_unlock(&cursor->node->lock);
122                 hammer_rel_node(cursor->node);
123                 cursor->node = NULL;
124         }
125         if (cursor->data_buffer) {
126                 hammer_rel_buffer(cursor->data_buffer, 0);
127                 cursor->data_buffer = NULL;
128         }
129         if (cursor->record_buffer) {
130                 hammer_rel_buffer(cursor->record_buffer, 0);
131                 cursor->record_buffer = NULL;
132         }
133         if (cursor->ip)
134                 hammer_mem_done(cursor);
135
136         /*
137          * If we deadlocked this node will be referenced.  Do a quick
138          * lock/unlock to wait for the deadlock condition to clear.
139          */
140         if (cursor->deadlk_node) {
141                 hammer_lock_ex(&cursor->deadlk_node->lock);
142                 hammer_unlock(&cursor->deadlk_node->lock);
143                 hammer_rel_node(cursor->deadlk_node);
144                 cursor->deadlk_node = NULL;
145         }
146
147         cursor->data = NULL;
148         cursor->record = NULL;
149         cursor->left_bound = NULL;
150         cursor->right_bound = NULL;
151         cursor->trans = NULL;
152 }
153
154 /*
155  * Upgrade cursor->node and cursor->parent to exclusive locks.  This
156  * function can return EDEADLK.
157  *
158  * If we fail to upgrade the lock and cursor->deadlk_node is NULL, 
159  * we add another reference to the node that failed and set
160  * cursor->deadlk_node so hammer_done_cursor() can block on it.
161  */
162 int
163 hammer_cursor_upgrade(hammer_cursor_t cursor)
164 {
165         int error;
166
167         if (hammer_lock_held(&cursor->node->lock) < 0) {
168                 error = hammer_lock_upgrade(&cursor->node->lock);
169                 if (error && cursor->deadlk_node == NULL) {
170                         cursor->deadlk_node = cursor->node;
171                         hammer_ref_node(cursor->deadlk_node);
172                 }
173         } else {
174                 error = 0;
175         }
176         if (cursor->parent && hammer_lock_held(&cursor->parent->lock) < 0) {
177                 error = hammer_lock_upgrade(&cursor->parent->lock);
178                 if (error && cursor->deadlk_node == NULL) {
179                         cursor->deadlk_node = cursor->parent;
180                         hammer_ref_node(cursor->deadlk_node);
181                 }
182         } else {
183                 error = 0;
184         }
185         return(error);
186 }
187
188 /*
189  * Downgrade cursor->node and cursor->parent to shared locks.  This
190  * function can return EDEADLK.
191  */
192 void
193 hammer_cursor_downgrade(hammer_cursor_t cursor)
194 {
195         if (hammer_lock_held(&cursor->node->lock) > 0)
196                 hammer_lock_downgrade(&cursor->node->lock);
197         if (cursor->parent && hammer_lock_held(&cursor->parent->lock) > 0)
198                 hammer_lock_downgrade(&cursor->parent->lock);
199 }
200
201 /*
202  * Seek the cursor to the specified node and index.
203  */
204 int
205 hammer_cursor_seek(hammer_cursor_t cursor, hammer_node_t node, int index)
206 {
207         int error;
208
209         hammer_cursor_downgrade(cursor);
210         error = 0;
211
212         if (cursor->node != node) {
213                 hammer_unlock(&cursor->node->lock);
214                 hammer_rel_node(cursor->node);
215                 cursor->node = node;
216                 cursor->index = index;
217                 hammer_ref_node(node);
218                 hammer_lock_sh(&node->lock);
219
220                 if (cursor->parent) {
221                         hammer_unlock(&cursor->parent->lock);
222                         hammer_rel_node(cursor->parent);
223                         cursor->parent = NULL;
224                         cursor->parent_index = 0;
225                 }
226                 error = hammer_load_cursor_parent(cursor);
227         }
228         return (error);
229 }
230
231 /*
232  * Load the parent of cursor->node into cursor->parent.
233  */
234 static
235 int
236 hammer_load_cursor_parent(hammer_cursor_t cursor)
237 {
238         hammer_mount_t hmp;
239         hammer_node_t parent;
240         hammer_node_t node;
241         hammer_btree_elm_t elm;
242         int error;
243         int i;
244
245         hmp = cursor->trans->hmp;
246
247         if (cursor->node->ondisk->parent) {
248                 node = cursor->node;
249                 parent = hammer_get_node(hmp, node->ondisk->parent, &error);
250                 if (error)
251                         return(error);
252                 elm = NULL;
253                 for (i = 0; i < parent->ondisk->count; ++i) {
254                         elm = &parent->ondisk->elms[i];
255                         if (parent->ondisk->elms[i].internal.subtree_offset ==
256                             node->node_offset) {
257                                 break;
258                         }
259                 }
260                 if (i == parent->ondisk->count)
261                         panic("Bad B-Tree link: parent %p node %p\n", parent, node);
262                 KKASSERT(i != parent->ondisk->count);
263                 cursor->parent = parent;
264                 cursor->parent_index = i;
265                 cursor->left_bound = &elm[0].internal.base;
266                 cursor->right_bound = &elm[1].internal.base;
267
268                 hammer_lock_sh(&parent->lock);
269                 return(error);
270         } else {
271                 cursor->parent = NULL;
272                 cursor->parent_index = 0;
273                 cursor->left_bound = &hmp->root_btree_beg;
274                 cursor->right_bound = &hmp->root_btree_end;
275                 error = 0;
276         }
277         return(error);
278 }
279
280 /*
281  * Cursor up to our parent node.  Return ENOENT if we are at the root of
282  * the filesystem.
283  *
284  * If doing a nonblocking cursor-up and we are unable to acquire the lock,
285  * the cursor remains unchanged.
286  */
287 int
288 hammer_cursor_up(hammer_cursor_t cursor)
289 {
290         int error;
291
292         hammer_cursor_downgrade(cursor);
293
294         /*
295          * Set the node to its parent.  If the parent is NULL we are at
296          * the root of the filesystem and return ENOENT.
297          */
298         hammer_unlock(&cursor->node->lock);
299         hammer_rel_node(cursor->node);
300         cursor->node = cursor->parent;
301         cursor->index = cursor->parent_index;
302         cursor->parent = NULL;
303         cursor->parent_index = 0;
304
305         if (cursor->node == NULL) {
306                 error = ENOENT;
307         } else {
308                 error = hammer_load_cursor_parent(cursor);
309         }
310         return(error);
311 }
312
313 /*
314  * Cursor down through the current node, which must be an internal node.
315  *
316  * This routine adjusts the cursor and sets index to 0.
317  */
318 int
319 hammer_cursor_down(hammer_cursor_t cursor)
320 {
321         hammer_node_t node;
322         hammer_btree_elm_t elm;
323         int error;
324
325         /*
326          * The current node becomes the current parent
327          */
328         hammer_cursor_downgrade(cursor);
329         node = cursor->node;
330         KKASSERT(cursor->index >= 0 && cursor->index < node->ondisk->count);
331         if (cursor->parent) {
332                 hammer_unlock(&cursor->parent->lock);
333                 hammer_rel_node(cursor->parent);
334         }
335         cursor->parent = node;
336         cursor->parent_index = cursor->index;
337         cursor->node = NULL;
338         cursor->index = 0;
339
340         /*
341          * Extract element to push into at (node,index), set bounds.
342          */
343         elm = &node->ondisk->elms[cursor->parent_index];
344
345         /*
346          * Ok, push down into elm.  If elm specifies an internal or leaf
347          * node the current node must be an internal node.  If elm specifies
348          * a spike then the current node must be a leaf node.
349          */
350         switch(elm->base.btype) {
351         case HAMMER_BTREE_TYPE_INTERNAL:
352         case HAMMER_BTREE_TYPE_LEAF:
353                 KKASSERT(node->ondisk->type == HAMMER_BTREE_TYPE_INTERNAL);
354                 KKASSERT(elm->internal.subtree_offset != 0);
355                 cursor->left_bound = &elm[0].internal.base;
356                 cursor->right_bound = &elm[1].internal.base;
357                 node = hammer_get_node(cursor->trans->hmp,
358                                        elm->internal.subtree_offset, &error);
359                 if (error == 0) {
360                         KASSERT(elm->base.btype == node->ondisk->type, ("BTYPE MISMATCH %c %c NODE %p\n", elm->base.btype, node->ondisk->type, node));
361                         if (node->ondisk->parent != cursor->parent->node_offset)
362                                 panic("node %p %016llx vs %016llx\n", node, node->ondisk->parent, cursor->parent->node_offset);
363                         KKASSERT(node->ondisk->parent == cursor->parent->node_offset);
364                 }
365                 break;
366         default:
367                 panic("hammer_cursor_down: illegal btype %02x (%c)\n",
368                       elm->base.btype,
369                       (elm->base.btype ? elm->base.btype : '?'));
370                 break;
371         }
372         if (error == 0) {
373                 hammer_lock_sh(&node->lock);
374                 cursor->node = node;
375                 cursor->index = 0;
376         }
377         return(error);
378 }
379