HAMMER 38A/Many: Undo/Synchronization and crash recovery
[dragonfly.git] / sys / vfs / hammer / hammer_cursor.c
1 /*
2  * Copyright (c) 2007-2008 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.22 2008/04/24 21:20:33 dillon Exp $
35  */
36
37 /*
38  * HAMMER B-Tree index - cursor support routines
39  */
40 #include "hammer.h"
41
42 static int hammer_load_cursor_parent(hammer_cursor_t cursor);
43
44 /*
45  * Initialize a fresh cursor using the B-Tree node cache.  If the cache
46  * is not available initialize a fresh cursor at the root of the filesystem.
47  */
48 int
49 hammer_init_cursor(hammer_transaction_t trans, hammer_cursor_t cursor,
50                    struct hammer_node **cache)
51 {
52         hammer_volume_t volume;
53         hammer_node_t node;
54         int error;
55
56         bzero(cursor, sizeof(*cursor));
57
58         cursor->trans = trans;
59
60         /*
61          * Step 1 - acquire a locked node from the cache if possible
62          */
63         if (cache && *cache) {
64                 node = hammer_ref_node_safe(trans->hmp, cache, &error);
65                 if (error == 0) {
66                         hammer_lock_sh(&node->lock);
67                         if (node->flags & HAMMER_NODE_DELETED) {
68                                 hammer_unlock(&node->lock);
69                                 hammer_rel_node(node);
70                                 node = NULL;
71                         }
72                 }
73         } else {
74                 node = NULL;
75         }
76
77         /*
78          * Step 2 - If we couldn't get a node from the cache, get
79          * the one from the root of the filesystem.
80          */
81         while (node == NULL) {
82                 volume = hammer_get_root_volume(trans->hmp, &error);
83                 if (error)
84                         break;
85                 node = hammer_get_node(trans->hmp,
86                                        volume->ondisk->vol0_btree_root, &error);
87                 hammer_rel_volume(volume, 0);
88                 if (error)
89                         break;
90                 hammer_lock_sh(&node->lock);
91                 if (node->flags & HAMMER_NODE_DELETED) {
92                         hammer_unlock(&node->lock);
93                         hammer_rel_node(node);
94                         node = NULL;
95                 }
96         }
97
98         /*
99          * Step 3 - finish initializing the cursor by acquiring the parent
100          */
101         cursor->node = node;
102         if (error == 0)
103                 error = hammer_load_cursor_parent(cursor);
104         KKASSERT(error == 0);
105         /* if (error) hammer_done_cursor(cursor); */
106         return(error);
107 }
108
109 /*
110  * We are finished with a cursor.  We NULL out various fields as sanity
111  * check, in case the structure is inappropriately used afterwords.
112  */
113 void
114 hammer_done_cursor(hammer_cursor_t cursor)
115 {
116         if (cursor->parent) {
117                 hammer_unlock(&cursor->parent->lock);
118                 hammer_rel_node(cursor->parent);
119                 cursor->parent = NULL;
120         }
121         if (cursor->node) {
122                 hammer_unlock(&cursor->node->lock);
123                 hammer_rel_node(cursor->node);
124                 cursor->node = NULL;
125         }
126         if (cursor->data_buffer) {
127                 hammer_rel_buffer(cursor->data_buffer, 0);
128                 cursor->data_buffer = NULL;
129         }
130         if (cursor->record_buffer) {
131                 hammer_rel_buffer(cursor->record_buffer, 0);
132                 cursor->record_buffer = NULL;
133         }
134         if (cursor->ip)
135                 hammer_mem_done(cursor);
136
137         /*
138          * If we deadlocked this node will be referenced.  Do a quick
139          * lock/unlock to wait for the deadlock condition to clear.
140          */
141         if (cursor->deadlk_node) {
142                 hammer_lock_ex(&cursor->deadlk_node->lock);
143                 hammer_unlock(&cursor->deadlk_node->lock);
144                 hammer_rel_node(cursor->deadlk_node);
145                 cursor->deadlk_node = NULL;
146         }
147         if (cursor->deadlk_rec) {
148                 hammer_wait_mem_record(cursor->deadlk_rec);
149                 hammer_rel_mem_record(cursor->deadlk_rec);
150                 cursor->deadlk_rec = NULL;
151         }
152
153         cursor->data = NULL;
154         cursor->record = NULL;
155         cursor->left_bound = NULL;
156         cursor->right_bound = NULL;
157         cursor->trans = NULL;
158 }
159
160 /*
161  * Upgrade cursor->node and cursor->parent to exclusive locks.  This
162  * function can return EDEADLK.
163  *
164  * The lock must already be either held shared or already held exclusively
165  * by us.
166  *
167  * If we fail to upgrade the lock and cursor->deadlk_node is NULL, 
168  * we add another reference to the node that failed and set
169  * cursor->deadlk_node so hammer_done_cursor() can block on it.
170  */
171 int
172 hammer_cursor_upgrade(hammer_cursor_t cursor)
173 {
174         int error;
175
176         error = hammer_lock_upgrade(&cursor->node->lock);
177         if (error && cursor->deadlk_node == NULL) {
178                 cursor->deadlk_node = cursor->node;
179                 hammer_ref_node(cursor->deadlk_node);
180         } else if (error == 0 && cursor->parent) {
181                 error = hammer_lock_upgrade(&cursor->parent->lock);
182                 if (error && cursor->deadlk_node == NULL) {
183                         cursor->deadlk_node = cursor->parent;
184                         hammer_ref_node(cursor->deadlk_node);
185                 }
186         }
187         return(error);
188 }
189
190 /*
191  * Downgrade cursor->node and cursor->parent to shared locks.  This
192  * function can return EDEADLK.
193  */
194 void
195 hammer_cursor_downgrade(hammer_cursor_t cursor)
196 {
197         if (hammer_lock_excl_owned(&cursor->node->lock, curthread))
198                 hammer_lock_downgrade(&cursor->node->lock);
199         if (cursor->parent &&
200             hammer_lock_excl_owned(&cursor->parent->lock, curthread)) {
201                 hammer_lock_downgrade(&cursor->parent->lock);
202         }
203 }
204
205 /*
206  * Seek the cursor to the specified node and index.
207  */
208 int
209 hammer_cursor_seek(hammer_cursor_t cursor, hammer_node_t node, int index)
210 {
211         int error;
212
213         hammer_cursor_downgrade(cursor);
214         error = 0;
215
216         if (cursor->node != node) {
217                 hammer_unlock(&cursor->node->lock);
218                 hammer_rel_node(cursor->node);
219                 cursor->node = node;
220                 hammer_ref_node(node);
221                 hammer_lock_sh(&node->lock);
222
223                 if (cursor->parent) {
224                         hammer_unlock(&cursor->parent->lock);
225                         hammer_rel_node(cursor->parent);
226                         cursor->parent = NULL;
227                         cursor->parent_index = 0;
228                 }
229                 error = hammer_load_cursor_parent(cursor);
230         }
231         cursor->index = index;
232         return (error);
233 }
234
235 /*
236  * Load the parent of cursor->node into cursor->parent.
237  */
238 static
239 int
240 hammer_load_cursor_parent(hammer_cursor_t cursor)
241 {
242         hammer_mount_t hmp;
243         hammer_node_t parent;
244         hammer_node_t node;
245         hammer_btree_elm_t elm;
246         int error;
247         int i;
248
249         hmp = cursor->trans->hmp;
250
251         if (cursor->node->ondisk->parent) {
252                 node = cursor->node;
253                 parent = hammer_get_node(hmp, node->ondisk->parent, &error);
254                 if (error)
255                         return(error);
256                 hammer_lock_sh(&parent->lock);
257                 elm = NULL;
258                 for (i = 0; i < parent->ondisk->count; ++i) {
259                         elm = &parent->ondisk->elms[i];
260                         if (parent->ondisk->elms[i].internal.subtree_offset ==
261                             node->node_offset) {
262                                 break;
263                         }
264                 }
265                 if (i == parent->ondisk->count) {
266                         hammer_unlock(&parent->lock);
267                         panic("Bad B-Tree link: parent %p node %p\n", parent, node);
268                 }
269                 KKASSERT(i != parent->ondisk->count);
270                 cursor->parent = parent;
271                 cursor->parent_index = i;
272                 cursor->left_bound = &elm[0].internal.base;
273                 cursor->right_bound = &elm[1].internal.base;
274                 return(error);
275         } else {
276                 cursor->parent = NULL;
277                 cursor->parent_index = 0;
278                 cursor->left_bound = &hmp->root_btree_beg;
279                 cursor->right_bound = &hmp->root_btree_end;
280                 error = 0;
281         }
282         return(error);
283 }
284
285 /*
286  * Cursor up to our parent node.  Return ENOENT if we are at the root of
287  * the filesystem.
288  *
289  * If doing a nonblocking cursor-up and we are unable to acquire the lock,
290  * the cursor remains unchanged.
291  */
292 int
293 hammer_cursor_up(hammer_cursor_t cursor)
294 {
295         int error;
296
297         hammer_cursor_downgrade(cursor);
298
299         /*
300          * Set the node to its parent.  If the parent is NULL we are at
301          * the root of the filesystem and return ENOENT.
302          */
303         hammer_unlock(&cursor->node->lock);
304         hammer_rel_node(cursor->node);
305         cursor->node = cursor->parent;
306         cursor->index = cursor->parent_index;
307         cursor->parent = NULL;
308         cursor->parent_index = 0;
309
310         if (cursor->node == NULL) {
311                 error = ENOENT;
312         } else {
313                 error = hammer_load_cursor_parent(cursor);
314         }
315         return(error);
316 }
317
318 /*
319  * Cursor down through the current node, which must be an internal node.
320  *
321  * This routine adjusts the cursor and sets index to 0.
322  */
323 int
324 hammer_cursor_down(hammer_cursor_t cursor)
325 {
326         hammer_node_t node;
327         hammer_btree_elm_t elm;
328         int error;
329
330         /*
331          * The current node becomes the current parent
332          */
333         hammer_cursor_downgrade(cursor);
334         node = cursor->node;
335         KKASSERT(cursor->index >= 0 && cursor->index < node->ondisk->count);
336         if (cursor->parent) {
337                 hammer_unlock(&cursor->parent->lock);
338                 hammer_rel_node(cursor->parent);
339         }
340         cursor->parent = node;
341         cursor->parent_index = cursor->index;
342         cursor->node = NULL;
343         cursor->index = 0;
344
345         /*
346          * Extract element to push into at (node,index), set bounds.
347          */
348         elm = &node->ondisk->elms[cursor->parent_index];
349
350         /*
351          * Ok, push down into elm.  If elm specifies an internal or leaf
352          * node the current node must be an internal node.  If elm specifies
353          * a spike then the current node must be a leaf node.
354          */
355         switch(elm->base.btype) {
356         case HAMMER_BTREE_TYPE_INTERNAL:
357         case HAMMER_BTREE_TYPE_LEAF:
358                 KKASSERT(node->ondisk->type == HAMMER_BTREE_TYPE_INTERNAL);
359                 KKASSERT(elm->internal.subtree_offset != 0);
360                 cursor->left_bound = &elm[0].internal.base;
361                 cursor->right_bound = &elm[1].internal.base;
362                 node = hammer_get_node(cursor->trans->hmp,
363                                        elm->internal.subtree_offset, &error);
364                 if (error == 0) {
365                         KASSERT(elm->base.btype == node->ondisk->type, ("BTYPE MISMATCH %c %c NODE %p\n", elm->base.btype, node->ondisk->type, node));
366                         if (node->ondisk->parent != cursor->parent->node_offset)
367                                 panic("node %p %016llx vs %016llx\n", node, node->ondisk->parent, cursor->parent->node_offset);
368                         KKASSERT(node->ondisk->parent == cursor->parent->node_offset);
369                 }
370                 break;
371         default:
372                 panic("hammer_cursor_down: illegal btype %02x (%c)\n",
373                       elm->base.btype,
374                       (elm->base.btype ? elm->base.btype : '?'));
375                 break;
376         }
377         if (error == 0) {
378                 hammer_lock_sh(&node->lock);
379                 cursor->node = node;
380                 cursor->index = 0;
381         }
382         return(error);
383 }
384