HAMMER 17/many: Refactor IO backend, clean up buffer cache deadlocks.
[dragonfly.git] / sys / vfs / hammer / hammer_cursor.c
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.11 2008/01/10 07:41:03 dillon Exp $
35  */
36
37 /*
38  * HAMMER B-Tree index - cursor support routines
39  */
40 #include "hammer.h"
41
42 static int hammer_load_cursor_parent(hammer_cursor_t cursor);
43 static int hammer_load_cursor_parent_local(hammer_cursor_t cursor);
44 static int hammer_load_cursor_parent_cluster(hammer_cursor_t cursor);
45
46 /*
47  * Initialize a fresh cursor using the B-Tree node cache.  If the cache
48  * is not available initialize a fresh cursor at the root of the root
49  * cluster.
50  */
51 int
52 hammer_init_cursor_hmp(hammer_cursor_t cursor, struct hammer_node **cache,
53                        hammer_mount_t hmp)
54 {
55         hammer_cluster_t cluster;
56         hammer_node_t node;
57         int error;
58
59         bzero(cursor, sizeof(*cursor));
60
61         /*
62          * Step 1 - acquire a locked node from the cache if possible
63          */
64         if (cache && *cache) {
65                 node = hammer_ref_node_safe(hmp, cache, &error);
66                 if (error == 0) {
67                         hammer_lock_ex(&node->lock);
68                         if (node->flags & HAMMER_NODE_DELETED) {
69                                 hammer_unlock(&node->lock);
70                                 hammer_rel_node(node);
71                                 node = NULL;
72                         }
73                 }
74         } else {
75                 node = NULL;
76         }
77
78         /*
79          * Step 2 - If we couldn't get a node from the cache, get
80          * the one from the root of the root cluster.
81          */
82         while (node == NULL) {
83                 cluster = hammer_get_root_cluster(hmp, &error);
84                 if (error)
85                         break;
86                 node = hammer_get_node(cluster,
87                                        cluster->ondisk->clu_btree_root,
88                                        &error);
89                 hammer_rel_cluster(cluster, 0);
90                 if (error)
91                         break;
92                 hammer_lock_ex(&node->lock);
93                 if (node->flags & HAMMER_NODE_DELETED) {
94                         hammer_unlock(&node->lock);
95                         hammer_rel_node(node);
96                         node = NULL;
97                 }
98         }
99
100         /*
101          * Step 3 - finish initializing the cursor by acquiring the parent
102          */
103         cursor->node = node;
104         if (error == 0)
105                 error = hammer_load_cursor_parent(cursor);
106         KKASSERT(error == 0);
107         return(error);
108 }
109
110 /*
111  * Initialize a fresh cursor at the root of the specified cluster and
112  * limit operations to within the cluster.
113  */
114 int
115 hammer_init_cursor_cluster(hammer_cursor_t cursor, hammer_cluster_t cluster)
116 {
117         int error;
118
119         bzero(cursor, sizeof(*cursor));
120         cursor->flags |= HAMMER_CURSOR_INCLUSTER;
121         cursor->node = hammer_get_node(cluster,
122                                        cluster->ondisk->clu_btree_root,
123                                        &error);
124         if (error == 0) {
125                 hammer_lock_ex(&cursor->node->lock);
126                 error = hammer_load_cursor_parent(cursor);
127         }
128         KKASSERT(error == 0);
129         return(error);
130 }
131
132 /*
133  * We are finished with a cursor.  We NULL out various fields as sanity
134  * check, in case the structure is inappropriately used afterwords.
135  */
136 void
137 hammer_done_cursor(hammer_cursor_t cursor)
138 {
139         if (cursor->parent) {
140                 hammer_unlock(&cursor->parent->lock);
141                 hammer_rel_node(cursor->parent);
142                 cursor->parent = NULL;
143         }
144         if (cursor->node) {
145                 hammer_unlock(&cursor->node->lock);
146                 hammer_rel_node(cursor->node);
147                 cursor->node = NULL;
148         }
149         if (cursor->data_buffer) {
150                 hammer_rel_buffer(cursor->data_buffer, 0);
151                 cursor->data_buffer = NULL;
152         }
153         if (cursor->record_buffer) {
154                 hammer_rel_buffer(cursor->record_buffer, 0);
155                 cursor->record_buffer = NULL;
156         }
157         if (cursor->ip)
158                 hammer_mem_done(cursor);
159
160         cursor->data = NULL;
161         cursor->record = NULL;
162         cursor->left_bound = NULL;
163         cursor->right_bound = NULL;
164 }
165
166 /*
167  * Acquire the parent B-Tree node of the specified node, returning a
168  * referenced but unlocked node.  NULL can be returned with *errorp == 0
169  * if node is the root node of the root cluster.
170  */
171 static
172 hammer_node_t
173 hammer_get_parent_node(hammer_node_t node, int *errorp)
174 {
175         hammer_cluster_t cluster;
176         hammer_node_t parent;
177
178         cluster = node->cluster;
179         if (node->ondisk->parent) {
180                 /*
181                  * Local parent
182                  */
183                 parent = hammer_get_node(cluster, node->ondisk->parent, errorp);
184         } else if (cluster->ondisk->clu_btree_parent_vol_no >= 0) {
185                 /*
186                  * At cluster root, locate node in parent cluster
187                  */
188                 hammer_cluster_ondisk_t ondisk;
189                 hammer_cluster_t pcluster;
190                 hammer_volume_t pvolume;
191                 int32_t clu_no;
192                 int32_t vol_no;
193
194                 ondisk = cluster->ondisk;
195                 vol_no = ondisk->clu_btree_parent_vol_no;
196                 clu_no = ondisk->clu_btree_parent_clu_no;
197
198                 /*
199                  * Acquire the node from (volume, cluster, offset)
200                  */
201                 pvolume = hammer_get_volume(cluster->volume->hmp, vol_no,
202                                             errorp);
203                 if (*errorp)
204                         return (NULL);
205                 pcluster = hammer_get_cluster(pvolume, clu_no, errorp, 0);
206                 hammer_rel_volume(pvolume, 0);
207                 if (*errorp)
208                         return (NULL);
209                 parent = hammer_get_node(pcluster,
210                                          ondisk->clu_btree_parent_offset,
211                                          errorp);
212                 hammer_rel_cluster(pcluster, 0);
213         } else {
214                 /*
215                  * At root of root cluster, there is no parent.
216                  */
217                 KKASSERT(cluster->ondisk->clu_btree_parent_vol_no == -1);
218                 parent = NULL;
219                 *errorp = 0;
220         }
221         return(parent);
222 }
223
224 /*
225  * Load the parent of cursor->node into cursor->parent.  There are several
226  * cases.  (1) The parent is in the current cluster.  (2) We are at the
227  * root of the cluster and the parent is in another cluster.  (3) We are at
228  * the root of the root cluster.
229  *
230  * If HAMMER_CURSOR_INCLUSTER is set and we are at the root of the cluster,
231  * we do not access the parent cluster at all and make the cursor look like
232  * its at the root.
233  */
234 static
235 int
236 hammer_load_cursor_parent(hammer_cursor_t cursor)
237 {
238         hammer_cluster_t cluster;
239         int error;
240
241         cluster = cursor->node->cluster;
242
243         if (cursor->node->ondisk->parent) {
244                 error = hammer_load_cursor_parent_local(cursor);
245         } else if (cluster->ondisk->clu_btree_parent_vol_no >= 0 &&
246                    ((cursor->flags & HAMMER_CURSOR_INCLUSTER) == 0)
247         ) {
248                 error = hammer_load_cursor_parent_cluster(cursor);
249         } else {
250                 cursor->parent = NULL;
251                 cursor->parent_index = 0;
252                 cursor->left_bound = &cluster->ondisk->clu_btree_beg;
253                 cursor->right_bound = &cluster->ondisk->clu_btree_end;
254                 error = 0;
255         }
256         return(error);
257 }
258
259 static
260 int
261 hammer_load_cursor_parent_local(hammer_cursor_t cursor)
262 {
263         hammer_node_t node;
264         hammer_node_t parent;
265         hammer_btree_elm_t elm;
266         int error;
267         int i;
268
269         node = cursor->node;
270         parent = hammer_get_node(node->cluster, node->ondisk->parent, &error);
271         if (error)
272                 return(error);
273         elm = NULL;
274         for (i = 0; i < parent->ondisk->count; ++i) {
275                 elm = &parent->ondisk->elms[i];
276                 if (parent->ondisk->elms[i].internal.subtree_offset ==
277                     node->node_offset) {
278                         break;
279                 }
280         }
281         if (i == parent->ondisk->count)
282                 panic("Bad B-Tree link: parent %p node %p\n", parent, node);
283         KKASSERT(i != parent->ondisk->count);
284         KKASSERT(parent->ondisk->elms[i].internal.rec_offset == 0);
285         cursor->parent = parent;
286         cursor->parent_index = i;
287         cursor->left_bound = &elm[0].internal.base;
288         cursor->right_bound = &elm[1].internal.base;
289
290         if (hammer_lock_ex_try(&parent->lock) != 0) {
291                 hammer_unlock(&cursor->node->lock);
292                 hammer_lock_ex(&parent->lock);
293                 hammer_lock_ex(&cursor->node->lock);
294                 /* XXX check node generation count */
295         }
296         return(error);
297 }
298
299 static
300 int
301 hammer_load_cursor_parent_cluster(hammer_cursor_t cursor)
302 {
303         hammer_cluster_ondisk_t ondisk;
304         hammer_cluster_t pcluster;
305         hammer_cluster_t ccluster;
306         hammer_volume_t volume;
307         hammer_node_t node;
308         hammer_node_t parent;
309         hammer_btree_elm_t elm;
310         int32_t clu_no;
311         int32_t vol_no;
312         int error;
313         int i;
314
315         node = cursor->node;
316         ccluster = node->cluster;
317         ondisk = ccluster->ondisk;
318         vol_no = ondisk->clu_btree_parent_vol_no;
319         clu_no = ondisk->clu_btree_parent_clu_no;
320
321         /*
322          * Acquire the node from (volume, cluster, offset)
323          */
324         volume = hammer_get_volume(ccluster->volume->hmp, vol_no, &error);
325         if (error)
326                 return (error);
327         pcluster = hammer_get_cluster(volume, clu_no, &error, 0);
328         hammer_rel_volume(volume, 0);
329         if (error)
330                 return (error);
331         parent = hammer_get_node(pcluster, ondisk->clu_btree_parent_offset,
332                                  &error);
333         hammer_rel_cluster(pcluster, 0);
334         if (error)
335                 return (error);
336
337         /* 
338          * Ok, we have the node.  Locate the inter-cluster element
339          */
340         elm = NULL;
341         for (i = 0; i < parent->ondisk->count; ++i) {
342                 elm = &parent->ondisk->elms[i];
343                 if (elm->internal.rec_offset != 0 &&
344                     elm->internal.subtree_type == HAMMER_BTREE_TYPE_CLUSTER &&
345                     elm->internal.subtree_clu_no == cursor->node->cluster->clu_no) {
346                         break;
347                 }
348         }
349         KKASSERT(i != parent->ondisk->count);
350         cursor->parent = parent;
351         cursor->parent_index = i;
352         cursor->left_bound = &elm[0].internal.base;
353         cursor->right_bound = &elm[1].internal.base;
354
355         KKASSERT(hammer_btree_cmp(cursor->left_bound,
356                  &ccluster->ondisk->clu_btree_beg) <= 0);
357         KKASSERT(hammer_btree_cmp(cursor->right_bound,
358                  &ccluster->ondisk->clu_btree_end) >= 0);
359
360         if (hammer_lock_ex_try(&parent->lock) != 0) {
361                 hammer_unlock(&cursor->node->lock);
362                 hammer_lock_ex(&parent->lock);
363                 hammer_lock_ex(&cursor->node->lock);
364                 /* XXX check node generation count */
365         }
366         return(0);
367 }
368
369 /*
370  * Cursor up to our parent node.  Return ENOENT if we are at the root of
371  * the root cluster.
372  *
373  * If doing a nonblocking cursor-up and we are unable to acquire the lock,
374  * the cursor remains unchanged.
375  */
376 int
377 hammer_cursor_up(hammer_cursor_t cursor, int nonblock)
378 {
379         hammer_node_t save;
380         int error;
381
382         /*
383          * If asked to do this non-blocking acquire a lock on the parent
384          * now, before we mess with the cursor.
385          */
386         if (nonblock) {
387                 save = hammer_get_parent_node(cursor->parent, &error);
388                 if (error)
389                         return(error);
390                 if (save) {
391                         if (hammer_lock_ex_try(&save->lock) != 0) {
392                                 hammer_rel_node(save);
393                                 return(EAGAIN);
394                         }
395                 }
396         } else {
397                 save = NULL;
398         }
399
400         /*
401          * Set the node to its parent.  If the parent is NULL we are at
402          * the root of the root cluster and return ENOENT.
403          */
404         hammer_unlock(&cursor->node->lock);
405         hammer_rel_node(cursor->node);
406         cursor->node = cursor->parent;
407         cursor->index = cursor->parent_index;
408         cursor->parent = NULL;
409         cursor->parent_index = 0;
410
411         if (cursor->node == NULL) {
412                 error = ENOENT;
413         } else if ((cursor->flags & HAMMER_CURSOR_INCLUSTER) &&
414                    cursor->node->ondisk->parent == 0
415         ) {
416                 error = ENOENT;
417         } else {
418                 error = hammer_load_cursor_parent(cursor);
419         }
420         if (save) {
421                 hammer_unlock(&save->lock);
422                 hammer_rel_node(save);
423         }
424         return(error);
425 }
426
427 /*
428  * Set the cursor to the root of the current cursor's cluster.
429  */
430 int
431 hammer_cursor_toroot(hammer_cursor_t cursor)
432 {
433         hammer_cluster_t cluster;
434         int error;
435
436         /*
437          * Already at root of cluster?
438          */
439         if (cursor->node->ondisk->parent == 0) 
440                 return (0);
441
442         /*
443          * Parent is root of cluster?
444          */
445         if (cursor->parent->ondisk->parent == 0)
446                 return (hammer_cursor_up(cursor, 0));
447
448         /*
449          * Ok, reload the cursor with the root of the cluster, then
450          * locate its parent.
451          */
452         cluster = cursor->node->cluster;
453         error = hammer_ref_cluster(cluster);
454         if (error)
455                 return (error);
456
457         hammer_unlock(&cursor->parent->lock);
458         hammer_rel_node(cursor->parent);
459         hammer_unlock(&cursor->node->lock);
460         hammer_rel_node(cursor->node);
461         cursor->parent = NULL;
462         cursor->parent_index = 0;
463
464         cursor->node = hammer_get_node(cluster, cluster->ondisk->clu_btree_root,
465                                        &error);
466         cursor->index = 0;
467         hammer_lock_ex(&cursor->node->lock);
468         hammer_rel_cluster(cluster, 0);
469         if (error == 0)
470                 error = hammer_load_cursor_parent(cursor);
471         return(error);
472 }
473
474 /*
475  * Cursor down through the current node, which must be an internal node.
476  *
477  * This routine adjusts the cursor and sets index to 0.
478  */
479 int
480 hammer_cursor_down(hammer_cursor_t cursor)
481 {
482         hammer_node_t node;
483         hammer_btree_elm_t elm;
484         hammer_volume_t volume;
485         hammer_cluster_t cluster;
486         int32_t vol_no;
487         int32_t clu_no;
488         int error;
489
490         /*
491          * The current node becomes the current parent
492          */
493         node = cursor->node;
494         KKASSERT(node->ondisk->type == HAMMER_BTREE_TYPE_INTERNAL);
495         KKASSERT(cursor->index >= 0 && cursor->index < node->ondisk->count);
496         if (cursor->parent) {
497                 hammer_unlock(&cursor->parent->lock);
498                 hammer_rel_node(cursor->parent);
499         }
500         cursor->parent = node;
501         cursor->parent_index = cursor->index;
502         cursor->node = NULL;
503         cursor->index = 0;
504
505         /*
506          * Extract element to push into at (node,index), set bounds.
507          */
508         elm = &node->ondisk->elms[cursor->parent_index];
509         cursor->left_bound = &elm[0].internal.base;
510         cursor->right_bound = &elm[1].internal.base;
511
512         /*
513          * Ok, push down into elm.  If rec_offset is non-zero this is
514          * an inter-cluster push, otherwise it is a intra-cluster push.
515          *
516          * Cursoring down through a cluster transition when the INCLUSTER
517          * flag is set is not legal.
518          */
519         if (elm->internal.rec_offset) {
520                 KKASSERT((cursor->flags & HAMMER_CURSOR_INCLUSTER) == 0);
521                 vol_no = elm->internal.subtree_vol_no;
522                 clu_no = elm->internal.subtree_clu_no;
523                 volume = hammer_get_volume(node->cluster->volume->hmp,
524                                            vol_no, &error);
525                 KKASSERT(error != EINVAL);
526                 if (error)
527                         return(error);
528                 cluster = hammer_get_cluster(volume, clu_no, &error, 0);
529                 KKASSERT(error != EINVAL);
530                 hammer_rel_volume(volume, 0);
531                 if (error)
532                         return(error);
533                 node = hammer_get_node(cluster,
534                                        cluster->ondisk->clu_btree_root,
535                                        &error);
536                 hammer_rel_cluster(cluster, 0);
537         } else {
538                 KKASSERT(elm->internal.subtree_offset != 0);
539                 node = hammer_get_node(node->cluster,
540                                        elm->internal.subtree_offset,
541                                        &error);
542                 if (error == 0) {
543                         KKASSERT(elm->internal.subtree_type == node->ondisk->type);
544                         if(node->ondisk->parent != cursor->parent->node_offset)
545                                 kprintf("node %p %d vs %d\n", node, node->ondisk->parent, cursor->parent->node_offset);
546                         KKASSERT(node->ondisk->parent == cursor->parent->node_offset);
547                 }
548         }
549         if (error == 0) {
550                 hammer_lock_ex(&node->lock);
551                 cursor->node = node;
552                 cursor->index = 0;
553         }
554         return(error);
555 }
556