2 * Copyright (c) 2007 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.16 2008/02/05 07:58:43 dillon Exp $
38 * HAMMER B-Tree index - cursor support routines
42 static int hammer_load_cursor_parent(hammer_cursor_t cursor);
43 static int hammer_load_cursor_parent_local(hammer_cursor_t cursor);
44 static int hammer_load_cursor_parent_cluster(hammer_cursor_t cursor);
47 * Initialize a fresh cursor using the B-Tree node cache. If the cache
48 * is not available initialize a fresh cursor at the root of the root
52 hammer_init_cursor_hmp(hammer_cursor_t cursor, struct hammer_node **cache,
55 hammer_cluster_t cluster;
59 bzero(cursor, sizeof(*cursor));
62 * Step 1 - acquire a locked node from the cache if possible
64 if (cache && *cache) {
65 node = hammer_ref_node_safe(hmp, cache, &error);
67 hammer_lock_sh(&node->lock);
68 if (node->flags & HAMMER_NODE_DELETED) {
69 hammer_unlock(&node->lock);
70 hammer_rel_node(node);
79 * Step 2 - If we couldn't get a node from the cache, get
80 * the one from the root of the root cluster.
82 while (node == NULL) {
83 cluster = hammer_get_root_cluster(hmp, &error);
86 node = hammer_get_node(cluster,
87 cluster->ondisk->clu_btree_root,
89 hammer_rel_cluster(cluster, 0);
92 hammer_lock_sh(&node->lock);
93 if (node->flags & HAMMER_NODE_DELETED) {
94 hammer_unlock(&node->lock);
95 hammer_rel_node(node);
101 * Step 3 - finish initializing the cursor by acquiring the parent
105 error = hammer_load_cursor_parent(cursor);
106 KKASSERT(error == 0);
111 * Initialize a fresh cursor at the root of the specified cluster and
112 * limit operations to within the cluster.
114 * NOTE: cursor->parent will be set to NULL to avoid referencing B-Tree
115 * nodes in other clusters.
118 hammer_init_cursor_cluster(hammer_cursor_t cursor, hammer_cluster_t cluster)
122 bzero(cursor, sizeof(*cursor));
123 cursor->flags |= HAMMER_CURSOR_INCLUSTER;
124 cursor->node = hammer_get_node(cluster,
125 cluster->ondisk->clu_btree_root,
128 hammer_lock_sh(&cursor->node->lock);
129 cursor->left_bound = &cluster->ondisk->clu_btree_beg;
130 cursor->right_bound = &cluster->ondisk->clu_btree_end;
131 KKASSERT(error == 0);
136 * We are finished with a cursor. We NULL out various fields as sanity
137 * check, in case the structure is inappropriately used afterwords.
140 hammer_done_cursor(hammer_cursor_t cursor)
142 if (cursor->parent) {
143 hammer_unlock(&cursor->parent->lock);
144 hammer_rel_node(cursor->parent);
145 cursor->parent = NULL;
148 hammer_unlock(&cursor->node->lock);
149 hammer_rel_node(cursor->node);
152 if (cursor->data_buffer) {
153 hammer_rel_buffer(cursor->data_buffer, 0);
154 cursor->data_buffer = NULL;
156 if (cursor->record_buffer) {
157 hammer_rel_buffer(cursor->record_buffer, 0);
158 cursor->record_buffer = NULL;
161 hammer_mem_done(cursor);
164 * If we deadlocked this node will be referenced. Do a quick
165 * lock/unlock to wait for the deadlock condition to clear.
167 if (cursor->deadlk_node) {
168 hammer_lock_ex(&cursor->deadlk_node->lock);
169 hammer_unlock(&cursor->deadlk_node->lock);
170 hammer_rel_node(cursor->deadlk_node);
171 cursor->deadlk_node = NULL;
175 cursor->record = NULL;
176 cursor->left_bound = NULL;
177 cursor->right_bound = NULL;
181 * Upgrade cursor->node and cursor->parent to exclusive locks. This
182 * function can return EDEADLK.
184 * If we fail to upgrade the lock and cursor->deadlk_node is NULL,
185 * we add another reference to the node that failed and set
186 * cursor->deadlk_node so hammer_done_cursor() can block on it.
189 hammer_cursor_upgrade(hammer_cursor_t cursor)
193 if (hammer_lock_held(&cursor->node->lock) < 0) {
194 error = hammer_lock_upgrade(&cursor->node->lock);
195 if (error && cursor->deadlk_node == NULL) {
196 cursor->deadlk_node = cursor->node;
197 hammer_ref_node(cursor->deadlk_node);
202 if (cursor->parent && hammer_lock_held(&cursor->parent->lock) < 0) {
203 error = hammer_lock_upgrade(&cursor->parent->lock);
204 if (error && cursor->deadlk_node == NULL) {
205 cursor->deadlk_node = cursor->parent;
206 hammer_ref_node(cursor->deadlk_node);
215 * Downgrade cursor->node and cursor->parent to shared locks. This
216 * function can return EDEADLK.
219 hammer_cursor_downgrade(hammer_cursor_t cursor)
221 if (hammer_lock_held(&cursor->node->lock) > 0)
222 hammer_lock_downgrade(&cursor->node->lock);
223 if (cursor->parent && hammer_lock_held(&cursor->parent->lock) > 0)
224 hammer_lock_downgrade(&cursor->parent->lock);
228 * Seek the cursor to the specified node and index.
231 hammer_cursor_seek(hammer_cursor_t cursor, hammer_node_t node, int index)
235 hammer_cursor_downgrade(cursor);
238 if (cursor->node != node) {
239 hammer_unlock(&cursor->node->lock);
240 hammer_rel_node(cursor->node);
242 cursor->index = index;
243 hammer_ref_node(node);
244 hammer_lock_sh(&node->lock);
246 if (cursor->parent) {
247 hammer_unlock(&cursor->parent->lock);
248 hammer_rel_node(cursor->parent);
249 cursor->parent = NULL;
250 cursor->parent_index = 0;
252 error = hammer_load_cursor_parent(cursor);
260 * Acquire the parent B-Tree node of the specified node, returning a
261 * referenced but unlocked node. NULL can be returned with *errorp == 0
262 * if node is the root node of the root cluster.
266 hammer_get_parent_node(hammer_node_t node, int *errorp)
268 hammer_cluster_t cluster;
269 hammer_node_t parent;
271 cluster = node->cluster;
272 if (node->ondisk->parent) {
276 parent = hammer_get_node(cluster, node->ondisk->parent, errorp);
277 } else if (cluster->ondisk->clu_btree_parent_vol_no >= 0) {
279 * At cluster root, locate node in parent cluster
281 hammer_cluster_ondisk_t ondisk;
282 hammer_cluster_t pcluster;
283 hammer_volume_t pvolume;
287 ondisk = cluster->ondisk;
288 vol_no = ondisk->clu_btree_parent_vol_no;
289 clu_no = ondisk->clu_btree_parent_clu_no;
292 * Acquire the node from (volume, cluster, offset)
294 pvolume = hammer_get_volume(cluster->volume->hmp, vol_no,
298 pcluster = hammer_get_cluster(pvolume, clu_no, errorp, 0);
299 hammer_rel_volume(pvolume, 0);
302 parent = hammer_get_node(pcluster,
303 ondisk->clu_btree_parent_offset,
305 hammer_rel_cluster(pcluster, 0);
308 * At root of root cluster, there is no parent.
310 KKASSERT(cluster->ondisk->clu_btree_parent_vol_no == -1);
320 * Load the parent of cursor->node into cursor->parent. There are several
321 * cases. (1) The parent is in the current cluster. (2) We are at the
322 * root of the cluster and the parent is in another cluster. (3) We are at
323 * the root of the root cluster.
325 * If HAMMER_CURSOR_INCLUSTER is set and we are at the root of the cluster,
326 * we do not access the parent cluster at all and make the cursor look like
331 hammer_load_cursor_parent(hammer_cursor_t cursor)
333 hammer_cluster_t cluster;
336 cluster = cursor->node->cluster;
338 if (cursor->node->ondisk->parent) {
339 error = hammer_load_cursor_parent_local(cursor);
340 } else if (cluster->ondisk->clu_btree_parent_vol_no >= 0 &&
341 ((cursor->flags & HAMMER_CURSOR_INCLUSTER) == 0)
343 error = hammer_load_cursor_parent_cluster(cursor);
345 cursor->parent = NULL;
346 cursor->parent_index = 0;
347 cursor->left_bound = &cluster->ondisk->clu_btree_beg;
348 cursor->right_bound = &cluster->ondisk->clu_btree_end;
356 hammer_load_cursor_parent_local(hammer_cursor_t cursor)
359 hammer_node_t parent;
360 hammer_btree_elm_t elm;
365 parent = hammer_get_node(node->cluster, node->ondisk->parent, &error);
369 for (i = 0; i < parent->ondisk->count; ++i) {
370 elm = &parent->ondisk->elms[i];
371 if (parent->ondisk->elms[i].internal.subtree_offset ==
376 if (i == parent->ondisk->count)
377 panic("Bad B-Tree link: parent %p node %p\n", parent, node);
378 KKASSERT(i != parent->ondisk->count);
379 cursor->parent = parent;
380 cursor->parent_index = i;
381 cursor->left_bound = &elm[0].internal.base;
382 cursor->right_bound = &elm[1].internal.base;
384 hammer_lock_sh(&parent->lock);
390 hammer_load_cursor_parent_cluster(hammer_cursor_t cursor)
392 hammer_cluster_ondisk_t ondisk;
393 hammer_cluster_t pcluster;
394 hammer_cluster_t ccluster;
395 hammer_volume_t volume;
397 hammer_node_t parent;
398 hammer_btree_elm_t elm;
405 ccluster = node->cluster;
406 ondisk = ccluster->ondisk;
407 vol_no = ondisk->clu_btree_parent_vol_no;
408 clu_no = ondisk->clu_btree_parent_clu_no;
411 * Acquire the node from (volume, cluster, offset). This should
412 * be a leaf node containing the HAMMER_BTREE_TYPE_SPIKE_END element.
414 volume = hammer_get_volume(ccluster->volume->hmp, vol_no, &error);
417 pcluster = hammer_get_cluster(volume, clu_no, &error, 0);
418 hammer_rel_volume(volume, 0);
421 parent = hammer_get_node(pcluster, ondisk->clu_btree_parent_offset,
423 hammer_rel_cluster(pcluster, 0);
426 KKASSERT(parent->ondisk->type == HAMMER_BTREE_TYPE_LEAF);
429 * Ok, we have the node. Locate the inter-cluster element
432 for (i = 0; i < parent->ondisk->count; ++i) {
433 elm = &parent->ondisk->elms[i];
435 if (elm->leaf.base.btype == HAMMER_BTREE_TYPE_SPIKE_END &&
436 elm->leaf.spike_clu_no == cursor->node->cluster->clu_no) {
440 KKASSERT(i != parent->ondisk->count);
441 KKASSERT(i && elm[-1].leaf.base.btype == HAMMER_BTREE_TYPE_SPIKE_BEG);
442 cursor->parent = parent;
443 cursor->parent_index = i;
444 cursor->left_bound = &ccluster->ondisk->clu_btree_beg;
445 cursor->right_bound = &ccluster->ondisk->clu_btree_end;
447 KKASSERT(hammer_btree_cmp(&elm[-1].leaf.base,
448 &ccluster->ondisk->clu_btree_beg) == 0);
450 * spike_end is an inclusive boundary and will != clu_btree_end
451 KKASSERT(hammer_btree_cmp(cursor->right_bound,
452 &ccluster->ondisk->clu_btree_end) >= 0);
455 hammer_lock_sh(&parent->lock);
460 * Cursor up to our parent node. Return ENOENT if we are at the root of
463 * If doing a nonblocking cursor-up and we are unable to acquire the lock,
464 * the cursor remains unchanged.
467 hammer_cursor_up(hammer_cursor_t cursor)
471 hammer_cursor_downgrade(cursor);
474 * Set the node to its parent. If the parent is NULL we are at
475 * the root of the root cluster and return ENOENT.
477 hammer_unlock(&cursor->node->lock);
478 hammer_rel_node(cursor->node);
479 cursor->node = cursor->parent;
480 cursor->index = cursor->parent_index;
481 cursor->parent = NULL;
482 cursor->parent_index = 0;
484 if (cursor->node == NULL) {
486 } else if ((cursor->flags & HAMMER_CURSOR_INCLUSTER) &&
487 cursor->node->ondisk->parent == 0
491 error = hammer_load_cursor_parent(cursor);
497 * Set the cursor to the root of the current cursor's cluster.
500 hammer_cursor_toroot(hammer_cursor_t cursor)
502 hammer_cluster_t cluster;
506 * Already at root of cluster?
508 if (cursor->node->ondisk->parent == 0)
511 hammer_cursor_downgrade(cursor);
514 * Parent is root of cluster?
516 if (cursor->parent->ondisk->parent == 0)
517 return (hammer_cursor_up(cursor));
520 * Ok, reload the cursor with the root of the cluster, then
523 cluster = cursor->node->cluster;
524 error = hammer_ref_cluster(cluster);
528 hammer_unlock(&cursor->parent->lock);
529 hammer_rel_node(cursor->parent);
530 hammer_unlock(&cursor->node->lock);
531 hammer_rel_node(cursor->node);
532 cursor->parent = NULL;
533 cursor->parent_index = 0;
535 cursor->node = hammer_get_node(cluster, cluster->ondisk->clu_btree_root,
538 hammer_lock_sh(&cursor->node->lock);
539 hammer_rel_cluster(cluster, 0);
541 error = hammer_load_cursor_parent(cursor);
546 * Cursor down through the current node, which must be an internal node.
548 * This routine adjusts the cursor and sets index to 0.
551 hammer_cursor_down(hammer_cursor_t cursor)
554 hammer_btree_elm_t elm;
555 hammer_volume_t volume;
556 hammer_cluster_t cluster;
562 * The current node becomes the current parent
564 hammer_cursor_downgrade(cursor);
566 KKASSERT(cursor->index >= 0 && cursor->index < node->ondisk->count);
567 if (cursor->parent) {
568 hammer_unlock(&cursor->parent->lock);
569 hammer_rel_node(cursor->parent);
571 cursor->parent = node;
572 cursor->parent_index = cursor->index;
577 * Extract element to push into at (node,index), set bounds.
579 elm = &node->ondisk->elms[cursor->parent_index];
582 * Ok, push down into elm. If elm specifies an internal or leaf
583 * node the current node must be an internal node. If elm specifies
584 * a spike then the current node must be a leaf node.
586 * Cursoring down through a cluster transition when the INCLUSTER
587 * flag is set is not legal.
589 switch(elm->base.btype) {
590 case HAMMER_BTREE_TYPE_INTERNAL:
591 case HAMMER_BTREE_TYPE_LEAF:
592 KKASSERT(node->ondisk->type == HAMMER_BTREE_TYPE_INTERNAL);
593 KKASSERT(elm->internal.subtree_offset != 0);
594 cursor->left_bound = &elm[0].internal.base;
595 cursor->right_bound = &elm[1].internal.base;
596 node = hammer_get_node(node->cluster,
597 elm->internal.subtree_offset,
600 KKASSERT(elm->base.btype == node->ondisk->type);
601 if (node->ondisk->parent != cursor->parent->node_offset)
602 panic("node %p %d vs %d\n", node, node->ondisk->parent, cursor->parent->node_offset);
603 KKASSERT(node->ondisk->parent == cursor->parent->node_offset);
606 case HAMMER_BTREE_TYPE_SPIKE_BEG:
607 /* case not supported yet */
609 KKASSERT(node->ondisk->type == HAMMER_BTREE_TYPE_LEAF);
610 KKASSERT(cursor->parent_index < node->ondisk->count - 1);
611 KKASSERT(elm[1].leaf.base.btype == HAMMER_BTREE_TYPE_SPIKE_END);
613 ++cursor->parent_index;
615 case HAMMER_BTREE_TYPE_SPIKE_END:
616 KKASSERT(node->ondisk->type == HAMMER_BTREE_TYPE_LEAF);
617 KKASSERT((cursor->flags & HAMMER_CURSOR_INCLUSTER) == 0);
618 vol_no = elm->leaf.spike_vol_no;
619 clu_no = elm->leaf.spike_clu_no;
620 volume = hammer_get_volume(node->cluster->volume->hmp,
622 KKASSERT(error != EINVAL);
625 cluster = hammer_get_cluster(volume, clu_no, &error, 0);
626 KKASSERT(error != EINVAL);
627 hammer_rel_volume(volume, 0);
630 KKASSERT(cluster->ondisk->clu_btree_parent_offset ==
631 cursor->parent->node_offset);
632 KKASSERT(cluster->ondisk->clu_btree_parent_clu_no ==
633 cursor->parent->cluster->clu_no);
635 cursor->left_bound = &cluster->ondisk->clu_btree_beg;
636 cursor->right_bound = &cluster->ondisk->clu_btree_end;
637 node = hammer_get_node(cluster,
638 cluster->ondisk->clu_btree_root,
640 hammer_rel_cluster(cluster, 0);
643 panic("hammer_cursor_down: illegal btype %02x (%c)\n",
645 (elm->base.btype ? elm->base.btype : '?'));
649 hammer_lock_sh(&node->lock);