HAMMER 25/many: Pruning code
[dragonfly.git] / sys / vfs / hammer / hammer_cursor.c
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.16 2008/02/05 07:58:43 dillon Exp $
35  */
36
37 /*
38  * HAMMER B-Tree index - cursor support routines
39  */
40 #include "hammer.h"
41
42 static int hammer_load_cursor_parent(hammer_cursor_t cursor);
43 static int hammer_load_cursor_parent_local(hammer_cursor_t cursor);
44 static int hammer_load_cursor_parent_cluster(hammer_cursor_t cursor);
45
46 /*
47  * Initialize a fresh cursor using the B-Tree node cache.  If the cache
48  * is not available initialize a fresh cursor at the root of the root
49  * cluster.
50  */
51 int
52 hammer_init_cursor_hmp(hammer_cursor_t cursor, struct hammer_node **cache,
53                        hammer_mount_t hmp)
54 {
55         hammer_cluster_t cluster;
56         hammer_node_t node;
57         int error;
58
59         bzero(cursor, sizeof(*cursor));
60
61         /*
62          * Step 1 - acquire a locked node from the cache if possible
63          */
64         if (cache && *cache) {
65                 node = hammer_ref_node_safe(hmp, cache, &error);
66                 if (error == 0) {
67                         hammer_lock_sh(&node->lock);
68                         if (node->flags & HAMMER_NODE_DELETED) {
69                                 hammer_unlock(&node->lock);
70                                 hammer_rel_node(node);
71                                 node = NULL;
72                         }
73                 }
74         } else {
75                 node = NULL;
76         }
77
78         /*
79          * Step 2 - If we couldn't get a node from the cache, get
80          * the one from the root of the root cluster.
81          */
82         while (node == NULL) {
83                 cluster = hammer_get_root_cluster(hmp, &error);
84                 if (error)
85                         break;
86                 node = hammer_get_node(cluster,
87                                        cluster->ondisk->clu_btree_root,
88                                        &error);
89                 hammer_rel_cluster(cluster, 0);
90                 if (error)
91                         break;
92                 hammer_lock_sh(&node->lock);
93                 if (node->flags & HAMMER_NODE_DELETED) {
94                         hammer_unlock(&node->lock);
95                         hammer_rel_node(node);
96                         node = NULL;
97                 }
98         }
99
100         /*
101          * Step 3 - finish initializing the cursor by acquiring the parent
102          */
103         cursor->node = node;
104         if (error == 0)
105                 error = hammer_load_cursor_parent(cursor);
106         KKASSERT(error == 0);
107         return(error);
108 }
109
110 /*
111  * Initialize a fresh cursor at the root of the specified cluster and
112  * limit operations to within the cluster.
113  *
114  * NOTE: cursor->parent will be set to NULL to avoid referencing B-Tree
115  * nodes in other clusters.
116  */
117 int
118 hammer_init_cursor_cluster(hammer_cursor_t cursor, hammer_cluster_t cluster)
119 {
120         int error;
121
122         bzero(cursor, sizeof(*cursor));
123         cursor->flags |= HAMMER_CURSOR_INCLUSTER;
124         cursor->node = hammer_get_node(cluster,
125                                        cluster->ondisk->clu_btree_root,
126                                        &error);
127         if (error == 0)
128                 hammer_lock_sh(&cursor->node->lock);
129         cursor->left_bound = &cluster->ondisk->clu_btree_beg;
130         cursor->right_bound = &cluster->ondisk->clu_btree_end;
131         KKASSERT(error == 0);
132         return(error);
133 }
134
135 /*
136  * We are finished with a cursor.  We NULL out various fields as sanity
137  * check, in case the structure is inappropriately used afterwords.
138  */
139 void
140 hammer_done_cursor(hammer_cursor_t cursor)
141 {
142         if (cursor->parent) {
143                 hammer_unlock(&cursor->parent->lock);
144                 hammer_rel_node(cursor->parent);
145                 cursor->parent = NULL;
146         }
147         if (cursor->node) {
148                 hammer_unlock(&cursor->node->lock);
149                 hammer_rel_node(cursor->node);
150                 cursor->node = NULL;
151         }
152         if (cursor->data_buffer) {
153                 hammer_rel_buffer(cursor->data_buffer, 0);
154                 cursor->data_buffer = NULL;
155         }
156         if (cursor->record_buffer) {
157                 hammer_rel_buffer(cursor->record_buffer, 0);
158                 cursor->record_buffer = NULL;
159         }
160         if (cursor->ip)
161                 hammer_mem_done(cursor);
162
163         /*
164          * If we deadlocked this node will be referenced.  Do a quick
165          * lock/unlock to wait for the deadlock condition to clear.
166          */
167         if (cursor->deadlk_node) {
168                 hammer_lock_ex(&cursor->deadlk_node->lock);
169                 hammer_unlock(&cursor->deadlk_node->lock);
170                 hammer_rel_node(cursor->deadlk_node);
171                 cursor->deadlk_node = NULL;
172         }
173
174         cursor->data = NULL;
175         cursor->record = NULL;
176         cursor->left_bound = NULL;
177         cursor->right_bound = NULL;
178 }
179
180 /*
181  * Upgrade cursor->node and cursor->parent to exclusive locks.  This
182  * function can return EDEADLK.
183  *
184  * If we fail to upgrade the lock and cursor->deadlk_node is NULL, 
185  * we add another reference to the node that failed and set
186  * cursor->deadlk_node so hammer_done_cursor() can block on it.
187  */
188 int
189 hammer_cursor_upgrade(hammer_cursor_t cursor)
190 {
191         int error;
192
193         if (hammer_lock_held(&cursor->node->lock) < 0) {
194                 error = hammer_lock_upgrade(&cursor->node->lock);
195                 if (error && cursor->deadlk_node == NULL) {
196                         cursor->deadlk_node = cursor->node;
197                         hammer_ref_node(cursor->deadlk_node);
198                 }
199         } else {
200                 error = 0;
201         }
202         if (cursor->parent && hammer_lock_held(&cursor->parent->lock) < 0) {
203                 error = hammer_lock_upgrade(&cursor->parent->lock);
204                 if (error && cursor->deadlk_node == NULL) {
205                         cursor->deadlk_node = cursor->parent;
206                         hammer_ref_node(cursor->deadlk_node);
207                 }
208         } else {
209                 error = 0;
210         }
211         return(error);
212 }
213
214 /*
215  * Downgrade cursor->node and cursor->parent to shared locks.  This
216  * function can return EDEADLK.
217  */
218 void
219 hammer_cursor_downgrade(hammer_cursor_t cursor)
220 {
221         if (hammer_lock_held(&cursor->node->lock) > 0)
222                 hammer_lock_downgrade(&cursor->node->lock);
223         if (cursor->parent && hammer_lock_held(&cursor->parent->lock) > 0)
224                 hammer_lock_downgrade(&cursor->parent->lock);
225 }
226
227 /*
228  * Seek the cursor to the specified node and index.
229  */
230 int
231 hammer_cursor_seek(hammer_cursor_t cursor, hammer_node_t node, int index)
232 {
233         int error;
234
235         hammer_cursor_downgrade(cursor);
236         error = 0;
237
238         if (cursor->node != node) {
239                 hammer_unlock(&cursor->node->lock);
240                 hammer_rel_node(cursor->node);
241                 cursor->node = node;
242                 cursor->index = index;
243                 hammer_ref_node(node);
244                 hammer_lock_sh(&node->lock);
245
246                 if (cursor->parent) {
247                         hammer_unlock(&cursor->parent->lock);
248                         hammer_rel_node(cursor->parent);
249                         cursor->parent = NULL;
250                         cursor->parent_index = 0;
251                 }
252                 error = hammer_load_cursor_parent(cursor);
253         }
254         return (error);
255 }
256
257 #if 0
258
259 /*
260  * Acquire the parent B-Tree node of the specified node, returning a
261  * referenced but unlocked node.  NULL can be returned with *errorp == 0
262  * if node is the root node of the root cluster.
263  */
264 static
265 hammer_node_t
266 hammer_get_parent_node(hammer_node_t node, int *errorp)
267 {
268         hammer_cluster_t cluster;
269         hammer_node_t parent;
270
271         cluster = node->cluster;
272         if (node->ondisk->parent) {
273                 /*
274                  * Local parent
275                  */
276                 parent = hammer_get_node(cluster, node->ondisk->parent, errorp);
277         } else if (cluster->ondisk->clu_btree_parent_vol_no >= 0) {
278                 /*
279                  * At cluster root, locate node in parent cluster
280                  */
281                 hammer_cluster_ondisk_t ondisk;
282                 hammer_cluster_t pcluster;
283                 hammer_volume_t pvolume;
284                 int32_t clu_no;
285                 int32_t vol_no;
286
287                 ondisk = cluster->ondisk;
288                 vol_no = ondisk->clu_btree_parent_vol_no;
289                 clu_no = ondisk->clu_btree_parent_clu_no;
290
291                 /*
292                  * Acquire the node from (volume, cluster, offset)
293                  */
294                 pvolume = hammer_get_volume(cluster->volume->hmp, vol_no,
295                                             errorp);
296                 if (*errorp)
297                         return (NULL);
298                 pcluster = hammer_get_cluster(pvolume, clu_no, errorp, 0);
299                 hammer_rel_volume(pvolume, 0);
300                 if (*errorp)
301                         return (NULL);
302                 parent = hammer_get_node(pcluster,
303                                          ondisk->clu_btree_parent_offset,
304                                          errorp);
305                 hammer_rel_cluster(pcluster, 0);
306         } else {
307                 /*
308                  * At root of root cluster, there is no parent.
309                  */
310                 KKASSERT(cluster->ondisk->clu_btree_parent_vol_no == -1);
311                 parent = NULL;
312                 *errorp = 0;
313         }
314         return(parent);
315 }
316
317 #endif
318
319 /*
320  * Load the parent of cursor->node into cursor->parent.  There are several
321  * cases.  (1) The parent is in the current cluster.  (2) We are at the
322  * root of the cluster and the parent is in another cluster.  (3) We are at
323  * the root of the root cluster.
324  *
325  * If HAMMER_CURSOR_INCLUSTER is set and we are at the root of the cluster,
326  * we do not access the parent cluster at all and make the cursor look like
327  * its at the root.
328  */
329 static
330 int
331 hammer_load_cursor_parent(hammer_cursor_t cursor)
332 {
333         hammer_cluster_t cluster;
334         int error;
335
336         cluster = cursor->node->cluster;
337
338         if (cursor->node->ondisk->parent) {
339                 error = hammer_load_cursor_parent_local(cursor);
340         } else if (cluster->ondisk->clu_btree_parent_vol_no >= 0 &&
341                    ((cursor->flags & HAMMER_CURSOR_INCLUSTER) == 0)
342         ) {
343                 error = hammer_load_cursor_parent_cluster(cursor);
344         } else {
345                 cursor->parent = NULL;
346                 cursor->parent_index = 0;
347                 cursor->left_bound = &cluster->ondisk->clu_btree_beg;
348                 cursor->right_bound = &cluster->ondisk->clu_btree_end;
349                 error = 0;
350         }
351         return(error);
352 }
353
354 static
355 int
356 hammer_load_cursor_parent_local(hammer_cursor_t cursor)
357 {
358         hammer_node_t node;
359         hammer_node_t parent;
360         hammer_btree_elm_t elm;
361         int error;
362         int i;
363
364         node = cursor->node;
365         parent = hammer_get_node(node->cluster, node->ondisk->parent, &error);
366         if (error)
367                 return(error);
368         elm = NULL;
369         for (i = 0; i < parent->ondisk->count; ++i) {
370                 elm = &parent->ondisk->elms[i];
371                 if (parent->ondisk->elms[i].internal.subtree_offset ==
372                     node->node_offset) {
373                         break;
374                 }
375         }
376         if (i == parent->ondisk->count)
377                 panic("Bad B-Tree link: parent %p node %p\n", parent, node);
378         KKASSERT(i != parent->ondisk->count);
379         cursor->parent = parent;
380         cursor->parent_index = i;
381         cursor->left_bound = &elm[0].internal.base;
382         cursor->right_bound = &elm[1].internal.base;
383
384         hammer_lock_sh(&parent->lock);
385         return(error);
386 }
387
388 static
389 int
390 hammer_load_cursor_parent_cluster(hammer_cursor_t cursor)
391 {
392         hammer_cluster_ondisk_t ondisk;
393         hammer_cluster_t pcluster;
394         hammer_cluster_t ccluster;
395         hammer_volume_t volume;
396         hammer_node_t node;
397         hammer_node_t parent;
398         hammer_btree_elm_t elm;
399         int32_t clu_no;
400         int32_t vol_no;
401         int error;
402         int i;
403
404         node = cursor->node;
405         ccluster = node->cluster;
406         ondisk = ccluster->ondisk;
407         vol_no = ondisk->clu_btree_parent_vol_no;
408         clu_no = ondisk->clu_btree_parent_clu_no;
409
410         /*
411          * Acquire the node from (volume, cluster, offset).  This should
412          * be a leaf node containing the HAMMER_BTREE_TYPE_SPIKE_END element.
413          */
414         volume = hammer_get_volume(ccluster->volume->hmp, vol_no, &error);
415         if (error)
416                 return (error);
417         pcluster = hammer_get_cluster(volume, clu_no, &error, 0);
418         hammer_rel_volume(volume, 0);
419         if (error)
420                 return (error);
421         parent = hammer_get_node(pcluster, ondisk->clu_btree_parent_offset,
422                                  &error);
423         hammer_rel_cluster(pcluster, 0);
424         if (error)
425                 return (error);
426         KKASSERT(parent->ondisk->type == HAMMER_BTREE_TYPE_LEAF);
427
428         /* 
429          * Ok, we have the node.  Locate the inter-cluster element
430          */
431         elm = NULL;
432         for (i = 0; i < parent->ondisk->count; ++i) {
433                 elm = &parent->ondisk->elms[i];
434
435                 if (elm->leaf.base.btype == HAMMER_BTREE_TYPE_SPIKE_END &&
436                     elm->leaf.spike_clu_no == cursor->node->cluster->clu_no) {
437                         break;
438                 }
439         }
440         KKASSERT(i != parent->ondisk->count);
441         KKASSERT(i && elm[-1].leaf.base.btype == HAMMER_BTREE_TYPE_SPIKE_BEG);
442         cursor->parent = parent;
443         cursor->parent_index = i;
444         cursor->left_bound = &ccluster->ondisk->clu_btree_beg;
445         cursor->right_bound = &ccluster->ondisk->clu_btree_end;
446
447         KKASSERT(hammer_btree_cmp(&elm[-1].leaf.base,
448                  &ccluster->ondisk->clu_btree_beg) == 0);
449             /*
450              * spike_end is an inclusive boundary and will != clu_btree_end
451         KKASSERT(hammer_btree_cmp(cursor->right_bound,
452                  &ccluster->ondisk->clu_btree_end) >= 0);
453             */
454
455         hammer_lock_sh(&parent->lock);
456         return(0);
457 }
458
459 /*
460  * Cursor up to our parent node.  Return ENOENT if we are at the root of
461  * the root cluster.
462  *
463  * If doing a nonblocking cursor-up and we are unable to acquire the lock,
464  * the cursor remains unchanged.
465  */
466 int
467 hammer_cursor_up(hammer_cursor_t cursor)
468 {
469         int error;
470
471         hammer_cursor_downgrade(cursor);
472
473         /*
474          * Set the node to its parent.  If the parent is NULL we are at
475          * the root of the root cluster and return ENOENT.
476          */
477         hammer_unlock(&cursor->node->lock);
478         hammer_rel_node(cursor->node);
479         cursor->node = cursor->parent;
480         cursor->index = cursor->parent_index;
481         cursor->parent = NULL;
482         cursor->parent_index = 0;
483
484         if (cursor->node == NULL) {
485                 error = ENOENT;
486         } else if ((cursor->flags & HAMMER_CURSOR_INCLUSTER) &&
487                    cursor->node->ondisk->parent == 0
488         ) {
489                 error = ENOENT;
490         } else {
491                 error = hammer_load_cursor_parent(cursor);
492         }
493         return(error);
494 }
495
496 /*
497  * Set the cursor to the root of the current cursor's cluster.
498  */
499 int
500 hammer_cursor_toroot(hammer_cursor_t cursor)
501 {
502         hammer_cluster_t cluster;
503         int error;
504
505         /*
506          * Already at root of cluster?
507          */
508         if (cursor->node->ondisk->parent == 0) 
509                 return (0);
510
511         hammer_cursor_downgrade(cursor);
512
513         /*
514          * Parent is root of cluster?
515          */
516         if (cursor->parent->ondisk->parent == 0)
517                 return (hammer_cursor_up(cursor));
518
519         /*
520          * Ok, reload the cursor with the root of the cluster, then
521          * locate its parent.
522          */
523         cluster = cursor->node->cluster;
524         error = hammer_ref_cluster(cluster);
525         if (error)
526                 return (error);
527
528         hammer_unlock(&cursor->parent->lock);
529         hammer_rel_node(cursor->parent);
530         hammer_unlock(&cursor->node->lock);
531         hammer_rel_node(cursor->node);
532         cursor->parent = NULL;
533         cursor->parent_index = 0;
534
535         cursor->node = hammer_get_node(cluster, cluster->ondisk->clu_btree_root,
536                                        &error);
537         cursor->index = 0;
538         hammer_lock_sh(&cursor->node->lock);
539         hammer_rel_cluster(cluster, 0);
540         if (error == 0)
541                 error = hammer_load_cursor_parent(cursor);
542         return(error);
543 }
544
545 /*
546  * Cursor down through the current node, which must be an internal node.
547  *
548  * This routine adjusts the cursor and sets index to 0.
549  */
550 int
551 hammer_cursor_down(hammer_cursor_t cursor)
552 {
553         hammer_node_t node;
554         hammer_btree_elm_t elm;
555         hammer_volume_t volume;
556         hammer_cluster_t cluster;
557         int32_t vol_no;
558         int32_t clu_no;
559         int error;
560
561         /*
562          * The current node becomes the current parent
563          */
564         hammer_cursor_downgrade(cursor);
565         node = cursor->node;
566         KKASSERT(cursor->index >= 0 && cursor->index < node->ondisk->count);
567         if (cursor->parent) {
568                 hammer_unlock(&cursor->parent->lock);
569                 hammer_rel_node(cursor->parent);
570         }
571         cursor->parent = node;
572         cursor->parent_index = cursor->index;
573         cursor->node = NULL;
574         cursor->index = 0;
575
576         /*
577          * Extract element to push into at (node,index), set bounds.
578          */
579         elm = &node->ondisk->elms[cursor->parent_index];
580
581         /*
582          * Ok, push down into elm.  If elm specifies an internal or leaf
583          * node the current node must be an internal node.  If elm specifies
584          * a spike then the current node must be a leaf node.
585          *
586          * Cursoring down through a cluster transition when the INCLUSTER
587          * flag is set is not legal.
588          */
589         switch(elm->base.btype) {
590         case HAMMER_BTREE_TYPE_INTERNAL:
591         case HAMMER_BTREE_TYPE_LEAF:
592                 KKASSERT(node->ondisk->type == HAMMER_BTREE_TYPE_INTERNAL);
593                 KKASSERT(elm->internal.subtree_offset != 0);
594                 cursor->left_bound = &elm[0].internal.base;
595                 cursor->right_bound = &elm[1].internal.base;
596                 node = hammer_get_node(node->cluster,
597                                        elm->internal.subtree_offset,
598                                        &error);
599                 if (error == 0) {
600                         KKASSERT(elm->base.btype == node->ondisk->type);
601                         if (node->ondisk->parent != cursor->parent->node_offset)
602                                 panic("node %p %d vs %d\n", node, node->ondisk->parent, cursor->parent->node_offset);
603                         KKASSERT(node->ondisk->parent == cursor->parent->node_offset);
604                 }
605                 break;
606         case HAMMER_BTREE_TYPE_SPIKE_BEG:
607                 /* case not supported yet */
608                 KKASSERT(0);
609                 KKASSERT(node->ondisk->type == HAMMER_BTREE_TYPE_LEAF);
610                 KKASSERT(cursor->parent_index < node->ondisk->count - 1);
611                 KKASSERT(elm[1].leaf.base.btype == HAMMER_BTREE_TYPE_SPIKE_END);
612                 ++elm;
613                 ++cursor->parent_index;
614                 /* fall through */
615         case HAMMER_BTREE_TYPE_SPIKE_END:
616                 KKASSERT(node->ondisk->type == HAMMER_BTREE_TYPE_LEAF);
617                 KKASSERT((cursor->flags & HAMMER_CURSOR_INCLUSTER) == 0);
618                 vol_no = elm->leaf.spike_vol_no;
619                 clu_no = elm->leaf.spike_clu_no;
620                 volume = hammer_get_volume(node->cluster->volume->hmp,
621                                            vol_no, &error);
622                 KKASSERT(error != EINVAL);
623                 if (error)
624                         return(error);
625                 cluster = hammer_get_cluster(volume, clu_no, &error, 0);
626                 KKASSERT(error != EINVAL);
627                 hammer_rel_volume(volume, 0);
628                 if (error)
629                         return(error);
630                 KKASSERT(cluster->ondisk->clu_btree_parent_offset ==
631                          cursor->parent->node_offset);
632                 KKASSERT(cluster->ondisk->clu_btree_parent_clu_no ==
633                          cursor->parent->cluster->clu_no);
634
635                 cursor->left_bound = &cluster->ondisk->clu_btree_beg;
636                 cursor->right_bound = &cluster->ondisk->clu_btree_end;
637                 node = hammer_get_node(cluster,
638                                        cluster->ondisk->clu_btree_root,
639                                        &error);
640                 hammer_rel_cluster(cluster, 0);
641                 break;
642         default:
643                 panic("hammer_cursor_down: illegal btype %02x (%c)\n",
644                       elm->base.btype,
645                       (elm->base.btype ? elm->base.btype : '?'));
646                 break;
647         }
648         if (error == 0) {
649                 hammer_lock_sh(&node->lock);
650                 cursor->node = node;
651                 cursor->index = 0;
652         }
653         return(error);
654 }
655