HAMMER 15/many - user utility infrastructure, refactor alists, misc
[dragonfly.git] / sys / vfs / hammer / hammer_cursor.c
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.10 2008/01/03 06:48:49 dillon Exp $
35  */
36
37 /*
38  * HAMMER B-Tree index - cursor support routines
39  */
40 #include "hammer.h"
41
42 static int hammer_load_cursor_parent(hammer_cursor_t cursor);
43 static int hammer_load_cursor_parent_local(hammer_cursor_t cursor);
44 static int hammer_load_cursor_parent_cluster(hammer_cursor_t cursor);
45
46 /*
47  * Initialize a fresh cursor using the B-Tree node cache.  If the cache
48  * is not available initialize a fresh cursor at the root of the root
49  * cluster.
50  */
51 int
52 hammer_init_cursor_hmp(hammer_cursor_t cursor, struct hammer_node **cache,
53                        hammer_mount_t hmp)
54 {
55         hammer_cluster_t cluster;
56         hammer_node_t node;
57         int error;
58
59         bzero(cursor, sizeof(*cursor));
60
61         /*
62          * Step 1 - acquire a locked node from the cache if possible
63          */
64         if (cache && *cache) {
65                 node = *cache;
66                 error = hammer_ref_node(node);
67                 if (error == 0) {
68                         hammer_lock_ex(&node->lock);
69                         if (node->flags & HAMMER_NODE_DELETED) {
70                                 hammer_unlock(&node->lock);
71                                 hammer_rel_node(node);
72                                 node = NULL;
73                         }
74                 } else {
75                         node = NULL;
76                 }
77         } else {
78                 node = NULL;
79         }
80
81         /*
82          * Step 2 - If we couldn't get a node from the cache, get
83          * the one from the root of the root cluster.
84          */
85         while (node == NULL) {
86                 cluster = hammer_get_root_cluster(hmp, &error);
87                 if (error)
88                         break;
89                 node = hammer_get_node(cluster,
90                                        cluster->ondisk->clu_btree_root,
91                                        &error);
92                 hammer_rel_cluster(cluster, 0);
93                 if (error)
94                         break;
95                 hammer_lock_ex(&node->lock);
96                 if (node->flags & HAMMER_NODE_DELETED) {
97                         hammer_unlock(&node->lock);
98                         hammer_rel_node(node);
99                         node = NULL;
100                 }
101         }
102
103         /*
104          * Step 3 - finish initializing the cursor by acquiring the parent
105          */
106         cursor->node = node;
107         if (error == 0)
108                 error = hammer_load_cursor_parent(cursor);
109         KKASSERT(error == 0);
110         return(error);
111 }
112
113 /*
114  * Initialize a fresh cursor at the root of the specified cluster and
115  * limit operations to within the cluster.
116  */
117 int
118 hammer_init_cursor_cluster(hammer_cursor_t cursor, hammer_cluster_t cluster)
119 {
120         int error;
121
122         bzero(cursor, sizeof(*cursor));
123         cursor->flags |= HAMMER_CURSOR_INCLUSTER;
124         cursor->node = hammer_get_node(cluster,
125                                        cluster->ondisk->clu_btree_root,
126                                        &error);
127         if (error == 0) {
128                 hammer_lock_ex(&cursor->node->lock);
129                 error = hammer_load_cursor_parent(cursor);
130         }
131         KKASSERT(error == 0);
132         return(error);
133 }
134
135 /*
136  * We are finished with a cursor.  We NULL out various fields as sanity
137  * check, in case the structure is inappropriately used afterwords.
138  */
139 void
140 hammer_done_cursor(hammer_cursor_t cursor)
141 {
142         if (cursor->parent) {
143                 hammer_unlock(&cursor->parent->lock);
144                 hammer_rel_node(cursor->parent);
145                 cursor->parent = NULL;
146         }
147         if (cursor->node) {
148                 hammer_unlock(&cursor->node->lock);
149                 hammer_rel_node(cursor->node);
150                 cursor->node = NULL;
151         }
152         if (cursor->data_buffer) {
153                 hammer_rel_buffer(cursor->data_buffer, 0);
154                 cursor->data_buffer = NULL;
155         }
156         if (cursor->record_buffer) {
157                 hammer_rel_buffer(cursor->record_buffer, 0);
158                 cursor->record_buffer = NULL;
159         }
160         if (cursor->ip)
161                 hammer_mem_done(cursor);
162
163         cursor->data = NULL;
164         cursor->record = NULL;
165         cursor->left_bound = NULL;
166         cursor->right_bound = NULL;
167 }
168
169 /*
170  * Acquire the parent B-Tree node of the specified node, returning a
171  * referenced but unlocked node.  NULL can be returned with *errorp == 0
172  * if node is the root node of the root cluster.
173  */
174 static
175 hammer_node_t
176 hammer_get_parent_node(hammer_node_t node, int *errorp)
177 {
178         hammer_cluster_t cluster;
179         hammer_node_t parent;
180
181         cluster = node->cluster;
182         if (node->ondisk->parent) {
183                 /*
184                  * Local parent
185                  */
186                 parent = hammer_get_node(cluster, node->ondisk->parent, errorp);
187         } else if (cluster->ondisk->clu_btree_parent_vol_no >= 0) {
188                 /*
189                  * At cluster root, locate node in parent cluster
190                  */
191                 hammer_cluster_ondisk_t ondisk;
192                 hammer_cluster_t pcluster;
193                 hammer_volume_t pvolume;
194                 int32_t clu_no;
195                 int32_t vol_no;
196
197                 ondisk = cluster->ondisk;
198                 vol_no = ondisk->clu_btree_parent_vol_no;
199                 clu_no = ondisk->clu_btree_parent_clu_no;
200
201                 /*
202                  * Acquire the node from (volume, cluster, offset)
203                  */
204                 pvolume = hammer_get_volume(cluster->volume->hmp, vol_no,
205                                             errorp);
206                 if (*errorp)
207                         return (NULL);
208                 pcluster = hammer_get_cluster(pvolume, clu_no, errorp, 0);
209                 hammer_rel_volume(pvolume, 0);
210                 if (*errorp)
211                         return (NULL);
212                 parent = hammer_get_node(pcluster,
213                                          ondisk->clu_btree_parent_offset,
214                                          errorp);
215                 hammer_rel_cluster(pcluster, 0);
216         } else {
217                 /*
218                  * At root of root cluster, there is no parent.
219                  */
220                 KKASSERT(cluster->ondisk->clu_btree_parent_vol_no == -1);
221                 parent = NULL;
222                 *errorp = 0;
223         }
224         return(parent);
225 }
226
227 /*
228  * Load the parent of cursor->node into cursor->parent.  There are several
229  * cases.  (1) The parent is in the current cluster.  (2) We are at the
230  * root of the cluster and the parent is in another cluster.  (3) We are at
231  * the root of the root cluster.
232  *
233  * If HAMMER_CURSOR_INCLUSTER is set and we are at the root of the cluster,
234  * we do not access the parent cluster at all and make the cursor look like
235  * its at the root.
236  */
237 static
238 int
239 hammer_load_cursor_parent(hammer_cursor_t cursor)
240 {
241         hammer_cluster_t cluster;
242         int error;
243
244         cluster = cursor->node->cluster;
245
246         if (cursor->node->ondisk->parent) {
247                 error = hammer_load_cursor_parent_local(cursor);
248         } else if (cluster->ondisk->clu_btree_parent_vol_no >= 0 &&
249                    ((cursor->flags & HAMMER_CURSOR_INCLUSTER) == 0)
250         ) {
251                 error = hammer_load_cursor_parent_cluster(cursor);
252         } else {
253                 cursor->parent = NULL;
254                 cursor->parent_index = 0;
255                 cursor->left_bound = &cluster->ondisk->clu_btree_beg;
256                 cursor->right_bound = &cluster->ondisk->clu_btree_end;
257                 error = 0;
258         }
259         return(error);
260 }
261
262 static
263 int
264 hammer_load_cursor_parent_local(hammer_cursor_t cursor)
265 {
266         hammer_node_t node;
267         hammer_node_t parent;
268         hammer_btree_elm_t elm;
269         int error;
270         int i;
271
272         node = cursor->node;
273         parent = hammer_get_node(node->cluster, node->ondisk->parent, &error);
274         if (error)
275                 return(error);
276         elm = NULL;
277         for (i = 0; i < parent->ondisk->count; ++i) {
278                 elm = &parent->ondisk->elms[i];
279                 if (parent->ondisk->elms[i].internal.subtree_offset ==
280                     node->node_offset) {
281                         break;
282                 }
283         }
284         if (i == parent->ondisk->count)
285                 panic("Bad B-Tree link: parent %p node %p\n", parent, node);
286         KKASSERT(i != parent->ondisk->count);
287         KKASSERT(parent->ondisk->elms[i].internal.rec_offset == 0);
288         cursor->parent = parent;
289         cursor->parent_index = i;
290         cursor->left_bound = &elm[0].internal.base;
291         cursor->right_bound = &elm[1].internal.base;
292
293         if (hammer_lock_ex_try(&parent->lock) != 0) {
294                 hammer_unlock(&cursor->node->lock);
295                 hammer_lock_ex(&parent->lock);
296                 hammer_lock_ex(&cursor->node->lock);
297                 /* XXX check node generation count */
298         }
299         return(error);
300 }
301
302 static
303 int
304 hammer_load_cursor_parent_cluster(hammer_cursor_t cursor)
305 {
306         hammer_cluster_ondisk_t ondisk;
307         hammer_cluster_t pcluster;
308         hammer_cluster_t ccluster;
309         hammer_volume_t volume;
310         hammer_node_t node;
311         hammer_node_t parent;
312         hammer_btree_elm_t elm;
313         int32_t clu_no;
314         int32_t vol_no;
315         int error;
316         int i;
317
318         node = cursor->node;
319         ccluster = node->cluster;
320         ondisk = ccluster->ondisk;
321         vol_no = ondisk->clu_btree_parent_vol_no;
322         clu_no = ondisk->clu_btree_parent_clu_no;
323
324         /*
325          * Acquire the node from (volume, cluster, offset)
326          */
327         volume = hammer_get_volume(ccluster->volume->hmp, vol_no, &error);
328         if (error)
329                 return (error);
330         pcluster = hammer_get_cluster(volume, clu_no, &error, 0);
331         hammer_rel_volume(volume, 0);
332         if (error)
333                 return (error);
334         parent = hammer_get_node(pcluster, ondisk->clu_btree_parent_offset,
335                                  &error);
336         hammer_rel_cluster(pcluster, 0);
337         if (error)
338                 return (error);
339
340         /* 
341          * Ok, we have the node.  Locate the inter-cluster element
342          */
343         elm = NULL;
344         for (i = 0; i < parent->ondisk->count; ++i) {
345                 elm = &parent->ondisk->elms[i];
346                 if (elm->internal.rec_offset != 0 &&
347                     elm->internal.subtree_type == HAMMER_BTREE_TYPE_CLUSTER &&
348                     elm->internal.subtree_clu_no == cursor->node->cluster->clu_no) {
349                         break;
350                 }
351         }
352         KKASSERT(i != parent->ondisk->count);
353         cursor->parent = parent;
354         cursor->parent_index = i;
355         cursor->left_bound = &elm[0].internal.base;
356         cursor->right_bound = &elm[1].internal.base;
357
358         KKASSERT(hammer_btree_cmp(cursor->left_bound,
359                  &ccluster->ondisk->clu_btree_beg) <= 0);
360         KKASSERT(hammer_btree_cmp(cursor->right_bound,
361                  &ccluster->ondisk->clu_btree_end) >= 0);
362
363         if (hammer_lock_ex_try(&parent->lock) != 0) {
364                 hammer_unlock(&cursor->node->lock);
365                 hammer_lock_ex(&parent->lock);
366                 hammer_lock_ex(&cursor->node->lock);
367                 /* XXX check node generation count */
368         }
369         return(0);
370 }
371
372 /*
373  * Cursor up to our parent node.  Return ENOENT if we are at the root of
374  * the root cluster.
375  *
376  * If doing a nonblocking cursor-up and we are unable to acquire the lock,
377  * the cursor remains unchanged.
378  */
379 int
380 hammer_cursor_up(hammer_cursor_t cursor, int nonblock)
381 {
382         hammer_node_t save;
383         int error;
384
385         /*
386          * If asked to do this non-blocking acquire a lock on the parent
387          * now, before we mess with the cursor.
388          */
389         if (nonblock) {
390                 save = hammer_get_parent_node(cursor->parent, &error);
391                 if (error)
392                         return(error);
393                 if (save) {
394                         if (hammer_lock_ex_try(&save->lock) != 0) {
395                                 hammer_rel_node(save);
396                                 return(EAGAIN);
397                         }
398                 }
399         } else {
400                 save = NULL;
401         }
402
403         /*
404          * Set the node to its parent.  If the parent is NULL we are at
405          * the root of the root cluster and return ENOENT.
406          */
407         hammer_unlock(&cursor->node->lock);
408         hammer_rel_node(cursor->node);
409         cursor->node = cursor->parent;
410         cursor->index = cursor->parent_index;
411         cursor->parent = NULL;
412         cursor->parent_index = 0;
413
414         if (cursor->node == NULL) {
415                 error = ENOENT;
416         } else if ((cursor->flags & HAMMER_CURSOR_INCLUSTER) &&
417                    cursor->node->ondisk->parent == 0
418         ) {
419                 error = ENOENT;
420         } else {
421                 error = hammer_load_cursor_parent(cursor);
422         }
423         if (save) {
424                 hammer_unlock(&save->lock);
425                 hammer_rel_node(save);
426         }
427         return(error);
428 }
429
430 /*
431  * Set the cursor to the root of the current cursor's cluster.
432  */
433 int
434 hammer_cursor_toroot(hammer_cursor_t cursor)
435 {
436         hammer_cluster_t cluster;
437         int error;
438
439         /*
440          * Already at root of cluster?
441          */
442         if (cursor->node->ondisk->parent == 0) 
443                 return (0);
444
445         /*
446          * Parent is root of cluster?
447          */
448         if (cursor->parent->ondisk->parent == 0)
449                 return (hammer_cursor_up(cursor, 0));
450
451         /*
452          * Ok, reload the cursor with the root of the cluster, then
453          * locate its parent.
454          */
455         cluster = cursor->node->cluster;
456         error = hammer_ref_cluster(cluster);
457         if (error)
458                 return (error);
459
460         hammer_unlock(&cursor->parent->lock);
461         hammer_rel_node(cursor->parent);
462         hammer_unlock(&cursor->node->lock);
463         hammer_rel_node(cursor->node);
464         cursor->parent = NULL;
465         cursor->parent_index = 0;
466
467         cursor->node = hammer_get_node(cluster, cluster->ondisk->clu_btree_root,
468                                        &error);
469         cursor->index = 0;
470         hammer_lock_ex(&cursor->node->lock);
471         hammer_rel_cluster(cluster, 0);
472         if (error == 0)
473                 error = hammer_load_cursor_parent(cursor);
474         return(error);
475 }
476
477 /*
478  * Cursor down through the current node, which must be an internal node.
479  *
480  * This routine adjusts the cursor and sets index to 0.
481  */
482 int
483 hammer_cursor_down(hammer_cursor_t cursor)
484 {
485         hammer_node_t node;
486         hammer_btree_elm_t elm;
487         hammer_volume_t volume;
488         hammer_cluster_t cluster;
489         int32_t vol_no;
490         int32_t clu_no;
491         int error;
492
493         /*
494          * The current node becomes the current parent
495          */
496         node = cursor->node;
497         KKASSERT(node->ondisk->type == HAMMER_BTREE_TYPE_INTERNAL);
498         KKASSERT(cursor->index >= 0 && cursor->index < node->ondisk->count);
499         if (cursor->parent) {
500                 hammer_unlock(&cursor->parent->lock);
501                 hammer_rel_node(cursor->parent);
502         }
503         cursor->parent = node;
504         cursor->parent_index = cursor->index;
505         cursor->node = NULL;
506         cursor->index = 0;
507
508         /*
509          * Extract element to push into at (node,index), set bounds.
510          */
511         elm = &node->ondisk->elms[cursor->parent_index];
512         cursor->left_bound = &elm[0].internal.base;
513         cursor->right_bound = &elm[1].internal.base;
514
515         /*
516          * Ok, push down into elm.  If rec_offset is non-zero this is
517          * an inter-cluster push, otherwise it is a intra-cluster push.
518          *
519          * Cursoring down through a cluster transition when the INCLUSTER
520          * flag is set is not legal.
521          */
522         if (elm->internal.rec_offset) {
523                 KKASSERT((cursor->flags & HAMMER_CURSOR_INCLUSTER) == 0);
524                 vol_no = elm->internal.subtree_vol_no;
525                 clu_no = elm->internal.subtree_clu_no;
526                 volume = hammer_get_volume(node->cluster->volume->hmp,
527                                            vol_no, &error);
528                 KKASSERT(error != EINVAL);
529                 if (error)
530                         return(error);
531                 cluster = hammer_get_cluster(volume, clu_no, &error, 0);
532                 KKASSERT(error != EINVAL);
533                 hammer_rel_volume(volume, 0);
534                 if (error)
535                         return(error);
536                 node = hammer_get_node(cluster,
537                                        cluster->ondisk->clu_btree_root,
538                                        &error);
539                 hammer_rel_cluster(cluster, 0);
540         } else {
541                 KKASSERT(elm->internal.subtree_offset != 0);
542                 node = hammer_get_node(node->cluster,
543                                        elm->internal.subtree_offset,
544                                        &error);
545                 if (error == 0) {
546                         KKASSERT(elm->internal.subtree_type == node->ondisk->type);
547                         if(node->ondisk->parent != cursor->parent->node_offset)
548                                 kprintf("node %p %d vs %d\n", node, node->ondisk->parent, cursor->parent->node_offset);
549                         KKASSERT(node->ondisk->parent == cursor->parent->node_offset);
550                 }
551         }
552         if (error == 0) {
553                 hammer_lock_ex(&node->lock);
554                 cursor->node = node;
555                 cursor->index = 0;
556         }
557         return(error);
558 }
559