Merge from vendor branch LIBARCHIVE:
[dragonfly.git] / sys / vfs / hammer / hammer_cursor.c
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sys/vfs/hammer/hammer_cursor.c,v 1.5 2007/11/30 00:16:56 dillon Exp $
35  */
36
37 /*
38  * HAMMER B-Tree index - cursor support routines
39  */
40 #include "hammer.h"
41
42 static int hammer_load_cursor_parent(hammer_cursor_t cursor);
43 static int hammer_load_cursor_parent_local(hammer_cursor_t cursor);
44 static int hammer_load_cursor_parent_cluster(hammer_cursor_t cursor);
45
46 /*
47  * Initialize a fresh cursor at the root of the filesystem hierarchy.
48  *
49  * cursor->parent will be NULL on return since we are loading the root
50  * node of the root cluster.
51  */
52 int
53 hammer_init_cursor_hmp(hammer_cursor_t cursor, hammer_mount_t hmp)
54 {
55         hammer_cluster_t cluster;
56         int error;
57
58         bzero(cursor, sizeof(*cursor));
59         cluster = hammer_get_root_cluster(hmp, &error);
60         if (error == 0) {
61                 cursor->node = hammer_get_node(cluster,
62                                                cluster->ondisk->clu_btree_root,
63                                                &error);
64                 hammer_rel_cluster(cluster, 0);
65                 if (error == 0)
66                         hammer_lock_ex(&cursor->node->lock);
67         }
68         if (error == 0)
69                 error = hammer_load_cursor_parent(cursor);
70         return(error);
71 }
72
73 /*
74  * Initialize a fresh cursor using the B-Tree node cached by the
75  * in-memory inode.
76  */
77 int
78 hammer_init_cursor_ip(hammer_cursor_t cursor, hammer_inode_t ip)
79 {
80         hammer_node_t node;
81         int error;
82
83         if (ip->cache) {
84                 bzero(cursor, sizeof(*cursor));
85                 node = ip->cache;
86                 error = hammer_ref_node(node);
87                 if (error == 0) {
88                         hammer_lock_ex(&node->lock);
89                         cursor->node = node;
90                         error = hammer_load_cursor_parent(cursor);
91                 } else {
92                         node = NULL;
93                         cursor->node = node;
94                 }
95         } else {
96                 error = hammer_init_cursor_hmp(cursor, ip->hmp);
97         }
98         return(error);
99 }
100
101 /*
102  * We are finished with a cursor.  We NULL out various fields as sanity
103  * check, in case the structure is inappropriately used afterwords.
104  */
105 void
106 hammer_done_cursor(hammer_cursor_t cursor)
107 {
108         if (cursor->parent) {
109                 hammer_unlock(&cursor->parent->lock);
110                 hammer_rel_node(cursor->parent);
111                 cursor->parent = NULL;
112         }
113         if (cursor->node) {
114                 hammer_unlock(&cursor->node->lock);
115                 hammer_rel_node(cursor->node);
116                 cursor->node = NULL;
117         }
118         if (cursor->data_buffer) {
119                 hammer_rel_buffer(cursor->data_buffer, 0);
120                 cursor->data_buffer = NULL;
121         }
122         if (cursor->record_buffer) {
123                 hammer_rel_buffer(cursor->record_buffer, 0);
124                 cursor->record_buffer = NULL;
125         }
126         if (cursor->ip)
127                 hammer_mem_done(cursor);
128
129         cursor->data = NULL;
130         cursor->record = NULL;
131         cursor->left_bound = NULL;
132         cursor->right_bound = NULL;
133 }
134
135 /*
136  * Acquire the parent B-Tree node of the specified node, returning a
137  * referenced but unlocked node.  NULL can be returned with *errorp == 0
138  * if node is the root node of the root cluster.
139  */
140 static
141 hammer_node_t
142 hammer_get_parent_node(hammer_node_t node, int *errorp)
143 {
144         hammer_cluster_t cluster;
145         hammer_node_t parent;
146
147         cluster = node->cluster;
148         if (node->ondisk->parent) {
149                 /*
150                  * Local parent
151                  */
152                 parent = hammer_get_node(cluster, node->ondisk->parent, errorp);
153         } else if (cluster->ondisk->clu_btree_parent_vol_no >= 0) {
154                 /*
155                  * At cluster root, locate node in parent cluster
156                  */
157                 hammer_cluster_ondisk_t ondisk;
158                 hammer_cluster_t pcluster;
159                 hammer_volume_t pvolume;
160                 int32_t clu_no;
161                 int32_t vol_no;
162
163                 ondisk = cluster->ondisk;
164                 vol_no = ondisk->clu_btree_parent_vol_no;
165                 clu_no = ondisk->clu_btree_parent_clu_no;
166
167                 /*
168                  * Acquire the node from (volume, cluster, offset)
169                  */
170                 pvolume = hammer_get_volume(cluster->volume->hmp, vol_no,
171                                             errorp);
172                 if (*errorp)
173                         return (NULL);
174                 pcluster = hammer_get_cluster(pvolume, clu_no, errorp, 0);
175                 hammer_rel_volume(pvolume, 0);
176                 if (*errorp)
177                         return (NULL);
178                 parent = hammer_get_node(pcluster,
179                                          ondisk->clu_btree_parent_offset,
180                                          errorp);
181                 hammer_rel_cluster(pcluster, 0);
182         } else {
183                 /*
184                  * At root of root cluster, there is no parent.
185                  */
186                 parent = NULL;
187                 *errorp = 0;
188         }
189         return(parent);
190 }
191
192 /*
193  * Load the parent of cursor->node into cursor->parent.  There are several
194  * cases.  (1) The parent is in the current cluster.  (2) We are at the
195  * root of the cluster and the parent is in another cluster.  (3) We are at
196  * the root of the root cluster.
197  */
198 static
199 int
200 hammer_load_cursor_parent(hammer_cursor_t cursor)
201 {
202         hammer_cluster_t cluster;
203         int error;
204
205         cluster = cursor->node->cluster;
206
207         if (cursor->node->ondisk->parent) {
208                 error = hammer_load_cursor_parent_local(cursor);
209         } else if (cluster->ondisk->clu_btree_parent_vol_no >= 0) {
210                 error = hammer_load_cursor_parent_cluster(cursor);
211         } else {
212                 cursor->parent = NULL;
213                 cursor->parent_index = 0;
214                 cursor->left_bound = &cluster->ondisk->clu_btree_beg;
215                 cursor->right_bound = &cluster->ondisk->clu_btree_end;
216                 error = 0;
217         }
218         return(error);
219 }
220
221 static
222 int
223 hammer_load_cursor_parent_local(hammer_cursor_t cursor)
224 {
225         hammer_node_t node;
226         hammer_node_t parent;
227         hammer_btree_elm_t elm;
228         int error;
229         int i;
230
231         node = cursor->node;
232         parent = hammer_get_node(node->cluster, node->ondisk->parent, &error);
233         if (error)
234                 return(error);
235         elm = NULL;
236         for (i = 0; i < parent->ondisk->count; ++i) {
237                 elm = &parent->ondisk->elms[i];
238                 if (parent->ondisk->elms[i].internal.subtree_offset ==
239                     node->node_offset) {
240                         break;
241                 }
242         }
243         KKASSERT(i != parent->ondisk->count);
244         KKASSERT(parent->ondisk->elms[i].internal.rec_offset == 0);
245         cursor->parent = parent;
246         cursor->parent_index = i;
247         cursor->left_bound = &elm[0].internal.base;
248         cursor->right_bound = &elm[1].internal.base;
249
250         if (hammer_lock_ex_try(&parent->lock) != 0) {
251                 hammer_unlock(&cursor->node->lock);
252                 hammer_lock_ex(&parent->lock);
253                 hammer_lock_ex(&cursor->node->lock);
254                 /* XXX check node generation count */
255         }
256         return(error);
257 }
258
259 static
260 int
261 hammer_load_cursor_parent_cluster(hammer_cursor_t cursor)
262 {
263         hammer_cluster_ondisk_t ondisk;
264         hammer_cluster_t cluster;
265         hammer_volume_t volume;
266         hammer_node_t node;
267         hammer_node_t parent;
268         hammer_btree_elm_t elm;
269         int32_t clu_no;
270         int32_t vol_no;
271         int error;
272         int i;
273
274         node = cursor->node;
275         ondisk = node->cluster->ondisk;
276         vol_no = ondisk->clu_btree_parent_vol_no;
277         clu_no = ondisk->clu_btree_parent_clu_no;
278
279         /*
280          * Acquire the node from (volume, cluster, offset)
281          */
282         volume = hammer_get_volume(node->cluster->volume->hmp, vol_no, &error);
283         if (error)
284                 return (error);
285         cluster = hammer_get_cluster(volume, clu_no, &error, 0);
286         hammer_rel_volume(volume, 0);
287         if (error)
288                 return (error);
289         parent = hammer_get_node(cluster, ondisk->clu_btree_parent_offset,
290                                  &error);
291         hammer_rel_cluster(cluster, 0);
292         if (error)
293                 return (error);
294
295         /* 
296          * Ok, we have the node.  Locate the inter-cluster element
297          */
298         elm = NULL;
299         for (i = 0; i < parent->ondisk->count; ++i) {
300                 elm = &parent->ondisk->elms[i];
301                 if (elm->internal.rec_offset != 0 &&
302                     elm->internal.subtree_cluid == clu_no) {
303                         break;
304                 }
305         }
306         KKASSERT(i != parent->ondisk->count);
307         cursor->parent = parent;
308         cursor->parent_index = i;
309         cursor->left_bound = &elm[0].internal.base;
310         cursor->right_bound = &elm[1].internal.base;
311
312         KKASSERT(hammer_btree_cmp(cursor->left_bound,
313                  &parent->cluster->ondisk->clu_btree_beg) == 0);
314         KKASSERT(hammer_btree_cmp(cursor->right_bound,
315                  &parent->cluster->ondisk->clu_btree_end) == 0);
316
317         if (hammer_lock_ex_try(&parent->lock) != 0) {
318                 hammer_unlock(&cursor->node->lock);
319                 hammer_lock_ex(&parent->lock);
320                 hammer_lock_ex(&cursor->node->lock);
321                 /* XXX check node generation count */
322         }
323         return(0);
324 }
325
326 /*
327  * Cursor up to our parent node.  Return ENOENT if we are at the root of
328  * the root cluster.
329  *
330  * If doing a nonblocking cursor-up and we are unable to acquire the lock,
331  * the cursor remains unchanged.
332  */
333 int
334 hammer_cursor_up(hammer_cursor_t cursor, int nonblock)
335 {
336         hammer_node_t save;
337         int error;
338
339         /*
340          * If asked to do this non-blocking acquire a lock on the parent
341          * now, before we mess with the cursor.
342          */
343         if (nonblock) {
344                 save = hammer_get_parent_node(cursor->parent, &error);
345                 if (error)
346                         return(error);
347                 if (save) {
348                         if (hammer_lock_ex_try(&save->lock) != 0) {
349                                 hammer_rel_node(save);
350                                 return(EAGAIN);
351                         }
352                 }
353         } else {
354                 save = NULL;
355         }
356
357         /*
358          * Set the node to its parent.  If the parent is NULL we are at
359          * the root of the root cluster and return ENOENT.
360          */
361         hammer_unlock(&cursor->node->lock);
362         hammer_rel_node(cursor->node);
363         cursor->node = cursor->parent;
364         cursor->index = cursor->parent_index;
365         cursor->parent = NULL;
366         cursor->parent_index = 0;
367
368         if (cursor->node == NULL) {
369                 error = ENOENT;
370         } else {
371                 error = hammer_load_cursor_parent(cursor);
372         }
373         if (save) {
374                 hammer_unlock(&save->lock);
375                 hammer_rel_node(save);
376         }
377         return(error);
378 }
379
380 /*
381  * Set the cursor to the root of the current cursor's cluster.
382  */
383 int
384 hammer_cursor_toroot(hammer_cursor_t cursor)
385 {
386         hammer_cluster_t cluster;
387         int error;
388
389         /*
390          * Already at root of cluster?
391          */
392         if (cursor->node->ondisk->parent == 0) 
393                 return (0);
394
395         /*
396          * Parent is root of cluster?
397          */
398         if (cursor->parent->ondisk->parent == 0)
399                 return (hammer_cursor_up(cursor, 0));
400
401         /*
402          * Ok, reload the cursor with the root of the cluster, then
403          * locate its parent.
404          */
405         cluster = cursor->node->cluster;
406         error = hammer_ref_cluster(cluster);
407         if (error)
408                 return (error);
409
410         hammer_unlock(&cursor->parent->lock);
411         hammer_rel_node(cursor->parent);
412         hammer_unlock(&cursor->node->lock);
413         hammer_rel_node(cursor->node);
414         cursor->parent = NULL;
415         cursor->parent_index = 0;
416
417         cursor->node = hammer_get_node(cluster, cluster->ondisk->clu_btree_root,
418                                        &error);
419         cursor->index = 0;
420         hammer_rel_cluster(cluster, 0);
421         if (error == 0)
422                 error = hammer_load_cursor_parent(cursor);
423         return(error);
424 }
425
426 /*
427  * Cursor down through the current node, which must be an internal node.
428  *
429  * This routine adjusts the cursor and sets index to 0.
430  */
431 int
432 hammer_cursor_down(hammer_cursor_t cursor)
433 {
434         hammer_node_t node;
435         hammer_btree_elm_t elm;
436         hammer_volume_t volume;
437         hammer_cluster_t cluster;
438         int32_t vol_no;
439         int32_t clu_no;
440         int error;
441
442         /*
443          * The current node becomes the current parent
444          */
445         node = cursor->node;
446         KKASSERT(node->ondisk->type == HAMMER_BTREE_TYPE_INTERNAL);
447         KKASSERT(cursor->index >= 0 && cursor->index < node->ondisk->count);
448         if (cursor->parent) {
449                 hammer_unlock(&cursor->parent->lock);
450                 hammer_rel_node(cursor->parent);
451         }
452         cursor->parent = node;
453         cursor->parent_index = cursor->index;
454         cursor->node = NULL;
455         cursor->index = 0;
456
457         /*
458          * Extract element to push into at (node,index), set bounds.
459          */
460         elm = &node->ondisk->elms[cursor->parent_index];
461         cursor->left_bound = &elm[0].internal.base;
462         cursor->right_bound = &elm[1].internal.base;
463
464         /*
465          * Ok, push down into elm.  If rec_offset is non-zero this is
466          * an inter-cluster push, otherwise it is a intra-cluster push.
467          */
468         if (elm->internal.rec_offset) {
469                 vol_no = elm->internal.subtree_volno;
470                 clu_no = elm->internal.subtree_cluid;
471                 volume = hammer_get_volume(node->cluster->volume->hmp,
472                                            vol_no, &error);
473                 if (error)
474                         return(error);
475                 cluster = hammer_get_cluster(volume, clu_no, &error, 0);
476                 hammer_rel_volume(volume, 0);
477                 if (error)
478                         return(error);
479                 node = hammer_get_node(cluster,
480                                        cluster->ondisk->clu_btree_root,
481                                        &error);
482                 hammer_rel_cluster(cluster, 0);
483         } else {
484                 node = hammer_get_node(node->cluster,
485                                        elm->internal.subtree_offset,
486                                        &error);
487         }
488         if (error == 0) {
489                 hammer_lock_ex(&node->lock);
490                 cursor->node = node;
491                 cursor->index = 0;
492         }
493         return(error);
494 }
495