HAMMER 27/many: Major surgery - change allocation model
[dragonfly.git] / sbin / hammer / ondisk.c
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  * 
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * 
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  * 
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  * 
34  * $DragonFly: src/sbin/hammer/ondisk.c,v 1.10 2008/02/08 08:30:56 dillon Exp $
35  */
36
37 #include <sys/types.h>
38 #include <assert.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <stdarg.h>
42 #include <string.h>
43 #include <unistd.h>
44 #include <err.h>
45 #include <fcntl.h>
46 #include "hammer_util.h"
47
48 static void init_fifo_head(hammer_fifo_head_t head, u_int16_t hdr_type);
49 static hammer_off_t hammer_alloc_fifo(int32_t base_bytes, int32_t ext_bytes,
50                         struct buffer_info **bufp, u_int16_t hdr_type);
51 #if 0
52 static void readhammerbuf(struct volume_info *vol, void *data,
53                         int64_t offset);
54 #endif
55 static void writehammerbuf(struct volume_info *vol, const void *data,
56                         int64_t offset);
57
58
59 uuid_t Hammer_FSType;
60 uuid_t Hammer_FSId;
61 int64_t BootAreaSize;
62 int64_t MemAreaSize;
63 int     UsingSuperClusters;
64 int     NumVolumes;
65 int     RootVolNo = -1;
66 struct volume_list VolList = TAILQ_HEAD_INITIALIZER(VolList);
67
68 /*
69  * Lookup the requested information structure and related on-disk buffer.
70  * Missing structures are created.
71  */
72 struct volume_info *
73 setup_volume(int32_t vol_no, const char *filename, int isnew, int oflags)
74 {
75         struct volume_info *vol;
76         struct volume_info *scan;
77         struct hammer_volume_ondisk *ondisk;
78         int n;
79
80         /*
81          * Allocate the volume structure
82          */
83         vol = malloc(sizeof(*vol));
84         bzero(vol, sizeof(*vol));
85         TAILQ_INIT(&vol->buffer_list);
86         vol->name = strdup(filename);
87         vol->fd = open(filename, oflags);
88         if (vol->fd < 0) {
89                 free(vol->name);
90                 free(vol);
91                 err(1, "setup_volume: %s: Open failed", filename);
92         }
93
94         /*
95          * Read or initialize the volume header
96          */
97         vol->ondisk = ondisk = malloc(HAMMER_BUFSIZE);
98         if (isnew) {
99                 bzero(ondisk, HAMMER_BUFSIZE);
100         } else {
101                 n = pread(vol->fd, ondisk, HAMMER_BUFSIZE, 0);
102                 if (n != HAMMER_BUFSIZE) {
103                         err(1, "setup_volume: %s: Read failed at offset 0",
104                             filename);
105                 }
106                 vol_no = ondisk->vol_no;
107                 if (RootVolNo < 0) {
108                         RootVolNo = ondisk->vol_rootvol;
109                 } else if (RootVolNo != (int)ondisk->vol_rootvol) {
110                         errx(1, "setup_volume: %s: root volume disagreement: "
111                                 "%d vs %d",
112                                 vol->name, RootVolNo, ondisk->vol_rootvol);
113                 }
114
115                 if (bcmp(&Hammer_FSType, &ondisk->vol_fstype, sizeof(Hammer_FSType)) != 0) {
116                         errx(1, "setup_volume: %s: Header does not indicate "
117                                 "that this is a hammer volume", vol->name);
118                 }
119                 if (TAILQ_EMPTY(&VolList)) {
120                         Hammer_FSId = vol->ondisk->vol_fsid;
121                 } else if (bcmp(&Hammer_FSId, &ondisk->vol_fsid, sizeof(Hammer_FSId)) != 0) {
122                         errx(1, "setup_volume: %s: FSId does match other "
123                                 "volumes!", vol->name);
124                 }
125         }
126         vol->vol_no = vol_no;
127
128         if (isnew) {
129                 init_fifo_head(&ondisk->head, HAMMER_HEAD_TYPE_VOL);
130                 vol->cache.modified = 1;
131         }
132
133         /*
134          * Link the volume structure in
135          */
136         TAILQ_FOREACH(scan, &VolList, entry) {
137                 if (scan->vol_no == vol_no) {
138                         errx(1, "setup_volume %s: Duplicate volume number %d "
139                                 "against %s", filename, vol_no, scan->name);
140                 }
141         }
142         TAILQ_INSERT_TAIL(&VolList, vol, entry);
143         return(vol);
144 }
145
146 struct volume_info *
147 get_volume(int32_t vol_no)
148 {
149         struct volume_info *vol;
150
151         TAILQ_FOREACH(vol, &VolList, entry) {
152                 if (vol->vol_no == vol_no)
153                         break;
154         }
155         if (vol == NULL)
156                 errx(1, "get_volume: Volume %d does not exist!", vol_no);
157         ++vol->cache.refs;
158         /* not added to or removed from hammer cache */
159         return(vol);
160 }
161
162 void
163 rel_volume(struct volume_info *volume)
164 {
165         /* not added to or removed from hammer cache */
166         --volume->cache.refs;
167 }
168
169 /*
170  * Acquire the specified buffer.
171  */
172 struct buffer_info *
173 get_buffer(hammer_off_t buf_offset, int isnew)
174 {
175         void *ondisk;
176         struct buffer_info *buf;
177         struct volume_info *volume;
178         int n;
179         int vol_no;
180
181         assert((buf_offset & HAMMER_OFF_ZONE_MASK) == HAMMER_ZONE_RAW_BUFFER);
182
183         vol_no = HAMMER_VOL_DECODE(buf_offset);
184         volume = get_volume(vol_no);
185         buf_offset &= ~HAMMER_BUFMASK64;
186
187         TAILQ_FOREACH(buf, &volume->buffer_list, entry) {
188                 if (buf->buf_offset == buf_offset)
189                         break;
190         }
191         if (buf == NULL) {
192                 buf = malloc(sizeof(*buf));
193                 bzero(buf, sizeof(*buf));
194                 buf->buf_offset = buf_offset;
195                 buf->buf_disk_offset = volume->ondisk->vol_buf_beg +
196                                         (buf_offset & HAMMER_OFF_SHORT_MASK);
197                 buf->volume = volume;
198                 TAILQ_INSERT_TAIL(&volume->buffer_list, buf, entry);
199                 ++volume->cache.refs;
200                 buf->cache.u.buffer = buf;
201                 hammer_cache_add(&buf->cache, ISBUFFER);
202         }
203         ++buf->cache.refs;
204         hammer_cache_flush();
205         if ((ondisk = buf->ondisk) == NULL) {
206                 buf->ondisk = ondisk = malloc(HAMMER_BUFSIZE);
207                 if (isnew == 0) {
208                         n = pread(volume->fd, ondisk, HAMMER_BUFSIZE,
209                                   buf->buf_disk_offset);
210                         if (n != HAMMER_BUFSIZE) {
211                                 err(1, "get_buffer: %s:%016llx Read failed at "
212                                        "offset %lld",
213                                     volume->name, buf->buf_offset,
214                                     buf->buf_disk_offset);
215                         }
216                 }
217         }
218         if (isnew) {
219                 bzero(ondisk, HAMMER_BUFSIZE);
220                 buf->cache.modified = 1;
221         }
222         return(buf);
223 }
224
225 void
226 rel_buffer(struct buffer_info *buffer)
227 {
228         struct volume_info *volume;
229
230         assert(buffer->cache.refs > 0);
231         if (--buffer->cache.refs == 0) {
232                 if (buffer->cache.delete) {
233                         volume = buffer->volume;
234                         if (buffer->cache.modified)
235                                 flush_buffer(buffer);
236                         TAILQ_REMOVE(&volume->buffer_list, buffer, entry);
237                         hammer_cache_del(&buffer->cache);
238                         free(buffer->ondisk);
239                         free(buffer);
240                         rel_volume(volume);
241                 }
242         }
243 }
244
245 /*
246  * Retrieve a pointer to a B-Tree node given a cluster offset.  The underlying
247  * bufp is freed if non-NULL and a referenced buffer is loaded into it.
248  */
249 hammer_node_ondisk_t
250 get_node(hammer_off_t node_offset, struct buffer_info **bufp)
251 {
252         struct buffer_info *buf;
253
254         if (*bufp)
255                 rel_buffer(*bufp);
256         *bufp = buf = get_buffer(node_offset, 0);
257         return((void *)((char *)buf->ondisk +
258                         (int32_t)(node_offset & HAMMER_BUFMASK)));
259 }
260
261 /*
262  * Allocate HAMMER elements - btree nodes, data storage, and record elements
263  *
264  * NOTE: hammer_alloc_fifo() initializes the fifo header for the returned
265  * item and zero's out the remainder, so don't bzero() it.
266  */
267 void *
268 alloc_btree_element(hammer_off_t *offp)
269 {
270         struct buffer_info *buf;
271         void *item;
272
273         *offp = hammer_alloc_fifo(sizeof(struct hammer_node_ondisk), 0,
274                                   &buf, HAMMER_HEAD_TYPE_BTREE);
275         item = (char *)buf->ondisk + ((int32_t)*offp & HAMMER_BUFMASK);
276         /* XXX buf not released, ptr remains valid */
277         return(item);
278 }
279
280 hammer_record_ondisk_t
281 alloc_record_element(hammer_off_t *offp, u_int8_t rec_type,
282                      int32_t rec_len, int32_t data_len, void **datap)
283 {
284         struct buffer_info *buf;
285         hammer_record_ondisk_t rec;
286         int32_t aligned_rec_len;
287
288         aligned_rec_len = (rec_len + HAMMER_HEAD_ALIGN_MASK) &
289                           ~HAMMER_HEAD_ALIGN_MASK;
290
291         *offp = hammer_alloc_fifo(aligned_rec_len, data_len, &buf,
292                                   HAMMER_HEAD_TYPE_RECORD);
293         rec = (void *)((char *)buf->ondisk + ((int32_t)*offp & HAMMER_BUFMASK));
294         rec->base.base.rec_type = rec_type;
295         if (data_len) {
296                 rec->base.data_off = *offp + aligned_rec_len;
297                 rec->base.data_len = data_len;
298                 *datap = (char *)rec + aligned_rec_len;
299         } else {
300                 *datap = NULL;
301         }
302         /* XXX buf not released, ptr remains valid */
303         return(rec);
304 }
305
306 /*
307  * Reserve space from the FIFO.  Make sure that bytes does not cross a 
308  * record boundary.
309  *
310  * Initialize the fifo header, keep track of the previous entry's size
311  * so the reverse poitner can be initialized (using lastBlk), and also
312  * store a terminator (used by the recovery code) which will be overwritten
313  * by the next allocation.
314  */
315 static
316 hammer_off_t
317 hammer_alloc_fifo(int32_t base_bytes, int32_t ext_bytes,
318                   struct buffer_info **bufp, u_int16_t hdr_type)
319 {
320         struct buffer_info *buf;
321         struct volume_info *volume;
322         hammer_fifo_head_t head;
323         hammer_off_t off;
324         int32_t aligned_bytes;
325         static u_int32_t lastBlk;
326
327         aligned_bytes = (base_bytes + ext_bytes + HAMMER_HEAD_ALIGN_MASK) &
328                         ~HAMMER_HEAD_ALIGN_MASK;
329
330         volume = get_volume(RootVolNo);
331         off = volume->ondisk->vol0_fifo_end;
332
333         /*
334          * For now don't deal with transitions across buffer boundaries,
335          * only newfs_hammer uses this function.
336          */
337         assert((off & ~HAMMER_BUFMASK64) ==
338                  ((off + aligned_bytes + sizeof(*head)) & ~HAMMER_BUFMASK));
339
340         *bufp = buf = get_buffer(off, 0);
341
342         buf->cache.modified = 1;
343         volume->cache.modified = 1;
344
345         head = (void *)((char *)buf->ondisk + ((int32_t)off & HAMMER_BUFMASK));
346         bzero(head, base_bytes);
347
348         head->hdr_type = hdr_type;
349         head->hdr_rev_link = lastBlk;
350         head->hdr_fwd_link = aligned_bytes;
351         head->hdr_seq = volume->ondisk->vol0_next_seq++;
352         lastBlk = head->hdr_fwd_link;
353
354         volume->ondisk->vol0_fifo_end += aligned_bytes;
355         volume->cache.modified = 1;
356         head = (void *)((char *)head + aligned_bytes);
357         head->hdr_signature = HAMMER_HEAD_SIGNATURE;
358         head->hdr_type = HAMMER_HEAD_TYPE_TERM;
359         head->hdr_rev_link = lastBlk;
360         head->hdr_fwd_link = 0;
361         head->hdr_crc = 0;
362         head->hdr_seq = volume->ondisk->vol0_next_seq;
363
364         rel_volume(volume);
365
366         return(off);
367 }
368
369 /*
370  * Flush various tracking structures to disk
371  */
372
373 /*
374  * Flush various tracking structures to disk
375  */
376 void
377 flush_all_volumes(void)
378 {
379         struct volume_info *vol;
380
381         TAILQ_FOREACH(vol, &VolList, entry)
382                 flush_volume(vol);
383 }
384
385 void
386 flush_volume(struct volume_info *volume)
387 {
388         struct buffer_info *buffer;
389
390         TAILQ_FOREACH(buffer, &volume->buffer_list, entry)
391                 flush_buffer(buffer);
392         writehammerbuf(volume, volume->ondisk, 0);
393         volume->cache.modified = 0;
394 }
395
396 void
397 flush_buffer(struct buffer_info *buffer)
398 {
399         writehammerbuf(buffer->volume, buffer->ondisk, buffer->buf_disk_offset);
400         buffer->cache.modified = 0;
401 }
402
403 /*
404  * Generic buffer initialization
405  */
406 static void
407 init_fifo_head(hammer_fifo_head_t head, u_int16_t hdr_type)
408 {
409         head->hdr_signature = HAMMER_HEAD_SIGNATURE;
410         head->hdr_type = hdr_type;
411         head->hdr_rev_link = 0;
412         head->hdr_fwd_link = 0;
413         head->hdr_crc = 0;
414         head->hdr_seq = 0;
415 }
416
417 #if 0
418 /*
419  * Core I/O operations
420  */
421 static void
422 readhammerbuf(struct volume_info *vol, void *data, int64_t offset)
423 {
424         ssize_t n;
425
426         n = pread(vol->fd, data, HAMMER_BUFSIZE, offset);
427         if (n != HAMMER_BUFSIZE)
428                 err(1, "Read volume %d (%s)", vol->vol_no, vol->name);
429 }
430
431 #endif
432
433 static void
434 writehammerbuf(struct volume_info *vol, const void *data, int64_t offset)
435 {
436         ssize_t n;
437
438         n = pwrite(vol->fd, data, HAMMER_BUFSIZE, offset);
439         if (n != HAMMER_BUFSIZE)
440                 err(1, "Write volume %d (%s)", vol->vol_no, vol->name);
441 }
442
/*
 * Print a printf-style message followed by a newline on stderr and exit
 * with status 1.
 */
void
panic(const char *ctl, ...)
{
        va_list args;

        va_start(args, ctl);
        vfprintf(stderr, ctl, args);
        va_end(args);

        fputs("\n", stderr);
        exit(1);
}
454