50af62c3e8affbf83deb6bdf0521dbfd41b209d4
[dragonfly.git] / sys / vfs / hammer / hammer_volume.c
1 /*
2  * Copyright (c) 2009 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com> and
6  * Michael Neumann <mneumann@ntecs.de>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  */
36
37 #include "hammer.h"
38 #include <sys/fcntl.h>
39 #include <sys/nlookup.h>
40 #include <sys/buf.h>
41
42 static int
43 hammer_setup_device(struct vnode **devvpp, const char *dev_path, int ronly);
44
45 static void
46 hammer_close_device(struct vnode **devvpp, int ronly);
47
48 static int
49 hammer_format_volume_header(struct hammer_mount *hmp, struct vnode *devvp,
50         const char *vol_name, int vol_no, int vol_count,
51         int64_t vol_size, int64_t boot_area_size, int64_t mem_area_size);
52
53 static uint64_t
54 hammer_format_freemap(struct hammer_mount *hmp,
55         hammer_transaction_t trans,
56         hammer_volume_t volume);
57
58 static uint64_t
59 hammer_format_layer2_chunk(struct hammer_mount *hmp,
60         hammer_transaction_t trans,
61         hammer_off_t phys_offset,
62         hammer_off_t aligned_buf_end_off,
63         hammer_buffer_t *bufferp,
64         int *errorp);
65
66 static void
67 hammer_set_layer1_entry(struct hammer_mount *hmp,
68         hammer_transaction_t trans,
69         hammer_off_t phys_offset,
70         uint64_t free_bigblocks,
71         hammer_blockmap_t freemap,
72         hammer_buffer_t *bufferp,
73         int *errorp);
74
75 int
76 hammer_ioc_volume_add(hammer_transaction_t trans, hammer_inode_t ip,
77                 struct hammer_ioc_volume_add *ioc)
78 {
79         struct hammer_mount *hmp = trans->hmp;
80         struct mount *mp = hmp->mp;
81         hammer_volume_t volume;
82         hammer_volume_t root_volume;
83         int error;
84
85         if (mp->mnt_flag & MNT_RDONLY) {
86                 kprintf("Cannot add volume to read-only HAMMER filesystem\n");
87                 return (EINVAL);
88         }
89
90         if (hmp->nvolumes + 1 >= HAMMER_MAX_VOLUMES) {
91                 kprintf("Max number of HAMMER volumes exceeded\n");
92                 return (EINVAL);
93         }
94
95         /*
96          * Find an unused volume number.
97          */
98         int free_vol_no = 0;
99         while (free_vol_no < HAMMER_MAX_VOLUMES &&
100                RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, free_vol_no)) {
101                 ++free_vol_no;
102         }
103         if (free_vol_no >= HAMMER_MAX_VOLUMES) {
104                 kprintf("Max number of HAMMER volumes exceeded\n");
105                 return (EINVAL);
106         }
107
108         struct vnode *devvp = NULL;
109         error = hammer_setup_device(&devvp, ioc->device_name, 0);
110         if (error)
111                 goto end;
112         KKASSERT(devvp);
113         error = hammer_format_volume_header(
114                 hmp,
115                 devvp,
116                 hmp->rootvol->ondisk->vol_name,
117                 free_vol_no,
118                 hmp->nvolumes+1,
119                 ioc->vol_size,
120                 ioc->boot_area_size,
121                 ioc->mem_area_size);
122         hammer_close_device(&devvp, 0);
123         if (error)
124                 goto end;
125
126         error = hammer_install_volume(hmp, ioc->device_name, NULL);
127         if (error)
128                 goto end;
129
130         hammer_sync_lock_sh(trans);
131         hammer_lock_ex(&hmp->blkmap_lock);
132
133         ++hmp->nvolumes;
134
135         /*
136          * Set each volumes new value of the vol_count field.
137          */
138         for (int vol_no = 0; vol_no < HAMMER_MAX_VOLUMES; ++vol_no) {
139                 if (vol_no == free_vol_no)
140                         continue;
141
142                 volume = hammer_get_volume(hmp, vol_no, &error);
143                 if (volume == NULL && error == ENOENT) {
144                         /*
145                          * Skip unused volume numbers
146                          */
147                         error = 0;
148                         continue;
149                 }
150                 KKASSERT(volume != NULL && error == 0);
151                 hammer_modify_volume_field(trans, volume, vol_count);
152                 volume->ondisk->vol_count = hmp->nvolumes;
153                 hammer_modify_volume_done(volume);
154                 hammer_rel_volume(volume, 0);
155         }
156
157         volume = hammer_get_volume(hmp, free_vol_no, &error);
158         KKASSERT(volume != NULL && error == 0);
159         root_volume = hammer_get_root_volume(hmp, &error);
160         KKASSERT(root_volume != NULL && error == 0);
161
162         uint64_t total_free_bigblocks =
163                 hammer_format_freemap(hmp, trans, volume);
164
165         /*
166          * Increase the total number of bigblocks
167          */
168         hammer_modify_volume_field(trans, root_volume,
169                 vol0_stat_bigblocks);
170         root_volume->ondisk->vol0_stat_bigblocks += total_free_bigblocks;
171         hammer_modify_volume_done(root_volume);
172
173         /*
174          * Increase the number of free bigblocks
175          * (including the copy in hmp)
176          */
177         hammer_modify_volume_field(trans, root_volume,
178                 vol0_stat_freebigblocks);
179         root_volume->ondisk->vol0_stat_freebigblocks += total_free_bigblocks;
180         hmp->copy_stat_freebigblocks =
181                 root_volume->ondisk->vol0_stat_freebigblocks;
182         hammer_modify_volume_done(root_volume);
183
184         hammer_rel_volume(root_volume, 0);
185         hammer_rel_volume(volume, 0);
186
187         hammer_unlock(&hmp->blkmap_lock);
188         hammer_sync_unlock(trans);
189
190 end:
191         if (error)
192                 kprintf("An error occurred: %d\n", error);
193         return (error);
194 }
195
196 static uint64_t
197 hammer_format_freemap(struct hammer_mount *hmp,
198         hammer_transaction_t trans,
199         hammer_volume_t volume)
200 {
201         hammer_off_t phys_offset;
202         hammer_buffer_t buffer = NULL;
203         hammer_blockmap_t freemap;
204         hammer_off_t aligned_buf_end_off;
205         uint64_t free_bigblocks;
206         uint64_t total_free_bigblocks;
207         int error = 0;
208
209         total_free_bigblocks = 0;
210
211         /*
212          * Calculate the usable size of the new volume, which
213          * must be aligned at a bigblock (8 MB) boundary.
214          */
215         aligned_buf_end_off = HAMMER_ENCODE_RAW_BUFFER(volume->ondisk->vol_no,
216                 (volume->ondisk->vol_buf_end - volume->ondisk->vol_buf_beg)
217                 & ~HAMMER_LARGEBLOCK_MASK64);
218
219         freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
220
221         /*
222          * Iterate the volume's address space in chunks of 4 TB,
223          * where each chunk consists of at least one physically
224          * available 8 MB bigblock.
225          *
226          * For each chunk we need one L1 entry and one L2 bigblock.
227          * We use the first bigblock of each chunk as L2 block.
228          */
229         for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->ondisk->vol_no, 0);
230              phys_offset < aligned_buf_end_off;
231              phys_offset += HAMMER_BLOCKMAP_LAYER2) {
232
233                 free_bigblocks = hammer_format_layer2_chunk(hmp, trans,
234                         phys_offset, aligned_buf_end_off, &buffer, &error);
235                 KKASSERT(error == 0);
236
237                 hammer_set_layer1_entry(hmp, trans, phys_offset,
238                         free_bigblocks, freemap, &buffer, &error);
239                 KKASSERT(error == 0);
240
241                 total_free_bigblocks += free_bigblocks;
242         }
243
244         if (buffer) {
245                 hammer_rel_buffer(buffer, 0);
246                 buffer = NULL;
247         }
248
249         return total_free_bigblocks;
250 }
251
252 /*
253  * Format the L2 bigblock representing a 4 TB chunk.
254  *
255  * Returns the number of free bigblocks.
256  */
257 static uint64_t
258 hammer_format_layer2_chunk(struct hammer_mount *hmp,
259         hammer_transaction_t trans,
260         hammer_off_t phys_offset,
261         hammer_off_t aligned_buf_end_off,
262         hammer_buffer_t *bufferp,
263         int *errorp)
264 {
265         uint64_t free_bigblocks = 0;
266         hammer_off_t block_off;
267         hammer_off_t layer2_offset;
268         struct hammer_blockmap_layer2 *layer2;
269
270         for (block_off = 0;
271              block_off < HAMMER_BLOCKMAP_LAYER2;
272              block_off += HAMMER_LARGEBLOCK_SIZE) {
273                 layer2_offset = phys_offset +
274                                 HAMMER_BLOCKMAP_LAYER2_OFFSET(block_off);
275                 layer2 = hammer_bread(hmp, layer2_offset, errorp, bufferp);
276                 if (*errorp)
277                         return free_bigblocks;
278
279                 KKASSERT(layer2);
280
281                 hammer_modify_buffer(trans, *bufferp, layer2, sizeof(*layer2));
282                 bzero(layer2, sizeof(*layer2));
283
284                 if (block_off == 0) {
285                         /*
286                          * The first entry represents the L2 bigblock itself.
287                          */
288                         layer2->zone = HAMMER_ZONE_FREEMAP_INDEX;
289                         layer2->append_off = HAMMER_LARGEBLOCK_SIZE;
290                         layer2->bytes_free = 0;
291                 } else if (phys_offset + block_off < aligned_buf_end_off) {
292                         layer2->zone = 0;
293                         layer2->append_off = 0;
294                         layer2->bytes_free = HAMMER_LARGEBLOCK_SIZE;
295                         ++free_bigblocks;
296                 } else {
297                         /*
298                          * Bigblock outside of physically available space
299                          */
300                         layer2->zone = HAMMER_ZONE_UNAVAIL_INDEX;
301                         layer2->append_off = HAMMER_LARGEBLOCK_SIZE;
302                         layer2->bytes_free = 0;
303                 }
304                 layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
305
306                 hammer_modify_buffer_done(*bufferp);
307         }
308
309         return free_bigblocks;
310 }
311
312 static void
313 hammer_set_layer1_entry(struct hammer_mount *hmp,
314         hammer_transaction_t trans,
315         hammer_off_t phys_offset,
316         uint64_t free_bigblocks,
317         hammer_blockmap_t freemap,
318         hammer_buffer_t *bufferp,
319         int *errorp)
320 {
321         struct hammer_blockmap_layer1 *layer1;
322         hammer_off_t layer1_offset;
323
324         layer1_offset = freemap->phys_offset +
325                         HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset);
326         layer1 = hammer_bread(hmp, layer1_offset, errorp, bufferp);
327         if (*errorp)
328                 return;
329         KKASSERT(layer1);
330         KKASSERT(layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL);
331
332         hammer_modify_buffer(trans, *bufferp, layer1, sizeof(*layer1));
333         bzero(layer1, sizeof(*layer1));
334         layer1->phys_offset = phys_offset;
335         layer1->blocks_free = free_bigblocks;
336         layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
337
338         hammer_modify_buffer_done(*bufferp);
339 }
340
341 static int
342 hammer_setup_device(struct vnode **devvpp, const char *dev_path, int ronly)
343 {
344         int error;
345         struct nlookupdata nd;
346
347         /*
348          * Get the device vnode
349          */
350         if (*devvpp == NULL) {
351                 error = nlookup_init(&nd, dev_path, UIO_SYSSPACE, NLC_FOLLOW);
352                 if (error == 0)
353                         error = nlookup(&nd);
354                 if (error == 0)
355                         error = cache_vref(&nd.nl_nch, nd.nl_cred, devvpp);
356                 nlookup_done(&nd);
357         } else {
358                 error = 0;
359         }
360
361         if (error == 0) {
362                 if (vn_isdisk(*devvpp, &error)) {
363                         error = vfs_mountedon(*devvpp);
364                 }
365         }
366         if (error == 0 && vcount(*devvpp) > 0)
367                 error = EBUSY;
368         if (error == 0) {
369                 vn_lock(*devvpp, LK_EXCLUSIVE | LK_RETRY);
370                 error = vinvalbuf(*devvpp, V_SAVE, 0, 0);
371                 if (error == 0) {
372                         error = VOP_OPEN(*devvpp,
373                                          (ronly ? FREAD : FREAD|FWRITE),
374                                          FSCRED, NULL);
375                 }
376                 vn_unlock(*devvpp);
377         }
378         if (error && *devvpp) {
379                 vrele(*devvpp);
380                 *devvpp = NULL;
381         }
382         return (error);
383 }
384
385 static void
386 hammer_close_device(struct vnode **devvpp, int ronly)
387 {
388         VOP_CLOSE(*devvpp, (ronly ? FREAD : FREAD|FWRITE));
389         if (*devvpp) {
390                 vinvalbuf(*devvpp, ronly ? 0 : V_SAVE, 0, 0);
391                 vrele(*devvpp);
392                 *devvpp = NULL;
393         }
394 }
395
396 static int
397 hammer_format_volume_header(struct hammer_mount *hmp, struct vnode *devvp,
398         const char *vol_name, int vol_no, int vol_count,
399         int64_t vol_size, int64_t boot_area_size, int64_t mem_area_size)
400 {
401         struct buf *bp = NULL;
402         struct hammer_volume_ondisk *ondisk;
403         int error;
404
405         /*
406          * Extract the volume number from the volume header and do various
407          * sanity checks.
408          */
409         KKASSERT(HAMMER_BUFSIZE >= sizeof(struct hammer_volume_ondisk));
410         error = bread(devvp, 0LL, HAMMER_BUFSIZE, &bp);
411         if (error || bp->b_bcount < sizeof(struct hammer_volume_ondisk))
412                 goto late_failure;
413
414         ondisk = (struct hammer_volume_ondisk*) bp->b_data;
415
416         /*
417          * Note that we do NOT allow to use a device that contains
418          * a valid HAMMER signature. It has to be cleaned up with dd
419          * before.
420          */
421         if (ondisk->vol_signature == HAMMER_FSBUF_VOLUME) {
422                 kprintf("hammer_volume_add: Formatting of valid HAMMER volume "
423                         "%s denied. Erase with dd!\n", vol_name);
424                 error = EFTYPE;
425                 goto late_failure;
426         }
427
428         bzero(ondisk, sizeof(struct hammer_volume_ondisk));
429         ksnprintf(ondisk->vol_name, sizeof(ondisk->vol_name), "%s", vol_name);
430         ondisk->vol_fstype = hmp->rootvol->ondisk->vol_fstype;
431         ondisk->vol_signature = HAMMER_FSBUF_VOLUME;
432         ondisk->vol_fsid = hmp->fsid;
433         ondisk->vol_rootvol = hmp->rootvol->vol_no;
434         ondisk->vol_no = vol_no;
435         ondisk->vol_count = vol_count;
436         ondisk->vol_version = hmp->version;
437
438         /*
439          * Reserve space for (future) header junk, setup our poor-man's
440          * bigblock allocator.
441          */
442         int64_t vol_alloc = HAMMER_BUFSIZE * 16;
443
444         ondisk->vol_bot_beg = vol_alloc;
445         vol_alloc += boot_area_size;
446         ondisk->vol_mem_beg = vol_alloc;
447         vol_alloc += mem_area_size;
448
449         /*
450          * The remaining area is the zone 2 buffer allocation area.  These
451          * buffers
452          */
453         ondisk->vol_buf_beg = vol_alloc;
454         ondisk->vol_buf_end = vol_size & ~(int64_t)HAMMER_BUFMASK;
455
456         if (ondisk->vol_buf_end < ondisk->vol_buf_beg) {
457                 kprintf("volume %d %s is too small to hold the volume header",
458                      ondisk->vol_no, ondisk->vol_name);
459                 error = EFTYPE;
460                 goto late_failure;
461         }
462
463         ondisk->vol_nblocks = (ondisk->vol_buf_end - ondisk->vol_buf_beg) /
464                               HAMMER_BUFSIZE;
465         ondisk->vol_blocksize = HAMMER_BUFSIZE;
466
467         /*
468          * Write volume header to disk
469          */
470         error = bwrite(bp);
471         bp = NULL;
472
473 late_failure:
474         if (bp)
475                 brelse(bp);
476         return (error);
477 }