e6cf38bcbac034c2f7a5068bf5b17b0f06391235
[dragonfly.git] / sys / vfs / hammer / hammer_volume.c
1 /*
2  * Copyright (c) 2009 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com> and
6  * Michael Neumann <mneumann@ntecs.de>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  *
35  */
36
37 #include "hammer.h"
38 #include <sys/fcntl.h>
39 #include <sys/nlookup.h>
40 #include <sys/buf.h>
41
42 #include <sys/buf2.h>
43
/*
 * Look up (if *devvpp is NULL) and open the device at dev_path.
 * On failure the vnode is released and *devvpp is set to NULL.
 */
static int
hammer_setup_device(struct vnode **devvpp, const char *dev_path, int ronly);

/*
 * Close and release a device vnode opened by hammer_setup_device().
 */
static void
hammer_close_device(struct vnode **devvpp, int ronly);

/*
 * Write a new HAMMER volume header into the first buffer of devvp.
 */
static int
hammer_format_volume_header(struct hammer_mount *hmp, struct vnode *devvp,
        const char *vol_name, int vol_no, int vol_count,
        int64_t vol_size, int64_t boot_area_size, int64_t mem_area_size);

/*
 * Zero the volume header in the first buffer of devvp.
 */
static int
hammer_clear_volume_header(struct vnode *devvp);
57
/*
 * Big-block accounting collected while walking a volume's freemap
 * entries (see format_callback() and free_callback()).
 */
struct bigblock_stat {
        /* Number of big-blocks counted on the volume */
        uint64_t total_bigblocks;
        /* Number of those big-blocks that are free */
        uint64_t total_free_bigblocks;
        /*
         * Free big-blocks seen since the last layer1 entry; used by
         * format_callback() to fill in layer1->blocks_free, then reset.
         */
        uint64_t counter;
};
63
/*
 * Initialize the layer1/layer2 freemap entries of a volume and report
 * the resulting big-block counts in *stat.
 */
static int
hammer_format_freemap(hammer_transaction_t trans, hammer_volume_t volume,
        struct bigblock_stat *stat);

/*
 * Free the layer1 freemap entries of a volume and report its big-block
 * counts in *stat. Fails if a layer2 entry of the volume is not empty.
 */
static int
hammer_free_freemap(hammer_transaction_t trans, hammer_volume_t volume,
        struct bigblock_stat *stat);
71
72 int
73 hammer_ioc_volume_add(hammer_transaction_t trans, hammer_inode_t ip,
74                 struct hammer_ioc_volume *ioc)
75 {
76         struct hammer_mount *hmp = trans->hmp;
77         struct mount *mp = hmp->mp;
78         hammer_volume_t volume;
79         int error;
80
81         if (mp->mnt_flag & MNT_RDONLY) {
82                 kprintf("Cannot add volume to read-only HAMMER filesystem\n");
83                 return (EINVAL);
84         }
85
86         if (hmp->nvolumes >= HAMMER_MAX_VOLUMES) {
87                 kprintf("Max number of HAMMER volumes exceeded\n");
88                 return (EINVAL);
89         }
90
91         if (hammer_lock_ex_try(&hmp->volume_lock) != 0) {
92                 kprintf("Another volume operation is in progress!\n");
93                 return (EAGAIN);
94         }
95
96         /*
97          * Find an unused volume number.
98          */
99         int free_vol_no = 0;
100         while (free_vol_no < HAMMER_MAX_VOLUMES &&
101                RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, free_vol_no)) {
102                 ++free_vol_no;
103         }
104         if (free_vol_no >= HAMMER_MAX_VOLUMES) {
105                 kprintf("Max number of HAMMER volumes exceeded\n");
106                 hammer_unlock(&hmp->volume_lock);
107                 return (EINVAL);
108         }
109
110         struct vnode *devvp = NULL;
111         error = hammer_setup_device(&devvp, ioc->device_name, 0);
112         if (error)
113                 goto end;
114         KKASSERT(devvp);
115         error = hammer_format_volume_header(
116                 hmp,
117                 devvp,
118                 hmp->rootvol->ondisk->vol_name,
119                 free_vol_no,
120                 hmp->nvolumes+1,
121                 ioc->vol_size,
122                 ioc->boot_area_size,
123                 ioc->mem_area_size);
124         hammer_close_device(&devvp, 0);
125         if (error)
126                 goto end;
127
128         error = hammer_install_volume(hmp, ioc->device_name, NULL);
129         if (error)
130                 goto end;
131
132         hammer_sync_lock_sh(trans);
133         hammer_lock_ex(&hmp->blkmap_lock);
134
135         ++hmp->nvolumes;
136
137         /*
138          * Set each volumes new value of the vol_count field.
139          */
140         for (int vol_no = 0; vol_no < HAMMER_MAX_VOLUMES; ++vol_no) {
141                 volume = hammer_get_volume(hmp, vol_no, &error);
142                 if (volume == NULL && error == ENOENT) {
143                         /*
144                          * Skip unused volume numbers
145                          */
146                         error = 0;
147                         continue;
148                 }
149                 KKASSERT(volume != NULL && error == 0);
150                 hammer_modify_volume_field(trans, volume, vol_count);
151                 volume->ondisk->vol_count = hmp->nvolumes;
152                 hammer_modify_volume_done(volume);
153
154                 /*
155                  * Only changes to the header of the root volume
156                  * are automatically flushed to disk. For all
157                  * other volumes that we modify we do it here.
158                  *
159                  * No interlock is needed, volume buffers are not
160                  * messed with by bioops.
161                  */
162                 if (volume != trans->rootvol && volume->io.modified) {
163                         hammer_crc_set_volume(volume->ondisk);
164                         hammer_io_flush(&volume->io, 0);
165                 }
166
167                 hammer_rel_volume(volume, 0);
168         }
169
170         volume = hammer_get_volume(hmp, free_vol_no, &error);
171         KKASSERT(volume != NULL && error == 0);
172
173         struct bigblock_stat stat;
174         error = hammer_format_freemap(trans, volume, &stat);
175         KKASSERT(error == 0);
176
177         /*
178          * Increase the total number of big-blocks and update stat/vstat totals.
179          */
180         hammer_modify_volume_field(trans, trans->rootvol,
181                 vol0_stat_bigblocks);
182         trans->rootvol->ondisk->vol0_stat_bigblocks += stat.total_bigblocks;
183         hammer_modify_volume_done(trans->rootvol);
184         /*
185          * Big-block count changed so recompute the total number of blocks.
186          */
187         mp->mnt_stat.f_blocks = trans->rootvol->ondisk->vol0_stat_bigblocks *
188             (HAMMER_BIGBLOCK_SIZE / HAMMER_BUFSIZE);
189         mp->mnt_vstat.f_blocks = trans->rootvol->ondisk->vol0_stat_bigblocks *
190             (HAMMER_BIGBLOCK_SIZE / HAMMER_BUFSIZE);
191
192         /*
193          * Increase the number of free big-blocks
194          * (including the copy in hmp)
195          */
196         hammer_modify_volume_field(trans, trans->rootvol,
197                 vol0_stat_freebigblocks);
198         trans->rootvol->ondisk->vol0_stat_freebigblocks += stat.total_free_bigblocks;
199         hmp->copy_stat_freebigblocks =
200                 trans->rootvol->ondisk->vol0_stat_freebigblocks;
201         hammer_modify_volume_done(trans->rootvol);
202
203         hammer_rel_volume(volume, 0);
204
205         hammer_unlock(&hmp->blkmap_lock);
206         hammer_sync_unlock(trans);
207
208         KKASSERT(error == 0);
209 end:
210         hammer_unlock(&hmp->volume_lock);
211         if (error)
212                 kprintf("An error occurred: %d\n", error);
213         return (error);
214 }
215
216
217 /*
218  * Remove a volume.
219  */
220 int
221 hammer_ioc_volume_del(hammer_transaction_t trans, hammer_inode_t ip,
222                 struct hammer_ioc_volume *ioc)
223 {
224         struct hammer_mount *hmp = trans->hmp;
225         struct mount *mp = hmp->mp;
226         hammer_volume_t volume;
227         int error = 0;
228
229         if (mp->mnt_flag & MNT_RDONLY) {
230                 kprintf("Cannot del volume from read-only HAMMER filesystem\n");
231                 return (EINVAL);
232         }
233
234         if (hammer_lock_ex_try(&hmp->volume_lock) != 0) {
235                 kprintf("Another volume operation is in progress!\n");
236                 return (EAGAIN);
237         }
238
239         volume = NULL;
240
241         /*
242          * find volume by volname
243          */
244         for (int vol_no = 0; vol_no < HAMMER_MAX_VOLUMES; ++vol_no) {
245                 volume = hammer_get_volume(hmp, vol_no, &error);
246                 if (volume == NULL && error == ENOENT) {
247                         /*
248                          * Skip unused volume numbers
249                          */
250                         error = 0;
251                         continue;
252                 }
253                 KKASSERT(volume != NULL && error == 0);
254                 if (strcmp(volume->vol_name, ioc->device_name) == 0) {
255                         break;
256                 }
257                 hammer_rel_volume(volume, 0);
258                 volume = NULL;
259         }
260
261         if (volume == NULL) {
262                 kprintf("Couldn't find volume\n");
263                 error = EINVAL;
264                 goto end;
265         }
266
267         if (volume == trans->rootvol) {
268                 kprintf("Cannot remove root-volume\n");
269                 hammer_rel_volume(volume, 0);
270                 error = EINVAL;
271                 goto end;
272         }
273
274         /*
275          * Reblock filesystem
276          */
277         hmp->volume_to_remove = volume->vol_no;
278
279         struct hammer_ioc_reblock reblock;
280         bzero(&reblock, sizeof(reblock));
281
282         reblock.key_beg.localization = HAMMER_MIN_LOCALIZATION;
283         reblock.key_beg.obj_id = HAMMER_MIN_OBJID;
284         reblock.key_end.localization = HAMMER_MAX_LOCALIZATION;
285         reblock.key_end.obj_id = HAMMER_MAX_OBJID;
286         reblock.head.flags = HAMMER_IOC_DO_FLAGS;
287         reblock.free_level = 0;
288
289         error = hammer_ioc_reblock(trans, ip, &reblock);
290
291         if (reblock.head.flags & HAMMER_IOC_HEAD_INTR) {
292                 error = EINTR;
293         }
294
295         if (error) {
296                 if (error == EINTR) {
297                         kprintf("reblock was interrupted\n");
298                 } else {
299                         kprintf("reblock failed: %d\n", error);
300                 }
301                 hmp->volume_to_remove = -1;
302                 hammer_rel_volume(volume, 0);
303                 goto end;
304         }
305
306         /*
307          * Sync filesystem
308          */
309         int count = 0;
310         while (hammer_flusher_haswork(hmp)) {
311                 hammer_flusher_sync(hmp);
312                 ++count;
313                 if (count >= 5) {
314                         if (count == 5)
315                                 kprintf("HAMMER: flushing.");
316                         else
317                                 kprintf(".");
318                         tsleep(&count, 0, "hmrufl", hz);
319                 }
320                 if (count == 30) {
321                         kprintf("giving up");
322                         break;
323                 }
324         }
325         kprintf("\n");
326
327         hammer_sync_lock_sh(trans);
328         hammer_lock_ex(&hmp->blkmap_lock);
329
330         /*
331          * We use stat later to update rootvol's big-block stats
332          */
333         struct bigblock_stat stat;
334         error = hammer_free_freemap(trans, volume, &stat);
335         if (error) {
336                 kprintf("Failed to free volume. Volume not empty!\n");
337                 hmp->volume_to_remove = -1;
338                 hammer_rel_volume(volume, 0);
339                 hammer_unlock(&hmp->blkmap_lock);
340                 hammer_sync_unlock(trans);
341                 goto end;
342         }
343
344         hmp->volume_to_remove = -1;
345
346         hammer_rel_volume(volume, 0);
347
348         /*
349          * Unload buffers
350          */
351         RB_SCAN(hammer_buf_rb_tree, &hmp->rb_bufs_root, NULL,
352                 hammer_unload_buffer, volume);
353
354         error = hammer_unload_volume(volume, NULL);
355         if (error == -1) {
356                 kprintf("Failed to unload volume\n");
357                 hammer_unlock(&hmp->blkmap_lock);
358                 hammer_sync_unlock(trans);
359                 goto end;
360         }
361
362         volume = NULL;
363         --hmp->nvolumes;
364
365         /*
366          * Set each volume's new value of the vol_count field.
367          */
368         for (int vol_no = 0; vol_no < HAMMER_MAX_VOLUMES; ++vol_no) {
369                 volume = hammer_get_volume(hmp, vol_no, &error);
370                 if (volume == NULL && error == ENOENT) {
371                         /*
372                          * Skip unused volume numbers
373                          */
374                         error = 0;
375                         continue;
376                 }
377
378                 KKASSERT(volume != NULL && error == 0);
379                 hammer_modify_volume_field(trans, volume, vol_count);
380                 volume->ondisk->vol_count = hmp->nvolumes;
381                 hammer_modify_volume_done(volume);
382
383                 /*
384                  * Only changes to the header of the root volume
385                  * are automatically flushed to disk. For all
386                  * other volumes that we modify we do it here.
387                  *
388                  * No interlock is needed, volume buffers are not
389                  * messed with by bioops.
390                  */
391                 if (volume != trans->rootvol && volume->io.modified) {
392                         hammer_crc_set_volume(volume->ondisk);
393                         hammer_io_flush(&volume->io, 0);
394                 }
395
396                 hammer_rel_volume(volume, 0);
397         }
398
399         /*
400          * Update the total number of big-blocks
401          */
402         hammer_modify_volume_field(trans, trans->rootvol,
403                 vol0_stat_bigblocks);
404         trans->rootvol->ondisk->vol0_stat_bigblocks -= stat.total_bigblocks;
405         hammer_modify_volume_done(trans->rootvol);
406
407         /*
408          * Update the number of free big-blocks
409          * (including the copy in hmp)
410          */
411         hammer_modify_volume_field(trans, trans->rootvol,
412                 vol0_stat_freebigblocks);
413         trans->rootvol->ondisk->vol0_stat_freebigblocks -= stat.total_free_bigblocks;
414         hmp->copy_stat_freebigblocks =
415                 trans->rootvol->ondisk->vol0_stat_freebigblocks;
416         hammer_modify_volume_done(trans->rootvol);
417         /*
418          * Big-block count changed so recompute the total number of blocks.
419          */
420         mp->mnt_stat.f_blocks = trans->rootvol->ondisk->vol0_stat_bigblocks *
421             (HAMMER_BIGBLOCK_SIZE / HAMMER_BUFSIZE);
422         mp->mnt_vstat.f_blocks = trans->rootvol->ondisk->vol0_stat_bigblocks *
423             (HAMMER_BIGBLOCK_SIZE / HAMMER_BUFSIZE);
424
425         hammer_unlock(&hmp->blkmap_lock);
426         hammer_sync_unlock(trans);
427
428         /*
429          * Erase the volume header of the removed device.
430          *
431          * This is to not accidentally mount the volume again.
432          */
433         struct vnode *devvp = NULL;
434         error = hammer_setup_device(&devvp, ioc->device_name, 0);
435         if (error) {
436                 kprintf("Failed to open device: %s\n", ioc->device_name);
437                 goto end;
438         }
439         KKASSERT(devvp);
440         error = hammer_clear_volume_header(devvp);
441         if (error) {
442                 kprintf("Failed to clear volume header of device: %s\n",
443                         ioc->device_name);
444                 goto end;
445         }
446         hammer_close_device(&devvp, 0);
447
448         KKASSERT(error == 0);
449 end:
450         hammer_unlock(&hmp->volume_lock);
451         return (error);
452 }
453
454
455 int
456 hammer_ioc_volume_list(hammer_transaction_t trans, hammer_inode_t ip,
457     struct hammer_ioc_volume_list *ioc)
458 {
459         struct hammer_mount *hmp = trans->hmp;
460         hammer_volume_t volume;
461         int error = 0;
462         int i, cnt, len;
463
464         for (i = 0, cnt = 0; i < HAMMER_MAX_VOLUMES && cnt < ioc->nvols; i++) {
465                 volume = hammer_get_volume(hmp, i, &error);
466                 if (volume == NULL && error == ENOENT) {
467                         error = 0;
468                         continue;
469                 }
470                 KKASSERT(volume != NULL && error == 0);
471
472                 len = strlen(volume->vol_name) + 1;
473                 KKASSERT(len <= MAXPATHLEN);
474
475                 error = copyout(volume->vol_name, ioc->vols[cnt].device_name,
476                                 len);
477                 if (error) {
478                         hammer_rel_volume(volume, 0);
479                         return (error);
480                 }
481                 cnt++;
482                 hammer_rel_volume(volume, 0);
483         }
484         ioc->nvols = cnt;
485
486         return (error);
487 }
488
489 /*
490  * Iterate over all usable L1 entries of the volume and
491  * the corresponding L2 entries.
492  */
493 static int
494 hammer_iterate_l1l2_entries(hammer_transaction_t trans, hammer_volume_t volume,
495         int (*callback)(hammer_transaction_t, hammer_volume_t, hammer_buffer_t*,
496                 struct hammer_blockmap_layer1*, struct hammer_blockmap_layer2*,
497                 hammer_off_t, hammer_off_t, void*),
498         void *data)
499 {
500         struct hammer_mount *hmp = trans->hmp;
501         hammer_blockmap_t freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
502         hammer_buffer_t buffer = NULL;
503         int error = 0;
504
505         hammer_off_t phys_off;
506         hammer_off_t block_off;
507         hammer_off_t layer1_off;
508         hammer_off_t layer2_off;
509         hammer_off_t aligned_buf_end_off;
510         struct hammer_blockmap_layer1 *layer1;
511         struct hammer_blockmap_layer2 *layer2;
512
513         /*
514          * Calculate the usable size of the volume, which
515          * must be aligned at a big-block (8 MB) boundary.
516          */
517         aligned_buf_end_off = (HAMMER_ENCODE_RAW_BUFFER(volume->ondisk->vol_no,
518                 (volume->ondisk->vol_buf_end - volume->ondisk->vol_buf_beg)
519                 & ~HAMMER_BIGBLOCK_MASK64));
520
521         /*
522          * Iterate the volume's address space in chunks of 4 TB, where each
523          * chunk consists of at least one physically available 8 MB big-block.
524          *
525          * For each chunk we need one L1 entry and one L2 big-block.
526          * We use the first big-block of each chunk as L2 block.
527          */
528         for (phys_off = HAMMER_ENCODE_RAW_BUFFER(volume->ondisk->vol_no, 0);
529              phys_off < aligned_buf_end_off;
530              phys_off += HAMMER_BLOCKMAP_LAYER2) {
531                 for (block_off = 0;
532                      block_off < HAMMER_BLOCKMAP_LAYER2;
533                      block_off += HAMMER_BIGBLOCK_SIZE) {
534                         layer2_off = phys_off +
535                                 HAMMER_BLOCKMAP_LAYER2_OFFSET(block_off);
536                         layer2 = hammer_bread(hmp, layer2_off, &error, &buffer);
537                         if (error)
538                                 goto end;
539
540                         error = callback(trans, volume, &buffer, NULL,
541                                          layer2, phys_off, block_off, data);
542                         if (error)
543                                 goto end;
544                 }
545
546                 layer1_off = freemap->phys_offset +
547                                 HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_off);
548                 layer1 = hammer_bread(hmp, layer1_off, &error, &buffer);
549                 if (error)
550                         goto end;
551
552                 error = callback(trans, volume, &buffer, layer1, NULL,
553                                  phys_off, 0, data);
554                 if (error)
555                         goto end;
556         }
557
558 end:
559         if (buffer) {
560                 hammer_rel_buffer(buffer, 0);
561                 buffer = NULL;
562         }
563
564         return error;
565 }
566
567
568 static int
569 format_callback(hammer_transaction_t trans, hammer_volume_t volume,
570         hammer_buffer_t *bufferp,
571         struct hammer_blockmap_layer1 *layer1,
572         struct hammer_blockmap_layer2 *layer2,
573         hammer_off_t phys_off,
574         hammer_off_t block_off,
575         void *data)
576 {
577         struct bigblock_stat *stat = (struct bigblock_stat*)data;
578
579         /*
580          * Calculate the usable size of the volume, which must be aligned
581          * at a big-block (8 MB) boundary.
582          */
583         hammer_off_t aligned_buf_end_off;
584         aligned_buf_end_off = (HAMMER_ENCODE_RAW_BUFFER(volume->ondisk->vol_no,
585                 (volume->ondisk->vol_buf_end - volume->ondisk->vol_buf_beg)
586                 & ~HAMMER_BIGBLOCK_MASK64));
587
588         if (layer1) {
589                 KKASSERT(layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL);
590
591                 hammer_modify_buffer(trans, *bufferp, layer1, sizeof(*layer1));
592                 bzero(layer1, sizeof(*layer1));
593                 layer1->phys_offset = phys_off;
594                 layer1->blocks_free = stat->counter;
595                 layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
596                 hammer_modify_buffer_done(*bufferp);
597
598                 stat->total_free_bigblocks += stat->counter;
599                 stat->counter = 0; /* reset */
600         } else if (layer2) {
601                 hammer_modify_buffer(trans, *bufferp, layer2, sizeof(*layer2));
602                 bzero(layer2, sizeof(*layer2));
603
604                 if (block_off == 0) {
605                         /*
606                          * The first entry represents the L2 big-block itself.
607                          */
608                         layer2->zone = HAMMER_ZONE_FREEMAP_INDEX;
609                         layer2->append_off = HAMMER_BIGBLOCK_SIZE;
610                         layer2->bytes_free = 0;
611                         ++stat->total_bigblocks;
612                 } else if (phys_off + block_off < aligned_buf_end_off) {
613                         /*
614                          * Available big-block
615                          */
616                         layer2->zone = 0;
617                         layer2->append_off = 0;
618                         layer2->bytes_free = HAMMER_BIGBLOCK_SIZE;
619                         ++stat->total_bigblocks;
620                         ++stat->counter;
621                 } else {
622                         /*
623                          * Big-block outside of physically available
624                          * space
625                          */
626                         layer2->zone = HAMMER_ZONE_UNAVAIL_INDEX;
627                         layer2->append_off = HAMMER_BIGBLOCK_SIZE;
628                         layer2->bytes_free = 0;
629                 }
630
631                 layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
632                 hammer_modify_buffer_done(*bufferp);
633         } else {
634                 KKASSERT(0);
635         }
636
637         return 0;
638 }
639
640 static int
641 hammer_format_freemap(hammer_transaction_t trans, hammer_volume_t volume,
642         struct bigblock_stat *stat)
643 {
644         stat->total_bigblocks = 0;
645         stat->total_free_bigblocks = 0;
646         stat->counter = 0;
647         return hammer_iterate_l1l2_entries(trans, volume, format_callback, stat);
648 }
649
650 static int
651 free_callback(hammer_transaction_t trans, hammer_volume_t volume __unused,
652         hammer_buffer_t *bufferp,
653         struct hammer_blockmap_layer1 *layer1,
654         struct hammer_blockmap_layer2 *layer2,
655         hammer_off_t phys_off,
656         hammer_off_t block_off __unused,
657         void *data)
658 {
659         struct bigblock_stat *stat = (struct bigblock_stat*)data;
660
661         /*
662          * No modifications to ondisk structures
663          */
664         int testonly = (stat == NULL);
665
666         if (layer1) {
667                 if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL) {
668                         /*
669                          * This layer1 entry is already free.
670                          */
671                         return 0;
672                 }
673
674                 KKASSERT((int)HAMMER_VOL_DECODE(layer1->phys_offset) ==
675                         trans->hmp->volume_to_remove);
676
677                 if (testonly)
678                         return 0;
679
680                 /*
681                  * Free the L1 entry
682                  */
683                 hammer_modify_buffer(trans, *bufferp, layer1, sizeof(*layer1));
684                 bzero(layer1, sizeof(*layer1));
685                 layer1->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
686                 layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
687                 hammer_modify_buffer_done(*bufferp);
688
689                 return 0;
690         } else if (layer2) {
691                 if (layer2->zone == HAMMER_ZONE_UNAVAIL_INDEX) {
692                         return 0;
693                 }
694
695                 if (layer2->zone == HAMMER_ZONE_FREEMAP_INDEX) {
696                         if (stat) {
697                                 ++stat->total_bigblocks;
698                         }
699                         return 0;
700                 }
701
702                 if (layer2->append_off == 0 &&
703                     layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
704                         if (stat) {
705                                 ++stat->total_bigblocks;
706                                 ++stat->total_free_bigblocks;
707                         }
708                         return 0;
709                 }
710
711                 /*
712                  * We found a layer2 entry that is not empty!
713                  */
714                 return EBUSY;
715         } else {
716                 KKASSERT(0);
717         }
718
719         return EINVAL;
720 }
721
722 static int
723 hammer_free_freemap(hammer_transaction_t trans, hammer_volume_t volume,
724         struct bigblock_stat *stat)
725 {
726         int error;
727
728         stat->total_bigblocks = 0;
729         stat->total_free_bigblocks = 0;
730         stat->counter = 0;
731
732         error = hammer_iterate_l1l2_entries(trans, volume, free_callback, NULL);
733         if (error)
734                 return error;
735
736         error = hammer_iterate_l1l2_entries(trans, volume, free_callback, stat);
737         return error;
738 }
739
740 /************************************************************************
741  *                              MISC                                    *
742  ************************************************************************
743  */
744
745 static int
746 hammer_setup_device(struct vnode **devvpp, const char *dev_path, int ronly)
747 {
748         int error;
749         struct nlookupdata nd;
750
751         /*
752          * Get the device vnode
753          */
754         if (*devvpp == NULL) {
755                 error = nlookup_init(&nd, dev_path, UIO_SYSSPACE, NLC_FOLLOW);
756                 if (error == 0)
757                         error = nlookup(&nd);
758                 if (error == 0)
759                         error = cache_vref(&nd.nl_nch, nd.nl_cred, devvpp);
760                 nlookup_done(&nd);
761         } else {
762                 error = 0;
763         }
764
765         if (error == 0) {
766                 if (vn_isdisk(*devvpp, &error)) {
767                         error = vfs_mountedon(*devvpp);
768                 }
769         }
770         if (error == 0 && vcount(*devvpp) > 0)
771                 error = EBUSY;
772         if (error == 0) {
773                 vn_lock(*devvpp, LK_EXCLUSIVE | LK_RETRY);
774                 error = vinvalbuf(*devvpp, V_SAVE, 0, 0);
775                 if (error == 0) {
776                         error = VOP_OPEN(*devvpp,
777                                          (ronly ? FREAD : FREAD|FWRITE),
778                                          FSCRED, NULL);
779                 }
780                 vn_unlock(*devvpp);
781         }
782         if (error && *devvpp) {
783                 vrele(*devvpp);
784                 *devvpp = NULL;
785         }
786         return (error);
787 }
788
789 static void
790 hammer_close_device(struct vnode **devvpp, int ronly)
791 {
792         if (*devvpp) {
793                 vn_lock(*devvpp, LK_EXCLUSIVE | LK_RETRY);
794                 vinvalbuf(*devvpp, ronly ? 0 : V_SAVE, 0, 0);
795                 VOP_CLOSE(*devvpp, (ronly ? FREAD : FREAD|FWRITE), NULL);
796                 vn_unlock(*devvpp);
797                 vrele(*devvpp);
798                 *devvpp = NULL;
799         }
800 }
801
802 static int
803 hammer_format_volume_header(struct hammer_mount *hmp, struct vnode *devvp,
804         const char *vol_name, int vol_no, int vol_count,
805         int64_t vol_size, int64_t boot_area_size, int64_t mem_area_size)
806 {
807         struct buf *bp = NULL;
808         struct hammer_volume_ondisk *ondisk;
809         int error;
810
811         /*
812          * Extract the volume number from the volume header and do various
813          * sanity checks.
814          */
815         KKASSERT(HAMMER_BUFSIZE >= sizeof(struct hammer_volume_ondisk));
816         error = bread(devvp, 0LL, HAMMER_BUFSIZE, &bp);
817         if (error || bp->b_bcount < sizeof(struct hammer_volume_ondisk))
818                 goto late_failure;
819
820         ondisk = (struct hammer_volume_ondisk*) bp->b_data;
821
822         /*
823          * Note that we do NOT allow to use a device that contains
824          * a valid HAMMER signature. It has to be cleaned up with dd
825          * before.
826          */
827         if (ondisk->vol_signature == HAMMER_FSBUF_VOLUME) {
828                 kprintf("hammer_volume_add: Formatting of valid HAMMER volume "
829                         "%s denied. Erase with dd!\n", vol_name);
830                 error = EFTYPE;
831                 goto late_failure;
832         }
833
834         bzero(ondisk, sizeof(struct hammer_volume_ondisk));
835         ksnprintf(ondisk->vol_name, sizeof(ondisk->vol_name), "%s", vol_name);
836         ondisk->vol_fstype = hmp->rootvol->ondisk->vol_fstype;
837         ondisk->vol_signature = HAMMER_FSBUF_VOLUME;
838         ondisk->vol_fsid = hmp->fsid;
839         ondisk->vol_rootvol = hmp->rootvol->vol_no;
840         ondisk->vol_no = vol_no;
841         ondisk->vol_count = vol_count;
842         ondisk->vol_version = hmp->version;
843
844         /*
845          * Reserve space for (future) header junk, setup our poor-man's
846          * big-block allocator.
847          */
848         int64_t vol_alloc = HAMMER_BUFSIZE * 16;
849
850         ondisk->vol_bot_beg = vol_alloc;
851         vol_alloc += boot_area_size;
852         ondisk->vol_mem_beg = vol_alloc;
853         vol_alloc += mem_area_size;
854
855         /*
856          * The remaining area is the zone 2 buffer allocation area.  These
857          * buffers
858          */
859         ondisk->vol_buf_beg = vol_alloc;
860         ondisk->vol_buf_end = vol_size & ~(int64_t)HAMMER_BUFMASK;
861
862         if (ondisk->vol_buf_end < ondisk->vol_buf_beg) {
863                 kprintf("volume %d %s is too small to hold the volume header\n",
864                      ondisk->vol_no, ondisk->vol_name);
865                 error = EFTYPE;
866                 goto late_failure;
867         }
868
869         ondisk->vol_nblocks = (ondisk->vol_buf_end - ondisk->vol_buf_beg) /
870                               HAMMER_BUFSIZE;
871         ondisk->vol_blocksize = HAMMER_BUFSIZE;
872
873         /*
874          * Write volume header to disk
875          */
876         error = bwrite(bp);
877         bp = NULL;
878
879 late_failure:
880         if (bp)
881                 brelse(bp);
882         return (error);
883 }
884
885 /*
886  * Invalidates the volume header. Used by volume-del.
887  */
888 static int
889 hammer_clear_volume_header(struct vnode *devvp)
890 {
891         struct buf *bp = NULL;
892         struct hammer_volume_ondisk *ondisk;
893         int error;
894
895         KKASSERT(HAMMER_BUFSIZE >= sizeof(struct hammer_volume_ondisk));
896         error = bread(devvp, 0LL, HAMMER_BUFSIZE, &bp);
897         if (error || bp->b_bcount < sizeof(struct hammer_volume_ondisk))
898                 goto late_failure;
899
900         ondisk = (struct hammer_volume_ondisk*) bp->b_data;
901         bzero(ondisk, sizeof(struct hammer_volume_ondisk));
902
903         error = bwrite(bp);
904         bp = NULL;
905
906 late_failure:
907         if (bp)
908                 brelse(bp);
909         return (error);
910 }