2 * Copyright (c) 2007 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sbin/newfs_hammer/newfs_hammer.c,v 1.1 2007/10/10 19:35:53 dillon Exp $
37 #include <sys/types.h>
38 #include <sys/diskslice.h>
39 #include <sys/diskmbr.h>
42 #include <vfs/hammer/hammerfs.h>
63 uint32_t crc32(const void *buf, size_t size);
65 static void usage(void);
66 static void check_volume(struct vol_info *info);
67 static void format_volume(struct vol_info *info, int nvols, uuid_t *fsid,
68 const char *label, int32_t clsize);
69 static int32_t format_cluster(struct vol_info *info,
70 struct hammer_volume_ondisk *vol, int isroot);
71 static void format_root(struct vol_info *info,
72 struct hammer_volume_ondisk *vol,
73 struct hammer_cluster_ondisk *cl,
74 struct hammer_fsbuf_recs *recbuf,
75 struct hammer_fsbuf_data *databuf,
76 int recoff, int dataoff);
77 static const char *sizetostr(off_t size);
78 static int64_t getsize(const char *str, int64_t minval, int64_t maxval);
79 static hammer_tid_t createtid(void);
81 static void add_record(struct vol_info *info, struct hammer_volume_ondisk *vol,
82 struct hammer_cluster_ondisk *cl, hammer_record_t rec,
83 void *data, int bytes);
87 struct hammer_alist Buf_alist;
88 struct hammer_alist Vol_alist;
89 struct hammer_alist Clu_alist;
92 main(int ac, char **av)
101 const char *label = NULL;
104 * Sanity check basic filesystem structures
106 assert(sizeof(struct hammer_almeta) == HAMMER_ALMETA_SIZE);
107 assert(sizeof(struct hammer_fsbuf_head) == HAMMER_FSBUF_HEAD_SIZE);
108 assert(sizeof(struct hammer_volume_ondisk) <= HAMMER_BUFSIZE);
109 assert(sizeof(struct hammer_cluster_ondisk) <= HAMMER_BUFSIZE);
110 assert(sizeof(struct hammer_fsbuf_data) == HAMMER_BUFSIZE);
111 assert(sizeof(struct hammer_fsbuf_recs) == HAMMER_BUFSIZE);
112 assert(sizeof(struct hammer_fsbuf_btree) == HAMMER_BUFSIZE);
117 * Generate a filesystem id and look up the filesystem type
120 uuid_name_lookup(&Hammer_FSType, "DragonFly HAMMER", &status);
121 if (status != uuid_s_ok) {
122 fprintf(stderr, "uuids file does not have the DragonFly HAMMER filesystem type\n");
127 * Initialize the alist templates we will be using
129 hammer_alist_template(&Buf_alist,
130 HAMMER_FSBUF_MAXBLKS, HAMMER_FSBUF_METAELMS);
131 hammer_alist_template(&Vol_alist,
132 HAMMER_VOL_MAXCLUSTERS, HAMMER_VOL_METAELMS);
133 hammer_alist_template(&Clu_alist,
134 HAMMER_CLU_MAXBUFFERS, HAMMER_CLU_METAELMS);
139 while ((ch = getopt(ac, av, "L:s:")) != -1) {
146 * The cluster's size is limited by type chunking and
147 * A-List limitations. Each chunk must be at least
148 * HAMMER_BUFSIZE and the A-List cannot accommodate
149 * more than 32768 buffers.
151 clsize = getsize(optarg,
152 HAMMER_BUFSIZE * 256LL,
153 HAMMER_BUFSIZE * 32768LL);
163 "newfs_hammer: A filesystem label must be specified\n");
168 * Collect volume information
174 VolInfoArray = malloc(sizeof(*VolInfoArray) * nvols);
176 for (i = 0; i < nvols; ++i) {
177 VolInfoArray[i].name = av[i];
178 VolInfoArray[i].volno = i;
179 check_volume(&VolInfoArray[i]);
180 total += VolInfoArray[i].size;
184 * Calculate the size of a cluster (clsize). A cluster is broken
185 * down into 256 chunks which must be at least filesystem buffer
186 * sized. This gives us a minimum cluster size of around 4MB.
189 clsize = HAMMER_BUFSIZE * 256;
190 while (clsize < total / nvols / 256 && clsize < 0x80000000U) {
195 printf("---------------------------------------------\n");
196 printf("%d volume%s total size %s\n",
197 nvols, (nvols == 1 ? "" : "s"), sizetostr(total));
198 printf("cluster-size: %s\n", sizetostr(clsize));
199 printf("max-volume-size: %s\n",
200 sizetostr((int64_t)clsize * 32768LL));
201 printf("max-filesystem-size: %s\n",
202 sizetostr((int64_t)clsize * 32768LL * 32768LL));
206 * Format volumes. Format the root volume last so vol0_nexttid
207 * represents the next TID globally.
209 for (i = nvols - 1; i >= 0; --i) {
210 format_volume(&VolInfoArray[i], nvols, &fsid, label, clsize);
219 fprintf(stderr, "newfs_hammer vol0 [vol1 ...]\n");
224 * Convert the size in bytes to a human readable string.
227 sizetostr(off_t size)
231 if (size < 1024 / 2) {
232 snprintf(buf, sizeof(buf), "%6.2f", (double)size);
233 } else if (size < 1024 * 1024 / 2) {
234 snprintf(buf, sizeof(buf), "%6.2fKB",
235 (double)size / 1024);
236 } else if (size < 1024 * 1024 * 1024LL / 2) {
237 snprintf(buf, sizeof(buf), "%6.2fMB",
238 (double)size / (1024 * 1024));
239 } else if (size < 1024 * 1024 * 1024LL * 1024LL / 2) {
240 snprintf(buf, sizeof(buf), "%6.2fGB",
241 (double)size / (1024 * 1024 * 1024LL));
243 snprintf(buf, sizeof(buf), "%6.2fTB",
244 (double)size / (1024 * 1024 * 1024LL * 1024LL));
250 getsize(const char *str, int64_t minval, int64_t maxval)
255 val = strtoll(str, &ptr, 0);
274 fprintf(stderr, "Unknown suffix in number '%s'\n", str);
278 fprintf(stderr, "Unknown suffix in number '%s'\n", str);
282 fprintf(stderr, "Value too small: %s, min is %s\n",
283 str, sizetostr(minval));
287 fprintf(stderr, "Value too large: %s, max is %s\n",
288 str, sizetostr(maxval));
297 static hammer_tid_t lasttid;
301 gettimeofday(&tv, NULL);
302 lasttid = tv.tv_sec * 1000000000LL +
309 allocbuffer(u_int64_t type)
311 hammer_fsbuf_head_t head;
313 head = malloc(HAMMER_BUFSIZE);
314 bzero(head, HAMMER_BUFSIZE);
315 head->buf_type = type;
318 * Filesystem buffer alist. The alist starts life in an
319 * all-allocated state. The alist will be in-band or out-of-band
320 * depending on the type of buffer and the number of blocks under
321 * management will also depend on the type of buffer.
323 hammer_alist_init(&Buf_alist, head->buf_almeta);
324 /* crc is set on writeout */
329 allocindexbuffer(struct vol_info *info, struct hammer_volume_ondisk *vol,
330 struct hammer_cluster_ondisk *cl, int64_t cloff, int64_t *poff)
332 hammer_fsbuf_head_t head;
335 head = allocbuffer(HAMMER_FSBUF_BTREE);
336 clno = hammer_alist_alloc(&Clu_alist, cl->clu_almeta, 1);
337 assert(clno != HAMMER_ALIST_BLOCK_NONE);
338 *poff = cloff + clno * HAMMER_BUFSIZE;
341 * Setup the allocation space for b-tree nodes
343 hammer_alist_free(&Buf_alist, head->buf_almeta,
344 0, HAMMER_BTREE_NODES - 1);
349 allocrecbuffer(struct vol_info *info, struct hammer_volume_ondisk *vol,
350 struct hammer_cluster_ondisk *cl, int64_t cloff, int64_t *poff)
352 hammer_fsbuf_head_t head;
355 head = allocbuffer(HAMMER_FSBUF_RECORDS);
356 clno = hammer_alist_alloc_rev(&Clu_alist, cl->clu_almeta, 1);
357 assert(clno != HAMMER_ALIST_BLOCK_NONE);
358 *poff = cloff + clno * HAMMER_BUFSIZE;
361 * Setup the allocation space for record nodes
363 hammer_alist_free(&Buf_alist, head->buf_almeta,
364 0, HAMMER_RECORD_NODES - 1);
369 allocdatabuffer(struct vol_info *info, struct hammer_volume_ondisk *vol,
370 struct hammer_cluster_ondisk *cl, int64_t cloff, int64_t *poff)
372 hammer_fsbuf_head_t head;
375 head = allocbuffer(HAMMER_FSBUF_DATA);
376 clno = hammer_alist_alloc_rev(&Clu_alist, cl->clu_almeta, 1);
377 assert(clno != HAMMER_ALIST_BLOCK_NONE);
378 *poff = cloff + clno * HAMMER_BUFSIZE;
381 * Setup the allocation space for piecemeal data
383 hammer_alist_free(&Buf_alist, head->buf_almeta,
384 0, HAMMER_DATA_NODES - 1);
389 writebuffer(struct vol_info *info, int64_t offset, hammer_fsbuf_head_t buf)
392 buf->buf_crc = crc32(buf, HAMMER_BUFSIZE);
394 if (lseek(info->fd, offset, 0) < 0) {
395 fprintf(stderr, "volume %d seek failed: %s\n", info->volno,
399 if (write(info->fd, buf, HAMMER_BUFSIZE) != HAMMER_BUFSIZE) {
400 fprintf(stderr, "volume %d write failed @%016llx: %s\n",
401 info->volno, offset, strerror(errno));
407 * Check basic volume characteristics. HAMMER filesystems use a minimum
408 * of a 16KB filesystem buffer size.
412 check_volume(struct vol_info *info)
414 struct partinfo pinfo;
418 * Get basic information about the volume
420 info->fd = open(info->name, O_RDWR);
422 fprintf(stderr, "Unable to open %s R+W: %s\n",
423 info->name, strerror(errno));
426 if (ioctl(info->fd, DIOCGPART, &pinfo) < 0) {
428 * Allow the formatting of regular files as HAMMER volumes
430 if (fstat(info->fd, &st) < 0) {
431 fprintf(stderr, "Unable to stat %s: %s\n",
432 info->name, strerror(errno));
435 info->size = st.st_size;
436 info->type = "REGFILE";
439 * When formatting a block device as a HAMMER volume the
440 * sector size must be compatible. HAMMER uses 16384 byte
441 * filesystem buffers.
443 if (pinfo.reserved_blocks) {
444 fprintf(stderr, "HAMMER cannot be placed in a partition which overlaps the disklabel or MBR\n");
447 if (pinfo.media_blksize > 16384 ||
448 16384 % pinfo.media_blksize) {
449 fprintf(stderr, "A media sector size of %d is not supported\n", pinfo.media_blksize);
453 info->size = pinfo.media_size;
454 info->type = "DEVICE";
456 printf("volume %d %s %-15s size %s\n",
457 info->volno, info->type, info->name,
458 sizetostr(info->size));
462 * Format a HAMMER volume. Cluster 0 will be initially placed in volume 0.
466 format_volume(struct vol_info *info, int nvols, uuid_t *fsid,
467 const char *label, int32_t clsize)
469 struct hammer_volume_ondisk *vol;
473 minclsize = clsize / 2;
475 vol = allocbuffer(HAMMER_FSBUF_VOLUME);
477 vol->vol_fsid = *fsid;
478 vol->vol_fstype = Hammer_FSType;
479 snprintf(vol->vol_name, sizeof(vol->vol_name), "%s", label);
480 vol->vol_no = info->volno;
481 vol->vol_count = nvols;
482 vol->vol_version = 1;
483 vol->vol_clsize = clsize;
486 vol->vol_beg = HAMMER_BUFSIZE;
487 vol->vol_end = info->size;
489 if (vol->vol_end < vol->vol_beg) {
490 printf("volume %d %s is too small to hold the volume header\n",
491 info->volno, info->name);
496 * Out-of-band volume alist - manage clusters within the volume.
498 hammer_alist_init(&Vol_alist, vol->vol_almeta);
499 nclusters = (vol->vol_end - vol->vol_beg + minclsize) / clsize;
500 if (nclusters > 32768) {
501 fprintf(stderr, "Volume is too large, max %s\n",
502 sizetostr((int64_t)nclusters * clsize));
504 /*nclusters = 32768;*/
506 hammer_alist_free(&Vol_alist, vol->vol_almeta, 0, nclusters);
509 * Place the root cluster in volume 0.
511 vol->vol_rootvol = 0;
512 if (info->volno == vol->vol_rootvol) {
513 vol->vol0_rootcluster = format_cluster(info, vol, 1);
515 /* global next TID */
516 vol->vol0_nexttid = createtid();
520 * Generate the CRC and write out the volume header
522 writebuffer(info, 0, &vol->head);
526 * Format a hammer cluster. Returns byte offset in volume of cluster.
530 format_cluster(struct vol_info *info, struct hammer_volume_ondisk *vol,
533 struct hammer_cluster_ondisk *cl;
534 union hammer_record rec;
535 struct hammer_fsbuf_recs *recbuf;
536 struct hammer_fsbuf_btree *idxbuf;
537 struct hammer_fsbuf_data *databuf;
544 hammer_tid_t clu_id = createtid();
546 clno = hammer_alist_alloc(&Vol_alist, vol->vol_almeta, 1);
547 if (clno == HAMMER_ALIST_BLOCK_NONE) {
548 fprintf(stderr, "volume %d %s has insufficient space\n",
549 info->volno, info->name);
552 cloff = vol->vol_beg + clno * vol->vol_clsize;
553 printf("allocate cluster id=%016llx %d@%08llx\n", clu_id, clno, cloff);
555 bzero(&rec, sizeof(rec));
556 cl = allocbuffer(HAMMER_FSBUF_CLUSTER);
558 cl->vol_fsid = vol->vol_fsid;
559 cl->vol_fstype = vol->vol_fstype;
564 cl->clu_start = HAMMER_BUFSIZE;
565 if (info->size - cloff > vol->vol_clsize)
566 cl->clu_limit = vol->vol_clsize;
568 cl->clu_limit = (u_int32_t)(info->size - cloff);
571 * In-band filesystem buffer management A-List. The first filesystem
572 * buffer is the cluster header itself.
574 hammer_alist_init(&Clu_alist, cl->clu_almeta);
575 nbuffers = cl->clu_limit / HAMMER_BUFSIZE;
576 hammer_alist_free(&Clu_alist, cl->clu_almeta, 1, nbuffers - 1);
583 cl->idx_record = nbuffers - 1;
584 cl->clu_parent = 0; /* we are the root cluster (vol,cluster) */
587 * Allocate a buffer for the B-Tree index and another to hold
588 * records. The B-Tree buffer isn't strictly required since
589 * the only records we allocate comfortably fit in cl->clu_btree_root
590 * but do it anyway to guarantee some locality of reference.
592 idxbuf = allocindexbuffer(info, vol, cl, cloff, &idxoff);
593 recbuf = allocrecbuffer(info, vol, cl, cloff, &recoff);
594 databuf = allocdatabuffer(info, vol, cl, cloff, &dataoff);
597 * Cluster 0 is the root cluster. Set the B-Tree range for this
598 * cluster to the entire key space and format the root directory.
601 cl->clu_objstart.obj_id = -0x8000000000000000LL;
602 cl->clu_objstart.key = -0x8000000000000000LL;
603 cl->clu_objstart.create_tid = 0;
604 cl->clu_objstart.delete_tid = 0;
605 cl->clu_objstart.rec_type = 0;
606 cl->clu_objstart.obj_type = 0;
608 cl->clu_objend.obj_id = 0x7FFFFFFFFFFFFFFFLL;
609 cl->clu_objend.key = 0x7FFFFFFFFFFFFFFFLL;
610 cl->clu_objend.create_tid = 0xFFFFFFFFFFFFFFFFULL;
611 cl->clu_objend.delete_tid = 0xFFFFFFFFFFFFFFFFULL;
612 cl->clu_objend.rec_type = 0xFFFFU;
613 cl->clu_objend.obj_type = 0xFFFFU;
615 format_root(info, vol, cl, recbuf, databuf,
616 (int)(recoff - cloff), (int)(dataoff - cloff));
620 * Write-out and update the index, record, and cluster buffers
622 writebuffer(info, idxoff, &idxbuf->head);
623 writebuffer(info, recoff, &recbuf->head);
624 writebuffer(info, dataoff, &databuf->head);
625 writebuffer(info, cloff, &cl->head);
630 * Format the root directory. Basically we just lay down the inode record
631 * and create a degenerate entry in the cluster's root btree.
633 * There is no '.' or '..'.
637 format_root(struct vol_info *info, struct hammer_volume_ondisk *vol,
638 struct hammer_cluster_ondisk *cl, struct hammer_fsbuf_recs *recbuf,
639 struct hammer_fsbuf_data *databuf, int recoff, int dataoff)
641 struct hammer_inode_record *record;
642 struct hammer_inode_data *inode_data;
643 struct hammer_leaf_elm *elm;
648 * Allocate record and data space and calculate cluster-relative
649 * offsets for intra-cluster references.
651 recblk = hammer_alist_alloc(&Buf_alist, recbuf->head.buf_almeta, 1);
652 assert(recblk != HAMMER_ALIST_BLOCK_NONE);
653 inodeblk = hammer_alist_alloc(&Buf_alist, databuf->head.buf_almeta, 1);
654 assert(inodeblk != HAMMER_ALIST_BLOCK_NONE);
655 assert(sizeof(*inode_data) <= HAMMER_DATA_BLKSIZE);
657 record = &recbuf->recs[recblk].inode;
658 recoff += offsetof(struct hammer_fsbuf_recs, recs[recblk]);
660 inode_data = (void *)databuf->data[inodeblk];
661 dataoff += offsetof(struct hammer_fsbuf_data, data[inodeblk][0]);
664 * Populate the inode data and inode record for the root directory.
666 inode_data->version = HAMMER_INODE_DATA_VERSION;
667 inode_data->mode = 0755;
669 record->base.obj_id = 1;
670 record->base.key = 0;
671 record->base.create_tid = createtid();
672 record->base.delete_tid = 0;
673 record->base.rec_type = HAMMER_RECTYPE_INODE;
674 record->base.obj_type = HAMMER_OBJTYPE_DIRECTORY;
675 record->base.data_offset = dataoff;
676 record->base.data_len = sizeof(*inode_data);
677 record->base.data_crc = crc32(inode_data, sizeof(*inode_data));
678 record->ino_atime = record->base.create_tid;
679 record->ino_mtime = record->base.create_tid;
680 record->ino_size = 0;
681 record->ino_nlinks = 1;
684 * Insert the record into the B-Tree. The B-Tree is empty so just
685 * install the record manually.
687 assert(cl->clu_btree_root.count == 0);
688 cl->clu_btree_root.count = 1;
689 elm = &cl->clu_btree_root.elms[0].leaf;
690 elm->base.obj_id = record->base.obj_id;
691 elm->base.key = record->base.key;
692 elm->base.create_tid = record->base.create_tid;
693 elm->base.delete_tid = record->base.delete_tid;
694 elm->base.rec_type = record->base.rec_type;
695 elm->base.obj_type = record->base.obj_type;
697 elm->rec_offset = recoff;
698 elm->data_offset = record->base.data_offset;
699 elm->data_len = record->base.data_len;
700 elm->data_crc = record->base.data_crc;
704 panic(const char *ctl, ...)
709 vfprintf(stderr, ctl, va);
711 fprintf(stderr, "\n");