--- /dev/null
+PROG= hammer2
+SRCS= main.c subs.c
+SRCS+= cmd_remote.c cmd_snapshot.c
+#MAN= hammer2.8
+NOMAN= TRUE
+
+CFLAGS+= -I${.CURDIR}/../../sys
+LDADD= -lm -lutil -lmd
+DPADD= ${LIBM} ${LIBUTIL} ${LIBMD}
+
+#.PATH: ${.CURDIR}/../../sys/libkern
+#SRCS+= crc32.c
+
+.include <bsd.prog.mk>
--- /dev/null
+/*
+ * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "hammer2.h"
+
+/*
+ * Add a cluster connection to the filesystem selected by sel_path.
+ *
+ * sel_path - path passed to hammer2_ioctl_handle() to obtain the ioctl fd
+ * url      - remote path; copied into remote.copy1.path
+ *
+ * Returns 0 on success, 1 on failure (no ioctl handle, url too long for
+ * remote.copy1.path, or HAMMER2IOC_ADD_REMOTE failed).
+ */
+int
+cmd_remote_connect(const char *sel_path, const char *url)
+{
+	hammer2_ioc_remote_t remote;
+	int ecode = 0;
+	int fd;
+
+	if ((fd = hammer2_ioctl_handle(sel_path)) < 0)
+		return(1);
+	bzero(&remote, sizeof(remote));
+	/* NOTE(review): -1 presumably means "unspecified" -- confirm in kernel */
+	remote.copyid = -1;
+	remote.fd = -1;
+	/* Reject rather than silently truncate an over-long url */
+	if (strlen(url) >= sizeof(remote.copy1.path)) {
+		fprintf(stderr, "hammer2: connect: Path too long\n");
+		close(fd);
+		return(1);
+	}
+	snprintf(remote.copy1.path, sizeof(remote.copy1.path), "%s", url);
+	if (ioctl(fd, HAMMER2IOC_ADD_REMOTE, &remote) < 0) {
+		perror("ioctl");
+		ecode = 1;
+	}
+	close(fd);
+	/* Propagate the ioctl result; it becomes the process exit code */
+	return (ecode);
+}
+
+/*
+ * Remove a cluster connection from the filesystem selected by sel_path.
+ *
+ * sel_path - path passed to hammer2_ioctl_handle() to obtain the ioctl fd
+ * url      - remote path; copied into remote.copy1.path
+ *
+ * Returns 0 on success, 1 on failure (no ioctl handle, url too long for
+ * remote.copy1.path, or HAMMER2IOC_DEL_REMOTE failed).
+ */
+int
+cmd_remote_disconnect(const char *sel_path, const char *url)
+{
+	hammer2_ioc_remote_t remote;
+	int ecode = 0;
+	int fd;
+
+	if ((fd = hammer2_ioctl_handle(sel_path)) < 0)
+		return(1);
+	bzero(&remote, sizeof(remote));
+	/* NOTE(review): -1 presumably means "unspecified" -- confirm in kernel */
+	remote.copyid = -1;
+	remote.fd = -1;
+	/* Reject rather than silently truncate an over-long url */
+	if (strlen(url) >= sizeof(remote.copy1.path)) {
+		fprintf(stderr, "hammer2: disconnect: Path too long\n");
+		close(fd);
+		return(1);
+	}
+	snprintf(remote.copy1.path, sizeof(remote.copy1.path), "%s", url);
+	if (ioctl(fd, HAMMER2IOC_DEL_REMOTE, &remote) < 0) {
+		perror("ioctl");
+		ecode = 1;
+	}
+	close(fd);
+	/* Propagate the ioctl result; it becomes the process exit code */
+	return (ecode);
+}
+
+/*
+ * List the remote linkages of the filesystem selected by sel_path.
+ *
+ * sel_path - path passed to hammer2_ioctl_handle() to obtain the ioctl fd
+ * all_opt  - currently unused
+ *
+ * Iterates HAMMER2IOC_GET_REMOTE starting at copyid 0 and following
+ * remote.nextid until it goes negative.  Entries whose copy1.copyid is 0
+ * are skipped.  Prints one row per entry, or "No linkages found" when
+ * nothing was printed.
+ *
+ * Returns 0 on success, 1 if the handle could not be obtained or an
+ * ioctl failed.
+ */
+int
+cmd_remote_status(const char *sel_path, int all_opt __unused)
+{
+	hammer2_ioc_remote_t remote;
+	int ecode = 0;
+	int count = 0;
+	int fd;
+
+	if ((fd = hammer2_ioctl_handle(sel_path)) < 0)
+		return(1);
+	bzero(&remote, sizeof(remote));
+
+	while ((remote.copyid = remote.nextid) >= 0) {
+		if (ioctl(fd, HAMMER2IOC_GET_REMOTE, &remote) < 0) {
+			perror("ioctl");
+			ecode = 1;
+			break;
+		}
+		if (remote.copy1.copyid == 0)
+			continue;
+		/*
+		 * Emit the header lazily, using the same field widths as
+		 * the rows below so the columns line up.
+		 */
+		if (count == 0) {
+			printf("%-5s %-15s %-6s %s\n",
+			       "CPYID", "LABEL", "STATUS", "PATH");
+		}
+		printf("%5d %-15s %c%c%c.%02x %s\n",
+		       remote.copy1.copyid,
+		       remote.copy1.label,
+		       '-', '-', '-',
+		       remote.copy1.priority,
+		       remote.copy1.path);
+		++count;
+	}
+	if (count == 0)
+		printf("No linkages found\n");
+	/* Release the ioctl descriptor on every path out of the scan */
+	close(fd);
+	return (ecode);
+}
--- /dev/null
+/*
+ * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "hammer2.h"
+
+/*
+ * The snapshot is named <PFSNAME>_<YYYYMMDD.HHMMSS.TRANSID> unless
+ * overridden by a label.
+ *
+ * When local non-cache media is involved the media is
+ * first synchronized and the snapshot is then based on
+ * the media.
+ *
+ * If the media is remote the snapshot is created on the remote
+ * end (if you have sufficient administrative rights) and a local
+ * ADMIN or CACHE PFS is created with a connection to the snapshot
+ * on the remote.
+ *
+ * If the client has snapshot rights to multiple remotes then TBD.
+ */
--- /dev/null
+/*
+ * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Rollup headers for hammer2 utility
+ */
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <sys/file.h>
+#include <vfs/hammer2/hammer2_disk.h>
+#include <vfs/hammer2/hammer2_mount.h>
+#include <vfs/hammer2/hammer2_ioctl.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <unistd.h>
+#include <string.h>
+#include <fcntl.h>
+#include <ctype.h>
+#include <errno.h>
+
+int hammer2_ioctl_handle(const char *sel_path);	/* subs.c: open fd for ioctls, -1 on error */
+
+int cmd_remote_connect(const char *sel_path, const char *url);	/* cmd_remote.c: "connect" */
+int cmd_remote_disconnect(const char *sel_path, const char *url);	/* cmd_remote.c: "disconnect" */
+int cmd_remote_status(const char *sel_path, int all_opt);	/* cmd_remote.c: "status" */
--- /dev/null
+/*
+ * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "hammer2.h"
+
+static void usage(int code);
+
+/*
+ * hammer2 utility entry point.
+ *
+ * Parses the core options (-a, -q, -s path, -t type, -u uuid), then
+ * treats the first remaining argument as the command and dispatches it.
+ * Returns the command's exit code.  usage() exits with code 1 on a bad
+ * option, a missing command or argument, or an unrecognized command.
+ *
+ * quick_opt, pfs_type and uuid_str are parsed here but not yet consumed
+ * by any command implemented in this function.
+ */
+int
+main(int ac, char **av)
+{
+	const char *sel_path = ".";
+	const char *uuid_str = NULL;
+	int pfs_type = HAMMER2_PFSTYPE_NONE;
+	int quick_opt = 0;
+	int all_opt = 0;
+	int ecode = 0;
+	int ch;
+
+	/*
+	 * Core options
+	 */
+	while ((ch = getopt(ac, av, "aqs:t:u:")) != -1) {
+		switch(ch) {
+		case 'a':
+			all_opt = 1;
+			break;
+		case 'q':
+			/*
+			 * Quick mode - do not block verifying certain
+			 * operations such as (connect).
+			 */
+			quick_opt = 1;
+			break;
+		case 's':
+			sel_path = optarg;
+			break;
+		case 't':
+			/*
+			 * set node type for mkpfs
+			 */
+			if (strcasecmp(optarg, "ADMIN") == 0) {
+				pfs_type = HAMMER2_PFSTYPE_ADMIN;
+			} else if (strcasecmp(optarg, "CACHE") == 0) {
+				pfs_type = HAMMER2_PFSTYPE_CACHE;
+			} else if (strcasecmp(optarg, "COPY") == 0) {
+				pfs_type = HAMMER2_PFSTYPE_COPY;
+			} else if (strcasecmp(optarg, "SLAVE") == 0) {
+				pfs_type = HAMMER2_PFSTYPE_SLAVE;
+			} else if (strcasecmp(optarg, "SOFT_SLAVE") == 0) {
+				pfs_type = HAMMER2_PFSTYPE_SOFT_SLAVE;
+			} else if (strcasecmp(optarg, "SOFT_MASTER") == 0) {
+				pfs_type = HAMMER2_PFSTYPE_SOFT_MASTER;
+			} else if (strcasecmp(optarg, "MASTER") == 0) {
+				pfs_type = HAMMER2_PFSTYPE_MASTER;
+			} else {
+				fprintf(stderr, "-t: Unrecognized node type\n");
+				usage(1);
+			}
+			break;
+		case 'u':
+			/*
+			 * set uuid for mkpfs, else one will be generated
+			 * (required for all except the MASTER node_type)
+			 */
+			uuid_str = optarg;
+			break;
+		default:
+			/*
+			 * getopt() returns '?' both for an unknown option
+			 * and for a missing option argument; the offending
+			 * option character is in optopt, not in ch.
+			 */
+			fprintf(stderr,
+				"Invalid option or missing argument: -%c\n",
+				optopt);
+			usage(1);
+			/* not reached */
+			break;
+		}
+	}
+
+	/*
+	 * Adjust, then process the command
+	 */
+	ac -= optind;
+	av += optind;
+	if (ac < 1) {
+		fprintf(stderr, "Missing command\n");
+		usage(1);
+		/* not reached */
+	}
+
+	if (strcmp(av[0], "connect") == 0) {
+		/*
+		 * Add cluster connection
+		 */
+		if (ac < 2) {
+			fprintf(stderr, "connect: missing argument\n");
+			usage(1);
+		}
+		ecode = cmd_remote_connect(sel_path, av[1]);
+	} else if (strcmp(av[0], "disconnect") == 0) {
+		/*
+		 * Remove cluster connection
+		 */
+		if (ac < 2) {
+			fprintf(stderr, "disconnect: missing argument\n");
+			usage(1);
+		}
+		ecode = cmd_remote_disconnect(sel_path, av[1]);
+	} else if (strcmp(av[0], "status") == 0) {
+		/*
+		 * Get status of PFS and its connections (-a for all PFSs)
+		 */
+		ecode = cmd_remote_status(sel_path, all_opt);
+	} else if (strcmp(av[0], "mkpfs") == 0) {
+		/*
+		 * Create new PFS using pfs_type
+		 * (accepted but no action is taken here yet)
+		 */
+	} else if (strcmp(av[0], "snapshot") == 0) {
+		/*
+		 * Create snapshot with optional pfs_type and optional
+		 * label override.
+		 * (accepted but no action is taken here yet)
+		 */
+	} else if (strcmp(av[0], "helper") == 0) {
+		/*
+		 * Typically run as a daemon, this multi-threaded helper
+		 * subsystem manages socket communications for the
+		 * filesystem.
+		 * (accepted but no action is taken here yet)
+		 */
+	} else {
+		fprintf(stderr, "Unrecognized command: %s\n", av[0]);
+		usage(1);
+	}
+	return (ecode);
+}
+
+/*
+ * Write the command synopsis to stderr and terminate the process with
+ * the supplied exit code.  Does not return.
+ */
+static
+void
+usage(int code)
+{
+	static const char synopsis[] =
+	    "hammer2 [-s path] command...\n"
+	    " -s path Select filesystem\n";
+
+	fputs(synopsis, stderr);
+	exit(code);
+}
--- /dev/null
+/*
+ * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "hammer2.h"
+
+/*
+ * Obtain a file descriptor that the caller can execute ioctl()'s on.
+ */
+/*
+ * Open sel_path read-only and confirm, by issuing HAMMER2IOC_GET_VERSION,
+ * that it accepts hammer2 ioctls.
+ *
+ * Returns the open descriptor on success (the callers in this utility
+ * close it when done).  On failure a diagnostic is written to stderr,
+ * any descriptor opened here is closed, and -1 is returned.
+ */
+int
+hammer2_ioctl_handle(const char *sel_path)
+{
+	struct hammer2_ioc_version vers;
+	int handle = open(sel_path, O_RDONLY, 0);
+
+	if (handle >= 0) {
+		if (ioctl(handle, HAMMER2IOC_GET_VERSION, &vers) >= 0)
+			return (handle);
+
+		/* Opened fine, but the version ioctl was rejected */
+		fprintf(stderr, "hammer2: '%s' is not a hammer2 filesystem\n",
+			sel_path);
+		close(handle);
+		return(-1);
+	}
+
+	/* open() itself failed; errno still holds its error */
+	fprintf(stderr, "hammer2: Unable to open %s: %s\n",
+		sel_path, strerror(errno));
+	return(-1);
+}
missing copies (or remove excessive copies in the case where the copies
value is reduced on a live filesystem).
-* Intended to be clusterable, with a multi-master protocol under design
- but not expected to be fully operational until mid-2013. The media
- format for HAMMER1 was less condusive to logical clustering than I had
- hoped so I was never able to get that aspect of my personal goals
+* Clusterable with MESI cache coherency and dynamic granularity.
+ The media format for HAMMER1 was less conducive to logical clustering
+ than I had hoped so I was never able to get that aspect of my personal goals
working with HAMMER1. HAMMER2 effectively solves the issues that cropped
up with HAMMER1 (mainly that HAMMER1's B-Tree did not reflect the logical
file/directory hierarchy, making cache coherency very difficult).
HAMMER2's writable snapshots make it possible to implement several forms
of multi-master clustering.
-The general mechanics for most of the multi-master clustering implementations
-will be as follows:
-
- (a) Use the copies mechanism to specify all elements of the cluster,
- both local and remote (networked).
-
- (b) The core synchronization state operates just as it does for copies,
- simply requiring a fully-flushed ack from the remote in order to
- mark the blocks as having been fully synchronized.
-
- The mirror_tid may be used to locate these blocks, allowing the
- synchronization state to be updated on the fly at a much later
- time without requiring the state to be maintained in-memory.
- (also for crash recovery resynchronization purposes).
-
- (c) Data/meta-data can be retrieved from those copies which are marked
- as being synchronized, with priority given to the local storage
- relative to any given physical machine.
-
- This means that e.g. even in a master-slave orientation the slave
- may be able to satisfy a request from a program when the slave
- happens to be the local storage.
-
- (d) Transaction id synchronization between all elements of the cluster,
- typically through masking (assigning a cluster number using the low
- 3 bits of the transaction id).
-
- (e) General access (synchronized or otherwise) may require cache
- coherency mechanisms to run over the network.
-
- Implementing cache coherency is a major complexity issue.
-
- (f) General access (synchronized or otherwise) may require quorum
- agreement, using the synchronization flags in the blockrefs
- to determine whether agreement has been reached.
-
- Implementing quorum voting is a major complexity issue.
+This is important: The mount device path you specify serves to bootstrap
+your entry into the cluster, but your mount will make active connections
+to ALL copy elements in the hammer2_copy_data[] array (stored in the volume
+header) which match the PFSID of the directory in the super-root that you
+specified. The local media path does not have to be mentioned in this
+array but becomes part of the cluster based on its type and access
+rights. ALL ELEMENTS ARE TREATED ACCORDING TO TYPE NO MATTER WHICH ONE
+YOU MOUNT FROM.
+
+The actual cluster may be far larger than the elements you list in the
+hammer2_copy_data[] array. You list only the elements you wish to
+directly connect to and you are able to access the rest of the cluster
+indirectly through those connections.
+
+All nodes in the cluster may act as administrative proxies. All nodes
+in the cluster, including your mount point, are classified as one of the
+following as specified in the inode's structure:
+
+ ADMIN - Media does not participate, administrative proxy only
+ CACHE - Media only acts as a persistent cache
+ COPY - Media only acts as a local copy
+ SLAVE - Media is a RO slave that can be mounted RW
+
+ SOFT_SLAVE - This is a SLAVE which can become writable when
+ the quorum is not available, but is not guaranteed
+ to be able to be merged back when the quorum becomes
+ available again. Elements which cannot be merged
+ back remain localized and writable until manual
+ or scripted intervention recombines them.
+
+ SOFT_MASTER - Similar to the above but can form a sub-cluster
+ and run the quorum protocol within the sub-cluster
+ to serve machines that connect to the sub-cluster
+ when the master cluster is not available.
+
+ The SOFT_MASTER nodes in a sub-cluster must be
+ fully interconnected with each other.
+
+ MASTER - This is a MASTER node in the quorum protocol.
+
+ The MASTER nodes in a cluster must be fully
+ interconnected with each other.
+
+There are four major protocols:
+
+ Quorum protocol
+
+ This protocol is used between MASTER nodes to vote on operations
+ and resolve deadlocks.
+
+ This protocol is used between SOFT_MASTER nodes in a sub-cluster
+ to vote on operations, resolve deadlocks, determine what the latest
+ transaction id for an element is, and to perform commits.
+
+ Cache sub-protocol
+
+ This is the MESI sub-protocol which runs under the Quorum
+ protocol. This protocol is used to maintain cache state for
+ sub-trees to ensure that operations remain cache coherent.
+
+ Depending on administrative rights this protocol may or may
+ not allow a leaf node in the cluster to hold a cache element
+ indefinitely. The administrative controller may preemptively
+ downgrade a leaf with insufficient administrative rights
+ without giving it a chance to synchronize any modified state
+ back to the cluster.
+
+ Proxy protocol
+
+ The Quorum and Cache protocols only operate between MASTER
+ and SOFT_MASTER nodes. All other node types must use the
+ Proxy protocol to perform similar actions. This protocol
+ differs in that proxy requests are typically sent to just
+ one adjacent node and that node then maintains state and
+ forwards the request or performs the required operation.
+ When the link is lost to the proxy, the proxy automatically
+ forwards a deletion of the state to the other nodes based on
+ what it has recorded.
+
+ If a leaf has insufficient administrative rights it may not
+ be allowed to actually initiate a quorum operation and may only
+ be allowed to maintain partial MESI cache state or perhaps none
+ at all (since cache state can block other machines in the
+ cluster). Instead a leaf with insufficient rights will have to
+ make do with a preemptive loss of cache state and any allowed
+ modifying operations will have to be forwarded to the proxy which
+ continues forwarding it until a node with sufficient administrative
+ rights is encountered.
+
+ To reduce issues and give the cluster more breadth, sub-clusters
+ made up of SOFT_MASTERs can be formed in order to provide full
+ cache coherency within a subset of machines and yet still tie them
+ into a greater cluster that they normally would not have such
+ access to. This effectively makes it possible to create a two
+ or three-tier fan-out of groups of machines which are cache-coherent
+ within the group, but perhaps not between groups, and use other
+ means to synchronize between the groups.
+
+ Media protocol
+
+ This is basically the physical media protocol.
There are lots of ways to implement multi-master environments using the
above core features but the implementation is going to be fairly complex
of directories. Instead, the (modify_tid) is used on a node-by-node basis
to determine cache state at any given level, and (mirror_tid) is used to
determine whether any recursively underlying state is desynchronized.
-
-* Simple semi-synchronized multi-master environment.
-
- In this environment all nodes are considered masters and modifications
- can be made on any of them, and then propagate to the others
- asynchronously via HAMMER2 mirror streams. One difference here is
- that kernel can activate these userland-managed streams automatically
- when the copies configuration is used to specify the cluster.
-
- The only type of conflict which isn't readily resolvable by comparing
- the (modify_tid) is when file data is updated. In this case user
- intervention might be required but, theoretically, it should be
- possible to automate most merges using a multi-way patch and, if not,
- choosing one and creating backup copies if the others to allow the
- user or sysop to resolve the conflict later.
-
-* Simple fully synchronized fail-over environment.
-
- In this environment there is one designated master and the remaining
- nodes are slaves. If the master fails all remaining nodes agree on a
- new master, possibly with the requirement that a quorum be achieved
- (if you don't want to allow the cluster to split).
-
- If network splits are allowed the each sub-cluster operates in this
- mode but recombining the clusters reverts to the first algorithm.
- If not allowed whomever no longer has a quorum will be forced to stall.
-
- In this environment the current designated master is responsible for
- managing locks for modifying operations. The designated master will
- proactively tell the other nodes to mark the blocks related to the
- modifying operation as no longer being synchronized while any local
- data at the node that acquired the lock (master or slave) remains
- marked as being synchronized.
-
- The node that succesfully gets the lock then issues the modifying
- operation to both its local copy and to the master, marking the
- master as being desynchronized until the master acknowledges receipt.
-
- In this environment any node can access data from local storage if
- the designated master copy is marked synchronized AND its (modify_tid)
- matches the slave copy's (modify_tid).
-
- However, if a slave disconnects from the master then reconnects the
- slave will have lost the master's desynchronization stream and must
- mark its root blockref for the master copy HAMMER2_BREF_DESYNCHLD as
- well as clear the SYNC1/SYNC2 bits. Setting DESYNCCHLD forces on-demand
- recursive reverification that the master and slave are (or are not) in
- sync in order to reestablish on the slave the synchronization state of
- the master.
-
- That might be a bit confusing but the whole point here is to allow
- read accesses to the filesystem to be satisfied by any node in a
- multi-master cluster, not just by the current designated master.
-
-* Fully cache coherent and synchronized multi-master environment.
-
- In this environment a quorum is required to perform any modifying
- action. All nodes are masters (there is no 'designated' master)
- and all nodes connect to all other nodes in a cross-bar.
-
- The quorum is specified by copies setup in the root volume configuration.
- A quorum of nodes in the cluster must agree on the copies configuration.
- If they do not the cluster cannot proceed to mount. Any other nodes
- not in the quorum which are in the cluster which disagree with the
- configuration will inherit the copies configuration from the quorum.
-
- Any modifying action will initiate a lock request locally to all nodes
- in the cluster. The modifying action is allowed to proceed the instant
- a quorum of nodes respond in the affirmative (even if some have not
- yet responded or are down). The modifying action is considered complete
- once the two-phase commit protocol succeeds. The modifying action
- typically creates and commits a temporary snapshot on at least a quorum
- of masters as phase-1 and then ties the snapshot back into the main
- mount as phase-2.
-
- These locks are cache-coherency locks and may be passively maintained
- in order to aggregate multiple operations under the same lock and thus
- under the same transaction from the point of view of the rest of the
- quorum.
-
- A lock request which interferes with a passively maintained lock will
- force the two-phase commit protocol to complete and then transfer
- ownership to the requesting entity, thus avoiding having to deal with
- deadlock protocols at this point in the state machine.
-
- Since any node can initiate concurrent lock requests to many other nodes
- it is possible to deadlock. When two nodes initiate conflicting lock
- requests to the cluster the one achieving the quorum basically wins and
- the other is forced to retry (go back one paragraph). In this situation
- no deadlock will occur.
-
- If three are more nodes initiate conflicting lock requests to the
- cluster a deadlock can occur whereby none of the nodes achieve a quorum.
- In this case every node will know which of the other nodes was granted
- the lock(s). Deadlock resolution then proceeds simultaniously on the
- three nodes (since they have the same information), whereby the lock
- holders on the losing end of the algorithm transfer their locks to one
- of the other nodes. The lock state and knowledge of the lock state is
- updated in real time on all nodes until a quorum is achieved.
-
-* Fully cache coherent and synchronized multi-master environment with
- passive read locking.
-
- This is a more complex form of clustering than the previous form.
- Take the previous form and add the ability to passively hold SHARED
- locks in addition to the EXCLUSIVE locks the previous form is able
- to hold.
-
- The advantage of being able to passively hold a shared lock on a sub-tree
- (locks can be held on single nodes or entire sub-trees) is that it is
- then possible for all nodes to validate a node (modify_tid) or entire
- sub-tree (mirror_tid) with a very short network transaction and then
- satisfy a large number of requests from local storage.
-
-* Fully cache coherent and synchronized multi-master environment with
- passive read locking and slave-only nodes.
-
- This is the MOST complex form of clustering we intend to support.
- In a multi-master environment requiring a quorum of masters to operate
- we implement all of the above plus ALSO allow additional nodes to be
- added to the cluster as slave-only nodes.
-
- The difference between a slave-only node and setting up a manual
- mirror-stream from the cluster to a read-only snapshot on another
- HAMMER2 filesystem is that the slave-only node will be fully
- cache coherent with either the cluster proper (if connected to a quorum
- of masters), or to one or more other nodes in the cluster (if not
- connected to a quorum of masters), EVEN if the slave itself is not
- completely caught up.
-
- So if the slave-only cluster node is connected to the rest of the cluster
- over a slow connection you basically get a combination of local disk
- speeds for any data that is locally in sync and network-limited speeds
- for any data that is not locally in sync.
-
- slave-only cluster nodes run a standard mirror-stream in the background
- to pull in the data as quickly as possible.
-
- This is in constrast to a manual mirror-stream to a read-only
- snapshot (basically a simple slave), which has no ability to bypass
- the local storage to handle out-of-date requests (in fact has no ability
- to detect that the local storage is out-of-date anyway).
+The inode structure also has two additional transaction ids used to optimize
+path lookups, stat, and directory lookup/scan operations.
KMOD= hammer2
SRCS= hammer2_vfsops.c hammer2_vnops.c hammer2_inode.c
SRCS+= hammer2_chain.c hammer2_freemap.c hammer2_subr.c hammer2_icrc.c
+SRCS+= hammer2_ioctl.c
.include <bsd.kmod.mk>
--- /dev/null
+#!/bin/csh
+# Unmount /mnt, then run newfs_hammer2 -L ROOT on /dev/da0s1b (destroys its contents).
+
+umount /mnt
+newfs_hammer2 -L ROOT /dev/da0s1b
--- /dev/null
+#!/bin/csh
+# Unmount /mnt, unload hammer2.ko, load the /usr/obj build if kldstat no longer lists it, mount /dev/da0s1b@ROOT on /mnt, set vfs.hammer2.debug=0.
+
+umount /mnt >& /dev/null
+kldunload hammer2.ko >& /dev/null
+kldstat | fgrep hammer2.ko >& /dev/null
+if ( $status > 0 ) then
+ kldload /usr/obj/usr/src/sys/vfs/hammer2/hammer2.ko
+endif
+mount_hammer2 /dev/da0s1b@ROOT /mnt
+sysctl vfs.hammer2.debug=0
#include "hammer2_disk.h"
#include "hammer2_mount.h"
+#include "hammer2_ioctl.h"
struct hammer2_chain;
struct hammer2_inode;
int hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2);
SPLAY_PROTOTYPE(hammer2_chain_splay, hammer2_chain, snode, hammer2_chain_cmp);
-#define HAMMER2_CHAIN_MODIFIED1 0x00000001 /* active mods */
+#define HAMMER2_CHAIN_MODIFIED 0x00000001 /* active mods */
#define HAMMER2_CHAIN_DIRTYEMBED 0x00000002 /* inode embedded */
#define HAMMER2_CHAIN_DIRTYBP 0x00000004 /* dirty on unlock */
#define HAMMER2_CHAIN_SUBMODIFIED 0x00000008 /* 1+ subs modified */
#define HAMMER2_CHAIN_IOFLUSH 0x00000100 /* bawrite on put */
#define HAMMER2_CHAIN_DEFERRED 0x00000200 /* on a deferral list*/
#define HAMMER2_CHAIN_DESTROYED 0x00000400 /* destroying */
+#define HAMMER2_CHAIN_MODIFIED_AUX 0x00000800 /* hmp->vchain only */
/*
* Flags passed to hammer2_chain_lookup() and hammer2_chain_next()
hammer2_chain_t *rchain; /* label-root */
struct hammer2_inode *iroot;
struct lock alloclk; /* lockmgr lock */
+ struct lock voldatalk; /* lockmgr lock */
hammer2_volume_data_t voldata;
hammer2_off_t freecache[HAMMER2_FREECACHE_TYPES][HAMMER2_MAX_RADIX+1];
void hammer2_inode_unlock_sh(hammer2_inode_t *ip);
void hammer2_inode_busy(hammer2_inode_t *ip);
void hammer2_inode_unbusy(hammer2_inode_t *ip);
+void hammer2_voldata_lock(hammer2_mount_t *hmp);
+void hammer2_voldata_unlock(hammer2_mount_t *hmp);
void hammer2_mount_exlock(hammer2_mount_t *hmp);
void hammer2_mount_shlock(hammer2_mount_t *hmp);
/*
* hammer2_chain.c
*/
+void hammer2_modify_volume(hammer2_mount_t *hmp);
hammer2_chain_t *hammer2_chain_alloc(hammer2_mount_t *hmp,
hammer2_blockref_t *bref);
void hammer2_chain_free(hammer2_mount_t *hmp, hammer2_chain_t *chain);
void hammer2_chain_commit(hammer2_mount_t *hmp, hammer2_chain_t *chain);
/*
+ * hammer2_ioctl.c
+ */
+int hammer2_ioctl(hammer2_inode_t *ip, u_long com, void *data,
+ int fflag, struct ucred *cred);
+
+/*
* hammer2_freemap.c
*/
hammer2_off_t hammer2_freemap_alloc(hammer2_mount_t *hmp,
*/
#include <sys/cdefs.h>
-#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
return;
/*
- * Set MODIFIED1 and add a chain ref to prevent destruction. Both
+ * Set MODIFIED and add a chain ref to prevent destruction. Both
* modified flags share the same ref.
*/
- if ((chain->flags & HAMMER2_CHAIN_MODIFIED1) == 0) {
- atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED1);
+ if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) {
+ atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
hammer2_chain_ref(hmp, chain);
}
void *bdata;
/*
- * If the chain is already marked MODIFIED1 we can just return.
+ * If the chain is already marked MODIFIED we can just return.
*/
- if (chain->flags & HAMMER2_CHAIN_MODIFIED1) {
+ if (chain->flags & HAMMER2_CHAIN_MODIFIED) {
if ((flags & HAMMER2_MODIFY_OPTDATA) == 0 &&
chain->bp == NULL) {
goto skip1;
}
/*
- * Set MODIFIED1 and add a chain ref to prevent destruction. Both
+ * Set MODIFIED and add a chain ref to prevent destruction. Both
* modified flags share the same ref.
*/
- atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED1);
+ atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
hammer2_chain_ref(hmp, chain);
/*
skip1:
/*
* Setting the DIRTYBP flag will cause the buffer to be dirtied or
- * written-out on unlock. This bit is independent of the MODIFIED1
+ * written-out on unlock. This bit is independent of the MODIFIED
* bit because the chain may still need meta-data adjustments done
- * by virtue of MODIFIED1 for its parent, and the buffer can be
+ * by virtue of MODIFIED for its parent, and the buffer can be
* flushed out (possibly multiple times) by the OS before that.
*
* Clearing the INITIAL flag (for indirect blocks) indicates that
}
/*
+ * Mark the volume as having been modified. This short-cut version
+ * does not have to lock the volume's chain, which allows the ioctl
+ * code to make adjustments to connections without deadlocking.
+ */
+void
+hammer2_modify_volume(hammer2_mount_t *hmp)
+{
+	hammer2_chain_t *vchain = &hmp->vchain;
+
+	/*
+	 * Only the voldata lock is taken here; the volume chain's own
+	 * lock is never acquired.  The AUX flag is set atomically on the
+	 * volume chain while the voldata lock is held.
+	 */
+	hammer2_voldata_lock(hmp);
+	atomic_set_int(&vchain->flags, HAMMER2_CHAIN_MODIFIED_AUX);
+	hammer2_voldata_unlock(hmp);
+}
+
+/*
* Locate an in-memory chain. The parent must be locked. The in-memory
* chain is returned or NULL if no in-memory chain is present.
*
/*
* (allocated) indicates that this is a newly-created chain element
* rather than a renamed chain element. In this situation we want
- * to place the chain element in the MODIFIED1 state.
+ * to place the chain element in the MODIFIED state.
*
* The data area will be set up as follows:
*
* to ensure that its state propagates up the newly
* connected parent.
*
- * We cannot depend on the chain being in a MODIFIED1
+ * We cannot depend on the chain being in a MODIFIED
* state, or it might already be in that state, so
* even if the parent calls hammer2_chain_modify()
* MOVED might not get set. Thus we have to set it
(parent->flags & HAMMER2_CHAIN_SUBMODIFIED)) {
if ((parent->flags & HAMMER2_CHAIN_DEFERRED) == 0 &&
((parent->flags & (HAMMER2_CHAIN_SUBMODIFIED |
- HAMMER2_CHAIN_MODIFIED1 |
+ HAMMER2_CHAIN_MODIFIED |
+ HAMMER2_CHAIN_MODIFIED_AUX |
HAMMER2_CHAIN_MOVED)) != 0)) {
hammer2_chain_ref(hmp, parent);
TAILQ_INSERT_TAIL(&info->flush_list,
&parent->shead, chain);
/*
* We only recurse if SUBMODIFIED (internal node)
- * or MODIFIED1 (internal node or leaf) is set.
+ * or MODIFIED (internal node or leaf) is set.
* However, we must still track whether any MOVED
* entries are present to determine if the parent's
* blockref's need updating or not.
if (chain->flags & HAMMER2_CHAIN_MOVED)
submoved = 1;
if ((chain->flags & (HAMMER2_CHAIN_SUBMODIFIED |
- HAMMER2_CHAIN_MODIFIED1)) == 0) {
+ HAMMER2_CHAIN_MODIFIED |
+ HAMMER2_CHAIN_MODIFIED_AUX)) == 0) {
continue;
}
* child (recursively) is still dirty.
*/
if (chain->flags & (HAMMER2_CHAIN_SUBMODIFIED |
- HAMMER2_CHAIN_MODIFIED1)) {
+ HAMMER2_CHAIN_MODIFIED |
+ HAMMER2_CHAIN_MODIFIED_AUX)) {
submodified = 1;
if (hammer2_debug & 0x0008)
kprintf("s");
}
/*
- * If destroying the object we unconditonally clear the MODIFIED1
+ * If destroying the object we unconditionally clear the MODIFIED
* and MOVED bits, and we destroy the buffer without writing it
* out.
*
* free pool.
*/
if (parent->flags & HAMMER2_CHAIN_DESTROYED) {
- if (parent->flags & HAMMER2_CHAIN_MODIFIED1) {
+ if (parent->flags & HAMMER2_CHAIN_MODIFIED) {
if (parent->bp) {
parent->bp->b_flags |= B_INVAL|B_RELBUF;
}
atomic_clear_int(&parent->flags,
- HAMMER2_CHAIN_MODIFIED1);
+ HAMMER2_CHAIN_MODIFIED);
hammer2_chain_drop(hmp, parent);
}
+ if (parent->flags & HAMMER2_CHAIN_MODIFIED_AUX) {
+ atomic_clear_int(&parent->flags,
+ HAMMER2_CHAIN_MODIFIED_AUX);
+ }
if (parent->flags & HAMMER2_CHAIN_MOVED) {
atomic_clear_int(&parent->flags,
HAMMER2_CHAIN_MOVED);
/*
* Flush this chain entry only if it is marked modified.
*/
- if ((parent->flags & HAMMER2_CHAIN_MODIFIED1) == 0) {
+ if ((parent->flags & (HAMMER2_CHAIN_MODIFIED |
+ HAMMER2_CHAIN_MODIFIED_AUX)) == 0) {
goto done;
}
/*
- * Clear MODIFIED1 and set HAMMER2_CHAIN_MOVED. The caller
+ * Clear MODIFIED and set HAMMER2_CHAIN_MOVED. The caller
* will re-test the MOVED bit.
*
* bits own a single parent ref and the MOVED bit owns its own
* parent ref.
*/
- atomic_clear_int(&parent->flags, HAMMER2_CHAIN_MODIFIED1);
- if (parent->flags & HAMMER2_CHAIN_MOVED) {
- hammer2_chain_drop(hmp, parent);
- } else {
- /* inherit ref from the MODIFIED1 we cleared */
- atomic_set_int(&parent->flags, HAMMER2_CHAIN_MOVED);
+ if (parent->flags & HAMMER2_CHAIN_MODIFIED) {
+ atomic_clear_int(&parent->flags, HAMMER2_CHAIN_MODIFIED);
+ if (parent->flags & HAMMER2_CHAIN_MOVED) {
+ hammer2_chain_drop(hmp, parent);
+ } else {
+ /* inherit ref from the MODIFIED we cleared */
+ atomic_set_int(&parent->flags, HAMMER2_CHAIN_MOVED);
+ }
}
+ atomic_clear_int(&parent->flags, HAMMER2_CHAIN_MODIFIED_AUX);
/*
* If this is part of a recursive flush we can go ahead and write
* wind up on our flush_list again.
*/
if ((scan->flags & (HAMMER2_CHAIN_SUBMODIFIED |
- HAMMER2_CHAIN_MODIFIED1)) == 0)
+ HAMMER2_CHAIN_MODIFIED |
+ HAMMER2_CHAIN_MODIFIED_AUX)) == 0) {
reflush = 1;
+ }
hammer2_chain_drop(hmp, scan);
}
if ((hammer2_debug & 0x0040) && reflush)
* be completely flushed.
*/
if (chain->flags & (HAMMER2_CHAIN_SUBMODIFIED |
- HAMMER2_CHAIN_MODIFIED1 |
+ HAMMER2_CHAIN_MODIFIED |
+ HAMMER2_CHAIN_MODIFIED_AUX |
HAMMER2_CHAIN_MOVED)) {
hammer2_chain_parent_setsubmod(hmp, chain);
}
if (parent == NULL ||
chain->bref.type != HAMMER2_BREF_TYPE_INODE ||
(chain->flags & (HAMMER2_CHAIN_SUBMODIFIED |
- HAMMER2_CHAIN_MODIFIED1 |
+ HAMMER2_CHAIN_MODIFIED |
+ HAMMER2_CHAIN_MODIFIED_AUX |
HAMMER2_CHAIN_MOVED)) != HAMMER2_CHAIN_MOVED) {
return;
}
#ifndef VFS_HAMMER2_DISK_H_
#define VFS_HAMMER2_DISK_H_
+#ifndef _SYS_UUID_H_
+#include <sys/uuid.h>
+#endif
+
/*
* The structures below represent the on-disk media structures for the HAMMER2
* filesystem. Note that all fields for on-disk structures are naturally
* inode number key via bit 63. Access to the hardlink silently looks up
* the real file and forwards all operations to that file. Removal of the
* last hardlink also removes the real file.
+ *
+ * (attr_tid) is only updated when the inode's specific attributes or regular
+ * file size has changed, and affects path lookups and stat. (attr_tid)
+ * represents a special cache coherency lock under the inode. The inode
+ * blockref's modify_tid will always cover it.
+ *
+ * (dirent_tid) is only updated when an entry under a directory inode has
+ * been created, deleted, renamed, or had its attributes change, and affects
+ * directory lookups and scans. (dirent_tid) represents another special cache
+ * coherency lock under the inode. The inode blockref's modify_tid will
+ * always cover it.
*/
#define HAMMER2_INODE_BYTES 1024 /* (asserted by code) */
#define HAMMER2_INODE_MAXNAME 256 /* maximum name in bytes */
hammer2_off_t size; /* 0060 size of file */
uint64_t nlinks; /* 0068 hard links (typ only dirs) */
hammer2_tid_t iparent; /* 0070 parent inum (recovery only) */
- uint64_t reserved78; /* 0078 */
-
+ uint8_t copies[8]; /* 0078 request copies to (up to 8) */
hammer2_off_t data_quota; /* 0080 subtree quota in bytes */
hammer2_off_t data_count; /* 0088 subtree byte count */
hammer2_off_t inode_quota; /* 0090 subtree quota inode count */
uint8_t reservedA3; /* 00A3 */
uint32_t reservedA4; /* 00A4 */
hammer2_key_t name_key; /* 00A8 full filename key */
- uint8_t copyids[8]; /* 00B0 request copies to (up to 8) */
- uuid_t pfsid; /* 00B8 pfs uuid if PFSROOT */
- uint64_t pfsinum; /* 00C8 pfs inum allocator */
- uint64_t reservedD0; /* 00D0 */
- uint64_t reservedD8; /* 00D8 */
+ uint8_t reservedB0[7]; /* 00B0 */
+ uint8_t pfs_type; /* 00B7 (if PFSROOT) node type */
+ uuid_t pfs_id; /* 00B8 (if PFSROOT) pfs uuid */
+ uint64_t pfs_inum; /* 00C8 (if PFSROOT) inum allocator */
+ hammer2_tid_t attr_tid; /* 00D0 attributes changed */
+ hammer2_tid_t dirent_tid; /* 00D8 directory/attr changed */
uint64_t reservedE0; /* 00E0 */
uint64_t reservedE8; /* 00E8 */
uint64_t reservedF0; /* 00F0 */
#define HAMMER2_OPFLAG_DIRECTDATA 0x01
#define HAMMER2_OPFLAG_PFSROOT 0x02
+#define HAMMER2_OPFLAG_COPYIDS 0x04 /* copyids override parent */
#define HAMMER2_OBJTYPE_UNKNOWN 0
#define HAMMER2_OBJTYPE_DIRECTORY 1
#define HAMMER2_CHECK_NONE 0
#define HAMMER2_CHECK_ICRC 1
+#define HAMMER2_PFSTYPE_NONE 0
+#define HAMMER2_PFSTYPE_ADMIN 1
+#define HAMMER2_PFSTYPE_CACHE 2
+#define HAMMER2_PFSTYPE_COPY 3
+#define HAMMER2_PFSTYPE_SLAVE 4
+#define HAMMER2_PFSTYPE_SOFT_SLAVE 5
+#define HAMMER2_PFSTYPE_SOFT_MASTER 6
+#define HAMMER2_PFSTYPE_MASTER 7
+
/*
* The allocref structure represents the allocation table. One 64K block
* is broken down into 4096 x 16 byte entries. Each indirect block chops
#define HAMMER2_ALLOCREF_LEAF 0x0004
/*
- * Copies information stored in the volume header. Typically formatted
- * e.g. like 'serno/A21343249.s1d'
- *
- * There are 8 copy_data[]'s in the volume header but up to 256 copyid's.
- * When a copy is removed its copyid remains reserved in the copyid bitmap
- * (copyexists[] bitmap in volume_data) until the copy references have
- * been removed from the entire filesystem and cannot be reused until the
- * removal is complete. However, new copy entries with other ids can be
- * instantly added, replacing the original copy_data[]... which is fine as
- * long as the copyid does not conflict.
- *
- * This structure must be exactly 64 bytes long.
+ * All HAMMER2 directories directly under the super-root on your local
+ * media can be mounted separately, even if they share the same physical
+ * device.
+ *
+ * When you do a HAMMER2 mount you are effectively tying into a HAMMER2
+ * cluster via local media. The local media does not have to participate
+ * in the cluster, other than to provide the hammer2_copy_data[] array and
+ * root inode for the mount.
+ *
+ * This is important: The mount device path you specify serves to bootstrap
+ * your entry into the cluster, but your mount will make active connections
+ * to ALL copy elements in the hammer2_copy_data[] array which match the
+ * PFSID of the directory in the super-root that you specified. The local
+ * media path does not have to be mentioned in this array but becomes part
+ * of the cluster based on its type and access rights. ALL ELEMENTS ARE
+ * TREATED ACCORDING TO TYPE NO MATTER WHICH ONE YOU MOUNT FROM.
+ *
+ * The actual cluster may be far larger than the elements you list in the
+ * hammer2_copy_data[] array. You list only the elements you wish to
+ * directly connect to and you are able to access the rest of the cluster
+ * indirectly through those connections.
+ *
+ * This structure must be exactly 128 bytes long.
*/
struct hammer2_copy_data {
- uint8_t copyid; /* 0-255 */
- uint8_t flags;
- uint8_t reserved02;
- uint8_t reserved03;
- uint8_t path[60]; /* up to 59-char string, nul-terminated */
+ /*
+  * The leading number(s) in each field comment are hex byte offsets
+  * within the structure.  As commented, the fields cover 00-7F.
+  *
+  * NOTE(review): reserved08[] is commented as occupying 09-1F, so its
+  * name does not match its offset (remote_pfs_type sits at 08).
+  */
+ uint8_t copyid; /* 00 copyid 0-255 (must match slot) */
+ uint8_t inprog; /* 01 operation in progress, or 0 */
+ uint8_t chain_to; /* 02 operation chaining to, or 0 */
+ uint8_t chain_from; /* 03 operation chaining from, or 0 */
+ uint16_t flags; /* 04-05 flags field */
+ uint8_t error; /* 06 last operational error */
+ uint8_t priority; /* 07 priority and round-robin flag */
+ uint8_t remote_pfs_type;/* 08 probed direct remote PFS type */
+ uint8_t reserved08[23]; /* 09-1F */
+ uuid_t pfsid; /* 20-2F copy target must match this uuid */
+ uint8_t label[16]; /* 30-3F import/export label */
+ uint8_t path[64]; /* 40-7F target specification string or key */
};
typedef struct hammer2_copy_data hammer2_copy_data_t;
-#define COPYDATAF_OUTOFSYNC 0x0001
+#define COPYDATAF_ENABLED 0x0001
+#define COPYDATAF_INPROG 0x0002
+#define COPYDATAF_CONN_RR 0x80 /* round-robin at same priority */
+#define COPYDATAF_CONN_EF 0x40 /* media errors flagged */
+#define COPYDATAF_CONN_PRI 0x0F /* select priority 0-15 (15=best) */
/*
* The volume header eats a 64K block. There is currently an issue where
#define HAMMER2_VOLUME_ID_HBO 0x48414d3205172011LLU
#define HAMMER2_VOLUME_ID_ABO 0x11201705324d4148LLU
+#define HAMMER2_COPYID_COUNT 256
+
struct hammer2_volume_data {
/*
- * 512-byte sector #0
+ * sector #0 - 512 bytes
*/
uint64_t magic; /* 0000 Signature */
hammer2_off_t boot_beg; /* 0008 Boot area (future) */
hammer2_crc32_t icrc_sects[8]; /* 01E0-01FF */
/*
- * 512-byte sector #1
+ * sector #1 - 512 bytes
*
* The entire sector is used by a blockset.
*/
- hammer2_blockset_t sroot_blockset; /* 0200 Superroot directory */
+ hammer2_blockset_t sroot_blockset; /* 0200-03FF Superroot dir */
/*
- * 512-byte sector #2-33
+ * sector #2-7
+ */
+ char sector2[512]; /* 0400-05FF reserved */
+ char sector3[512]; /* 0600-07FF reserved */
+ char sector4[512]; /* 0800-09FF reserved */
+ char sector5[512]; /* 0A00-0BFF reserved */
+ char sector6[512]; /* 0C00-0DFF reserved */
+ char sector7[512]; /* 0E00-0FFF reserved */
+
+ /*
+ * sector #8-71 - 32768 bytes
+ *
+ * Contains the configuration for up to 256 copyinfo targets. These
+ * specify local and remote copies operating as masters or slaves.
+ * copyid's 0 and 255 are reserved (0 indicates an empty slot and 255
+ * indicates the local media).
*
- * Up to 256 copyinfo specifications can be configured. Note that
- * any given subdirectory tree can only use 8 of the 256. Having
- * up to 256 configurable in the volume header allows
+ * Each inode contains a set of up to 8 copyids, either inherited
+ * from its parent or explicitly specified in the inode, which
+ * indexes into this array.
+ */
+ /* 1000-8FFF copyinfo config */
+ struct hammer2_copy_data copyinfo[HAMMER2_COPYID_COUNT];
+
+ /*
*
- * A specification takes 64 bytes. Each specification typically
- * configures a device path such as 'serno/<serial>.s1d'.
*/
- struct hammer2_copy_data copyinfo[256]; /* 0400-43FF copyinfo config */
/*
* Remaining sections are reserved for future use.
*/
- char reserved0400[0xBBFC]; /* 4400-FFFB reserved */
+ char reserved0400[0x6FFC]; /* 9000-FFFB reserved */
/*
* icrc on entire volume header
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
-#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
*nipp = nip;
nip->ip_data.type = hammer2_get_obj_type(vap->va_type);
+ hammer2_voldata_lock(hmp);
nip->ip_data.inum = hmp->voldata.alloc_tid++; /* XXX modify/lock */
+ hammer2_voldata_unlock(hmp);
nip->ip_data.version = HAMMER2_INODE_VERSION_ONE;
nip->ip_data.ctime = 0;
nip->ip_data.mtime = 0;
--- /dev/null
+/*
+ * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * Ioctl Functions.
+ *
+ * WARNING! The ioctl functions which manipulate the connection state need
+ * to be able to run without deadlock on the volume's chain lock.
+ * Most of these functions use a separate lock.
+ */
+
+#include "hammer2.h"
+
+static int hammer2_ioctl_get_version(hammer2_inode_t *ip, void *data);
+static int hammer2_ioctl_get_remote(hammer2_inode_t *ip, void *data);
+static int hammer2_ioctl_add_remote(hammer2_inode_t *ip, void *data);
+static int hammer2_ioctl_del_remote(hammer2_inode_t *ip, void *data);
+static int hammer2_ioctl_rep_remote(hammer2_inode_t *ip, void *data);
+static int hammer2_ioctl_get_socket(hammer2_inode_t *ip, void *data);
+static int hammer2_ioctl_set_socket(hammer2_inode_t *ip, void *data);
+
+int
+hammer2_ioctl(hammer2_inode_t *ip, u_long com, void *data, int fflag,
+	      struct ucred *cred)
+{
+	int (*handler)(hammer2_inode_t *, void *);
+	int need_root = 1;
+	int priv_error;
+
+	/*
+	 * The root credential check is always evaluated first.  Its
+	 * result is only honored for commands that require root creds;
+	 * unknown commands return EOPNOTSUPP regardless of the result.
+	 */
+	priv_error = priv_check_cred(cred, PRIV_HAMMER_IOCTL, 0);
+
+	switch (com) {
+	case HAMMER2IOC_GET_VERSION:
+		/* Retrieve version and basic status (no root required) */
+		handler = hammer2_ioctl_get_version;
+		need_root = 0;
+		break;
+	case HAMMER2IOC_GET_REMOTE:
+		/* Retrieve information about a remote */
+		handler = hammer2_ioctl_get_remote;
+		break;
+	case HAMMER2IOC_ADD_REMOTE:
+		/* Add new remote entry */
+		handler = hammer2_ioctl_add_remote;
+		break;
+	case HAMMER2IOC_DEL_REMOTE:
+		/* Delete existing remote entry */
+		handler = hammer2_ioctl_del_remote;
+		break;
+	case HAMMER2IOC_REP_REMOTE:
+		/* Replace existing remote entry */
+		handler = hammer2_ioctl_rep_remote;
+		break;
+	case HAMMER2IOC_GET_SOCKET:
+		/* Retrieve communications socket */
+		handler = hammer2_ioctl_get_socket;
+		break;
+	case HAMMER2IOC_SET_SOCKET:
+		/* Set communications socket for connection */
+		handler = hammer2_ioctl_set_socket;
+		break;
+	default:
+		return (EOPNOTSUPP);
+	}
+
+	/*
+	 * Privileged commands fail with the credential-check error
+	 * without ever invoking their handler.
+	 */
+	if (need_root && priv_error != 0)
+		return (priv_error);
+	return (handler(ip, data));
+}
+
+/*
+ * Retrieve version and basic info
+ *
+ * (data) is treated as a hammer2_ioc_version_t.  Its version field is
+ * loaded from the in-memory volume header (hmp->voldata.version).
+ * Always returns 0.
+ */
+static int
+hammer2_ioctl_get_version(hammer2_inode_t *ip, void *data)
+{
+	hammer2_ioc_version_t *out = data;
+
+	out->version = ip->hmp->voldata.version;
+	return (0);
+}
+
+/*
+ * Retrieve information about a remote
+ *
+ * (data) is a hammer2_ioc_remote_t.  On entry remote->copyid selects the
+ * copyinfo[] slot to read and must be in [0, HAMMER2_COPYID_COUNT),
+ * otherwise EINVAL is returned.  On success the slot is copied into
+ * remote->copy1 and remote->nextid is set to the index of the next slot
+ * whose copyid field is non-zero, or to -1 if there is none, so callers
+ * can iterate over the configured remotes.  Returns 0 on success.
+ */
+static int
+hammer2_ioctl_get_remote(hammer2_inode_t *ip, void *data)
+{
+	hammer2_mount_t *hmp = ip->hmp;
+	hammer2_ioc_remote_t *remote = data;
+	int copyid = remote->copyid;
+
+	if (copyid < 0 || copyid >= HAMMER2_COPYID_COUNT)
+		return (EINVAL);
+
+	hammer2_voldata_lock(hmp);
+	remote->copy1 = hmp->voldata.copyinfo[copyid];
+
+	/*
+	 * Adjust nextid (GET only)
+	 *
+	 * Advance exactly one slot per iteration, stopping at the first
+	 * slot that is in use.  The scan is done with the voldata lock
+	 * held so it sees the same copyinfo[] state as the copy above.
+	 */
+	for (++copyid; copyid < HAMMER2_COPYID_COUNT; ++copyid) {
+		if (hmp->voldata.copyinfo[copyid].copyid != 0)
+			break;
+	}
+	hammer2_voldata_unlock(hmp);
+
+	/* Running off the end of the array means there is no next entry */
+	remote->nextid = (copyid < HAMMER2_COPYID_COUNT) ? copyid : -1;
+
+	return (0);
+}
+
+/*
+ * Add new remote entry
+ *
+ * (data) is a hammer2_ioc_remote_t.  A negative remote->copyid requests
+ * automatic selection of the first slot in [1, HAMMER2_COPYID_COUNT)
+ * whose copyid field is 0; ENOSPC is returned if every such slot is in
+ * use.  A non-negative copyid selects that slot directly (EINVAL if it
+ * is >= HAMMER2_COPYID_COUNT) and overwrites whatever it holds.
+ *
+ * On success remote->copy1.copyid is set to the chosen slot, copy1 is
+ * stored into the volume header's copyinfo[] array, and 0 is returned.
+ *
+ * NOTE(review): an explicit copyid of 0 is not rejected here -- confirm
+ * whether that is intended.
+ */
+static int
+hammer2_ioctl_add_remote(hammer2_inode_t *ip, void *data)
+{
+	hammer2_mount_t *hmp = ip->hmp;
+	hammer2_ioc_remote_t *remote = data;
+	int copyid = remote->copyid;
+	int error;
+
+	if (copyid >= HAMMER2_COPYID_COUNT)
+		return (EINVAL);
+
+	hammer2_voldata_lock(hmp);
+
+	/* Negative copyid: locate the first free slot, starting at 1 */
+	if (copyid < 0) {
+		copyid = 1;
+		while (copyid < HAMMER2_COPYID_COUNT &&
+		       hmp->voldata.copyinfo[copyid].copyid != 0) {
+			++copyid;
+		}
+	}
+
+	if (copyid < HAMMER2_COPYID_COUNT) {
+		/*
+		 * hammer2_modify_volume() takes the voldata lock again
+		 * while it is already held by this function.
+		 */
+		hammer2_modify_volume(hmp);
+		kprintf("copyid %d\n", copyid);
+		remote->copy1.copyid = copyid;
+		hmp->voldata.copyinfo[copyid] = remote->copy1;
+		error = 0;
+	} else {
+		/* The search ran off the end of the array */
+		error = ENOSPC;
+	}
+
+	hammer2_voldata_unlock(hmp);
+	return (error);
+}
+
+/*
+ * Delete existing remote entry
+ *
+ * (data) is a hammer2_ioc_remote_t.  A negative remote->copyid requests
+ * a search of slots [1, HAMMER2_COPYID_COUNT) for an in-use entry whose
+ * path compares equal to remote->copy1.path; ENOENT is returned if no
+ * such entry exists.  A non-negative copyid selects that slot directly
+ * (EINVAL if it is >= HAMMER2_COPYID_COUNT) without comparing paths.
+ *
+ * The selected slot is released by zeroing its copyid field; the rest
+ * of the slot is left as-is.  Returns 0 on success.
+ */
+static int
+hammer2_ioctl_del_remote(hammer2_inode_t *ip, void *data)
+{
+	hammer2_mount_t *hmp = ip->hmp;
+	hammer2_ioc_remote_t *remote = data;
+	hammer2_copy_data_t *slot;
+	int copyid = remote->copyid;
+	int error;
+
+	if (copyid >= HAMMER2_COPYID_COUNT)
+		return (EINVAL);
+
+	/* Force termination of the caller's path before it is compared */
+	remote->copy1.path[sizeof(remote->copy1.path) - 1] = 0;
+
+	hammer2_voldata_lock(hmp);
+
+	/* Negative copyid: look the entry up by path, skipping free slots */
+	if (copyid < 0) {
+		copyid = 1;
+		while (copyid < HAMMER2_COPYID_COUNT) {
+			slot = &hmp->voldata.copyinfo[copyid];
+			if (slot->copyid != 0 &&
+			    strcmp(remote->copy1.path, slot->path) == 0) {
+				break;
+			}
+			++copyid;
+		}
+	}
+
+	if (copyid == HAMMER2_COPYID_COUNT) {
+		/* The search ran off the end of the array */
+		error = ENOENT;
+	} else {
+		/*
+		 * hammer2_modify_volume() takes the voldata lock again
+		 * while it is already held by this function.
+		 */
+		hammer2_modify_volume(hmp);
+		hmp->voldata.copyinfo[copyid].copyid = 0;
+		error = 0;
+	}
+
+	hammer2_voldata_unlock(hmp);
+	return (error);
+}
+
+/*
+ * Replace existing remote entry
+ *
+ * Placeholder: remote->copyid is range-checked (EINVAL when outside
+ * [0, HAMMER2_COPYID_COUNT)), the voldata lock is acquired and released,
+ * and 0 is returned.  No entry is actually modified.
+ */
+static int
+hammer2_ioctl_rep_remote(hammer2_inode_t *ip, void *data)
+{
+	hammer2_mount_t *hmp = ip->hmp;
+	hammer2_ioc_remote_t *remote = data;
+	int copyid = remote->copyid;
+
+	if (copyid >= 0 && copyid < HAMMER2_COPYID_COUNT) {
+		/* Nothing is done under the lock yet */
+		hammer2_voldata_lock(hmp);
+		hammer2_voldata_unlock(hmp);
+		return (0);
+	}
+	return (EINVAL);
+}
+
+/*
+ * Retrieve communications socket
+ *
+ * Not implemented: both arguments are ignored and EOPNOTSUPP is always
+ * returned.
+ */
+static int
+hammer2_ioctl_get_socket(hammer2_inode_t *ip, void *data)
+{
+ return (EOPNOTSUPP);
+}
+
+/*
+ * Set communications socket for connection
+ *
+ * Placeholder: remote->copyid is range-checked (EINVAL when outside
+ * [0, HAMMER2_COPYID_COUNT)), the voldata lock is acquired and released,
+ * and 0 is returned.  remote->fd is not consulted and no socket is
+ * associated with the entry.
+ */
+static int
+hammer2_ioctl_set_socket(hammer2_inode_t *ip, void *data)
+{
+	hammer2_mount_t *hmp = ip->hmp;
+	hammer2_ioc_remote_t *remote = data;
+	int copyid = remote->copyid;
+
+	if (copyid >= 0 && copyid < HAMMER2_COPYID_COUNT) {
+		/* Nothing is done under the lock yet */
+		hammer2_voldata_lock(hmp);
+		hammer2_voldata_unlock(hmp);
+		return (0);
+	}
+	return (EINVAL);
+}
--- /dev/null
+/*
+ * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved.
+ *
+ * This code is derived from software contributed to The DragonFly Project
+ * by Matthew Dillon <dillon@dragonflybsd.org>
+ * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * 3. Neither the name of The DragonFly Project nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific, prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef VFS_HAMMER2_IOCTL_H_
+#define VFS_HAMMER2_IOCTL_H_
+
+#ifndef _SYS_IOCCOM_H_
+#include <sys/ioccom.h>
+#endif
+#ifndef _VFS_HAMMER2_DISK_H_
+#include "hammer2_disk.h"
+#endif
+#ifndef _VFS_HAMMER2_MOUNT_H_
+#include "hammer2_mount.h"
+#endif
+
+/*
+ * get_version
+ *
+ * Argument structure for HAMMER2IOC_GET_VERSION.  The kernel handler
+ * stores the volume header's version into (version).
+ */
+struct hammer2_ioc_version {
+ int version;
+ /* padding; brings the structure to 256 bytes when int is 4 bytes */
+ char reserved[256 - 4];
+};
+
+typedef struct hammer2_ioc_version hammer2_ioc_version_t;
+
+/*
+ * Ioctls to manage the volume->copyinfo[] array and to associate or
+ * disassociate sockets
+ *
+ * Shared argument structure for the HAMMER2IOC_*_REMOTE and
+ * HAMMER2IOC_*_SOCKET commands.
+ */
+struct hammer2_ioc_remote {
+ /*
+  * copyinfo[] slot index.  The add and delete handlers accept a
+  * negative value to mean "search for a slot"; the other handlers
+  * reject negative values with EINVAL.
+  */
+ int copyid;
+ int nextid; /* for iteration (get only) */
+ int fd; /* socket descriptor if applicable */
+ int reserved03;
+ int reserved04[8];
+ hammer2_copy_data_t copy1; /* copy spec */
+ hammer2_copy_data_t copy2; /* copy spec (rename ops only) */
+};
+
+typedef struct hammer2_ioc_remote hammer2_ioc_remote_t;
+
+#define HAMMER2IOC_GET_VERSION _IOWR('h', 64, struct hammer2_ioc_version)
+
+#define HAMMER2IOC_GET_REMOTE _IOWR('h', 68, struct hammer2_ioc_remote)
+#define HAMMER2IOC_ADD_REMOTE _IOWR('h', 69, struct hammer2_ioc_remote)
+#define HAMMER2IOC_DEL_REMOTE _IOWR('h', 70, struct hammer2_ioc_remote)
+#define HAMMER2IOC_REP_REMOTE _IOWR('h', 71, struct hammer2_ioc_remote)
+
+#define HAMMER2IOC_GET_SOCKET _IOWR('h', 76, struct hammer2_ioc_remote)
+#define HAMMER2IOC_SET_SOCKET _IOWR('h', 77, struct hammer2_ioc_remote)
+
+#endif
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
-#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
lockmgr(&hmp->vchain.lk, LK_RELEASE);
}
+/*
+ * Acquire the mount's voldata lock (hmp->voldatalk) exclusively.
+ *
+ * Callers in this file pair it with hammer2_voldata_unlock() around
+ * accesses to hmp->voldata.
+ */
+void
+hammer2_voldata_lock(hammer2_mount_t *hmp)
+{
+ lockmgr(&hmp->voldatalk, LK_EXCLUSIVE);
+}
+
+/*
+ * Release the mount's voldata lock (hmp->voldatalk) previously acquired
+ * with hammer2_voldata_lock().
+ */
+void
+hammer2_voldata_unlock(hammer2_mount_t *hmp)
+{
+ lockmgr(&hmp->voldatalk, LK_RELEASE);
+}
+
/*
* Return the directory entry type for an inode
*/
/* hmp->vchain.u.xxx is left NULL */
lockinit(&hmp->vchain.lk, "volume", 0, LK_CANRECURSE);
lockinit(&hmp->alloclk, "h2alloc", 0, 0);
+ lockinit(&hmp->voldatalk, "voldata", 0, LK_CANRECURSE);
/*
* Install the volume header
mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE;
/*
+ * Optional fields
+ */
+ mp->mnt_iosize_max = MAXPHYS;
+
+ /*
* First locate the super-root inode, which is key 0 relative to the
* volume header's blockset.
*
return error;
/*
- * Flush any left over chains
+ * Flush any left over chains. The voldata lock is only used
+ * to synchronize against HAMMER2_CHAIN_MODIFIED_AUX.
*/
- if (hmp->vchain.flags & (HAMMER2_CHAIN_MODIFIED1 |
+ hammer2_voldata_lock(hmp);
+ if (hmp->vchain.flags & (HAMMER2_CHAIN_MODIFIED |
+ HAMMER2_CHAIN_MODIFIED_AUX |
HAMMER2_CHAIN_SUBMODIFIED)) {
+ hammer2_voldata_unlock(hmp);
hammer2_vfs_sync(mp, MNT_WAIT);
+ } else {
+ hammer2_voldata_unlock(hmp);
}
- if (hmp->vchain.flags & (HAMMER2_CHAIN_MODIFIED1 |
+ if (hmp->vchain.flags & (HAMMER2_CHAIN_MODIFIED |
+ HAMMER2_CHAIN_MODIFIED_AUX |
HAMMER2_CHAIN_SUBMODIFIED)) {
kprintf("hammer2_unmount: chains left over after final sync\n");
if (hammer2_debug & 0x0010)
}
#endif
hammer2_chain_lock(hmp, &hmp->vchain, HAMMER2_RESOLVE_ALWAYS);
- if (hmp->vchain.flags &
- (HAMMER2_CHAIN_MODIFIED1 | HAMMER2_CHAIN_SUBMODIFIED)) {
+ if (hmp->vchain.flags & (HAMMER2_CHAIN_MODIFIED |
+ HAMMER2_CHAIN_MODIFIED_AUX |
+ HAMMER2_CHAIN_SUBMODIFIED)) {
hammer2_chain_flush(hmp, &hmp->vchain);
haswork = 1;
} else {
if (error == 0 && haswork) {
struct buf *bp;
- kprintf("synchronize disk\n");
-
+ /*
+ * Synchronize the disk before flushing the volume
+ * header.
+ */
bp = getpbuf(NULL);
bp->b_bio1.bio_offset = 0;
bp->b_bufsize = 0;
biowait(&bp->b_bio1, "h2vol");
relpbuf(bp, NULL);
- kprintf("flush volume header\n");
-
+ /*
+ * Then we can safely flush the volume header. Volume
+ * data is locked separately to prevent ioctl functions
+ * from deadlocking due to a configuration issue.
+ */
bp = getblk(hmp->devvp, 0, HAMMER2_PBUFSIZE, 0, 0);
+ hammer2_voldata_lock(hmp);
bcopy(&hmp->voldata, bp->b_data, HAMMER2_PBUFSIZE);
+ hammer2_voldata_unlock(hmp);
bawrite(bp);
}
return (error);
ip = VTOI(vp);
if (vp->v_type == VNON || ip == NULL ||
- ((ip->chain.flags & (HAMMER2_CHAIN_MODIFIED1 |
+ ((ip->chain.flags & (HAMMER2_CHAIN_MODIFIED |
HAMMER2_CHAIN_DIRTYEMBED)) == 0 &&
RB_EMPTY(&vp->v_rbdirty_tree))) {
return(-1);
ip = VTOI(vp);
if (vp->v_type == VNON || vp->v_type == VBAD ||
- ((ip->chain.flags & (HAMMER2_CHAIN_MODIFIED1 |
+ ((ip->chain.flags & (HAMMER2_CHAIN_MODIFIED |
HAMMER2_CHAIN_DIRTYEMBED)) == 0 &&
RB_EMPTY(&vp->v_rbdirty_tree))) {
return(0);
biodone(nbio);
/*
- * This special flag does not follow the normal MODIFY1 rules
+ * This special flag does not follow the normal MODIFY rules
* because we might deadlock on ip. Instead we depend on
* VOP_FSYNC() to detect the case.
*/
return (0);
}
+/*
+ * hammer2_vop_ioctl { vp, command, data, fflag, cred }
+ *
+ * VOP entry point for ioctls.  Translates the vnode into its HAMMER2
+ * inode and forwards the request unchanged to hammer2_ioctl(), returning
+ * whatever error code that function produces.
+ */
+static
+int
+hammer2_vop_ioctl(struct vop_ioctl_args *ap)
+{
+	hammer2_inode_t *ip = VTOI(ap->a_vp);
+
+	return (hammer2_ioctl(ip, ap->a_command, (void *)ap->a_data,
+			      ap->a_fflag, ap->a_cred));
+}
+
static
int
hammer2_vop_mountctl(struct vop_mountctl_args *ap)
.vop_nresolve = hammer2_vop_nresolve,
.vop_nlookupdotdot = hammer2_vop_nlookupdotdot,
.vop_nmkdir = hammer2_vop_nmkdir,
+ .vop_ioctl = hammer2_vop_ioctl,
.vop_mountctl = hammer2_vop_mountctl,
.vop_bmap = hammer2_vop_bmap,
.vop_strategy = hammer2_vop_strategy,