From 2910a90c0cbd5e37a2c401f5ca63d4f6944351f1 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Thu, 5 Apr 2012 15:50:22 -0700 Subject: [PATCH] hammer2 - Start adding ioctl infrastructure, start writing hammer2 utility * Begin working on the ioctl infrastructure for HAMMER2. Start with basic version retrieval and the ability to persistently store connection configuration in the volume header. * Start writing the hammer2 utility, beginning with connection configuration commands (connect, disconnect, status). --- sbin/hammer2/Makefile | 14 ++ sbin/hammer2/cmd_remote.c | 123 +++++++++++++ sbin/hammer2/cmd_snapshot.c | 52 ++++++ sbin/hammer2/hammer2.h | 60 +++++++ sbin/hammer2/main.c | 172 ++++++++++++++++++ sbin/hammer2/subs.c | 60 +++++++ sys/vfs/hammer2/DESIGN | 289 ++++++++++++------------------- sys/vfs/hammer2/Makefile | 1 + sys/vfs/hammer2/donew2 | 5 + sys/vfs/hammer2/dossd2 | 11 ++ sys/vfs/hammer2/hammer2.h | 14 +- sys/vfs/hammer2/hammer2_chain.c | 87 ++++++---- sys/vfs/hammer2/hammer2_disk.h | 138 +++++++++++---- sys/vfs/hammer2/hammer2_inode.c | 3 +- sys/vfs/hammer2/hammer2_ioctl.c | 281 ++++++++++++++++++++++++++++++ sys/vfs/hammer2/hammer2_ioctl.h | 84 +++++++++ sys/vfs/hammer2/hammer2_subr.c | 13 +- sys/vfs/hammer2/hammer2_vfsops.c | 43 +++-- sys/vfs/hammer2/hammer2_vnops.c | 22 ++- 19 files changed, 1208 insertions(+), 264 deletions(-) create mode 100644 sbin/hammer2/Makefile create mode 100644 sbin/hammer2/cmd_remote.c create mode 100644 sbin/hammer2/cmd_snapshot.c create mode 100644 sbin/hammer2/hammer2.h create mode 100644 sbin/hammer2/main.c create mode 100644 sbin/hammer2/subs.c create mode 100755 sys/vfs/hammer2/donew2 create mode 100755 sys/vfs/hammer2/dossd2 create mode 100644 sys/vfs/hammer2/hammer2_ioctl.c create mode 100644 sys/vfs/hammer2/hammer2_ioctl.h diff --git a/sbin/hammer2/Makefile b/sbin/hammer2/Makefile new file mode 100644 index 0000000000..9e039282c0 --- /dev/null +++ b/sbin/hammer2/Makefile @@ -0,0 +1,14 @@ +PROG= hammer2 +SRCS= 
main.c subs.c +SRCS+= cmd_remote.c cmd_snapshot.c +#MAN= hammer2.8 +NOMAN= TRUE + +CFLAGS+= -I${.CURDIR}/../../sys +LDADD= -lm -lutil -lmd +DPADD= ${LIBM} ${LIBUTIL} ${LIBMD} + +#.PATH: ${.CURDIR}/../../sys/libkern +#SRCS+= crc32.c + +.include diff --git a/sbin/hammer2/cmd_remote.c b/sbin/hammer2/cmd_remote.c new file mode 100644 index 0000000000..4f723f7414 --- /dev/null +++ b/sbin/hammer2/cmd_remote.c @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * by Venkatesh Srinivas + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "hammer2.h" + +int +cmd_remote_connect(const char *sel_path, const char *url) +{ /* issue HAMMER2IOC_ADD_REMOTE for url on the filesystem at sel_path; returns 0 on success, 1 on failure */ + hammer2_ioc_remote_t remote; + int ecode = 0; + int fd; + + if ((fd = hammer2_ioctl_handle(sel_path)) < 0) + return(1); + bzero(&remote, sizeof(remote)); + remote.copyid = -1; + remote.fd = -1; + if (strlen(url) >= sizeof(remote.copy1.path)) { /* reject rather than truncate */ + fprintf(stderr, "hammer2: connect: Path too long\n"); + close(fd); + return(1); + } + snprintf(remote.copy1.path, sizeof(remote.copy1.path), "%s", url); + if (ioctl(fd, HAMMER2IOC_ADD_REMOTE, &remote) < 0) { + perror("ioctl"); + ecode = 1; + } + close(fd); + return (ecode); /* 1 if the ioctl failed */ +} + +int +cmd_remote_disconnect(const char *sel_path, const char *url) +{ /* issue HAMMER2IOC_DEL_REMOTE for url on the filesystem at sel_path; returns 0 on success, 1 on failure */ + hammer2_ioc_remote_t remote; + int ecode = 0; + int fd; + + if ((fd = hammer2_ioctl_handle(sel_path)) < 0) + return(1); + bzero(&remote, sizeof(remote)); + remote.copyid = -1; + remote.fd = -1; + if (strlen(url) >= sizeof(remote.copy1.path)) { /* reject rather than truncate */ + fprintf(stderr, "hammer2: disconnect: Path too long\n"); + close(fd); + return(1); + } + snprintf(remote.copy1.path, sizeof(remote.copy1.path), "%s", url); + if (ioctl(fd, HAMMER2IOC_DEL_REMOTE, &remote) < 0) { + perror("ioctl"); + ecode = 1; + } + close(fd); + return (ecode); /* 1 if the ioctl failed */ +} + +int +cmd_remote_status(const char *sel_path, int all_opt __unused) +{ /* print one line per remote returned by HAMMER2IOC_GET_REMOTE; returns 0 on success, 1 on failure */ + hammer2_ioc_remote_t remote; + int ecode = 0; + int count = 0; + int fd; + + if ((fd = hammer2_ioctl_handle(sel_path)) < 0) + return(1); + bzero(&remote,
sizeof(remote)); + + while ((remote.copyid = remote.nextid) >= 0) { /* starts at id 0 (bzero above); ends once nextid goes negative, presumably set by the ioctl */ + if (ioctl(fd, HAMMER2IOC_GET_REMOTE, &remote) < 0) { + perror("ioctl"); + ecode = 1; + break; + } + if (remote.copy1.copyid == 0) + continue; /* copyid 0: presumably an unused slot */ + if (count == 0) + printf("CPYID LABEL STATUS PATH\n"); + printf("%5d %-15s %c%c%c.%02x %s\n", + remote.copy1.copyid, + remote.copy1.label, + '-', '-', '-', + remote.copy1.priority, + remote.copy1.path); + ++count; + } + if (count == 0) + printf("No linkages found\n"); + return (ecode); /* NOTE(review): fd is not closed on this path */ +} diff --git a/sbin/hammer2/cmd_snapshot.c b/sbin/hammer2/cmd_snapshot.c new file mode 100644 index 0000000000..2d46b9e5d2 --- /dev/null +++ b/sbin/hammer2/cmd_snapshot.c @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * by Venkatesh Srinivas + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "hammer2.h" + +/* + * The snapshot is named _ unless + * overridden by a label. + * + * When local non-cache media is involved the media is + * first synchronized and the snapshot is then based on + * the media. + * + * If the media is remote the snapshot is created on the remote + * end (if you have sufficient administrative rights) and a local + * ADMIN or CACHE PFS is created with a connection to the snapshot + * on the remote. + * + * If the client has snapshot rights to multiple remotes then TBD. + */ diff --git a/sbin/hammer2/hammer2.h b/sbin/hammer2/hammer2.h new file mode 100644 index 0000000000..32dc042526 --- /dev/null +++ b/sbin/hammer2/hammer2.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * by Venkatesh Srinivas + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. 
Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Rollup headers for hammer2 utility + */ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int hammer2_ioctl_handle(const char *sel_path); + +int cmd_remote_connect(const char *sel_path, const char *url); +int cmd_remote_disconnect(const char *sel_path, const char *url); +int cmd_remote_status(const char *sel_path, int all_opt); diff --git a/sbin/hammer2/main.c b/sbin/hammer2/main.c new file mode 100644 index 0000000000..2474426477 --- /dev/null +++ b/sbin/hammer2/main.c @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * by Venkatesh Srinivas + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "hammer2.h" + +static void usage(int code); + +int +main(int ac, char **av) +{ /* parse the core options, then dispatch on the command word av[0]; returns the command's exit code */ + const char *sel_path = "."; /* -s value; defaults to the current directory */ + const char *uuid_str = NULL; /* -u value; stored but not yet read by any command */ + int pfs_type = HAMMER2_PFSTYPE_NONE; /* -t value; stored but not yet read by any command */ + int quick_opt = 0; /* -q flag; stored but not yet read by any command */ + int all_opt = 0; /* -a flag; passed to cmd_remote_status() */ + int ecode = 0; /* process exit status */ + int ch; + + /* + * Core options + */ + while ((ch = getopt(ac, av, "aqs:t:u:")) != -1) { + switch(ch) { + case 'a': + all_opt = 1; + break; + case 'q': + /* + * Quick mode - do not block verifying certain + * operations such as (connect).
+ */ + quick_opt = 1; + break; + case 's': + sel_path = optarg; + break; + case 't': + /* + * set node type for mkpfs + */ + if (strcasecmp(optarg, "ADMIN") == 0) { + pfs_type = HAMMER2_PFSTYPE_ADMIN; + } else if (strcasecmp(optarg, "CACHE") == 0) { + pfs_type = HAMMER2_PFSTYPE_CACHE; + } else if (strcasecmp(optarg, "COPY") == 0) { + pfs_type = HAMMER2_PFSTYPE_COPY; + } else if (strcasecmp(optarg, "SLAVE") == 0) { + pfs_type = HAMMER2_PFSTYPE_SLAVE; + } else if (strcasecmp(optarg, "SOFT_SLAVE") == 0) { + pfs_type = HAMMER2_PFSTYPE_SOFT_SLAVE; + } else if (strcasecmp(optarg, "SOFT_MASTER") == 0) { + pfs_type = HAMMER2_PFSTYPE_SOFT_MASTER; + } else if (strcasecmp(optarg, "MASTER") == 0) { + pfs_type = HAMMER2_PFSTYPE_MASTER; + } else { + fprintf(stderr, "-t: Unrecognized node type\n"); + usage(1); + } + break; + case 'u': + /* + * set uuid for mkpfs, else one will be generated + * (required for all except the MASTER node_type) + */ + uuid_str = optarg; + break; + default: + fprintf(stderr, "Unknown option: %c\n", ch); /* NOTE(review): ch is getopt's '?' here, not the offending option character */ + usage(1); + /* not reached */ + break; + } + } + + /* + * Adjust, then process the command + */ + ac -= optind; + av += optind; + if (ac < 1) { + fprintf(stderr, "Missing command\n"); + usage(1); + /* not reached */ + } + + if (strcmp(av[0], "connect") == 0) { + /* + * Add cluster connection + */ + if (ac < 2) { + fprintf(stderr, "connect: missing argument\n"); + usage(1); + } + ecode = cmd_remote_connect(sel_path, av[1]); + } else if (strcmp(av[0], "disconnect") == 0) { + /* + * Remove cluster connection + */ + if (ac < 2) { + fprintf(stderr, "disconnect: missing argument\n"); + usage(1); + } + ecode = cmd_remote_disconnect(sel_path, av[1]); + } else if (strcmp(av[0], "status") == 0) { + /* + * Get status of PFS and its connections (-a for all PFSs) + */ + ecode = cmd_remote_status(sel_path, all_opt); + } else if (strcmp(av[0], "mkpfs") == 0) { + /* + * Create new PFS using pfs_type (branch is still empty) + */ + } else if (strcmp(av[0], "snapshot") == 0) { + /* + * Create
snapshot with optional pfs_type and optional + * label override. (branch is still empty) + */ + } else if (strcmp(av[0], "helper") == 0) { + /* + * Typically run as a daemon, this multi-threaded helper + * subsystem manages socket communications for the + * filesystem. (branch is still empty) + */ + } else { + fprintf(stderr, "Unrecognized command: %s\n", av[0]); + usage(1); + } + return (ecode); +} + +static +void +usage(int code) +{ /* print the synopsis to stderr, then exit(code); does not return */ + fprintf(stderr, + "hammer2 [-s path] command...\n" + " -s path Select filesystem\n" + ); + exit(code); +} diff --git a/sbin/hammer2/subs.c b/sbin/hammer2/subs.c new file mode 100644 index 0000000000..a1f036ff1f --- /dev/null +++ b/sbin/hammer2/subs.c @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * by Venkatesh Srinivas + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "hammer2.h" + +/* + * Obtain a file descriptor that the caller can execute ioctl()'s on. + */ +int +hammer2_ioctl_handle(const char *sel_path) +{ /* returns an open descriptor for sel_path, or -1 after printing a diagnostic to stderr */ + struct hammer2_ioc_version info; /* filled by the probe ioctl below; not examined here */ + int fd; + + fd = open(sel_path, O_RDONLY, 0); + if (fd < 0) { + fprintf(stderr, "hammer2: Unable to open %s: %s\n", + sel_path, strerror(errno)); + return(-1); + } + if (ioctl(fd, HAMMER2IOC_GET_VERSION, &info) < 0) { /* probe: a failing GET_VERSION is reported as "not a hammer2 filesystem" */ + fprintf(stderr, "hammer2: '%s' is not a hammer2 filesystem\n", + sel_path); + close(fd); + return(-1); + } + return (fd); /* still open; the callers in cmd_remote.c close() it */ +} diff --git a/sys/vfs/hammer2/DESIGN b/sys/vfs/hammer2/DESIGN index 7ff9bbe638..5e87f526f0 100644 --- a/sys/vfs/hammer2/DESIGN +++ b/sys/vfs/hammer2/DESIGN @@ -87,10 +87,9 @@ missing copies (or remove excessive copies in the case where the copies value is reduced on a live filesystem). -* Intended to be clusterable, with a multi-master protocol under design - but not expected to be fully operational until mid-2013. The media - format for HAMMER1 was less condusive to logical clustering than I had - hoped so I was never able to get that aspect of my personal goals +* Clusterable with MESI cache coherency and dynamic granularity. + The media format for HAMMER1 was less condusive to logical clustering + than I had hoped so I was never able to get that aspect of my personal goals working with HAMMER1.
HAMMER2 effectively solves the issues that cropped up with HAMMER1 (mainly that HAMMER1's B-Tree did not reflect the logical file/directory hierarchy, making cache coherency very difficult). @@ -230,43 +229,107 @@ structures generally follow the kernel's filesystem hiearchy. Second, HAMMER2's writable snapshots make it possible to implement several forms of multi-master clustering. -The general mechanics for most of the multi-master clustering implementations -will be as follows: - - (a) Use the copies mechanism to specify all elements of the cluster, - both local and remote (networked). - - (b) The core synchronization state operates just as it does for copies, - simply requiring a fully-flushed ack from the remote in order to - mark the blocks as having been fully synchronized. - - The mirror_tid may be used to locate these blocks, allowing the - synchronization state to be updated on the fly at a much later - time without requiring the state to be maintained in-memory. - (also for crash recovery resynchronization purposes). - - (c) Data/meta-data can be retrieved from those copies which are marked - as being synchronized, with priority given to the local storage - relative to any given physical machine. - - This means that e.g. even in a master-slave orientation the slave - may be able to satisfy a request from a program when the slave - happens to be the local storage. - - (d) Transaction id synchronization between all elements of the cluster, - typically through masking (assigning a cluster number using the low - 3 bits of the transaction id). - - (e) General access (synchronized or otherwise) may require cache - coherency mechanisms to run over the network. - - Implementing cache coherency is a major complexity issue. - - (f) General access (synchronized or otherwise) may require quorum - agreement, using the synchronization flags in the blockrefs - to determine whether agreement has been reached. - - Implementing quorum voting is a major complexity issue. 
+This is important: The mount device path you specify serves to bootstrap +your entry into the cluster, but your mount will make active connections +to ALL copy elements in the hammer2_copy_data[] array (stored in the volume +header) which match the PFSID of the directory in the super-root that you +specified. The local media path does not have to be mentioned in this +array but becomes part of the cluster based on its type and access +rights. ALL ELEMENTS ARE TREATED ACCORDING TO TYPE NO MATTER WHICH ONE +YOU MOUNT FROM. + +The actual cluster may be far larger than the elements you list in the +hammer2_copy_data[] array. You list only the elements you wish to +directly connect to and you are able to access the rest of the cluster +indirectly through those connections. + +All nodes in the cluster may act as administrative proxies. All nodes +in the cluster, including your mount point, are classified as one of the +following as specified in the inode's structure: + + ADMIN - Media does not participate, administrative proxy only + CACHE - Media only acts as a persistent cache + COPY - Media only acts as a local copy + SLAVE - Media is a RO slave that can be mounted RW + + SOFT_SLAVE - This is a SLAVE which can become writable when + the quorum is not available, but is not guaranteed + to be able to be merged back when the quorum becomes + available again. Elements which cannot be merged + back remain localized and writable until manual + or scripted intervention recombines them. + + SOFT_MASTER - Similar to the above but can form a sub-cluster + and run the quorum protocol within the sub-cluster + to serve machines that connect to the sub-cluster + when the master cluster is not available. + + The SOFT_MASTER nodes in a sub-cluster must be + fully interconnected with each other. + + MASTER - This is a MASTER node in the quorum protocol. + + The MASTER nodes in a cluster must be fully + interconnected with each other. 
+ +There are four major protocols: + + Quorum protocol + + This protocol is used between MASTER nodes to vote on operations + and resolve deadlocks. + + This protocol is used between SOFT_MASTER nodes in a sub-cluster + to vote on operations, resolve deadlocks, determine what the latest + transaction id for an element is, and to perform commits. + + Cache sub-protocol + + This is the MESI sub-protocol which runs under the Quorum + protocol. This protocol is used to maintain cache state for + sub-trees to ensure that operations remain cache coherent. + + Depending on administrative rights this protocol may or may + not allow a leaf node in the cluster to hold a cache element + indefinitely. The administrative controller may preemptively + downgrade a leaf with insufficient administrative rights + without giving it a chance to synchronize any modified state + back to the cluster. + + Proxy protocol + + The Quorum and Cache protocols only operate between MASTER + and SOFT_MASTER nodes. All other node types must use the + Proxy protocol to perform similar actions. This protocol + differs in that proxy requests are typically sent to just + one adjacent node and that node then maintains state and + forwards the request or performs the required operation. + When the link is lost to the proxy, the proxy automatically + forwards a deletion of the state to the other nodes based on + what it has recorded. + + If a leaf has insufficient administrative rights it may not + be allowed to actually initiate a quorum operation and may only + be allowed to maintain partial MESI cache state or perhaps none + at all (since cache state can block other machines in the + cluster). Instead a leaf with insufficient rights will have to + make do with a preemptive loss of cache state and any allowed + modifying operations will have to be forwarded to the proxy which + continues forwarding them until a node with sufficient administrative + rights is encountered.
+ + To reduce issues and give the cluster more breadth, sub-clusters + made up of SOFT_MASTERs can be formed in order to provide full + cache coherency within a subset of machines and yet still tie them + into a greater cluster that they normally would not have such + access to. This effectively makes it possible to create a two + or three-tier fan-out of groups of machines which are cache-coherent + within the group, but perhaps not between groups, and use other + means to synchronize between the groups. + + Media protocol + + This is basically the physical media protocol. There are lots of ways to implement multi-master environments using the above core features but the implementation is going to be fairly complex @@ -283,145 +346,5 @@ state of flux DOES NOT invalidate the cache state for these higher levels of directories. Instead, the (modify_tid) is used on a node-by-node basis to determine cache state at any given level, and (mirror_tid) is used to determine whether any recursively underlying state is desynchronized. - -* Simple semi-synchronized multi-master environment. - - In this environment all nodes are considered masters and modifications - can be made on any of them, and then propagate to the others - asynchronously via HAMMER2 mirror streams. One difference here is - that kernel can activate these userland-managed streams automatically - when the copies configuration is used to specify the cluster. - - The only type of conflict which isn't readily resolvable by comparing - the (modify_tid) is when file data is updated. In this case user - intervention might be required but, theoretically, it should be - possible to automate most merges using a multi-way patch and, if not, - choosing one and creating backup copies if the others to allow the - user or sysop to resolve the conflict later. - -* Simple fully synchronized fail-over environment. - - In this environment there is one designated master and the remaining - nodes are slaves.
If the master fails all remaining nodes agree on a - new master, possibly with the requirement that a quorum be achieved - (if you don't want to allow the cluster to split). - - If network splits are allowed the each sub-cluster operates in this - mode but recombining the clusters reverts to the first algorithm. - If not allowed whomever no longer has a quorum will be forced to stall. - - In this environment the current designated master is responsible for - managing locks for modifying operations. The designated master will - proactively tell the other nodes to mark the blocks related to the - modifying operation as no longer being synchronized while any local - data at the node that acquired the lock (master or slave) remains - marked as being synchronized. - - The node that succesfully gets the lock then issues the modifying - operation to both its local copy and to the master, marking the - master as being desynchronized until the master acknowledges receipt. - - In this environment any node can access data from local storage if - the designated master copy is marked synchronized AND its (modify_tid) - matches the slave copy's (modify_tid). - - However, if a slave disconnects from the master then reconnects the - slave will have lost the master's desynchronization stream and must - mark its root blockref for the master copy HAMMER2_BREF_DESYNCHLD as - well as clear the SYNC1/SYNC2 bits. Setting DESYNCCHLD forces on-demand - recursive reverification that the master and slave are (or are not) in - sync in order to reestablish on the slave the synchronization state of - the master. - - That might be a bit confusing but the whole point here is to allow - read accesses to the filesystem to be satisfied by any node in a - multi-master cluster, not just by the current designated master. - -* Fully cache coherent and synchronized multi-master environment. - - In this environment a quorum is required to perform any modifying - action. 
All nodes are masters (there is no 'designated' master) - and all nodes connect to all other nodes in a cross-bar. - - The quorum is specified by copies setup in the root volume configuration. - A quorum of nodes in the cluster must agree on the copies configuration. - If they do not the cluster cannot proceed to mount. Any other nodes - not in the quorum which are in the cluster which disagree with the - configuration will inherit the copies configuration from the quorum. - - Any modifying action will initiate a lock request locally to all nodes - in the cluster. The modifying action is allowed to proceed the instant - a quorum of nodes respond in the affirmative (even if some have not - yet responded or are down). The modifying action is considered complete - once the two-phase commit protocol succeeds. The modifying action - typically creates and commits a temporary snapshot on at least a quorum - of masters as phase-1 and then ties the snapshot back into the main - mount as phase-2. - - These locks are cache-coherency locks and may be passively maintained - in order to aggregate multiple operations under the same lock and thus - under the same transaction from the point of view of the rest of the - quorum. - - A lock request which interferes with a passively maintained lock will - force the two-phase commit protocol to complete and then transfer - ownership to the requesting entity, thus avoiding having to deal with - deadlock protocols at this point in the state machine. - - Since any node can initiate concurrent lock requests to many other nodes - it is possible to deadlock. When two nodes initiate conflicting lock - requests to the cluster the one achieving the quorum basically wins and - the other is forced to retry (go back one paragraph). In this situation - no deadlock will occur. - - If three are more nodes initiate conflicting lock requests to the - cluster a deadlock can occur whereby none of the nodes achieve a quorum. 
- In this case every node will know which of the other nodes was granted - the lock(s). Deadlock resolution then proceeds simultaniously on the - three nodes (since they have the same information), whereby the lock - holders on the losing end of the algorithm transfer their locks to one - of the other nodes. The lock state and knowledge of the lock state is - updated in real time on all nodes until a quorum is achieved. - -* Fully cache coherent and synchronized multi-master environment with - passive read locking. - - This is a more complex form of clustering than the previous form. - Take the previous form and add the ability to passively hold SHARED - locks in addition to the EXCLUSIVE locks the previous form is able - to hold. - - The advantage of being able to passively hold a shared lock on a sub-tree - (locks can be held on single nodes or entire sub-trees) is that it is - then possible for all nodes to validate a node (modify_tid) or entire - sub-tree (mirror_tid) with a very short network transaction and then - satisfy a large number of requests from local storage. - -* Fully cache coherent and synchronized multi-master environment with - passive read locking and slave-only nodes. - - This is the MOST complex form of clustering we intend to support. - In a multi-master environment requiring a quorum of masters to operate - we implement all of the above plus ALSO allow additional nodes to be - added to the cluster as slave-only nodes. - - The difference between a slave-only node and setting up a manual - mirror-stream from the cluster to a read-only snapshot on another - HAMMER2 filesystem is that the slave-only node will be fully - cache coherent with either the cluster proper (if connected to a quorum - of masters), or to one or more other nodes in the cluster (if not - connected to a quorum of masters), EVEN if the slave itself is not - completely caught up. 
- - So if the slave-only cluster node is connected to the rest of the cluster - over a slow connection you basically get a combination of local disk - speeds for any data that is locally in sync and network-limited speeds - for any data that is not locally in sync. - - slave-only cluster nodes run a standard mirror-stream in the background - to pull in the data as quickly as possible. - - This is in constrast to a manual mirror-stream to a read-only - snapshot (basically a simple slave), which has no ability to bypass - the local storage to handle out-of-date requests (in fact has no ability - to detect that the local storage is out-of-date anyway). +The inode structure also has two additional transaction ids used to optimize +path lookups, stat, and directory lookup/scan operations. diff --git a/sys/vfs/hammer2/Makefile b/sys/vfs/hammer2/Makefile index 8ec7e56e95..f0b2ffe120 100644 --- a/sys/vfs/hammer2/Makefile +++ b/sys/vfs/hammer2/Makefile @@ -7,5 +7,6 @@ CFLAGS+= -DINVARIANTS KMOD= hammer2 SRCS= hammer2_vfsops.c hammer2_vnops.c hammer2_inode.c SRCS+= hammer2_chain.c hammer2_freemap.c hammer2_subr.c hammer2_icrc.c +SRCS+= hammer2_ioctl.c .include diff --git a/sys/vfs/hammer2/donew2 b/sys/vfs/hammer2/donew2 new file mode 100755 index 0000000000..d98c5a2e1b --- /dev/null +++ b/sys/vfs/hammer2/donew2 @@ -0,0 +1,5 @@ +#!/bin/csh +# + +umount /mnt +newfs_hammer2 -L ROOT /dev/da0s1b diff --git a/sys/vfs/hammer2/dossd2 b/sys/vfs/hammer2/dossd2 new file mode 100755 index 0000000000..124869cb50 --- /dev/null +++ b/sys/vfs/hammer2/dossd2 @@ -0,0 +1,11 @@ +#!/bin/csh +# + +umount /mnt >& /dev/null +kldunload hammer2.ko >& /dev/null +kldstat | fgrep hammer2.ko >& /dev/null +if ( $status > 0 ) then + kldload /usr/obj/usr/src/sys/vfs/hammer2/hammer2.ko +endif +mount_hammer2 /dev/da0s1b@ROOT /mnt +sysctl vfs.hammer2.debug=0 diff --git a/sys/vfs/hammer2/hammer2.h b/sys/vfs/hammer2/hammer2.h index 53737d3839..dc7d64828c 100644 --- a/sys/vfs/hammer2/hammer2.h +++ 
b/sys/vfs/hammer2/hammer2.h @@ -65,6 +65,7 @@ #include "hammer2_disk.h" #include "hammer2_mount.h" +#include "hammer2_ioctl.h" struct hammer2_chain; struct hammer2_inode; @@ -123,7 +124,7 @@ typedef struct hammer2_chain hammer2_chain_t; int hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2); SPLAY_PROTOTYPE(hammer2_chain_splay, hammer2_chain, snode, hammer2_chain_cmp); -#define HAMMER2_CHAIN_MODIFIED1 0x00000001 /* active mods */ +#define HAMMER2_CHAIN_MODIFIED 0x00000001 /* active mods */ #define HAMMER2_CHAIN_DIRTYEMBED 0x00000002 /* inode embedded */ #define HAMMER2_CHAIN_DIRTYBP 0x00000004 /* dirty on unlock */ #define HAMMER2_CHAIN_SUBMODIFIED 0x00000008 /* 1+ subs modified */ @@ -134,6 +135,7 @@ SPLAY_PROTOTYPE(hammer2_chain_splay, hammer2_chain, snode, hammer2_chain_cmp); #define HAMMER2_CHAIN_IOFLUSH 0x00000100 /* bawrite on put */ #define HAMMER2_CHAIN_DEFERRED 0x00000200 /* on a deferral list*/ #define HAMMER2_CHAIN_DESTROYED 0x00000400 /* destroying */ +#define HAMMER2_CHAIN_MODIFIED_AUX 0x00000800 /* hmp->vchain only */ /* * Flags passed to hammer2_chain_lookup() and hammer2_chain_next() @@ -251,6 +253,7 @@ struct hammer2_mount { hammer2_chain_t *rchain; /* label-root */ struct hammer2_inode *iroot; struct lock alloclk; /* lockmgr lock */ + struct lock voldatalk; /* lockmgr lock */ hammer2_volume_data_t voldata; hammer2_off_t freecache[HAMMER2_FREECACHE_TYPES][HAMMER2_MAX_RADIX+1]; @@ -309,6 +312,8 @@ void hammer2_inode_lock_sh(hammer2_inode_t *ip); void hammer2_inode_unlock_sh(hammer2_inode_t *ip); void hammer2_inode_busy(hammer2_inode_t *ip); void hammer2_inode_unbusy(hammer2_inode_t *ip); +void hammer2_voldata_lock(hammer2_mount_t *hmp); +void hammer2_voldata_unlock(hammer2_mount_t *hmp); void hammer2_mount_exlock(hammer2_mount_t *hmp); void hammer2_mount_shlock(hammer2_mount_t *hmp); @@ -351,6 +356,7 @@ int hammer2_hardlink_create(hammer2_inode_t *ip, hammer2_inode_t *dip, /* * hammer2_chain.c */ +void 
hammer2_modify_volume(hammer2_mount_t *hmp); hammer2_chain_t *hammer2_chain_alloc(hammer2_mount_t *hmp, hammer2_blockref_t *bref); void hammer2_chain_free(hammer2_mount_t *hmp, hammer2_chain_t *chain); @@ -387,6 +393,12 @@ void hammer2_chain_delete(hammer2_mount_t *hmp, hammer2_chain_t *parent, void hammer2_chain_flush(hammer2_mount_t *hmp, hammer2_chain_t *chain); void hammer2_chain_commit(hammer2_mount_t *hmp, hammer2_chain_t *chain); +/* + * hammer2_ioctl.c + */ +int hammer2_ioctl(hammer2_inode_t *ip, u_long com, void *data, + int fflag, struct ucred *cred); + /* * hammer2_freemap.c */ diff --git a/sys/vfs/hammer2/hammer2_chain.c b/sys/vfs/hammer2/hammer2_chain.c index 8ea6d8070e..90fec617b6 100644 --- a/sys/vfs/hammer2/hammer2_chain.c +++ b/sys/vfs/hammer2/hammer2_chain.c @@ -39,7 +39,6 @@ * header. */ -#include #include #include #include @@ -582,11 +581,11 @@ hammer2_chain_resize(hammer2_mount_t *hmp, hammer2_chain_t *chain, return; /* - * Set MODIFIED1 and add a chain ref to prevent destruction. Both + * Set MODIFIED and add a chain ref to prevent destruction. Both * modified flags share the same ref. */ - if ((chain->flags & HAMMER2_CHAIN_MODIFIED1) == 0) { - atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED1); + if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) { + atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); hammer2_chain_ref(hmp, chain); } @@ -674,9 +673,9 @@ hammer2_chain_modify(hammer2_mount_t *hmp, hammer2_chain_t *chain, int flags) void *bdata; /* - * If the chain is already marked MODIFIED1 we can just return. + * If the chain is already marked MODIFIED we can just return. */ - if (chain->flags & HAMMER2_CHAIN_MODIFIED1) { + if (chain->flags & HAMMER2_CHAIN_MODIFIED) { if ((flags & HAMMER2_MODIFY_OPTDATA) == 0 && chain->bp == NULL) { goto skip1; @@ -685,10 +684,10 @@ hammer2_chain_modify(hammer2_mount_t *hmp, hammer2_chain_t *chain, int flags) } /* - * Set MODIFIED1 and add a chain ref to prevent destruction. 
Both + * Set MODIFIED and add a chain ref to prevent destruction. Both * modified flags share the same ref. */ - atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED1); + atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); hammer2_chain_ref(hmp, chain); /* @@ -726,9 +725,9 @@ hammer2_chain_modify(hammer2_mount_t *hmp, hammer2_chain_t *chain, int flags) skip1: /* * Setting the DIRTYBP flag will cause the buffer to be dirtied or - * written-out on unlock. This bit is independent of the MODIFIED1 + * written-out on unlock. This bit is independent of the MODIFIED * bit because the chain may still need meta-data adjustments done - * by virtue of MODIFIED1 for its parent, and the buffer can be + * by virtue of MODIFIED for its parent, and the buffer can be * flushed out (possibly multiple times) by the OS before that. * * Clearing the INITIAL flag (for indirect blocks) indicates that @@ -810,6 +809,19 @@ skip2: hammer2_chain_parent_setsubmod(hmp, chain); } +/* + * Mark the volume as having been modified. This short-cut version + * does not have to lock the volume's chain, which allows the ioctl + * code to make adjustments to connections without deadlocking. + */ +void +hammer2_modify_volume(hammer2_mount_t *hmp) +{ + hammer2_voldata_lock(hmp); + atomic_set_int(&hmp->vchain.flags, HAMMER2_CHAIN_MODIFIED_AUX); + hammer2_voldata_unlock(hmp); +} + /* * Locate an in-memory chain. The parent must be locked. The in-memory * chain is returned or NULL if no in-memory chain is present. @@ -1499,7 +1511,7 @@ again: /* * (allocated) indicates that this is a newly-created chain element * rather than a renamed chain element. In this situation we want - * to place the chain element in the MODIFIED1 state. + * to place the chain element in the MODIFIED state. * * The data area will be set up as follows: * @@ -1532,7 +1544,7 @@ again: * to ensure that its state propagates up the newly * connected parent. 
* - * We cannot depend on the chain being in a MODIFIED1 + * We cannot depend on the chain being in a MODIFIED * state, or it might already be in that state, so * even if the parent calls hammer2_chain_modify() * MOVED might not get set. Thus we have to set it @@ -2039,7 +2051,8 @@ hammer2_chain_flush_pass1(hammer2_mount_t *hmp, hammer2_chain_t *parent, (parent->flags & HAMMER2_CHAIN_SUBMODIFIED)) { if ((parent->flags & HAMMER2_CHAIN_DEFERRED) == 0 && ((parent->flags & (HAMMER2_CHAIN_SUBMODIFIED | - HAMMER2_CHAIN_MODIFIED1 | + HAMMER2_CHAIN_MODIFIED | + HAMMER2_CHAIN_MODIFIED_AUX | HAMMER2_CHAIN_MOVED)) != 0)) { hammer2_chain_ref(hmp, parent); TAILQ_INSERT_TAIL(&info->flush_list, @@ -2095,7 +2108,7 @@ hammer2_chain_flush_pass1(hammer2_mount_t *hmp, hammer2_chain_t *parent, &parent->shead, chain); /* * We only recurse if SUBMODIFIED (internal node) - * or MODIFIED1 (internal node or leaf) is set. + * or MODIFIED (internal node or leaf) is set. * However, we must still track whether any MOVED * entries are present to determine if the parent's * blockref's need updating or not. @@ -2103,7 +2116,8 @@ hammer2_chain_flush_pass1(hammer2_mount_t *hmp, hammer2_chain_t *parent, if (chain->flags & HAMMER2_CHAIN_MOVED) submoved = 1; if ((chain->flags & (HAMMER2_CHAIN_SUBMODIFIED | - HAMMER2_CHAIN_MODIFIED1)) == 0) { + HAMMER2_CHAIN_MODIFIED | + HAMMER2_CHAIN_MODIFIED_AUX)) == 0) { continue; } @@ -2127,7 +2141,8 @@ hammer2_chain_flush_pass1(hammer2_mount_t *hmp, hammer2_chain_t *parent, * child (recursively) is still dirty. 
*/ if (chain->flags & (HAMMER2_CHAIN_SUBMODIFIED | - HAMMER2_CHAIN_MODIFIED1)) { + HAMMER2_CHAIN_MODIFIED | + HAMMER2_CHAIN_MODIFIED_AUX)) { submodified = 1; if (hammer2_debug & 0x0008) kprintf("s"); @@ -2218,7 +2233,7 @@ hammer2_chain_flush_pass1(hammer2_mount_t *hmp, hammer2_chain_t *parent, } /* - * If destroying the object we unconditonally clear the MODIFIED1 + * If destroying the object we unconditionally clear the MODIFIED * and MOVED bits, and we destroy the buffer without writing it * out. * @@ -2228,14 +2243,18 @@ hammer2_chain_flush_pass1(hammer2_mount_t *hmp, hammer2_chain_t *parent, * free pool. */ if (parent->flags & HAMMER2_CHAIN_DESTROYED) { - if (parent->flags & HAMMER2_CHAIN_MODIFIED1) { + if (parent->flags & HAMMER2_CHAIN_MODIFIED) { if (parent->bp) { parent->bp->b_flags |= B_INVAL|B_RELBUF; } atomic_clear_int(&parent->flags, - HAMMER2_CHAIN_MODIFIED1); + HAMMER2_CHAIN_MODIFIED); hammer2_chain_drop(hmp, parent); } + if (parent->flags & HAMMER2_CHAIN_MODIFIED_AUX) { + atomic_clear_int(&parent->flags, + HAMMER2_CHAIN_MODIFIED_AUX); + } if (parent->flags & HAMMER2_CHAIN_MOVED) { atomic_clear_int(&parent->flags, HAMMER2_CHAIN_MOVED); @@ -2247,24 +2266,28 @@ hammer2_chain_flush_pass1(hammer2_mount_t *hmp, hammer2_chain_t *parent, /* * Flush this chain entry only if it is marked modified. */ - if ((parent->flags & HAMMER2_CHAIN_MODIFIED1) == 0) { + if ((parent->flags & (HAMMER2_CHAIN_MODIFIED | + HAMMER2_CHAIN_MODIFIED_AUX)) == 0) { goto done; } /* - * Clear MODIFIED1 and set HAMMER2_CHAIN_MOVED. The caller + * Clear MODIFIED and set HAMMER2_CHAIN_MOVED. The caller * will re-test the MOVED bit. * * bits own a single parent ref and the MOVED bit owns its own * parent ref.
*/ - atomic_clear_int(&parent->flags, HAMMER2_CHAIN_MODIFIED1); - if (parent->flags & HAMMER2_CHAIN_MOVED) { - hammer2_chain_drop(hmp, parent); - } else { - /* inherit ref from the MODIFIED1 we cleared */ - atomic_set_int(&parent->flags, HAMMER2_CHAIN_MOVED); + if (parent->flags & HAMMER2_CHAIN_MODIFIED) { + atomic_clear_int(&parent->flags, HAMMER2_CHAIN_MODIFIED); + if (parent->flags & HAMMER2_CHAIN_MOVED) { + hammer2_chain_drop(hmp, parent); + } else { + /* inherit ref from the MODIFIED we cleared */ + atomic_set_int(&parent->flags, HAMMER2_CHAIN_MOVED); + } } + atomic_clear_int(&parent->flags, HAMMER2_CHAIN_MODIFIED_AUX); /* * If this is part of a recursive flush we can go ahead and write @@ -2471,8 +2494,10 @@ hammer2_chain_flush(hammer2_mount_t *hmp, hammer2_chain_t *chain) * wind up on our flush_list again. */ if ((scan->flags & (HAMMER2_CHAIN_SUBMODIFIED | - HAMMER2_CHAIN_MODIFIED1)) == 0) + HAMMER2_CHAIN_MODIFIED | + HAMMER2_CHAIN_MODIFIED_AUX)) == 0) { reflush = 1; + } hammer2_chain_drop(hmp, scan); } if ((hammer2_debug & 0x0040) && reflush) @@ -2484,7 +2509,8 @@ hammer2_chain_flush(hammer2_mount_t *hmp, hammer2_chain_t *chain) * be completely flushed. 
*/ if (chain->flags & (HAMMER2_CHAIN_SUBMODIFIED | - HAMMER2_CHAIN_MODIFIED1 | + HAMMER2_CHAIN_MODIFIED | + HAMMER2_CHAIN_MODIFIED_AUX | HAMMER2_CHAIN_MOVED)) { hammer2_chain_parent_setsubmod(hmp, chain); } @@ -2497,7 +2523,8 @@ hammer2_chain_flush(hammer2_mount_t *hmp, hammer2_chain_t *chain) if (parent == NULL || chain->bref.type != HAMMER2_BREF_TYPE_INODE || (chain->flags & (HAMMER2_CHAIN_SUBMODIFIED | - HAMMER2_CHAIN_MODIFIED1 | + HAMMER2_CHAIN_MODIFIED | + HAMMER2_CHAIN_MODIFIED_AUX | HAMMER2_CHAIN_MOVED)) != HAMMER2_CHAIN_MOVED) { return; } diff --git a/sys/vfs/hammer2/hammer2_disk.h b/sys/vfs/hammer2/hammer2_disk.h index 9f559c5eaf..0b13055e23 100644 --- a/sys/vfs/hammer2/hammer2_disk.h +++ b/sys/vfs/hammer2/hammer2_disk.h @@ -35,6 +35,10 @@ #ifndef VFS_HAMMER2_DISK_H_ #define VFS_HAMMER2_DISK_H_ +#ifndef _SYS_UUID_H_ +#include <sys/uuid.h> +#endif + /* * The structures below represent the on-disk media structures for the HAMMER2 * filesystem. Note that all fields for on-disk structures are naturally @@ -387,6 +391,17 @@ typedef struct hammer2_indblock_data hammer2_indblock_data_t; * inode number key via bit 63. Access to the hardlink silently looks up * the real file and forwards all operations to that file. Removal of the * last hardlink also removes the real file. + * + * (attr_tid) is only updated when the inode's specific attributes or regular + * file size has changed, and affects path lookups and stat. (attr_tid) + * represents a special cache coherency lock under the inode. The inode + * blockref's modify_tid will always cover it. + * + * (dirent_tid) is only updated when an entry under a directory inode has + * been created, deleted, renamed, or had its attributes change, and affects + * directory lookups and scans. (dirent_tid) represents another special cache + * coherency lock under the inode. The inode blockref's modify_tid will + * always cover it.
*/ #define HAMMER2_INODE_BYTES 1024 /* (asserted by code) */ #define HAMMER2_INODE_MAXNAME 256 /* maximum name in bytes */ @@ -414,8 +429,7 @@ struct hammer2_inode_data { hammer2_off_t size; /* 0060 size of file */ uint64_t nlinks; /* 0068 hard links (typ only dirs) */ hammer2_tid_t iparent; /* 0070 parent inum (recovery only) */ - uint64_t reserved78; /* 0078 */ - + uint8_t copies[8]; /* 0078 request copies to (up to 8) */ hammer2_off_t data_quota; /* 0080 subtree quota in bytes */ hammer2_off_t data_count; /* 0088 subtree byte count */ hammer2_off_t inode_quota; /* 0090 subtree quota inode count */ @@ -425,11 +439,12 @@ struct hammer2_inode_data { uint8_t reservedA3; /* 00A3 */ uint32_t reservedA4; /* 00A4 */ hammer2_key_t name_key; /* 00A8 full filename key */ - uint8_t copyids[8]; /* 00B0 request copies to (up to 8) */ - uuid_t pfsid; /* 00B8 pfs uuid if PFSROOT */ - uint64_t pfsinum; /* 00C8 pfs inum allocator */ - uint64_t reservedD0; /* 00D0 */ - uint64_t reservedD8; /* 00D8 */ + uint8_t reservedB0[7]; /* 00B0 */ + uint8_t pfs_type; /* 00B7 (if PFSROOT) node type */ + uuid_t pfs_id; /* 00B8 (if PFSROOT) pfs uuid */ + uint64_t pfs_inum; /* 00C8 (if PFSROOT) inum allocator */ + hammer2_tid_t attr_tid; /* 00D0 attributes changed */ + hammer2_tid_t dirent_tid; /* 00D8 directory/attr changed */ uint64_t reservedE0; /* 00E0 */ uint64_t reservedE8; /* 00E8 */ uint64_t reservedF0; /* 00F0 */ @@ -447,6 +462,7 @@ typedef struct hammer2_inode_data hammer2_inode_data_t; #define HAMMER2_OPFLAG_DIRECTDATA 0x01 #define HAMMER2_OPFLAG_PFSROOT 0x02 +#define HAMMER2_OPFLAG_COPYIDS 0x04 /* copyids override parent */ #define HAMMER2_OBJTYPE_UNKNOWN 0 #define HAMMER2_OBJTYPE_DIRECTORY 1 @@ -468,6 +484,15 @@ typedef struct hammer2_inode_data hammer2_inode_data_t; #define HAMMER2_CHECK_NONE 0 #define HAMMER2_CHECK_ICRC 1 +#define HAMMER2_PFSTYPE_NONE 0 +#define HAMMER2_PFSTYPE_ADMIN 1 +#define HAMMER2_PFSTYPE_CACHE 2 +#define HAMMER2_PFSTYPE_COPY 3 +#define HAMMER2_PFSTYPE_SLAVE 4 
+#define HAMMER2_PFSTYPE_SOFT_SLAVE 5 +#define HAMMER2_PFSTYPE_SOFT_MASTER 6 +#define HAMMER2_PFSTYPE_MASTER 7 + /* * The allocref structure represents the allocation table. One 64K block * is broken down into 4096 x 16 byte entries. Each indirect block chops @@ -540,30 +565,52 @@ typedef struct hammer2_allocref hammer2_allocref_t; #define HAMMER2_ALLOCREF_LEAF 0x0004 /* - * Copies information stored in the volume header. Typically formatted - * e.g. like 'serno/A21343249.s1d' - * - * There are 8 copy_data[]'s in the volume header but up to 256 copyid's. - * When a copy is removed its copyid remains reserved in the copyid bitmap - * (copyexists[] bitmap in volume_data) until the copy references have - * been removed from the entire filesystem and cannot be reused until the - * removal is complete. However, new copy entries with other ids can be - * instantly added, replacing the original copy_data[]... which is fine as - * long as the copyid does not conflict. - * - * This structure must be exactly 64 bytes long. + * All HAMMER2 directories directly under the super-root on your local + * media can be mounted separately, even if they share the same physical + * device. + * + * When you do a HAMMER2 mount you are effectively tying into a HAMMER2 + * cluster via local media. The local media does not have to participate + * in the cluster, other than to provide the hammer2_copy_data[] array and + * root inode for the mount. + * + * This is important: The mount device path you specify serves to bootstrap + * your entry into the cluster, but your mount will make active connections + * to ALL copy elements in the hammer2_copy_data[] array which match the + * PFSID of the directory in the super-root that you specified. The local + * media path does not have to be mentioned in this array but becomes part + * of the cluster based on its type and access rights. ALL ELEMENTS ARE + * TREATED ACCORDING TO TYPE NO MATTER WHICH ONE YOU MOUNT FROM. 
+ * + * The actual cluster may be far larger than the elements you list in the + * hammer2_copy_data[] array. You list only the elements you wish to + * directly connect to and you are able to access the rest of the cluster + * indirectly through those connections. + * + * This structure must be exactly 128 bytes long. */ struct hammer2_copy_data { - uint8_t copyid; /* 0-255 */ - uint8_t flags; - uint8_t reserved02; - uint8_t reserved03; - uint8_t path[60]; /* up to 59-char string, nul-terminated */ + uint8_t copyid; /* 00 copyid 0-255 (must match slot) */ + uint8_t inprog; /* 01 operation in progress, or 0 */ + uint8_t chain_to; /* 02 operation chaining to, or 0 */ + uint8_t chain_from; /* 03 operation chaining from, or 0 */ + uint16_t flags; /* 04-05 flags field */ + uint8_t error; /* 06 last operational error */ + uint8_t priority; /* 07 priority and round-robin flag */ + uint8_t remote_pfs_type;/* 08 probed direct remote PFS type */ + uint8_t reserved08[23]; /* 09-1F */ + uuid_t pfsid; /* 20-2F copy target must match this uuid */ + uint8_t label[16]; /* 30-3F import/export label */ + uint8_t path[64]; /* 40-7F target specification string or key */ }; typedef struct hammer2_copy_data hammer2_copy_data_t; -#define COPYDATAF_OUTOFSYNC 0x0001 +#define COPYDATAF_ENABLED 0x0001 +#define COPYDATAF_INPROG 0x0002 +#define COPYDATAF_CONN_RR 0x80 /* round-robin at same priority */ +#define COPYDATAF_CONN_EF 0x40 /* media errors flagged */ +#define COPYDATAF_CONN_PRI 0x0F /* select priority 0-15 (15=best) */ /* * The volume header eats a 64K block. 
There is currently an issue where @@ -611,9 +658,11 @@ typedef struct hammer2_copy_data hammer2_copy_data_t; #define HAMMER2_VOLUME_ID_HBO 0x48414d3205172011LLU #define HAMMER2_VOLUME_ID_ABO 0x11201705324d4148LLU +#define HAMMER2_COPYID_COUNT 256 + struct hammer2_volume_data { /* - * 512-byte sector #0 + * sector #0 - 512 bytes */ uint64_t magic; /* 0000 Signature */ hammer2_off_t boot_beg; /* 0008 Boot area (future) */ @@ -671,28 +720,45 @@ struct hammer2_volume_data { hammer2_crc32_t icrc_sects[8]; /* 01E0-01FF */ /* - * 512-byte sector #1 + * sector #1 - 512 bytes * * The entire sector is used by a blockset. */ - hammer2_blockset_t sroot_blockset; /* 0200 Superroot directory */ + hammer2_blockset_t sroot_blockset; /* 0200-03FF Superroot dir */ /* - * 512-byte sector #2-33 + * sector #2-7 + */ + char sector2[512]; /* 0400-05FF reserved */ + char sector3[512]; /* 0600-07FF reserved */ + char sector4[512]; /* 0800-09FF reserved */ + char sector5[512]; /* 0A00-0BFF reserved */ + char sector6[512]; /* 0C00-0DFF reserved */ + char sector7[512]; /* 0E00-0FFF reserved */ + + /* + * sector #8-71 - 32768 bytes + * + * Contains the configuration for up to 256 copyinfo targets. These + * specify local and remote copies operating as masters or slaves. + * copyid's 0 and 255 are reserved (0 indicates an empty slot and 255 + * indicates the local media). * - * Up to 256 copyinfo specifications can be configured. Note that - * any given subdirectory tree can only use 8 of the 256. Having - * up to 256 configurable in the volume header allows + * Each inode contains a set of up to 8 copyids, either inherited + * from its parent or explicitly specified in the inode, which + * indexes into this array. + */ + /* 1000-8FFF copyinfo config */ + struct hammer2_copy_data copyinfo[HAMMER2_COPYID_COUNT]; + + /* * - * A specification takes 64 bytes. Each specification typically - * configures a device path such as 'serno/.s1d'. 
*/ - struct hammer2_copy_data copyinfo[256]; /* 0400-43FF copyinfo config */ /* * Remaining sections are reserved for future use. */ - char reserved0400[0xBBFC]; /* 4400-FFFB reserved */ + char reserved0400[0x6FFC]; /* 9000-FFFB reserved */ /* * icrc on entire volume header diff --git a/sys/vfs/hammer2/hammer2_inode.c b/sys/vfs/hammer2/hammer2_inode.c index 0530528cbd..3bf6c379f7 100644 --- a/sys/vfs/hammer2/hammer2_inode.c +++ b/sys/vfs/hammer2/hammer2_inode.c @@ -33,7 +33,6 @@ * SUCH DAMAGE. */ #include -#include #include #include #include @@ -256,7 +255,9 @@ hammer2_inode_create(hammer2_mount_t *hmp, *nipp = nip; nip->ip_data.type = hammer2_get_obj_type(vap->va_type); + hammer2_voldata_lock(hmp); nip->ip_data.inum = hmp->voldata.alloc_tid++; /* XXX modify/lock */ + hammer2_voldata_unlock(hmp); nip->ip_data.version = HAMMER2_INODE_VERSION_ONE; nip->ip_data.ctime = 0; nip->ip_data.mtime = 0; diff --git a/sys/vfs/hammer2/hammer2_ioctl.c b/sys/vfs/hammer2/hammer2_ioctl.c new file mode 100644 index 0000000000..b83a7764e7 --- /dev/null +++ b/sys/vfs/hammer2/hammer2_ioctl.c @@ -0,0 +1,281 @@ +/* + * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * by Venkatesh Srinivas + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. 
Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* + * Ioctl Functions. + * + * WARNING! The ioctl functions which manipulate the connection state need + * to be able to run without deadlock on the volume's chain lock. + * Most of these functions use a separate lock. + */ + +#include "hammer2.h" + +static int hammer2_ioctl_get_version(hammer2_inode_t *ip, void *data); +static int hammer2_ioctl_get_remote(hammer2_inode_t *ip, void *data); +static int hammer2_ioctl_add_remote(hammer2_inode_t *ip, void *data); +static int hammer2_ioctl_del_remote(hammer2_inode_t *ip, void *data); +static int hammer2_ioctl_rep_remote(hammer2_inode_t *ip, void *data); +static int hammer2_ioctl_get_socket(hammer2_inode_t *ip, void *data); +static int hammer2_ioctl_set_socket(hammer2_inode_t *ip, void *data); + +int +hammer2_ioctl(hammer2_inode_t *ip, u_long com, void *data, int fflag, + struct ucred *cred) +{ + int error; + + /* + * Standard root cred checks, will be selectively ignored below + * for ioctls that do not require root creds. 
+ */ + error = priv_check_cred(cred, PRIV_HAMMER_IOCTL, 0); + + switch(com) { + case HAMMER2IOC_GET_VERSION: + /* + * Retrieve version and basic status + */ + error = hammer2_ioctl_get_version(ip, data); + break; + case HAMMER2IOC_GET_REMOTE: + /* + * Retrieve information about a remote + */ + if (error == 0) + error = hammer2_ioctl_get_remote(ip, data); + break; + case HAMMER2IOC_ADD_REMOTE: + /* + * Add new remote entry. + */ + if (error == 0) + error = hammer2_ioctl_add_remote(ip, data); + break; + case HAMMER2IOC_DEL_REMOTE: + /* + * Delete existing remote entry + */ + if (error == 0) + error = hammer2_ioctl_del_remote(ip, data); + break; + case HAMMER2IOC_REP_REMOTE: + /* + * Replace existing remote entry + */ + if (error == 0) + error = hammer2_ioctl_rep_remote(ip, data); + break; + case HAMMER2IOC_GET_SOCKET: + /* + * Retrieve communications socket + */ + if (error == 0) + error = hammer2_ioctl_get_socket(ip, data); + break; + case HAMMER2IOC_SET_SOCKET: + /* + * Set communications socket for connection + */ + if (error == 0) + error = hammer2_ioctl_set_socket(ip, data); + break; + default: + error = EOPNOTSUPP; + break; + } + return (error); +} + +/* + * Retrieve version and basic info + */ +static int +hammer2_ioctl_get_version(hammer2_inode_t *ip, void *data) +{ + hammer2_mount_t *hmp = ip->hmp; + hammer2_ioc_version_t *version = data; + + version->version = hmp->voldata.version; + return 0; +} + +/* + * Retrieve information about a remote + */ +static int +hammer2_ioctl_get_remote(hammer2_inode_t *ip, void *data) +{ + hammer2_mount_t *hmp = ip->hmp; + hammer2_ioc_remote_t *remote = data; + int copyid = remote->copyid; + + if (copyid < 0 || copyid >= HAMMER2_COPYID_COUNT) + return (EINVAL); + + hammer2_voldata_lock(hmp); + remote->copy1 = hmp->voldata.copyinfo[copyid]; + hammer2_voldata_unlock(hmp); + + /* + * Advance nextid to the next in-use slot, or -1 if none (GET only) + */ + while (++copyid < HAMMER2_COPYID_COUNT && + hmp->voldata.copyinfo[copyid].copyid == 0) { + ; + } + if
(copyid == HAMMER2_COPYID_COUNT) + remote->nextid = -1; + else + remote->nextid = copyid; + + return(0); +} + +/* + * Add new remote entry + */ +static int +hammer2_ioctl_add_remote(hammer2_inode_t *ip, void *data) +{ + hammer2_mount_t *hmp = ip->hmp; + hammer2_ioc_remote_t *remote = data; + int copyid = remote->copyid; + int error = 0; + + if (copyid >= HAMMER2_COPYID_COUNT) + return (EINVAL); + + hammer2_voldata_lock(hmp); + if (copyid < 0) { + for (copyid = 1; copyid < HAMMER2_COPYID_COUNT; ++copyid) { + if (hmp->voldata.copyinfo[copyid].copyid == 0) + break; + } + if (copyid == HAMMER2_COPYID_COUNT) { + error = ENOSPC; + goto failed; + } + } + hammer2_modify_volume(hmp); + kprintf("copyid %d\n", copyid); + remote->copy1.copyid = copyid; + hmp->voldata.copyinfo[copyid] = remote->copy1; +failed: + hammer2_voldata_unlock(hmp); + return (error); +} + +/* + * Delete existing remote entry + */ +static int +hammer2_ioctl_del_remote(hammer2_inode_t *ip, void *data) +{ + hammer2_mount_t *hmp = ip->hmp; + hammer2_ioc_remote_t *remote = data; + int copyid = remote->copyid; + int error = 0; + + if (copyid >= HAMMER2_COPYID_COUNT) + return (EINVAL); + remote->copy1.path[sizeof(remote->copy1.path) - 1] = 0; + hammer2_voldata_lock(hmp); + if (copyid < 0) { + for (copyid = 1; copyid < HAMMER2_COPYID_COUNT; ++copyid) { + if (hmp->voldata.copyinfo[copyid].copyid == 0) + continue; + if (strcmp(remote->copy1.path, + hmp->voldata.copyinfo[copyid].path) == 0) { + break; + } + } + if (copyid == HAMMER2_COPYID_COUNT) { + error = ENOENT; + goto failed; + } + } + hammer2_modify_volume(hmp); + hmp->voldata.copyinfo[copyid].copyid = 0; +failed: + hammer2_voldata_unlock(hmp); + return (error); +} + +/* + * Replace existing remote entry + */ +static int +hammer2_ioctl_rep_remote(hammer2_inode_t *ip, void *data) +{ + hammer2_mount_t *hmp = ip->hmp; + hammer2_ioc_remote_t *remote = data; + int copyid = remote->copyid; + + if (copyid < 0 || copyid >= HAMMER2_COPYID_COUNT) + return (EINVAL); + 
+ hammer2_voldata_lock(hmp); + hammer2_voldata_unlock(hmp); + + return(0); +} + +/* + * Retrieve communications socket + */ +static int +hammer2_ioctl_get_socket(hammer2_inode_t *ip, void *data) +{ + return (EOPNOTSUPP); +} + +/* + * Set communications socket for connection + */ +static int +hammer2_ioctl_set_socket(hammer2_inode_t *ip, void *data) +{ + hammer2_mount_t *hmp = ip->hmp; + hammer2_ioc_remote_t *remote = data; + int copyid = remote->copyid; + + if (copyid < 0 || copyid >= HAMMER2_COPYID_COUNT) + return (EINVAL); + + hammer2_voldata_lock(hmp); + hammer2_voldata_unlock(hmp); + + return(0); +} diff --git a/sys/vfs/hammer2/hammer2_ioctl.h b/sys/vfs/hammer2/hammer2_ioctl.h new file mode 100644 index 0000000000..5ec7228da3 --- /dev/null +++ b/sys/vfs/hammer2/hammer2_ioctl.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2011-2012 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * by Venkatesh Srinivas + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +#ifndef VFS_HAMMER2_IOCTL_H_ +#define VFS_HAMMER2_IOCTL_H_ + +#ifndef _SYS_IOCCOM_H_ +#include <sys/ioccom.h> +#endif +#ifndef VFS_HAMMER2_DISK_H_ +#include "hammer2_disk.h" +#endif +#ifndef _VFS_HAMMER2_MOUNT_H_ +#include "hammer2_mount.h" +#endif + +/* + * get_version + */ +struct hammer2_ioc_version { + int version; + char reserved[256 - 4]; +}; + +typedef struct hammer2_ioc_version hammer2_ioc_version_t; + +/* + * Ioctls to manage the volume->copyinfo[] array and to associate or + * disassociate sockets + */ +struct hammer2_ioc_remote { + int copyid; + int nextid; /* for iteration (get only) */ + int fd; /* socket descriptor if applicable */ + int reserved03; + int reserved04[8]; + hammer2_copy_data_t copy1; /* copy spec */ + hammer2_copy_data_t copy2; /* copy spec (rename ops only) */ +}; + +typedef struct hammer2_ioc_remote hammer2_ioc_remote_t; + +#define HAMMER2IOC_GET_VERSION _IOWR('h', 64, struct hammer2_ioc_version) + +#define HAMMER2IOC_GET_REMOTE _IOWR('h', 68, struct hammer2_ioc_remote) +#define HAMMER2IOC_ADD_REMOTE _IOWR('h', 69, struct hammer2_ioc_remote) +#define HAMMER2IOC_DEL_REMOTE _IOWR('h', 70, struct hammer2_ioc_remote) +#define HAMMER2IOC_REP_REMOTE
_IOWR('h', 71, struct hammer2_ioc_remote) + +#define HAMMER2IOC_GET_SOCKET _IOWR('h', 76, struct hammer2_ioc_remote) +#define HAMMER2IOC_SET_SOCKET _IOWR('h', 77, struct hammer2_ioc_remote) + +#endif diff --git a/sys/vfs/hammer2/hammer2_subr.c b/sys/vfs/hammer2/hammer2_subr.c index bf7ec1c5c4..03c8270bd9 100644 --- a/sys/vfs/hammer2/hammer2_subr.c +++ b/sys/vfs/hammer2/hammer2_subr.c @@ -33,7 +33,6 @@ * SUCH DAMAGE. */ #include -#include #include #include #include @@ -129,6 +128,18 @@ hammer2_mount_unlock(hammer2_mount_t *hmp) lockmgr(&hmp->vchain.lk, LK_RELEASE); } +void +hammer2_voldata_lock(hammer2_mount_t *hmp) +{ + lockmgr(&hmp->voldatalk, LK_EXCLUSIVE); +} + +void +hammer2_voldata_unlock(hammer2_mount_t *hmp) +{ + lockmgr(&hmp->voldatalk, LK_RELEASE); +} + /* * Return the directory entry type for an inode */ diff --git a/sys/vfs/hammer2/hammer2_vfsops.c b/sys/vfs/hammer2/hammer2_vfsops.c index 80e983a95a..2ece8ad7ff 100644 --- a/sys/vfs/hammer2/hammer2_vfsops.c +++ b/sys/vfs/hammer2/hammer2_vfsops.c @@ -326,6 +326,7 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, /* hmp->vchain.u.xxx is left NULL */ lockinit(&hmp->vchain.lk, "volume", 0, LK_CANRECURSE); lockinit(&hmp->alloclk, "h2alloc", 0, 0); + lockinit(&hmp->voldatalk, "voldata", 0, LK_CANRECURSE); /* * Install the volume header @@ -345,6 +346,11 @@ hammer2_vfs_mount(struct mount *mp, char *path, caddr_t data, mp->mnt_vstat.f_frsize = HAMMER2_PBUFSIZE; mp->mnt_vstat.f_bsize = HAMMER2_PBUFSIZE; + /* + * Optional fields + */ + mp->mnt_iosize_max = MAXPHYS; + /* * First locate the super-root inode, which is key 0 relative to the * volume header's blockset. @@ -450,13 +456,20 @@ hammer2_vfs_unmount(struct mount *mp, int mntflags) return error; /* - * Flush any left over chains + * Flush any left over chains. The voldata lock is only used + * to synchronize against HAMMER2_CHAIN_MODIFIED_AUX. 
*/ - if (hmp->vchain.flags & (HAMMER2_CHAIN_MODIFIED1 | + hammer2_voldata_lock(hmp); + if (hmp->vchain.flags & (HAMMER2_CHAIN_MODIFIED | + HAMMER2_CHAIN_MODIFIED_AUX | HAMMER2_CHAIN_SUBMODIFIED)) { + hammer2_voldata_unlock(hmp); hammer2_vfs_sync(mp, MNT_WAIT); + } else { + hammer2_voldata_unlock(hmp); } - if (hmp->vchain.flags & (HAMMER2_CHAIN_MODIFIED1 | + if (hmp->vchain.flags & (HAMMER2_CHAIN_MODIFIED | + HAMMER2_CHAIN_MODIFIED_AUX | HAMMER2_CHAIN_SUBMODIFIED)) { kprintf("hammer2_unmount: chains left over after final sync\n"); if (hammer2_debug & 0x0010) @@ -619,8 +632,9 @@ hammer2_vfs_sync(struct mount *mp, int waitfor) } #endif hammer2_chain_lock(hmp, &hmp->vchain, HAMMER2_RESOLVE_ALWAYS); - if (hmp->vchain.flags & - (HAMMER2_CHAIN_MODIFIED1 | HAMMER2_CHAIN_SUBMODIFIED)) { + if (hmp->vchain.flags & (HAMMER2_CHAIN_MODIFIED | + HAMMER2_CHAIN_MODIFIED_AUX | + HAMMER2_CHAIN_SUBMODIFIED)) { hammer2_chain_flush(hmp, &hmp->vchain); haswork = 1; } else { @@ -640,8 +654,10 @@ hammer2_vfs_sync(struct mount *mp, int waitfor) if (error == 0 && haswork) { struct buf *bp; - kprintf("synchronize disk\n"); - + /* + * Synchronize the disk before flushing the volume + * header. + */ bp = getpbuf(NULL); bp->b_bio1.bio_offset = 0; bp->b_bufsize = 0; @@ -653,10 +669,15 @@ hammer2_vfs_sync(struct mount *mp, int waitfor) biowait(&bp->b_bio1, "h2vol"); relpbuf(bp, NULL); - kprintf("flush volume header\n"); - + /* + * Then we can safely flush the volume header. Volume + * data is locked separately to prevent ioctl functions + * from deadlocking due to a configuration issue. 
+ */ bp = getblk(hmp->devvp, 0, HAMMER2_PBUFSIZE, 0, 0); + hammer2_voldata_lock(hmp); bcopy(&hmp->voldata, bp->b_data, HAMMER2_PBUFSIZE); + hammer2_voldata_unlock(hmp); bawrite(bp); } return (error); @@ -676,7 +697,7 @@ hammer2_sync_scan1(struct mount *mp, struct vnode *vp, void *data) ip = VTOI(vp); if (vp->v_type == VNON || ip == NULL || - ((ip->chain.flags & (HAMMER2_CHAIN_MODIFIED1 | + ((ip->chain.flags & (HAMMER2_CHAIN_MODIFIED | HAMMER2_CHAIN_DIRTYEMBED)) == 0 && RB_EMPTY(&vp->v_rbdirty_tree))) { return(-1); @@ -693,7 +714,7 @@ hammer2_sync_scan2(struct mount *mp, struct vnode *vp, void *data) ip = VTOI(vp); if (vp->v_type == VNON || vp->v_type == VBAD || - ((ip->chain.flags & (HAMMER2_CHAIN_MODIFIED1 | + ((ip->chain.flags & (HAMMER2_CHAIN_MODIFIED | HAMMER2_CHAIN_DIRTYEMBED)) == 0 && RB_EMPTY(&vp->v_rbdirty_tree))) { return(0); diff --git a/sys/vfs/hammer2/hammer2_vnops.c b/sys/vfs/hammer2/hammer2_vnops.c index 2a34597ef6..7dfe5512d9 100644 --- a/sys/vfs/hammer2/hammer2_vnops.c +++ b/sys/vfs/hammer2/hammer2_vnops.c @@ -2082,7 +2082,7 @@ hammer2_strategy_write(struct vop_strategy_args *ap) biodone(nbio); /* - * This special flag does not follow the normal MODIFY1 rules + * This special flag does not follow the normal MODIFY rules * because we might deadlock on ip. Instead we depend on * VOP_FSYNC() to detect the case. 
*/ @@ -2096,6 +2096,25 @@ hammer2_strategy_write(struct vop_strategy_args *ap) return (0); } +/* + * hammer2_vop_ioctl { vp, command, data, fflag, cred } + */ +static +int +hammer2_vop_ioctl(struct vop_ioctl_args *ap) +{ + hammer2_mount_t *hmp; + hammer2_inode_t *ip; + int error; + + ip = VTOI(ap->a_vp); + hmp = ip->hmp; + + error = hammer2_ioctl(ip, ap->a_command, (void *)ap->a_data, + ap->a_fflag, ap->a_cred); + return (error); +} + static int hammer2_vop_mountctl(struct vop_mountctl_args *ap) @@ -2150,6 +2169,7 @@ struct vop_ops hammer2_vnode_vops = { .vop_nresolve = hammer2_vop_nresolve, .vop_nlookupdotdot = hammer2_vop_nlookupdotdot, .vop_nmkdir = hammer2_vop_nmkdir, + .vop_ioctl = hammer2_vop_ioctl, .vop_mountctl = hammer2_vop_mountctl, .vop_bmap = hammer2_vop_bmap, .vop_strategy = hammer2_vop_strategy, -- 2.41.0