hammer2 - Misc cluster protocol work
[dragonfly.git] / sys / vfs / hammer2 / hammer2_iocom.c
1 /*
2  * Copyright (c) 2011-2015 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  * by Daniel Flores (GSOC 2013 - mentored by Matthew Dillon, compression)
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/nlookup.h>
39 #include <sys/vnode.h>
40 #include <sys/mount.h>
41 #include <sys/fcntl.h>
42 #include <sys/buf.h>
43 #include <sys/uuid.h>
44 #include <sys/vfsops.h>
45 #include <sys/sysctl.h>
46 #include <sys/socket.h>
47 #include <sys/objcache.h>
48
49 #include <sys/proc.h>
50 #include <sys/namei.h>
51 #include <sys/mountctl.h>
52 #include <sys/dirent.h>
53 #include <sys/uio.h>
54
55 #include <sys/mutex.h>
56 #include <sys/mutex2.h>
57
58 #include "hammer2.h"
59 #include "hammer2_disk.h"
60 #include "hammer2_mount.h"
61
62 static int hammer2_rcvdmsg(kdmsg_msg_t *msg);
63 static void hammer2_autodmsg(kdmsg_msg_t *msg);
64 static int hammer2_lnk_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg);
65
66 void
67 hammer2_iocom_init(hammer2_mount_t *hmp)
68 {
69         /*
70          * Automatic LNK_CONN
71          * Automatic LNK_SPAN handling
72          * No automatic LNK_SPAN generation (we generate multiple spans
73          *                                   ourselves).
74          */
75         kdmsg_iocom_init(&hmp->iocom, hmp,
76                          KDMSG_IOCOMF_AUTOCONN |
77                          KDMSG_IOCOMF_AUTORXSPAN,
78                          hmp->mchain, hammer2_rcvdmsg);
79 }
80
81 void
82 hammer2_iocom_uninit(hammer2_mount_t *hmp)
83 {
84         kdmsg_iocom_uninit(&hmp->iocom);        /* XXX chain depend deadlck? */
85 }
86
87 /*
88  * Reconnect using the passed file pointer.  The caller must ref the
89  * fp for us.
90  */
91 void
92 hammer2_cluster_reconnect(hammer2_mount_t *hmp, struct file *fp)
93 {
94         size_t name_len;
95         const char *name = "disk-volume";
96
97         /*
98          * Closes old comm descriptor, kills threads, cleans up
99          * states, then installs the new descriptor and creates
100          * new threads.
101          */
102         kdmsg_iocom_reconnect(&hmp->iocom, fp, "hammer2");
103
104         /*
105          * Setup LNK_CONN fields for autoinitiated state machine.  We
106          * will use SPANs to advertise multiple PFSs so only pass the
107          * fsid and HAMMER2_PFSTYPE_SUPROOT for the AUTOCONN.
108          *
109          * Since we will be initiating multiple LNK_SPANs we cannot
110          * use AUTOTXSPAN, but we do use AUTORXSPAN so kdmsg tracks
111          * received LNK_SPANs, and we simply monitor those messages.
112          */
113         bzero(&hmp->iocom.auto_lnk_conn.pfs_clid,
114               sizeof(hmp->iocom.auto_lnk_conn.pfs_clid));
115         hmp->iocom.auto_lnk_conn.pfs_fsid = hmp->voldata.fsid;
116         hmp->iocom.auto_lnk_conn.pfs_type = HAMMER2_PFSTYPE_SUPROOT;
117         hmp->iocom.auto_lnk_conn.proto_version = DMSG_SPAN_PROTO_1;
118 #if 0
119         hmp->iocom.auto_lnk_conn.peer_type = hmp->voldata.peer_type;
120 #endif
121         hmp->iocom.auto_lnk_conn.peer_type = DMSG_PEER_HAMMER2;
122
123         /*
124          * Filter adjustment.  Clients do not need visibility into other
125          * clients (otherwise millions of clients would present a serious
126          * problem).  The fs_label also serves to restrict the namespace.
127          */
128         hmp->iocom.auto_lnk_conn.peer_mask = 1LLU << DMSG_PEER_HAMMER2;
129         hmp->iocom.auto_lnk_conn.pfs_mask = (uint64_t)-1;
130
131 #if 0
132         switch (ipdata->pfs_type) {
133         case DMSG_PFSTYPE_CLIENT:
134                 hmp->iocom.auto_lnk_conn.peer_mask &=
135                                 ~(1LLU << DMSG_PFSTYPE_CLIENT);
136                 break;
137         default:
138                 break;
139         }
140 #endif
141
142         name_len = strlen(name);
143         if (name_len >= sizeof(hmp->iocom.auto_lnk_conn.fs_label))
144                 name_len = sizeof(hmp->iocom.auto_lnk_conn.fs_label) - 1;
145         bcopy(name, hmp->iocom.auto_lnk_conn.fs_label, name_len);
146         hmp->iocom.auto_lnk_conn.fs_label[name_len] = 0;
147
148         kdmsg_iocom_autoinitiate(&hmp->iocom, hammer2_autodmsg);
149 }
150
151 static int
152 hammer2_rcvdmsg(kdmsg_msg_t *msg)
153 {
154         kprintf("RCVMSG %08x\n", msg->tcmd);
155
156         switch(msg->tcmd) {
157         case DMSG_DBG_SHELL:
158                 /*
159                  * (non-transaction)
160                  * Execute shell command (not supported atm)
161                  */
162                 kdmsg_msg_result(msg, DMSG_ERR_NOSUPP);
163                 break;
164         case DMSG_DBG_SHELL | DMSGF_REPLY:
165                 /*
166                  * (non-transaction)
167                  */
168                 if (msg->aux_data) {
169                         msg->aux_data[msg->aux_size - 1] = 0;
170                         kprintf("HAMMER2 DBG: %s\n", msg->aux_data);
171                 }
172                 break;
173         default:
174                 /*
175                  * Unsupported message received.  We only need to
176                  * reply if it's a transaction in order to close our end.
177                  * Ignore any one-way messages or any further messages
178                  * associated with the transaction.
179                  *
180                  * NOTE: This case also includes DMSG_LNK_ERROR messages
181                  *       which might be one-way, replying to those would
182                  *       cause an infinite ping-pong.
183                  */
184                 if (msg->any.head.cmd & DMSGF_CREATE)
185                         kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
186                 break;
187         }
188         return(0);
189 }
190
191 /*
192  * This function is called after KDMSG has automatically handled processing
193  * of a LNK layer message (typically CONN or SPAN).
194  *
195  * We tag off the LNK_CONN to trigger our LNK_VOLCONF messages which
196  * advertises all available hammer2 super-root volumes.
197  *
198  * We collect span state
199  */
200 static void hammer2_update_spans(hammer2_mount_t *hmp, kdmsg_state_t *state);
201
202 static void
203 hammer2_autodmsg(kdmsg_msg_t *msg)
204 {
205         hammer2_mount_t *hmp = msg->state->iocom->handle;
206         int copyid;
207
208         switch(msg->tcmd) {
209         case DMSG_LNK_CONN | DMSGF_CREATE:
210         case DMSG_LNK_CONN | DMSGF_CREATE | DMSGF_DELETE:
211         case DMSG_LNK_CONN | DMSGF_DELETE:
212                 /*
213                  * NOTE: kern_dmsg will automatically issue a result,
214                  *       leaving the transaction open, for CREATEs,
215                  *       and will automatically issue a terminating reply
216                  *       for DELETEs.
217                  */
218                 break;
219         case DMSG_LNK_CONN | DMSGF_CREATE | DMSGF_REPLY:
220         case DMSG_LNK_CONN | DMSGF_CREATE | DMSGF_DELETE | DMSGF_REPLY:
221                 /*
222                  * Do a volume configuration dump when we receive a reply
223                  * to our auto-CONN (typically leaving the transaction open).
224                  */
225                 if (msg->any.head.cmd & DMSGF_CREATE) {
226                         kprintf("HAMMER2: VOLDATA DUMP\n");
227
228                         /*
229                          * Dump the configuration stored in the volume header.
230                          * This will typically be import/export access rights,
231                          * master encryption keys (encrypted), etc.
232                          */
233                         hammer2_voldata_lock(hmp);
234                         copyid = 0;
235                         while (copyid < HAMMER2_COPYID_COUNT) {
236                                 if (hmp->voldata.copyinfo[copyid].copyid)
237                                         hammer2_volconf_update(hmp, copyid);
238                                 ++copyid;
239                         }
240                         hammer2_voldata_unlock(hmp);
241
242                         kprintf("HAMMER2: INITIATE SPANs\n");
243                         hammer2_update_spans(hmp, msg->state);
244                 }
245                 if ((msg->any.head.cmd & DMSGF_DELETE) &&
246                     msg->state && (msg->state->txcmd & DMSGF_DELETE) == 0) {
247                         kprintf("HAMMER2: CONN WAS TERMINATED\n");
248                 }
249                 break;
250         case DMSG_LNK_SPAN | DMSGF_CREATE:
251                 /*
252                  * Monitor SPANs and issue a result, leaving the SPAN open
253                  * if it is something we can use now or in the future.
254                  */
255                 if (msg->any.lnk_span.peer_type != DMSG_PEER_HAMMER2) {
256                         kdmsg_msg_reply(msg, 0);
257                         break;
258                 }
259                 if (msg->any.lnk_span.proto_version != DMSG_SPAN_PROTO_1) {
260                         kdmsg_msg_reply(msg, 0);
261                         break;
262                 }
263                 DMSG_TERMINATE_STRING(msg->any.lnk_span.fs_label);
264                 kprintf("H2 +RXSPAN cmd=%08x (%-20s) cl=", msg->any.head.cmd, msg->any.lnk_span.fs_label);
265                 printf_uuid(&msg->any.lnk_span.pfs_clid);
266                 kprintf(" fs=");
267                 printf_uuid(&msg->any.lnk_span.pfs_fsid);
268                 kprintf(" type=%d\n", msg->any.lnk_span.pfs_type);
269                 kdmsg_msg_result(msg, 0);
270                 break;
271         case DMSG_LNK_SPAN | DMSGF_DELETE:
272                 /*
273                  * NOTE: kern_dmsg will automatically reply to DELETEs.
274                  */
275                 kprintf("H2 -RXSPAN\n");
276                 break;
277         default:
278                 break;
279         }
280 }
281
282 /*
283  * Update LNK_SPAN state
284  */
285 static void
286 hammer2_update_spans(hammer2_mount_t *hmp, kdmsg_state_t *state)
287 {
288         const hammer2_inode_data_t *ripdata;
289         hammer2_cluster_t *cparent;
290         hammer2_cluster_t *cluster;
291         hammer2_pfsmount_t *spmp;
292         hammer2_key_t key_next;
293         kdmsg_msg_t *rmsg;
294         size_t name_len;
295         int ddflag;
296
297         /*
298          * Lookup mount point under the media-localized super-root.
299          *
300          * cluster->pmp will incorrectly point to spmp and must be fixed
301          * up later on.
302          */
303         spmp = hmp->spmp;
304         cparent = hammer2_inode_lock_ex(spmp->iroot);
305         cluster = hammer2_cluster_lookup(cparent, &key_next,
306                                          HAMMER2_KEY_MIN,
307                                          HAMMER2_KEY_MAX,
308                                          0, &ddflag);
309         while (cluster) {
310                 if (hammer2_cluster_type(cluster) != HAMMER2_BREF_TYPE_INODE)
311                         continue;
312                 ripdata = &hammer2_cluster_rdata(cluster)->ipdata;
313                 kprintf("UPDATE SPANS: %s\n", ripdata->filename);
314
315                 rmsg = kdmsg_msg_alloc(&hmp->iocom.state0,
316                                        DMSG_LNK_SPAN | DMSGF_CREATE,
317                                        hammer2_lnk_span_reply, NULL);
318                 rmsg->any.lnk_span.pfs_clid = ripdata->pfs_clid;
319                 rmsg->any.lnk_span.pfs_fsid = ripdata->pfs_fsid;
320                 rmsg->any.lnk_span.pfs_type = ripdata->pfs_type;
321                 rmsg->any.lnk_span.peer_type = DMSG_PEER_HAMMER2;
322                 rmsg->any.lnk_span.proto_version = DMSG_SPAN_PROTO_1;
323                 name_len = ripdata->name_len;
324                 if (name_len >= sizeof(rmsg->any.lnk_span.fs_label))
325                         name_len = sizeof(rmsg->any.lnk_span.fs_label) - 1;
326                 bcopy(ripdata->filename, rmsg->any.lnk_span.fs_label, name_len);
327
328                 kdmsg_msg_write(rmsg);
329
330                 cluster = hammer2_cluster_next(cparent, cluster,
331                                                &key_next,
332                                                key_next,
333                                                HAMMER2_KEY_MAX,
334                                                0);
335         }
336         hammer2_inode_unlock_ex(spmp->iroot, cparent);
337 }
338
339 static
340 int
341 hammer2_lnk_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg)
342 {
343         if ((state->txcmd & DMSGF_DELETE) == 0 &&
344             (msg->any.head.cmd & DMSGF_DELETE)) {
345                 kdmsg_msg_reply(msg, 0);
346         }
347         return 0;
348 }
349
350 /*
351  * Volume configuration updates are passed onto the userland service
352  * daemon via the open LNK_CONN transaction.
353  */
354 void
355 hammer2_volconf_update(hammer2_mount_t *hmp, int index)
356 {
357         kdmsg_msg_t *msg;
358
359         /* XXX interlock against connection state termination */
360         kprintf("volconf update %p\n", hmp->iocom.conn_state);
361         if (hmp->iocom.conn_state) {
362                 kprintf("TRANSMIT VOLCONF VIA OPEN CONN TRANSACTION\n");
363                 msg = kdmsg_msg_alloc(hmp->iocom.conn_state,
364                                       DMSG_LNK_HAMMER2_VOLCONF,
365                                       NULL, NULL);
366                 H2_LNK_VOLCONF(msg)->copy = hmp->voldata.copyinfo[index];
367                 H2_LNK_VOLCONF(msg)->mediaid = hmp->voldata.fsid;
368                 H2_LNK_VOLCONF(msg)->index = index;
369                 kdmsg_msg_write(msg);
370         }
371 }