2 * Copyright (c) 2012 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@dragonflybsd.org>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * This module allows disk devices to be created and associated with a
36 * communications pipe or socket. You open the device and issue an
37 * ioctl() to install a new disk along with its communications descriptor.
39 * All further communication occurs via the descriptor using the DMSG
40 * LNK_CONN, LNK_SPAN, and BLOCK protocols. The descriptor can be a
41 * direct connection to a remote machine's disk (in-kernel), to a remote
42 * cluster controller, to the local cluster controller, etc.
44 * /dev/xdisk is the control device; issue ioctl()s to create the /dev/xa%d
45 * devices. These devices look like raw disks to the system.
47 #include <sys/param.h>
48 #include <sys/systm.h>
51 #include <sys/device.h>
52 #include <sys/devicestat.h>
54 #include <sys/kernel.h>
55 #include <sys/malloc.h>
56 #include <sys/sysctl.h>
58 #include <sys/queue.h>
61 #include <sys/kern_syscall.h>
64 #include <sys/xdiskioctl.h>
67 #include <sys/thread2.h>
69 static int xdisk_attach(struct xdisk_attach_ioctl *xaioc);
70 static void xa_exit(kdmsg_iocom_t *iocom);
71 static int xa_msg_conn_reply(kdmsg_state_t *state, kdmsg_msg_t *msg);
72 static int xa_msg_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg);
73 static int xa_lnk_rcvmsg(kdmsg_msg_t *msg);
74 static int xa_lnk_dbgmsg(kdmsg_msg_t *msg);
75 static int xa_adhoc_input(kdmsg_msg_t *msg);
77 MALLOC_DEFINE(M_XDISK, "Networked disk client", "Network Disks");
80 * Control device, issue ioctls to create xa devices.
82 static d_open_t xdisk_open;
83 static d_close_t xdisk_close;
84 static d_ioctl_t xdisk_ioctl;
86 static struct dev_ops xdisk_ops = {
87 { "xdisk", 0, D_MPSAFE },
89 .d_close = xdisk_close,
90 .d_ioctl = xdisk_ioctl
96 static d_open_t xa_open;
97 static d_close_t xa_close;
98 static d_ioctl_t xa_ioctl;
99 static d_strategy_t xa_strategy;
100 static d_psize_t xa_size;
102 static struct dev_ops xa_ops = {
103 { "xa", 0, D_DISK | D_CANFREE | D_MPSAFE },
108 .d_write = physwrite,
109 .d_strategy = xa_strategy,
114 TAILQ_ENTRY(xa_softc) entry;
117 struct xdisk_attach_ioctl xaioc;
118 struct disk_info info;
126 static struct lwkt_token xdisk_token = LWKT_TOKEN_INITIALIZER(xdisk_token);
127 static int xdisk_opencount;
128 static cdev_t xdisk_dev;
129 static TAILQ_HEAD(, xa_softc) xa_queue;
132 * Module initialization
135 xdisk_modevent(module_t mod, int type, void *data)
139 TAILQ_INIT(&xa_queue);
140 xdisk_dev = make_dev(&xdisk_ops, 0,
141 UID_ROOT, GID_WHEEL, 0600, "xdisk");
145 if (xdisk_opencount || TAILQ_FIRST(&xa_queue))
148 destroy_dev(xdisk_dev);
151 dev_ops_remove_all(&xdisk_ops);
152 dev_ops_remove_all(&xa_ops);
160 DEV_MODULE(xdisk, xdisk_modevent, 0);
166 xdisk_open(struct dev_open_args *ap)
168 lwkt_gettoken(&xdisk_token);
170 lwkt_reltoken(&xdisk_token);
175 xdisk_close(struct dev_close_args *ap)
177 lwkt_gettoken(&xdisk_token);
179 lwkt_reltoken(&xdisk_token);
184 xdisk_ioctl(struct dev_ioctl_args *ap)
190 error = xdisk_attach((void *)ap->a_data);
199 /************************************************************************
201 ************************************************************************/
204 xdisk_attach(struct xdisk_attach_ioctl *xaioc)
206 struct xa_softc *scan;
214 fp = holdfp(curproc->p_fd, xaioc->fd, -1);
218 xa = kmalloc(sizeof(*xa), M_XDISK, M_WAITOK|M_ZERO);
223 lwkt_gettoken(&xdisk_token);
226 TAILQ_FOREACH(scan, &xa_queue, entry) {
227 if (scan->unit == unit)
230 } while (scan != NULL);
233 TAILQ_INSERT_TAIL(&xa_queue, xa, entry);
234 lwkt_reltoken(&xdisk_token);
239 dev = disk_create(unit, &xa->disk, &xa_ops);
243 xa->info.d_media_blksize = 512;
244 xa->info.d_media_blocks = xaioc->size / 512;
245 xa->info.d_dsflags = DSO_MBRQUIET | DSO_RAWPSIZE;
246 xa->info.d_secpertrack = 32;
247 xa->info.d_nheads = 64;
248 xa->info.d_secpercyl = xa->info.d_secpertrack * xa->info.d_nheads;
249 xa->info.d_ncylinders = 0;
250 disk_setdiskinfo_sync(&xa->disk, &xa->info);
253 * Set up messaging connection
255 ksnprintf(devname, sizeof(devname), "xa%d", unit);
256 kdmsg_iocom_init(&xa->iocom, xa, M_XDISK,
260 xa->iocom.exit_func = xa_exit;
262 kern_uuidgen(&xa->pfs_fsid, 1);
263 kdmsg_iocom_reconnect(&xa->iocom, fp, devname);
266 * Issue DMSG_LNK_CONN for device. This sets up filters so hopefully
267 * the only SPANs we receive are from servers providing the label
268 * being configured. Hopefully that's just a single server(!)(!).
269 * (HAMMER peers might have multiple servers but block device peers
270 * currently only allow one). There could still be multiple spans
271 * due to there being multiple paths available, however.
274 msg = kdmsg_msg_alloc(&xa->iocom.router, DMSG_LNK_CONN | DMSGF_CREATE,
275 xa_msg_conn_reply, xa);
276 msg->any.lnk_conn.pfs_type = 0;
277 msg->any.lnk_conn.proto_version = DMSG_SPAN_PROTO_1;
278 msg->any.lnk_conn.peer_type = DMSG_PEER_BLOCK;
279 msg->any.lnk_conn.peer_mask = 1LLU << DMSG_PEER_BLOCK;
280 ksnprintf(msg->any.lnk_conn.cl_label,
281 sizeof(msg->any.lnk_conn.cl_label),
282 "%s", xaioc->cl_label);
283 msg->any.lnk_conn.pfs_fsid = xa->pfs_fsid;
284 xa->iocom.conn_state = msg->state;
285 kdmsg_msg_write(msg);
287 xa->inprog = 0; /* unstall msg thread exit (if racing) */
293 * Handle reply to our LNK_CONN transaction (transaction remains open)
297 xa_msg_conn_reply(kdmsg_state_t *state, kdmsg_msg_t *msg)
299 struct xa_softc *xa = state->any.any;
302 if (msg->any.head.cmd & DMSGF_CREATE) {
303 kprintf("XA LNK_CONN received reply\n");
304 rmsg = kdmsg_msg_alloc(&xa->iocom.router,
305 DMSG_LNK_SPAN | DMSGF_CREATE,
306 xa_msg_span_reply, xa);
307 rmsg->any.lnk_span.pfs_type = 0;
308 rmsg->any.lnk_span.proto_version = DMSG_SPAN_PROTO_1;
309 rmsg->any.lnk_span.peer_type = DMSG_PEER_BLOCK;
311 ksnprintf(rmsg->any.lnk_span.cl_label,
312 sizeof(rmsg->any.lnk_span.cl_label),
313 "%s", xa->xaioc.cl_label);
314 kdmsg_msg_write(rmsg);
316 if ((state->txcmd & DMSGF_DELETE) == 0 &&
317 (msg->any.head.cmd & DMSGF_DELETE)) {
318 kprintf("DISK LNK_CONN terminated by remote\n");
319 xa->iocom.conn_state = NULL;
320 kdmsg_msg_reply(msg, 0);
326 xa_msg_span_reply(kdmsg_state_t *state, kdmsg_msg_t *msg)
328 if ((state->txcmd & DMSGF_DELETE) == 0 &&
329 (msg->any.head.cmd & DMSGF_DELETE)) {
330 kprintf("SPAN REPLY - Our sent span was terminated by the "
331 "remote %08x state %p\n", msg->any.head.cmd, state);
332 kdmsg_msg_reply(msg, 0);
338 * Called from iocom core transmit thread upon disconnect.
342 xa_exit(kdmsg_iocom_t *iocom)
344 struct xa_softc *xa = iocom->handle;
346 kprintf("XA_EXIT UNIT %d\n", xa->unit);
348 kdmsg_iocom_uninit(iocom);
351 tsleep(xa, 0, "xarace", hz);
355 * XXX allow reconnection, wait for users to terminate?
358 disk_destroy(&xa->disk);
360 lwkt_gettoken(&xdisk_token);
361 TAILQ_REMOVE(&xa_queue, xa, entry);
362 lwkt_reltoken(&xdisk_token);
368 xa_lnk_rcvmsg(kdmsg_msg_t *msg)
370 switch(msg->any.head.cmd & DMSGF_TRANSMASK) {
371 case DMSG_LNK_CONN | DMSGF_CREATE:
373 * connection request from peer, send a streaming
374 * result of 0 (leave the transaction open). Transaction
375 * is left open for the duration of the connection, we
376 * let the kern_dmsg module clean it up on disconnect.
378 kdmsg_msg_result(msg, 0);
380 case DMSG_LNK_SPAN | DMSGF_CREATE:
382 * Incoming SPAN - transaction create
384 * We do not have to respond right now. Instead we will
385 * respond later on when the peer deletes their side.
388 case DMSG_LNK_SPAN | DMSGF_DELETE:
390 * Incoming SPAN - transaction delete.
392 * We must terminate our side so both ends can free up
393 * their recorded state.
396 case DMSG_LNK_SPAN | DMSGF_CREATE | DMSGF_DELETE:
398 * Incoming SPAN - transaction delete (degenerate span).
400 * We must terminate our side so both ends can free up
401 * their recorded state.
403 kdmsg_msg_reply(msg, 0);
407 * Unsupported LNK message received. We only need to
408 * reply if it's a transaction in order to close our end.
409 * Ignore any one-way messages or any further messages
410 * associated with the transaction.
412 * NOTE: This case also includes DMSG_LNK_ERROR messages
413 * which might be one-way, replying to those would
414 * cause an infinite ping-pong.
416 if (msg->any.head.cmd & DMSGF_CREATE)
417 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
424 xa_lnk_dbgmsg(kdmsg_msg_t *msg)
426 switch(msg->any.head.cmd & DMSGF_CMDSWMASK) {
429 * Execute shell command (not supported atm).
431 * This is a one-way packet but if not (e.g. if part of
432 * a streaming transaction), we will have already closed
435 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
437 case DMSG_DBG_SHELL | DMSGF_REPLY:
439 * Receive one or more replies to a shell command that we
442 * This is a one-way packet but if not (e.g. if part of
443 * a streaming transaction), we will have already closed
447 msg->aux_data[msg->aux_size - 1] = 0;
448 kprintf("DEBUGMSG: %s\n", msg->aux_data);
453 * We don't understand what is going on, issue a reply.
454 * This will take care of all left-over cases whether it
455 * is a transaction or one-way.
457 kdmsg_msg_reply(msg, DMSG_ERR_NOSUPP);
464 xa_adhoc_input(kdmsg_msg_t *msg)
466 kprintf("XA ADHOC INPUT MSG %08x\n", msg->any.head.cmd);
470 /************************************************************************
471 * XA DEVICE INTERFACE *
472 ************************************************************************/
475 xa_open(struct dev_open_args *ap)
477 cdev_t dev = ap->a_head.a_dev;
482 dev->si_bsize_phys = 512;
483 dev->si_bsize_best = 32768;
486 * Issue streaming open and wait for reply.
489 /* XXX check ap->a_oflags & FWRITE, EACCES if read-only */
495 xa_close(struct dev_close_args *ap)
497 cdev_t dev = ap->a_head.a_dev;
501 xa_strategy(struct dev_strategy_args *ap)
506 xa_ioctl(struct dev_ioctl_args *ap)
512 xa_size(struct dev_psize_args *ap)
516 if ((xa = ap->a_head.a_dev->si_drv1) == NULL)
520 ap->a_result = xa->info.d_media_blocks;