From a3d02589a63f66c372c122a7151a29d19c16b712 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Thu, 26 Apr 2007 02:11:00 +0000 Subject: [PATCH] Move syslink_desc to sys/syslink_rpc.h so kernel code does not need to #include sys/syslink.h. Add a kernel config option 'SYSLINK' to build with kern_syslink.c, so it can be worked on (read: broken) without interfering with other developer's kernel builds. Add a shims file for the syslink() system call for kernels not built with kern_syslink.c. The shims file can be used generally for this purpose. --- sys/conf/files | 5 +- sys/conf/options | 6 +- sys/kern/kern_syslink.c | 194 +++++++++++++++++++++++++++++++---- sys/kern/subr_shims.c | 58 +++++++++++ sys/sys/device.h | 6 +- sys/sys/syslink.h | 119 +++++----------------- sys/sys/syslink_msg.h | 221 ++++++++++++++++++++++++++++------------ sys/sys/syslink_rpc.h | 61 +++++++++++ sys/sys/vnode.h | 6 +- 9 files changed, 485 insertions(+), 191 deletions(-) create mode 100644 sys/kern/subr_shims.c create mode 100644 sys/sys/syslink_rpc.h diff --git a/sys/conf/files b/sys/conf/files index e2e0c1fbb8..175c0270fc 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1,5 +1,5 @@ # $FreeBSD: src/sys/conf/files,v 1.340.2.137 2003/06/04 17:10:30 sam Exp $ -# $DragonFly: src/sys/conf/files,v 1.157 2007/04/22 01:13:08 dillon Exp $ +# $DragonFly: src/sys/conf/files,v 1.158 2007/04/26 02:10:57 dillon Exp $ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and @@ -543,7 +543,7 @@ kern/kern_resource.c standard kern/kern_plimit.c standard kern/kern_slaballoc.c standard kern/kern_systimer.c standard -kern/kern_syslink.c standard +kern/kern_syslink.c optional syslink kern/kern_cputimer.c standard kern/kern_mpipe.c standard kern/kern_shutdown.c standard @@ -581,6 +581,7 @@ kern/subr_eventhandler.c standard kern/subr_kcore.c standard kern/subr_kobj.c standard kern/subr_log.c standard +kern/subr_shims.c 
standard kern/libmchain/subr_mchain.c optional libmchain kern/subr_module.c standard kern/subr_param.c standard diff --git a/sys/conf/options b/sys/conf/options index cc7a38fd7e..184ec98967 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -1,5 +1,5 @@ # $FreeBSD: src/sys/conf/options,v 1.191.2.53 2003/06/04 17:56:58 sam Exp $ -# $DragonFly: src/sys/conf/options,v 1.63 2007/02/11 01:51:28 swildner Exp $ +# $DragonFly: src/sys/conf/options,v 1.64 2007/04/26 02:10:57 dillon Exp $ # # On the handling of kernel options # @@ -538,3 +538,7 @@ SCTP_BLK_LOGGING opt_sctp.h SCTP_STR_LOGGING opt_sctp.h SCTP_FR_LOGGING opt_sctp.h SCTP_MAP_LOGGING opt_sctp.h + +# syslink kernel support +# +SYSLINK opt_syslink.h diff --git a/sys/kern/kern_syslink.c b/sys/kern/kern_syslink.c index 261ef18898..a5fcea0191 100644 --- a/sys/kern/kern_syslink.c +++ b/sys/kern/kern_syslink.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 The DragonFly Project. All rights reserved. + * Copyright (c) 2006-2007 The DragonFly Project. All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon @@ -31,12 +31,16 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/kern/kern_syslink.c,v 1.9 2007/04/22 00:59:25 dillon Exp $ + * $DragonFly: src/sys/kern/kern_syslink.c,v 1.10 2007/04/26 02:10:59 dillon Exp $ */ /* * This module implements the syslink() system call and protocol which * is used to glue clusters together as well as to interface userland * devices and filesystems to the kernel. + * + * We implement the management node concept in this module. A management + * node is basically a router node with additional features that take much + * of the protocol burden away from connecting terminal nodes. */ #include @@ -62,6 +66,8 @@ #include +#include "opt_syslink.h" + /* * Red-Black trees organizing the syslink 'router' nodes and connections * to router nodes. 
@@ -110,12 +116,13 @@ struct sldata { RB_ENTRY(sldata) rbnode; struct slrouter *router; /* organizing router */ struct file *xfp; /* external file pointer */ - struct socket *xso; /* external socket */ struct lock rlock; /* synchronizing lock */ struct lock wlock; /* synchronizing lock */ struct thread *rthread; /* helper thread */ struct thread *wthread; /* helper thread */ - struct sockbuf sio; /* accumulate mbufs */ + struct sockbuf sior; /* accumulate incoming mbufs */ + struct sockbuf siow; /* accumulate outgoing mbufs */ + struct sockaddr sa; /* used w/SLIF_SUBNET mode */ int bindex; /* broadcast index */ int flags; /* connection flags */ int linkid; @@ -513,11 +520,13 @@ syslink_add(struct slrouter *slrouter, struct syslink_info *info, * Complete initialization of the physical route node. Setting * sldata->router activates the node. */ - sbinit(&sldata->sio, SYSLINK_SIOBUFSIZE); + sbinit(&sldata->sior, SYSLINK_SIOBUFSIZE); + sbinit(&sldata->siow, SYSLINK_SIOBUFSIZE); sldata->bindex = slrouter->bbuf.windex; sldata->flags = info->flags & SLIF_USERFLAGS; lockinit(&sldata->rlock, "slread", 0, 0); lockinit(&sldata->wlock, "slwrite", 0, 0); + bcopy(&info->u.sa, &sldata->sa, sizeof(sldata->sa)); if (info->fd < 0) { /* @@ -544,7 +553,6 @@ syslink_add(struct slrouter *slrouter, struct syslink_info *info, /* two refs: reader thread and writer thread */ sldata->refs += 2; if (sldata->xfp->f_type == DTYPE_SOCKET) { - sldata->xso = (void *)sldata->xfp->f_data; lwkt_create(syslink_rthread_so, sldata, &sldata->rthread, NULL, 0, -1, "syslink_r"); @@ -589,34 +597,74 @@ syslink_rem(struct slrouter *slrouter, struct sldata *sldata, } /* - * This thread reads from an external descriptor into rbuf, then parses and - * dispatches syslink messages from rbuf. + * Read syslink messages from an external socket and route them. 
*/ static void syslink_rthread_so(void *arg) { struct sldata *sldata = arg; + struct socket *so; struct sockaddr *sa; struct mbuf *m; int soflags; int linkid; int error; + int needsa; + + so = (void *)sldata->xfp->f_data; + sa = NULL; + + /* + * Calculate whether we need to get the peer address or not. + * We need to obtain the peer address for packet-mode sockets + * representing subnets (rather than single connections). + */ + needsa = (sldata->bits && (sldata->flags & SLIF_PACKET)); while ((sldata->flags & SLIF_RQUIT) == 0) { /* - * Read some data. This is easy if data is packetized, + * Read some data. This is easy if the data is packetized, * otherwise we can still obtain an mbuf chain but we have * to parse out the syslink messages. */ soflags = 0; - sa = NULL; - error = so_pru_soreceive(sldata->xso, - (sldata->bits ? &sa : NULL), - NULL, &sldata->sio, + error = so_pru_soreceive(so, + (needsa ? &sa : NULL), + NULL, &sldata->sior, NULL, &soflags); + + /* + * The target is responsible for adjusting the src address + * field in the syslink_msg. We may need subnet information + * from the sockaddr to accomplish this.
+ * + * For streams representing subnets the originator is + * responsible for tagging its subnet bits in the src + * address but we have to renormalize + */ linkid = sldata->linkid; - if (sldata->bits && sa) { + if (sldata->flags & SLIF_PACKET) { + if (sldata->bits) { + linkid += syslink_getsubnet(sa) & + ((1 << sldata->bits) - 1); + } + if ((m = sldata->sior.sb_mb) != NULL) { + sbinit(&sldata->sior, SYSLINK_SIOBUFSIZE); + syslink_route(sldata->router, linkid, m); + } + } else { + while ((m = syslink_parse_stream(&sldata->sior)) != NULL) { + syslink_route(sldata->router, linkid, m); + } + } + + + + /* + * + */ + if ((sldata->flags & SLIF_SUBNET) && sldata->bits && sa) { linkid += syslink_getsubnet(sa) & ((1 << sldata->bits) - 1); FREE(sa, M_SONAME); @@ -624,16 +672,24 @@ syslink_rthread_so(void *arg) if (error) break; + /* + * Note: Incoming syslink messages must have their headers + * adjusted to reflect the origination address. This will + * be handled by syslink_route. + */ if (sldata->flags & SLIF_PACKET) { /* - * Packetized data + * Packetized data can just be directly routed. */ - m = sldata->sio.sb_mb; - sbinit(&sldata->sio, SYSLINK_SIOBUFSIZE); - if (m) + if ((m = sldata->sior.sb_mb) != NULL) { + sbinit(&sldata->sior, SYSLINK_SIOBUFSIZE); syslink_route(sldata->router, linkid, m); + } } else { - while ((m = syslink_parse_stream(&sldata->sio)) != NULL) { + /* + * Stream data has to be parsed out. + */ + while ((m = syslink_parse_stream(&sldata->sior)) != NULL) { syslink_route(sldata->router, linkid, m); } } @@ -643,8 +699,9 @@ syslink_rthread_so(void *arg) * Mark us as done and deref sldata. Tell the writer to terminate as * well. 
*/ - sbflush(&sldata->sio); sldata->flags |= SLIF_RDONE; + sbflush(&sldata->sior); + sbflush(&sldata->siow); if ((sldata->flags & SLIF_WDONE) == 0) { sldata->flags |= SLIF_WQUIT; wakeup(&sldata->wthread); @@ -654,13 +711,36 @@ syslink_rthread_so(void *arg) sldata_rels(sldata); } +/* + * Read syslink messages from an external descriptor and route them. Used + * when no socket interface is available. + */ static void syslink_rthread_fp(void *arg) { struct sldata *sldata = arg; +#if 0 + /* + * Loop until told otherwise + */ + while ((sldata->flags & SLIF_RQUIT) == 0) { + error = fp_read(slink->xfp, + slbuf->buf + + (slbuf->windex & slbuf->bufmask + ), + count, &count, 0, UIO_SYSSPACE); + } +#endif + + /* + * Mark us as done and deref sldata. Tell the writer to terminate as + * well. + */ sldata->flags |= SLIF_RDONE; + sbflush(&sldata->sior); + sbflush(&sldata->siow); if ((sldata->flags & SLIF_WDONE) == 0) { sldata->flags |= SLIF_WQUIT; wakeup(&sldata->wthread); @@ -795,12 +875,82 @@ void syslink_wthread_so(void *arg) { struct sldata *sldata = arg; -#if 0 - struct slbuf *slbuf = &sldata->wbuf; + struct slrouter *slrouter; struct syslink_msg *head; + struct sockaddr *sa; + struct socket *so; + struct iovec aiov; + struct uio auio; int error; + int avail; + int bytes; + +#if 0 + so = (void *)sldata->xfp->f_data; + slrouter = sldata->router; while ((sldata->flags & SLIF_WQUIT) == 0) { + /* + * Deal with any broadcast data sitting in the route node's + * broadcast buffer. If we have fallen too far behind the + * data may no longer be valid. 
+ * + * avail -- available data in broadcast buffer and + * bytes -- available contiguous data in broadcast buffer + */ + if (slrouter->bbuf.rindex - sldata->bindex > 0) + sldata->bindex = slrouter->bbuf.rindex; + if ((avail = slrouter->bbuf.windex - sldata->bindex) > 0) { + bytes = slrouter->bbuf.bufsize - + (sldata->bindex & slrouter->bbuf.bufmask); + if (bytes > avail) + bytes = avail; + head = (void *)(slrouter->bbuf.buf + + (sldata->bindex & slrouter->bbuf.bufmask)); + /* + * Break into packets if necessary, else just write + * it all in one fell swoop. + */ + aiov.iov_base = (void *)head; + aiov.iov_len = bytes; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_offset = 0; + auio.uio_resid = bytes; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_WRITE; + auio.uio_td = curthread; + if (sldata->flags & SLIF_PACKET) { + if (head->sm_bytes < SL_MIN_MESSAGE_SIZE) { + kprintf("syslink_msg too small, terminating\n"); + break; + } + if (head->sm_bytes > bytes) { + kprintf("syslink_msg not FIFO aligned, terminating\n"); + break; + } + bytes = SLMSG_ALIGN(head->sm_bytes); + so_pru_sosend(so, sa, &auio, NULL, NULL, 0, curthread); + } else { + so_pru_sosend(so, sa, &auio, NULL, NULL, 0, curthread); + } + continue; + } + + /* + * Deal with mbuf records waiting to be output + */ + if (sldata->siow.sb_mb != NULL) { + + } + + /* + * Block waiting for something to do. + */ + tsleep(&sldata->wthread, 0, "wait", 0); + } + + error = 0; for (;;) { int aligned_reclen; diff --git a/sys/kern/subr_shims.c b/sys/kern/subr_shims.c new file mode 100644 index 0000000000..75f9f525bf --- /dev/null +++ b/sys/kern/subr_shims.c @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2007 The DragonFly Project. All rights reserved. 
+ * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $DragonFly: src/sys/kern/subr_shims.c,v 1.1 2007/04/26 02:10:59 dillon Exp $ + */ +/* + * Shims for kernel-compiled features that are not present, allowing us + * to implement the support without excessive conditionals outside of + * this file.
+ */ + +#include +#include +#include +#include +#include + +#include "opt_syslink.h" + +#ifndef SYSLINK + +int +sys_syslink(struct syslink_args *uap) +{ + return(EOPNOTSUPP); +} + +#endif diff --git a/sys/sys/device.h b/sys/sys/device.h index 27f49f496e..3c3b7e7db8 100644 --- a/sys/sys/device.h +++ b/sys/sys/device.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/sys/device.h,v 1.7 2006/09/10 01:26:40 dillon Exp $ + * $DragonFly: src/sys/sys/device.h,v 1.8 2007/04/26 02:11:00 dillon Exp $ */ #ifndef _SYS_DEVICE_H_ @@ -40,8 +40,8 @@ #ifndef _SYS_TYPES_H_ #include #endif -#ifndef _SYS_SYSLINK_H_ -#include +#ifndef _SYS_SYSLINK_RPC_H_ +#include #endif struct cdev; diff --git a/sys/sys/syslink.h b/sys/sys/syslink.h index b0474cb3a1..81ebe5c9ad 100644 --- a/sys/sys/syslink.h +++ b/sys/sys/syslink.h @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/sys/syslink.h,v 1.6 2007/04/22 00:59:27 dillon Exp $ + * $DragonFly: src/sys/sys/syslink.h,v 1.7 2007/04/26 02:11:00 dillon Exp $ */ /* @@ -189,6 +189,9 @@ typedef int (*syslink_func_t)(struct syslink_generic_args *); #define SYSLINK_LABEL_SIZE 32 #define SYSLINK_ROUTER_MAXBITS 20 + +enum syslink_type { SYSLINK_TYPE_ROUTER, SYSLINK_TYPE_MANAGER, SYSLINK_TYPE_SEED, SYSLINK_TYPE_TERMINAL }; + /* * syslink_info structure * @@ -201,7 +204,7 @@ struct syslink_info { int linkid; /* linkid (base physical address) */ int bits; /* physical address bits if switched */ int flags; /* message control/switch flags */ - int reserved01; + enum syslink_type type; sysid_t sysid; /* route node sysid */ char label[SYSLINK_LABEL_SIZE]; /* symbolic name */ char reserved[32]; @@ -210,7 +213,26 @@ struct syslink_info { } u; }; +/* + * SLIF_PACKET - specify when the descriptor represents packetized data, + * where a single read or write reads or writes whole packets. 
+ * For example, a UDP socket. Otherwise a stream is assumed. + * + * SLIF_XSWITCH- specify when the descriptor represents a switched message + * source where the target has no means of discerning the + * subnet address the message is being sent to. + * + * This case occurs when a stream connection is used to + * represent a switch instead of a single end-to-end + * connection. Instead of trying to tag the stream + * messages with some kind of mac header, we instead require + * that the originator pre-adjust the syslink_msg header's + * src and dst fields based on the number of bits being + * switched. The target will then renormalize the address + * fields to merge its own linkid base in. + */ #define SLIF_PACKET 0x0001 /* packetized, else stream */ +#define SLIF_XSWITCH 0x0002 /* router must extract/gen IP addrs */ #if defined(_KERNEL) || defined(_KERNEL_STRUCTURES) #define SLIF_RQUIT 0x0400 #define SLIF_WQUIT 0x0800 @@ -220,98 +242,7 @@ struct syslink_info { #define SLIF_ERROR 0x8000 #endif -#define SLIF_USERFLAGS (SLIF_PACKET) - - -/* - * A syslink structure represents an end-point for communications. System - * structures such as vnodes are typically associated with end-points and - * usually embed a syslink structure. There is typically one master - * structure (sl_remote_id == 0) and any number of slave structures at - * remote locations (sl_remote_id on slaves point to master). - * - * A ref counter is integrated into the structure and used by SYSLINK to - * keep track of sysid references sent to remote targets. This counter - * may also be used by the governing structure (e.g. vnode) so long as - * the SYSLINK API is used to manipulate it. - * - * An operations vector implements the ABI for the numerous functions - * associated with the system structure. E.G. VOPs for vnodes. The - * ops structure also references the transport and protocol layers.
Using - * vnodes as an example, the ops structure would be replicated from a - * template on a per-mount basis. - */ -struct syslink { - sysid_t sl_source; - sysid_t sl_target; - int sl_refs; /* track references */ - struct syslink_ops *sl_ops; /* operations vector */ -}; - -/* - * The syslink_ops structure is typically embedded as part of a larger system - * structure. It conatins a reference to the transport layer (if any), - * protocol, and a structural offset range specifying the function vectors - * in the larger system structure. - * - * For example, vnode operations (VOPs) embed this structure in the vop_ops - * structure. - * - * The syslink_ops structure may be replaced as necessary. The VFS subsystem - * typically replicates syslink_ops on a per-mount basis and stores a pointer - * to the mount point in the larger system structure (vop_ops). - */ -struct syslink_ops { - struct syslink_proto *proto; - void *transport; /* FUTURE USE (transport layer) */ - int beg_offset; - int end_offset; -}; - -/* - * The syslink_desc structure describes a function vector in the protocol. - * This structure may be extended by the protocol to contain additional - * information. - */ -struct syslink_desc { - int sd_offset; /* offset into ops structure */ - const char *sd_name; /* name for debugging */ -}; - -/* - * The syslink_proto structure describes a protocol. The structure contains - * templates for the various ops structures required to implement the - * protocol. 
- */ -struct syslink_proto { - const char *sp_name; /* identifying name */ - int sp_flags; - int sp_opssize; /* structure embedding syslink_ops */ - struct syslink_ops *sp_call_encode; /* encode call */ - struct syslink_ops *sp_call_decode; /* decode call */ - struct syslink_ops *sp_reply_encode; /* encode reply */ - struct syslink_ops *sp_reply_decode; /* decode reply */ - struct syslink_ops *sp_ops; /* direct ABI calls */ -}; - -#define SPF_ALLOCATED 0x00000001 - -/* - * The syslink_generic_args structure contains the base data required in - * the arguments structure passed to any given ops function. This structure - * is typically extended with the actual call arguments. - */ -struct syslink_generic_args { - struct syslink_desc *a_desc; /* ABI method description */ - struct syslink *a_syslink; /* original syslink */ - /* extend arguments */ -}; - -typedef struct syslink *syslink_t; -typedef struct syslink_ops *syslink_ops_t; -typedef struct syslink_desc *syslink_desc_t; -typedef struct syslink_proto *syslink_proto_t; -typedef struct syslink_generic_args *syslink_generic_args_t; +#define SLIF_USERFLAGS (SLIF_PACKET|SLIF_XSWITCH) #if !defined(_KERNEL) int syslink(int, struct syslink_info *, size_t); diff --git a/sys/sys/syslink_msg.h b/sys/sys/syslink_msg.h index 4fd701f8db..a416e3f989 100644 --- a/sys/sys/syslink_msg.h +++ b/sys/sys/syslink_msg.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2004-2006 The DragonFly Project. All rights reserved. + * Copyright (c) 2004-2007 The DragonFly Project. All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Matthew Dillon @@ -31,7 +31,7 @@ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $DragonFly: src/sys/sys/syslink_msg.h,v 1.6 2007/04/03 20:21:19 dillon Exp $ + * $DragonFly: src/sys/sys/syslink_msg.h,v 1.7 2007/04/26 02:11:00 dillon Exp $ */ /* * The syslink infrastructure implements an optimized RPC mechanism across a @@ -53,8 +53,9 @@ #endif typedef u_int32_t sl_msgid_t; /* transaction sequencing */ +typedef u_int32_t sl_auxdata_t; /* auxiliary data element */ typedef u_int16_t sl_cmd_t; /* command or error */ -typedef sl_cmd_t sl_error_t; +typedef u_int16_t sl_error_t; typedef u_int16_t sl_itemid_t; /* item id */ typedef u_int16_t sl_reclen_t; /* item length */ @@ -62,110 +63,198 @@ typedef u_int16_t sl_reclen_t; /* item length */ #define SL_ALIGNMASK (SL_ALIGN - 1) /* - * Stream or FIFO based messaging structures. - * + * The msgid is used to control transaction sequencing within a session, but + * also has a special meaning to the transport layer. A msgid of 0 indicates + * a PAD syslink message, used to pad FIFO buffers to prevent messages from + * being bisected by the end of the buffer. Since all structures are 8-byte + * aligned, 8-byte PAD messages are allowed. All other messages must be + * at least sizeof(syslink_msg). + * * The reclen is the actual record length in bytes prior to alignment. * The reclen must be aligned to obtain the actual size of a syslink_msg * or syslink_item structure. Note that the reclen includes structural * headers (i.e. it does not represent just the data payload, it represents * the entire structure). * + * Syslink messages allow special treatment for large data payloads, allowing + * the transport mechanism to separate the data payload into its own buffer + * or DMA area (for example, its own page), facilitating DMA and page-mapping + * operations at the end points while allowing the message to be maximally + * compressed during transport. This is typically handled by special casing + * a readv() or writev(). + * * Sessions are identified with a session id.
The session id is a rendezvous * id that associates physical and logical routing information with a single * sysid, allowing us to both avoid storing the source and target logical id * in the syslink message AND ALSO providing a unique session id and validator - * which manages the abstracted connection between two entities. Otherwise - * the syslink message would become bloated with five sysid fields instead - * of the three we have now. - * - * Link layer communications is accomplished by specifying a target physical - * address of 0. + * which manages the abstracted 'connection' between two entities. This + * reduces bloat. * * The target physical address is deconstructed as the message hops across * the mesh. All 0's, or all 0's remaining indicates a link layer message * to be processed by the syslink route node itself. All 1's indicates * a broadcast message. Broadcast messages also require special attention. * Sending a message to a target address of 0 basically sends it to the - * directly connected syslink node. + * nearest route node as a link layer message. * * The source physical address normally starts out as 0 and is constructed * as the message hops across the mesh. The target can use the constructed * source address to respond to the originator of the message (as it must - * if it has not knowledge about the session id). A target with knowledge - * of the session id has the option of forging its own return both. - */ - -/* - * Raw protocol structures + * if it has no knowledge about the session). A target with knowledge + * of the session id has the option of forging its own return path. + * + * Checksums are the responsibility of higher layers but message checking + * elements can be negotiated or required as part of the syslink message's + * structured data. 
*/ struct syslink_msg { - sl_cmd_t sm_cmd; /* protocol command code */ - sl_reclen_t sm_bytes; /* unaligned size of message */ - sl_msgid_t sm_msgid; /* message transaction control */ + sl_msgid_t sh_msgid; /* message transaction control */ + sl_reclen_t sh_payloadoff; /* offset of payload as a DMA aid */ + sl_reclen_t sh_bytes; /* unaligned size of message */ /* minimum syslink_msg size is 8 bytes (special PAD) */ - sysid_t sm_sessid; /* session id */ - sysid_t sm_srcphysid; /* originating physical id */ - sysid_t sm_dstphysid; /* target physical id */ + sysid_t sh_sessid; /* session id */ + sysid_t sh_srcphysid; /* transit routing */ + sysid_t sh_dstphysid; /* transit routing */ + /* 8-byte aligned structure */ + /* followed by structured data */ }; -#define SL_MIN_MESSAGE_SIZE offsetof(struct syslink_msg, sm_sessid) - -#define SL_MSGID_REPLY 0x80000000 /* command vs reply */ -#define SL_MSGID_ORIG 0x40000000 /* originator transaction */ - /* (else target transaction) */ -#define SL_MSGID_BEG 0x20000000 /* first msg in transaction */ -#define SL_MSGID_END 0x10000000 /* last msg in transaction */ -#define SL_MSGID_STRUCTURED 0x08000000 /* contains structured data */ -#define SL_MSGID_COMPLETE 0x04000000 /* msg not under construction */ -#define SL_MSGID_TRANS_MASK 0x00FFFF00 /* transaction id */ -#define SL_MSGID_SEQ_MASK 0x000000FF /* sequence no within trans */ - -#define SLMSG_ALIGN(bytes) (((bytes) + 7) & ~7) - /* - * Syslink message commands (16 bits, bit 15 must be 0) + * MSGID handling. This controls message transactions and PAD. Terminal + * nodes, such as filesystems, are state driven entities whose syslink + * message transactions are directly supported by the local on-machine route + * nodes they connect to. The route nodes use various fields in the header, + * particularly sh_msgid, sh_sessid, and sh_payloadoff, to optimally present + * syslink messages to the terminal node.
In particular, a route node may + * present the payload for a syslink message or the message itself through + * some out-of-band means, such as by mapping it into memory. + * + * These route nodes also handle timeout and retry processing, providing + * appropriate response messages to terminal nodes if the target never replies + * to a transaction or some other exceptional condition occurs. The route + * node does not handle RETRY and other exceptional conditions itself.. + * that is, the route node is not responsible for storing the message, only + * routing it. The route node only tracks the related session(s). * - * Commands 0x0000-0x001F are reserved for the universal link layer, but - * except for 0x0000 (which is a PAD message), must still be properly - * routed. + * A route node only directly supports terminal nodes directly connected to + * it. Intermediate route nodes ignore the MSGID (other than the all 0's PAD + * case) and do not track indirect sessions. For example, a piece of + * hardware doing syslink message routing does not have to mess with + * any of this. * - * Commands 0x0020-0x002F are reserved for the universal protocol - * identification layer. + * A session id establishes a session between two entities. One terminal node + * is considered to be the originator of the session, the other terminal node + * is the target. However, once established, EITHER ENTITY may initiate + * a transaction (or both simultaneously). SH_MSGID_CMD_ORIGINATOR is used + * in all messages and replies related to a transaction initiated by the + * session originator, and SH_MSGID_CMD_TARGET is used in all messages and + * replies related to a transaction initiated by the session target. + * Establishment of new sessions uses SH_MSGID_CMD_ESTABLISH. * - * Commands 0x0100-0x7FFF are protocol commands. + * Parallel transactions are supported by using different transaction ids + * amongst the parallel transactions.
Once a transaction id is used, it + * may not be reused until after the timeout period is exceeded. With 23 + * transaction id bits we have 8 million transaction ids, supporting around + * 26000 transactions per second with a 5 minute timeout. Note that + * multiple sessions may be established between any two entities, giving us + * essentially an unlimited number of transactions per second. * - * The command field is the error return field with bit 15 set in the - * reply. + * ENDIANNESS - syslink messages may be transported with any endianness. This + * includes all fields including the syslink header and syslink element + * header fields. If upon reception SH_MSGID_ENDIAN_NORM is set in the msgid + * both end-points will have the same endianness and no translation is + * required. If SH_MSGID_ENDIAN_REV is set then the two end-points have + * different endianness and translation is required. Only little endian and + * big endian transport is supported (that is, a simple reversal of bytes for + * each field). + * + * Intermediate route nodes (i.e. those not tracking the session) may NOT + * translate the endianness of the message in any fashion. The management + * node that talks to the actual resource is responsible for doing the + * endian translations for all the above fields... everything except the + * syslink_elm payload, which is described later.
*/ -#define SL_CMD_PAD 0x0000 -#define SL_CMD_LINK_MESH 0x0001 /* mesh construction */ -#define SL_CMD_LINK_REG 0x0002 /* register logical id */ -#define SL_CMD_LINK_DEREG 0x0003 /* unregister logical id */ -#define SL_CMD_LINK_ID 0x0004 /* link level identification */ +#define SL_MIN_MESSAGE_SIZE offsetof(struct syslink_msg, sh_sessid) /* smallest legal message: header fields preceding sh_sessid (special PAD) */ +#define SL_MSG_ALIGN(bytes) (((bytes) + 7) & ~7) -#define SL_CMD_PROT_ID 0x0010 /* protocol & device ident */ +#define SH_MSGID_CMD_MASK 0xF0000000 +#define SH_MSGID_CMD_HEARTBEAT 0x60000000 /* seed heartbeat broadcast */ +#define SH_MSGID_CMD_TIMESYNC 0x50000000 /* timesync broadcast */ +#define SH_MSGID_CMD_ALLOCATE 0x40000000 /* allocate session id space */ +#define SH_MSGID_CMD_ORIGINATOR 0x30000000 /* origin initiated trans */ +#define SH_MSGID_CMD_TARGET 0x20000000 /* target initiated trans */ +#define SH_MSGID_CMD_ESTABLISH 0x10000000 /* establish session */ +#define SH_MSGID_CMD_PAD 0x00000000 + +#define SH_MSGID_REPLY 0x08000000 +#define SH_MSGID_ENDIAN_NORM 0x01000000 +#define SH_MSGID_ENDIAN_REV 0x00000001 +#define SM_MSGID_TRANS_MASK 0x00FFFFFE /* 23 bits */ /* - * Message elements for structured messages. If SL_MSGID_STRUCTURED is - * set the syslink message contains zero or more syslink_elm structures - * laid side by side. Each syslink_elm structure may or may not be - * structured (i.e. recursive). - * - * Most of the same SL_MSGID_* bits apply. The first structured element - * will have SL_MSGID_BEG set, the last will have SL_MSGID_END set (if - * there is only one element, both bits will be set in that element). If - * the payload is structured, SL_MSGID_STRUCTURED will be set. - * - * syslink_elm's may use the TRANS and SEQ bits in the msgid for other - * purposes. A syslink_elm is considered to be a PAD if se_cmd == 0. + * A syslink message is broken up into three pieces: (1) The headers, (2) The + * message elements, and (3) DMA payload.
+ * + * A non-PAD syslink message contains a single top-level message element. + * Unlike recursive message elements which can be iterated, the top level + * element is never iterated. There is always only one. The top level + * element is usually structured but does not have to be. The top level + * element's aux field represents the RPC protocol id for the command. + * + * A PAD syslink message contains no message elements. The entire syslink + * message is considered pad based on the header. + * + * A structured syslink message element may be specified by setting + * SE_CMDF_STRUCTURED. The data payload for a structured message element + * is a sequence of ZERO or MORE message elements until the payload size is + * reached. Each message element may be opaque or structured. Fully + * recursive message elements are supported in this manner. + * + * A syslink message element with SE_CMDF_MASTERPAYLOAD set is associated + * with the master payload for the syslink message as a whole. This field + * is only interpreted by terminal nodes and does not have to be used this + * way, but it's a good idea to do so for debugging purposes. + * + * Syslink message elements are always 8-byte aligned. In order to + * guarantee an 8-byte alignment for our extended data, a 32 bit auxiliary + * field is always included as part of the official syslink_elm structure + * definition. This field is actually part of the element command's data + * and its use, if any, depends on the element command. + * + * Syslink message elements do not have to be validated by intermediate + * route nodes but must ALWAYS be validated by the route node that connects + * to the terminal node intended to receive the syslink message. + * + * Only the header fields of a syslink_elm are translated for endianness + * by the management node.
If the management node does have to do an + * endian conversion it will also set SE_CMDF_UNTRANSLATED in se_cmd (all + * of them, recursively, since it has to validate and translate the entire + * hierarchy anyway) and the rpc mechanism will be responsible for doing + * the conversion and clearing the flag. The seu_proto field IS always + * translated, which means that when used as aux data it must be referenced + * as a 32 bit field. + * + * As a fringe benefit, since the RPC command is the entire se_cmd field, + * flags and all, an untranslated element will wind up with an unrecognized + * command code and be reported as an error rather than being mis-executed. */ struct syslink_elm { sl_cmd_t se_cmd; sl_reclen_t se_bytes; - sl_msgid_t se_msgid; + union { + sl_auxdata_t seu_aux; /* aux data */ + sl_auxdata_t seu_proto; /* protocol field */ + } u; /* extended by data */ }; +#define SE_CMDF_STRUCTURED 0x8000 /* structured, else opaque */ +#define SE_CMDF_RESERVED4000 0x4000 +#define SE_CMDF_MASTERPAYLOAD 0x2000 /* DMA payload association */ +#define SE_CMDF_UNTRANSLATED 0x1000 /* needs endian translation */ + +#define SE_CMD_PAD 0x0000 /* CMD 0 is always PAD */ + typedef struct syslink_msg *syslink_msg_t; typedef struct syslink_elm *syslink_elm_t; diff --git a/sys/sys/syslink_rpc.h b/sys/sys/syslink_rpc.h new file mode 100644 index 0000000000..86ed6b2908 --- /dev/null +++ b/sys/sys/syslink_rpc.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2007 The DragonFly Project. All rights reserved. + * + * This code is derived from software contributed to The DragonFly Project + * by Matthew Dillon + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of The DragonFly Project nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific, prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $DragonFly: src/sys/sys/syslink_rpc.h,v 1.1 2007/04/26 02:11:00 dillon Exp $ + */ +/* + * The syslink infrastructure implements a protocol abstraction for RPC + * operations which allows syslink messages to be endian converted, + * processed into a callback, and generated. + * + * This header file supplies structures required for syslink RPC + * processing. + */ + +#ifndef _SYS_SYSLINK_RPC_H_ +#define _SYS_SYSLINK_RPC_H_ + +#ifndef _SYS_TYPES_H_ +#include +#endif + +/* + * Note: syslink_desc is also used by vnops and devops. 
+ */ +struct syslink_desc { + int sd_offset; /* offset into ops structure */ + const char *sd_name; /* name for debugging */ +}; + +#endif + diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index ef43a56261..335d50288e 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -32,7 +32,7 @@ * * @(#)vnode.h 8.7 (Berkeley) 2/4/94 * $FreeBSD: src/sys/sys/vnode.h,v 1.111.2.19 2002/12/29 18:19:53 dillon Exp $ - * $DragonFly: src/sys/sys/vnode.h,v 1.72 2006/10/27 04:56:33 dillon Exp $ + * $DragonFly: src/sys/sys/vnode.h,v 1.73 2007/04/26 02:11:00 dillon Exp $ */ #ifndef _SYS_VNODE_H_ @@ -73,8 +73,8 @@ #ifndef _SYS_TREE_H_ #include #endif -#ifndef _SYS_SYSLINK_H_ -#include +#ifndef _SYS_SYSLINK_RPC_H_ +#include #endif #ifndef _SYS_CCMS_H_ #include -- 2.41.0