kernel - use new td_ucred in numerous places
[dragonfly.git] / sys / kern / lwkt_caps.c
CommitLineData
f6bf3af1 1/*
8c10bfcf
MD
2 * Copyright (c) 2003,2004 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
f6bf3af1
MD
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
8c10bfcf 10 *
f6bf3af1
MD
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
8c10bfcf
MD
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
f6bf3af1 32 * SUCH DAMAGE.
8c10bfcf 33 *
13d13d89 34 * $DragonFly: src/sys/kern/lwkt_caps.c,v 1.13 2007/02/26 21:41:08 corecode Exp $
f6bf3af1
MD
35 */
36
37/*
38 * This module implements the DragonFly LWKT IPC rendezvous and message
39 * passing API which operates between userland processes, between userland
40 * threads, and between userland processes and kernel threads. This API
41 * is known as the CAPS interface.
42 *
43 * Generally speaking this module abstracts the LWKT message port interface
44 * into userland. Clients and Servers rendezvous through ports named
45 * by or wildcarded by (name,uid,gid). The kernel provides system calls
46 * which may be assigned to the mp_* fields in a userland-supplied
47 * kernel-managed port, and a registration interface which associates an
48 * upcall with a userland port. The kernel tracks authentication information
49 * and deals with connection failures by automatically replying to unreplied
50 * messages.
51 *
52 * From the userland perspective a client/server connection involves two
53 * message ports on the client and two message ports on the server.
54 */
55
56#include <sys/param.h>
57#include <sys/systm.h>
58#include <sys/kernel.h>
59#include <sys/sysproto.h>
60#include <sys/malloc.h>
61#include <sys/proc.h>
62#include <sys/ucred.h>
63#include <sys/caps.h>
64#include <sys/sysctl.h>
65#include <vm/vm.h>
66#include <vm/vm_extern.h>
67
68static int caps_process_msg(caps_kinfo_t caps, caps_kmsg_t msg, struct caps_sys_get_args *uap);
69static void caps_free(caps_kinfo_t caps);
70static void caps_free_msg(caps_kmsg_t msg);
973c11b9 71static int caps_name_check(const char *name, size_t len);
f6bf3af1
MD
72static caps_kinfo_t caps_free_msg_mcaps(caps_kmsg_t msg);
73static caps_kinfo_t kern_caps_sys_service(const char *name, uid_t uid,
74 gid_t gid, struct ucred *cred,
75 int flags, int *error);
76static caps_kinfo_t kern_caps_sys_client(const char *name, uid_t uid,
77 gid_t gid, struct ucred *cred, int flags, int *error);
78
79#define CAPS_HSIZE 64
80#define CAPS_HMASK (CAPS_HSIZE - 1)
81
82static caps_kinfo_t caps_hash_ary[CAPS_HSIZE];
e30f9e2c 83static int caps_waitsvc;
f6bf3af1
MD
84
85MALLOC_DEFINE(M_CAPS, "caps", "caps IPC messaging");
86
87static int caps_enabled;
88SYSCTL_INT(_kern, OID_AUTO, caps_enabled,
89 CTLFLAG_RW, &caps_enabled, 0, "Enable CAPS");
90
91/************************************************************************
92 * INLINE SUPPORT FUNCTIONS *
93 ************************************************************************/
94
95static __inline
96struct caps_kinfo **
97caps_hash(const char *name, int len)
98{
99 int hv = 0x7123F4B3;
100
101 while (--len >= 0)
102 hv = (hv << 5) ^ name[len] ^ (hv >> 23);
103 return(&caps_hash_ary[(hv ^ (hv >> 16)) & CAPS_HMASK]);
104}
105
106static __inline
107void
108caps_hold(caps_kinfo_t caps)
109{
110 ++caps->ci_refs;
111}
112
113static __inline
114void
115caps_drop(caps_kinfo_t caps)
116{
117 if (--caps->ci_refs == 0)
118 caps_free(caps);
119}
120
121/************************************************************************
122 * STATIC SUPPORT FUNCTIONS *
123 ************************************************************************/
124
125static
126caps_kinfo_t
127caps_find(const char *name, int len, uid_t uid, gid_t gid)
128{
129 caps_kinfo_t caps;
130 struct caps_kinfo **chash;
131
132 chash = caps_hash(name, len);
133 for (caps = *chash; caps; caps = caps->ci_hnext) {
134 if ((uid == (uid_t)-1 || uid == caps->ci_uid) &&
135 (gid == (gid_t)-1 || gid == caps->ci_gid) &&
136 len == caps->ci_namelen &&
137 bcmp(name, caps->ci_name, len) == 0
138 ) {
139 caps_hold(caps);
140 break;
141 }
142 }
143 return(caps);
144}
145
146static
147caps_kinfo_t
e30f9e2c 148caps_find_id(thread_t td, int id)
f6bf3af1 149{
f6bf3af1
MD
150 caps_kinfo_t caps;
151
152 for (caps = td->td_caps; caps; caps = caps->ci_tdnext) {
153 if (caps->ci_id == id) {
154 caps_hold(caps);
155 break;
156 }
157 }
158 return(caps);
159}
160
161static
162caps_kinfo_t
e30f9e2c 163caps_alloc(thread_t td, const char *name, int len, uid_t uid, gid_t gid,
f6bf3af1
MD
164 int flags, caps_type_t type)
165{
166 struct caps_kinfo **chash;
f6bf3af1
MD
167 caps_kinfo_t caps;
168 caps_kinfo_t ctmp;
169
77652cad 170 caps = kmalloc(offsetof(struct caps_kinfo, ci_name[len+1]),
f6bf3af1
MD
171 M_CAPS, M_WAITOK|M_ZERO);
172 TAILQ_INIT(&caps->ci_msgpendq);
173 TAILQ_INIT(&caps->ci_msguserq);
174 caps->ci_uid = uid; /* -1 == not registered for uid search */
175 caps->ci_gid = gid; /* -1 == not registered for gid search */
176 caps->ci_type = type;
177 caps->ci_refs = 1; /* CAPKF_TDLIST reference */
178 caps->ci_namelen = len;
179 caps->ci_flags = flags;
180 bcopy(name, caps->ci_name, len + 1);
181 if (type == CAPT_SERVICE) {
182 chash = caps_hash(caps->ci_name, len);
183 caps->ci_hnext = *chash;
184 *chash = caps;
185 caps->ci_flags |= CAPKF_HLIST;
186 }
187 if (td->td_caps) {
188 caps->ci_id = td->td_caps->ci_id + 1;
189 if (caps->ci_id < 0) {
190 /*
191 * It is virtually impossible for this case to occur.
192 */
193 caps->ci_id = 1;
e30f9e2c 194 while ((ctmp = caps_find_id(td, caps->ci_id)) != NULL) {
f6bf3af1
MD
195 caps_drop(ctmp);
196 ++caps->ci_id;
197 }
198 }
199 } else {
200 caps->ci_id = 1;
201 }
202 caps->ci_flags |= CAPKF_TDLIST;
203 caps->ci_tdnext = td->td_caps;
204 caps->ci_td = td;
205 td->td_caps = caps;
206 return(caps);
207}
208
209static
210caps_kmsg_t
211caps_alloc_msg(caps_kinfo_t caps)
212{
213 caps_kmsg_t msg;
214
efda3bd0 215 msg = kmalloc(sizeof(struct caps_kmsg), M_CAPS, M_WAITOK|M_ZERO);
f6bf3af1
MD
216 msg->km_msgid.c_id = (off_t)(uintptr_t)msg;
217 return(msg);
218}
219
220static
221caps_kmsg_t
222caps_find_msg(caps_kinfo_t caps, off_t msgid)
223{
224 caps_kmsg_t msg;
225
226 TAILQ_FOREACH(msg, &caps->ci_msguserq, km_node) {
227 if (msg->km_msgid.c_id == msgid)
228 return(msg);
229 }
230 TAILQ_FOREACH(msg, &caps->ci_msgpendq, km_node) {
231 if (msg->km_msgid.c_id == msgid)
232 return(msg);
233 }
234 return(NULL);
235}
236
237static
238caps_kinfo_t
9910d07b
MD
239caps_load_ccr(caps_kinfo_t caps, caps_kmsg_t msg, struct lwp *lp,
240 void *udata, int ubytes)
f6bf3af1 241{
9910d07b 242 struct ucred *cr = lp ? lp->lwp_thread->td_ucred : proc0.p_ucred;
f6bf3af1 243 caps_kinfo_t rcaps;
9910d07b 244 int i;
f6bf3af1
MD
245
246 /*
02fb4e24
MD
247 * replace km_mcaps with new VM state, return the old km_mcaps. The
248 * caller is expected to drop the rcaps ref count on return so we do
249 * not do it ourselves.
f6bf3af1
MD
250 */
251 rcaps = caps_free_msg_mcaps(msg); /* can be NULL */
f6bf3af1
MD
252 caps_hold(caps);
253 msg->km_mcaps = caps;
02fb4e24 254 xio_init_ubuf(&msg->km_xio, udata, ubytes, XIOF_READ);
f6bf3af1 255
9910d07b 256 msg->km_ccr.pid = lp ? lp->lwp_proc->p_pid : -1;
f6bf3af1
MD
257 msg->km_ccr.uid = cr->cr_ruid;
258 msg->km_ccr.euid = cr->cr_uid;
259 msg->km_ccr.gid = cr->cr_rgid;
260 msg->km_ccr.ngroups = MIN(cr->cr_ngroups, CAPS_MAXGROUPS);
261 for (i = 0; i < msg->km_ccr.ngroups; ++i)
262 msg->km_ccr.groups[i] = cr->cr_groups[i];
263 return(rcaps);
264}
265
266static void
267caps_dequeue_msg(caps_kinfo_t caps, caps_kmsg_t msg)
268{
269 if (msg->km_flags & CAPKMF_ONUSERQ)
270 TAILQ_REMOVE(&caps->ci_msguserq, msg, km_node);
271 if (msg->km_flags & CAPKMF_ONPENDQ)
272 TAILQ_REMOVE(&caps->ci_msgpendq, msg, km_node);
273 msg->km_flags &= ~(CAPKMF_ONPENDQ|CAPKMF_ONUSERQ);
274}
275
276static void
277caps_put_msg(caps_kinfo_t caps, caps_kmsg_t msg, caps_msg_state_t state)
278{
279 KKASSERT((msg->km_flags & (CAPKMF_ONUSERQ|CAPKMF_ONPENDQ)) == 0);
280
281 msg->km_flags |= CAPKMF_ONPENDQ;
282 msg->km_flags &= ~CAPKMF_PEEKED;
283 msg->km_state = state;
284 TAILQ_INSERT_TAIL(&caps->ci_msgpendq, msg, km_node);
285
286 /*
287 * Instead of waking up the service for both new messages and disposals,
288 * just wakeup the service for new messages and it will process the
289 * previous disposal in the same loop, reducing the number of context
290 * switches required to run an IPC.
291 */
292 if (state != CAPMS_DISPOSE)
293 wakeup(caps);
294 caps_drop(caps);
295}
296
297/*
298 * caps_free_msg_mcaps()
f6bf3af1
MD
299 */
300static
301caps_kinfo_t
302caps_free_msg_mcaps(caps_kmsg_t msg)
303{
304 caps_kinfo_t mcaps;
305
02fb4e24
MD
306 mcaps = msg->km_mcaps; /* may be NULL */
307 msg->km_mcaps = NULL;
308 if (msg->km_xio.xio_npages)
309 xio_release(&msg->km_xio);
f6bf3af1
MD
310 return(mcaps);
311}
312
313/*
314 * caps_free_msg()
315 *
316 * Free a caps placeholder message. The message must not be on any queues.
317 */
318static void
319caps_free_msg(caps_kmsg_t msg)
320{
321 caps_kinfo_t rcaps;
322
323 if ((rcaps = caps_free_msg_mcaps(msg)) != NULL)
324 caps_drop(rcaps);
efda3bd0 325 kfree(msg, M_CAPS);
f6bf3af1
MD
326}
327
/*
 * caps_name_check()
 *
 * Validate a service name.  Each of the 'len' bytes of 'name' must be an
 * ASCII digit, an ASCII letter, '_' or '.'.
 *
 * Returns 0 if every byte is acceptable (trivially so when len is 0) and
 * EINVAL as soon as an unacceptable byte is found.
 *
 * The index is a size_t, so the scan counts down with "i-- > 0".  A test
 * of the form "i >= 0" is always true for an unsigned type and would run
 * past the start of the buffer after index 0.
 */
static int
caps_name_check(const char *name, size_t len)
{
    size_t i;
    char c;

    for (i = len; i-- > 0; ) {
        c = name[i];
        if (c >= '0' && c <= '9')
            continue;
        if (c >= 'a' && c <= 'z')
            continue;
        if (c >= 'A' && c <= 'Z')
            continue;
        if (c == '_' || c == '.')
            continue;
        return(EINVAL);
    }
    return(0);
}
351
352/*
f6bf3af1
MD
353 * caps_term()
354 *
355 * Terminate portions of a caps info structure. This is used to close
356 * an end-point or to flush particular messages on an end-point.
357 *
358 * This function should not be called with CAPKF_TDLIST unless the caller
359 * has an additional hold on the caps structure.
360 */
361static void
362caps_term(caps_kinfo_t caps, int flags, caps_kinfo_t cflush)
363{
9910d07b 364 struct thread *td = curthread;
f6bf3af1
MD
365 struct caps_kinfo **scan;
366 caps_kmsg_t msg;
367
368 if (flags & CAPKF_TDLIST)
369 caps->ci_flags |= CAPKF_CLOSED;
370
371 if (flags & CAPKF_FLUSH) {
372 int mflags;
373 struct caps_kmsg_queue tmpuserq;
374 struct caps_kmsg_queue tmppendq;
375 caps_kinfo_t rcaps;
376
377 TAILQ_INIT(&tmpuserq);
378 TAILQ_INIT(&tmppendq);
379
380 while ((msg = TAILQ_FIRST(&caps->ci_msgpendq)) != NULL ||
381 (msg = TAILQ_FIRST(&caps->ci_msguserq)) != NULL
382 ) {
383 mflags = msg->km_flags & (CAPKMF_ONUSERQ|CAPKMF_ONPENDQ);
384 caps_dequeue_msg(caps, msg);
385
386 if (cflush && msg->km_mcaps != cflush) {
387 if (mflags & CAPKMF_ONUSERQ)
388 TAILQ_INSERT_TAIL(&tmpuserq, msg, km_node);
389 else
390 TAILQ_INSERT_TAIL(&tmppendq, msg, km_node);
391 } else {
392 /*
393 * Dispose of the message. If the received message is a
394 * request we must reply it. If the received message is
395 * a reply we must return it for disposal. If the
396 * received message is a disposal request we simply free it.
397 */
398 switch(msg->km_state) {
399 case CAPMS_REQUEST:
400 case CAPMS_REQUEST_RETRY:
9910d07b 401 rcaps = caps_load_ccr(caps, msg, td->td_lwp, NULL, 0);
f6bf3af1
MD
402 if (rcaps->ci_flags & CAPKF_CLOSED) {
403 /*
404 * can't reply, if we never read the message (its on
405 * the pending queue), or if we are closed ourselves,
406 * we can just free the message. Otherwise we have
407 * to send ourselves a disposal request (multi-threaded
408 * services have to deal with disposal requests for
409 * messages that might be in progress).
410 */
411 if ((caps->ci_flags & CAPKF_CLOSED) ||
412 (mflags & CAPKMF_ONPENDQ)
413 ) {
414 caps_free_msg(msg);
415 caps_drop(rcaps);
416 } else {
417 caps_drop(rcaps);
02fb4e24 418 caps_hold(caps); /* for message */
f6bf3af1
MD
419 caps_put_msg(caps, msg, CAPMS_DISPOSE);
420 }
421 } else {
422 /*
02fb4e24
MD
423 * auto-reply to the originator. rcaps already
424 * has a dangling hold so we do not have to hold it
425 * again.
f6bf3af1
MD
426 */
427 caps_put_msg(rcaps, msg, CAPMS_REPLY);
428 }
429 break;
430 case CAPMS_REPLY:
431 case CAPMS_REPLY_RETRY:
9910d07b 432 rcaps = caps_load_ccr(caps, msg, td->td_lwp, NULL, 0);
f6bf3af1
MD
433 if (caps == rcaps || (rcaps->ci_flags & CAPKF_CLOSED)) {
434 caps_free_msg(msg); /* degenerate disposal case */
435 caps_drop(rcaps);
436 } else {
437 caps_put_msg(rcaps, msg, CAPMS_DISPOSE);
438 }
439 break;
440 case CAPMS_DISPOSE:
441 caps_free_msg(msg);
442 break;
443 }
444 }
445 }
446 while ((msg = TAILQ_FIRST(&tmpuserq)) != NULL) {
447 TAILQ_REMOVE(&tmpuserq, msg, km_node);
448 TAILQ_INSERT_TAIL(&caps->ci_msguserq, msg, km_node);
449 msg->km_flags |= CAPKMF_ONUSERQ;
450 }
451 while ((msg = TAILQ_FIRST(&tmppendq)) != NULL) {
452 TAILQ_REMOVE(&tmppendq, msg, km_node);
453 TAILQ_INSERT_TAIL(&caps->ci_msgpendq, msg, km_node);
454 msg->km_flags |= CAPKMF_ONPENDQ;
455 }
456 }
457 if ((flags & CAPKF_HLIST) && (caps->ci_flags & CAPKF_HLIST)) {
458 for (scan = caps_hash(caps->ci_name, caps->ci_namelen);
459 *scan != caps;
460 scan = &(*scan)->ci_hnext
461 ) {
462 KKASSERT(*scan != NULL);
463 }
464 *scan = caps->ci_hnext;
465 caps->ci_hnext = (void *)-1;
466 caps->ci_flags &= ~CAPKF_HLIST;
467 }
468 if ((flags & CAPKF_TDLIST) && (caps->ci_flags & CAPKF_TDLIST)) {
f6bf3af1
MD
469 for (scan = &caps->ci_td->td_caps;
470 *scan != caps;
471 scan = &(*scan)->ci_tdnext
472 ) {
473 KKASSERT(*scan != NULL);
474 }
475 *scan = caps->ci_tdnext;
476 caps->ci_flags &= ~CAPKF_TDLIST;
477 caps->ci_tdnext = (void *)-1;
478 caps->ci_td = NULL;
479 caps_drop(caps);
480 }
481 if ((flags & CAPKF_RCAPS) && (caps->ci_flags & CAPKF_RCAPS)) {
482 caps_kinfo_t ctmp;
483
484 caps->ci_flags &= ~CAPKF_RCAPS;
485 if ((ctmp = caps->ci_rcaps)) {
486 caps->ci_rcaps = NULL;
487 caps_term(ctmp, CAPKF_FLUSH, caps);
488 caps_drop(ctmp);
489 }
490 }
491}
492
493static void
494caps_free(caps_kinfo_t caps)
495{
496 KKASSERT(TAILQ_EMPTY(&caps->ci_msgpendq));
497 KKASSERT(TAILQ_EMPTY(&caps->ci_msguserq));
498 KKASSERT((caps->ci_flags & (CAPKF_HLIST|CAPKF_TDLIST)) == 0);
efda3bd0 499 kfree(caps, M_CAPS);
f6bf3af1
MD
500}
501
502/************************************************************************
503 * PROCESS SUPPORT FUNCTIONS *
504 ************************************************************************/
505
e30f9e2c
MD
506/*
507 * Create dummy entries in p2 so we can return the appropriate
508 * error code. Robust userland code will check the error for a
509 * forked condition and reforge the connection.
510 */
f6bf3af1 511void
13d13d89 512caps_fork(struct thread *td1, struct thread *td2)
f6bf3af1 513{
e30f9e2c
MD
514 caps_kinfo_t caps1;
515 caps_kinfo_t caps2;
e30f9e2c
MD
516
517 /*
518 * Create dummy entries with the same id's as the originals. Note
519 * that service entries are not re-added to the hash table. The
520 * dummy entries return an ENOTCONN error allowing userland code to
521 * detect that a fork occured. Userland must reconnect to the service.
522 */
523 for (caps1 = td1->td_caps; caps1; caps1 = caps1->ci_tdnext) {
524 if (caps1->ci_flags & CAPF_NOFORK)
525 continue;
526 caps2 = caps_alloc(td2,
527 caps1->ci_name, caps1->ci_namelen,
528 caps1->ci_uid, caps1->ci_gid,
529 caps1->ci_flags & CAPF_UFLAGS, CAPT_FORKED);
530 caps2->ci_id = caps1->ci_id;
531 }
532
533 /*
534 * Reverse the list order to maintain highest-id-first
535 */
536 caps2 = td2->td_caps;
537 td2->td_caps = NULL;
538 while (caps2) {
539 caps1 = caps2->ci_tdnext;
540 caps2->ci_tdnext = td2->td_caps;
541 td2->td_caps = caps2;
542 caps2 = caps1;
543 }
f6bf3af1
MD
544}
545
546void
547caps_exit(struct thread *td)
548{
549 caps_kinfo_t caps;
550
551 while ((caps = td->td_caps) != NULL) {
552 caps_hold(caps);
553 caps_term(caps, CAPKF_TDLIST|CAPKF_HLIST|CAPKF_FLUSH|CAPKF_RCAPS, NULL);
554 caps_drop(caps);
555 }
556}
557
558/************************************************************************
559 * SYSTEM CALLS *
560 ************************************************************************/
561
562/*
563 * caps_sys_service(name, uid, gid, upcid, flags);
564 *
565 * Create an IPC service using the specified name, uid, gid, and flags.
566 * Either uid or gid can be -1, but not both. The port identifier is
567 * returned.
568 *
569 * upcid can either be an upcall or a kqueue identifier (XXX)
3919ced0
MD
570 *
571 * MPALMOSTSAFE
f6bf3af1
MD
572 */
573int
753fd850 574sys_caps_sys_service(struct caps_sys_service_args *uap)
f6bf3af1 575{
9910d07b 576 struct ucred *cred = curthread->td_ucred;
f6bf3af1
MD
577 char name[CAPS_MAXNAMELEN];
578 caps_kinfo_t caps;
973c11b9 579 size_t len;
f6bf3af1
MD
580 int error;
581
582 if (caps_enabled == 0)
583 return(EOPNOTSUPP);
584 if ((error = copyinstr(uap->name, name, CAPS_MAXNAMELEN, &len)) != 0)
585 return(error);
973c11b9 586 if ((ssize_t)--len <= 0)
f6bf3af1 587 return(EINVAL);
3919ced0 588 get_mplock();
f6bf3af1 589
3919ced0
MD
590 if ((error = caps_name_check(name, len)) == 0) {
591 caps = kern_caps_sys_service(name, uap->uid, uap->gid, cred,
592 uap->flags & CAPF_UFLAGS, &error);
593 if (caps)
594 uap->sysmsg_result = caps->ci_id;
595 }
596 rel_mplock();
f6bf3af1
MD
597 return(error);
598}
599
600/*
601 * caps_sys_client(name, uid, gid, upcid, flags);
602 *
603 * Create an IPC client connected to the specified service. Either uid or gid
604 * may be -1, indicating a wildcard, but not both. The port identifier is
605 * returned.
606 *
607 * upcid can either be an upcall or a kqueue identifier (XXX)
3919ced0
MD
608 *
609 * MPALMOSTSAFE
f6bf3af1
MD
610 */
611int
753fd850 612sys_caps_sys_client(struct caps_sys_client_args *uap)
f6bf3af1 613{
9910d07b 614 struct ucred *cred = curthread->td_ucred;
f6bf3af1
MD
615 char name[CAPS_MAXNAMELEN];
616 caps_kinfo_t caps;
973c11b9 617 size_t len;
f6bf3af1
MD
618 int error;
619
620 if (caps_enabled == 0)
621 return(EOPNOTSUPP);
622 if ((error = copyinstr(uap->name, name, CAPS_MAXNAMELEN, &len)) != 0)
623 return(error);
973c11b9 624 if ((ssize_t)--len <= 0)
f6bf3af1 625 return(EINVAL);
3919ced0 626 get_mplock();
f6bf3af1 627
3919ced0
MD
628 if ((error = caps_name_check(name, len)) == 0) {
629 caps = kern_caps_sys_client(name, uap->uid, uap->gid, cred,
630 uap->flags & CAPF_UFLAGS, &error);
631 if (caps)
632 uap->sysmsg_result = caps->ci_id;
633 }
634 rel_mplock();
f6bf3af1
MD
635 return(error);
636}
637
3919ced0
MD
638/*
639 * MPALMOSTSAFE
640 */
f6bf3af1 641int
753fd850 642sys_caps_sys_close(struct caps_sys_close_args *uap)
f6bf3af1 643{
9910d07b 644 struct thread *td = curthread;
f6bf3af1 645 caps_kinfo_t caps;
3919ced0 646 int error;
f6bf3af1 647
3919ced0
MD
648 get_mplock();
649
9910d07b 650 if ((caps = caps_find_id(td, uap->portid)) != NULL) {
3919ced0
MD
651 caps_term(caps, CAPKF_TDLIST|CAPKF_HLIST|CAPKF_FLUSH|CAPKF_RCAPS,
652 NULL);
653 caps_drop(caps);
654 error = 0;
655 } else {
656 error = EINVAL;
657 }
658 rel_mplock();
659 return(error);
f6bf3af1
MD
660}
661
3919ced0
MD
662/*
663 * MPALMOSTSAFE
664 */
e30f9e2c 665int
753fd850 666sys_caps_sys_setgen(struct caps_sys_setgen_args *uap)
e30f9e2c 667{
9910d07b 668 struct thread *td = curthread;
e30f9e2c 669 caps_kinfo_t caps;
02fb4e24 670 int error;
e30f9e2c 671
3919ced0
MD
672 get_mplock();
673
9910d07b 674 if ((caps = caps_find_id(td, uap->portid)) != NULL) {
3919ced0
MD
675 if (caps->ci_type == CAPT_FORKED) {
676 error = ENOTCONN;
677 } else {
678 caps->ci_gen = uap->gen;
679 error = 0;
680 }
681 caps_drop(caps);
02fb4e24 682 } else {
3919ced0 683 error = EINVAL;
02fb4e24 684 }
3919ced0 685 rel_mplock();
02fb4e24 686 return(error);
e30f9e2c
MD
687}
688
3919ced0
MD
689/*
690 * MPALMOSTSAFE
691 */
e30f9e2c 692int
753fd850 693sys_caps_sys_getgen(struct caps_sys_getgen_args *uap)
e30f9e2c 694{
9910d07b 695 struct thread *td = curthread;
e30f9e2c 696 caps_kinfo_t caps;
02fb4e24 697 int error;
e30f9e2c 698
3919ced0
MD
699 get_mplock();
700
9910d07b 701 if ((caps = caps_find_id(td, uap->portid)) != NULL) {
3919ced0
MD
702 if (caps->ci_type == CAPT_FORKED) {
703 error = ENOTCONN;
704 } else if (caps->ci_rcaps == NULL) {
705 error = EINVAL;
706 } else {
707 uap->sysmsg_result64 = caps->ci_rcaps->ci_gen;
708 error = 0;
709 }
710 caps_drop(caps);
02fb4e24 711 } else {
3919ced0 712 error = EINVAL;
02fb4e24 713 }
3919ced0 714 rel_mplock();
02fb4e24 715 return(error);
e30f9e2c
MD
716}
717
f6bf3af1
MD
718/*
719 * caps_sys_put(portid, msg, msgsize)
720 *
721 * Send an opaque message of the specified size to the specified port. This
722 * function may only be used with a client port. The message id is returned.
3919ced0
MD
723 *
724 * MPALMOSTSAFE
f6bf3af1
MD
725 */
726int
753fd850 727sys_caps_sys_put(struct caps_sys_put_args *uap)
f6bf3af1 728{
9910d07b 729 struct thread *td = curthread;
f6bf3af1
MD
730 caps_kinfo_t caps;
731 caps_kmsg_t msg;
e30f9e2c 732 int error;
f6bf3af1
MD
733
734 if (uap->msgsize < 0)
735 return(EINVAL);
3919ced0
MD
736 get_mplock();
737
9910d07b 738 if ((caps = caps_find_id(td, uap->portid)) == NULL) {
3919ced0
MD
739 error = EINVAL;
740 goto done;
741 }
02fb4e24 742 if (caps->ci_type == CAPT_FORKED) {
e30f9e2c 743 error = ENOTCONN;
02fb4e24
MD
744 } else if (caps->ci_rcaps == NULL) {
745 error = EINVAL;
746 } else if (caps->ci_cmsgcount > CAPS_MAXINPROG) {
747 /*
748 * If this client has queued a large number of messages return
749 * ENOBUFS. The client must process some replies before it can
750 * send new messages. The server can also throttle a client by
751 * holding its replies. XXX allow a server to refuse messages from
752 * a client.
753 */
754 error = ENOBUFS;
f6bf3af1 755 } else {
02fb4e24
MD
756 msg = caps_alloc_msg(caps);
757 uap->sysmsg_offset = msg->km_msgid.c_id;
758
759 /*
760 * If the remote end is closed return ENOTCONN immediately, otherwise
761 * send it to the remote end.
762 *
763 * Note: since this is a new message, caps_load_ccr() returns a remote
764 * caps of NULL.
765 */
766 if (caps->ci_rcaps->ci_flags & CAPKF_CLOSED) {
767 error = ENOTCONN;
768 caps_free_msg(msg);
769 } else {
770 /*
771 * new message, load_ccr returns NULL. hold rcaps for put_msg
772 */
773 error = 0;
9910d07b 774 caps_load_ccr(caps, msg, td->td_lwp, uap->msg, uap->msgsize);
02fb4e24
MD
775 caps_hold(caps->ci_rcaps);
776 ++caps->ci_cmsgcount;
777 caps_put_msg(caps->ci_rcaps, msg, CAPMS_REQUEST); /* drops rcaps */
778 }
f6bf3af1
MD
779 }
780 caps_drop(caps);
3919ced0
MD
781done:
782 rel_mplock();
e30f9e2c 783 return(error);
f6bf3af1
MD
784}
785
786/*
787 * caps_sys_reply(portid, msg, msgsize, msgid)
788 *
789 * Reply to the message referenced by the specified msgid, supplying opaque
790 * data back to the originator.
3919ced0
MD
791 *
792 * MPALMOSTSAFE
f6bf3af1
MD
793 */
794int
753fd850 795sys_caps_sys_reply(struct caps_sys_reply_args *uap)
f6bf3af1 796{
9910d07b 797 struct thread *td = curthread;
f6bf3af1
MD
798 caps_kinfo_t caps;
799 caps_kinfo_t rcaps;
800 caps_kmsg_t msg;
02fb4e24 801 int error;
f6bf3af1
MD
802
803 if (uap->msgsize < 0)
804 return(EINVAL);
3919ced0
MD
805 get_mplock();
806
9910d07b 807 if ((caps = caps_find_id(td, uap->portid)) == NULL) {
3919ced0
MD
808 error = EINVAL;
809 goto done;
810 }
02fb4e24
MD
811 if (caps->ci_type == CAPT_FORKED) {
812 /*
813 * The caps structure is just a fork placeholder, tell the caller
814 * that he has to reconnect.
815 */
816 error = ENOTCONN;
817 } else if ((msg = caps_find_msg(caps, uap->msgcid)) == NULL) {
818 /*
819 * Could not find message being replied to (other side might have
820 * gone away).
821 */
822 error = EINVAL;
823 } else if ((msg->km_flags & CAPKMF_ONUSERQ) == 0) {
824 /*
825 * Trying to reply to a non-replyable message
826 */
827 error = EINVAL;
f6bf3af1 828 } else {
02fb4e24
MD
829 /*
830 * If the remote end is closed requeue to ourselves for disposal.
831 * Otherwise send the reply to the other end (the other end will
832 * return a passive DISPOSE to us when it has eaten the data)
833 */
834 error = 0;
835 caps_dequeue_msg(caps, msg);
02fb4e24 836 if (msg->km_mcaps->ci_flags & CAPKF_CLOSED) {
9910d07b 837 caps_drop(caps_load_ccr(caps, msg, td->td_lwp, NULL, 0));
02fb4e24
MD
838 caps_hold(caps); /* ref for message */
839 caps_put_msg(caps, msg, CAPMS_DISPOSE);
840 } else {
9910d07b 841 rcaps = caps_load_ccr(caps, msg, td->td_lwp, uap->msg, uap->msgsize);
02fb4e24
MD
842 caps_put_msg(rcaps, msg, CAPMS_REPLY);
843 }
f6bf3af1
MD
844 }
845 caps_drop(caps);
3919ced0
MD
846done:
847 rel_mplock();
02fb4e24 848 return(error);
f6bf3af1
MD
849}
850
851/*
852 * caps_sys_get(portid, msg, maxsize, msgid, ccr)
853 *
854 * Retrieve the next ready message on the port, store its message id in
855 * uap->msgid and return the length of the message. If the message is too
856 * large to fit the message id, length, and creds are still returned, but
857 * the message is not dequeued (the caller is expected to call again with
858 * a larger buffer or to reply the messageid if it does not want to handle
859 * the message).
860 *
861 * EWOULDBLOCK is returned if no messages are pending. Note that 0-length
862 * messages are perfectly acceptable so 0 can be legitimately returned.
3919ced0
MD
863 *
864 * MPALMOSTSAFE
f6bf3af1
MD
865 */
866int
753fd850 867sys_caps_sys_get(struct caps_sys_get_args *uap)
f6bf3af1 868{
9910d07b 869 struct thread *td = curthread;
f6bf3af1
MD
870 caps_kinfo_t caps;
871 caps_kmsg_t msg;
02fb4e24 872 int error;
f6bf3af1
MD
873
874 if (uap->maxsize < 0)
875 return(EINVAL);
3919ced0
MD
876 get_mplock();
877
9910d07b 878 if ((caps = caps_find_id(td, uap->portid)) != NULL) {
3919ced0
MD
879 if (caps->ci_type == CAPT_FORKED) {
880 error = ENOTCONN;
881 } else if ((msg = TAILQ_FIRST(&caps->ci_msgpendq)) == NULL) {
882 error = EWOULDBLOCK;
883 } else {
884 error = caps_process_msg(caps, msg, uap);
885 }
02fb4e24 886 } else {
3919ced0 887 error = EINVAL;
f6bf3af1 888 }
02fb4e24 889 caps_drop(caps);
3919ced0 890 rel_mplock();
02fb4e24 891 return(error);
f6bf3af1
MD
892}
893
894/*
895 * caps_sys_wait(portid, msg, maxsize, msgid, ccr)
896 *
897 * Retrieve the next ready message on the port, store its message id in
898 * uap->msgid and return the length of the message. If the message is too
899 * large to fit the message id, length, and creds are still returned, but
900 * the message is not dequeued (the caller is expected to call again with
901 * a larger buffer or to reply the messageid if it does not want to handle
902 * the message).
903 *
904 * This function blocks until interrupted or a message is received.
905 * Note that 0-length messages are perfectly acceptable so 0 can be
906 * legitimately returned.
3919ced0
MD
907 *
908 * MPALMOSTSAFE
f6bf3af1
MD
909 */
910int
753fd850 911sys_caps_sys_wait(struct caps_sys_wait_args *uap)
f6bf3af1 912{
9910d07b 913 struct thread *td = curthread;
f6bf3af1
MD
914 caps_kinfo_t caps;
915 caps_kmsg_t msg;
916 int error;
917
918 if (uap->maxsize < 0)
919 return(EINVAL);
3919ced0
MD
920 get_mplock();
921
9910d07b 922 if ((caps = caps_find_id(td, uap->portid)) != NULL) {
3919ced0
MD
923 if (caps->ci_type == CAPT_FORKED) {
924 error = ENOTCONN;
925 } else {
926 error = 0;
927 while ((msg = TAILQ_FIRST(&caps->ci_msgpendq)) == NULL) {
928 if ((error = tsleep(caps, PCATCH, "caps", 0)) != 0)
929 break;
930 }
931 if (error == 0) {
932 error = caps_process_msg(caps, msg,
933 (struct caps_sys_get_args *)uap);
934 }
f6bf3af1 935 }
3919ced0
MD
936 } else {
937 error = EINVAL;
f6bf3af1 938 }
02fb4e24 939 caps_drop(caps);
3919ced0 940 rel_mplock();
02fb4e24 941 return(error);
f6bf3af1
MD
942}
943
944static int
3919ced0
MD
945caps_process_msg(caps_kinfo_t caps, caps_kmsg_t msg,
946 struct caps_sys_get_args *uap)
f6bf3af1 947{
9910d07b 948 struct thread *td = curthread;
f6bf3af1
MD
949 int error = 0;
950 int msgsize;
951 caps_kinfo_t rcaps;
952
953 msg->km_flags |= CAPKMF_PEEKED;
02fb4e24 954 msgsize = msg->km_xio.xio_bytes;
f6bf3af1
MD
955 if (msgsize <= uap->maxsize)
956 caps_dequeue_msg(caps, msg);
957
02fb4e24 958 if (msg->km_xio.xio_bytes != 0) {
03aa69bd 959 error = xio_copy_xtou(&msg->km_xio, 0, uap->msg,
02fb4e24 960 min(msg->km_xio.xio_bytes, uap->maxsize));
f6bf3af1 961 if (error) {
7ed63b6c 962 if (msg->km_mcaps->ci_td && msg->km_mcaps->ci_td->td_proc) {
6ea70f76 963 kprintf("xio_copy_xtou: error %d from proc %d\n",
7ed63b6c
MD
964 error, msg->km_mcaps->ci_td->td_proc->p_pid);
965 }
f6bf3af1
MD
966 if (msgsize > uap->maxsize)
967 caps_dequeue_msg(caps, msg);
968 msgsize = 0;
969 error = 0;
970 }
971 }
972
973 if (uap->msgid)
974 error = copyout(&msg->km_msgid, uap->msgid, sizeof(msg->km_msgid));
975 if (uap->ccr)
976 error = copyout(&msg->km_ccr, uap->ccr, sizeof(msg->km_ccr));
977 if (error == 0)
978 uap->sysmsg_result = msgsize;
979
980 /*
981 * If the message was dequeued we must deal with it.
982 */
983 if (msgsize <= uap->maxsize) {
984 switch(msg->km_state) {
985 case CAPMS_REQUEST:
986 case CAPMS_REQUEST_RETRY:
987 TAILQ_INSERT_TAIL(&caps->ci_msguserq, msg, km_node);
988 msg->km_flags |= CAPKMF_ONUSERQ;
989 break;
990 case CAPMS_REPLY:
991 case CAPMS_REPLY_RETRY:
992 --caps->ci_cmsgcount;
9910d07b 993 rcaps = caps_load_ccr(caps, msg, td->td_lwp, NULL, 0);
f6bf3af1
MD
994 if (caps == rcaps || (rcaps->ci_flags & CAPKF_CLOSED)) {
995 /* degenerate disposal case */
996 caps_free_msg(msg);
997 caps_drop(rcaps);
998 } else {
999 caps_put_msg(rcaps, msg, CAPMS_DISPOSE);
1000 }
1001 break;
1002 case CAPMS_DISPOSE:
1003 caps_free_msg(msg);
1004 break;
1005 }
1006 }
f6bf3af1
MD
1007 return(error);
1008}
1009
1010/*
1011 * caps_sys_abort(portid, msgcid, flags)
1012 *
1013 * Abort a previously sent message. You must still wait for the message
1014 * to be returned after sending the abort request. This function will
1015 * return the appropriate CAPS_ABORT_* code depending on what it had
1016 * to do.
3919ced0
MD
1017 *
1018 * MPALMOSTSAFE
f6bf3af1
MD
1019 */
1020int
753fd850 1021sys_caps_sys_abort(struct caps_sys_abort_args *uap)
f6bf3af1
MD
1022{
1023 uap->sysmsg_result = CAPS_ABORT_NOTIMPL;
1024 return(0);
1025}
1026
1027/*
1028 * KERNEL SYSCALL SEPARATION SUPPORT FUNCTIONS
1029 */
1030
1031static
1032caps_kinfo_t
1033kern_caps_sys_service(const char *name, uid_t uid, gid_t gid,
1034 struct ucred *cred, int flags, int *error)
1035{
9910d07b 1036 struct thread *td = curthread;
f6bf3af1
MD
1037 caps_kinfo_t caps;
1038 int len;
1039
1040 len = strlen(name);
1041
1042 /*
1043 * Make sure we can use the uid and gid
1044 */
1045 if (cred) {
1046 if (cred->cr_uid != 0 && uid != (uid_t)-1 && cred->cr_uid != uid) {
1047 *error = EPERM;
1048 return(NULL);
1049 }
1050 if (cred->cr_uid != 0 && gid != (gid_t)-1 && !groupmember(gid, cred)) {
1051 *error = EPERM;
1052 return(NULL);
1053 }
1054 }
1055
1056 /*
1057 * Handle CAPF_EXCL
1058 */
1059 if (flags & CAPF_EXCL) {
1060 if ((caps = caps_find(name, strlen(name), uid, gid)) != NULL) {
1061 caps_drop(caps);
1062 *error = EEXIST;
1063 return(NULL);
1064 }
1065 }
1066
1067 /*
1068 * Create the service
1069 */
9910d07b 1070 caps = caps_alloc(td, name, len,
e30f9e2c
MD
1071 uid, gid, flags & CAPF_UFLAGS, CAPT_SERVICE);
1072 wakeup(&caps_waitsvc);
f6bf3af1
MD
1073 return(caps);
1074}
1075
1076static
1077caps_kinfo_t
1078kern_caps_sys_client(const char *name, uid_t uid, gid_t gid,
1079 struct ucred *cred, int flags, int *error)
1080{
9910d07b 1081 struct thread *td = curthread;
f6bf3af1
MD
1082 caps_kinfo_t caps, rcaps;
1083 int len;
1084
1085 len = strlen(name);
1086
1087 /*
1088 * Locate the CAPS service (rcaps ref is for caps->ci_rcaps)
1089 */
e30f9e2c 1090again:
f6bf3af1 1091 if ((rcaps = caps_find(name, len, uid, gid)) == NULL) {
e30f9e2c
MD
1092 if (flags & CAPF_WAITSVC) {
1093 char cbuf[32];
f8c7a42d 1094 ksnprintf(cbuf, sizeof(cbuf), "C%s", name);
e30f9e2c
MD
1095 *error = tsleep(&caps_waitsvc, PCATCH, cbuf, 0);
1096 if (*error == 0)
1097 goto again;
1098 } else {
1099 *error = ENOENT;
1100 }
f6bf3af1
MD
1101 return(NULL);
1102 }
1103
1104 /*
1105 * Check permissions
1106 */
1107 if (cred) {
1108 *error = EACCES;
1109 if ((flags & CAPF_USER) && (rcaps->ci_flags & CAPF_USER)) {
1110 if (rcaps->ci_uid != (uid_t)-1 && rcaps->ci_uid == cred->cr_uid)
1111 *error = 0;
1112 }
1113 if ((flags & CAPF_GROUP) && (rcaps->ci_flags & CAPF_GROUP)) {
1114 if (rcaps->ci_gid != (gid_t)-1 && groupmember(rcaps->ci_gid, cred))
1115 *error = 0;
1116 }
1117 if ((flags & CAPF_WORLD) && (rcaps->ci_flags & CAPF_WORLD)) {
1118 *error = 0;
1119 }
1120 if (*error) {
1121 caps_drop(rcaps);
1122 return(NULL);
1123 }
1124 } else {
1125 *error = 0;
1126 }
1127
1128 /*
1129 * Allocate the client side and connect to the server
1130 */
9910d07b 1131 caps = caps_alloc(td, name, len,
e30f9e2c 1132 uid, gid, flags & CAPF_UFLAGS, CAPT_CLIENT);
f6bf3af1
MD
1133 caps->ci_rcaps = rcaps;
1134 caps->ci_flags |= CAPKF_RCAPS;
1135 return(caps);
1136}
1137