2 * ----------------------------------------------------------------------------
3 * "THE BEER-WARE LICENSE" (Revision 42):
4 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you
5 * can do whatever you want with this stuff. If we meet some day, and you think
6 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
7 * ----------------------------------------------------------------------------
11 * Copyright (c) 2006 Victor Balada Diaz <victor@bsdes.net>
12 * All rights reserved.
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * $FreeBSD: src/sys/kern/kern_jail.c,v 1.6.2.3 2001/08/17 01:00:26 rwatson Exp $
39 * $DragonFly: src/sys/kern/kern_jail.c,v 1.19 2008/05/17 18:20:33 dillon Exp $
42 #include "opt_inet6.h"
44 #include <sys/param.h>
45 #include <sys/types.h>
46 #include <sys/kernel.h>
47 #include <sys/systm.h>
48 #include <sys/errno.h>
49 #include <sys/sysproto.h>
50 #include <sys/malloc.h>
51 #include <sys/nlookup.h>
52 #include <sys/namecache.h>
56 #include <sys/socket.h>
57 #include <sys/sysctl.h>
58 #include <sys/kern_syscall.h>
60 #include <netinet/in.h>
61 #include <netinet6/in6_var.h>
63 #include <sys/mplock2.h>
/*
 * File-local helpers that are defined further down in this file, and the
 * malloc type (M_PRISON) used for the prison-related kmalloc()/kfree()
 * calls below.
 */
65 static struct prison *prison_find(int);
66 static void prison_ipcache_init(struct prison *);
68 MALLOC_DEFINE(M_PRISON, "prison", "Prison structures");
/*
 * Jail policy knobs.  Each integer below is registered with SYSCTL_INT
 * under the _jail node as a read-write (CTLFLAG_RW) value, together with
 * its description string; the initializer is the compiled-in default.
 */
70 SYSCTL_NODE(, OID_AUTO, jail, CTLFLAG_RW, 0,
73 int jail_set_hostname_allowed = 1;
74 SYSCTL_INT(_jail, OID_AUTO, set_hostname_allowed, CTLFLAG_RW,
75 &jail_set_hostname_allowed, 0,
76 "Processes in jail can set their hostnames");
78 int jail_socket_unixiproute_only = 1;
79 SYSCTL_INT(_jail, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW,
80 &jail_socket_unixiproute_only, 0,
81 "Processes in jail are limited to creating UNIX/IPv[46]/route sockets only");
83 int jail_sysvipc_allowed = 0;
84 SYSCTL_INT(_jail, OID_AUTO, sysvipc_allowed, CTLFLAG_RW,
85 &jail_sysvipc_allowed, 0,
86 "Processes in jail can use System V IPC primitives");
88 int jail_chflags_allowed = 0;
89 SYSCTL_INT(_jail, OID_AUTO, chflags_allowed, CTLFLAG_RW,
90 &jail_chflags_allowed, 0,
91 "Process in jail can set chflags(1)");
93 int jail_allow_raw_sockets = 0;
94 SYSCTL_INT(_jail, OID_AUTO, allow_raw_sockets, CTLFLAG_RW,
95 &jail_allow_raw_sockets, 0,
96 "Process in jail can create raw sockets");
/*
 * Global list of prisons.  Entries are linked through their pr_list
 * member; the list is walked by id lookup, id assignment and the jail
 * list sysctl handler in this file.
 */
101 LIST_HEAD(prisonlist, prison);
102 struct prisonlist allprison = LIST_HEAD_INITIALIZER(&allprison);
/*
 * Attach the current process (curthread->td_proc) to the prison whose id
 * is jid.
 *
 * The prison is looked up with prison_find(), kern_chroot() is called on
 * the prison's root (pr_root), and finally the process credential is
 * pointed at the prison and P_JAILED is set in the process flags.
 */
105 kern_jail_attach(int jid)
107 struct proc *p = curthread->td_proc;
111 pr = prison_find(jid);
115 error = kern_chroot(&pr->pr_root);
/* The process token is held across the credential and flag updates. */
120 lwkt_gettoken(&p->p_token);
122 p->p_ucred->cr_prison = pr;
123 p->p_flags |= P_JAILED;
124 lwkt_reltoken(&p->p_token);
/*
 * Choose an id for pr.
 *
 * The candidate id starts at lastprid + 1 and is compared against the
 * ids of the prisons already linked on allprison.  The id that is finally
 * chosen is stored in pr->pr_id and also remembered in lastprid, so the
 * next call starts from the id after it.
 */
130 assign_prison_id(struct prison *pr)
135 tryprid = lastprid + 1;
/*
 * NOTE(review): the candidate is compared with JAIL_MAX both here and
 * again after the list scan; presumably one handles wrap-around and the
 * other reports that no id is free -- confirm.
 */
136 if (tryprid == JAIL_MAX)
139 LIST_FOREACH(tpr, &allprison, pr_list) {
140 if (tpr->pr_id != tryprid)
143 if (tryprid == JAIL_MAX) {
/* Record the chosen id in the prison and as the new starting point. */
148 pr->pr_id = lastprid = tryprid;
/*
 * Finish setting up the prison pr from the jail description j and attach
 * the calling process to it.
 *
 * Visible steps, in order: resolve j->path (a userspace pointer, looked
 * up with NLC_FOLLOW) and copy the resulting namecache handle into
 * pr->pr_root; initialize the prison's varsym set and IP cache; assign a
 * prison id; link the prison onto allprison; attach via
 * kern_jail_attach().  The varsymset_clean() and LIST_REMOVE() calls
 * after assign_prison_id() and kern_jail_attach() undo the earlier steps;
 * presumably they run only when the preceding call failed -- confirm.
 */
154 kern_jail(struct prison *pr, struct jail *j)
157 struct nlookupdata nd;
/* j->path is read from user space (UIO_USERSPACE). */
159 error = nlookup_init(&nd, j->path, UIO_USERSPACE, NLC_FOLLOW);
164 error = nlookup(&nd);
/* The looked-up namecache handle becomes the prison root. */
169 cache_copy(&nd.nl_nch, &pr->pr_root);
171 varsymset_init(&pr->pr_varsymset, NULL);
172 prison_ipcache_init(pr);
174 error = assign_prison_id(pr);
176 varsymset_clean(&pr->pr_varsymset);
/* The prison must be on allprison before kern_jail_attach() looks it up by id. */
181 LIST_INSERT_HEAD(&allprison, pr, pr_list);
184 error = kern_jail_attach(pr->pr_id);
186 LIST_REMOVE(pr, pr_list);
188 varsymset_clean(&pr->pr_varsymset);
197 * jail_args(syscallarg(struct jail *) jail)
/*
 * jail system call handler.
 *
 * sysmsg_result is preset to -1 and is replaced with the new prison id
 * once kern_jail() has been called.  The caller is checked for
 * PRIV_JAIL_CREATE.  A version field is copied in from the start of the
 * user structure, a zeroed prison is allocated, and its address list
 * (pr_ips) is filled in according to the structure layout in use: a
 * single IPv4 address for the old layout (jv0), or j.n_ips addresses
 * copied in from user space for the DragonFly multi-address layout.  The
 * hostname is then copied in and kern_jail() does the rest.  The loop at
 * the end empties pr_ips and frees every entry.
 */
202 sys_jail(struct jail_args *uap)
204 struct thread *td = curthread;
206 struct jail_ip_storage *jip;
211 uap->sysmsg_result = -1;
213 error = priv_check(td, PRIV_JAIL_CREATE);
/* Only the version field is fetched here; the full structure is copied in later. */
217 error = copyin(uap->jail, &jversion, sizeof(jversion));
221 pr = kmalloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
222 SLIST_INIT(&pr->pr_ips);
227 /* Single IPv4 jails. */
230 struct sockaddr_in ip4addr;
232 error = copyin(uap->jail, &jv0, sizeof(jv0));
237 j.hostname = jv0.hostname;
/*
 * Build the single list entry: jv0.ip_number is converted with htonl()
 * and stored as an AF_INET sockaddr in the zero-allocated storage.
 * NOTE(review): only sin_family and sin_addr of the on-stack ip4addr are
 * set in the lines visible before the memcpy(); confirm the remaining
 * fields are initialized.
 */
239 jip = kmalloc(sizeof(*jip), M_PRISON, M_WAITOK | M_ZERO);
240 ip4addr.sin_family = AF_INET;
241 ip4addr.sin_addr.s_addr = htonl(jv0.ip_number);
242 memcpy(&jip->ip, &ip4addr, sizeof(ip4addr));
243 SLIST_INSERT_HEAD(&pr->pr_ips, jip, entries);
249 * DragonFly multi noIP/IPv4/IPv6 jails
251 * NOTE: This version is unsupported by FreeBSD
252 * (which uses version 2 instead).
255 error = copyin(uap->jail, &j, sizeof(j));
/*
 * One list entry per user-supplied address.  Each entry is linked onto
 * pr_ips before its contents are copied in, so the list-freeing loop at
 * the end of this function also covers entries whose copyin() failed.
 * NOTE(review): no bound on j.n_ips (a value copied from user space) is
 * visible here -- confirm it is validated.
 */
259 for (int i = 0; i < j.n_ips; i++) {
260 jip = kmalloc(sizeof(*jip), M_PRISON,
262 SLIST_INSERT_HEAD(&pr->pr_ips, jip, entries);
263 error = copyin(&j.ips[i], &jip->ip,
264 sizeof(struct sockaddr_storage));
/* The hostname copy is bounded by sizeof(pr->pr_host). */
274 error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0);
278 error = kern_jail(pr, &j);
282 uap->sysmsg_result = pr->pr_id;
/* Empty the address list, freeing each entry. */
288 while (!SLIST_EMPTY(&pr->pr_ips)) {
289 jip = SLIST_FIRST(&pr->pr_ips);
290 SLIST_REMOVE_HEAD(&pr->pr_ips, entries);
291 kfree(jip, M_PRISON);
299 * int jail_attach(int jid);
/*
 * jail_attach system call handler: checks the calling thread for
 * PRIV_JAIL_ATTACH and then hands uap->jid to kern_jail_attach().
 */
304 sys_jail_attach(struct jail_attach_args *uap)
306 struct thread *td = curthread;
309 error = priv_check(td, PRIV_JAIL_ATTACH);
313 error = kern_jail_attach(uap->jid);
/*
 * Set up the per-prison address cache from pr->pr_ips.
 *
 * Every entry on the prison's address list is classified by address
 * family and by whether it is a loopback address.  The local_ip4 /
 * local_ip6 slots are examined on the loopback branches, and the
 * nonlocal_ip4 / nonlocal_ip6 slots are filled with the current entry
 * otherwise.  A slot is only touched while it is still NULL, so an
 * earlier list entry is never replaced by a later one.  The cached
 * pointers refer to the sockaddr storage inside the list entries.
 */
319 prison_ipcache_init(struct prison *pr)
321 struct jail_ip_storage *jis;
322 struct sockaddr_in *ip4;
323 struct sockaddr_in6 *ip6;
325 SLIST_FOREACH(jis, &pr->pr_ips, entries) {
326 switch (jis->ip.ss_family) {
/* IPv4: classify by the host-order address shifted right by IN_CLASSA_NSHIFT. */
328 ip4 = (struct sockaddr_in *)&jis->ip;
329 if ((ntohl(ip4->sin_addr.s_addr) >> IN_CLASSA_NSHIFT) ==
331 /* loopback address */
332 if (pr->local_ip4 == NULL)
336 if (pr->nonlocal_ip4 == NULL)
337 pr->nonlocal_ip4 = ip4;
/* IPv6: classify with IN6_IS_ADDR_LOOPBACK(). */
342 ip6 = (struct sockaddr_in6 *)&jis->ip;
343 if (IN6_IS_ADDR_LOOPBACK(&ip6->sin6_addr)) {
344 /* loopback address */
345 if (pr->local_ip6 == NULL)
349 if (pr->nonlocal_ip6 == NULL)
350 pr->nonlocal_ip6 = ip6;
358 * Changes INADDR_LOOPBACK for a valid jail address.
359 * ip is in network byte order.
360 * Returns 1 if the ip is among jail valid ips.
361 * Returns 0 if it is not among the jail's valid ips or
362 * if INADDR_LOOPBACK couldn't be replaced with a valid
/*
 * Checks, in order: whether the thread has a process and a credential,
 * whether that credential belongs to a prison, whether ip is the
 * wildcard address of its family (INADDR_ANY / unspecified IPv6),
 * whether ip is the loopback address of its family, and finally whether
 * ip is one of the prison's own addresses (jailed_ip()).
 *
 * For a loopback address, prison_get_local() and then
 * prison_get_nonlocal() are called with ip as the output argument, so a
 * successful call overwrites the address stored in *ip.
 */
366 prison_replace_wildcards(struct thread *td, struct sockaddr *ip)
/* The same buffer is viewed as IPv4 or IPv6 depending on ip->sa_family. */
368 struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
369 struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
372 if (td->td_proc == NULL || td->td_ucred == NULL)
374 if ((pr = td->td_ucred->cr_prison) == NULL)
/* Wildcard address of either family. */
377 if ((ip->sa_family == AF_INET &&
378 ip4->sin_addr.s_addr == htonl(INADDR_ANY)) ||
379 (ip->sa_family == AF_INET6 &&
380 IN6_IS_ADDR_UNSPECIFIED(&ip6->sin6_addr)))
/* Loopback address of either family: try the cached loopback address first, then the cached non-loopback one. */
382 if ((ip->sa_family == AF_INET &&
383 ip4->sin_addr.s_addr == htonl(INADDR_LOOPBACK)) ||
384 (ip->sa_family == AF_INET6 &&
385 IN6_IS_ADDR_LOOPBACK(&ip6->sin6_addr))) {
386 if (!prison_get_local(pr, ip->sa_family, ip) &&
387 !prison_get_nonlocal(pr, ip->sa_family, ip))
392 if (jailed_ip(pr, ip))
/*
 * Loopback rewriting for the address ip on behalf of thread td.
 *
 * The thread (which may be NULL here), its process and its credential
 * are checked first, then whether the credential belongs to a prison.
 * If ip is the loopback address of its family, prison_get_local() and
 * then prison_get_nonlocal() are called with ip as the output argument,
 * so a successful call overwrites the address stored in *ip.
 */
398 prison_remote_ip(struct thread *td, struct sockaddr *ip)
/* The same buffer is viewed as IPv4 or IPv6 depending on ip->sa_family. */
400 struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
401 struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
404 if (td == NULL || td->td_proc == NULL || td->td_ucred == NULL)
406 if ((pr = td->td_ucred->cr_prison) == NULL)
408 if ((ip->sa_family == AF_INET &&
409 ip4->sin_addr.s_addr == htonl(INADDR_LOOPBACK)) ||
410 (ip->sa_family == AF_INET6 &&
411 IN6_IS_ADDR_LOOPBACK(&ip6->sin6_addr))) {
412 if (!prison_get_local(pr, ip->sa_family, ip) &&
413 !prison_get_nonlocal(pr, ip->sa_family, ip))
422 * Prison get non loopback ip:
423 * - af is the address family of the ip we want (AF_INET|AF_INET6).
424 * - If ip != NULL, put the first IP address that is not a loopback address
427 * ip is in network byte order and we don't touch it unless we find a valid ip.
428 * No matter if ip == NULL or not, we return either a valid struct sockaddr *,
429 * or NULL. This struct may not be modified.
/*
 * Return the prison's cached non-loopback address (nonlocal_ip4 or
 * nonlocal_ip6).
 *
 * When the caller passes a non-NULL ip and the cached entry exists, the
 * address field of *ip is overwritten with the cached address.  The
 * cached pointer itself is returned whether or not ip was supplied, so
 * the result is NULL when nothing is cached.
 */
432 prison_get_nonlocal(struct prison *pr, sa_family_t af, struct sockaddr *ip)
434 struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
435 struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
437 /* Check if it is cached */
/* IPv4: only the sin_addr field of the caller's sockaddr is written. */
440 if (ip4 != NULL && pr->nonlocal_ip4 != NULL)
441 ip4->sin_addr.s_addr = pr->nonlocal_ip4->sin_addr.s_addr;
442 return (struct sockaddr *)pr->nonlocal_ip4;
/* IPv6: only the sin6_addr field of the caller's sockaddr is written. */
445 if (ip6 != NULL && pr->nonlocal_ip6 != NULL)
446 ip6->sin6_addr = pr->nonlocal_ip6->sin6_addr;
447 return (struct sockaddr *)pr->nonlocal_ip6;
455 * Prison get loopback ip.
456 * - af is the address family of the ip we want (AF_INET|AF_INET6).
457 * - If ip != NULL, put the first IP address that is a loopback address
460 * ip is in network byte order and we don't touch it unless we find a valid ip.
461 * No matter if ip == NULL or not, we return either a valid struct sockaddr *,
462 * or NULL. This struct may not be modified.
/*
 * Return the prison's cached loopback address (local_ip4 or local_ip6).
 *
 * When the caller passes a non-NULL ip and the cached entry exists, the
 * address field of *ip is overwritten with the cached address.  The
 * cached pointer itself is returned whether or not ip was supplied, so
 * the result is NULL when nothing is cached.
 */
465 prison_get_local(struct prison *pr, sa_family_t af, struct sockaddr *ip)
467 struct sockaddr_in *ip4 = (struct sockaddr_in *)ip;
468 struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)ip;
470 /* Check if it is cached */
/* IPv4: only the sin_addr field of the caller's sockaddr is written. */
473 if (ip4 != NULL && pr->local_ip4 != NULL)
474 ip4->sin_addr.s_addr = pr->local_ip4->sin_addr.s_addr;
475 return (struct sockaddr *)pr->local_ip4;
/* IPv6: only the sin6_addr field of the caller's sockaddr is written. */
478 if (ip6 != NULL && pr->local_ip6 != NULL)
479 ip6->sin6_addr = pr->local_ip6->sin6_addr;
480 return (struct sockaddr *)pr->local_ip6;
487 /* Check if the IP is among ours, if it is return 1, else 0 */
/*
 * Membership test: compare ip against every entry on pr->pr_ips.
 *
 * The comparison used depends on ip->sa_family.  A list entry only
 * matches when it is of the same family as ip and its address field is
 * equal (s_addr for IPv4, IN6_ARE_ADDR_EQUAL for IPv6); ports and other
 * sockaddr fields are not compared.
 */
489 jailed_ip(struct prison *pr, struct sockaddr *ip)
491 struct jail_ip_storage *jis;
492 struct sockaddr_in *jip4, *ip4;
493 struct sockaddr_in6 *jip6, *ip6;
/* The same buffer is viewed as IPv4 or IPv6 depending on ip->sa_family. */
497 ip4 = (struct sockaddr_in *)ip;
498 ip6 = (struct sockaddr_in6 *)ip;
499 SLIST_FOREACH(jis, &pr->pr_ips, entries) {
500 switch (ip->sa_family) {
502 jip4 = (struct sockaddr_in *) &jis->ip;
503 if (jip4->sin_family == AF_INET &&
504 ip4->sin_addr.s_addr == jip4->sin_addr.s_addr)
508 jip6 = (struct sockaddr_in6 *) &jis->ip;
509 if (jip6->sin6_family == AF_INET6 &&
510 IN6_ARE_ADDR_EQUAL(&ip6->sin6_addr,
/*
 * Classify the address sa against the prison of the credential cred.
 *
 * Three cases are distinguished, in this order: the family is neither
 * AF_INET nor AF_INET6 while jail_socket_unixiproute_only is set; the
 * family is neither AF_INET nor AF_INET6 (knob clear); and the address
 * is one of the prison's own addresses according to jailed_ip().
 *
 * NOTE(review): cred->cr_prison is read without a visible NULL check;
 * presumably callers only pass jailed credentials -- confirm.
 */
521 prison_if(struct ucred *cred, struct sockaddr *sa)
/* sa is viewed as a sockaddr_in only to read its family field. */
524 struct sockaddr_in *sai = (struct sockaddr_in*) sa;
526 pr = cred->cr_prison;
528 if (((sai->sin_family != AF_INET) && (sai->sin_family != AF_INET6))
529 && jail_socket_unixiproute_only)
531 else if ((sai->sin_family != AF_INET) && (sai->sin_family != AF_INET6))
533 else if (jailed_ip(pr, sa))
539 * Returns a prison instance, or NULL on failure.
/*
 * Look up a prison by id: walks allprison comparing each entry's pr_id
 * with prid.
 */
541 static struct prison *
542 prison_find(int prid)
546 LIST_FOREACH(pr, &allprison, pr_list) {
547 if (pr->pr_id == prid)
/*
 * sysctl handler that produces a textual list of the prisons on
 * allprison.
 *
 * The text is assembled in a temporary buffer (jls) of count * 1024
 * bytes plus one, with jlssize as the usable size and jlsused as the
 * bytes written so far.  For every prison the id, hostname and full root
 * path are formatted with ksnprintf(), followed by one entry per address
 * on pr_ips; records after the first are preceded by a newline.  The
 * finished text is handed to the requester with SYSCTL_OUT().
 */
554 sysctl_jail_list(SYSCTL_HANDLER_ARGS)
556 struct thread *td = curthread;
557 struct jail_ip_storage *jip;
559 struct sockaddr_in6 *jsin6;
561 struct sockaddr_in *jsin;
564 unsigned int jlssize, jlsused;
566 char *jls; /* Jail list */
567 char *oip; /* Output ip */
568 char *fullpath, *freepath;
/* Callers whose credential is jailed are handled separately up front. */
572 if (jailed(td->td_ucred))
/* 1024 bytes of output are budgeted per counted prison; the buffer is zero-filled. */
581 jlssize = (count * 1024);
582 jls = kmalloc(jlssize + 1, M_TEMP, M_WAITOK | M_ZERO);
583 if (count < prisoncount) {
589 LIST_FOREACH(pr, &allprison, pr_list) {
/* cache_fullpath() yields the printable path plus the buffer (freepath) that is freed below. */
590 error = cache_fullpath(lp->lwp_proc, &pr->pr_root,
591 &fullpath, &freepath, 0);
/* Separate records with a newline, but not before the first one. */
594 if (jlsused && jlsused < jlssize)
595 jls[jlsused++] = '\n';
596 count = ksnprintf(jls + jlsused, (jlssize - jlsused),
598 pr->pr_id, pr->pr_host, fullpath);
599 kfree(freepath, M_TEMP);
605 SLIST_FOREACH(jip, &pr->pr_ips, entries) {
606 jsin = (struct sockaddr_in *)&jip->ip;
/* Render the address as text according to its family. */
608 switch(jsin->sin_family) {
610 oip = inet_ntoa(jsin->sin_addr);
614 jsin6 = (struct sockaddr_in6 *)&jip->ip;
615 oip = ip6_sprintf(&jsin6->sin6_addr);
/* Space check: the remaining room must hold the address text plus one more byte. */
623 if ((jlssize - jlsused) < (strlen(oip) + 1)) {
627 count = ksnprintf(jls + jlsused, (jlssize - jlsused),
637 * pr_id <SPC> hostname1 <SPC> PATH1 <SPC> IP1 <SPC> IP2\npr_id...
/* Only the jlsused bytes actually written are copied out. */
639 error = SYSCTL_OUT(req, jls, jlsused);
/* Registers the handler as the read-only string OID "list" under the _jail node. */
645 SYSCTL_OID(_jail, OID_AUTO, list, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0,
646 sysctl_jail_list, "A", "List of active jails");
/*
 * Take one additional reference on pr by atomically incrementing
 * pr->pr_ref.
 */
652 prison_hold(struct prison *pr)
654 atomic_add_int(&pr->pr_ref, 1);
/*
 * Drop one reference on pr.
 *
 * The reference count is asserted to be positive and then decremented
 * atomically.  atomic_fetchadd_int() returns the value before the
 * decrement, so a result of 1 means this was the last reference; the
 * teardown that follows unlinks the prison from allprison, frees every
 * entry on pr_ips, frees pr_linux when it is set, cleans the varsym set
 * and drops the namecache reference held in pr_root.
 */
661 prison_free(struct prison *pr)
663 struct jail_ip_storage *jls;
665 KKASSERT(pr->pr_ref > 0);
/* A previous value other than 1 means other references remain. */
666 if (atomic_fetchadd_int(&pr->pr_ref, -1) != 1)
670 * The MP lock is needed on the last ref to adjust
678 LIST_REMOVE(pr, pr_list);
/* Empty the address list, freeing each entry. */
685 while (!SLIST_EMPTY(&pr->pr_ips)) {
686 jls = SLIST_FIRST(&pr->pr_ips);
687 SLIST_REMOVE_HEAD(&pr->pr_ips, entries);
688 kfree(jls, M_PRISON);
691 if (pr->pr_linux != NULL)
692 kfree(pr->pr_linux, M_PRISON);
693 varsymset_clean(&pr->pr_varsymset);
694 cache_drop(&pr->pr_root);
699 * Check if permission for a specific privilege is granted within jail.
704 prison_priv_check(struct ucred *cred, int priv)
710 case PRIV_CRED_SETUID:
711 case PRIV_CRED_SETEUID:
712 case PRIV_CRED_SETGID:
713 case PRIV_CRED_SETEGID:
714 case PRIV_CRED_SETGROUPS:
715 case PRIV_CRED_SETREUID:
716 case PRIV_CRED_SETREGID:
717 case PRIV_CRED_SETRESUID:
718 case PRIV_CRED_SETRESGID:
720 case PRIV_VFS_SYSFLAGS:
723 case PRIV_VFS_CHROOT:
725 case PRIV_VFS_CHFLAGS_DEV:
726 case PRIV_VFS_REVOKE:
727 case PRIV_VFS_MKNOD_BAD:
728 case PRIV_VFS_MKNOD_WHT:
729 case PRIV_VFS_MKNOD_DIR:
730 case PRIV_VFS_SETATTR:
731 case PRIV_VFS_SETGID:
733 case PRIV_PROC_SETRLIMIT:
734 case PRIV_PROC_SETLOGIN:
736 case PRIV_SYSCTL_WRITEJAIL:
738 case PRIV_VARSYM_SYS:
740 case PRIV_SETHOSTNAME:
742 case PRIV_PROC_TRESPASS:
746 case PRIV_UFS_QUOTAON:
747 case PRIV_UFS_QUOTAOFF:
748 case PRIV_VFS_SETQUOTA:
749 case PRIV_UFS_SETUSE:
750 case PRIV_VFS_GETQUOTA:
754 case PRIV_DEBUG_UNPRIV:
759 * Allow jailed root to bind reserved ports.
761 case PRIV_NETINET_RESERVEDPORT:
766 * Conditionally allow creating raw sockets in jail.
768 case PRIV_NETINET_RAW:
769 if (jail_allow_raw_sockets)
774 case PRIV_HAMMER_IOCTL: