kernel - Major signal path adjustments to fix races, tsleep race fixes, +more
[dragonfly.git] / sys / kern / kern_resource.c
CommitLineData
984263bc
MD
1/*-
2 * Copyright (c) 1982, 1986, 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)kern_resource.c 8.5 (Berkeley) 1/21/94
39 * $FreeBSD: src/sys/kern/kern_resource.c,v 1.55.2.5 2001/11/03 01:41:08 ps Exp $
3a6117bb 40 * $DragonFly: src/sys/kern/kern_resource.c,v 1.35 2008/05/27 05:25:34 dillon Exp $
984263bc
MD
41 */
42
43#include "opt_compat.h"
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/sysproto.h>
48#include <sys/file.h>
9697c509 49#include <sys/kern_syscall.h>
984263bc
MD
50#include <sys/kernel.h>
51#include <sys/resourcevar.h>
52#include <sys/malloc.h>
53#include <sys/proc.h>
895c1f85 54#include <sys/priv.h>
984263bc 55#include <sys/time.h>
508ceb09 56#include <sys/lockf.h>
984263bc
MD
57
58#include <vm/vm.h>
59#include <vm/vm_param.h>
60#include <sys/lock.h>
61#include <vm/pmap.h>
62#include <vm/vm_map.h>
63
37af14fe 64#include <sys/thread2.h>
9d7a637e 65#include <sys/spinlock2.h>
37af14fe 66
402ed7e1 67static int donice (struct proc *chgp, int n);
aa166ad1 68static int doionice (struct proc *chgp, int n);
984263bc
MD
69
70static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
71#define UIHASH(uid) (&uihashtbl[(uid) & uihash])
9d7a637e 72static struct spinlock uihash_lock;
984263bc
MD
73static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
74static u_long uihash; /* size of hash table - 1 */
75
402ed7e1
RG
76static struct uidinfo *uicreate (uid_t uid);
77static struct uidinfo *uilookup (uid_t uid);
984263bc
MD
78
79/*
80 * Resource controls and accounting.
81 */
82
8fa76237
MD
83struct getpriority_info {
84 int low;
85 int who;
86};
87
88static int getpriority_callback(struct proc *p, void *data);
89
3919ced0
MD
90/*
91 * MPALMOSTSAFE
92 */
984263bc 93int
753fd850 94sys_getpriority(struct getpriority_args *uap)
984263bc 95{
8fa76237 96 struct getpriority_info info;
41c20dac
MD
97 struct proc *curp = curproc;
98 struct proc *p;
99 int low = PRIO_MAX + 1;
3919ced0
MD
100 int error;
101
984263bc 102 switch (uap->which) {
984263bc 103 case PRIO_PROCESS:
58c2553a 104 if (uap->who == 0) {
984263bc 105 p = curp;
58c2553a
MD
106 PHOLD(p);
107 } else {
984263bc 108 p = pfind(uap->who);
58c2553a
MD
109 }
110 if (p) {
111 if (PRISON_CHECK(curp->p_ucred, p->p_ucred)) {
112 low = p->p_nice;
113 }
114 PRELE(p);
115 }
984263bc
MD
116 break;
117
41c20dac
MD
118 case PRIO_PGRP:
119 {
1fd87d54 120 struct pgrp *pg;
984263bc 121
58c2553a 122 if (uap->who == 0) {
984263bc 123 pg = curp->p_pgrp;
58c2553a
MD
124 pgref(pg);
125 } else if ((pg = pgfind(uap->who)) == NULL) {
984263bc 126 break;
58c2553a
MD
127 } /* else ref held from pgfind */
128
984263bc 129 LIST_FOREACH(p, &pg->pg_members, p_pglist) {
58c2553a
MD
130 if (PRISON_CHECK(curp->p_ucred, p->p_ucred) &&
131 p->p_nice < low) {
984263bc 132 low = p->p_nice;
58c2553a 133 }
984263bc 134 }
58c2553a 135 pgrel(pg);
984263bc
MD
136 break;
137 }
984263bc
MD
138 case PRIO_USER:
139 if (uap->who == 0)
140 uap->who = curp->p_ucred->cr_uid;
8fa76237
MD
141 info.low = low;
142 info.who = uap->who;
143 allproc_scan(getpriority_callback, &info);
144 low = info.low;
984263bc
MD
145 break;
146
147 default:
3919ced0
MD
148 error = EINVAL;
149 goto done;
150 }
151 if (low == PRIO_MAX + 1) {
152 error = ESRCH;
153 goto done;
984263bc 154 }
c7114eea 155 uap->sysmsg_result = low;
3919ced0
MD
156 error = 0;
157done:
3919ced0 158 return (error);
984263bc
MD
159}
160
8fa76237
MD
161/*
162 * Figure out the current lowest nice priority for processes owned
163 * by the specified user.
164 */
165static
166int
167getpriority_callback(struct proc *p, void *data)
168{
169 struct getpriority_info *info = data;
170
171 if (PRISON_CHECK(curproc->p_ucred, p->p_ucred) &&
172 p->p_ucred->cr_uid == info->who &&
173 p->p_nice < info->low) {
174 info->low = p->p_nice;
175 }
176 return(0);
177}
178
179struct setpriority_info {
180 int prio;
181 int who;
182 int error;
183 int found;
184};
185
186static int setpriority_callback(struct proc *p, void *data);
187
3919ced0
MD
188/*
189 * MPALMOSTSAFE
190 */
984263bc 191int
753fd850 192sys_setpriority(struct setpriority_args *uap)
984263bc 193{
8fa76237 194 struct setpriority_info info;
41c20dac
MD
195 struct proc *curp = curproc;
196 struct proc *p;
984263bc
MD
197 int found = 0, error = 0;
198
58c2553a 199 lwkt_gettoken(&proc_token);
3919ced0 200
984263bc 201 switch (uap->which) {
984263bc 202 case PRIO_PROCESS:
58c2553a 203 if (uap->who == 0) {
984263bc 204 p = curp;
58c2553a
MD
205 PHOLD(p);
206 } else {
984263bc 207 p = pfind(uap->who);
58c2553a
MD
208 }
209 if (p) {
210 if (PRISON_CHECK(curp->p_ucred, p->p_ucred)) {
211 error = donice(p, uap->prio);
212 found++;
213 }
214 PRELE(p);
215 }
984263bc
MD
216 break;
217
41c20dac
MD
218 case PRIO_PGRP:
219 {
1fd87d54 220 struct pgrp *pg;
984263bc 221
58c2553a 222 if (uap->who == 0) {
984263bc 223 pg = curp->p_pgrp;
58c2553a
MD
224 pgref(pg);
225 } else if ((pg = pgfind(uap->who)) == NULL) {
984263bc 226 break;
58c2553a
MD
227 } /* else ref held from pgfind */
228
984263bc 229 LIST_FOREACH(p, &pg->pg_members, p_pglist) {
41c20dac
MD
230 if (PRISON_CHECK(curp->p_ucred, p->p_ucred)) {
231 error = donice(p, uap->prio);
984263bc
MD
232 found++;
233 }
234 }
58c2553a 235 pgrel(pg);
984263bc
MD
236 break;
237 }
984263bc
MD
238 case PRIO_USER:
239 if (uap->who == 0)
240 uap->who = curp->p_ucred->cr_uid;
8fa76237
MD
241 info.prio = uap->prio;
242 info.who = uap->who;
243 info.error = 0;
244 info.found = 0;
245 allproc_scan(setpriority_callback, &info);
246 error = info.error;
247 found = info.found;
984263bc
MD
248 break;
249
250 default:
3919ced0
MD
251 error = EINVAL;
252 found = 1;
253 break;
984263bc 254 }
3919ced0 255
58c2553a
MD
256 lwkt_reltoken(&proc_token);
257
984263bc 258 if (found == 0)
3919ced0 259 error = ESRCH;
984263bc
MD
260 return (error);
261}
262
8fa76237
MD
263static
264int
265setpriority_callback(struct proc *p, void *data)
266{
267 struct setpriority_info *info = data;
268 int error;
269
270 if (p->p_ucred->cr_uid == info->who &&
271 PRISON_CHECK(curproc->p_ucred, p->p_ucred)) {
272 error = donice(p, info->prio);
273 if (error)
274 info->error = error;
275 ++info->found;
276 }
277 return(0);
278}
279
984263bc 280static int
41c20dac 281donice(struct proc *chgp, int n)
984263bc 282{
41c20dac
MD
283 struct proc *curp = curproc;
284 struct ucred *cr = curp->p_ucred;
08f2f1bb 285 struct lwp *lp;
984263bc 286
41c20dac
MD
287 if (cr->cr_uid && cr->cr_ruid &&
288 cr->cr_uid != chgp->p_ucred->cr_uid &&
289 cr->cr_ruid != chgp->p_ucred->cr_uid)
984263bc
MD
290 return (EPERM);
291 if (n > PRIO_MAX)
292 n = PRIO_MAX;
293 if (n < PRIO_MIN)
294 n = PRIO_MIN;
3b1d99e9 295 if (n < chgp->p_nice && priv_check_cred(cr, PRIV_SCHED_SETPRIORITY, 0))
984263bc
MD
296 return (EACCES);
297 chgp->p_nice = n;
4643740a
MD
298 FOREACH_LWP_IN_PROC(lp, chgp) {
299 LWPHOLD(lp);
08f2f1bb 300 chgp->p_usched->resetpriority(lp);
4643740a
MD
301 LWPRELE(lp);
302 }
984263bc
MD
303 return (0);
304}
305
aa166ad1
AH
306
307struct ioprio_get_info {
308 int high;
309 int who;
310};
311
312static int ioprio_get_callback(struct proc *p, void *data);
313
314/*
315 * MPALMOSTSAFE
316 */
317int
318sys_ioprio_get(struct ioprio_get_args *uap)
319{
320 struct ioprio_get_info info;
321 struct proc *curp = curproc;
322 struct proc *p;
323 int high = IOPRIO_MIN-2;
324 int error;
325
58c2553a 326 lwkt_gettoken(&proc_token);
aa166ad1
AH
327
328 switch (uap->which) {
329 case PRIO_PROCESS:
58c2553a 330 if (uap->who == 0) {
aa166ad1 331 p = curp;
58c2553a
MD
332 PHOLD(p);
333 } else {
aa166ad1 334 p = pfind(uap->who);
58c2553a
MD
335 }
336 if (p) {
337 if (PRISON_CHECK(curp->p_ucred, p->p_ucred))
338 high = p->p_ionice;
339 PRELE(p);
340 }
aa166ad1
AH
341 break;
342
343 case PRIO_PGRP:
344 {
345 struct pgrp *pg;
346
58c2553a 347 if (uap->who == 0) {
aa166ad1 348 pg = curp->p_pgrp;
58c2553a
MD
349 pgref(pg);
350 } else if ((pg = pgfind(uap->who)) == NULL) {
aa166ad1 351 break;
58c2553a
MD
352 } /* else ref held from pgfind */
353
aa166ad1 354 LIST_FOREACH(p, &pg->pg_members, p_pglist) {
58c2553a
MD
355 if (PRISON_CHECK(curp->p_ucred, p->p_ucred) &&
356 p->p_nice > high)
aa166ad1
AH
357 high = p->p_ionice;
358 }
58c2553a 359 pgrel(pg);
aa166ad1
AH
360 break;
361 }
362 case PRIO_USER:
363 if (uap->who == 0)
364 uap->who = curp->p_ucred->cr_uid;
365 info.high = high;
366 info.who = uap->who;
367 allproc_scan(ioprio_get_callback, &info);
368 high = info.high;
369 break;
370
371 default:
372 error = EINVAL;
373 goto done;
374 }
375 if (high == IOPRIO_MIN-2) {
376 error = ESRCH;
377 goto done;
378 }
379 uap->sysmsg_result = high;
380 error = 0;
381done:
58c2553a
MD
382 lwkt_reltoken(&proc_token);
383
aa166ad1
AH
384 return (error);
385}
386
387/*
388 * Figure out the current lowest nice priority for processes owned
389 * by the specified user.
390 */
391static
392int
393ioprio_get_callback(struct proc *p, void *data)
394{
395 struct ioprio_get_info *info = data;
396
397 if (PRISON_CHECK(curproc->p_ucred, p->p_ucred) &&
398 p->p_ucred->cr_uid == info->who &&
399 p->p_ionice > info->high) {
400 info->high = p->p_ionice;
401 }
402 return(0);
403}
404
405
406struct ioprio_set_info {
407 int prio;
408 int who;
409 int error;
410 int found;
411};
412
413static int ioprio_set_callback(struct proc *p, void *data);
414
415/*
416 * MPALMOSTSAFE
417 */
418int
419sys_ioprio_set(struct ioprio_set_args *uap)
420{
421 struct ioprio_set_info info;
422 struct proc *curp = curproc;
423 struct proc *p;
424 int found = 0, error = 0;
425
58c2553a 426 lwkt_gettoken(&proc_token);
aa166ad1
AH
427
428 switch (uap->which) {
429 case PRIO_PROCESS:
58c2553a 430 if (uap->who == 0) {
aa166ad1 431 p = curp;
58c2553a
MD
432 PHOLD(p);
433 } else {
aa166ad1 434 p = pfind(uap->who);
58c2553a
MD
435 }
436 if (p) {
437 if (PRISON_CHECK(curp->p_ucred, p->p_ucred)) {
438 error = doionice(p, uap->prio);
439 found++;
440 }
441 PRELE(p);
442 }
aa166ad1
AH
443 break;
444
445 case PRIO_PGRP:
446 {
447 struct pgrp *pg;
448
58c2553a 449 if (uap->who == 0) {
aa166ad1 450 pg = curp->p_pgrp;
58c2553a
MD
451 pgref(pg);
452 } else if ((pg = pgfind(uap->who)) == NULL) {
aa166ad1 453 break;
58c2553a
MD
454 } /* else ref held from pgfind */
455
aa166ad1
AH
456 LIST_FOREACH(p, &pg->pg_members, p_pglist) {
457 if (PRISON_CHECK(curp->p_ucred, p->p_ucred)) {
458 error = doionice(p, uap->prio);
459 found++;
460 }
461 }
58c2553a 462 pgrel(pg);
aa166ad1
AH
463 break;
464 }
465 case PRIO_USER:
466 if (uap->who == 0)
467 uap->who = curp->p_ucred->cr_uid;
468 info.prio = uap->prio;
469 info.who = uap->who;
470 info.error = 0;
471 info.found = 0;
472 allproc_scan(ioprio_set_callback, &info);
473 error = info.error;
474 found = info.found;
475 break;
476
477 default:
478 error = EINVAL;
479 found = 1;
480 break;
481 }
482
58c2553a
MD
483 lwkt_reltoken(&proc_token);
484
aa166ad1
AH
485 if (found == 0)
486 error = ESRCH;
487 return (error);
488}
489
490static
491int
492ioprio_set_callback(struct proc *p, void *data)
493{
494 struct ioprio_set_info *info = data;
495 int error;
496
497 if (p->p_ucred->cr_uid == info->who &&
498 PRISON_CHECK(curproc->p_ucred, p->p_ucred)) {
499 error = doionice(p, info->prio);
500 if (error)
501 info->error = error;
502 ++info->found;
503 }
504 return(0);
505}
506
507int
508doionice(struct proc *chgp, int n)
509{
510 struct proc *curp = curproc;
511 struct ucred *cr = curp->p_ucred;
512
513 if (cr->cr_uid && cr->cr_ruid &&
514 cr->cr_uid != chgp->p_ucred->cr_uid &&
515 cr->cr_ruid != chgp->p_ucred->cr_uid)
516 return (EPERM);
517 if (n > IOPRIO_MAX)
518 n = IOPRIO_MAX;
519 if (n < IOPRIO_MIN)
520 n = IOPRIO_MIN;
521 if (n < chgp->p_ionice && priv_check_cred(cr, PRIV_SCHED_SETPRIORITY, 0))
522 return (EACCES);
523 chgp->p_ionice = n;
524
525 return (0);
526
527}
528
3919ced0
MD
529/*
530 * MPALMOSTSAFE
531 */
649d3bd2
MD
532int
533sys_lwp_rtprio(struct lwp_rtprio_args *uap)
534{
58c2553a 535 struct proc *p;
649d3bd2
MD
536 struct lwp *lp;
537 struct rtprio rtp;
9910d07b 538 struct ucred *cr = curthread->td_ucred;
649d3bd2
MD
539 int error;
540
541 error = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
542 if (error)
543 return error;
3919ced0 544 if (uap->pid < 0)
649d3bd2 545 return EINVAL;
3919ced0 546
58c2553a
MD
547 lwkt_gettoken(&proc_token);
548
3919ced0 549 if (uap->pid == 0) {
58c2553a
MD
550 p = curproc;
551 PHOLD(p);
649d3bd2
MD
552 } else {
553 p = pfind(uap->pid);
554 }
555
3919ced0
MD
556 if (p == NULL) {
557 error = ESRCH;
558 goto done;
649d3bd2
MD
559 }
560
561 if (uap->tid < -1) {
3919ced0
MD
562 error = EINVAL;
563 goto done;
564 }
565 if (uap->tid == -1) {
649d3bd2
MD
566 /*
567 * sadly, tid can be 0 so we can't use 0 here
568 * like sys_rtprio()
569 */
570 lp = curthread->td_lwp;
571 } else {
3e291793 572 lp = lwp_rb_tree_RB_LOOKUP(&p->p_lwp_tree, uap->tid);
3919ced0
MD
573 if (lp == NULL) {
574 error = ESRCH;
575 goto done;
576 }
649d3bd2
MD
577 }
578
579 switch (uap->function) {
580 case RTP_LOOKUP:
3919ced0
MD
581 error = copyout(&lp->lwp_rtprio, uap->rtp,
582 sizeof(struct rtprio));
583 break;
649d3bd2
MD
584 case RTP_SET:
585 if (cr->cr_uid && cr->cr_ruid &&
586 cr->cr_uid != p->p_ucred->cr_uid &&
587 cr->cr_ruid != p->p_ucred->cr_uid) {
3919ced0
MD
588 error = EPERM;
589 break;
649d3bd2
MD
590 }
591 /* disallow setting rtprio in most cases if not superuser */
3b1d99e9 592 if (priv_check_cred(cr, PRIV_SCHED_RTPRIO, 0)) {
649d3bd2
MD
593 /* can't set someone else's */
594 if (uap->pid) { /* XXX */
3919ced0
MD
595 error = EPERM;
596 break;
649d3bd2
MD
597 }
598 /* can't set realtime priority */
599/*
600 * Realtime priority has to be restricted for reasons which should be
601 * obvious. However, for idle priority, there is a potential for
602 * system deadlock if an idleprio process gains a lock on a resource
603 * that other processes need (and the idleprio process can't run
604 * due to a CPU-bound normal process). Fix me! XXX
605 */
606 if (RTP_PRIO_IS_REALTIME(rtp.type)) {
3919ced0
MD
607 error = EPERM;
608 break;
649d3bd2
MD
609 }
610 }
611 switch (rtp.type) {
612#ifdef RTP_PRIO_FIFO
613 case RTP_PRIO_FIFO:
614#endif
615 case RTP_PRIO_REALTIME:
616 case RTP_PRIO_NORMAL:
617 case RTP_PRIO_IDLE:
12358529
MD
618 if (rtp.prio > RTP_PRIO_MAX) {
619 error = EINVAL;
620 } else {
621 lp->lwp_rtprio = rtp;
622 error = 0;
623 }
3919ced0 624 break;
649d3bd2 625 default:
3919ced0
MD
626 error = EINVAL;
627 break;
649d3bd2 628 }
3919ced0 629 break;
649d3bd2 630 default:
3919ced0
MD
631 error = EINVAL;
632 break;
649d3bd2 633 }
3919ced0
MD
634
635done:
58c2553a
MD
636 if (p)
637 PRELE(p);
638 lwkt_reltoken(&proc_token);
639
3919ced0 640 return (error);
649d3bd2
MD
641}
642
984263bc
MD
643/*
644 * Set realtime priority
3919ced0
MD
645 *
646 * MPALMOSTSAFE
984263bc 647 */
984263bc 648int
753fd850 649sys_rtprio(struct rtprio_args *uap)
984263bc 650{
41c20dac 651 struct proc *p;
08f2f1bb 652 struct lwp *lp;
9910d07b 653 struct ucred *cr = curthread->td_ucred;
984263bc
MD
654 struct rtprio rtp;
655 int error;
656
657 error = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
658 if (error)
659 return (error);
660
58c2553a
MD
661 lwkt_gettoken(&proc_token);
662
663 if (uap->pid == 0) {
664 p = curproc;
665 PHOLD(p);
666 } else {
984263bc 667 p = pfind(uap->pid);
58c2553a 668 }
984263bc 669
3919ced0
MD
670 if (p == NULL) {
671 error = ESRCH;
672 goto done;
673 }
984263bc 674
08f2f1bb
SS
675 /* XXX lwp */
676 lp = FIRST_LWP_IN_PROC(p);
984263bc
MD
677 switch (uap->function) {
678 case RTP_LOOKUP:
3919ced0
MD
679 error = copyout(&lp->lwp_rtprio, uap->rtp,
680 sizeof(struct rtprio));
681 break;
984263bc 682 case RTP_SET:
41c20dac
MD
683 if (cr->cr_uid && cr->cr_ruid &&
684 cr->cr_uid != p->p_ucred->cr_uid &&
3919ced0
MD
685 cr->cr_ruid != p->p_ucred->cr_uid) {
686 error = EPERM;
687 break;
688 }
984263bc 689 /* disallow setting rtprio in most cases if not superuser */
3b1d99e9 690 if (priv_check_cred(cr, PRIV_SCHED_RTPRIO, 0)) {
984263bc 691 /* can't set someone else's */
3919ced0
MD
692 if (uap->pid) {
693 error = EPERM;
694 break;
695 }
984263bc
MD
696 /* can't set realtime priority */
697/*
698 * Realtime priority has to be restricted for reasons which should be
699 * obvious. However, for idle priority, there is a potential for
700 * system deadlock if an idleprio process gains a lock on a resource
701 * that other processes need (and the idleprio process can't run
702 * due to a CPU-bound normal process). Fix me! XXX
703 */
3919ced0
MD
704 if (RTP_PRIO_IS_REALTIME(rtp.type)) {
705 error = EPERM;
706 break;
707 }
984263bc
MD
708 }
709 switch (rtp.type) {
710#ifdef RTP_PRIO_FIFO
711 case RTP_PRIO_FIFO:
712#endif
713 case RTP_PRIO_REALTIME:
714 case RTP_PRIO_NORMAL:
715 case RTP_PRIO_IDLE:
3919ced0
MD
716 if (rtp.prio > RTP_PRIO_MAX) {
717 error = EINVAL;
718 break;
719 }
08f2f1bb 720 lp->lwp_rtprio = rtp;
3919ced0
MD
721 error = 0;
722 break;
984263bc 723 default:
3919ced0
MD
724 error = EINVAL;
725 break;
984263bc 726 }
3919ced0 727 break;
984263bc 728 default:
3919ced0
MD
729 error = EINVAL;
730 break;
984263bc 731 }
3919ced0 732done:
58c2553a
MD
733 if (p)
734 PRELE(p);
735 lwkt_reltoken(&proc_token);
736
3919ced0 737 return (error);
984263bc
MD
738}
739
3919ced0
MD
740/*
741 * MPSAFE
742 */
984263bc 743int
753fd850 744sys_setrlimit(struct __setrlimit_args *uap)
984263bc
MD
745{
746 struct rlimit alim;
747 int error;
748
9697c509
DRJ
749 error = copyin(uap->rlp, &alim, sizeof(alim));
750 if (error)
984263bc 751 return (error);
9697c509
DRJ
752
753 error = kern_setrlimit(uap->which, &alim);
754
755 return (error);
984263bc
MD
756}
757
3919ced0
MD
758/*
759 * MPSAFE
760 */
984263bc 761int
753fd850 762sys_getrlimit(struct __getrlimit_args *uap)
9697c509
DRJ
763{
764 struct rlimit lim;
765 int error;
766
767 error = kern_getrlimit(uap->which, &lim);
768
769 if (error == 0)
770 error = copyout(&lim, uap->rlp, sizeof(*uap->rlp));
771 return error;
984263bc
MD
772}
773
774/*
fde7ac71 775 * Transform the running time and tick information in lwp lp's thread into user,
984263bc 776 * system, and interrupt time usage.
d16a8831
MD
777 *
778 * Since we are limited to statclock tick granularity this is a statisical
779 * calculation which will be correct over the long haul, but should not be
780 * expected to measure fine grained deltas.
585aafb6
MD
781 *
782 * It is possible to catch a lwp in the midst of being created, so
783 * check whether lwp_thread is NULL or not.
984263bc
MD
784 */
785void
fde7ac71 786calcru(struct lwp *lp, struct timeval *up, struct timeval *sp)
984263bc 787{
585aafb6 788 struct thread *td;
984263bc 789
d16a8831
MD
790 /*
791 * Calculate at the statclock level. YYY if the thread is owned by
792 * another cpu we need to forward the request to the other cpu, or
585aafb6
MD
793 * have a token to interlock the information in order to avoid racing
794 * thread destruction.
d16a8831 795 */
585aafb6
MD
796 if ((td = lp->lwp_thread) != NULL) {
797 crit_enter();
798 up->tv_sec = td->td_uticks / 1000000;
799 up->tv_usec = td->td_uticks % 1000000;
800 sp->tv_sec = td->td_sticks / 1000000;
801 sp->tv_usec = td->td_sticks % 1000000;
802 crit_exit();
803 }
984263bc
MD
804}
805
fde7ac71
SS
806/*
807 * Aggregate resource statistics of all lwps of a process.
808 *
809 * proc.p_ru keeps track of all statistics directly related to a proc. This
810 * consists of RSS usage and nswap information and aggregate numbers for all
811 * former lwps of this proc.
812 *
813 * proc.p_cru is the sum of all stats of reaped children.
814 *
815 * lwp.lwp_ru contains the stats directly related to one specific lwp, meaning
816 * packet, scheduler switch or page fault counts, etc. This information gets
817 * added to lwp.lwp_proc.p_ru when the lwp exits.
818 */
819void
820calcru_proc(struct proc *p, struct rusage *ru)
821{
822 struct timeval upt, spt;
823 long *rip1, *rip2;
824 struct lwp *lp;
825
826 *ru = p->p_ru;
827
828 FOREACH_LWP_IN_PROC(lp, p) {
829 calcru(lp, &upt, &spt);
830 timevaladd(&ru->ru_utime, &upt);
831 timevaladd(&ru->ru_stime, &spt);
832 for (rip1 = &ru->ru_first, rip2 = &lp->lwp_ru.ru_first;
833 rip1 <= &ru->ru_last;
834 rip1++, rip2++)
835 *rip1 += *rip2;
836 }
837}
838
839
3919ced0
MD
840/*
841 * MPALMOSTSAFE
842 */
984263bc 843int
753fd850 844sys_getrusage(struct getrusage_args *uap)
984263bc 845{
fde7ac71 846 struct rusage ru;
41c20dac 847 struct rusage *rup;
3919ced0 848 int error;
984263bc 849
58c2553a 850 lwkt_gettoken(&proc_token);
984263bc 851
3919ced0 852 switch (uap->who) {
984263bc 853 case RUSAGE_SELF:
fde7ac71
SS
854 rup = &ru;
855 calcru_proc(curproc, rup);
3919ced0 856 error = 0;
984263bc 857 break;
984263bc 858 case RUSAGE_CHILDREN:
fde7ac71 859 rup = &curproc->p_cru;
3919ced0 860 error = 0;
984263bc 861 break;
984263bc 862 default:
3919ced0
MD
863 error = EINVAL;
864 break;
984263bc 865 }
3919ced0
MD
866 if (error == 0)
867 error = copyout(rup, uap->rusage, sizeof(struct rusage));
58c2553a 868 lwkt_reltoken(&proc_token);
3919ced0 869 return (error);
984263bc
MD
870}
871
872void
792033e7 873ruadd(struct rusage *ru, struct rusage *ru2)
984263bc 874{
1fd87d54
RG
875 long *ip, *ip2;
876 int i;
984263bc
MD
877
878 timevaladd(&ru->ru_utime, &ru2->ru_utime);
879 timevaladd(&ru->ru_stime, &ru2->ru_stime);
880 if (ru->ru_maxrss < ru2->ru_maxrss)
881 ru->ru_maxrss = ru2->ru_maxrss;
882 ip = &ru->ru_first; ip2 = &ru2->ru_first;
883 for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
884 *ip++ += *ip2++;
885}
886
887/*
984263bc
MD
888 * Find the uidinfo structure for a uid. This structure is used to
889 * track the total resource consumption (process count, socket buffer
890 * size, etc.) for the uid and impose limits.
891 */
892void
792033e7 893uihashinit(void)
984263bc 894{
9d7a637e 895 spin_init(&uihash_lock);
984263bc
MD
896 uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
897}
898
0d355d3b
MD
899/*
900 * NOTE: Must be called with uihash_lock held
901 *
902 * MPSAFE
903 */
984263bc 904static struct uidinfo *
792033e7 905uilookup(uid_t uid)
984263bc
MD
906{
907 struct uihashhead *uipp;
908 struct uidinfo *uip;
909
910 uipp = UIHASH(uid);
792033e7 911 LIST_FOREACH(uip, uipp, ui_hash) {
984263bc
MD
912 if (uip->ui_uid == uid)
913 break;
792033e7 914 }
984263bc
MD
915 return (uip);
916}
917
0d355d3b 918/*
c9110f4f
MD
919 * Helper function to creat ea uid that could not be found.
920 * This function will properly deal with races.
921 *
0d355d3b
MD
922 * MPSAFE
923 */
984263bc 924static struct uidinfo *
792033e7 925uicreate(uid_t uid)
984263bc 926{
9d7a637e 927 struct uidinfo *uip, *tmp;
d37c8f7f 928
f61e468f
MD
929 /*
930 * Allocate space and check for a race
931 */
d37c8f7f
MD
932 uip = kmalloc(sizeof(*uip), M_UIDINFO, M_WAITOK|M_ZERO);
933
f61e468f
MD
934 /*
935 * Initialize structure and enter it into the hash table
936 */
9d7a637e 937 spin_init(&uip->ui_lock);
984263bc 938 uip->ui_uid = uid;
9d7a637e 939 uip->ui_ref = 1; /* we're returning a ref */
98a7f915 940 varsymset_init(&uip->ui_varsymset, NULL);
9d7a637e
AE
941
942 /*
943 * Somebody may have already created the uidinfo for this
944 * uid. If so, return that instead.
945 */
287a8577 946 spin_lock(&uihash_lock);
9d7a637e
AE
947 tmp = uilookup(uid);
948 if (tmp != NULL) {
c9110f4f 949 uihold(tmp);
287a8577 950 spin_unlock(&uihash_lock);
c9110f4f 951
9d7a637e 952 spin_uninit(&uip->ui_lock);
c9110f4f 953 varsymset_clean(&uip->ui_varsymset);
9d7a637e
AE
954 FREE(uip, M_UIDINFO);
955 uip = tmp;
956 } else {
957 LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
287a8577 958 spin_unlock(&uihash_lock);
9d7a637e 959 }
984263bc
MD
960 return (uip);
961}
962
0d355d3b 963/*
c9110f4f
MD
964 *
965 *
0d355d3b
MD
966 * MPSAFE
967 */
984263bc 968struct uidinfo *
792033e7 969uifind(uid_t uid)
984263bc
MD
970{
971 struct uidinfo *uip;
972
287a8577 973 spin_lock(&uihash_lock);
984263bc 974 uip = uilookup(uid);
9d7a637e 975 if (uip == NULL) {
287a8577 976 spin_unlock(&uihash_lock);
984263bc 977 uip = uicreate(uid);
9d7a637e
AE
978 } else {
979 uihold(uip);
287a8577 980 spin_unlock(&uihash_lock);
9d7a637e 981 }
984263bc
MD
982 return (uip);
983}
984
0d355d3b 985/*
c9110f4f
MD
986 * Helper funtion to remove a uidinfo whos reference count is
987 * transitioning from 1->0. The reference count is 1 on call.
988 *
989 * Zero is returned on success, otherwise non-zero and the
990 * uiphas not been removed.
991 *
0d355d3b
MD
992 * MPSAFE
993 */
c9110f4f 994static __inline int
792033e7 995uifree(struct uidinfo *uip)
984263bc 996{
c9110f4f
MD
997 /*
998 * If we are still the only holder after acquiring the uihash_lock
999 * we can safely unlink the uip and destroy it. Otherwise we lost
1000 * a race and must fail.
1001 */
287a8577 1002 spin_lock(&uihash_lock);
c9110f4f 1003 if (uip->ui_ref != 1) {
287a8577 1004 spin_unlock(&uihash_lock);
c9110f4f
MD
1005 return(-1);
1006 }
1007 LIST_REMOVE(uip, ui_hash);
287a8577 1008 spin_unlock(&uihash_lock);
9d7a637e
AE
1009
1010 /*
c9110f4f
MD
1011 * The uip is now orphaned and we can destroy it at our
1012 * leisure.
9d7a637e 1013 */
792033e7 1014 if (uip->ui_sbsize != 0)
c9110f4f
MD
1015 kprintf("freeing uidinfo: uid = %d, sbsize = %jd\n",
1016 uip->ui_uid, (intmax_t)uip->ui_sbsize);
792033e7 1017 if (uip->ui_proccnt != 0)
6ea70f76 1018 kprintf("freeing uidinfo: uid = %d, proccnt = %ld\n",
792033e7 1019 uip->ui_uid, uip->ui_proccnt);
9d7a637e 1020
98a7f915 1021 varsymset_clean(&uip->ui_varsymset);
9d7a637e 1022 lockuninit(&uip->ui_varsymset.vx_lock);
9d7a637e 1023 spin_uninit(&uip->ui_lock);
792033e7 1024 FREE(uip, M_UIDINFO);
c9110f4f 1025 return(0);
792033e7 1026}
984263bc 1027
0d355d3b
MD
1028/*
1029 * MPSAFE
1030 */
792033e7
MD
1031void
1032uihold(struct uidinfo *uip)
1033{
9d7a637e 1034 atomic_add_int(&uip->ui_ref, 1);
61f96b6f 1035 KKASSERT(uip->ui_ref >= 0);
792033e7
MD
1036}
1037
0d355d3b 1038/*
c9110f4f
MD
1039 * NOTE: It is important for us to not drop the ref count to 0
1040 * because this can cause a 2->0/2->0 race with another
1041 * concurrent dropper. Losing the race in that situation
1042 * can cause uip to become stale for one of the other
1043 * threads.
1044 *
0d355d3b
MD
1045 * MPSAFE
1046 */
792033e7
MD
1047void
1048uidrop(struct uidinfo *uip)
1049{
c9110f4f
MD
1050 int ref;
1051
61f96b6f 1052 KKASSERT(uip->ui_ref > 0);
c9110f4f
MD
1053
1054 for (;;) {
1055 ref = uip->ui_ref;
1056 cpu_ccfence();
1057 if (ref == 1) {
1058 if (uifree(uip) == 0)
1059 break;
1060 } else if (atomic_cmpset_int(&uip->ui_ref, ref, ref - 1)) {
1061 break;
1062 }
1063 /* else retry */
9d7a637e 1064 }
792033e7
MD
1065}
1066
/*
 * Replace *puip with nuip, dropping the reference held on the old one.
 * Ownership of nuip's reference transfers to *puip.
 */
void
uireplace(struct uidinfo **puip, struct uidinfo *nuip)
{
	uidrop(*puip);
	*puip = nuip;
}
1073
1074/*
1075 * Change the count associated with number of processes
1076 * a given user is using. When 'max' is 0, don't enforce a limit
1077 */
1078int
792033e7 1079chgproccnt(struct uidinfo *uip, int diff, int max)
984263bc 1080{
9d7a637e 1081 int ret;
287a8577 1082 spin_lock(&uip->ui_lock);
984263bc 1083 /* don't allow them to exceed max, but allow subtraction */
9d7a637e
AE
1084 if (diff > 0 && uip->ui_proccnt + diff > max && max != 0) {
1085 ret = 0;
1086 } else {
1087 uip->ui_proccnt += diff;
1088 if (uip->ui_proccnt < 0)
1089 kprintf("negative proccnt for uid = %d\n", uip->ui_uid);
1090 ret = 1;
1091 }
287a8577 1092 spin_unlock(&uip->ui_lock);
9d7a637e 1093 return ret;
984263bc
MD
1094}
1095
1096/*
1097 * Change the total socket buffer size a user has used.
1098 */
1099int
792033e7 1100chgsbsize(struct uidinfo *uip, u_long *hiwat, u_long to, rlim_t max)
984263bc
MD
1101{
1102 rlim_t new;
984263bc 1103
287a8577 1104 spin_lock(&uip->ui_lock);
984263bc 1105 new = uip->ui_sbsize + to - *hiwat;
9d7a637e 1106 KKASSERT(new >= 0);
3a6117bb
MD
1107
1108 /*
1109 * If we are trying to increase the socket buffer size
1110 * Scale down the hi water mark when we exceed the user's
1111 * allowed socket buffer space.
1112 *
1113 * We can't scale down too much or we will blow up atomic packet
1114 * operations.
1115 */
1116 if (to > *hiwat && to > MCLBYTES && new > max) {
1117 to = to * max / new;
1118 if (to < MCLBYTES)
1119 to = MCLBYTES;
984263bc
MD
1120 }
1121 uip->ui_sbsize = new;
1122 *hiwat = to;
287a8577 1123 spin_unlock(&uip->ui_lock);
984263bc
MD
1124 return (1);
1125}
792033e7 1126