gitweb.dragonflybsd.org Git - dragonfly.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 1982, 1986, 1989, 1993
	3	* The Regents of the University of California. All rights reserved.
	4	*
	5	* Redistribution and use in source and binary forms, with or without
	6	* modification, are permitted provided that the following conditions
	7	* are met:
	8	* 1. Redistributions of source code must retain the above copyright
	9	* notice, this list of conditions and the following disclaimer.
	10	* 2. Redistributions in binary form must reproduce the above copyright
	11	* notice, this list of conditions and the following disclaimer in the
	12	* documentation and/or other materials provided with the distribution.
	13	* 3. All advertising materials mentioning features or use of this software
	14	* must display the following acknowledgement:
	15	* This product includes software developed by the University of
	16	* California, Berkeley and its contributors.
	17	* 4. Neither the name of the University nor the names of its contributors
	18	* may be used to endorse or promote products derived from this software
	19	* without specific prior written permission.
	20	*
	21	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	22	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	23	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	24	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	25	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	26	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	27	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	28	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	29	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	30	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	31	* SUCH DAMAGE.
	32	*
	33	* @(#)kern_time.c 8.1 (Berkeley) 6/10/93
	34	* $FreeBSD: src/sys/kern/kern_time.c,v 1.68.2.1 2002/10/01 08:00:41 bde Exp $
	35	* $DragonFly: src/sys/kern/kern_time.c,v 1.20 2005/04/14 07:55:36 joerg Exp $
	36	*/
	37
	38	#include <sys/param.h>
	39	#include <sys/systm.h>
	40	#include <sys/buf.h>
	41	#include <sys/sysproto.h>
	42	#include <sys/resourcevar.h>
	43	#include <sys/signalvar.h>
	44	#include <sys/kernel.h>
	45	#include <sys/systm.h>
	46	#include <sys/sysent.h>
	47	#include <sys/sysunion.h>
	48	#include <sys/proc.h>
	49	#include <sys/time.h>
	50	#include <sys/vnode.h>
	51	#include <sys/sysctl.h>
	52	#include <vm/vm.h>
	53	#include <vm/vm_extern.h>
	54	#include <sys/msgport2.h>
	55	#include <sys/thread2.h>
	56
	57	struct timezone tz;
	58
	59	/*
	60	* Time of day and interval timer support.
	61	*
	62	* These routines provide the kernel entry points to get and set
	63	* the time-of-day and per-process interval timers. Subroutines
	64	* here provide support for adding and subtracting timeval structures
	65	* and decrementing interval timers, optionally reloading the interval
	66	* timers when they expire.
	67	*/
	68
	69	static int nanosleep1 (struct timespec *rqt,
	70	struct timespec *rmt);
	71	static int settime (struct timeval *);
	72	static void timevalfix (struct timeval *);
	73	static void no_lease_updatetime (int);
	74
	75	static int sleep_hard_us = 100;
	76	SYSCTL_INT(_kern, OID_AUTO, sleep_hard_us, CTLFLAG_RW, &sleep_hard_us, 0, "")
	77
	78	static void
	79	no_lease_updatetime(deltat)
	80	int deltat;
	81	{
	82	}
	83
	84	void (*lease_updatetime) (int) = no_lease_updatetime;
	85
	86	static int
	87	settime(tv)
	88	struct timeval *tv;
	89	{
	90	struct timeval delta, tv1, tv2;
	91	static struct timeval maxtime, laststep;
	92	struct timespec ts;
	93
	94	crit_enter();
	95	microtime(&tv1);
	96	delta = *tv;
	97	timevalsub(&delta, &tv1);
	98
	99	/*
	100	* If the system is secure, we do not allow the time to be
	101	* set to a value earlier than 1 second less than the highest
	102	* time we have yet seen. The worst a miscreant can do in
	103	* this circumstance is "freeze" time. He couldn't go
	104	* back to the past.
	105	*
	106	* We similarly do not allow the clock to be stepped more
	107	* than one second, nor more than once per second. This allows
	108	* a miscreant to make the clock march double-time, but no worse.
	109	*/
	110	if (securelevel > 1) {
	111	if (delta.tv_sec < 0 \|\| delta.tv_usec < 0) {
	112	/*
	113	* Update maxtime to latest time we've seen.
	114	*/
	115	if (tv1.tv_sec > maxtime.tv_sec)
	116	maxtime = tv1;
	117	tv2 = *tv;
	118	timevalsub(&tv2, &maxtime);
	119	if (tv2.tv_sec < -1) {
	120	tv->tv_sec = maxtime.tv_sec - 1;
	121	printf("Time adjustment clamped to -1 second\n");
	122	}
	123	} else {
	124	if (tv1.tv_sec == laststep.tv_sec) {
	125	crit_exit();
	126	return (EPERM);
	127	}
	128	if (delta.tv_sec > 1) {
	129	tv->tv_sec = tv1.tv_sec + 1;
	130	printf("Time adjustment clamped to +1 second\n");
	131	}
	132	laststep = *tv;
	133	}
	134	}
	135
	136	ts.tv_sec = tv->tv_sec;
	137	ts.tv_nsec = tv->tv_usec * 1000;
	138	set_timeofday(&ts);
	139	lease_updatetime(delta.tv_sec);
	140	crit_exit();
	141	resettodr();
	142	return (0);
	143	}
	144
	145	/* ARGSUSED */
	146	int
	147	clock_gettime(struct clock_gettime_args *uap)
	148	{
	149	struct timespec ats;
	150
	151	switch(uap->clock_id) {
	152	case CLOCK_REALTIME:
	153	nanotime(&ats);
	154	return (copyout(&ats, uap->tp, sizeof(ats)));
	155	case CLOCK_MONOTONIC:
	156	nanouptime(&ats);
	157	return (copyout(&ats, uap->tp, sizeof(ats)));
	158	default:
	159	return (EINVAL);
	160	}
	161	}
	162
	163	/* ARGSUSED */
	164	int
	165	clock_settime(struct clock_settime_args *uap)
	166	{
	167	struct thread *td = curthread;
	168	struct timeval atv;
	169	struct timespec ats;
	170	int error;
	171
	172	if ((error = suser(td)) != 0)
	173	return (error);
	174	switch(uap->clock_id) {
	175	case CLOCK_REALTIME:
	176	if ((error = copyin(uap->tp, &ats, sizeof(ats))) != 0)
	177	return (error);
	178	if (ats.tv_nsec < 0 \|\| ats.tv_nsec >= 1000000000)
	179	return (EINVAL);
	180	/* XXX Don't convert nsec->usec and back */
	181	TIMESPEC_TO_TIMEVAL(&atv, &ats);
	182	error = settime(&atv);
	183	return (error);
	184	default:
	185	return (EINVAL);
	186	}
	187	}
	188
	189	int
	190	clock_getres(struct clock_getres_args *uap)
	191	{
	192	struct timespec ts;
	193
	194	switch(uap->clock_id) {
	195	case CLOCK_REALTIME:
	196	case CLOCK_MONOTONIC:
	197	/*
	198	* Round up the result of the division cheaply
	199	* by adding 1. Rounding up is especially important
	200	* if rounding down would give 0. Perfect rounding
	201	* is unimportant.
	202	*/
	203	ts.tv_sec = 0;
	204	ts.tv_nsec = 1000000000 / cputimer_freq + 1;
	205	return(copyout(&ts, uap->tp, sizeof(ts)));
	206	default:
	207	return(EINVAL);
	208	}
	209	}
	210
	211	/*
	212	* nanosleep1()
	213	*
	214	* This is a general helper function for nanosleep() (aka sleep() aka
	215	* usleep()).
	216	*
	217	* If there is less than one tick's worth of time left and
	218	* we haven't done a yield, or the remaining microseconds are
	219	* ridiculously low, do a yield. This avoids having
	220	* to deal with systimer overheads when the system is under
	221	* heavy loads. If we have done a yield already then use
	222	* a systimer and an uninterruptible thread wait.
	223	*
	224	* If there is more than a tick's worth of time left,
	225	* calculate the baseline ticks and use an interruptible
	226	* tsleep, then handle the fine-grained delay on the next
	227	* loop. This usually results in two sleeps occurring, a long one
	228	* and a short one.
	229	*/
	230	static void
	231	ns1_systimer(systimer_t info)
	232	{
	233	lwkt_schedule(info->data);
	234	}
	235
	236	static int
	237	nanosleep1(struct timespec rqt, struct timespec rmt)
	238	{
	239	static int nanowait;
	240	struct timespec ts, ts2, ts3;
	241	struct timeval tv;
	242	int error;
	243	int tried_yield;
	244
	245	if (rqt->tv_nsec < 0 \|\| rqt->tv_nsec >= 1000000000)
	246	return (EINVAL);
	247	if (rqt->tv_sec < 0 \|\| (rqt->tv_sec == 0 && rqt->tv_nsec == 0))
	248	return (0);
	249	nanouptime(&ts);
	250	timespecadd(&ts, rqt); /* ts = target timestamp compare */
	251	TIMESPEC_TO_TIMEVAL(&tv, rqt); /* tv = sleep interval */
	252	tried_yield = 0;
	253
	254	for (;;) {
	255	int ticks;
	256	struct systimer info;
	257
	258	ticks = tv.tv_usec / tick; /* approximate */
	259
	260	if (tv.tv_sec == 0 && ticks == 0) {
	261	thread_t td = curthread;
	262	if (tried_yield \|\| tv.tv_usec < sleep_hard_us) {
	263	tried_yield = 0;
	264	uio_yield();
	265	} else {
	266	crit_enter_quick(td);
	267	systimer_init_oneshot(&info, ns1_systimer,
	268	td, tv.tv_usec);
	269	lwkt_deschedule_self(td);
	270	crit_exit_quick(td);
	271	lwkt_switch();
	272	systimer_del(&info); /* make sure it's gone */
	273	}
	274	error = iscaught(td->td_proc);
	275	} else if (tv.tv_sec == 0) {
	276	error = tsleep(&nanowait, PCATCH, "nanslp", ticks);
	277	} else {
	278	ticks = tvtohz_low(&tv); /* also handles overflow */
	279	error = tsleep(&nanowait, PCATCH, "nanslp", ticks);
	280	}
	281	nanouptime(&ts2);
	282	if (error && error != EWOULDBLOCK) {
	283	if (error == ERESTART)
	284	error = EINTR;
	285	if (rmt != NULL) {
	286	timespecsub(&ts, &ts2);
	287	if (ts.tv_sec < 0)
	288	timespecclear(&ts);
	289	*rmt = ts;
	290	}
	291	return (error);
	292	}
	293	if (timespeccmp(&ts2, &ts, >=))
	294	return (0);
	295	ts3 = ts;
	296	timespecsub(&ts3, &ts2);
	297	TIMESPEC_TO_TIMEVAL(&tv, &ts3);
	298	}
	299	}
	300
	301	static void nanosleep_done(void *arg);
	302	static void nanosleep_copyout(union sysunion *sysun);
	303
	304	/* ARGSUSED */
	305	int
	306	nanosleep(struct nanosleep_args *uap)
	307	{
	308	int error;
	309	struct sysmsg_sleep *smsleep = &uap->sysmsg.sm.sleep;
	310
	311	error = copyin(uap->rqtp, &smsleep->rqt, sizeof(smsleep->rqt));
	312	if (error)
	313	return (error);
	314	/*
	315	* YYY clean this up to always use the callout, note that an abort
	316	* implementation should record the residual in the async case.
	317	*/
	318	if (uap->sysmsg.lmsg.ms_flags & MSGF_ASYNC) {
	319	quad_t ticks;
	320
	321	ticks = (quad_t)smsleep->rqt.tv_nsec * hz / 1000000000LL;
	322	if (smsleep->rqt.tv_sec)
	323	ticks += (quad_t)smsleep->rqt.tv_sec * hz;
	324	if (ticks <= 0) {
	325	if (ticks == 0)
	326	error = 0;
	327	else
	328	error = EINVAL;
	329	} else {
	330	uap->sysmsg.copyout = nanosleep_copyout;
	331	uap->sysmsg.lmsg.ms_flags &= ~MSGF_DONE;
	332	callout_init(&smsleep->timer);
	333	callout_reset(&smsleep->timer, ticks, nanosleep_done, uap);
	334	error = EASYNC;
	335	}
	336	} else {
	337	/*
	338	* Old synchronous sleep code, copyout the residual if
	339	* nanosleep was interrupted.
	340	*/
	341	error = nanosleep1(&smsleep->rqt, &smsleep->rmt);
	342	if (error && uap->rmtp)
	343	error = copyout(&smsleep->rmt, uap->rmtp, sizeof(smsleep->rmt));
	344	}
	345	return (error);
	346	}
	347
	348	/*
	349	* Asynch completion for the nanosleep() syscall. This function may be
	350	* called from any context and cannot legally access the originating
	351	* thread, proc, or its user space.
	352	*
	353	* YYY change the callout interface API so we can simply assign the replymsg
	354	* function to it directly.
	355	*/
	356	static void
	357	nanosleep_done(void *arg)
	358	{
	359	struct nanosleep_args *uap = arg;
	360	lwkt_msg_t msg = &uap->sysmsg.lmsg;
	361
	362	lwkt_replymsg(msg, 0);
	363	}
	364
	365	/*
	366	* Asynch return for the nanosleep() syscall, called in the context of the
	367	* originating thread when it pulls the message off the reply port. This
	368	* function is responsible for any copyouts to userland. Kernel threads
	369	* which do their own internal system calls will not usually call the return
	370	* function.
	371	*/
	372	static void
	373	nanosleep_copyout(union sysunion *sysun)
	374	{
	375	struct nanosleep_args *uap = &sysun->nanosleep;
	376	struct sysmsg_sleep *smsleep = &uap->sysmsg.sm.sleep;
	377
	378	if (sysun->lmsg.ms_error && uap->rmtp) {
	379	sysun->lmsg.ms_error =
	380	copyout(&smsleep->rmt, uap->rmtp, sizeof(smsleep->rmt));
	381	}
	382	}
	383
	384	/* ARGSUSED */
	385	int
	386	gettimeofday(struct gettimeofday_args *uap)
	387	{
	388	struct timeval atv;
	389	int error = 0;
	390
	391	if (uap->tp) {
	392	microtime(&atv);
	393	if ((error = copyout((caddr_t)&atv, (caddr_t)uap->tp,
	394	sizeof (atv))))
	395	return (error);
	396	}
	397	if (uap->tzp)
	398	error = copyout((caddr_t)&tz, (caddr_t)uap->tzp,
	399	sizeof (tz));
	400	return (error);
	401	}
	402
	403	/* ARGSUSED */
	404	int
	405	settimeofday(struct settimeofday_args *uap)
	406	{
	407	struct thread *td = curthread;
	408	struct timeval atv;
	409	struct timezone atz;
	410	int error;
	411
	412	if ((error = suser(td)))
	413	return (error);
	414	/* Verify all parameters before changing time. */
	415	if (uap->tv) {
	416	if ((error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
	417	sizeof(atv))))
	418	return (error);
	419	if (atv.tv_usec < 0 \|\| atv.tv_usec >= 1000000)
	420	return (EINVAL);
	421	}
	422	if (uap->tzp &&
	423	(error = copyin((caddr_t)uap->tzp, (caddr_t)&atz, sizeof(atz))))
	424	return (error);
	425	if (uap->tv && (error = settime(&atv)))
	426	return (error);
	427	if (uap->tzp)
	428	tz = atz;
	429	return (0);
	430	}
	431
	432	static void
	433	kern_adjtime_common(void)
	434	{
	435	if ((ntp_delta >= 0 && ntp_delta < ntp_default_tick_delta) \|\|
	436	(ntp_delta < 0 && ntp_delta > ntp_default_tick_delta))
	437	ntp_tick_delta = ntp_delta;
	438	else if (ntp_delta > ntp_big_delta)
	439	ntp_tick_delta = 10 * ntp_default_tick_delta;
	440	else if (ntp_delta < -ntp_big_delta)
	441	ntp_tick_delta = -10 * ntp_default_tick_delta;
	442	else if (ntp_delta > 0)
	443	ntp_tick_delta = ntp_default_tick_delta;
	444	else
	445	ntp_tick_delta = -ntp_default_tick_delta;
	446	}
	447
	448	void
	449	kern_adjtime(int64_t delta, int64_t *odelta)
	450	{
	451	int origcpu;
	452
	453	if ((origcpu = mycpu->gd_cpuid) != 0) {
	454	lwkt_setcpu_self(globaldata_find(0));
	455	cpu_mb1();
	456	}
	457
	458	crit_enter();
	459	*odelta = ntp_delta;
	460	ntp_delta += delta;
	461	kern_adjtime_common();
	462	crit_exit();
	463
	464	if (origcpu != 0) {
	465	lwkt_setcpu_self(globaldata_find(origcpu));
	466	cpu_mb1();
	467	}
	468	}
	469
	470	void
	471	kern_reladjtime(int64_t delta)
	472	{
	473	int origcpu;
	474
	475	if ((origcpu = mycpu->gd_cpuid) != 0) {
	476	lwkt_setcpu_self(globaldata_find(0));
	477	cpu_mb1();
	478	}
	479
	480	crit_enter();
	481	ntp_delta += delta;
	482	kern_adjtime_common();
	483	crit_exit();
	484
	485	if (origcpu != 0) {
	486	lwkt_setcpu_self(globaldata_find(origcpu));
	487	cpu_mb1();
	488	}
	489	}
	490
	491	/* ARGSUSED */
	492	int
	493	adjtime(struct adjtime_args *uap)
	494	{
	495	struct thread *td = curthread;
	496	struct timeval atv;
	497	int64_t ndelta, odelta;
	498	int error;
	499
	500	if ((error = suser(td)))
	501	return (error);
	502	if ((error =
	503	copyin((caddr_t)uap->delta, (caddr_t)&atv, sizeof(struct timeval))))
	504	return (error);
	505
	506	/*
	507	* Compute the total correction and the rate at which to apply it.
	508	* Round the adjustment down to a whole multiple of the per-tick
	509	* delta, so that after some number of incremental changes in
	510	* hardclock(), tickdelta will become zero, lest the correction
	511	* overshoot and start taking us away from the desired final time.
	512	*/
	513	ndelta = atv.tv_sec * 1000000000 + atv.tv_usec * 1000;
	514	kern_adjtime(ndelta, &odelta);
	515
	516	if (uap->olddelta) {
	517	atv.tv_sec = odelta / 1000000000;
	518	atv.tv_usec = odelta % 1000000 / 1000;
	519	(void) copyout((caddr_t)&atv, (caddr_t)uap->olddelta,
	520	sizeof(struct timeval));
	521	}
	522	return (0);
	523	}
	524
	525	static int
	526	sysctl_adjtime(SYSCTL_HANDLER_ARGS)
	527	{
	528	int64_t delta;
	529	int error;
	530
	531	if (req->oldptr != NULL) {
	532	delta = 0;
	533	error = SYSCTL_OUT(req, &delta, sizeof(delta));
	534	if (error)
	535	return (error);
	536	}
	537	if (req->newptr != NULL) {
	538	if (suser(curthread))
	539	return (EPERM);
	540	error = SYSCTL_IN(req, &delta, sizeof(delta));
	541	if (error)
	542	return (error);
	543	kern_reladjtime(delta);
	544	}
	545	return (0);
	546	}
	547
	548	SYSCTL_NODE(_kern, OID_AUTO, ntp, CTLFLAG_RW, 0, "NTP related controls");
	549	SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, permanent, CTLFLAG_RW,
	550	&ntp_tick_permanent, sizeof(ntp_tick_permanent),
	551	"LU", "permanent per-tick correct");
	552	SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, delta, CTLFLAG_RD,
	553	&ntp_delta, sizeof(ntp_delta), "LU",
	554	"one-time delta");
	555	SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, big_delta, CTLFLAG_RD,
	556	&ntp_big_delta, sizeof(ntp_big_delta), "LU",
	557	"threshold for fast adjustment");
	558	SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, tick_delta, CTLFLAG_RD,
	559	&ntp_tick_delta, sizeof(ntp_tick_delta), "LU",
	560	"per-tick adjustment");
	561	SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, default_tick_delta, CTLFLAG_RD,
	562	&ntp_default_tick_delta, sizeof(ntp_default_tick_delta), "LU",
	563	"default per-tick adjustment");
	564	SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, next_leaf_second, CTLFLAG_RW,
	565	&ntp_leaf_second, sizeof(ntp_leaf_second), "LU",
	566	"next leaf second");
	567	SYSCTL_INT(_kern_ntp, OID_AUTO, insert_leaf_second, CTLFLAG_RW,
	568	&ntp_leaf_insert, 0, "insert or remove leaf second");
	569	SYSCTL_PROC(_kern_ntp, OID_AUTO, adjust,
	570	CTLTYPE_OPAQUE\|CTLFLAG_RW, 0, 0,
	571	sysctl_adjtime, "", "relative adjust for delta");
	572
	573	/*
	574	* Get value of an interval timer. The process virtual and
	575	* profiling virtual time timers are kept in the p_stats area, since
	576	* they can be swapped out. These are kept internally in the
	577	* way they are specified externally: in time until they expire.
	578	*
	579	* The real time interval timer is kept in the process table slot
	580	* for the process, and its value (it_value) is kept as an
	581	* absolute time rather than as a delta, so that it is easy to keep
	582	* periodic real-time signals from drifting.
	583	*
	584	* Virtual time timers are processed in the hardclock() routine of
	585	* kern_clock.c. The real time timer is processed by a timeout
	586	* routine, called from the softclock() routine. Since a callout
	587	* may be delayed in real time due to interrupt processing in the system,
	588	* it is possible for the real time timeout routine (realitexpire, given below),
	589	* to be delayed in real time past when it is supposed to occur. It
	590	* does not suffice, therefore, to reload the real timer .it_value from the
	591	* real time timers .it_interval. Rather, we compute the next time in
	592	* absolute time the timer should go off.
	593	*/
	594	/* ARGSUSED */
	595	int
	596	getitimer(struct getitimer_args *uap)
	597	{
	598	struct proc *p = curproc;
	599	struct timeval ctv;
	600	struct itimerval aitv;
	601
	602	if (uap->which > ITIMER_PROF)
	603	return (EINVAL);
	604	crit_enter();
	605	if (uap->which == ITIMER_REAL) {
	606	/*
	607	* Convert from absolute to relative time in .it_value
	608	* part of real time timer. If time for real time timer
	609	* has passed return 0, else return difference between
	610	* current time and time for the timer to go off.
	611	*/
	612	aitv = p->p_realtimer;
	613	if (timevalisset(&aitv.it_value)) {
	614	getmicrouptime(&ctv);
	615	if (timevalcmp(&aitv.it_value, &ctv, <))
	616	timevalclear(&aitv.it_value);
	617	else
	618	timevalsub(&aitv.it_value, &ctv);
	619	}
	620	} else {
	621	aitv = p->p_stats->p_timer[uap->which];
	622	}
	623	crit_exit();
	624	return (copyout((caddr_t)&aitv, (caddr_t)uap->itv,
	625	sizeof (struct itimerval)));
	626	}
	627
	628	/* ARGSUSED */
	629	int
	630	setitimer(struct setitimer_args *uap)
	631	{
	632	struct itimerval aitv;
	633	struct timeval ctv;
	634	struct itimerval *itvp;
	635	struct proc *p = curproc;
	636	int error;
	637
	638	if (uap->which > ITIMER_PROF)
	639	return (EINVAL);
	640	itvp = uap->itv;
	641	if (itvp && (error = copyin((caddr_t)itvp, (caddr_t)&aitv,
	642	sizeof(struct itimerval))))
	643	return (error);
	644	if ((uap->itv = uap->oitv) &&
	645	(error = getitimer((struct getitimer_args *)uap)))
	646	return (error);
	647	if (itvp == 0)
	648	return (0);
	649	if (itimerfix(&aitv.it_value))
	650	return (EINVAL);
	651	if (!timevalisset(&aitv.it_value))
	652	timevalclear(&aitv.it_interval);
	653	else if (itimerfix(&aitv.it_interval))
	654	return (EINVAL);
	655	crit_enter();
	656	if (uap->which == ITIMER_REAL) {
	657	if (timevalisset(&p->p_realtimer.it_value))
	658	callout_stop(&p->p_ithandle);
	659	if (timevalisset(&aitv.it_value))
	660	callout_reset(&p->p_ithandle,
	661	tvtohz_high(&aitv.it_value), realitexpire, p);
	662	getmicrouptime(&ctv);
	663	timevaladd(&aitv.it_value, &ctv);
	664	p->p_realtimer = aitv;
	665	} else {
	666	p->p_stats->p_timer[uap->which] = aitv;
	667	}
	668	crit_exit();
	669	return (0);
	670	}
	671
	672	/*
	673	* Real interval timer expired:
	674	* send process whose timer expired an alarm signal.
	675	* If time is not set up to reload, then just return.
	676	* Else compute next time timer should go off which is > current time.
	677	* This is where delay in processing this timeout causes multiple
	678	* SIGALRM calls to be compressed into one.
	679	* tvtohz_high() always adds 1 to allow for the time until the next clock
	680	* interrupt being strictly less than 1 clock tick, but we don't want
	681	* that here since we want to appear to be in sync with the clock
	682	* interrupt even when we're delayed.
	683	*/
	684	void
	685	realitexpire(arg)
	686	void *arg;
	687	{
	688	struct proc *p;
	689	struct timeval ctv, ntv;
	690
	691	p = (struct proc *)arg;
	692	psignal(p, SIGALRM);
	693	if (!timevalisset(&p->p_realtimer.it_interval)) {
	694	timevalclear(&p->p_realtimer.it_value);
	695	return;
	696	}
	697	for (;;) {
	698	crit_enter();
	699	timevaladd(&p->p_realtimer.it_value,
	700	&p->p_realtimer.it_interval);
	701	getmicrouptime(&ctv);
	702	if (timevalcmp(&p->p_realtimer.it_value, &ctv, >)) {
	703	ntv = p->p_realtimer.it_value;
	704	timevalsub(&ntv, &ctv);
	705	callout_reset(&p->p_ithandle, tvtohz_low(&ntv),
	706	realitexpire, p);
	707	crit_exit();
	708	return;
	709	}
	710	crit_exit();
	711	}
	712	}
	713
	714	/*
	715	* Check that a proposed value to load into the .it_value or
	716	* .it_interval part of an interval timer is acceptable, and
	717	* fix it to have at least minimal value (i.e. if it is less
	718	* than the resolution of the clock, round it up.)
	719	*/
	720	int
	721	itimerfix(tv)
	722	struct timeval *tv;
	723	{
	724
	725	if (tv->tv_sec < 0 \|\| tv->tv_sec > 100000000 \|\|
	726	tv->tv_usec < 0 \|\| tv->tv_usec >= 1000000)
	727	return (EINVAL);
	728	if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < tick)
	729	tv->tv_usec = tick;
	730	return (0);
	731	}
	732
	733	/*
	734	* Decrement an interval timer by a specified number
	735	* of microseconds, which must be less than a second,
	736	* i.e. < 1000000. If the timer expires, then reload
	737	* it. In this case, carry over (usec - old value) to
	738	* reduce the value reloaded into the timer so that
	739	* the timer does not drift. This routine assumes
	740	* that it is called in a context where the timers
	741	* on which it is operating cannot change in value.
	742	*/
	743	int
	744	itimerdecr(itp, usec)
	745	struct itimerval *itp;
	746	int usec;
	747	{
	748
	749	if (itp->it_value.tv_usec < usec) {
	750	if (itp->it_value.tv_sec == 0) {
	751	/* expired, and already in next interval */
	752	usec -= itp->it_value.tv_usec;
	753	goto expire;
	754	}
	755	itp->it_value.tv_usec += 1000000;
	756	itp->it_value.tv_sec--;
	757	}
	758	itp->it_value.tv_usec -= usec;
	759	usec = 0;
	760	if (timevalisset(&itp->it_value))
	761	return (1);
	762	/* expired, exactly at end of interval */
	763	expire:
	764	if (timevalisset(&itp->it_interval)) {
	765	itp->it_value = itp->it_interval;
	766	itp->it_value.tv_usec -= usec;
	767	if (itp->it_value.tv_usec < 0) {
	768	itp->it_value.tv_usec += 1000000;
	769	itp->it_value.tv_sec--;
	770	}
	771	} else
	772	itp->it_value.tv_usec = 0; /* sec is already 0 */
	773	return (0);
	774	}
	775
	776	/*
	777	* Add and subtract routines for timevals.
	778	* N.B.: subtract routine doesn't deal with
	779	* results which are before the beginning,
	780	* it just gets very confused in this case.
	781	* Caveat emptor.
	782	*/
	783	void
	784	timevaladd(t1, t2)
	785	struct timeval t1, t2;
	786	{
	787
	788	t1->tv_sec += t2->tv_sec;
	789	t1->tv_usec += t2->tv_usec;
	790	timevalfix(t1);
	791	}
	792
	793	void
	794	timevalsub(t1, t2)
	795	struct timeval t1, t2;
	796	{
	797
	798	t1->tv_sec -= t2->tv_sec;
	799	t1->tv_usec -= t2->tv_usec;
	800	timevalfix(t1);
	801	}
	802
	803	static void
	804	timevalfix(t1)
	805	struct timeval *t1;
	806	{
	807
	808	if (t1->tv_usec < 0) {
	809	t1->tv_sec--;
	810	t1->tv_usec += 1000000;
	811	}
	812	if (t1->tv_usec >= 1000000) {
	813	t1->tv_sec++;
	814	t1->tv_usec -= 1000000;
	815	}
	816	}
	817
	818	/*
	819	* ratecheck(): simple time-based rate-limit checking.
	820	*/
	821	int
	822	ratecheck(struct timeval lasttime, const struct timeval mininterval)
	823	{
	824	struct timeval tv, delta;
	825	int rv = 0;
	826
	827	getmicrouptime(&tv); /* NB: 10ms precision */
	828	delta = tv;
	829	timevalsub(&delta, lasttime);
	830
	831	/*
	832	* check for 0,0 is so that the message will be seen at least once,
	833	* even if interval is huge.
	834	*/
	835	if (timevalcmp(&delta, mininterval, >=) \|\|
	836	(lasttime->tv_sec == 0 && lasttime->tv_usec == 0)) {
	837	*lasttime = tv;
	838	rv = 1;
	839	}
	840
	841	return (rv);
	842	}
	843
	844	/*
	845	* ppsratecheck(): packets (or events) per second limitation.
	846	*
	847	* Return 0 if the limit is to be enforced (e.g. the caller
	848	* should drop a packet because of the rate limitation).
	849	*
	850	* maxpps of 0 always causes zero to be returned. maxpps of -1
	851	* always causes 1 to be returned; this effectively defeats rate
	852	* limiting.
	853	*
	854	* Note that we maintain the struct timeval for compatibility
	855	* with other bsd systems. We reuse the storage and just monitor
	856	* clock ticks for minimal overhead.
	857	*/
	858	int
	859	ppsratecheck(struct timeval lasttime, int curpps, int maxpps)
	860	{
	861	int now;
	862
	863	/*
	864	* Reset the last time and counter if this is the first call
	865	* or more than a second has passed since the last update of
	866	* lasttime.
	867	*/
	868	now = ticks;
	869	if (lasttime->tv_sec == 0 \|\| (u_int)(now - lasttime->tv_sec) >= hz) {
	870	lasttime->tv_sec = now;
	871	*curpps = 1;
	872	return (maxpps != 0);
	873	} else {
	874	(curpps)++; / NB: ignore potential overflow */
	875	return (maxpps < 0 \|\| *curpps < maxpps);
	876	}
	877	}
	878