gitweb.dragonflybsd.org Git - dragonfly.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 1982, 1986, 1989, 1993
	3	* The Regents of the University of California. All rights reserved.
	4	*
	5	* Redistribution and use in source and binary forms, with or without
	6	* modification, are permitted provided that the following conditions
	7	* are met:
	8	* 1. Redistributions of source code must retain the above copyright
	9	* notice, this list of conditions and the following disclaimer.
	10	* 2. Redistributions in binary form must reproduce the above copyright
	11	* notice, this list of conditions and the following disclaimer in the
	12	* documentation and/or other materials provided with the distribution.
	13	* 3. All advertising materials mentioning features or use of this software
	14	* must display the following acknowledgement:
	15	* This product includes software developed by the University of
	16	* California, Berkeley and its contributors.
	17	* 4. Neither the name of the University nor the names of its contributors
	18	* may be used to endorse or promote products derived from this software
	19	* without specific prior written permission.
	20	*
	21	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	22	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	23	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	24	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	25	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	26	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	27	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	28	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	29	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	30	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	31	* SUCH DAMAGE.
	32	*
	33	* @(#)kern_time.c 8.1 (Berkeley) 6/10/93
	34	* $FreeBSD: src/sys/kern/kern_time.c,v 1.68.2.1 2002/10/01 08:00:41 bde Exp $
	35	* $DragonFly: src/sys/kern/kern_time.c,v 1.24 2005/04/22 17:41:15 joerg Exp $
	36	*/
	37
	38	#include <sys/param.h>
	39	#include <sys/systm.h>
	40	#include <sys/buf.h>
	41	#include <sys/sysproto.h>
	42	#include <sys/resourcevar.h>
	43	#include <sys/signalvar.h>
	44	#include <sys/kernel.h>
	45	#include <sys/systm.h>
	46	#include <sys/sysent.h>
	47	#include <sys/sysunion.h>
	48	#include <sys/proc.h>
	49	#include <sys/time.h>
	50	#include <sys/vnode.h>
	51	#include <sys/sysctl.h>
	52	#include <vm/vm.h>
	53	#include <vm/vm_extern.h>
	54	#include <sys/msgport2.h>
	55	#include <sys/thread2.h>
	56
	57	struct timezone tz;
	58
	59	/*
	60	* Time of day and interval timer support.
	61	*
	62	* These routines provide the kernel entry points to get and set
	63	* the time-of-day and per-process interval timers. Subroutines
	64	* here provide support for adding and subtracting timeval structures
	65	* and decrementing interval timers, optionally reloading the interval
	66	* timers when they expire.
	67	*/
	68
	69	static int nanosleep1 (struct timespec *rqt,
	70	struct timespec *rmt);
	71	static int settime (struct timeval *);
	72	static void timevalfix (struct timeval *);
	73	static void no_lease_updatetime (int);
	74
	75	static int sleep_hard_us = 100;
	76	SYSCTL_INT(_kern, OID_AUTO, sleep_hard_us, CTLFLAG_RW, &sleep_hard_us, 0, "")
	77
	78	static void
	79	no_lease_updatetime(deltat)
	80	int deltat;
	81	{
	82	}
	83
	84	void (*lease_updatetime) (int) = no_lease_updatetime;
	85
	86	static int
	87	settime(tv)
	88	struct timeval *tv;
	89	{
	90	struct timeval delta, tv1, tv2;
	91	static struct timeval maxtime, laststep;
	92	struct timespec ts;
	93	int origcpu;
	94
	95	if ((origcpu = mycpu->gd_cpuid) != 0)
	96	lwkt_setcpu_self(globaldata_find(0));
	97
	98	crit_enter();
	99	microtime(&tv1);
	100	delta = *tv;
	101	timevalsub(&delta, &tv1);
	102
	103	/*
	104	* If the system is secure, we do not allow the time to be
	105	* set to a value earlier than 1 second less than the highest
	106	* time we have yet seen. The worst a miscreant can do in
	107	* this circumstance is "freeze" time. He couldn't go
	108	* back to the past.
	109	*
	110	* We similarly do not allow the clock to be stepped more
	111	* than one second, nor more than once per second. This allows
	112	* a miscreant to make the clock march double-time, but no worse.
	113	*/
	114	if (securelevel > 1) {
	115	if (delta.tv_sec < 0 \|\| delta.tv_usec < 0) {
	116	/*
	117	* Update maxtime to latest time we've seen.
	118	*/
	119	if (tv1.tv_sec > maxtime.tv_sec)
	120	maxtime = tv1;
	121	tv2 = *tv;
	122	timevalsub(&tv2, &maxtime);
	123	if (tv2.tv_sec < -1) {
	124	tv->tv_sec = maxtime.tv_sec - 1;
	125	printf("Time adjustment clamped to -1 second\n");
	126	}
	127	} else {
	128	if (tv1.tv_sec == laststep.tv_sec) {
	129	crit_exit();
	130	return (EPERM);
	131	}
	132	if (delta.tv_sec > 1) {
	133	tv->tv_sec = tv1.tv_sec + 1;
	134	printf("Time adjustment clamped to +1 second\n");
	135	}
	136	laststep = *tv;
	137	}
	138	}
	139
	140	ts.tv_sec = tv->tv_sec;
	141	ts.tv_nsec = tv->tv_usec * 1000;
	142	set_timeofday(&ts);
	143	lease_updatetime(delta.tv_sec);
	144	crit_exit();
	145
	146	if (origcpu != 0)
	147	lwkt_setcpu_self(globaldata_find(origcpu));
	148
	149	resettodr();
	150	return (0);
	151	}
	152
	153	/* ARGSUSED */
	154	int
	155	clock_gettime(struct clock_gettime_args *uap)
	156	{
	157	struct timespec ats;
	158
	159	switch(uap->clock_id) {
	160	case CLOCK_REALTIME:
	161	nanotime(&ats);
	162	return (copyout(&ats, uap->tp, sizeof(ats)));
	163	case CLOCK_MONOTONIC:
	164	nanouptime(&ats);
	165	return (copyout(&ats, uap->tp, sizeof(ats)));
	166	default:
	167	return (EINVAL);
	168	}
	169	}
	170
	171	/* ARGSUSED */
	172	int
	173	clock_settime(struct clock_settime_args *uap)
	174	{
	175	struct thread *td = curthread;
	176	struct timeval atv;
	177	struct timespec ats;
	178	int error;
	179
	180	if ((error = suser(td)) != 0)
	181	return (error);
	182	switch(uap->clock_id) {
	183	case CLOCK_REALTIME:
	184	if ((error = copyin(uap->tp, &ats, sizeof(ats))) != 0)
	185	return (error);
	186	if (ats.tv_nsec < 0 \|\| ats.tv_nsec >= 1000000000)
	187	return (EINVAL);
	188	/* XXX Don't convert nsec->usec and back */
	189	TIMESPEC_TO_TIMEVAL(&atv, &ats);
	190	error = settime(&atv);
	191	return (error);
	192	default:
	193	return (EINVAL);
	194	}
	195	}
	196
	197	int
	198	clock_getres(struct clock_getres_args *uap)
	199	{
	200	struct timespec ts;
	201
	202	switch(uap->clock_id) {
	203	case CLOCK_REALTIME:
	204	case CLOCK_MONOTONIC:
	205	/*
	206	* Round up the result of the division cheaply
	207	* by adding 1. Rounding up is especially important
	208	* if rounding down would give 0. Perfect rounding
	209	* is unimportant.
	210	*/
	211	ts.tv_sec = 0;
	212	ts.tv_nsec = 1000000000 / cputimer_freq + 1;
	213	return(copyout(&ts, uap->tp, sizeof(ts)));
	214	default:
	215	return(EINVAL);
	216	}
	217	}
	218
/*
 * nanosleep1()
 *
 * This is a general helper function for nanosleep() (aka sleep() aka
 * usleep()).
 *
 * If there is less than one tick's worth of time left and
 * we haven't done a yield, or the remaining microseconds is
 * ridiculously low, do a yield.  This avoids having
 * to deal with systimer overheads when the system is under
 * heavy loads.  If we have done a yield already then use
 * a systimer and an uninterruptible thread wait.
 *
 * If there is more than a tick's worth of time left,
 * calculate the baseline ticks and use an interruptible
 * tsleep, then handle the fine-grained delay on the next
 * loop.  This usually results in two sleeps occurring, a long one
 * and a short one.
 */
	238	static void
	239	ns1_systimer(systimer_t info)
	240	{
	241	lwkt_schedule(info->data);
	242	}
	243
	244	static int
	245	nanosleep1(struct timespec rqt, struct timespec rmt)
	246	{
	247	static int nanowait;
	248	struct timespec ts, ts2, ts3;
	249	struct timeval tv;
	250	int error;
	251	int tried_yield;
	252
	253	if (rqt->tv_nsec < 0 \|\| rqt->tv_nsec >= 1000000000)
	254	return (EINVAL);
	255	if (rqt->tv_sec < 0 \|\| (rqt->tv_sec == 0 && rqt->tv_nsec == 0))
	256	return (0);
	257	nanouptime(&ts);
	258	timespecadd(&ts, rqt); /* ts = target timestamp compare */
	259	TIMESPEC_TO_TIMEVAL(&tv, rqt); /* tv = sleep interval */
	260	tried_yield = 0;
	261
	262	for (;;) {
	263	int ticks;
	264	struct systimer info;
	265
	266	ticks = tv.tv_usec / tick; /* approximate */
	267
	268	if (tv.tv_sec == 0 && ticks == 0) {
	269	thread_t td = curthread;
	270	if (tried_yield \|\| tv.tv_usec < sleep_hard_us) {
	271	tried_yield = 0;
	272	uio_yield();
	273	} else {
	274	crit_enter_quick(td);
	275	systimer_init_oneshot(&info, ns1_systimer,
	276	td, tv.tv_usec);
	277	lwkt_deschedule_self(td);
	278	crit_exit_quick(td);
	279	lwkt_switch();
	280	systimer_del(&info); /* make sure it's gone */
	281	}
	282	error = iscaught(td->td_proc);
	283	} else if (tv.tv_sec == 0) {
	284	error = tsleep(&nanowait, PCATCH, "nanslp", ticks);
	285	} else {
	286	ticks = tvtohz_low(&tv); /* also handles overflow */
	287	error = tsleep(&nanowait, PCATCH, "nanslp", ticks);
	288	}
	289	nanouptime(&ts2);
	290	if (error && error != EWOULDBLOCK) {
	291	if (error == ERESTART)
	292	error = EINTR;
	293	if (rmt != NULL) {
	294	timespecsub(&ts, &ts2);
	295	if (ts.tv_sec < 0)
	296	timespecclear(&ts);
	297	*rmt = ts;
	298	}
	299	return (error);
	300	}
	301	if (timespeccmp(&ts2, &ts, >=))
	302	return (0);
	303	ts3 = ts;
	304	timespecsub(&ts3, &ts2);
	305	TIMESPEC_TO_TIMEVAL(&tv, &ts3);
	306	}
	307	}
	308
	309	static void nanosleep_done(void *arg);
	310	static void nanosleep_copyout(union sysunion *sysun);
	311
	312	/* ARGSUSED */
	313	int
	314	nanosleep(struct nanosleep_args *uap)
	315	{
	316	int error;
	317	struct sysmsg_sleep *smsleep = &uap->sysmsg.sm.sleep;
	318
	319	error = copyin(uap->rqtp, &smsleep->rqt, sizeof(smsleep->rqt));
	320	if (error)
	321	return (error);
	322	/*
	323	* YYY clean this up to always use the callout, note that an abort
	324	* implementation should record the residual in the async case.
	325	*/
	326	if (uap->sysmsg.lmsg.ms_flags & MSGF_ASYNC) {
	327	quad_t ticks;
	328
	329	ticks = (quad_t)smsleep->rqt.tv_nsec * hz / 1000000000LL;
	330	if (smsleep->rqt.tv_sec)
	331	ticks += (quad_t)smsleep->rqt.tv_sec * hz;
	332	if (ticks <= 0) {
	333	if (ticks == 0)
	334	error = 0;
	335	else
	336	error = EINVAL;
	337	} else {
	338	uap->sysmsg.copyout = nanosleep_copyout;
	339	uap->sysmsg.lmsg.ms_flags &= ~MSGF_DONE;
	340	callout_init(&smsleep->timer);
	341	callout_reset(&smsleep->timer, ticks, nanosleep_done, uap);
	342	error = EASYNC;
	343	}
	344	} else {
	345	/*
	346	* Old synchronous sleep code, copyout the residual if
	347	* nanosleep was interrupted.
	348	*/
	349	error = nanosleep1(&smsleep->rqt, &smsleep->rmt);
	350	if (error && uap->rmtp)
	351	error = copyout(&smsleep->rmt, uap->rmtp, sizeof(smsleep->rmt));
	352	}
	353	return (error);
	354	}
	355
	356	/*
	357	* Asynch completion for the nanosleep() syscall. This function may be
	358	* called from any context and cannot legally access the originating
	359	* thread, proc, or its user space.
	360	*
	361	* YYY change the callout interface API so we can simply assign the replymsg
	362	* function to it directly.
	363	*/
	364	static void
	365	nanosleep_done(void *arg)
	366	{
	367	struct nanosleep_args *uap = arg;
	368	lwkt_msg_t msg = &uap->sysmsg.lmsg;
	369
	370	lwkt_replymsg(msg, 0);
	371	}
	372
	373	/*
	374	* Asynch return for the nanosleep() syscall, called in the context of the
	375	* originating thread when it pulls the message off the reply port. This
	376	* function is responsible for any copyouts to userland. Kernel threads
	377	* which do their own internal system calls will not usually call the return
	378	* function.
	379	*/
	380	static void
	381	nanosleep_copyout(union sysunion *sysun)
	382	{
	383	struct nanosleep_args *uap = &sysun->nanosleep;
	384	struct sysmsg_sleep *smsleep = &uap->sysmsg.sm.sleep;
	385
	386	if (sysun->lmsg.ms_error && uap->rmtp) {
	387	sysun->lmsg.ms_error =
	388	copyout(&smsleep->rmt, uap->rmtp, sizeof(smsleep->rmt));
	389	}
	390	}
	391
	392	/* ARGSUSED */
	393	int
	394	gettimeofday(struct gettimeofday_args *uap)
	395	{
	396	struct timeval atv;
	397	int error = 0;
	398
	399	if (uap->tp) {
	400	microtime(&atv);
	401	if ((error = copyout((caddr_t)&atv, (caddr_t)uap->tp,
	402	sizeof (atv))))
	403	return (error);
	404	}
	405	if (uap->tzp)
	406	error = copyout((caddr_t)&tz, (caddr_t)uap->tzp,
	407	sizeof (tz));
	408	return (error);
	409	}
	410
	411	/* ARGSUSED */
	412	int
	413	settimeofday(struct settimeofday_args *uap)
	414	{
	415	struct thread *td = curthread;
	416	struct timeval atv;
	417	struct timezone atz;
	418	int error;
	419
	420	if ((error = suser(td)))
	421	return (error);
	422	/* Verify all parameters before changing time. */
	423	if (uap->tv) {
	424	if ((error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
	425	sizeof(atv))))
	426	return (error);
	427	if (atv.tv_usec < 0 \|\| atv.tv_usec >= 1000000)
	428	return (EINVAL);
	429	}
	430	if (uap->tzp &&
	431	(error = copyin((caddr_t)uap->tzp, (caddr_t)&atz, sizeof(atz))))
	432	return (error);
	433	if (uap->tv && (error = settime(&atv)))
	434	return (error);
	435	if (uap->tzp)
	436	tz = atz;
	437	return (0);
	438	}
	439
	440	static void
	441	kern_adjtime_common(void)
	442	{
	443	if ((ntp_delta >= 0 && ntp_delta < ntp_default_tick_delta) \|\|
	444	(ntp_delta < 0 && ntp_delta > ntp_default_tick_delta))
	445	ntp_tick_delta = ntp_delta;
	446	else if (ntp_delta > ntp_big_delta)
	447	ntp_tick_delta = 10 * ntp_default_tick_delta;
	448	else if (ntp_delta < -ntp_big_delta)
	449	ntp_tick_delta = -10 * ntp_default_tick_delta;
	450	else if (ntp_delta > 0)
	451	ntp_tick_delta = ntp_default_tick_delta;
	452	else
	453	ntp_tick_delta = -ntp_default_tick_delta;
	454	}
	455
	456	void
	457	kern_adjtime(int64_t delta, int64_t *odelta)
	458	{
	459	int origcpu;
	460
	461	if ((origcpu = mycpu->gd_cpuid) != 0)
	462	lwkt_setcpu_self(globaldata_find(0));
	463
	464	crit_enter();
	465	*odelta = ntp_delta;
	466	ntp_delta += delta;
	467	kern_adjtime_common();
	468	crit_exit();
	469
	470	if (origcpu != 0)
	471	lwkt_setcpu_self(globaldata_find(origcpu));
	472	}
	473
	474	void
	475	kern_reladjtime(int64_t delta)
	476	{
	477	int origcpu;
	478
	479	if ((origcpu = mycpu->gd_cpuid) != 0)
	480	lwkt_setcpu_self(globaldata_find(0));
	481
	482	crit_enter();
	483	ntp_delta += delta;
	484	kern_adjtime_common();
	485	crit_exit();
	486
	487	if (origcpu != 0)
	488	lwkt_setcpu_self(globaldata_find(origcpu));
	489	}
	490
	491	static void
	492	kern_adjfreq(int64_t rate)
	493	{
	494	int origcpu;
	495
	496	if ((origcpu = mycpu->gd_cpuid) != 0)
	497	lwkt_setcpu_self(globaldata_find(0));
	498
	499	crit_enter();
	500	ntp_tick_permanent = rate;
	501	crit_exit();
	502
	503	if (origcpu != 0)
	504	lwkt_setcpu_self(globaldata_find(origcpu));
	505	}
	506
	507	/* ARGSUSED */
	508	int
	509	adjtime(struct adjtime_args *uap)
	510	{
	511	struct thread *td = curthread;
	512	struct timeval atv;
	513	int64_t ndelta, odelta;
	514	int error;
	515
	516	if ((error = suser(td)))
	517	return (error);
	518	if ((error =
	519	copyin((caddr_t)uap->delta, (caddr_t)&atv, sizeof(struct timeval))))
	520	return (error);
	521
	522	/*
	523	* Compute the total correction and the rate at which to apply it.
	524	* Round the adjustment down to a whole multiple of the per-tick
	525	* delta, so that after some number of incremental changes in
	526	* hardclock(), tickdelta will become zero, lest the correction
	527	* overshoot and start taking us away from the desired final time.
	528	*/
	529	ndelta = atv.tv_sec * 1000000000 + atv.tv_usec * 1000;
	530	kern_adjtime(ndelta, &odelta);
	531
	532	if (uap->olddelta) {
	533	atv.tv_sec = odelta / 1000000000;
	534	atv.tv_usec = odelta % 1000000 / 1000;
	535	(void) copyout((caddr_t)&atv, (caddr_t)uap->olddelta,
	536	sizeof(struct timeval));
	537	}
	538	return (0);
	539	}
	540
	541	static int
	542	sysctl_adjtime(SYSCTL_HANDLER_ARGS)
	543	{
	544	int64_t delta;
	545	int error;
	546
	547	if (req->oldptr != NULL) {
	548	delta = 0;
	549	error = SYSCTL_OUT(req, &delta, sizeof(delta));
	550	if (error)
	551	return (error);
	552	}
	553	if (req->newptr != NULL) {
	554	if (suser(curthread))
	555	return (EPERM);
	556	error = SYSCTL_IN(req, &delta, sizeof(delta));
	557	if (error)
	558	return (error);
	559	kern_reladjtime(delta);
	560	}
	561	return (0);
	562	}
	563
	564	static int
	565	sysctl_adjfreq(SYSCTL_HANDLER_ARGS)
	566	{
	567	int64_t freqdelta;
	568	int error;
	569
	570	if (req->oldptr != NULL) {
	571	freqdelta = ntp_tick_permanent * hz;
	572	error = SYSCTL_OUT(req, &freqdelta, sizeof(freqdelta));
	573	if (error)
	574	return (error);
	575	}
	576	if (req->newptr != NULL) {
	577	if (suser(curthread))
	578	return (EPERM);
	579	error = SYSCTL_IN(req, &freqdelta, sizeof(freqdelta));
	580	if (error)
	581	return (error);
	582
	583	freqdelta /= hz;
	584	kern_adjfreq(freqdelta);
	585	}
	586	return (0);
	587	}
	588
	589	SYSCTL_NODE(_kern, OID_AUTO, ntp, CTLFLAG_RW, 0, "NTP related controls");
	590	SYSCTL_PROC(_kern_ntp, OID_AUTO, permanent,
	591	CTLTYPE_OPAQUE\|CTLFLAG_RW, 0, 0,
	592	sysctl_adjfreq, "LU", "permanent correction per second");
	593	SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, delta, CTLFLAG_RD,
	594	&ntp_delta, sizeof(ntp_delta), "LU",
	595	"one-time delta");
	596	SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, big_delta, CTLFLAG_RD,
	597	&ntp_big_delta, sizeof(ntp_big_delta), "LU",
	598	"threshold for fast adjustment");
	599	SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, tick_delta, CTLFLAG_RD,
	600	&ntp_tick_delta, sizeof(ntp_tick_delta), "LU",
	601	"per-tick adjustment");
	602	SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, default_tick_delta, CTLFLAG_RD,
	603	&ntp_default_tick_delta, sizeof(ntp_default_tick_delta), "LU",
	604	"default per-tick adjustment");
	605	SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, next_leap_second, CTLFLAG_RW,
	606	&ntp_leap_second, sizeof(ntp_leap_second), "LU",
	607	"next leap second");
	608	SYSCTL_INT(_kern_ntp, OID_AUTO, insert_leap_second, CTLFLAG_RW,
	609	&ntp_leap_insert, 0, "insert or remove leap second");
	610	SYSCTL_PROC(_kern_ntp, OID_AUTO, adjust,
	611	CTLTYPE_OPAQUE\|CTLFLAG_RW, 0, 0,
	612	sysctl_adjtime, "", "relative adjust for delta");
	613
	614	/*
	615	* Get value of an interval timer. The process virtual and
	616	* profiling virtual time timers are kept in the p_stats area, since
	617	* they can be swapped out. These are kept internally in the
	618	* way they are specified externally: in time until they expire.
	619	*
	620	* The real time interval timer is kept in the process table slot
	621	* for the process, and its value (it_value) is kept as an
	622	* absolute time rather than as a delta, so that it is easy to keep
	623	* periodic real-time signals from drifting.
	624	*
	625	* Virtual time timers are processed in the hardclock() routine of
	626	* kern_clock.c. The real time timer is processed by a timeout
	627	* routine, called from the softclock() routine. Since a callout
	628	* may be delayed in real time due to interrupt processing in the system,
	629	* it is possible for the real time timeout routine (realitexpire, given below),
	630	* to be delayed in real time past when it is supposed to occur. It
	631	* does not suffice, therefore, to reload the real timer .it_value from the
	632	* real time timers .it_interval. Rather, we compute the next time in
	633	* absolute time the timer should go off.
	634	*/
	635	/* ARGSUSED */
	636	int
	637	getitimer(struct getitimer_args *uap)
	638	{
	639	struct proc *p = curproc;
	640	struct timeval ctv;
	641	struct itimerval aitv;
	642
	643	if (uap->which > ITIMER_PROF)
	644	return (EINVAL);
	645	crit_enter();
	646	if (uap->which == ITIMER_REAL) {
	647	/*
	648	* Convert from absolute to relative time in .it_value
	649	* part of real time timer. If time for real time timer
	650	* has passed return 0, else return difference between
	651	* current time and time for the timer to go off.
	652	*/
	653	aitv = p->p_realtimer;
	654	if (timevalisset(&aitv.it_value)) {
	655	getmicrouptime(&ctv);
	656	if (timevalcmp(&aitv.it_value, &ctv, <))
	657	timevalclear(&aitv.it_value);
	658	else
	659	timevalsub(&aitv.it_value, &ctv);
	660	}
	661	} else {
	662	aitv = p->p_stats->p_timer[uap->which];
	663	}
	664	crit_exit();
	665	return (copyout((caddr_t)&aitv, (caddr_t)uap->itv,
	666	sizeof (struct itimerval)));
	667	}
	668
	669	/* ARGSUSED */
	670	int
	671	setitimer(struct setitimer_args *uap)
	672	{
	673	struct itimerval aitv;
	674	struct timeval ctv;
	675	struct itimerval *itvp;
	676	struct proc *p = curproc;
	677	int error;
	678
	679	if (uap->which > ITIMER_PROF)
	680	return (EINVAL);
	681	itvp = uap->itv;
	682	if (itvp && (error = copyin((caddr_t)itvp, (caddr_t)&aitv,
	683	sizeof(struct itimerval))))
	684	return (error);
	685	if ((uap->itv = uap->oitv) &&
	686	(error = getitimer((struct getitimer_args *)uap)))
	687	return (error);
	688	if (itvp == 0)
	689	return (0);
	690	if (itimerfix(&aitv.it_value))
	691	return (EINVAL);
	692	if (!timevalisset(&aitv.it_value))
	693	timevalclear(&aitv.it_interval);
	694	else if (itimerfix(&aitv.it_interval))
	695	return (EINVAL);
	696	crit_enter();
	697	if (uap->which == ITIMER_REAL) {
	698	if (timevalisset(&p->p_realtimer.it_value))
	699	callout_stop(&p->p_ithandle);
	700	if (timevalisset(&aitv.it_value))
	701	callout_reset(&p->p_ithandle,
	702	tvtohz_high(&aitv.it_value), realitexpire, p);
	703	getmicrouptime(&ctv);
	704	timevaladd(&aitv.it_value, &ctv);
	705	p->p_realtimer = aitv;
	706	} else {
	707	p->p_stats->p_timer[uap->which] = aitv;
	708	}
	709	crit_exit();
	710	return (0);
	711	}
	712
	713	/*
	714	* Real interval timer expired:
	715	* send process whose timer expired an alarm signal.
	716	* If time is not set up to reload, then just return.
	717	* Else compute next time timer should go off which is > current time.
	718	* This is where delay in processing this timeout causes multiple
	719	* SIGALRM calls to be compressed into one.
	720	* tvtohz_high() always adds 1 to allow for the time until the next clock
	721	* interrupt being strictly less than 1 clock tick, but we don't want
	722	* that here since we want to appear to be in sync with the clock
	723	* interrupt even when we're delayed.
	724	*/
	725	void
	726	realitexpire(arg)
	727	void *arg;
	728	{
	729	struct proc *p;
	730	struct timeval ctv, ntv;
	731
	732	p = (struct proc *)arg;
	733	psignal(p, SIGALRM);
	734	if (!timevalisset(&p->p_realtimer.it_interval)) {
	735	timevalclear(&p->p_realtimer.it_value);
	736	return;
	737	}
	738	for (;;) {
	739	crit_enter();
	740	timevaladd(&p->p_realtimer.it_value,
	741	&p->p_realtimer.it_interval);
	742	getmicrouptime(&ctv);
	743	if (timevalcmp(&p->p_realtimer.it_value, &ctv, >)) {
	744	ntv = p->p_realtimer.it_value;
	745	timevalsub(&ntv, &ctv);
	746	callout_reset(&p->p_ithandle, tvtohz_low(&ntv),
	747	realitexpire, p);
	748	crit_exit();
	749	return;
	750	}
	751	crit_exit();
	752	}
	753	}
	754
	755	/*
	756	* Check that a proposed value to load into the .it_value or
	757	* .it_interval part of an interval timer is acceptable, and
	758	* fix it to have at least minimal value (i.e. if it is less
	759	* than the resolution of the clock, round it up.)
	760	*/
	761	int
	762	itimerfix(tv)
	763	struct timeval *tv;
	764	{
	765
	766	if (tv->tv_sec < 0 \|\| tv->tv_sec > 100000000 \|\|
	767	tv->tv_usec < 0 \|\| tv->tv_usec >= 1000000)
	768	return (EINVAL);
	769	if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < tick)
	770	tv->tv_usec = tick;
	771	return (0);
	772	}
	773
	774	/*
	775	* Decrement an interval timer by a specified number
	776	* of microseconds, which must be less than a second,
	777	* i.e. < 1000000. If the timer expires, then reload
	778	* it. In this case, carry over (usec - old value) to
	779	* reduce the value reloaded into the timer so that
	780	* the timer does not drift. This routine assumes
	781	* that it is called in a context where the timers
	782	* on which it is operating cannot change in value.
	783	*/
	784	int
	785	itimerdecr(itp, usec)
	786	struct itimerval *itp;
	787	int usec;
	788	{
	789
	790	if (itp->it_value.tv_usec < usec) {
	791	if (itp->it_value.tv_sec == 0) {
	792	/* expired, and already in next interval */
	793	usec -= itp->it_value.tv_usec;
	794	goto expire;
	795	}
	796	itp->it_value.tv_usec += 1000000;
	797	itp->it_value.tv_sec--;
	798	}
	799	itp->it_value.tv_usec -= usec;
	800	usec = 0;
	801	if (timevalisset(&itp->it_value))
	802	return (1);
	803	/* expired, exactly at end of interval */
	804	expire:
	805	if (timevalisset(&itp->it_interval)) {
	806	itp->it_value = itp->it_interval;
	807	itp->it_value.tv_usec -= usec;
	808	if (itp->it_value.tv_usec < 0) {
	809	itp->it_value.tv_usec += 1000000;
	810	itp->it_value.tv_sec--;
	811	}
	812	} else
	813	itp->it_value.tv_usec = 0; /* sec is already 0 */
	814	return (0);
	815	}
	816
	817	/*
	818	* Add and subtract routines for timevals.
	819	* N.B.: subtract routine doesn't deal with
	820	* results which are before the beginning,
	821	* it just gets very confused in this case.
	822	* Caveat emptor.
	823	*/
	824	void
	825	timevaladd(t1, t2)
	826	struct timeval t1, t2;
	827	{
	828
	829	t1->tv_sec += t2->tv_sec;
	830	t1->tv_usec += t2->tv_usec;
	831	timevalfix(t1);
	832	}
	833
	834	void
	835	timevalsub(t1, t2)
	836	struct timeval t1, t2;
	837	{
	838
	839	t1->tv_sec -= t2->tv_sec;
	840	t1->tv_usec -= t2->tv_usec;
	841	timevalfix(t1);
	842	}
	843
	844	static void
	845	timevalfix(t1)
	846	struct timeval *t1;
	847	{
	848
	849	if (t1->tv_usec < 0) {
	850	t1->tv_sec--;
	851	t1->tv_usec += 1000000;
	852	}
	853	if (t1->tv_usec >= 1000000) {
	854	t1->tv_sec++;
	855	t1->tv_usec -= 1000000;
	856	}
	857	}
	858
	859	/*
	860	* ratecheck(): simple time-based rate-limit checking.
	861	*/
	862	int
	863	ratecheck(struct timeval lasttime, const struct timeval mininterval)
	864	{
	865	struct timeval tv, delta;
	866	int rv = 0;
	867
	868	getmicrouptime(&tv); /* NB: 10ms precision */
	869	delta = tv;
	870	timevalsub(&delta, lasttime);
	871
	872	/*
	873	* check for 0,0 is so that the message will be seen at least once,
	874	* even if interval is huge.
	875	*/
	876	if (timevalcmp(&delta, mininterval, >=) \|\|
	877	(lasttime->tv_sec == 0 && lasttime->tv_usec == 0)) {
	878	*lasttime = tv;
	879	rv = 1;
	880	}
	881
	882	return (rv);
	883	}
	884
	885	/*
	886	* ppsratecheck(): packets (or events) per second limitation.
	887	*
	888	* Return 0 if the limit is to be enforced (e.g. the caller
	889	* should drop a packet because of the rate limitation).
	890	*
	891	* maxpps of 0 always causes zero to be returned. maxpps of -1
	892	* always causes 1 to be returned; this effectively defeats rate
	893	* limiting.
	894	*
	895	* Note that we maintain the struct timeval for compatibility
	896	* with other bsd systems. We reuse the storage and just monitor
	897	* clock ticks for minimal overhead.
	898	*/
	899	int
	900	ppsratecheck(struct timeval lasttime, int curpps, int maxpps)
	901	{
	902	int now;
	903
	904	/*
	905	* Reset the last time and counter if this is the first call
	906	* or more than a second has passed since the last update of
	907	* lasttime.
	908	*/
	909	now = ticks;
	910	if (lasttime->tv_sec == 0 \|\| (u_int)(now - lasttime->tv_sec) >= hz) {
	911	lasttime->tv_sec = now;
	912	*curpps = 1;
	913	return (maxpps != 0);
	914	} else {
	915	(curpps)++; / NB: ignore potential overflow */
	916	return (maxpps < 0 \|\| *curpps < maxpps);
	917	}
	918	}
	919