gitweb.dragonflybsd.org Git - dragonfly.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 1982, 1986, 1989, 1993
	3	* The Regents of the University of California. All rights reserved.
	4	*
	5	* Redistribution and use in source and binary forms, with or without
	6	* modification, are permitted provided that the following conditions
	7	* are met:
	8	* 1. Redistributions of source code must retain the above copyright
	9	* notice, this list of conditions and the following disclaimer.
	10	* 2. Redistributions in binary form must reproduce the above copyright
	11	* notice, this list of conditions and the following disclaimer in the
	12	* documentation and/or other materials provided with the distribution.
	13	* 3. All advertising materials mentioning features or use of this software
	14	* must display the following acknowledgement:
	15	* This product includes software developed by the University of
	16	* California, Berkeley and its contributors.
	17	* 4. Neither the name of the University nor the names of its contributors
	18	* may be used to endorse or promote products derived from this software
	19	* without specific prior written permission.
	20	*
	21	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	22	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	23	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	24	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	25	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	26	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	27	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	28	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	29	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	30	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	31	* SUCH DAMAGE.
	32	*
	33	* @(#)kern_time.c 8.1 (Berkeley) 6/10/93
	34	* $FreeBSD: src/sys/kern/kern_time.c,v 1.68.2.1 2002/10/01 08:00:41 bde Exp $
	35	* $DragonFly: src/sys/kern/kern_time.c,v 1.25 2005/04/23 18:46:54 joerg Exp $
	36	*/
	37
	38	#include <sys/param.h>
	39	#include <sys/systm.h>
	40	#include <sys/buf.h>
	41	#include <sys/sysproto.h>
	42	#include <sys/resourcevar.h>
	43	#include <sys/signalvar.h>
	44	#include <sys/kernel.h>
	45	#include <sys/systm.h>
	46	#include <sys/sysent.h>
	47	#include <sys/sysunion.h>
	48	#include <sys/proc.h>
	49	#include <sys/time.h>
	50	#include <sys/vnode.h>
	51	#include <sys/sysctl.h>
	52	#include <vm/vm.h>
	53	#include <vm/vm_extern.h>
	54	#include <sys/msgport2.h>
	55	#include <sys/thread2.h>
	56
	57	struct timezone tz;
	58
	59	/*
	60	* Time of day and interval timer support.
	61	*
	62	* These routines provide the kernel entry points to get and set
	63	* the time-of-day and per-process interval timers. Subroutines
	64	* here provide support for adding and subtracting timeval structures
	65	* and decrementing interval timers, optionally reloading the interval
	66	* timers when they expire.
	67	*/
	68
	/* Forward declarations of the file-local helpers defined below. */
	static int	nanosleep1(struct timespec *rqt, struct timespec *rmt);
	static int	settime(struct timeval *tv);
	static void	timevalfix(struct timeval *t1);
	static void	no_lease_updatetime(int deltat);
	73	static void no_lease_updatetime (int);
	74
	75	static int sleep_hard_us = 100;
	76	SYSCTL_INT(_kern, OID_AUTO, sleep_hard_us, CTLFLAG_RW, &sleep_hard_us, 0, "")
	77
	/*
	 * Default, do-nothing target for the lease_updatetime hook.
	 *
	 * deltat is the number of seconds by which the clock was stepped; it is
	 * ignored here.
	 */
	static void
	no_lease_updatetime(int deltat)
	{
		(void)deltat;
	}
	83
	84	void (*lease_updatetime) (int) = no_lease_updatetime;
	85
	86	static int
	87	settime(tv)
	88	struct timeval *tv;
	89	{
	90	struct timeval delta, tv1, tv2;
	91	static struct timeval maxtime, laststep;
	92	struct timespec ts;
	93	int origcpu;
	94
	95	if ((origcpu = mycpu->gd_cpuid) != 0)
	96	lwkt_setcpu_self(globaldata_find(0));
	97
	98	crit_enter();
	99	microtime(&tv1);
	100	delta = *tv;
	101	timevalsub(&delta, &tv1);
	102
	103	/*
	104	* If the system is secure, we do not allow the time to be
	105	* set to a value earlier than 1 second less than the highest
	106	* time we have yet seen. The worst a miscreant can do in
	107	* this circumstance is "freeze" time. He couldn't go
	108	* back to the past.
	109	*
	110	* We similarly do not allow the clock to be stepped more
	111	* than one second, nor more than once per second. This allows
	112	* a miscreant to make the clock march double-time, but no worse.
	113	*/
	114	if (securelevel > 1) {
	115	if (delta.tv_sec < 0 \|\| delta.tv_usec < 0) {
	116	/*
	117	* Update maxtime to latest time we've seen.
	118	*/
	119	if (tv1.tv_sec > maxtime.tv_sec)
	120	maxtime = tv1;
	121	tv2 = *tv;
	122	timevalsub(&tv2, &maxtime);
	123	if (tv2.tv_sec < -1) {
	124	tv->tv_sec = maxtime.tv_sec - 1;
	125	printf("Time adjustment clamped to -1 second\n");
	126	}
	127	} else {
	128	if (tv1.tv_sec == laststep.tv_sec) {
	129	crit_exit();
	130	return (EPERM);
	131	}
	132	if (delta.tv_sec > 1) {
	133	tv->tv_sec = tv1.tv_sec + 1;
	134	printf("Time adjustment clamped to +1 second\n");
	135	}
	136	laststep = *tv;
	137	}
	138	}
	139
	140	ts.tv_sec = tv->tv_sec;
	141	ts.tv_nsec = tv->tv_usec * 1000;
	142	set_timeofday(&ts);
	143	lease_updatetime(delta.tv_sec);
	144	crit_exit();
	145
	146	if (origcpu != 0)
	147	lwkt_setcpu_self(globaldata_find(origcpu));
	148
	149	resettodr();
	150	return (0);
	151	}
	152
	153	/* ARGSUSED */
	154	int
	155	clock_gettime(struct clock_gettime_args *uap)
	156	{
	157	struct timespec ats;
	158
	159	switch(uap->clock_id) {
	160	case CLOCK_REALTIME:
	161	nanotime(&ats);
	162	return (copyout(&ats, uap->tp, sizeof(ats)));
	163	case CLOCK_MONOTONIC:
	164	nanouptime(&ats);
	165	return (copyout(&ats, uap->tp, sizeof(ats)));
	166	default:
	167	return (EINVAL);
	168	}
	169	}
	170
	171	/* ARGSUSED */
	172	int
	173	clock_settime(struct clock_settime_args *uap)
	174	{
	175	struct thread *td = curthread;
	176	struct timeval atv;
	177	struct timespec ats;
	178	int error;
	179
	180	if ((error = suser(td)) != 0)
	181	return (error);
	182	switch(uap->clock_id) {
	183	case CLOCK_REALTIME:
	184	if ((error = copyin(uap->tp, &ats, sizeof(ats))) != 0)
	185	return (error);
	186	if (ats.tv_nsec < 0 \|\| ats.tv_nsec >= 1000000000)
	187	return (EINVAL);
	188	/* XXX Don't convert nsec->usec and back */
	189	TIMESPEC_TO_TIMEVAL(&atv, &ats);
	190	error = settime(&atv);
	191	return (error);
	192	default:
	193	return (EINVAL);
	194	}
	195	}
	196
	197	int
	198	clock_getres(struct clock_getres_args *uap)
	199	{
	200	struct timespec ts;
	201
	202	switch(uap->clock_id) {
	203	case CLOCK_REALTIME:
	204	case CLOCK_MONOTONIC:
	205	/*
	206	* Round up the result of the division cheaply
	207	* by adding 1. Rounding up is especially important
	208	* if rounding down would give 0. Perfect rounding
	209	* is unimportant.
	210	*/
	211	ts.tv_sec = 0;
	212	ts.tv_nsec = 1000000000 / cputimer_freq + 1;
	213	return(copyout(&ts, uap->tp, sizeof(ts)));
	214	default:
	215	return(EINVAL);
	216	}
	217	}
	218
	219	/*
	220	* nanosleep1()
	221	*
	222	* This is a general helper function for nanosleep() (aka sleep() aka
	223	* usleep()).
	224	*
	225	* If there is less than one tick's worth of time left and
	226	* we haven't done a yield, or the remaining microseconds is
	227	* ridiculously low, do a yield. This avoids having
	228	* to deal with systimer overheads when the system is under
	229	* heavy loads. If we have done a yield already then use
	230	* a systimer and an uninterruptible thread wait.
	231	*
	232	* If there is more than a tick's worth of time left,
	233	* calculate the baseline ticks and use an interruptible
	234	* tsleep, then handle the fine-grained delay on the next
	235	* loop. This usually results in two sleeps occurring, a long one
	236	* and a short one.
	237	*/
	238	static void
	239	ns1_systimer(systimer_t info)
	240	{
	241	lwkt_schedule(info->data);
	242	}
	243
	244	static int
	245	nanosleep1(struct timespec rqt, struct timespec rmt)
	246	{
	247	static int nanowait;
	248	struct timespec ts, ts2, ts3;
	249	struct timeval tv;
	250	int error;
	251	int tried_yield;
	252
	253	if (rqt->tv_nsec < 0 \|\| rqt->tv_nsec >= 1000000000)
	254	return (EINVAL);
	255	if (rqt->tv_sec < 0 \|\| (rqt->tv_sec == 0 && rqt->tv_nsec == 0))
	256	return (0);
	257	nanouptime(&ts);
	258	timespecadd(&ts, rqt); /* ts = target timestamp compare */
	259	TIMESPEC_TO_TIMEVAL(&tv, rqt); /* tv = sleep interval */
	260	tried_yield = 0;
	261
	262	for (;;) {
	263	int ticks;
	264	struct systimer info;
	265
	266	ticks = tv.tv_usec / tick; /* approximate */
	267
	268	if (tv.tv_sec == 0 && ticks == 0) {
	269	thread_t td = curthread;
	270	if (tried_yield \|\| tv.tv_usec < sleep_hard_us) {
	271	tried_yield = 0;
	272	uio_yield();
	273	} else {
	274	crit_enter_quick(td);
	275	systimer_init_oneshot(&info, ns1_systimer,
	276	td, tv.tv_usec);
	277	lwkt_deschedule_self(td);
	278	crit_exit_quick(td);
	279	lwkt_switch();
	280	systimer_del(&info); /* make sure it's gone */
	281	}
	282	error = iscaught(td->td_proc);
	283	} else if (tv.tv_sec == 0) {
	284	error = tsleep(&nanowait, PCATCH, "nanslp", ticks);
	285	} else {
	286	ticks = tvtohz_low(&tv); /* also handles overflow */
	287	error = tsleep(&nanowait, PCATCH, "nanslp", ticks);
	288	}
	289	nanouptime(&ts2);
	290	if (error && error != EWOULDBLOCK) {
	291	if (error == ERESTART)
	292	error = EINTR;
	293	if (rmt != NULL) {
	294	timespecsub(&ts, &ts2);
	295	if (ts.tv_sec < 0)
	296	timespecclear(&ts);
	297	*rmt = ts;
	298	}
	299	return (error);
	300	}
	301	if (timespeccmp(&ts2, &ts, >=))
	302	return (0);
	303	ts3 = ts;
	304	timespecsub(&ts3, &ts2);
	305	TIMESPEC_TO_TIMEVAL(&tv, &ts3);
	306	}
	307	}
	308
	/* Completion and copyout hooks used by the asynchronous nanosleep() path. */
	static void	nanosleep_done(void *arg);
	static void	nanosleep_copyout(union sysunion *sysun);
	311
	312	/* ARGSUSED */
	313	int
	314	nanosleep(struct nanosleep_args *uap)
	315	{
	316	int error;
	317	struct sysmsg_sleep *smsleep = &uap->sysmsg.sm.sleep;
	318
	319	error = copyin(uap->rqtp, &smsleep->rqt, sizeof(smsleep->rqt));
	320	if (error)
	321	return (error);
	322	/*
	323	* YYY clean this up to always use the callout, note that an abort
	324	* implementation should record the residual in the async case.
	325	*/
	326	if (uap->sysmsg.lmsg.ms_flags & MSGF_ASYNC) {
	327	quad_t ticks;
	328
	329	ticks = (quad_t)smsleep->rqt.tv_nsec * hz / 1000000000LL;
	330	if (smsleep->rqt.tv_sec)
	331	ticks += (quad_t)smsleep->rqt.tv_sec * hz;
	332	if (ticks <= 0) {
	333	if (ticks == 0)
	334	error = 0;
	335	else
	336	error = EINVAL;
	337	} else {
	338	uap->sysmsg.copyout = nanosleep_copyout;
	339	uap->sysmsg.lmsg.ms_flags &= ~MSGF_DONE;
	340	callout_init(&smsleep->timer);
	341	callout_reset(&smsleep->timer, ticks, nanosleep_done, uap);
	342	error = EASYNC;
	343	}
	344	} else {
	345	/*
	346	* Old synchronous sleep code, copyout the residual if
	347	* nanosleep was interrupted.
	348	*/
	349	error = nanosleep1(&smsleep->rqt, &smsleep->rmt);
	350	if (error && uap->rmtp)
	351	error = copyout(&smsleep->rmt, uap->rmtp, sizeof(smsleep->rmt));
	352	}
	353	return (error);
	354	}
	355
	356	/*
	357	* Asynch completion for the nanosleep() syscall. This function may be
	358	* called from any context and cannot legally access the originating
	359	* thread, proc, or its user space.
	360	*
	361	* YYY change the callout interface API so we can simply assign the replymsg
	362	* function to it directly.
	363	*/
	364	static void
	365	nanosleep_done(void *arg)
	366	{
	367	struct nanosleep_args *uap = arg;
	368	lwkt_msg_t msg = &uap->sysmsg.lmsg;
	369
	370	lwkt_replymsg(msg, 0);
	371	}
	372
	373	/*
	374	* Asynch return for the nanosleep() syscall, called in the context of the
	375	* originating thread when it pulls the message off the reply port. This
	376	* function is responsible for any copyouts to userland. Kernel threads
	377	* which do their own internal system calls will not usually call the return
	378	* function.
	379	*/
	380	static void
	381	nanosleep_copyout(union sysunion *sysun)
	382	{
	383	struct nanosleep_args *uap = &sysun->nanosleep;
	384	struct sysmsg_sleep *smsleep = &uap->sysmsg.sm.sleep;
	385
	386	if (sysun->lmsg.ms_error && uap->rmtp) {
	387	sysun->lmsg.ms_error =
	388	copyout(&smsleep->rmt, uap->rmtp, sizeof(smsleep->rmt));
	389	}
	390	}
	391
	392	/* ARGSUSED */
	393	int
	394	gettimeofday(struct gettimeofday_args *uap)
	395	{
	396	struct timeval atv;
	397	int error = 0;
	398
	399	if (uap->tp) {
	400	microtime(&atv);
	401	if ((error = copyout((caddr_t)&atv, (caddr_t)uap->tp,
	402	sizeof (atv))))
	403	return (error);
	404	}
	405	if (uap->tzp)
	406	error = copyout((caddr_t)&tz, (caddr_t)uap->tzp,
	407	sizeof (tz));
	408	return (error);
	409	}
	410
	411	/* ARGSUSED */
	412	int
	413	settimeofday(struct settimeofday_args *uap)
	414	{
	415	struct thread *td = curthread;
	416	struct timeval atv;
	417	struct timezone atz;
	418	int error;
	419
	420	if ((error = suser(td)))
	421	return (error);
	422	/* Verify all parameters before changing time. */
	423	if (uap->tv) {
	424	if ((error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
	425	sizeof(atv))))
	426	return (error);
	427	if (atv.tv_usec < 0 \|\| atv.tv_usec >= 1000000)
	428	return (EINVAL);
	429	}
	430	if (uap->tzp &&
	431	(error = copyin((caddr_t)uap->tzp, (caddr_t)&atz, sizeof(atz))))
	432	return (error);
	433	if (uap->tv && (error = settime(&atv)))
	434	return (error);
	435	if (uap->tzp)
	436	tz = atz;
	437	return (0);
	438	}
	439
	440	static void
	441	kern_adjtime_common(void)
	442	{
	443	if ((ntp_delta >= 0 && ntp_delta < ntp_default_tick_delta) \|\|
	444	(ntp_delta < 0 && ntp_delta > ntp_default_tick_delta))
	445	ntp_tick_delta = ntp_delta;
	446	else if (ntp_delta > ntp_big_delta)
	447	ntp_tick_delta = 10 * ntp_default_tick_delta;
	448	else if (ntp_delta < -ntp_big_delta)
	449	ntp_tick_delta = -10 * ntp_default_tick_delta;
	450	else if (ntp_delta > 0)
	451	ntp_tick_delta = ntp_default_tick_delta;
	452	else
	453	ntp_tick_delta = -ntp_default_tick_delta;
	454	}
	455
	456	void
	457	kern_adjtime(int64_t delta, int64_t *odelta)
	458	{
	459	int origcpu;
	460
	461	if ((origcpu = mycpu->gd_cpuid) != 0)
	462	lwkt_setcpu_self(globaldata_find(0));
	463
	464	crit_enter();
	465	*odelta = ntp_delta;
	466	ntp_delta += delta;
	467	kern_adjtime_common();
	468	crit_exit();
	469
	470	if (origcpu != 0)
	471	lwkt_setcpu_self(globaldata_find(origcpu));
	472	}
	473
	474	static void
	475	kern_get_ntp_delta(int64_t *delta)
	476	{
	477	int origcpu;
	478
	479	if ((origcpu = mycpu->gd_cpuid) != 0)
	480	lwkt_setcpu_self(globaldata_find(0));
	481
	482	crit_enter();
	483	*delta = ntp_delta;
	484	crit_exit();
	485
	486	if (origcpu != 0)
	487	lwkt_setcpu_self(globaldata_find(origcpu));
	488	}
	489
	490	void
	491	kern_reladjtime(int64_t delta)
	492	{
	493	int origcpu;
	494
	495	if ((origcpu = mycpu->gd_cpuid) != 0)
	496	lwkt_setcpu_self(globaldata_find(0));
	497
	498	crit_enter();
	499	ntp_delta += delta;
	500	kern_adjtime_common();
	501	crit_exit();
	502
	503	if (origcpu != 0)
	504	lwkt_setcpu_self(globaldata_find(origcpu));
	505	}
	506
	507	static void
	508	kern_adjfreq(int64_t rate)
	509	{
	510	int origcpu;
	511
	512	if ((origcpu = mycpu->gd_cpuid) != 0)
	513	lwkt_setcpu_self(globaldata_find(0));
	514
	515	crit_enter();
	516	ntp_tick_permanent = rate;
	517	crit_exit();
	518
	519	if (origcpu != 0)
	520	lwkt_setcpu_self(globaldata_find(origcpu));
	521	}
	522
	523	/* ARGSUSED */
	524	int
	525	adjtime(struct adjtime_args *uap)
	526	{
	527	struct thread *td = curthread;
	528	struct timeval atv;
	529	int64_t ndelta, odelta;
	530	int error;
	531
	532	if ((error = suser(td)))
	533	return (error);
	534	if ((error =
	535	copyin((caddr_t)uap->delta, (caddr_t)&atv, sizeof(struct timeval))))
	536	return (error);
	537
	538	/*
	539	* Compute the total correction and the rate at which to apply it.
	540	* Round the adjustment down to a whole multiple of the per-tick
	541	* delta, so that after some number of incremental changes in
	542	* hardclock(), tickdelta will become zero, lest the correction
	543	* overshoot and start taking us away from the desired final time.
	544	*/
	545	ndelta = atv.tv_sec * 1000000000 + atv.tv_usec * 1000;
	546	kern_adjtime(ndelta, &odelta);
	547
	548	if (uap->olddelta) {
	549	atv.tv_sec = odelta / 1000000000;
	550	atv.tv_usec = odelta % 1000000 / 1000;
	551	(void) copyout((caddr_t)&atv, (caddr_t)uap->olddelta,
	552	sizeof(struct timeval));
	553	}
	554	return (0);
	555	}
	556
	557	static int
	558	sysctl_adjtime(SYSCTL_HANDLER_ARGS)
	559	{
	560	int64_t delta;
	561	int error;
	562
	563	if (req->oldptr != NULL) {
	564	delta = 0;
	565	error = SYSCTL_OUT(req, &delta, sizeof(delta));
	566	if (error)
	567	return (error);
	568	}
	569	if (req->newptr != NULL) {
	570	if (suser(curthread))
	571	return (EPERM);
	572	error = SYSCTL_IN(req, &delta, sizeof(delta));
	573	if (error)
	574	return (error);
	575	kern_reladjtime(delta);
	576	}
	577	return (0);
	578	}
	579
	580	static int
	581	sysctl_delta(SYSCTL_HANDLER_ARGS)
	582	{
	583	int64_t delta, old_delta;
	584	int error;
	585
	586	if (req->newptr != NULL) {
	587	if (suser(curthread))
	588	return (EPERM);
	589	error = SYSCTL_IN(req, &delta, sizeof(delta));
	590	if (error)
	591	return (error);
	592	kern_adjtime(delta, &old_delta);
	593	/* Fall through for writing old_delta */
	594	} else if (req->oldptr != NULL) {
	595	kern_get_ntp_delta(&old_delta);
	596	}
	597
	598	if (req->oldptr != NULL) {
	599	error = SYSCTL_OUT(req, &old_delta, sizeof(old_delta));
	600	if (error)
	601	return (error);
	602	}
	603
	604	return (0);
	605	}
	606
	607	static int
	608	sysctl_adjfreq(SYSCTL_HANDLER_ARGS)
	609	{
	610	int64_t freqdelta;
	611	int error;
	612
	613	if (req->oldptr != NULL) {
	614	freqdelta = ntp_tick_permanent * hz;
	615	error = SYSCTL_OUT(req, &freqdelta, sizeof(freqdelta));
	616	if (error)
	617	return (error);
	618	}
	619	if (req->newptr != NULL) {
	620	if (suser(curthread))
	621	return (EPERM);
	622	error = SYSCTL_IN(req, &freqdelta, sizeof(freqdelta));
	623	if (error)
	624	return (error);
	625
	626	freqdelta /= hz;
	627	kern_adjfreq(freqdelta);
	628	}
	629	return (0);
	630	}
	631
	632	SYSCTL_NODE(_kern, OID_AUTO, ntp, CTLFLAG_RW, 0, "NTP related controls");
	633	SYSCTL_PROC(_kern_ntp, OID_AUTO, permanent,
	634	CTLFLAG_RW, 0, 0,
	635	sysctl_adjfreq, "LU", "permanent correction per second");
	636	SYSCTL_PROC(_kern_ntp, OID_AUTO, delta,
	637	CTLFLAG_RW, 0, 0,
	638	sysctl_delta, "LU", "one-time delta");
	639	SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, big_delta, CTLFLAG_RD,
	640	&ntp_big_delta, sizeof(ntp_big_delta), "LU",
	641	"threshold for fast adjustment");
	642	SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, tick_delta, CTLFLAG_RD,
	643	&ntp_tick_delta, sizeof(ntp_tick_delta), "LU",
	644	"per-tick adjustment");
	645	SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, default_tick_delta, CTLFLAG_RD,
	646	&ntp_default_tick_delta, sizeof(ntp_default_tick_delta), "LU",
	647	"default per-tick adjustment");
	648	SYSCTL_OPAQUE(_kern_ntp, OID_AUTO, next_leap_second, CTLFLAG_RW,
	649	&ntp_leap_second, sizeof(ntp_leap_second), "LU",
	650	"next leap second");
	651	SYSCTL_INT(_kern_ntp, OID_AUTO, insert_leap_second, CTLFLAG_RW,
	652	&ntp_leap_insert, 0, "insert or remove leap second");
	653	SYSCTL_PROC(_kern_ntp, OID_AUTO, adjust,
	654	CTLFLAG_RW, 0, 0,
	655	sysctl_adjtime, "", "relative adjust for delta");
	656
	657	/*
	658	* Get value of an interval timer. The process virtual and
	659	* profiling virtual time timers are kept in the p_stats area, since
	660	* they can be swapped out. These are kept internally in the
	661	* way they are specified externally: in time until they expire.
	662	*
	663	* The real time interval timer is kept in the process table slot
	664	* for the process, and its value (it_value) is kept as an
	665	* absolute time rather than as a delta, so that it is easy to keep
	666	* periodic real-time signals from drifting.
	667	*
	668	* Virtual time timers are processed in the hardclock() routine of
	669	* kern_clock.c. The real time timer is processed by a timeout
	670	* routine, called from the softclock() routine. Since a callout
	671	* may be delayed in real time due to interrupt processing in the system,
	672	* it is possible for the real time timeout routine (realitexpire, given below),
	673	* to be delayed in real time past when it is supposed to occur. It
	674	* does not suffice, therefore, to reload the real timer .it_value from the
	675	* real time timers .it_interval. Rather, we compute the next time in
	676	* absolute time the timer should go off.
	677	*/
	678	/* ARGSUSED */
	679	int
	680	getitimer(struct getitimer_args *uap)
	681	{
	682	struct proc *p = curproc;
	683	struct timeval ctv;
	684	struct itimerval aitv;
	685
	686	if (uap->which > ITIMER_PROF)
	687	return (EINVAL);
	688	crit_enter();
	689	if (uap->which == ITIMER_REAL) {
	690	/*
	691	* Convert from absolute to relative time in .it_value
	692	* part of real time timer. If time for real time timer
	693	* has passed return 0, else return difference between
	694	* current time and time for the timer to go off.
	695	*/
	696	aitv = p->p_realtimer;
	697	if (timevalisset(&aitv.it_value)) {
	698	getmicrouptime(&ctv);
	699	if (timevalcmp(&aitv.it_value, &ctv, <))
	700	timevalclear(&aitv.it_value);
	701	else
	702	timevalsub(&aitv.it_value, &ctv);
	703	}
	704	} else {
	705	aitv = p->p_stats->p_timer[uap->which];
	706	}
	707	crit_exit();
	708	return (copyout((caddr_t)&aitv, (caddr_t)uap->itv,
	709	sizeof (struct itimerval)));
	710	}
	711
	712	/* ARGSUSED */
	713	int
	714	setitimer(struct setitimer_args *uap)
	715	{
	716	struct itimerval aitv;
	717	struct timeval ctv;
	718	struct itimerval *itvp;
	719	struct proc *p = curproc;
	720	int error;
	721
	722	if (uap->which > ITIMER_PROF)
	723	return (EINVAL);
	724	itvp = uap->itv;
	725	if (itvp && (error = copyin((caddr_t)itvp, (caddr_t)&aitv,
	726	sizeof(struct itimerval))))
	727	return (error);
	728	if ((uap->itv = uap->oitv) &&
	729	(error = getitimer((struct getitimer_args *)uap)))
	730	return (error);
	731	if (itvp == 0)
	732	return (0);
	733	if (itimerfix(&aitv.it_value))
	734	return (EINVAL);
	735	if (!timevalisset(&aitv.it_value))
	736	timevalclear(&aitv.it_interval);
	737	else if (itimerfix(&aitv.it_interval))
	738	return (EINVAL);
	739	crit_enter();
	740	if (uap->which == ITIMER_REAL) {
	741	if (timevalisset(&p->p_realtimer.it_value))
	742	callout_stop(&p->p_ithandle);
	743	if (timevalisset(&aitv.it_value))
	744	callout_reset(&p->p_ithandle,
	745	tvtohz_high(&aitv.it_value), realitexpire, p);
	746	getmicrouptime(&ctv);
	747	timevaladd(&aitv.it_value, &ctv);
	748	p->p_realtimer = aitv;
	749	} else {
	750	p->p_stats->p_timer[uap->which] = aitv;
	751	}
	752	crit_exit();
	753	return (0);
	754	}
	755
	756	/*
	757	* Real interval timer expired:
	758	* send process whose timer expired an alarm signal.
	759	* If time is not set up to reload, then just return.
	760	* Else compute next time timer should go off which is > current time.
	761	* This is where delay in processing this timeout causes multiple
	762	* SIGALRM calls to be compressed into one.
	763	* tvtohz_high() always adds 1 to allow for the time until the next clock
	764	* interrupt being strictly less than 1 clock tick, but we don't want
	765	* that here since we want to appear to be in sync with the clock
	766	* interrupt even when we're delayed.
	767	*/
	768	void
	769	realitexpire(arg)
	770	void *arg;
	771	{
	772	struct proc *p;
	773	struct timeval ctv, ntv;
	774
	775	p = (struct proc *)arg;
	776	psignal(p, SIGALRM);
	777	if (!timevalisset(&p->p_realtimer.it_interval)) {
	778	timevalclear(&p->p_realtimer.it_value);
	779	return;
	780	}
	781	for (;;) {
	782	crit_enter();
	783	timevaladd(&p->p_realtimer.it_value,
	784	&p->p_realtimer.it_interval);
	785	getmicrouptime(&ctv);
	786	if (timevalcmp(&p->p_realtimer.it_value, &ctv, >)) {
	787	ntv = p->p_realtimer.it_value;
	788	timevalsub(&ntv, &ctv);
	789	callout_reset(&p->p_ithandle, tvtohz_low(&ntv),
	790	realitexpire, p);
	791	crit_exit();
	792	return;
	793	}
	794	crit_exit();
	795	}
	796	}
	797
	798	/*
	799	* Check that a proposed value to load into the .it_value or
	800	* .it_interval part of an interval timer is acceptable, and
	801	* fix it to have at least minimal value (i.e. if it is less
	802	* than the resolution of the clock, round it up.)
	803	*/
	804	int
	805	itimerfix(tv)
	806	struct timeval *tv;
	807	{
	808
	809	if (tv->tv_sec < 0 \|\| tv->tv_sec > 100000000 \|\|
	810	tv->tv_usec < 0 \|\| tv->tv_usec >= 1000000)
	811	return (EINVAL);
	812	if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < tick)
	813	tv->tv_usec = tick;
	814	return (0);
	815	}
	816
	817	/*
	818	* Decrement an interval timer by a specified number
	819	* of microseconds, which must be less than a second,
	820	* i.e. < 1000000. If the timer expires, then reload
	821	* it. In this case, carry over (usec - old value) to
	822	* reduce the value reloaded into the timer so that
	823	* the timer does not drift. This routine assumes
	824	* that it is called in a context where the timers
	825	* on which it is operating cannot change in value.
	826	*/
	/*
	 * Decrement the interval timer *itp by usec microseconds.
	 *
	 * Returns 1 while the timer is still running.  Returns 0 when it
	 * expired; in that case it_value is reloaded from it_interval, reduced
	 * by the amount by which the decrement overshot, or left at zero when
	 * there is no interval.
	 */
	int
	itimerdecr(struct itimerval *itp, int usec)
	{

		if (itp->it_value.tv_usec < usec) {
			if (itp->it_value.tv_sec == 0) {
				/* expired, and already in next interval */
				usec -= itp->it_value.tv_usec;
				goto expire;
			}
			/* borrow one second */
			itp->it_value.tv_usec += 1000000;
			itp->it_value.tv_sec--;
		}
		itp->it_value.tv_usec -= usec;
		usec = 0;
		if (timevalisset(&itp->it_value))
			return (1);
		/* expired, exactly at end of interval */
	expire:
		if (timevalisset(&itp->it_interval)) {
			itp->it_value = itp->it_interval;
			itp->it_value.tv_usec -= usec;
			if (itp->it_value.tv_usec < 0) {
				itp->it_value.tv_usec += 1000000;
				itp->it_value.tv_sec--;
			}
		} else
			itp->it_value.tv_usec = 0;	/* sec is already 0 */
		return (0);
	}
	859
	860	/*
	861	* Add and subtract routines for timevals.
	862	* N.B.: subtract routine doesn't deal with
	863	* results which are before the beginning,
	864	* it just gets very confused in this case.
	865	* Caveat emptor.
	866	*/
	/*
	 * *t1 += *t2, with the microsecond field renormalized by timevalfix().
	 */
	void
	timevaladd(struct timeval *t1, struct timeval *t2)
	{

		t1->tv_sec += t2->tv_sec;
		t1->tv_usec += t2->tv_usec;
		timevalfix(t1);
	}
	876
	/*
	 * *t1 -= *t2, with the microsecond field renormalized by timevalfix().
	 */
	void
	timevalsub(struct timeval *t1, struct timeval *t2)
	{

		t1->tv_sec -= t2->tv_sec;
		t1->tv_usec -= t2->tv_usec;
		timevalfix(t1);
	}
	886
	/*
	 * Renormalize *t1 after one add or subtract: carry or borrow a single
	 * second so that a tv_usec which is off by less than one second in
	 * either direction ends up in [0, 1000000).
	 */
	static void
	timevalfix(struct timeval *t1)
	{

		if (t1->tv_usec < 0) {
			t1->tv_sec--;
			t1->tv_usec += 1000000;
		}
		if (t1->tv_usec >= 1000000) {
			t1->tv_sec++;
			t1->tv_usec -= 1000000;
		}
	}
	901
	902	/*
	903	* ratecheck(): simple time-based rate-limit checking.
	904	*/
	905	int
	906	ratecheck(struct timeval lasttime, const struct timeval mininterval)
	907	{
	908	struct timeval tv, delta;
	909	int rv = 0;
	910
	911	getmicrouptime(&tv); /* NB: 10ms precision */
	912	delta = tv;
	913	timevalsub(&delta, lasttime);
	914
	915	/*
	916	* check for 0,0 is so that the message will be seen at least once,
	917	* even if interval is huge.
	918	*/
	919	if (timevalcmp(&delta, mininterval, >=) \|\|
	920	(lasttime->tv_sec == 0 && lasttime->tv_usec == 0)) {
	921	*lasttime = tv;
	922	rv = 1;
	923	}
	924
	925	return (rv);
	926	}
	927
	928	/*
	929	* ppsratecheck(): packets (or events) per second limitation.
	930	*
	931	* Return 0 if the limit is to be enforced (e.g. the caller
	932	* should drop a packet because of the rate limitation).
	933	*
	934	* maxpps of 0 always causes zero to be returned. maxpps of -1
	935	* always causes 1 to be returned; this effectively defeats rate
	936	* limiting.
	937	*
	938	* Note that we maintain the struct timeval for compatibility
	939	* with other bsd systems. We reuse the storage and just monitor
	940	* clock ticks for minimal overhead.
	941	*/
	942	int
	943	ppsratecheck(struct timeval lasttime, int curpps, int maxpps)
	944	{
	945	int now;
	946
	947	/*
	948	* Reset the last time and counter if this is the first call
	949	* or more than a second has passed since the last update of
	950	* lasttime.
	951	*/
	952	now = ticks;
	953	if (lasttime->tv_sec == 0 \|\| (u_int)(now - lasttime->tv_sec) >= hz) {
	954	lasttime->tv_sec = now;
	955	*curpps = 1;
	956	return (maxpps != 0);
	957	} else {
	958	(curpps)++; / NB: ignore potential overflow */
	959	return (maxpps < 0 \|\| *curpps < maxpps);
	960	}
	961	}
	962