gitweb.dragonflybsd.org Git - dragonfly.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 1982, 1986, 1989, 1991, 1993
	3	* The Regents of the University of California. All rights reserved.
	4	* (c) UNIX System Laboratories, Inc.
	5	* All or some portions of this file are derived from material licensed
	6	* to the University of California by American Telephone and Telegraph
	7	* Co. or Unix System Laboratories, Inc. and are reproduced herein with
	8	* the permission of UNIX System Laboratories, Inc.
	9	*
	10	* Redistribution and use in source and binary forms, with or without
	11	* modification, are permitted provided that the following conditions
	12	* are met:
	13	* 1. Redistributions of source code must retain the above copyright
	14	* notice, this list of conditions and the following disclaimer.
	15	* 2. Redistributions in binary form must reproduce the above copyright
	16	* notice, this list of conditions and the following disclaimer in the
	17	* documentation and/or other materials provided with the distribution.
	18	* 3. All advertising materials mentioning features or use of this software
	19	* must display the following acknowledgement:
	20	* This product includes software developed by the University of
	21	* California, Berkeley and its contributors.
	22	* 4. Neither the name of the University nor the names of its contributors
	23	* may be used to endorse or promote products derived from this software
	24	* without specific prior written permission.
	25	*
	26	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	27	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	28	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	29	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	30	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	31	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	32	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	33	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	34	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	35	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	36	* SUCH DAMAGE.
	37	*
	38	* @(#)kern_fork.c 8.6 (Berkeley) 4/8/94
	39	* $FreeBSD: src/sys/kern/kern_fork.c,v 1.72.2.14 2003/06/26 04:15:10 silby Exp $
	40	* $DragonFly: src/sys/kern/kern_fork.c,v 1.77 2008/05/18 20:02:02 nth Exp $
	41	*/
	42
	43	#include "opt_ktrace.h"
	44
	45	#include <sys/param.h>
	46	#include <sys/systm.h>
	47	#include <sys/sysproto.h>
	48	#include <sys/filedesc.h>
	49	#include <sys/kernel.h>
	50	#include <sys/sysctl.h>
	51	#include <sys/malloc.h>
	52	#include <sys/proc.h>
	53	#include <sys/resourcevar.h>
	54	#include <sys/vnode.h>
	55	#include <sys/acct.h>
	56	#include <sys/ktrace.h>
	57	#include <sys/unistd.h>
	58	#include <sys/jail.h>
	59	#include <sys/caps.h>
	60
	61	#include <vm/vm.h>
	62	#include <sys/lock.h>
	63	#include <vm/pmap.h>
	64	#include <vm/vm_map.h>
	65	#include <vm/vm_extern.h>
	66
	67	#include <sys/vmmeter.h>
	68	#include <sys/refcount.h>
	69	#include <sys/thread2.h>
	70	#include <sys/signal2.h>
	71	#include <sys/spinlock2.h>
	72
	73	#include <sys/dsched.h>
	74
	75	static MALLOC_DEFINE(M_ATFORK, "atfork", "atfork callback");
	76
	77	/*
	78	* These are the stuctures used to create a callout list for things to do
	79	* when forking a process
	80	*/
	81	struct forklist {
	82	forklist_fn function;
	83	TAILQ_ENTRY(forklist) next;
	84	};
	85
	86	TAILQ_HEAD(forklist_head, forklist);
	87	static struct forklist_head fork_list = TAILQ_HEAD_INITIALIZER(fork_list);
	88
	89	static struct lwp lwp_fork(struct lwp , struct proc *, int flags);
	90
	91	int forksleep; /* Place for fork1() to sleep on. */
	92
	93	/*
	94	* Red-Black tree support for LWPs
	95	*/
	96
	97	static int
	98	rb_lwp_compare(struct lwp lp1, struct lwp lp2)
	99	{
	100	if (lp1->lwp_tid < lp2->lwp_tid)
	101	return(-1);
	102	if (lp1->lwp_tid > lp2->lwp_tid)
	103	return(1);
	104	return(0);
	105	}
	106
	107	RB_GENERATE2(lwp_rb_tree, lwp, u.lwp_rbnode, rb_lwp_compare, lwpid_t, lwp_tid);
	108
	109	/*
	110	* Fork system call
	111	*
	112	* MPALMOSTSAFE
	113	*/
	114	int
	115	sys_fork(struct fork_args *uap)
	116	{
	117	struct lwp *lp = curthread->td_lwp;
	118	struct proc *p2;
	119	int error;
	120
	121	error = fork1(lp, RFFDG \| RFPROC \| RFPGLOCK, &p2);
	122	if (error == 0) {
	123	start_forked_proc(lp, p2);
	124	uap->sysmsg_fds[0] = p2->p_pid;
	125	uap->sysmsg_fds[1] = 0;
	126	}
	127	return error;
	128	}
	129
	130	/*
	131	* MPALMOSTSAFE
	132	*/
	133	int
	134	sys_vfork(struct vfork_args *uap)
	135	{
	136	struct lwp *lp = curthread->td_lwp;
	137	struct proc *p2;
	138	int error;
	139
	140	error = fork1(lp, RFFDG \| RFPROC \| RFPPWAIT \| RFMEM \| RFPGLOCK, &p2);
	141	if (error == 0) {
	142	start_forked_proc(lp, p2);
	143	uap->sysmsg_fds[0] = p2->p_pid;
	144	uap->sysmsg_fds[1] = 0;
	145	}
	146	return error;
	147	}
	148
	149	/*
	150	* Handle rforks. An rfork may (1) operate on the current process without
	151	* creating a new, (2) create a new process that shared the current process's
	152	* vmspace, signals, and/or descriptors, or (3) create a new process that does
	153	* not share these things (normal fork).
	154	*
	155	* Note that we only call start_forked_proc() if a new process is actually
	156	* created.
	157	*
	158	* rfork { int flags }
	159	*
	160	* MPALMOSTSAFE
	161	*/
	162	int
	163	sys_rfork(struct rfork_args *uap)
	164	{
	165	struct lwp *lp = curthread->td_lwp;
	166	struct proc *p2;
	167	int error;
	168
	169	if ((uap->flags & RFKERNELONLY) != 0)
	170	return (EINVAL);
	171
	172	error = fork1(lp, uap->flags \| RFPGLOCK, &p2);
	173	if (error == 0) {
	174	if (p2)
	175	start_forked_proc(lp, p2);
	176	uap->sysmsg_fds[0] = p2 ? p2->p_pid : 0;
	177	uap->sysmsg_fds[1] = 0;
	178	}
	179	return error;
	180	}
	181
	182	/*
	183	* MPALMOSTSAFE
	184	*/
	185	int
	186	sys_lwp_create(struct lwp_create_args *uap)
	187	{
	188	struct proc *p = curproc;
	189	struct lwp *lp;
	190	struct lwp_params params;
	191	int error;
	192
	193	error = copyin(uap->params, &params, sizeof(params));
	194	if (error)
	195	goto fail2;
	196
	197	lwkt_gettoken(&p->p_token);
	198	plimit_lwp_fork(p); /* force exclusive access */
	199	lp = lwp_fork(curthread->td_lwp, p, RFPROC);
	200	error = cpu_prepare_lwp(lp, &params);
	201	if (params.tid1 != NULL &&
	202	(error = copyout(&lp->lwp_tid, params.tid1, sizeof(lp->lwp_tid))))
	203	goto fail;
	204	if (params.tid2 != NULL &&
	205	(error = copyout(&lp->lwp_tid, params.tid2, sizeof(lp->lwp_tid))))
	206	goto fail;
	207
	208	/*
	209	* Now schedule the new lwp.
	210	*/
	211	p->p_usched->resetpriority(lp);
	212	crit_enter();
	213	lp->lwp_stat = LSRUN;
	214	p->p_usched->setrunqueue(lp);
	215	crit_exit();
	216	lwkt_reltoken(&p->p_token);
	217
	218	return (0);
	219
	220	fail:
	221	lwp_rb_tree_RB_REMOVE(&p->p_lwp_tree, lp);
	222	--p->p_nthreads;
	223	/* lwp_dispose expects an exited lwp, and a held proc */
	224	lp->lwp_flag \|= LWP_WEXIT;
	225	lp->lwp_thread->td_flags \|= TDF_EXITING;
	226	PHOLD(p);
	227	lwp_dispose(lp);
	228	lwkt_reltoken(&p->p_token);
	229	fail2:
	230	return (error);
	231	}
	232
	233	int nprocs = 1; /* process 0 */
	234
	235	int
	236	fork1(struct lwp lp1, int flags, struct proc *procp)
	237	{
	238	struct proc *p1 = lp1->lwp_proc;
	239	struct proc p2, pptr;
	240	struct pgrp *p1grp;
	241	struct pgrp *plkgrp;
	242	uid_t uid;
	243	int ok, error;
	244	static int curfail = 0;
	245	static struct timeval lastfail;
	246	struct forklist *ep;
	247	struct filedesc_to_leader *fdtol;
	248
	249	if ((flags & (RFFDG\|RFCFDG)) == (RFFDG\|RFCFDG))
	250	return (EINVAL);
	251
	252	lwkt_gettoken(&p1->p_token);
	253	plkgrp = NULL;
	254
	255	/*
	256	* Here we don't create a new process, but we divorce
	257	* certain parts of a process from itself.
	258	*/
	259	if ((flags & RFPROC) == 0) {
	260	/*
	261	* This kind of stunt does not work anymore if
	262	* there are native threads (lwps) running
	263	*/
	264	if (p1->p_nthreads != 1) {
	265	error = EINVAL;
	266	goto done;
	267	}
	268
	269	vm_fork(p1, 0, flags);
	270
	271	/*
	272	* Close all file descriptors.
	273	*/
	274	if (flags & RFCFDG) {
	275	struct filedesc *fdtmp;
	276	fdtmp = fdinit(p1);
	277	fdfree(p1, fdtmp);
	278	}
	279
	280	/*
	281	* Unshare file descriptors (from parent.)
	282	*/
	283	if (flags & RFFDG) {
	284	if (p1->p_fd->fd_refcnt > 1) {
	285	struct filedesc *newfd;
	286	error = fdcopy(p1, &newfd);
	287	if (error != 0) {
	288	error = ENOMEM;
	289	goto done;
	290	}
	291	fdfree(p1, newfd);
	292	}
	293	}
	294	*procp = NULL;
	295	error = 0;
	296	goto done;
	297	}
	298
	299	/*
	300	* Interlock against process group signal delivery. If signals
	301	* are pending after the interlock is obtained we have to restart
	302	* the system call to process the signals. If we don't the child
	303	* can miss a pgsignal (such as ^C) sent during the fork.
	304	*
	305	* We can't use CURSIG() here because it will process any STOPs
	306	* and cause the process group lock to be held indefinitely. If
	307	* a STOP occurs, the fork will be restarted after the CONT.
	308	*/
	309	p1grp = p1->p_pgrp;
	310	if ((flags & RFPGLOCK) && (plkgrp = p1->p_pgrp) != NULL) {
	311	pgref(plkgrp);
	312	lockmgr(&plkgrp->pg_lock, LK_SHARED);
	313	if (CURSIG_NOBLOCK(lp1)) {
	314	error = ERESTART;
	315	goto done;
	316	}
	317	}
	318
	319	/*
	320	* Although process entries are dynamically created, we still keep
	321	* a global limit on the maximum number we will create. Don't allow
	322	* a nonprivileged user to use the last ten processes; don't let root
	323	* exceed the limit. The variable nprocs is the current number of
	324	* processes, maxproc is the limit.
	325	*/
	326	uid = lp1->lwp_thread->td_ucred->cr_ruid;
	327	if ((nprocs >= maxproc - 10 && uid != 0) \|\| nprocs >= maxproc) {
	328	if (ppsratecheck(&lastfail, &curfail, 1))
	329	kprintf("maxproc limit exceeded by uid %d, please "
	330	"see tuning(7) and login.conf(5).\n", uid);
	331	tsleep(&forksleep, 0, "fork", hz / 2);
	332	error = EAGAIN;
	333	goto done;
	334	}
	335
	336	/*
	337	* Increment the nprocs resource before blocking can occur. There
	338	* are hard-limits as to the number of processes that can run.
	339	*/
	340	atomic_add_int(&nprocs, 1);
	341
	342	/*
	343	* Increment the count of procs running with this uid. Don't allow
	344	* a nonprivileged user to exceed their current limit.
	345	*/
	346	ok = chgproccnt(lp1->lwp_thread->td_ucred->cr_ruidinfo, 1,
	347	(uid != 0) ? p1->p_rlimit[RLIMIT_NPROC].rlim_cur : 0);
	348	if (!ok) {
	349	/*
	350	* Back out the process count
	351	*/
	352	atomic_add_int(&nprocs, -1);
	353	if (ppsratecheck(&lastfail, &curfail, 1))
	354	kprintf("maxproc limit exceeded by uid %d, please "
	355	"see tuning(7) and login.conf(5).\n", uid);
	356	tsleep(&forksleep, 0, "fork", hz / 2);
	357	error = EAGAIN;
	358	goto done;
	359	}
	360
	361	/* Allocate new proc. */
	362	p2 = kmalloc(sizeof(struct proc), M_PROC, M_WAITOK\|M_ZERO);
	363
	364	/*
	365	* Setup linkage for kernel based threading XXX lwp
	366	*/
	367	if (flags & RFTHREAD) {
	368	p2->p_peers = p1->p_peers;
	369	p1->p_peers = p2;
	370	p2->p_leader = p1->p_leader;
	371	} else {
	372	p2->p_leader = p2;
	373	}
	374
	375	RB_INIT(&p2->p_lwp_tree);
	376	spin_init(&p2->p_spin);
	377	lwkt_token_init(&p2->p_token, "proc");
	378	p2->p_lasttid = -1; /* first tid will be 0 */
	379
	380	/*
	381	* Setting the state to SIDL protects the partially initialized
	382	* process once it starts getting hooked into the rest of the system.
	383	*/
	384	p2->p_stat = SIDL;
	385	proc_add_allproc(p2);
	386
	387	/*
	388	* Make a proc table entry for the new process.
	389	* The whole structure was zeroed above, so copy the section that is
	390	* copied directly from the parent.
	391	*/
	392	bcopy(&p1->p_startcopy, &p2->p_startcopy,
	393	(unsigned) ((caddr_t)&p2->p_endcopy - (caddr_t)&p2->p_startcopy));
	394
	395	/*
	396	* Duplicate sub-structures as needed. Increase reference counts
	397	* on shared objects.
	398	*
	399	* NOTE: because we are now on the allproc list it is possible for
	400	* other consumers to gain temporary references to p2
	401	* (p2->p_lock can change).
	402	*/
	403	if (p1->p_flag & P_PROFIL)
	404	startprofclock(p2);
	405	p2->p_ucred = crhold(lp1->lwp_thread->td_ucred);
	406
	407	if (jailed(p2->p_ucred))
	408	p2->p_flag \|= P_JAILED;
	409
	410	if (p2->p_args)
	411	refcount_acquire(&p2->p_args->ar_ref);
	412
	413	p2->p_usched = p1->p_usched;
	414	/* XXX: verify copy of the secondary iosched stuff */
	415	dsched_new_proc(p2);
	416
	417	if (flags & RFSIGSHARE) {
	418	p2->p_sigacts = p1->p_sigacts;
	419	refcount_acquire(&p2->p_sigacts->ps_refcnt);
	420	} else {
	421	p2->p_sigacts = kmalloc(sizeof(*p2->p_sigacts),
	422	M_SUBPROC, M_WAITOK);
	423	bcopy(p1->p_sigacts, p2->p_sigacts, sizeof(*p2->p_sigacts));
	424	refcount_init(&p2->p_sigacts->ps_refcnt, 1);
	425	}
	426	if (flags & RFLINUXTHPN)
	427	p2->p_sigparent = SIGUSR1;
	428	else
	429	p2->p_sigparent = SIGCHLD;
	430
	431	/* bump references to the text vnode (for procfs) */
	432	p2->p_textvp = p1->p_textvp;
	433	if (p2->p_textvp)
	434	vref(p2->p_textvp);
	435
	436	/* copy namecache handle to the text file */
	437	if (p1->p_textnch.mount)
	438	cache_copy(&p1->p_textnch, &p2->p_textnch);
	439
	440	/*
	441	* Handle file descriptors
	442	*/
	443	if (flags & RFCFDG) {
	444	p2->p_fd = fdinit(p1);
	445	fdtol = NULL;
	446	} else if (flags & RFFDG) {
	447	error = fdcopy(p1, &p2->p_fd);
	448	if (error != 0) {
	449	error = ENOMEM;
	450	goto done;
	451	}
	452	fdtol = NULL;
	453	} else {
	454	p2->p_fd = fdshare(p1);
	455	if (p1->p_fdtol == NULL) {
	456	lwkt_gettoken(&p1->p_token);
	457	p1->p_fdtol =
	458	filedesc_to_leader_alloc(NULL,
	459	p1->p_leader);
	460	lwkt_reltoken(&p1->p_token);
	461	}
	462	if ((flags & RFTHREAD) != 0) {
	463	/*
	464	* Shared file descriptor table and
	465	* shared process leaders.
	466	*/
	467	fdtol = p1->p_fdtol;
	468	fdtol->fdl_refcount++;
	469	} else {
	470	/*
	471	* Shared file descriptor table, and
	472	* different process leaders
	473	*/
	474	fdtol = filedesc_to_leader_alloc(p1->p_fdtol, p2);
	475	}
	476	}
	477	p2->p_fdtol = fdtol;
	478	p2->p_limit = plimit_fork(p1);
	479
	480	/*
	481	* Preserve some more flags in subprocess. P_PROFIL has already
	482	* been preserved.
	483	*/
	484	p2->p_flag \|= p1->p_flag & P_SUGID;
	485	if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
	486	p2->p_flag \|= P_CONTROLT;
	487	if (flags & RFPPWAIT)
	488	p2->p_flag \|= P_PPWAIT;
	489
	490	/*
	491	* Inherit the virtual kernel structure (allows a virtual kernel
	492	* to fork to simulate multiple cpus).
	493	*/
	494	if (p1->p_vkernel)
	495	vkernel_inherit(p1, p2);
	496
	497	/*
	498	* Once we are on a pglist we may receive signals. XXX we might
	499	* race a ^C being sent to the process group by not receiving it
	500	* at all prior to this line.
	501	*/
	502	pgref(p1grp);
	503	lwkt_gettoken(&p1grp->pg_token);
	504	LIST_INSERT_AFTER(p1, p2, p_pglist);
	505	lwkt_reltoken(&p1grp->pg_token);
	506
	507	/*
	508	* Attach the new process to its parent.
	509	*
	510	* If RFNOWAIT is set, the newly created process becomes a child
	511	* of init. This effectively disassociates the child from the
	512	* parent.
	513	*/
	514	if (flags & RFNOWAIT)
	515	pptr = initproc;
	516	else
	517	pptr = p1;
	518	p2->p_pptr = pptr;
	519	LIST_INIT(&p2->p_children);
	520
	521	lwkt_gettoken(&pptr->p_token);
	522	LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling);
	523	lwkt_reltoken(&pptr->p_token);
	524
	525	varsymset_init(&p2->p_varsymset, &p1->p_varsymset);
	526	callout_init_mp(&p2->p_ithandle);
	527
	528	#ifdef KTRACE
	529	/*
	530	* Copy traceflag and tracefile if enabled. If not inherited,
	531	* these were zeroed above but we still could have a trace race
	532	* so make sure p2's p_tracenode is NULL.
	533	*/
	534	if ((p1->p_traceflag & KTRFAC_INHERIT) && p2->p_tracenode == NULL) {
	535	p2->p_traceflag = p1->p_traceflag;
	536	p2->p_tracenode = ktrinherit(p1->p_tracenode);
	537	}
	538	#endif
	539
	540	/*
	541	* This begins the section where we must prevent the parent
	542	* from being swapped.
	543	*
	544	* Gets PRELE'd in the caller in start_forked_proc().
	545	*/
	546	PHOLD(p1);
	547
	548	vm_fork(p1, p2, flags);
	549
	550	/*
	551	* Create the first lwp associated with the new proc.
	552	* It will return via a different execution path later, directly
	553	* into userland, after it was put on the runq by
	554	* start_forked_proc().
	555	*/
	556	lwp_fork(lp1, p2, flags);
	557
	558	if (flags == (RFFDG \| RFPROC \| RFPGLOCK)) {
	559	mycpu->gd_cnt.v_forks++;
	560	mycpu->gd_cnt.v_forkpages += p2->p_vmspace->vm_dsize + p2->p_vmspace->vm_ssize;
	561	} else if (flags == (RFFDG \| RFPROC \| RFPPWAIT \| RFMEM \| RFPGLOCK)) {
	562	mycpu->gd_cnt.v_vforks++;
	563	mycpu->gd_cnt.v_vforkpages += p2->p_vmspace->vm_dsize + p2->p_vmspace->vm_ssize;
	564	} else if (p1 == &proc0) {
	565	mycpu->gd_cnt.v_kthreads++;
	566	mycpu->gd_cnt.v_kthreadpages += p2->p_vmspace->vm_dsize + p2->p_vmspace->vm_ssize;
	567	} else {
	568	mycpu->gd_cnt.v_rforks++;
	569	mycpu->gd_cnt.v_rforkpages += p2->p_vmspace->vm_dsize + p2->p_vmspace->vm_ssize;
	570	}
	571
	572	/*
	573	* Both processes are set up, now check if any loadable modules want
	574	* to adjust anything.
	575	* What if they have an error? XXX
	576	*/
	577	TAILQ_FOREACH(ep, &fork_list, next) {
	578	(*ep->function)(p1, p2, flags);
	579	}
	580
	581	/*
	582	* Set the start time. Note that the process is not runnable. The
	583	* caller is responsible for making it runnable.
	584	*/
	585	microtime(&p2->p_start);
	586	p2->p_acflag = AFORK;
	587
	588	/*
	589	* tell any interested parties about the new process
	590	*/
	591	KNOTE(&p1->p_klist, NOTE_FORK \| p2->p_pid);
	592
	593	/*
	594	* Return child proc pointer to parent.
	595	*/
	596	*procp = p2;
	597	error = 0;
	598	done:
	599	lwkt_reltoken(&p1->p_token);
	600	if (plkgrp) {
	601	lockmgr(&plkgrp->pg_lock, LK_RELEASE);
	602	pgrel(plkgrp);
	603	}
	604	return (error);
	605	}
	606
	607	static struct lwp *
	608	lwp_fork(struct lwp origlp, struct proc destproc, int flags)
	609	{
	610	struct lwp *lp;
	611	struct thread *td;
	612
	613	lp = kmalloc(sizeof(struct lwp), M_LWP, M_WAITOK\|M_ZERO);
	614
	615	lp->lwp_proc = destproc;
	616	lp->lwp_vmspace = destproc->p_vmspace;
	617	lp->lwp_stat = LSRUN;
	618	bcopy(&origlp->lwp_startcopy, &lp->lwp_startcopy,
	619	(unsigned) ((caddr_t)&lp->lwp_endcopy -
	620	(caddr_t)&lp->lwp_startcopy));
	621	lp->lwp_flag \|= origlp->lwp_flag & LWP_ALTSTACK;
	622	/*
	623	* Set cpbase to the last timeout that occured (not the upcoming
	624	* timeout).
	625	*
	626	* A critical section is required since a timer IPI can update
	627	* scheduler specific data.
	628	*/
	629	crit_enter();
	630	lp->lwp_cpbase = mycpu->gd_schedclock.time -
	631	mycpu->gd_schedclock.periodic;
	632	destproc->p_usched->heuristic_forking(origlp, lp);
	633	crit_exit();
	634	lp->lwp_cpumask &= usched_mastermask;
	635
	636	td = lwkt_alloc_thread(NULL, LWKT_THREAD_STACK, -1, 0);
	637	lp->lwp_thread = td;
	638	td->td_proc = destproc;
	639	td->td_lwp = lp;
	640	td->td_switch = cpu_heavy_switch;
	641	lwkt_setpri(td, TDPRI_KERN_USER);
	642	lwkt_set_comm(td, "%s", destproc->p_comm);
	643
	644	/*
	645	* cpu_fork will copy and update the pcb, set up the kernel stack,
	646	* and make the child ready to run.
	647	*/
	648	cpu_fork(origlp, lp, flags);
	649	caps_fork(origlp->lwp_thread, lp->lwp_thread);
	650	kqueue_init(&lp->lwp_kqueue, destproc->p_fd);
	651
	652	/*
	653	* Assign a TID to the lp. Loop until the insert succeeds (returns
	654	* NULL).
	655	*/
	656	lp->lwp_tid = destproc->p_lasttid;
	657	do {
	658	if (++lp->lwp_tid < 0)
	659	lp->lwp_tid = 1;
	660	} while (lwp_rb_tree_RB_INSERT(&destproc->p_lwp_tree, lp) != NULL);
	661	destproc->p_lasttid = lp->lwp_tid;
	662	destproc->p_nthreads++;
	663
	664
	665	return (lp);
	666	}
	667
	668	/*
	669	* The next two functionms are general routines to handle adding/deleting
	670	* items on the fork callout list.
	671	*
	672	* at_fork():
	673	* Take the arguments given and put them onto the fork callout list,
	674	* However first make sure that it's not already there.
	675	* Returns 0 on success or a standard error number.
	676	*/
	677	int
	678	at_fork(forklist_fn function)
	679	{
	680	struct forklist *ep;
	681
	682	#ifdef INVARIANTS
	683	/* let the programmer know if he's been stupid */
	684	if (rm_at_fork(function)) {
	685	kprintf("WARNING: fork callout entry (%p) already present\n",
	686	function);
	687	}
	688	#endif
	689	ep = kmalloc(sizeof(*ep), M_ATFORK, M_WAITOK\|M_ZERO);
	690	ep->function = function;
	691	TAILQ_INSERT_TAIL(&fork_list, ep, next);
	692	return (0);
	693	}
	694
	695	/*
	696	* Scan the exit callout list for the given item and remove it..
	697	* Returns the number of items removed (0 or 1)
	698	*/
	699	int
	700	rm_at_fork(forklist_fn function)
	701	{
	702	struct forklist *ep;
	703
	704	TAILQ_FOREACH(ep, &fork_list, next) {
	705	if (ep->function == function) {
	706	TAILQ_REMOVE(&fork_list, ep, next);
	707	kfree(ep, M_ATFORK);
	708	return(1);
	709	}
	710	}
	711	return (0);
	712	}
	713
	714	/*
	715	* Add a forked process to the run queue after any remaining setup, such
	716	* as setting the fork handler, has been completed.
	717	*/
	718	void
	719	start_forked_proc(struct lwp lp1, struct proc p2)
	720	{
	721	struct lwp *lp2 = ONLY_LWP_IN_PROC(p2);
	722
	723	/*
	724	* Move from SIDL to RUN queue, and activate the process's thread.
	725	* Activation of the thread effectively makes the process "a"
	726	* current process, so we do not setrunqueue().
	727	*
	728	* YYY setrunqueue works here but we should clean up the trampoline
	729	* code so we just schedule the LWKT thread and let the trampoline
	730	* deal with the userland scheduler on return to userland.
	731	*/
	732	KASSERT(p2->p_stat == SIDL,
	733	("cannot start forked process, bad status: %p", p2));
	734	p2->p_usched->resetpriority(lp2);
	735	crit_enter();
	736	p2->p_stat = SACTIVE;
	737	lp2->lwp_stat = LSRUN;
	738	p2->p_usched->setrunqueue(lp2);
	739	crit_exit();
	740
	741	/*
	742	* Now can be swapped.
	743	*/
	744	PRELE(lp1->lwp_proc);
	745
	746	/*
	747	* Preserve synchronization semantics of vfork. If waiting for
	748	* child to exec or exit, set P_PPWAIT on child, and sleep on our
	749	* proc (in case of exec or exit).
	750	*
	751	* We must hold our p_token to interlock the flag/tsleep
	752	*/
	753	lwkt_gettoken(&p2->p_token);
	754	while (p2->p_flag & P_PPWAIT)
	755	tsleep(lp1->lwp_proc, 0, "ppwait", 0);
	756	lwkt_reltoken(&p2->p_token);
	757	}