From 8ba5f7efb82fab63aca8d82c49af39cbc707d33b Mon Sep 17 00:00:00 2001 From: Alex Hornung Date: Fri, 12 Mar 2010 22:59:30 +0000 Subject: [PATCH] linux emu - peripheral changes * These are peripheral changes to prepare for the import of the updated linuxulator: * hooks (eventhandler) for process creation and process exit. * splitting up of sys_kevent into sys_kevent + kern_kevent, including generic copyin and copyout functions. Obtained-from: FreeBSD * adding a p_textnch to store the namecache handle for the .text file. * minor changes to sysv_sem and sysv_shm to support linux-specific magic. * bring in casuword (compare and set userland word) Obtained-from: FreeBSD * add a hook for userret from syscall2 Inspired-by: NetBSD --- sys/kern/imgact_elf.c | 3 + sys/kern/kern_event.c | 97 ++++++++++++++++++++++++-------- sys/kern/kern_exec.c | 7 +++ sys/kern/kern_exit.c | 15 ++++- sys/kern/kern_fork.c | 4 ++ sys/kern/kern_time.c | 5 +- sys/kern/sysv_sem.c | 30 +++++++++- sys/kern/sysv_shm.c | 3 +- sys/platform/pc32/i386/support.s | 33 +++++++++++ sys/platform/pc32/i386/trap.c | 13 +++++ sys/sys/event.h | 10 +++- sys/sys/eventhandler.h | 5 ++ sys/sys/proc.h | 2 + sys/sys/sem.h | 1 + sys/sys/systm.h | 1 + 15 files changed, 196 insertions(+), 33 deletions(-) diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index f20c64da2a..77a3bf4021 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include @@ -851,6 +852,8 @@ exec_elf_imgact(struct image_params *imgp) } imgp->proc->p_sysent = brand_info->sysvec; + EVENTHANDLER_INVOKE(process_exec, imgp); + if (interp != NULL) { path = kmalloc(MAXPATHLEN, M_TEMP, M_WAITOK); ksnprintf(path, MAXPATHLEN, "%s%s", diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c index 611f41a1d2..72eaecfed7 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c @@ -458,11 +458,47 @@ sys_kqueue(struct kqueue_args *uap) return (error); } +/* + * Copy 'count' 
items into the destination list pointed to by uap->eventlist. + */ +static int +kevent_copyout(void *arg, struct kevent *kevp, int count) +{ + struct kevent_args *uap; + int error; + + uap = (struct kevent_args *)arg; + + error = copyout(kevp, uap->eventlist, count * sizeof *kevp); + if (error == 0) + uap->eventlist += count; + return (error); +} + +/* + * Copy 'count' items from the list pointed to by uap->changelist. + */ +static int +kevent_copyin(void *arg, struct kevent *kevp, int count) +{ + struct kevent_args *uap; + int error; + + uap = (struct kevent_args *)arg; + + error = copyin(uap->changelist, kevp, count * sizeof *kevp); + if (error == 0) + uap->changelist += count; + return (error); +} + /* * MPALMOSTSAFE */ int -sys_kevent(struct kevent_args *uap) +kern_kevent(int fd, int nchanges, int nevents, struct kevent_args *uap, + k_copyin_fn kevent_copyinfn, k_copyout_fn kevent_copyoutfn, + struct timespec *tsp_in) { struct thread *td = curthread; struct proc *p = td->td_proc; @@ -474,7 +510,9 @@ sys_kevent(struct kevent_args *uap) int i, n, total, nerrors, error; struct kevent kev[KQ_NEVENTS]; - fp = holdfp(p->p_fd, uap->fd, -1); + tsp = tsp_in; + + fp = holdfp(p->p_fd, fd, -1); if (fp == NULL) return (EBADF); if (fp->f_type != DTYPE_KQUEUE) { @@ -482,22 +520,13 @@ sys_kevent(struct kevent_args *uap) return (EBADF); } - if (uap->timeout) { - error = copyin(uap->timeout, &ts, sizeof(ts)); - if (error) - goto done; - tsp = &ts; - } else { - tsp = NULL; - } - kq = (struct kqueue *)fp->f_data; nerrors = 0; get_mplock(); - while (uap->nchanges > 0) { - n = uap->nchanges > KQ_NEVENTS ? KQ_NEVENTS : uap->nchanges; - error = copyin(uap->changelist, kev, n * sizeof(struct kevent)); + while (nchanges > 0) { + n = nchanges > KQ_NEVENTS ? 
KQ_NEVENTS : nchanges; + error = kevent_copyinfn(uap, kev, n); if (error) goto done; for (i = 0; i < n; i++) { @@ -505,21 +534,18 @@ sys_kevent(struct kevent_args *uap) kevp->flags &= ~EV_SYSFLAGS; error = kqueue_register(kq, kevp); if (error) { - if (uap->nevents != 0) { + if (nevents != 0) { kevp->flags = EV_ERROR; kevp->data = error; - copyout(kevp, uap->eventlist, - sizeof(*kevp)); - uap->eventlist++; - uap->nevents--; + kevent_copyoutfn(uap, kevp, 1); + nevents--; nerrors++; } else { goto done; } } } - uap->nchanges -= n; - uap->changelist += n; + nchanges -= n; } if (nerrors) { uap->sysmsg_result = nerrors; @@ -547,14 +573,13 @@ sys_kevent(struct kevent_args *uap) */ total = 0; error = 0; - while ((n = uap->nevents - total) > 0) { + while ((n = nevents - total) > 0) { if (n > KQ_NEVENTS) n = KQ_NEVENTS; i = kqueue_scan(kq, kev, n, tsp, &error); if (i == 0) break; - error = copyout(kev, uap->eventlist + total, - (size_t)i * sizeof(struct kevent)); + error = kevent_copyoutfn(uap, kev, i); total += i; if (error || i != n) break; @@ -570,6 +595,30 @@ done: return (error); } +/* + * MPALMOSTSAFE + */ +int +sys_kevent(struct kevent_args *uap) +{ + struct timespec ts, *tsp; + int error; + + if (uap->timeout) { + error = copyin(uap->timeout, &ts, sizeof(ts)); + if (error) + return (error); + tsp = &ts; + } else { + tsp = NULL; + } + + error = kern_kevent(uap->fd, uap->nchanges, uap->nevents, + uap, kevent_copyin, kevent_copyout, tsp); + + return (error); +} + int kqueue_register(struct kqueue *kq, struct kevent *kev) { diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index b9b2e2ac60..81aed747ae 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -450,6 +450,13 @@ interpret: p->p_textvp = imgp->vp; vref(p->p_textvp); + /* Release old namecache handle to text file */ + if (p->p_textnch.ncp) + cache_drop(&p->p_textnch); + + if (nd->nl_nch.mount) + cache_copy(&nd->nl_nch, &p->p_textnch); + /* * Notify others that we exec'd, and clear the P_INEXEC 
flag * as we're now a bona fide freshly-execed process. diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index a9b39527c9..2f005c5c12 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -68,6 +68,7 @@ #include #include #include +#include #include #include @@ -315,7 +316,7 @@ exit1(int rv) } while (p->p_peers) tsleep((caddr_t)p, 0, "exit1", 0); - } + } #ifdef PGINPROF vmsizmon(); @@ -328,6 +329,13 @@ exit1(int rv) * e.g. SYSV IPC stuff * XXX what if one of these generates an error? */ + p->p_xstat = rv; + EVENTHANDLER_INVOKE(process_exit, p); + + /* + * XXX: imho, the eventhandler stuff is much cleaner than this. + * Maybe we should move everything to use eventhandler. + */ TAILQ_FOREACH(ep, &exit_list, next) (*ep->function)(td); @@ -461,6 +469,10 @@ exit1(int rv) vrele(vtmp); } + /* Release namecache handle to text file */ + if (p->p_textnch.ncp) + cache_drop(&p->p_textnch); + /* * Move the process to the zombie list. This will block * until the process p_lock count reaches 0. The process will @@ -492,7 +504,6 @@ exit1(int rv) * Save exit status and final rusage info, adding in child rusage * info and self times. */ - p->p_xstat = rv; calcru_proc(p, &p->p_ru); ruadd(&p->p_ru, &p->p_cru); diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index 12b1fcaa9c..8c98d9265c 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -419,6 +419,10 @@ fork1(struct lwp *lp1, int flags, struct proc **procp) if (p2->p_textvp) vref(p2->p_textvp); + /* copy namecache handle to the text file */ + if (p1->p_textnch.mount) + cache_copy(&p1->p_textnch, &p2->p_textnch); + /* * Handle file descriptors */ diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c index ec55d71427..6621c81810 100644 --- a/sys/kern/kern_time.c +++ b/sys/kern/kern_time.c @@ -70,7 +70,7 @@ struct timezone tz; * timers when they expire. 
*/ -static int nanosleep1(struct timespec *rqt, struct timespec *rmt); +int nanosleep1(struct timespec *rqt, struct timespec *rmt); static int settime(struct timeval *); static void timevalfix(struct timeval *); @@ -289,7 +289,7 @@ ns1_systimer(systimer_t info) lwkt_schedule(info->data); } -static int +int nanosleep1(struct timespec *rqt, struct timespec *rmt) { static int nanowait; @@ -300,6 +300,7 @@ nanosleep1(struct timespec *rqt, struct timespec *rmt) if (rqt->tv_nsec < 0 || rqt->tv_nsec >= 1000000000) return (EINVAL); + /* XXX: imho this should return EINVAL at least for tv_sec < 0 */ if (rqt->tv_sec < 0 || (rqt->tv_sec == 0 && rqt->tv_nsec == 0)) return (0); nanouptime(&ts); diff --git a/sys/kern/sysv_sem.c b/sys/kern/sysv_sem.c index ee0ae08b9f..ca0017bd10 100644 --- a/sys/kern/sysv_sem.c +++ b/sys/kern/sysv_sem.c @@ -393,6 +393,7 @@ sys___semctl(struct __semctl_args *uap) int i, rval, eval; struct semid_ds sbuf; struct semid_ds *semaptr; + struct semid_ds *semakptr; #ifdef SEM_DEBUG kprintf("call to semctl(%d, %d, %d, 0x%x)\n", semid, semnum, cmd, arg); @@ -401,11 +402,36 @@ sys___semctl(struct __semctl_args *uap) if (!jail_sysvipc_allowed && cred->cr_prison != NULL) return (ENOSYS); + get_mplock(); + switch (cmd) { + case SEM_STAT: + /* + * For this command we assume semid is an array index + * rather than an IPC id. 
+ */ + if (semid < 0 || semid >= seminfo.semmni) { + eval = EINVAL; + break; + } + semakptr = &sema[semid]; + if ((semakptr->sem_perm.mode & SEM_ALLOC) == 0) { + eval = EINVAL; + break; + } + if ((eval = ipcperm(td->td_proc, &semakptr->sem_perm, IPC_R))) + break; + + bcopy(semakptr, arg->buf, sizeof(struct semid_ds)); + rval = IXSEQ_TO_IPCID(semid, semakptr->sem_perm); + break; + } + semid = IPCID_TO_IX(semid); - if (semid < 0 || semid >= seminfo.semmni) + if (semid < 0 || semid >= seminfo.semmni) { + rel_mplock(); return(EINVAL); + } - get_mplock(); semaptr = &sema[semid]; if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 || semaptr->sem_perm.seq != IPCID_TO_SEQ(uap->semid)) { diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c index 8aff6e894e..e173982fd7 100644 --- a/sys/kern/sysv_shm.c +++ b/sys/kern/sysv_shm.c @@ -79,7 +79,8 @@ static sy_call_t *shmcalls[] = { #define SHMSEG_ALLOCATED 0x0800 #define SHMSEG_WANTED 0x1000 -static int shm_last_free, shm_nused, shm_committed, shmalloced; +static int shm_last_free, shm_committed, shmalloced; +int shm_nused; static struct shmid_ds *shmsegs; struct shm_handle { diff --git a/sys/platform/pc32/i386/support.s b/sys/platform/pc32/i386/support.s index b4858fd382..171abd63f3 100644 --- a/sys/platform/pc32/i386/support.s +++ b/sys/platform/pc32/i386/support.s @@ -289,6 +289,39 @@ copyin_fault2: movl $EFAULT,%eax ret +/* + * casuword. Compare and set user word. Returns -1 or the current value. + */ + +ENTRY(casuword) + movl PCPU(curthread),%ecx + movl TD_PCB(%ecx),%ecx + movl $fusufault,PCB_ONFAULT(%ecx) + movl 4(%esp),%edx /* dst */ + movl 8(%esp),%eax /* old */ + movl 12(%esp),%ecx /* new */ + + cmpl $VM_MAX_USER_ADDRESS-4,%edx /* verify address is valid */ + ja fusufault + +#ifdef SMP + lock +#endif + cmpxchgl %ecx,(%edx) /* Compare and set. */ + + /* + * The old value is in %eax. If the store succeeded it will be the + * value we expected (old) from before the store, otherwise it will + * be the current value. 
+ */ + + movl PCPU(curthread),%ecx + movl TD_PCB(%ecx),%ecx + movl $fusufault,PCB_ONFAULT(%ecx) + movl $0,PCB_ONFAULT(%ecx) + ret +END(casuword) + /* * fu{byte,sword,word} - MP SAFE * diff --git a/sys/platform/pc32/i386/trap.c b/sys/platform/pc32/i386/trap.c index bd6c3f59f0..e9b125d669 100644 --- a/sys/platform/pc32/i386/trap.c +++ b/sys/platform/pc32/i386/trap.c @@ -237,8 +237,15 @@ static void userret(struct lwp *lp, struct trapframe *frame, int sticks) { struct proc *p = lp->lwp_proc; + void (*hook)(void); int sig; + if (p->p_userret != NULL) { + hook = p->p_userret; + p->p_userret = NULL; + (*hook)(); + } + /* * Charge system time if profiling. Note: times are in microseconds. * This may do a copyout and block, so do it first even though it @@ -1250,6 +1257,7 @@ syscall2(struct trapframe *frame) } code &= p->p_sysent->sv_mask; + if (code >= p->p_sysent->sv_size) callp = &p->p_sysent->sv_table[0]; else @@ -1257,6 +1265,11 @@ syscall2(struct trapframe *frame) narg = callp->sy_narg & SYF_ARGMASK; +#if 0 + if (p->p_sysent->sv_name[0] == 'L') + kprintf("Linux syscall, code = %d\n", code); +#endif + /* * copyin is MP aware, but the tracing code is not */ diff --git a/sys/sys/event.h b/sys/sys/event.h index 69520421f6..a2922fe028 100644 --- a/sys/sys/event.h +++ b/sys/sys/event.h @@ -171,6 +171,13 @@ struct knote { struct proc; struct thread; struct filedesc; +struct kevent_args; + +typedef int (*k_copyout_fn)(void *arg, struct kevent *kevp, int count); +typedef int (*k_copyin_fn)(void *arg, struct kevent *kevp, int count); +int kern_kevent(int fd, int nchanges, int nevents, struct kevent_args *uap, + k_copyin_fn kevent_copyin, k_copyout_fn kevent_copyout, + struct timespec *tsp); extern void knote(struct klist *list, long hint); extern void knote_remove(struct klist *list); @@ -179,7 +186,7 @@ extern void kqueue_init(struct kqueue *kq, struct filedesc *fdp); extern void kqueue_terminate(struct kqueue *kq); extern int kqueue_register(struct kqueue *kq, struct kevent 
*kev); -#endif /* !_KERNEL */ +#endif /* _KERNEL */ #if !defined(_KERNEL) || defined(_KERNEL_VIRTUAL) @@ -191,7 +198,6 @@ int kqueue (void); int kevent (int, const struct kevent *, int, struct kevent *, int, const struct timespec *); __END_DECLS - #endif /* !_KERNEL */ #endif /* !_SYS_EVENT_H_ */ diff --git a/sys/sys/eventhandler.h b/sys/sys/eventhandler.h index d1f50288cd..6e3269eb39 100644 --- a/sys/sys/eventhandler.h +++ b/sys/sys/eventhandler.h @@ -158,6 +158,7 @@ extern struct eventhandler_list *eventhandler_find_list(char *name); #define EVENTHANDLER_PRI_ANY 10000 #define EVENTHANDLER_PRI_LAST 20000 +struct image_params; /* Shutdown events */ typedef void (*shutdown_fn) (void *, int); @@ -165,10 +166,14 @@ typedef void (*shutdown_fn) (void *, int); #define SHUTDOWN_PRI_DEFAULT EVENTHANDLER_PRI_ANY #define SHUTDOWN_PRI_DRIVER (EVENTHANDLER_PRI_ANY + 5000) #define SHUTDOWN_PRI_LAST EVENTHANDLER_PRI_LAST +typedef void (*execlist_fn)(void *, struct image_params *); +typedef void (*exit_list_fn)(void *, struct proc *); EVENTHANDLER_DECLARE(shutdown_pre_sync, shutdown_fn); /* before fs sync */ EVENTHANDLER_DECLARE(shutdown_post_sync, shutdown_fn); /* after fs sync */ EVENTHANDLER_DECLARE(shutdown_final, shutdown_fn); +EVENTHANDLER_DECLARE(process_exec, execlist_fn); +EVENTHANDLER_DECLARE(process_exit, exit_list_fn); #endif /* _KERNEL */ #endif /* SYS_EVENTHANDLER_H */ diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 3e074c8adb..da916d84b8 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -265,6 +265,7 @@ struct proc { sigset_t p_siglist; /* Signals arrived but not delivered. */ struct vnode *p_textvp; /* Vnode of executable. */ + struct nchandle p_textnch; /* namecache handle of executable. 
*/ unsigned int p_stops; /* procfs event bitmask */ unsigned int p_stype; /* procfs stop event type */ @@ -313,6 +314,7 @@ struct proc { struct usched *p_usched; /* Userland scheduling control */ struct vkernel_proc *p_vkernel; /* VKernel support, proc part */ int p_numposixlocks; /* number of POSIX locks */ + void (*p_userret)(void);/* p: return-to-user hook */ struct spinlock p_spin; /* Spinlock for LWP access to proc */ }; diff --git a/sys/sys/sem.h b/sys/sys/sem.h index 398c7487ec..5d2033177c 100644 --- a/sys/sys/sem.h +++ b/sys/sys/sem.h @@ -59,6 +59,7 @@ union semun { #define GETZCNT 7 /* Return the value of semzcnt {READ} */ #define SETVAL 8 /* Set the value of semval to arg.val {ALTER} */ #define SETALL 9 /* Set semvals from arg.array {ALTER} */ +#define SEM_STAT 10 /* Like IPC_STAT but treats semid as sema-index */ /* * Permissions diff --git a/sys/sys/systm.h b/sys/sys/systm.h index 4d3841c1df..27663673dc 100644 --- a/sys/sys/systm.h +++ b/sys/sys/systm.h @@ -239,6 +239,7 @@ long fuword (const void *base); int suword (void *base, long word); int fusword (void *base); int susword (void *base, int word); +u_long casuword(volatile u_long *p, u_long oldval, u_long newval); void realitexpire (void *); void DELAY(int usec); -- 2.41.0