From 126930838c73dbde9fa69a103e6372d95507d840 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Thu, 18 Nov 2004 13:09:55 +0000 Subject: [PATCH] Lots of bug fixes to the checkpointing code. The big fix is that you can now checkpoint a program that you have checkpoint-restored. i.e. you run program X, you checkpoint it, you checkpoint-restore X from the checkpoint, and then you checkpoint it again. The issue here is the when a checkpointed program is restored the checkpoint file is used to map portions of the image of the restored program. If you then tried to checkpoint the restored image the system would overwrite or destroy the original checkpoint file and the new checkpoint file would have references to the old file (now non-existant) file. Any attempt to restore the recursed checkpoint would result in a seg-fault. That is now fixed. * Remove the previous checkpoint file before saving the new one. If the program we are checkpointing happens to be a checkpoint restore from the same file then overwriting the file would wind up corrupting the image set we are trying to save. * When checkpointing a program that has been checkpoint-restored do not attempt to save the file handles for the vnode representing the checkpoint-restored program's own checkpoint file (which is a good chunk of its backing store), because this vnode is likely to be destroyed the moment we close the handle, since we are likely replacing the previous checkpoint file. Instead, the backing store representing the old checkpoint file is copied to the new one. * Re-checkpointing a program (hitting ^E multiple times) now properly replaces the checkpoint file. * Properly close any file descriptors from the checkpt(1) program itself when restoring a checkpointed program, properly replace any file descriptors that need replacing. * Properly replace p_comm[] when restoring a checkpoint file, so checkpointing again saves under the same program name. 'ps' output is still wrong, though. TODO LIST: * Add an iterator to the checkpoint file, accessible via kern.ckptfile, so successive checkpoints save to a blah.ckpt.1, blah.ckpt.2, etc, rather then always overwriting blah.ckpt (the iterator could be saved in the proc structure). * Add back as a 'feature' the ability for the new checkpoint file to reference the old one. That is, each new checkpoint file would represent a delta relative to the old one. This might be useful when checkpointing programs with ever growing data setse so as not to have to copy the entire contents of the program to the checkpoint file each time you want to make a new checkpoint. It would be hell on the VM system, but it would work. * Add an option to checkpt(1) so you can checkpoint-restore-enter-gdb all in one go, to be able to debug a checkpointed file more easily. Inspired by: Brook Davis's HPC presentation. He expressed an interest in possibly porting the checkpoint code so I figure I ought to fix it up. --- sys/checkpt/checkpt.c | 81 ++++++++++++++++++++++++++++++++++------- sys/kern/imgact_elf.c | 25 +++++++++++-- sys/kern/kern_descrip.c | 9 ++++- sys/sys/kern_syscall.h | 3 +- sys/sys/vnode.h | 4 +- 5 files changed, 101 insertions(+), 21 deletions(-) diff --git a/sys/checkpt/checkpt.c b/sys/checkpt/checkpt.c index 1a1cd837dc..a1a78c1b92 100644 --- a/sys/checkpt/checkpt.c +++ b/sys/checkpt/checkpt.c @@ -23,7 +23,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/checkpt/Attic/checkpt.c,v 1.6 2004/11/12 00:09:00 dillon Exp $ + * $DragonFly: src/sys/checkpt/Attic/checkpt.c,v 1.7 2004/11/18 13:09:55 dillon Exp $ */ #include @@ -33,6 +33,7 @@ #include #include #include +#include #include /* only on dragonfly */ @@ -47,7 +48,7 @@ #include #include #include - +#include #include #include #include @@ -66,6 +67,7 @@ #include #include #include +#include #include "checkpt.h" #include #include @@ -253,6 +255,21 @@ ckpt_thaw_proc(struct proc *p, struct file *fp) /* handle mappings last in case we are reading from a socket */ error = elf_loadphdrs(fp, phdr, ehdr->e_phnum); + + /* + * Set the textvp to the checkpoint file and mark the vnode so + * a future checkpointing of this checkpoint-restored program + * will copy out the contents of the mappings rather then trying + * to record the vnode info related to the checkpoint file, which + * is likely going to be destroyed when the program is re-checkpointed. + */ + if (error == 0 && fp->f_data && fp->f_type == DTYPE_VNODE) { + if (p->p_textvp) + vrele(p->p_textvp); + p->p_textvp = (struct vnode *)fp->f_data; + p->p_textvp->v_flag |= VCKPT; + vref(p->p_textvp); + } done: if (ehdr) free(ehdr, M_TEMP); @@ -283,7 +300,8 @@ elf_loadnotes(struct proc *p, prpsinfo_t *psinfo, prstatus_t *status, if ((error = set_regs(p, &status->pr_reg)) != 0) goto done; error = set_fpregs(p, fpregset); - /* strncpy(psinfo->pr_psargs, p->p_comm, PRARGSZ); */ + strlcpy(p->p_comm, psinfo->pr_fname, sizeof(p->p_comm)); + /* XXX psinfo->pr_psargs not yet implemented */ done: TRACE_EXIT; return error; @@ -528,6 +546,8 @@ elf_gettextvp(struct proc *p, struct file *fp) error = ERANGE; goto done; } + + vmspace_exec(p, NULL); p->p_vmspace->vm_daddr = vminfo.cvm_daddr; p->p_vmspace->vm_dsize = vminfo.cvm_dsize; p->p_vmspace->vm_taddr = vminfo.cvm_taddr; @@ -558,10 +578,12 @@ elf_getfiles(struct proc *p, struct file *fp) int error; int i; int filecount; + int fd; struct ckpt_filehdr filehdr; struct ckpt_fileinfo *cfi_base = NULL; struct vnode *vp; struct file *tempfp; + struct file *ofp; TRACE_ENTER; if ((error = read_check(fp, &filehdr, sizeof(filehdr))) != 0) @@ -572,6 +594,16 @@ elf_getfiles(struct proc *p, struct file *fp) if (error) goto done; + /* + * Close all descriptors >= 3. These descriptors are from the + * checkpt(1) program itself and should not be retained. + */ + for (i = 3; i < p->p_fd->fd_nfiles; ++i) + kern_close(i); + + /* + * Scan files to load + */ for (i = 0; i < filecount; i++) { struct ckpt_fileinfo *cfi= &cfi_base[i]; /* @@ -581,11 +613,7 @@ elf_getfiles(struct proc *p, struct file *fp) */ if (cfi->cfi_index < 0) continue; - if (cfi->cfi_index >= p->p_fd->fd_nfiles) { - PRINTF(("can't currently restore fd: %d\n", - cfi->cfi_index)); - goto done; - } + if ((error = ckpt_fhtovp(&cfi->cfi_fh, &vp)) != 0) break; if ((error = fp_vpopen(vp, OFLAGS(cfi->cfi_flags), &tempfp)) != 0) { @@ -593,13 +621,30 @@ elf_getfiles(struct proc *p, struct file *fp) break; } tempfp->f_offset = cfi->cfi_offset; - /* XXX bail for now if we the index is - * larger than the current file table + + /* + * If overwriting a descriptor close the old descriptor. This + * only occurs if the saved core saved descriptors that we + * have not already closed. */ + if (cfi->cfi_index < p->p_fd->fd_nfiles && + (ofp = p->p_fd->fd_ofiles[cfi->cfi_index]) != NULL) { + kern_close(cfi->cfi_index); + } - PRINTF(("restoring %d\n", cfi->cfi_index)); + /* + * Allocate the descriptor we want. + */ + if (fdalloc(p, cfi->cfi_index, &fd) != 0) { + PRINTF(("can't currently restore fd: %d\n", + cfi->cfi_index)); + fp_close(fp); + goto done; + } + KKASSERT(fd == cfi->cfi_index); p->p_fd->fd_ofiles[cfi->cfi_index] = tempfp; cfi++; + PRINTF(("restoring %d\n", cfi->cfi_index)); } done: @@ -708,6 +753,7 @@ ckpt_handler(struct proc *p) { char *buf; struct file *fp; + struct nlookupdata nd; int error; chptinuse++; @@ -735,9 +781,18 @@ ckpt_handler(struct proc *p) PRINTF(("ckpt handler called, using '%s'\n", buf)); /* - * Use the same safety flags that the coredump code uses. + * Use the same safety flags that the coredump code uses. Remove + * any previous checkpoint file before writing out the new one in + * case we are re-checkpointing a program that had been checkpt + * restored. Otherwise we will corrupt the program space (which is + * made up of mmap()ings of the previous checkpoint file) while we + * write out the new one. */ - error = fp_open(buf, O_WRONLY|O_CREAT|O_NOFOLLOW, 0600, &fp); + error = nlookup_init(&nd, buf, UIO_SYSSPACE, 0); + if (error == 0) + error = kern_unlink(&nd); + nlookup_done(&nd); + error = fp_open(buf, O_WRONLY|O_CREAT|O_TRUNC|O_NOFOLLOW, 0600, &fp); if (error == 0) { (void)ckpt_freeze_proc(p, fp); fp_close(fp); diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index 4a4e5318a0..fb3d2e34f3 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -27,7 +27,7 @@ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD: src/sys/kern/imgact_elf.c,v 1.73.2.13 2002/12/28 19:49:41 dillon Exp $ - * $DragonFly: src/sys/kern/imgact_elf.c,v 1.23 2004/11/12 00:09:23 dillon Exp $ + * $DragonFly: src/sys/kern/imgact_elf.c,v 1.24 2004/11/18 13:09:30 dillon Exp $ */ #include @@ -938,8 +938,14 @@ static int cb_fpcount_segment(vm_map_entry_t entry, void *closure) { int *count = closure; - if (entry->object.vm_object->type == OBJT_VNODE) + struct vnode *vp; + + if (entry->object.vm_object->type == OBJT_VNODE) { + vp = (struct vnode *)entry->object.vm_object->handle; + if ((vp->v_flag & VCKPT) && curproc->p_textvp == vp) + return 0; ++*count; + } return 0; } @@ -952,10 +958,23 @@ cb_put_fp(vm_map_entry_t entry, void *closure) struct vnode *vp; int error; + /* + * If an entry represents a vnode then write out a file handle. + * + * If we are checkpointing a checkpoint-restored program we do + * NOT record the filehandle for the old checkpoint vnode (which + * is mapped all over the place). Instead we rely on the fact + * that a checkpoint-restored program does not mmap() the checkpt + * vnode NOCORE, so its contents will be written out to the + * checkpoint file itself. This is necessary because the 'old' + * checkpoint file is typically destroyed when a new one is created. + */ if (entry->object.vm_object->type == OBJT_VNODE) { + vp = (struct vnode *)entry->object.vm_object->handle; + if ((vp->v_flag & VCKPT) && curproc->p_textvp == vp) + return 0; if (vnh == fpc->vnh_max) return EINVAL; - vp = (struct vnode *)entry->object.vm_object->handle; if (vp->v_mount) vnh->vnh_fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 2967202dbd..8c2493d96c 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -37,7 +37,7 @@ * * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94 * $FreeBSD: src/sys/kern/kern_descrip.c,v 1.81.2.19 2004/02/28 00:43:31 tegge Exp $ - * $DragonFly: src/sys/kern/kern_descrip.c,v 1.31 2004/11/12 00:09:23 dillon Exp $ + * $DragonFly: src/sys/kern/kern_descrip.c,v 1.32 2004/11/18 13:09:30 dillon Exp $ */ #include "opt_compat.h" @@ -612,12 +612,17 @@ fgetown(struct sigio *sigio) /* ARGSUSED */ int close(struct close_args *uap) +{ + return(kern_close(uap->fd)); +} + +int +kern_close(int fd) { struct thread *td = curthread; struct proc *p = td->td_proc; struct filedesc *fdp; struct file *fp; - int fd = uap->fd; int error; int holdleaders; diff --git a/sys/sys/kern_syscall.h b/sys/sys/kern_syscall.h index e7f2445331..c90ce70f42 100644 --- a/sys/sys/kern_syscall.h +++ b/sys/sys/kern_syscall.h @@ -25,7 +25,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $DragonFly: src/sys/sys/kern_syscall.h,v 1.20 2004/11/12 00:09:27 dillon Exp $ + * $DragonFly: src/sys/sys/kern_syscall.h,v 1.21 2004/11/18 13:09:53 dillon Exp $ */ #ifndef _SYS_KERN_SYSCALL_H_ @@ -132,6 +132,7 @@ int kern_mkdir(struct nlookupdata *nd, int mode); int kern_mkfifo(struct nlookupdata *nd, int mode); int kern_mknod(struct nlookupdata *nd, int mode, int dev); int kern_open(struct nlookupdata *nd, int flags, int mode, int *res); +int kern_close(int fd); int kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res); int kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond); int kern_rmdir(struct nlookupdata *nd); diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index 5ace2ee7f7..48a5e64288 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -32,7 +32,7 @@ * * @(#)vnode.h 8.7 (Berkeley) 2/4/94 * $FreeBSD: src/sys/sys/vnode.h,v 1.111.2.19 2002/12/29 18:19:53 dillon Exp $ - * $DragonFly: src/sys/sys/vnode.h,v 1.26 2004/11/12 00:09:27 dillon Exp $ + * $DragonFly: src/sys/sys/vnode.h,v 1.27 2004/11/18 13:09:53 dillon Exp $ */ #ifndef _SYS_VNODE_H_ @@ -154,7 +154,7 @@ struct vnode { #define VSYSTEM 0x00004 /* vnode being used by kernel */ #define VISTTY 0x00008 /* vnode represents a tty */ #define VCTTYISOPEN 0x00010 /* controlling terminal tty is open */ -/* open for business 0x00020 */ +#define VCKPT 0x00020 /* checkpoint-restored vnode */ /* open for business 0x00040 */ /* open for business 0x00080 */ /* open for business 0x00100 */ -- 2.41.0