2 * Copyright (c) 2006 The DragonFly Project. All rights reserved.
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * $DragonFly: src/sys/platform/vkernel/platform/init.c,v 1.22 2007/01/12 18:03:48 dillon Exp $
37 #include <sys/types.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
43 #include <sys/random.h>
44 #include <sys/vkernel.h>
47 #include <sys/msgbuf.h>
48 #include <sys/vmspace.h>
49 #include <vm/vm_page.h>
51 #include <machine/globaldata.h>
52 #include <machine/tls.h>
53 #include <machine/md_var.h>
54 #include <machine/vmparam.h>
/*
 * Machine-dependent globals for the virtual kernel, followed by the
 * forward declarations of the file-local start-up helpers.
 */
/* Bounds of unallocated physical memory: [0] is the start, [1] the end. */
65 vm_paddr_t phys_avail[16];
/* Size of the memory image in bytes (from -m or the image file size). */
67 vm_paddr_t Maxmem_bytes;
/* Bounds of the kernel virtual address range not yet handed out. */
74 vm_offset_t virtual_start;
75 vm_offset_t virtual_end;
/* Set to virtual_start by init_kern_memory(). */
76 vm_offset_t kernel_vm_end;
/* KVA window of MAXDUMPPGS pages reserved by init_kern_memory(). */
77 vm_offset_t crashdumpmap;
78 vm_offset_t clean_sva;
79 vm_offset_t clean_eva;
/* Kernel message buffer, mapped and initialised in init_kern_memory(). */
80 struct msgbuf *msgbufp;
83 vpte_t *KernelPTA; /* Warning: Offset for direct VA translation */
84 u_int cpu_feature; /* XXX */
85 u_int tsc_present; /* XXX */
/* Base of the per-cpu privatespace array; reserved in init_globaldata(). */
87 struct privatespace *CPU_prvspace;
/* Trap frame installed as the md_regs of proc0. */
89 static struct trapframe proc0_tf;
/* KVA of the UPAGES pages mapped for process 0 in init_kern_memory(). */
90 static void *proc0paddr;
/* Forward declarations for the file-local start-up helpers. */
92 static void init_sys_memory(char *imageFile);
93 static void init_kern_memory(void);
94 static void init_globaldata(void);
95 static void init_vkernel(void);
96 static void init_rootdevice(char *imageFile);
97 static void init_netif(char *netifFile);
98 static void usage(const char *ctl);
/*
 * Entry point of the virtual kernel process.
 *
 * ac, av: the usual argc/argv pair, parsed with getopt() using the
 * option letters v, m, r, e and I (all but v take an argument).
 *
 * NOTE(review): several lines of this function (case labels, braces,
 * the return type) are not visible in this view, so which option letter
 * feeds which variable below should be confirmed against the full file.
 */
101 * Kernel startup for virtual kernels - standard main()
104 main(int ac, char **av)
/* Option arguments captured from the command line; NULL when not given. */
106 char *memImageFile = NULL;
107 char *rootImageFile = NULL;
108 char *netifFile = NULL;
117 while ((c = getopt(ac, av, "vm:r:e:I:")) != -1) {
121 * name=value:name=value:name=value...
/*
 * Copy the option argument into a freshly allocated kern_envp.
 * NOTE(review): the malloc() result is written through below without a
 * visible NULL check.
 */
124 kern_envp = malloc(n + 2);
125 for (i = 0; i < n; ++i) {
126 if (optarg[i] == ':')
129 kern_envp[i] = optarg[i];
138 memImageFile = optarg;
144 rootImageFile = optarg;
/*
 * Base 0 lets strtoull() auto-detect the radix; suffix receives the
 * first unparsed character. The suffix handling is not visible here.
 */
147 Maxmem_bytes = strtoull(optarg, &suffix, 0);
164 usage("Bad maxmem option");
/*
 * Bring up the memory image, then the optional root device and network
 * interface. NOTE(review): other start-up calls may sit on lines that
 * are not visible in this view.
 */
174 init_sys_memory(memImageFile);
178 init_rootdevice(rootImageFile);
179 init_netif(netifFile);
/*
 * init_sys_memory: open (or create) the file backing the virtual
 * kernel RAM and size it to Maxmem_bytes.
 *
 * imageFile: path of the memory image, or NULL to have a name
 * generated under /var/vkernel.
 *
 * Side effects visible here: may set Maxmem_bytes from the file size,
 * extends or truncates the file, and sets Maxmem to the page count.
 * Detected failures are reported through err(1, ...).
 */
187 * Initialize system memory. This is the virtual kernel's 'RAM'.
191 init_sys_memory(char *imageFile)
198 * Figure out the system memory image size. If an image file was
199 * specified and -m was not specified, use the image file's size.
/*
 * NOTE(review): stat() is issued twice on the same path by the two
 * conditions below; one call with a saved result would suffice.
 */
202 if (imageFile && stat(imageFile, &st) == 0 && Maxmem_bytes == 0)
203 Maxmem_bytes = (vm_paddr_t)st.st_size;
204 if ((imageFile == NULL || stat(imageFile, &st) < 0) &&
/*
 * NOTE(review): err() appends the text for the current errno, yet this
 * branch is also taken when imageFile is NULL and no call has failed;
 * errx() looks like the intended reporter - confirm.
 */
206 err(1, "Cannot create new memory file %s unless "
207 "system memory size is specified with -m",
213 * Maxmem must be known at this time
/*
 * Require at least 32MB and a whole number of segments.
 * NOTE(review): this is a validation failure with no failing system
 * call, so the errno text added by err() is unrelated to the problem.
 */
215 if (Maxmem_bytes < 32 * 1024 * 1024 || (Maxmem_bytes & SEG_MASK)) {
216 err(1, "Bad maxmem specification: 32MB minimum, "
217 "multiples of %dMB only",
218 SEG_SIZE / 1024 / 1024);
223 * Generate an image file name if necessary, then open/create the
224 * file exclusively locked. Do not allow multiple virtual kernels
225 * to use the same image file.
227 if (imageFile == NULL) {
/*
 * Probe memimg.000000 upward for a file that can be opened with an
 * exclusive lock.
 * NOTE(review): the asprintf() result is unchecked and no free() of a
 * rejected name is visible, so each retry appears to leak one string.
 */
228 for (i = 0; i < 1000000; ++i) {
229 asprintf(&imageFile, "/var/vkernel/memimg.%06d", i);
231 O_RDWR|O_CREAT|O_EXLOCK|O_NONBLOCK, 0644);
232 if (fd < 0 && errno == EWOULDBLOCK) {
/* Open or create the path with a non-blocking exclusive lock. */
239 fd = open(imageFile, O_RDWR|O_CREAT|O_EXLOCK|O_NONBLOCK, 0644);
241 printf("Using memory file: %s\n", imageFile);
/*
 * NOTE(review): err() already appends strerror(errno), so passing it
 * again as an argument prints the reason twice.
 */
242 if (fd < 0 || fstat(fd, &st) < 0) {
243 err(1, "Unable to open/create %s: %s",
244 imageFile, strerror(errno));
249 * Truncate or extend the file as necessary.
/*
 * Shrink an oversized image; grow a short one by writing zero-filled
 * segments starting at the last segment boundary at or below the
 * current size.
 * NOTE(review): the ftruncate(), malloc() and lseek() results below are
 * not checked.
 */
251 if (st.st_size > Maxmem_bytes) {
252 ftruncate(fd, Maxmem_bytes);
253 } else if (st.st_size < Maxmem_bytes) {
255 off_t off = st.st_size & ~SEG_MASK;
257 kprintf("%s: Reserving blocks for memory image\n", imageFile);
258 zmem = malloc(SEG_SIZE);
259 bzero(zmem, SEG_SIZE);
260 lseek(fd, off, SEEK_SET);
261 while (off < Maxmem_bytes) {
262 if (write(fd, zmem, SEG_SIZE) != SEG_SIZE) {
263 err(1, "Unable to reserve blocks for memory image");
269 err(1, "Unable to reserve blocks for memory image");
/* Maxmem is the image size expressed in pages. */
273 Maxmem = Maxmem_bytes >> PAGE_SHIFT;
/*
 * init_kern_memory: map the kernel virtual address space on top of the
 * memory image and lay out the fixed early allocations.
 *
 * Takes no arguments. Visible effects: sets KvaStart, KvaSize, KvaEnd,
 * KernelPTD, KernelPTA, phys_avail[0..1], virtual_start, virtual_end,
 * kernel_vm_end, proc0paddr, crashdumpmap, msgbufp and ptvmmap, and
 * writes page table entries into the file behind MemImageFd.
 */
277 * Initialize kernel memory. This reserves kernel virtual memory by using
282 init_kern_memory(void)
290 * Memory map our kernel virtual memory space. Note that the
291 * kernel image itself is not made part of this memory for the
294 * The memory map must be segment-aligned so we can properly
/* NOTE(review): the mapping address is a hard-coded hint, 0x40000000. */
297 base = mmap((void *)0x40000000, KERNEL_KVA_SIZE, PROT_READ|PROT_WRITE,
298 MAP_FILE|MAP_SHARED|MAP_VPAGETABLE, MemImageFd, 0);
/*
 * NOTE(review): madvise() runs on base before the MAP_FAILED test that
 * follows; testing first would avoid calling it with an invalid address.
 */
299 madvise(base, KERNEL_KVA_SIZE, MADV_NOSYNC);
300 if (base == MAP_FAILED) {
301 err(1, "Unable to mmap() kernel virtual memory!");
304 KvaStart = (vm_offset_t)base;
305 KvaSize = KERNEL_KVA_SIZE;
306 KvaEnd = KvaStart + KvaSize;
309 * Create a top-level page table self-mapping itself.
311 * Initialize the page directory at physical page index 0 to point
312 * to an array of page table pages starting at physical page index 1
314 lseek(MemImageFd, 0L, 0);
/*
 * One directory entry per segment; entry i points at physical page
 * i + 1.
 * NOTE(review): write() results are ignored in all three fill loops of
 * this function.
 */
315 for (i = 0; i < KERNEL_KVA_SIZE / SEG_SIZE; ++i) {
316 pte = ((i + 1) * PAGE_SIZE) | VPTE_V | VPTE_R | VPTE_W;
317 write(MemImageFd, &pte, sizeof(pte));
321 * Initialize the PTEs in the page table pages required to map the
322 * page table itself. This includes mapping the page directory page
323 * at the base so we go one more loop then normal.
325 lseek(MemImageFd, PAGE_SIZE, 0);
/* The bound is inclusive (<=), giving the one extra iteration. */
326 for (i = 0; i <= KERNEL_KVA_SIZE / SEG_SIZE * sizeof(vpte_t); ++i) {
327 pte = (i * PAGE_SIZE) | VPTE_V | VPTE_R | VPTE_W;
328 write(MemImageFd, &pte, sizeof(pte));
332 * Initialize remaining PTEs to 0. We may be reusing a memory image
333 * file. This is approximately a megabyte.
/*
 * i still holds the exit value of the loop above; it is reused here as
 * the number of bytes left to clear.
 */
335 i = (KERNEL_KVA_SIZE / PAGE_SIZE - i) * sizeof(pte);
336 zero = malloc(PAGE_SIZE);
/*
 * NOTE(review): no statement that clears the buffer returned by
 * malloc() is visible before it is written out as zeroes - confirm it
 * is zero-filled (and that the allocation is checked).
 */
338 write(MemImageFd, zero, (i > PAGE_SIZE) ? PAGE_SIZE : i);
339 i = i - ((i > PAGE_SIZE) ? PAGE_SIZE : i);
344 * Enable the page table and calculate pointers to our self-map
345 * for easy kernel page table manipulation.
347 * KernelPTA must be offset so we can do direct VA translations
349 mcontrol(base, KERNEL_KVA_SIZE, MADV_SETMAP,
350 0 | VPTE_R | VPTE_W | VPTE_V);
351 KernelPTD = (vpte_t *)base; /* pg directory */
352 KernelPTA = (vpte_t *)((char *)base + PAGE_SIZE); /* pg table pages */
/*
 * Bias the pointer by the page index of KvaStart; presumably so that it
 * can be indexed with (va >> PAGE_SHIFT) directly - confirm with users.
 */
353 KernelPTA -= KvaStart >> PAGE_SHIFT;
356 * phys_avail[] represents unallocated physical memory. MI code
357 * will use phys_avail[] to create the vm_page array.
/*
 * Skip the page directory page and the page table, then round up to a
 * page boundary.
 */
359 phys_avail[0] = PAGE_SIZE +
360 KERNEL_KVA_SIZE / PAGE_SIZE * sizeof(vpte_t);
361 phys_avail[0] = (phys_avail[0] + PAGE_MASK) & ~(vm_paddr_t)PAGE_MASK;
362 phys_avail[1] = Maxmem_bytes;
365 * (virtual_start, virtual_end) represent unallocated kernel virtual
366 * memory. MI code will create kernel_map using these parameters.
/* The same amount is skipped at the bottom of KVA. */
368 virtual_start = KvaStart + PAGE_SIZE +
369 KERNEL_KVA_SIZE / PAGE_SIZE * sizeof(vpte_t);
370 virtual_start = (virtual_start + PAGE_MASK) & ~(vm_offset_t)PAGE_MASK;
371 virtual_end = KvaStart + KERNEL_KVA_SIZE;
374 * kernel_vm_end could be set to virtual_end but we want some
375 * indication of how much of the kernel_map we've used, so
376 * set it low and let pmap_growkernel increase it even though we
377 * don't need to create any new page table pages.
379 kernel_vm_end = virtual_start;
382 * Allocate space for process 0's UAREA.
384 proc0paddr = (void *)virtual_start;
/*
 * Map UPAGES fresh physical pages, advancing the virtual and physical
 * cursors one page at a time.
 */
385 for (i = 0; i < UPAGES; ++i) {
386 pmap_kenter_quick(virtual_start, phys_avail[0]);
387 virtual_start += PAGE_SIZE;
388 phys_avail[0] += PAGE_SIZE;
/* Reserve KVA only; no physical pages are entered for this window here. */
394 crashdumpmap = virtual_start;
395 virtual_start += MAXDUMPPGS * PAGE_SIZE;
398 * msgbufp maps the system message buffer
400 assert((MSGBUF_SIZE & PAGE_MASK) == 0);
401 msgbufp = (void *)virtual_start;
402 for (i = 0; i < (MSGBUF_SIZE >> PAGE_SHIFT); ++i) {
403 pmap_kenter_quick(virtual_start, phys_avail[0]);
404 virtual_start += PAGE_SIZE;
405 phys_avail[0] += PAGE_SIZE;
407 msgbufinit(msgbufp, MSGBUF_SIZE);
410 * used by kern_memio for /dev/mem access
/* One page of KVA is set aside for ptvmmap. */
412 ptvmmap = (caddr_t)virtual_start;
413 virtual_start += PAGE_SIZE;
416 * Bootstrap the kernel_pmap
/*
 * init_globaldata: reserve KVA for the per-cpu privatespace array and
 * back the cpu #0 mdglobaldata and idle stack with physical pages.
 *
 * Takes no arguments. Visible effects: advances virtual_start and
 * phys_avail[0], sets CPU_prvspace, and installs the cpu #0 privatespace
 * through tls_set_fs().
 */
422 * Map the per-cpu globaldata for cpu #0. Allocate the space using
423 * virtual_start and phys_avail[0]
427 init_globaldata(void)
434 * Reserve enough KVA to cover possible cpus. This is a considerable
435 * amount of KVA since the privatespace structure includes two
436 * whole page table mappings.
/*
 * Round virtual_start up using SEG_MASK (presumably SEG_SIZE - 1, so
 * the array starts on a segment boundary), then reserve SMP_MAXCPU
 * privatespace slots.
 */
438 virtual_start = (virtual_start + SEG_MASK) & ~(vm_offset_t)SEG_MASK;
439 CPU_prvspace = (void *)virtual_start;
440 virtual_start += sizeof(struct privatespace) * SMP_MAXCPU;
443 * Allocate enough physical memory to cover the mdglobaldata
444 * portion of the space and the idle stack and map the pages
445 * into KVA. For cpu #0 only.
/*
 * NOTE(review): the assignment of pa is not visible in either loop;
 * presumably it is taken from phys_avail[0] before the increment.
 */
447 for (i = 0; i < sizeof(struct mdglobaldata); i += PAGE_SIZE) {
449 va = (vm_offset_t)&CPU_prvspace[0].mdglobaldata + i;
450 pmap_kenter_quick(va, pa);
451 phys_avail[0] += PAGE_SIZE;
453 for (i = 0; i < sizeof(CPU_prvspace[0].idlestack); i += PAGE_SIZE) {
455 va = (vm_offset_t)&CPU_prvspace[0].idlestack + i;
456 pmap_kenter_quick(va, pa);
457 phys_avail[0] += PAGE_SIZE;
/*
 * NOTE(review): the text below names %gs while the call that follows is
 * tls_set_fs(); one of the two looks stale - confirm which segment
 * register the mycpu macro relies on.
 */
461 * Setup the %gs for cpu #0. The mycpu macro works after this
464 tls_set_fs(&CPU_prvspace[0], sizeof(struct privatespace));
/*
 * Low level bring-up of the cpu #0 globaldata, thread0 and proc0.
 *
 * NOTE(review): the function header is not visible in this view;
 * judging by the prototype list this is presumably init_vkernel(void).
 */
468 * Initialize very low level systems including thread0, proc0, etc.
474 struct mdglobaldata *gd;
/* Start from a zeroed mdglobaldata for cpu #0, then link it with thread0. */
476 gd = &CPU_prvspace[0].mdglobaldata;
477 bzero(gd, sizeof(*gd));
479 gd->mi.gd_curthread = &thread0;
480 thread0.td_gd = &gd->mi;
484 gd->mi.gd_prvspace = &CPU_prvspace[0];
485 mi_gdinit(&gd->mi, 0);
/* proc0 receives the UAREA mapped earlier and the static trap frame. */
487 mi_proc0init(&gd->mi, proc0paddr);
488 proc0.p_lwp.lwp_md.md_regs = &proc0_tf;
/*
 * NOTE(review): the #endif matching this #if 0 is not visible, so it is
 * unclear which of the statements below are compiled out.
 */
493 #if 0 /* #ifdef DDB */
495 if (boothowto & RB_KDB)
496 Debugger("Boot flags requested debugger");
499 initializecpu(); /* Initialize CPU registers */
/* Pass the number of free physical pages to init_param2(). */
501 init_param2((phys_avail[1] - phys_avail[0]) / PAGE_SIZE);
/*
 * NOTE(review): init_kern_memory() already maps the message buffer and
 * calls msgbufinit(); if this copy is live the buffer is initialised
 * twice - confirm it sits inside a disabled region.
 */
505 * Map the message buffer
507 for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE)
508 pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off);
509 msgbufinit(msgbufp, MSGBUF_SIZE);
512 thread0.td_pcb_cr3 ... MMU
/* NOTE(review): repeats the md_regs assignment made earlier in this block. */
513 proc0.p_lwp.lwp_md.md_regs = &proc0_tf;
/*
 * init_rootdevice: open the root filesystem image and register the
 * root device name.
 *
 * imageFile: path of the filesystem image. Per the text below it is
 * optional; the guard for a missing path is not visible in this view.
 *
 * Visible effects: sets RootImageFd and rootdevnames[0]; an open or
 * fstat failure is reported through err(1, ...).
 */
518 * The root filesystem path for the virtual kernel is optional. If specified
519 * it points to a filesystem image.
521 * The virtual kernel caches data from our 'disk' just like a normal kernel,
522 * so we do not really want the real kernel to cache the data too. Use
523 * O_DIRECT to remove the duplication.
527 init_rootdevice(char *imageFile)
/*
 * NOTE(review): O_CREAT is not among the flags, so the 0644 mode is
 * unused and the error text mentioning creation is misleading.
 */
532 RootImageFd = open(imageFile, O_RDWR|O_DIRECT, 0644);
/*
 * NOTE(review): err() already appends strerror(errno), so passing it
 * again as an argument prints the reason twice.
 */
533 if (RootImageFd < 0 || fstat(RootImageFd, &st) < 0) {
534 err(1, "Unable to open/create %s: %s",
535 imageFile, strerror(errno));
538 rootdevnames[0] = "ufs:vkd0a";
/*
 * init_netif: open the network interface file read/write and
 * non-blocking, storing the descriptor in NetifFd.
 *
 * netifFile: path to open. A failure is only reported with warn(), so
 * unlike the other init helpers this one does not terminate the program.
 *
 * NOTE(review): main() initialises netifFile to NULL; no NULL guard is
 * visible before open() in this view - confirm one exists.
 * NOTE(review): warn() already appends strerror(errno), so passing it
 * again as an argument prints the reason twice.
 */
544 init_netif(char *netifFile)
547 NetifFd = open(netifFile, O_RDWR | O_NONBLOCK);
549 warn("Unable to open %s: %s",
550 netifFile, strerror(errno));
/*
 * usage: takes a message string in ctl; main() calls it with a fixed
 * text when the maxmem option is rejected. The body is not visible in
 * this view.
 */
557 usage(const char *ctl)
/*
 * NOTE(review): the statements below presumably belong to separate cpu
 * reset and cpu halt routines whose headers are not visible here.
 */
565 kprintf("cpu reset\n");
572 kprintf("cpu halt\n");
574 __asm__ __volatile("hlt");