// the kernel expects there to be RAM
// for use by the kernel and user pages
// from physical address 0x80000000 to PHYSTOP.
#define KERNBASE 0x80000000L
#define PHYSTOP (KERNBASE + 128*1024*1024)  // here PHYSTOP = 0x88000000L
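As a quick check of the arithmetic (a standalone sketch, not part of xv6): 128 MiB is 0x08000000 bytes, so KERNBASE + 128 MiB lands exactly at 0x88000000.

#include <assert.h>
#include <stdint.h>

int main(void) {
  uint64_t kernbase = 0x80000000UL;
  uint64_t phystop  = kernbase + 128UL*1024*1024;  // 128 MiB = 0x08000000
  assert(phystop == 0x88000000UL);                 // matches the comment above
  return 0;
}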
extern char etext[];      // kernel.ld sets this to end of kernel code.
extern char trampoline[]; // trampoline.S
// create a direct-map page table for the kernel.
void
kvminit()
{
  // allocate a page of physical memory to hold the root page-table page.
  kernel_pagetable = (pagetable_t) kalloc();
  memset(kernel_pagetable, 0, PGSIZE);

  // The mappings include the kernel's instructions and data, physical memory
  // up to PHYSTOP, and memory ranges which are actually devices.
  // kvmmap installs mappings into a page table for a range of virtual
  // addresses to a corresponding range of physical addresses.

  // uart registers
  kvmmap(UART0, UART0, PGSIZE, PTE_R | PTE_W);

  // virtio mmio disk interface
  kvmmap(VIRTIO0, VIRTIO0, PGSIZE, PTE_R | PTE_W);

  // CLINT
  kvmmap(CLINT, CLINT, 0x10000, PTE_R | PTE_W);

  // PLIC
  kvmmap(PLIC, PLIC, 0x400000, PTE_R | PTE_W);

  // map kernel text executable and read-only.
  // the size of this region is the size of the kernel text (code).
  kvmmap(KERNBASE, KERNBASE, (uint64)etext-KERNBASE, PTE_R | PTE_X);

  // map kernel data and the physical RAM we'll make use of.
  // everything from the end of the kernel text up to PHYSTOP is kernel data
  // and free memory; xv6 uses the physical memory between the end of the
  // kernel and PHYSTOP for run-time allocation.
  kvmmap((uint64)etext, (uint64)etext, PHYSTOP-(uint64)etext, PTE_R | PTE_W);

  // map the trampoline for trap entry/exit to
  // the highest virtual address in the kernel.
  // TRAMPOLINE = MAXVA - PGSIZE: the page just below the top of the
  // virtual address space is given to the trampoline.
  kvmmap(TRAMPOLINE, (uint64)trampoline, PGSIZE, PTE_R | PTE_X);
}
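For reference, TRAMPOLINE is derived from MAXVA; the definitions below are reproduced from xv6's kernel/riscv.h and kernel/memlayout.h:

// one beyond the highest possible virtual address.
// MAXVA is actually one bit less than the max allowed by
// Sv39, to avoid having to sign-extend virtual addresses
// that have the high bit set.
#define MAXVA (1L << (9 + 9 + 9 + 12 - 1))

// map the trampoline page to the highest address,
// in both user and kernel space.
#define TRAMPOLINE (MAXVA - PGSIZE)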
kvmmap(): establish a mapping from virtual addresses to physical addresses
// add a mapping to the kernel page table.
// only used when booting.
// does not flush TLB or enable paging.
void
kvmmap(uint64 va, uint64 pa, uint64 sz, int perm)
{
  if(mappages(kernel_pagetable, va, sz, pa, perm) != 0)
    panic("kvmmap");
}
// Create PTEs for virtual addresses starting at va that refer to
// physical addresses starting at pa. va and size might not
// be page-aligned. Returns 0 on success, -1 if walk() couldn't
// allocate a needed page-table page.
int
mappages(pagetable_t pagetable, uint64 va, uint64 size, uint64 pa, int perm)
{
  // installs a PTE for each new mapping; the mapping is done separately
  // for each virtual address in the range, at page intervals.
  uint64 a, last;
  pte_t *pte;
  a = PGROUNDDOWN(va);
  last = PGROUNDDOWN(va + size - 1);  // round the start of the range down to a 4096-byte page boundary, and find the page containing the last byte
  for(;;){
    // call walk to find the address of the PTE for virtual address a
    if((pte = walk(pagetable, a, 1)) == 0)  // walk failed to find/allocate a PTE for a
      return -1;
    if(*pte & PTE_V)  // this PTE is already mapped by another va: its valid bit is set
      panic("remap");
    // take the PPN of physical address pa, add the permission bits perm
    // and the valid bit, and store the result in the PTE.
    *pte = PA2PTE(pa) | perm | PTE_V;
    if(a == last)  // enough pages have been mapped; done
      break;
    a += PGSIZE;
    pa += PGSIZE;  // one page mapped, so advance both the virtual and physical addresses by a page
  }
  return 0;
}
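The rounding and PTE-packing macros used above come from xv6's kernel/riscv.h; they are reproduced here so the bit manipulation is visible:

#define PGSIZE 4096 // bytes per page
#define PGSHIFT 12  // bits of offset within a page

#define PGROUNDUP(sz)  (((sz)+PGSIZE-1) & ~(PGSIZE-1))
#define PGROUNDDOWN(a) (((a)) & ~(PGSIZE-1))

// shift a physical address to the right place for a PTE, and back.
#define PA2PTE(pa) ((((uint64)pa) >> 12) << 10)
#define PTE2PA(pte) (((pte) >> 10) << 12)
#define PTE_FLAGS(pte) ((pte) & 0x3FF)

#define PTE_V (1L << 0) // valid
#define PTE_R (1L << 1)
#define PTE_W (1L << 2)
#define PTE_X (1L << 3)
#define PTE_U (1L << 4) // user can access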
// Return the address of the PTE in page table pagetable
// that corresponds to virtual address va. If alloc!=0,
// create any required page-table pages.
//
// The risc-v Sv39 scheme has three levels of page-table
// pages. A page-table page contains 512 64-bit PTEs.
// A 64-bit virtual address is split into five fields:
//   39..63 -- must be zero.
//   30..38 -- 9 bits of level-2 index.
//   21..29 -- 9 bits of level-1 index.
//   12..20 -- 9 bits of level-0 index.
//    0..11 -- 12 bits of byte offset within the page.
pte_t *
walk(pagetable_t pagetable, uint64 va, int alloc)
{
  // walk finds the PTE for a given va and
  // returns the address of that PTE in the lowest layer of the tree.
  if(va >= MAXVA)
    panic("walk");
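The remainder of walk, as it appears in xv6's kernel/vm.c, descends the three levels, allocating intermediate page-table pages on demand when alloc != 0:

  for(int level = 2; level > 0; level--) {
    pte_t *pte = &pagetable[PX(level, va)];   // PX extracts the 9-bit index for this level
    if(*pte & PTE_V) {
      pagetable = (pagetable_t)PTE2PA(*pte);  // descend to the next-level page-table page
    } else {
      if(!alloc || (pagetable = (pde_t*)kalloc()) == 0)
        return 0;                             // missing, and not allowed (or unable) to allocate
      memset(pagetable, 0, PGSIZE);
      *pte = PA2PTE(pagetable) | PTE_V;       // install the new page-table page
    }
  }
  return &pagetable[PX(0, va)];               // PTE in the lowest level
}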
// Switch h/w page table register to the kernel's page table,
// and enable paging.
void
kvminithart()
{
  // install the kernel page table:
  // write the physical address of the root page-table page into the satp register.
  // After this the CPU will translate addresses using the kernel page table.
  w_satp(MAKE_SATP(kernel_pagetable));

  // flush the current CPU's TLB.
  // The trampoline also flushes the TLB when it switches to a user page table
  // before returning to user space.
  sfence_vma();
}
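MAKE_SATP packs the Sv39 mode bits together with the physical page number of the root page-table page; the macros are from xv6's kernel/riscv.h:

// use riscv's sv39 page table scheme.
#define SATP_SV39 (8L << 60)

#define MAKE_SATP(pagetable) (SATP_SV39 | (((uint64)pagetable) >> 12))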
// initialize the proc table at boot time.
void
procinit(void)
{
  struct proc *p;

  initlock(&pid_lock, "nextpid");
  // Initially p = proc, i.e. p points at the start of the proc array;
  // each iteration advances p to the next process structure.
  for(p = proc; p < &proc[NPROC]; p++) {
      initlock(&p->lock, "proc");

      // Allocate a page for each process's kernel stack.
      // Map it high in memory, at the virtual address generated by KSTACK,
      // followed by an invalid guard page.
      char *pa = kalloc();
      if(pa == 0)
        panic("kalloc");
      // Subtracting the pointers subtracts the addresses, giving the offset
      // of the current process p from the start of the proc array.
      // For the first process, p - proc = 0 and KSTACK produces the virtual
      // address TRAMPOLINE - 2*PGSIZE: the page just below TRAMPOLINE is the
      // guard page, and the page below that is the kernel stack, i.e. the
      // page va points to. Later processes follow the same pattern; the guard
      // pages are skipped and never mapped, so their PTE_V bits stay invalid.
      uint64 va = KSTACK((int) (p - proc));
      // add the mapping PTEs to the kernel page table.
      // the kernel stack is readable and writable, but not accessible from
      // user mode and not executable.
      kvmmap(va, (uint64)pa, PGSIZE, PTE_R | PTE_W);
      p->kstack = va;
  }

  // re-load satp so the hardware uses the updated kernel page table.
  kvminithart();
}
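KSTACK is defined in xv6's kernel/memlayout.h; each process gets two pages below TRAMPOLINE, the lower one used as the stack and the upper one left unmapped as a guard:

// map kernel stacks beneath the trampoline,
// each surrounded by invalid guard pages.
#define KSTACK(p) (TRAMPOLINE - ((p)+1)* 2*PGSIZE)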
extern char end[]; // first address after kernel.
                   // defined by kernel.ld.
// initialize the allocator
void
kinit()
{
  // initialize the free list to hold every page between the end of the
  // kernel and PHYSTOP; xv6 assumes that the machine has 128MB of RAM.
  initlock(&kmem.lock, "kmem");
  // everything after the kernel data and before PHYSTOP is available for
  // allocation: add that memory to the free list via per-page calls to kfree.
  freerange(end, (void*)PHYSTOP);
}
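The free list is a linked list threaded through the free pages themselves; the relevant declarations from xv6's kernel/kalloc.c are:

struct run {
  struct run *next;
};

struct {
  struct spinlock lock;
  struct run *freelist;
} kmem;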
freerange(): free a range of physical memory onto the free list
void
freerange(void *pa_start, void *pa_end)
{
  char *p;
  p = (char*)PGROUNDUP((uint64)pa_start);
  // kfree pushes each page onto the head of the free list
  for(; p + PGSIZE <= (char*)pa_end; p += PGSIZE)
    kfree(p);
}
// Free the page of physical memory pointed at by pa,
// which normally should have been returned by a
// call to kalloc(). (The exception is when
// initializing the allocator; see kinit above.)
void
kfree(void *pa)
{
  struct run *r;
  // Fill with junk to catch dangling refs.
  memset(pa, 1, PGSIZE);
  // cast pa to a pointer to struct run; below, r->next records the old head
  // of the free list, and the free list is then set to r.
  r = (struct run*)pa;
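The rest of kfree, from xv6's kernel/kalloc.c, does exactly what the comment describes, under the allocator lock:

  acquire(&kmem.lock);
  r->next = kmem.freelist;  // remember the old head of the free list
  kmem.freelist = r;        // the freed page becomes the new head
  release(&kmem.lock);
}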
// Allocate one 4096-byte page of physical memory.
// Returns a pointer that the kernel can use.
// Returns 0 if the memory cannot be allocated.
void *
kalloc(void)
{
  // remove and return the first element in the free list.
  // When a process asks xv6 for more user memory, xv6 first uses kalloc
  // to allocate physical pages.
  struct run *r;
  acquire(&kmem.lock);
  r = kmem.freelist;
  if(r)
    kmem.freelist = r->next;
  release(&kmem.lock);
  if(r)
    memset((char*)r, 5, PGSIZE); // fill with junk
  return (void*)r;
}
// Grow or shrink user memory by n bytes.
// Return 0 on success, -1 on failure.
// sbrk is implemented by the function growproc.
int
growproc(int n)
{
  uint sz;
  struct proc *p = myproc();
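The body of growproc (xv6's kernel/proc.c) delegates to uvmalloc for growth and uvmdealloc for shrinking, both of which are shown next:

  sz = p->sz;
  if(n > 0){
    if((sz = uvmalloc(p->pagetable, sz, sz + n)) == 0) {
      return -1;                             // allocation failed; size unchanged
    }
  } else if(n < 0){
    sz = uvmdealloc(p->pagetable, sz, sz + n);
  }
  p->sz = sz;                                // record the new process size
  return 0;
}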
// Allocate PTEs and physical memory to grow process from oldsz to
// newsz, which need not be page aligned. Returns new size or 0 on error.
uint64
uvmalloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz)
{
  char *mem;
  uint64 a;
  if(newsz < oldsz)
    return oldsz;
  oldsz = PGROUNDUP(oldsz);
  for(a = oldsz; a < newsz; a += PGSIZE){
    // allocate physical memory with kalloc, and add PTEs to the user
    // page table with mappages.
    mem = kalloc();
    if(mem == 0){
      uvmdealloc(pagetable, a, oldsz);
      return 0;
    }
    memset(mem, 0, PGSIZE);
    if(mappages(pagetable, a, PGSIZE, (uint64)mem, PTE_W|PTE_X|PTE_R|PTE_U) != 0){
      kfree(mem);
      uvmdealloc(pagetable, a, oldsz);
      return 0;
    }
  }
  return newsz;
}
uvmdealloc()
uvmdealloc calls uvmunmap to reclaim the allocated physical memory.
// Deallocate user pages to bring the process size from oldsz to
// newsz. oldsz and newsz need not be page-aligned, nor does newsz
// need to be less than oldsz. oldsz can be larger than the actual
// process size. Returns the new process size.
uint64
uvmdealloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz)
{
  if(newsz >= oldsz)
    return oldsz;
  if(PGROUNDUP(newsz) < PGROUNDUP(oldsz)){
    int npages = (PGROUNDUP(oldsz) - PGROUNDUP(newsz)) / PGSIZE;
    // call uvmunmap, which uses walk to find PTEs and kfree to free
    // the physical memory they refer to.
    uvmunmap(pagetable, PGROUNDUP(newsz), npages, 1);
  }

  return newsz;
}
// Remove npages of mappings starting from va. va must be
// page-aligned. The mappings must exist.
// Optionally free the physical memory.
void
uvmunmap(pagetable_t pagetable, uint64 va, uint64 npages, int do_free)
{
  uint64 a;
  pte_t *pte;
  if((va % PGSIZE) != 0)
    panic("uvmunmap: not aligned");
  for(a = va; a < va + npages*PGSIZE; a += PGSIZE){
    if((pte = walk(pagetable, a, 0)) == 0)
      panic("uvmunmap: walk");
    // this is an examination of the user page table:
    // xv6 uses a process's page table as the only record
    // of which physical memory pages are allocated to that process.
    if((*pte & PTE_V) == 0)
      panic("uvmunmap: not mapped");
    if(PTE_FLAGS(*pte) == PTE_V)
      panic("uvmunmap: not a leaf");
    if(do_free){
      uint64 pa = PTE2PA(*pte);
      kfree((void*)pa);
    }
    *pte = 0;
  }
}
// Create a user page table for a given process,
// with no user memory, but with trampoline pages.
pagetable_t
proc_pagetable(struct proc *p)
{
  pagetable_t pagetable;

  // An empty page table.
  pagetable = uvmcreate();
  if(pagetable == 0)
    return 0;
  // map the trampoline code (for system call return)
  // at the highest user virtual address.
  // only the supervisor uses it, on the way
  // to/from user space, so not PTE_U.
  if(mappages(pagetable, TRAMPOLINE, PGSIZE,
              (uint64)trampoline, PTE_R | PTE_X) < 0){
    uvmfree(pagetable, 0);
    return 0;
  }
  // map the trapframe just below TRAMPOLINE, for trampoline.S.
  if(mappages(pagetable, TRAPFRAME, PGSIZE,
              (uint64)(p->trapframe), PTE_R | PTE_W) < 0){
    uvmunmap(pagetable, TRAMPOLINE, 1, 0);
    uvmfree(pagetable, 0);
    return 0;
  }
  return pagetable;
}
// create an empty user page table.
// returns 0 if out of memory.
pagetable_t
uvmcreate()
{
  pagetable_t pagetable;
  pagetable = (pagetable_t) kalloc();
  if(pagetable == 0)
    return 0;
  memset(pagetable, 0, PGSIZE);
  return pagetable;
}
// Load a program segment into pagetable at virtual address va.
// va must be page-aligned
// and the pages from va to va+sz must already be mapped.
// Returns 0 on success, -1 on failure.
static int
loadseg(pagetable_t pagetable, uint64 va, struct inode *ip, uint offset, uint sz)
{
  uint i, n;
  uint64 pa;
  if((va % PGSIZE) != 0)
    panic("loadseg: va must be page aligned");
  for(i = 0; i < sz; i += PGSIZE){
    pa = walkaddr(pagetable, va + i);
    if(pa == 0)
      panic("loadseg: address should exist");
    if(sz - i < PGSIZE)
      n = sz - i;
    else
      n = PGSIZE;
    // read the file contents into physical memory (DRAM)
    if(readi(ip, 0, (uint64)pa, offset+i, n) != n)
      return -1;
  }
  return 0;
}
// Look up a virtual address, return the physical address,
// or 0 if not mapped.
// Can only be used to look up user pages.
uint64
walkaddr(pagetable_t pagetable, uint64 va)
{
  pte_t *pte;
  uint64 pa;
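The body of walkaddr, as in xv6's kernel/vm.c, uses walk without allocation and rejects PTEs that are invalid or not user-accessible:

  if(va >= MAXVA)
    return 0;

  pte = walk(pagetable, va, 0);  // don't allocate missing page-table pages
  if(pte == 0)
    return 0;
  if((*pte & PTE_V) == 0)        // no mapping installed
    return 0;
  if((*pte & PTE_U) == 0)        // kernel-only page: refuse the lookup
    return 0;
  pa = PTE2PA(*pte);
  return pa;
}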
  // arguments to user main(argc, argv).
  // argc is returned via the system call return value, which goes in a0.
  // sp now points at the argv[] array; argc is returned through register a0.
  p->trapframe->a1 = sp;
  // Save program name for debugging.
  for(last=s=path; *s; s++)
    if(*s == '/')
      last = s+1;
  safestrcpy(p->name, last, sizeof(p->name));
  // Commit to the user image.
  oldpagetable = p->pagetable;
  p->pagetable = pagetable;
  p->sz = sz;
  // Note: when the user process is created, the pc value that will return
  // to main is placed in the epc register here.
  p->trapframe->epc = elf.entry;  // initial program counter = main
  p->trapframe->sp = sp;          // initial stack pointer
  proc_freepagetable(oldpagetable, oldsz);
  // the C calling convention on RISC-V places return values in a0
  return argc; // this ends up in a0, the first argument to main(argc, argv)