author	Charles.Forsyth <devnull@localhost>	2006-12-22 21:39:35 +0000
committer	Charles.Forsyth <devnull@localhost>	2006-12-22 21:39:35 +0000
commit	74a4d8c26dd3c1e9febcb717cfd6cb6512991a7a (patch)
tree	c6e220ba61db3a6ea4052e6841296d829654e664	/os/pc/mmu.c
parent	46439007cf417cbd9ac8049bb4122c890097a0fa (diff)
20060303
Diffstat (limited to 'os/pc/mmu.c')
-rw-r--r--	os/pc/mmu.c	321
1 file changed, 321 insertions, 0 deletions
diff --git a/os/pc/mmu.c b/os/pc/mmu.c
new file mode 100644
index 00000000..6bd4ddfb
--- /dev/null
+++ b/os/pc/mmu.c
@@ -0,0 +1,321 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+
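+/*
+ * Each Segdesc packs a segment descriptor into two 32-bit words.
+ * The data and exec macros below describe flat 4GB segments
+ * (base 0, limit 0xFFFFF in 4K-byte units via SEGG), differing
+ * only in type and privilege level.
+ */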
+#define DATASEGM(p) { 0xFFFF, SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW }
+#define EXECSEGM(p) { 0xFFFF, SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
+#define TSSSEGM(b,p) { ((b)<<16)|sizeof(Tss),\
+ ((b)&0xFF000000)|(((b)>>16)&0xFF)|SEGTSS|SEGPL(p)|SEGP }
+
+Segdesc gdt[NGDT] =
+{
+[NULLSEG] { 0, 0}, /* null descriptor */
+[KDSEG] DATASEGM(0), /* kernel data/stack */
+[KESEG] EXECSEGM(0), /* kernel code */
+[UDSEG] DATASEGM(3), /* user data/stack */
+[UESEG] EXECSEGM(3), /* user code */
+[TSSSEG] TSSSEGM(0,0), /* tss segment */
+};
+
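+/*
+ * Point the TSS at the kernel stack to be used on entry to
+ * ring 0, and switch to the page table at pdb.  The processor
+ * loads ss0:esp0 from the TSS on each transition from user
+ * mode to kernel mode.
+ */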
+static void
+taskswitch(ulong pdb, ulong stack)
+{
+ Tss *tss;
+
+ tss = m->tss;
+ tss->ss0 = KDSEL;
+ tss->esp0 = stack;
+ tss->ss1 = KDSEL;
+ tss->esp1 = stack;
+ tss->ss2 = KDSEL;
+ tss->esp2 = stack;
+ tss->cr3 = pdb;
+ putcr3(pdb);
+}
+
+/*
+ * On processors that support it, we set the PTEGLOBAL bit in
+ * page table and page directory entries that map kernel memory.
+ * Doing this tells the processor not to bother flushing them
+ * from the TLB when doing the TLB flush associated with a
+ * context switch (write to CR3). Since kernel memory mappings
+ * are never removed, this is safe. (If we ever remove kernel memory
+ * mappings, we can do a full flush by turning off the PGE bit in CR4,
+ * writing to CR3, and then turning the PGE bit back on.)
+ *
+ * See also mmukmap below.
+ *
+ * Processor support for the PTEGLOBAL bit is enabled in devarch.c.
+ */
+static void
+memglobal(void)
+{
+ int i, j;
+ ulong *pde, *pte;
+
+ /* only need to do this once, on bootstrap processor */
+ if(m->machno != 0)
+ return;
+
+ if(!m->havepge)
+ return;
+
+ pde = m->pdb;
+ for(i=512; i<1024; i++){ /* 512: start at entry for virtual 0x80000000 */
+ if(pde[i] & PTEVALID){
+ pde[i] |= PTEGLOBAL;
+ if(!(pde[i] & PTESIZE)){
+ pte = KADDR(pde[i]&~(BY2PG-1));
+ for(j=0; j<1024; j++)
+ if(pte[j] & PTEVALID)
+ pte[j] |= PTEGLOBAL;
+ }
+ }
+ }
+}
+
+void
+mmuinit(void)
+{
+ ulong x, *p;
+ ushort ptr[3];
+
+ memglobal();
+
+ m->tss = malloc(sizeof(Tss));
+ memset(m->tss, 0, sizeof(Tss));
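+	/*
+	 * the I/O map base address is held in the top 16 bits;
+	 * pointing it beyond the TSS segment limit means there is
+	 * no I/O permission bitmap, so user-mode port I/O traps
+	 */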
+ m->tss->iomap = 0xDFFF<<16;
+
+ /*
+ * We used to keep the GDT in the Mach structure, but it
+ * turns out that that slows down access to the rest of the
+ * page. Since the Mach structure is accessed quite often,
+ * it pays off anywhere from a factor of 1.25 to 2 on real
+ * hardware to separate them (the AMDs are more sensitive
+ * than Intels in this regard). Under VMware it pays off
+ * a factor of about 10 to 100.
+ */
+
+ memmove(m->gdt, gdt, sizeof gdt);
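+	/* fill in the TSS descriptor with the run-time base, as TSSSEGM does */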
+ x = (ulong)m->tss;
+ m->gdt[TSSSEG].d0 = (x<<16)|sizeof(Tss);
+ m->gdt[TSSSEG].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP;
+
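+	/*
+	 * lgdt and lidt each take a 6-byte pseudo-descriptor:
+	 * a 16-bit limit followed by a 32-bit linear base address
+	 */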
+ ptr[0] = sizeof(gdt)-1;
+ x = (ulong)m->gdt;
+ ptr[1] = x & 0xFFFF;
+ ptr[2] = (x>>16) & 0xFFFF;
+ lgdt(ptr);
+
+ ptr[0] = sizeof(Segdesc)*256-1;
+ x = IDTADDR;
+ ptr[1] = x & 0xFFFF;
+ ptr[2] = (x>>16) & 0xFFFF;
+ lidt(ptr);
+
+ /* make kernel text unwritable */
+ for(x = KTZERO; x < (ulong)etext; x += BY2PG){
+ p = mmuwalk(m->pdb, x, 2, 0);
+ if(p == nil)
+ panic("mmuinit");
+ *p &= ~PTEWRITE;
+ }
+
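+	/* the kernel stack for this processor grows down from the top of the Mach page */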
+ taskswitch(PADDR(m->pdb), (ulong)m + BY2PG);
+ ltr(TSSSEL);
+}
+
+ulong*
+mmuwalk(ulong* pdb, ulong va, int level, int create)
+{
+ ulong pa, *table;
+
+ /*
+ * Walk the page-table pointed to by pdb and return a pointer
+ * to the entry for virtual address va at the requested level.
+ * If the entry is invalid and create isn't requested then bail
+ * out early. Otherwise, for the 2nd level walk, allocate a new
+ * page-table page and register it in the 1st level.
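+	 * For example, PDX(0x80123456) is 0x200 (pdb entry 512) and
+	 * PTX(0x80123456) is 0x123, so a level 2 walk returns the
+	 * address of entry 0x123 in the page table for that region.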
+ */
+ table = &pdb[PDX(va)];
+ if(!(*table & PTEVALID) && create == 0)
+ return 0;
+
+ switch(level){
+
+ default:
+ return 0;
+
+ case 1:
+ return table;
+
+ case 2:
+ if(*table & PTESIZE)
+ panic("mmuwalk2: va %luX entry %luX\n", va, *table);
+ if(!(*table & PTEVALID)){
+ pa = PADDR(xspanalloc(BY2PG, BY2PG, 0));
+ *table = pa|PTEWRITE|PTEVALID;
+ }
+ table = KADDR(PPN(*table));
+
+ return &table[PTX(va)];
+ }
+}
+
+static Lock mmukmaplock;
+
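+/*
+ * Propagate the kernel mapping for va from the prototype page
+ * table MACHP(0)->pdb into this processor's pdb and flush the
+ * TLB.  Return 0 if va has no mapping in the prototype table.
+ */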
+int
+mmukmapsync(ulong va)
+{
+ Mach *mach0;
+ ulong entry, *pte;
+
+ mach0 = MACHP(0);
+
+ ilock(&mmukmaplock);
+
+ if((pte = mmuwalk(mach0->pdb, va, 1, 0)) == nil){
+ iunlock(&mmukmaplock);
+ return 0;
+ }
+ if(!(*pte & PTESIZE) && mmuwalk(mach0->pdb, va, 2, 0) == nil){
+ iunlock(&mmukmaplock);
+ return 0;
+ }
+ entry = *pte;
+
+ if(!(m->pdb[PDX(va)] & PTEVALID))
+ m->pdb[PDX(va)] = entry;
+
+// if(up && up->mmupdb){
+// ((ulong*)up->mmupdb->va)[PDX(va)] = entry;
+// mmuflushtlb(up->mmupdb->pa);
+// }
+// else
+ mmuflushtlb(PADDR(m->pdb));
+
+ iunlock(&mmukmaplock);
+
+ return 1;
+}
+
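+/*
+ * Add an uncached kernel mapping for the physical addresses
+ * [pa, pa+size) at va, using 4MB pages where the processor
+ * supports them.  A va of 0 means map at the kernel address
+ * for pa.  Returns the physical address at the end of the
+ * mapped range.
+ */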
+ulong
+mmukmap(ulong pa, ulong va, int size)
+{
+ Mach *mach0;
+ ulong ova, pae, *table, pgsz, *pte, x;
+ int pse, sync;
+
+ mach0 = MACHP(0);
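+	/*
+	 * big pages can be used only if the processor advertises
+	 * PSE (bit 3 of the CPUID feature flags) and the PSE
+	 * enable (bit 4) is set in CR4
+	 */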
+ if((mach0->cpuiddx & 0x08) && (getcr4() & 0x10))
+ pse = 1;
+ else
+ pse = 0;
+ sync = 0;
+
+ pa = PPN(pa);
+ if(va == 0)
+ va = (ulong)KADDR(pa);
+ else
+ va = PPN(va);
+ ova = va;
+
+ pae = pa + size;
+ ilock(&mmukmaplock);
+ while(pa < pae){
+ table = &mach0->pdb[PDX(va)];
+ /*
+ * Possibly already mapped.
+ */
+ if(*table & PTEVALID){
+ if(*table & PTESIZE){
+			/*
+			 * Big page. Does the remaining range fit within it?
+			 * If it does, set pa to the end so the correct
+			 * value is returned, and get out.
+			 * If not, step pa and va up to the next 4MB
+			 * boundary and continue.
+			 */
+ x = PPN(*table);
+ if(x != pa)
+ panic("mmukmap1: pa %luX entry %luX\n",
+ pa, *table);
+ x += 4*MB;
+ if(pae <= x){
+ pa = pae;
+ break;
+ }
+ pgsz = x - pa;
+ pa += pgsz;
+ va += pgsz;
+
+ continue;
+ }
+ else{
+ /*
+ * Little page. Walk to the entry.
+ * If the entry is valid, set pgsz and continue.
+ * If not, make it so, set pgsz, sync and continue.
+ */
+ pte = mmuwalk(mach0->pdb, va, 2, 0);
+ if(pte && *pte & PTEVALID){
+ x = PPN(*pte);
+ if(x != pa)
+ panic("mmukmap2: pa %luX entry %luX\n",
+ pa, *pte);
+ pgsz = BY2PG;
+ pa += pgsz;
+ va += pgsz;
+ sync++;
+
+ continue;
+ }
+ }
+ }
+
+ /*
+ * Not mapped. Check if it can be mapped using a big page -
+ * starts on a 4MB boundary, size >= 4MB and processor can do it.
+ * If not a big page, walk the walk, talk the talk.
+ * Sync is set.
+ *
+ * If we're creating a kernel mapping, we know that it will never
+ * expire and thus we can set the PTEGLOBAL bit to make the entry
+ * persist in the TLB across flushes. If we do add support later for
+ * unmapping kernel addresses, see devarch.c for instructions on
+ * how to do a full TLB flush.
+ */
+ if(pse && (pa % (4*MB)) == 0 && (pae >= pa+4*MB)){
+ *table = pa|PTESIZE|PTEWRITE|PTEUNCACHED|PTEVALID;
+ if((va&KZERO) && m->havepge)
+ *table |= PTEGLOBAL;
+ pgsz = 4*MB;
+ }
+ else{
+ pte = mmuwalk(mach0->pdb, va, 2, 1);
+ *pte = pa|PTEWRITE|PTEUNCACHED|PTEVALID;
+ if((va&KZERO) && m->havepge)
+ *pte |= PTEGLOBAL;
+ pgsz = BY2PG;
+ }
+ pa += pgsz;
+ va += pgsz;
+ sync++;
+ }
+ iunlock(&mmukmaplock);
+
+	/*
+	 * If something was added,
+	 * we need to sync up.
+	 */
+ if(sync)
+ mmukmapsync(ova);
+
+ return pa;
+}