From fad7eaea3ab96a02081142d9249a46cd0c436cba Mon Sep 17 00:00:00 2001 From: uosfz Date: Wed, 19 Mar 2025 21:53:28 +0100 Subject: [PATCH 01/32] pat reading --- include/x86_64/mem.h | 26 ++++++- src/x86_64/mem.c | 177 ++++++++++++++++++++++++++++++++++--------- 2 files changed, 166 insertions(+), 37 deletions(-) diff --git a/include/x86_64/mem.h b/include/x86_64/mem.h index a0e7ae1..4dc778d 100644 --- a/include/x86_64/mem.h +++ b/include/x86_64/mem.h @@ -9,7 +9,24 @@ void init_gdt(); void init_idt(); +// --- paging --- + +void init_paging(void); + +enum page_attr { + PA_UC = 0x00, + PA_WC = 0x01, + PA_WT = 0x04, + PA_WP = 0x05, + PA_WB = 0x06, + PA_UCMINUS = 0x07 +}; + struct pt_entry { + // fields not in actual entry + uint8_t level; // possible values are 1..=4 + + // fields that are level-independent bool present; bool writable; bool supervisor; @@ -17,15 +34,18 @@ struct pt_entry { bool cache_disable; bool accessed; bool dirty; - bool page_attr_table_low; // in high this is the page_size field to signal hugepages. 
bool global; - bool page_attr_table_high; struct ppn ppn; + + // fields that are level-dependent + bool hugepage; + bool pat_bit; + // TODO MPK, NX }; uint64_t pt_entry_pack(const struct pt_entry *ent_in); -void pt_entry_unpack(uint64_t ent_in, struct pt_entry *ent_out); +void pt_entry_unpack(uint64_t ent_in, uint8_t level, struct pt_entry *ent_out); void pt_entry_print(const struct pt_entry *ent); bool pt_translate(struct va va, struct ppn cr3, struct pa *pa_out); diff --git a/src/x86_64/mem.c b/src/x86_64/mem.c index 33c34a4..78ba087 100644 --- a/src/x86_64/mem.c +++ b/src/x86_64/mem.c @@ -102,21 +102,68 @@ void init_idt() { // --- paging --- -uint64_t pt_entry_pack(const struct pt_entry *ent_in) { - return (uint64_t)ent_in->present - | ((uint64_t)ent_in->writable) << 1 - | ((uint64_t)ent_in->supervisor) << 2 - | ((uint64_t)ent_in->writethrough) << 3 - | ((uint64_t)ent_in->cache_disable) << 4 - | ((uint64_t)ent_in->accessed) << 5 - | ((uint64_t)ent_in->dirty) << 6 - | ((uint64_t)ent_in->page_attr_table_low) << 7 - | ((uint64_t)ent_in->global) << 8 - | ((uint64_t)ent_in->page_attr_table_high) << 12 - | pa_to_value(pa_from_ppn(ent_in->ppn)); +#define MSR_PAT 0x277 + +enum page_attr get_pa(uint8_t index) { + ASSERT(index < 8); + uint64_t value = readmsr(MSR_PAT); + value = (value >> (index << 3)) & 0x7; + ASSERT(value == PA_UC + || value == PA_WC + || value == PA_WT + || value == PA_WP + || value == PA_WB + || value == PA_UCMINUS); + return value; } -void pt_entry_unpack(uint64_t ent_in, struct pt_entry *ent_out) { +void set_pa(uint8_t index, enum page_attr attr) { + ASSERT(index < 8); + ASSERT(attr == PA_UC + || attr == PA_WC + || attr == PA_WT + || attr == PA_WP + || attr == PA_WB + || attr == PA_UCMINUS); + uint64_t value = readmsr(MSR_PAT); + uint64_t mask = 0x7ull << (index << 3); + value &= ~mask; // clear bits + value |= (uint64_t)attr << (index << 3); + writemsr(MSR_PAT, value); +} + +uint64_t pt_entry_pack(const struct pt_entry *ent_in) { + uint64_t retval 
= (uint64_t)ent_in->present + | ((uint64_t)ent_in->writable) << 1 + | ((uint64_t)ent_in->supervisor) << 2 + | ((uint64_t)ent_in->writethrough) << 3 + | ((uint64_t)ent_in->cache_disable) << 4 + | ((uint64_t)ent_in->accessed) << 5 + | ((uint64_t)ent_in->dirty) << 6 + | ((uint64_t)ent_in->global) << 8 + | pa_to_value(pa_from_ppn(ent_in->ppn)); + + switch (ent_in->level) { + case 4: + ASSERT(ent_in->pat_bit == 0); // cannot use PAT bit on highest level (AMD 2 p.228) + ASSERT(ent_in->hugepage == false); + return retval; + case 3: // fallthrough + case 2: + return retval + | ((uint64_t)ent_in->hugepage) << 7 + | ((uint64_t)ent_in->pat_bit) << 12; + case 1: + ASSERT(ent_in->hugepage == false); + return retval + | ((uint64_t)ent_in->pat_bit) << 7; + default: + UNREACHABLE(); + } +} + +void pt_entry_unpack(uint64_t ent_in, uint8_t level, struct pt_entry *ent_out) { + ent_out->level = level; ent_out->present = (ent_in & (0x1ull << 0)) != 0; ent_out->writable = (ent_in & (0x1ull << 1)) != 0; ent_out->supervisor = (ent_in & (0x1ull << 2)) != 0; @@ -124,26 +171,82 @@ void pt_entry_unpack(uint64_t ent_in, struct pt_entry *ent_out) { ent_out->cache_disable = (ent_in & (0x1ull << 4)) != 0; ent_out->accessed = (ent_in & (0x1ull << 5)) != 0; ent_out->dirty = (ent_in & (0x1ull << 6)) != 0; - ent_out->page_attr_table_low = (ent_in & (0x1ull << 7)) != 0; ent_out->global = (ent_in & (0x1ull << 8)) != 0; - ent_out->page_attr_table_high = (ent_in & (0x1ull << 12)) != 0; - // this may panic if the address is 57 bit, which is intended here. 
ent_out->ppn = ppn_from_aligned_pa(pa_from_value(ent_in & 0x000ffffffffff000ull)); + + switch (level) { + case 4: + ent_out->hugepage = 0; + ent_out->pat_bit = false; + break; + case 3: // fallthrough + case 2: + ent_out->hugepage = (ent_in & (0x1ull << 7)) != 0; + ent_out->pat_bit = (ent_in & (0x1ull << 12)) != 0; + break; + case 1: + ent_out->hugepage = 0; + ent_out->pat_bit = (ent_in & (0x1ull << 7)) != 0; + break; + default: + UNREACHABLE(); + } +} + +static uint8_t pt_entry_to_pat_index(const struct pt_entry *ent) { + return ((uint8_t)ent->pat_bit) << 2 + | ((uint8_t)ent->cache_disable) << 1 + | ((uint8_t)ent->writethrough); +} + +static const char *page_attr_to_str(enum page_attr attr) { + switch (attr) { + case PA_UC: + return "UC (uncacheable)"; + case PA_WC: + return "WC (write-combining)"; + case PA_WT: + return "WT (write-through)"; + case PA_WP: + return "WP (write-protect)"; + case PA_WB: + return "WB (write-back)"; + case PA_UCMINUS: + return "UC- (uncacheable minus)"; + default: + UNREACHABLE(); + } } void pt_entry_print(const struct pt_entry *ent) { printf("pt_entry {\n"); + printf(" level: %d\n", (int)ent->level); printf(" present: %d\n", (int)ent->present); printf(" writable: %d\n", (int)ent->writable); printf(" supervisor: %d\n", (int)ent->supervisor); - printf(" writethrough: %d\n", (int)ent->writethrough); - printf(" cache_disable: %d\n", (int)ent->cache_disable); printf(" accessed: %d\n", (int)ent->accessed); printf(" dirty: %d\n", (int)ent->dirty); - printf(" page_attr_table_low: %d\n", (int)ent->page_attr_table_low); printf(" global: %d\n", (int)ent->global); - printf(" page_attr_table_high: %d\n", (int)ent->page_attr_table_high); printf(" page base: %p\n", pa_from_ppn(ent->ppn)); + + switch (ent->level) { + case 4: + printf(" (no hugepage)\n"); + break; + case 3: // fallthrough + case 2: + printf(" hugepage: %d\n", (int)ent->hugepage); + break; + case 1: + printf(" (no hugepage)\n"); + break; + default: + UNREACHABLE(); + } + + // TODO 
right now we assume PAT is present + printf(" caching: %s\n", page_attr_to_str(get_pa(pt_entry_to_pat_index(ent)))); + printf("}\n"); } @@ -163,9 +266,9 @@ static uint64_t *pt_get_leaf_ptr(struct vpn vpn, struct ppn cr3, bool alloc) { uint64_t *entry_ptr = (uint64_t*)pa_to_pointer(pa_from_ppn_with_offset(ppn, idx << 3)); struct pt_entry ent; - pt_entry_unpack(*entry_ptr, &ent); + pt_entry_unpack(*entry_ptr, level, &ent); - if (level > 1 && ent.page_attr_table_low) { + if (ent.hugepage) { PANIC("no hugepage support yet!"); } if (level == 1) { @@ -191,7 +294,8 @@ bool pt_translate(struct va va, struct ppn cr3, struct pa *pa_out) { return false; } struct pt_entry ent; - pt_entry_unpack(*leaf_ptr, &ent); + pt_entry_unpack(*leaf_ptr, 1, &ent); // todo may be > 1 for hugepage + // doesn't really matter because we only use address *pa_out = pa_from_ppn_with_offset(ent.ppn, va_offset(va)); return true; } @@ -209,17 +313,22 @@ bool pt_map_single(struct vpn virt, struct ppn phys, return false; } struct pt_entry ent; + ent.level = 1; ent.present = true; ent.writable = writable; ent.supervisor = supervisor; + // disable caching bits because we want to use MTRRs ent.writethrough = false; ent.cache_disable = false; ent.accessed = false; ent.dirty = false; - ent.page_attr_table_low = false; ent.global = global; - ent.page_attr_table_high = false; ent.ppn = phys; + + ent.hugepage = false; + // disable caching bits because we want to use MTRRs + ent.pat_bit = false; + *leaf_ptr = pt_entry_pack(&ent); return true; } @@ -260,11 +369,11 @@ void mem_range_print(const struct mem_range *mr) { printf("phys: %p .. %p (exclusive)\n", phys_start, phys_start + (mr->npages << 12)); - printf("attr: %c, %c, %s, %s, %s | npages: %lu, size: %lu\n\n", + printf("attr: %c, %c, %s, %s | npages: %lu, size: %lu\n\n", mr->entry_start.writable ? 'w' : 'r', mr->entry_start.supervisor ? 's' : 'u', - mr->entry_start.writethrough ? "wt" : "wb", - mr->entry_start.cache_disable ? 
"cached:n" : "cached:y", + // TODO right now we assume PAT is present + page_attr_to_str(get_pa(pt_entry_to_pat_index(&mr->entry_start))), mr->entry_start.global ? "global:y" : "global:n", mr->npages, mr->npages << 12); @@ -277,12 +386,12 @@ static bool pt_contiguous(struct mem_range *mr, struct vpn vpn, struct pt_entry && ent->supervisor == mr->entry_start.supervisor && ent->writethrough == mr->entry_start.writethrough && ent->cache_disable == mr->entry_start.cache_disable - && ent->global == mr->entry_start.global; - // TODO should PAT also be same? + && ent->global == mr->entry_start.global + && ent->pat_bit == mr->entry_start.pat_bit; } -#define LOWEST_LEVEL(level) ((level) == 1) -#define LEAF(level, ent) (LOWEST_LEVEL(level) || (ent).page_attr_table_low) + +#define LEAF(ent) ((ent).level == 1 || (ent).hugepage) static void pt_get_ranges_rec(struct ppn ppn, int level, uint64_t virt_prev, struct mem_range_buf *buf_out, struct mem_range *curr_range) @@ -290,7 +399,7 @@ static void pt_get_ranges_rec(struct ppn ppn, int level, uint64_t virt_prev, struct pt_entry ent; for (uint64_t i = 0; i < 512; i++) { uint64_t entry = *(uint64_t*)pa_to_pointer(pa_from_ppn_with_offset(ppn, i << 3)); - pt_entry_unpack(entry, &ent); + pt_entry_unpack(entry, level, &ent); if (!ent.present) { continue; } @@ -298,7 +407,7 @@ static void pt_get_ranges_rec(struct ppn ppn, int level, uint64_t virt_prev, uint64_t virt_part = i << (12 + 9*(level - 1)); uint64_t virt_new = virt_prev | virt_part; - if (LEAF(level, ent)) { + if (LEAF(ent)) { struct vpn curr_vpn = vpn_from_aligned_va(va_from_value(virt_new)); // for huge pages this is > 1 uint64_t num_pages_covered = 1ull << (9*(level - 1)); From b94c6940613ebd6fc113513ccded060303b0b981 Mon Sep 17 00:00:00 2001 From: uosfz Date: Wed, 23 Apr 2025 18:25:06 +0200 Subject: [PATCH 02/32] page table allocation functionality --- include/x86_64/mem.h | 5 ++++ src/x86_64/mem.c | 67 +++++++++++++++++++++++++++++++++----------- 2 files changed, 55 
insertions(+), 17 deletions(-) diff --git a/include/x86_64/mem.h b/include/x86_64/mem.h index 4dc778d..ccf2c17 100644 --- a/include/x86_64/mem.h +++ b/include/x86_64/mem.h @@ -10,6 +10,9 @@ void init_gdt(); void init_idt(); // --- paging --- +// +// We don't use the PAT functionality. We set all bits (writethrough, cache_disable, PAT) to 0. +// For access protection we only use MTRRs. void init_paging(void); @@ -62,6 +65,8 @@ bool pt_map_range(struct vpn virt, struct ppn phys, uint64_t num_pages, bool pt_map_range_current(struct vpn virt, struct ppn phys, uint64_t num_pages, bool writable, bool supervisor, bool global); +void pt_free(struct ppn root); + struct mem_range { struct vpn vpn_start; struct pt_entry entry_start; diff --git a/src/x86_64/mem.c b/src/x86_64/mem.c index 78ba087..3260fa7 100644 --- a/src/x86_64/mem.c +++ b/src/x86_64/mem.c @@ -3,7 +3,8 @@ #include "x86_64/asm.h" #include "x86_64/mem.h" -#include "std.h" // remove later, this is only for interrupt handler +#include "std.h" +#include "ram.h" static uint64_t gdt[3]; @@ -102,6 +103,24 @@ void init_idt() { // --- paging --- +void init_paging() { + // assert paging enabled (PG) + ASSERT((get_cr0() >> 31) & 1ull); + // assert PAE enabled (this should never be disabled in 64-bit mode) + ASSERT((get_cr4() >> 5) & 1ull); + // PSE is ignored in long mode + // ASSERT((get_cr4() >> 4) & 1ull); + + // TODO check that everything is setup correctly + // - CR0.WP + // - Long-Mode Active (EFER.LMA) + // - PAT index 000 points to default strategy + // - See if NX bits are used and decide if we want to + // - See of MPK is used and decide if we want to + // - SMEP/SMAP? + // - ... 
+} + #define MSR_PAT 0x277 enum page_attr get_pa(uint8_t index) { @@ -117,21 +136,6 @@ enum page_attr get_pa(uint8_t index) { return value; } -void set_pa(uint8_t index, enum page_attr attr) { - ASSERT(index < 8); - ASSERT(attr == PA_UC - || attr == PA_WC - || attr == PA_WT - || attr == PA_WP - || attr == PA_WB - || attr == PA_UCMINUS); - uint64_t value = readmsr(MSR_PAT); - uint64_t mask = 0x7ull << (index << 3); - value &= ~mask; // clear bits - value |= (uint64_t)attr << (index << 3); - writemsr(MSR_PAT, value); -} - uint64_t pt_entry_pack(const struct pt_entry *ent_in) { uint64_t retval = (uint64_t)ent_in->present | ((uint64_t)ent_in->writable) << 1 @@ -257,6 +261,9 @@ static struct ppn get_cr3_ppn(void) { #define NUM_LEVELS 4 +// TODO for inspection, we need to accumulate permissions over all levels +// (upper supervisor bit will mean lower PTs are also supervisor, even without their bit set) + static uint64_t *pt_get_leaf_ptr(struct vpn vpn, struct ppn cr3, bool alloc) { uint64_t va_value = va_to_value(va_from_vpn(vpn)); int level = NUM_LEVELS; @@ -277,7 +284,22 @@ static uint64_t *pt_get_leaf_ptr(struct vpn vpn, struct ppn cr3, bool alloc) { if (!ent.present) { if (alloc) { - TODO(); + bool success = ram_alloc_frame(&ent.ppn, RAM_PAGE_NORMAL); + ASSERT(success); + ent.level = level; + ent.present = true; + // maximum privileges in upper level + ent.writable = true; + ent.supervisor = false; + ent.writethrough = false; + ent.cache_disable = false; + ent.accessed = false; + ent.dirty = false; + ent.global = false; // TODO should this be true for lower global mappings? 
probably + ent.hugepage = false; + ent.pat_bit = false; + // add into page table + *entry_ptr = pt_entry_pack(&ent); } else { return NULL; } @@ -329,6 +351,11 @@ bool pt_map_single(struct vpn virt, struct ppn phys, // disable caching bits because we want to use MTRRs ent.pat_bit = false; + // additional sanity check to avoid double mapping + struct pt_entry ent; + pt_entry_unpack(*leaf_ptr, 1, &ent); + ASSERT(!ent.present); + *leaf_ptr = pt_entry_pack(&ent); return true; } @@ -345,6 +372,7 @@ bool pt_map_range(struct vpn virt, struct ppn phys, uint64_t num_pages, { for (uint64_t i = 0; i < num_pages; i++) { // TODO error handling: what to do if it fails in the middle? + // TODO huge pages pt_map_single(virt, phys, writable, supervisor, global, cr3); virt = vpn_from_pagenum(vpn_to_pagenum(virt) + 1); phys = ppn_from_pagenum(ppn_to_pagenum(phys) + 1); @@ -358,6 +386,11 @@ bool pt_map_range_current(struct vpn virt, struct ppn phys, uint64_t num_pages, return pt_map_range(virt, phys, num_pages, writable, supervisor, global, get_cr3_ppn()); } +void pt_free(struct ppn root) { + // this assumes single ownership + TODO(); +} + // --- range finder --- void mem_range_print(const struct mem_range *mr) { From bc5dcc15b96fe7660dd466551e0e9c51bc791412 Mon Sep 17 00:00:00 2001 From: uosfz Date: Wed, 23 Apr 2025 18:40:26 +0200 Subject: [PATCH 03/32] fix double declaration --- src/x86_64/mem.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/x86_64/mem.c b/src/x86_64/mem.c index 77c689a..4544493 100644 --- a/src/x86_64/mem.c +++ b/src/x86_64/mem.c @@ -362,9 +362,9 @@ bool pt_map_single(struct vpn virt, struct ppn phys, ent.pat_bit = false; // additional sanity check to avoid double mapping - struct pt_entry ent; - pt_entry_unpack(*leaf_ptr, 1, &ent); - ASSERT(!ent.present); + struct pt_entry old_ent; + pt_entry_unpack(*leaf_ptr, 1, &old_ent); + ASSERT(!old_ent.present); *leaf_ptr = pt_entry_pack(&ent); return true; From 
e84cfa22db6a212e89ff08cde905fc2a69526213 Mon Sep 17 00:00:00 2001 From: uosfz Date: Wed, 7 May 2025 21:45:32 +0200 Subject: [PATCH 04/32] pt_translate hugepage support; try building simple pt --- include/x86_64/mem.h | 5 ++++ src/kernel.c | 2 ++ src/std.c | 4 +-- src/x86_64/mem.c | 70 ++++++++++++++++++++++++++++++++++++++------ 4 files changed, 70 insertions(+), 11 deletions(-) diff --git a/include/x86_64/mem.h b/include/x86_64/mem.h index ccf2c17..0da559b 100644 --- a/include/x86_64/mem.h +++ b/include/x86_64/mem.h @@ -67,6 +67,11 @@ bool pt_map_range_current(struct vpn virt, struct ppn phys, uint64_t num_pages, void pt_free(struct ppn root); +// TODO this is just for testing +void pt_create_minimal(void); + +// --- --- + struct mem_range { struct vpn vpn_start; struct pt_entry entry_start; diff --git a/src/kernel.c b/src/kernel.c index 4b1d744..92c9b1e 100644 --- a/src/kernel.c +++ b/src/kernel.c @@ -226,6 +226,8 @@ void _start() { printf("wp=%d\n", (cr0 >> 16) & 1); #endif + pt_create_minimal(); + // hang for now PANIC("end of kernel"); } diff --git a/src/std.c b/src/std.c index 7f69a38..bd304ab 100644 --- a/src/std.c +++ b/src/std.c @@ -89,8 +89,8 @@ static unsigned int current_buffer_position = 0; static void linebuf_flush(void) { for (unsigned int i = 0; i < current_buffer_position; i++) { uart_write_char(linebuf[i]); - extern void visual_putc(char c); - visual_putc(linebuf[i]); + // extern void visual_putc(char c); + // visual_putc(linebuf[i]); } current_buffer_position = 0; } diff --git a/src/x86_64/mem.c b/src/x86_64/mem.c index 4544493..978fe94 100644 --- a/src/x86_64/mem.c +++ b/src/x86_64/mem.c @@ -274,7 +274,7 @@ static struct ppn get_cr3_ppn(void) { // TODO for inspection, we need to accumulate permissions over all levels // (upper supervisor bit will mean lower PTs are also supervisor, even without their bit set) -static uint64_t *pt_get_leaf_ptr(struct vpn vpn, struct ppn cr3, bool alloc) { +static uint64_t *pt_get_leaf_ptr(struct vpn vpn, struct 
ppn cr3, bool alloc, int *level_out) { uint64_t va_value = va_to_value(va_from_vpn(vpn)); int level = NUM_LEVELS; struct ppn ppn = cr3; @@ -285,10 +285,8 @@ static uint64_t *pt_get_leaf_ptr(struct vpn vpn, struct ppn cr3, bool alloc) { struct pt_entry ent; pt_entry_unpack(*entry_ptr, level, &ent); - if (ent.hugepage) { - PANIC("no hugepage support yet!"); - } - if (level == 1) { + if (level == 1 || ent.hugepage) { + *level_out = level; return entry_ptr; } @@ -321,14 +319,31 @@ static uint64_t *pt_get_leaf_ptr(struct vpn vpn, struct ppn cr3, bool alloc) { } bool pt_translate(struct va va, struct ppn cr3, struct pa *pa_out) { - uint64_t *leaf_ptr = pt_get_leaf_ptr(vpn_from_unaligned_va(va), cr3, false); + int leaf_level = -1; + uint64_t *leaf_ptr = pt_get_leaf_ptr(vpn_from_unaligned_va(va), cr3, false, &leaf_level); if (leaf_ptr == NULL) { return false; } struct pt_entry ent; - pt_entry_unpack(*leaf_ptr, 1, &ent); // todo may be > 1 for hugepage + pt_entry_unpack(*leaf_ptr, 1, &ent); // TODO may be > 1 for hugepage, but // doesn't really matter because we only use address - *pa_out = pa_from_ppn_with_offset(ent.ppn, va_offset(va)); + + uint64_t phys_base; + switch (leaf_level) { + case 1: + *pa_out = pa_from_ppn_with_offset(ent.ppn, va_offset(va)); + break; + case 2: + phys_base = pa_to_value(pa_from_ppn(ent.ppn)) + (va_to_value(va) & 0x1fffff); + *pa_out = pa_from_value(phys_base); + break; + case 3: + phys_base = pa_to_value(pa_from_ppn(ent.ppn)) + (va_to_value(va) & 0x3fffffff); + *pa_out = pa_from_value(phys_base); + break; + default: + UNREACHABLE(); + } return true; } @@ -340,10 +355,13 @@ bool pt_map_single(struct vpn virt, struct ppn phys, bool writable, bool supervisor, bool global, struct ppn cr3) { - uint64_t *leaf_ptr = pt_get_leaf_ptr(virt, cr3, true); + int leaf_level = -1; + uint64_t *leaf_ptr = pt_get_leaf_ptr(virt, cr3, true, &leaf_level); if (leaf_ptr == NULL) { return false; } + ASSERT(leaf_level == 1); // no hugepage yet; we expect everything to 
be alloced + struct pt_entry ent; ent.level = 1; ent.present = true; @@ -401,6 +419,40 @@ void pt_free(struct ppn root) { TODO(); } +void pt_create_minimal(void) { + // stack is currently mapped on the very last page + + // get a top level page table + struct ppn new_cr3; + ASSERT(ram_alloc_frame(&new_cr3, RAM_PAGE_NORMAL)); + + // copy mappings we had previously for everything important + // we do single mapping for now because we don't know if this is physically continuous, + // even though it is virtually + unsigned int count = 0; + for (uint64_t va_value = 0xfffffffff8000000ull; va_value <= 0xfffffffffffff000; va_value += 0x1000) { + struct va curr_va = va_from_canonical(va_value); + struct pa curr_pa; + bool success = pt_translate_current(curr_va, &curr_pa); + if (success) { + printf("WARN: virt. address %p mapped\n", va_to_canonical(curr_va)); + pt_map_single(vpn_from_aligned_va(curr_va), ppn_from_aligned_pa(curr_pa), + true /* writable */, true /* supervisor */, false /* global */, + new_cr3 + ); + printf("WARN: virt. address %p mapped; done\n", va_to_canonical(curr_va)); + } else { + count += 1; + if ((count & 0xff) == 0) { + printf("WARN: virt. 
address %p not mapped (count %d)\n", va_to_canonical(curr_va), count); + } + } + } + printf("WARN: done\n"); + + // code should stay at the same position +} + // --- range finder --- void mem_range_print(const struct mem_range *mr) { From 615785eb515128415f26774f2a3e52ddcbf7e1fc Mon Sep 17 00:00:00 2001 From: uosfz Date: Tue, 13 May 2025 15:54:08 +0200 Subject: [PATCH 05/32] initialize memory correctly --- src/x86_64/mem.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/x86_64/mem.c b/src/x86_64/mem.c index 978fe94..d292c3d 100644 --- a/src/x86_64/mem.c +++ b/src/x86_64/mem.c @@ -292,8 +292,8 @@ static uint64_t *pt_get_leaf_ptr(struct vpn vpn, struct ppn cr3, bool alloc, int if (!ent.present) { if (alloc) { - bool success = ram_alloc_frame(&ent.ppn, RAM_PAGE_NORMAL); - ASSERT(success); + ASSERT(ram_alloc_frame(&ent.ppn, RAM_PAGE_NORMAL)); + memset(pa_to_pointer(pa_from_ppn(ent.ppn)), 0, 0x1000); ent.level = level; ent.present = true; // maximum privileges in upper level @@ -425,12 +425,13 @@ void pt_create_minimal(void) { // get a top level page table struct ppn new_cr3; ASSERT(ram_alloc_frame(&new_cr3, RAM_PAGE_NORMAL)); + memset(pa_to_pointer(pa_from_ppn(new_cr3)), 0, 0x1000); // copy mappings we had previously for everything important // we do single mapping for now because we don't know if this is physically continuous, // even though it is virtually unsigned int count = 0; - for (uint64_t va_value = 0xfffffffff8000000ull; va_value <= 0xfffffffffffff000; va_value += 0x1000) { + for (uint64_t va_value = 0xfffffffff8000000ull; va_value != 0ull; va_value += 0x1000) { struct va curr_va = va_from_canonical(va_value); struct pa curr_pa; bool success = pt_translate_current(curr_va, &curr_pa); From b910671210a4c44376b745183ba14aa4540f9649 Mon Sep 17 00:00:00 2001 From: uosfz Date: Tue, 13 May 2025 16:21:16 +0200 Subject: [PATCH 06/32] replace allocs with zeroed --- src/x86_64/mem.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 
deletions(-) diff --git a/src/x86_64/mem.c b/src/x86_64/mem.c index d292c3d..8d7a8cd 100644 --- a/src/x86_64/mem.c +++ b/src/x86_64/mem.c @@ -292,8 +292,7 @@ static uint64_t *pt_get_leaf_ptr(struct vpn vpn, struct ppn cr3, bool alloc, int if (!ent.present) { if (alloc) { - ASSERT(ram_alloc_frame(&ent.ppn, RAM_PAGE_NORMAL)); - memset(pa_to_pointer(pa_from_ppn(ent.ppn)), 0, 0x1000); + ASSERT(ram_alloc_frame_zeroed(&ent.ppn, RAM_PAGE_NORMAL)); ent.level = level; ent.present = true; // maximum privileges in upper level @@ -424,13 +423,13 @@ void pt_create_minimal(void) { // get a top level page table struct ppn new_cr3; - ASSERT(ram_alloc_frame(&new_cr3, RAM_PAGE_NORMAL)); - memset(pa_to_pointer(pa_from_ppn(new_cr3)), 0, 0x1000); + ASSERT(ram_alloc_frame_zeroed(&new_cr3, RAM_PAGE_NORMAL)); // copy mappings we had previously for everything important // we do single mapping for now because we don't know if this is physically continuous, // even though it is virtually unsigned int count = 0; + // this start value comes from the bootboot docs for (uint64_t va_value = 0xfffffffff8000000ull; va_value != 0ull; va_value += 0x1000) { struct va curr_va = va_from_canonical(va_value); struct pa curr_pa; @@ -438,7 +437,7 @@ void pt_create_minimal(void) { if (success) { printf("WARN: virt. address %p mapped\n", va_to_canonical(curr_va)); pt_map_single(vpn_from_aligned_va(curr_va), ppn_from_aligned_pa(curr_pa), - true /* writable */, true /* supervisor */, false /* global */, + true /* writable */, true /* supervisor */, false /* not global */, new_cr3 ); printf("WARN: virt. 
address %p mapped; done\n", va_to_canonical(curr_va)); From f38d6523839cf13cfa61d89a57d6edb57ab2720c Mon Sep 17 00:00:00 2001 From: uosfz Date: Wed, 14 May 2025 19:01:54 +0200 Subject: [PATCH 07/32] set_cr3 --- include/x86_64/asm.h | 1 + src/x86_64/asm.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/include/x86_64/asm.h b/include/x86_64/asm.h index 5169ab5..c184eeb 100644 --- a/include/x86_64/asm.h +++ b/include/x86_64/asm.h @@ -20,6 +20,7 @@ void out32(int port, uint32_t value); uint64_t get_cr0(void); uint64_t get_cr3(void); +void set_cr3(uint64_t value); uint64_t get_cr4(void); static inline uint64_t diff --git a/src/x86_64/asm.c b/src/x86_64/asm.c index eafd9ed..878d68e 100644 --- a/src/x86_64/asm.c +++ b/src/x86_64/asm.c @@ -42,6 +42,10 @@ uint64_t get_cr3(void) { return cr3; } +void set_cr3(uint64_t value) { + __asm__("mov %0, %%cr3" :: "r"(value)); +} + uint64_t get_cr4(void) { uint64_t cr4; __asm__("mov %%cr4, %0" : "=r"(cr4)::); From f326e3f341023151f93b642b3a21bf9366e27112 Mon Sep 17 00:00:00 2001 From: uosfz Date: Wed, 14 May 2025 19:04:49 +0200 Subject: [PATCH 08/32] pt leaf iter and mem range iter --- include/x86_64/mem.h | 7 +- src/kernel.c | 17 ++- src/x86_64/mem.c | 268 ++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 284 insertions(+), 8 deletions(-) diff --git a/include/x86_64/mem.h b/include/x86_64/mem.h index 0da559b..5971f12 100644 --- a/include/x86_64/mem.h +++ b/include/x86_64/mem.h @@ -70,7 +70,7 @@ void pt_free(struct ppn root); // TODO this is just for testing void pt_create_minimal(void); -// --- --- +// --- range finder --- struct mem_range { struct vpn vpn_start; @@ -89,6 +89,9 @@ struct mem_range_buf { var < (buf)->ptr + (buf)->next_entry; \ var++) -void pt_get_ranges(struct mem_range_buf *buf_out); +void pt_get_ranges(struct mem_range_buf *buf_out, struct ppn cr3); +void pt_get_ranges_current(struct mem_range_buf *buf_out); + +void pt_leaf_iter_test(void); #endif diff --git a/src/kernel.c b/src/kernel.c index 
92c9b1e..20afde7 100644 --- a/src/kernel.c +++ b/src/kernel.c @@ -164,7 +164,7 @@ void _start() { #if 1 struct mem_range_buf buf_out = { .ptr = range_ptr, .next_entry = 0, .num_entries = 10 }; - pt_get_ranges(&buf_out); + pt_get_ranges_current(&buf_out); FOR_MEM_RANGE_IN(curr_range, &buf_out) { mem_range_print(curr_range); } @@ -226,7 +226,20 @@ void _start() { printf("wp=%d\n", (cr0 >> 16) & 1); #endif - pt_create_minimal(); +#if 0 + struct mem_range range; + struct mem_range_iter mi; + mem_range_iter_init_current(&mi); + while (true) { + if (!mem_range_iter_next(&mi, &range)) { + break; + } + mem_range_print(&range); + } +#endif + + // pt_create_minimal(); + pt_leaf_iter_test(); // hang for now PANIC("end of kernel"); diff --git a/src/x86_64/mem.c b/src/x86_64/mem.c index 8d7a8cd..f8aced3 100644 --- a/src/x86_64/mem.c +++ b/src/x86_64/mem.c @@ -418,6 +418,8 @@ void pt_free(struct ppn root) { TODO(); } +#define NUM_RANGES 30 +struct mem_range ranges[NUM_RANGES]; void pt_create_minimal(void) { // stack is currently mapped on the very last page @@ -450,7 +452,35 @@ void pt_create_minimal(void) { } printf("WARN: done\n"); - // code should stay at the same position + // switch to new page table, whose address is in new_cr3 + // we cannot access direct-mapped memory anymore! 
+ uint64_t value = pa_to_value(pa_from_ppn(new_cr3)); + printf("new cr3: %p\n", value); + // check out that things are mapped sensibly + struct mem_range_buf buf = { + .ptr = ranges, + .next_entry = 0, + .num_entries = NUM_RANGES, + }; + pt_get_ranges(&buf, new_cr3); + FOR_MEM_RANGE_IN(m, &buf) { + mem_range_print(m); + } + // set_cr3(value); + + printf("hello from new page table!\n"); + + // do some random work to waste time + size_t w = 0; + size_t x = 0; + while (x < 1ull << 63) { + w += 1; + if (w == 1ull << 63) { + printf("plus!\n"); + x += 1; + } + } + printf("done!\n"); } // --- range finder --- @@ -485,7 +515,6 @@ static bool pt_contiguous(struct mem_range *mr, struct vpn vpn, struct pt_entry && ent->pat_bit == mr->entry_start.pat_bit; } - #define LEAF(ent) ((ent).level == 1 || (ent).hugepage) static void pt_get_ranges_rec(struct ppn ppn, int level, uint64_t virt_prev, @@ -531,7 +560,7 @@ static void pt_get_ranges_rec(struct ppn ppn, int level, uint64_t virt_prev, #define CR4_LA57 12 -void pt_get_ranges(struct mem_range_buf *buf_out) { +void pt_get_ranges(struct mem_range_buf *buf_out, struct ppn cr3) { uint64_t cr4 = get_cr4(); // find out if we have 5 levels or 4 @@ -540,8 +569,239 @@ void pt_get_ranges(struct mem_range_buf *buf_out) { } struct mem_range curr_range = { .npages = 0 }; - pt_get_ranges_rec(get_cr3_ppn(), NUM_LEVELS, 0, buf_out, &curr_range); + pt_get_ranges_rec(cr3, NUM_LEVELS, 0, buf_out, &curr_range); // last range ASSERT(buf_out->next_entry < buf_out->num_entries); buf_out->ptr[buf_out->next_entry++] = curr_range; } + +void pt_get_ranges_current(struct mem_range_buf *buf_out) { + pt_get_ranges(buf_out, get_cr3_ppn()); +} + +// --- new iterators --- + +struct access_level { + struct ppn ppn; + unsigned int index; +}; + +struct pa access_level_pa(struct access_level *ac) { + return pa_from_ppn_with_offset(ac->ppn, ac->index << 3); +} + +struct pt_leaf_iter { + bool done; + unsigned int depth; + struct access_level levels[4]; +}; + +struct 
pt_leaf { + struct vpn vpn_start; + struct pt_entry ent; +}; + +void pt_leaf_iter_init(struct pt_leaf_iter *it, struct ppn cr3) { + it->done = false; + it->depth = 0; + it->levels[0] = (struct access_level){ .ppn = cr3, .index = 0 }; +} + +void pt_leaf_iter_init_current(struct pt_leaf_iter *it) { + pt_leaf_iter_init(it, get_cr3_ppn()); +} + +struct vpn pt_leaf_iter_vpn(struct pt_leaf_iter *it) { + uint64_t value = 0; + for (unsigned int i = 0; i <= it->depth; i++) { + int level = 4 - (int)i; + int shift = 9 * (level - 1); + value |= (uint64_t)(it->levels[i].index) << shift; + } + return vpn_from_pagenum(value); +} + +static void pt_leaf_iter_advance(struct pt_leaf_iter *it) { + while (true) { + it->levels[it->depth].index += 1; + if (it->levels[it->depth].index == 512) { + if (it->depth > 0) { + it->depth -= 1; + } else { + it->done = true; + return; + } + } else { + return; + } + } +} + +#define LEAF(ent) ((ent).level == 1 || (ent).hugepage) + +bool pt_leaf_iter_next(struct pt_leaf_iter *it, struct pt_leaf *leaf_out) { + ASSERT(it != NULL && leaf_out != NULL); + + if (it->done) { + return false; + } + + while (true) { + uint64_t *entry_ptr = pa_to_pointer(access_level_pa(&it->levels[it->depth])); + struct pt_entry ent; + pt_entry_unpack(*entry_ptr, 4 - it->depth, &ent); + if (!ent.present) { + pt_leaf_iter_advance(it); + if (it->done) { + return false; + } + } else { + if (LEAF(ent)) { + *leaf_out = (struct pt_leaf){ .vpn_start = pt_leaf_iter_vpn(it), .ent = ent }; + pt_leaf_iter_advance(it); + return true; + } else { + it->depth += 1; + it->levels[it->depth] = (struct access_level){ .ppn = ent.ppn, .index = 0 }; + } + } + } +} + +struct mem_range_iter { + struct pt_leaf_iter leaf_it; + bool have_held_leaf; + struct pt_leaf held_leaf; +}; + +static uint64_t leaf_level_to_npages(int level) { + switch (level) { + case 1: return 1; + case 2: return 1 << 9; + case 3: return 1 << 18; + default: UNREACHABLE(); + } +} + +void mem_range_iter_init(struct mem_range_iter 
*it, struct ppn cr3) { + pt_leaf_iter_init(&it->leaf_it, cr3); + it->have_held_leaf = false; +} + +void mem_range_iter_init_current(struct mem_range_iter *it) { + mem_range_iter_init(it, get_cr3_ppn()); +} + +static bool mem_range_iter_leaf_get(struct mem_range_iter *it, struct pt_leaf *leaf_out) { + if (it->have_held_leaf) { + it->have_held_leaf = false; + *leaf_out = it->held_leaf; + return true; + } else { + return pt_leaf_iter_next(&it->leaf_it, leaf_out); + } +} + +static void mem_range_iter_leaf_unget(struct mem_range_iter *it, struct pt_leaf leaf) { + ASSERT(!it->have_held_leaf); + it->have_held_leaf = true; + it->held_leaf = leaf; +} + +bool mem_range_iter_next(struct mem_range_iter *it, struct mem_range *range_out) { + struct pt_leaf leaf; + bool success = mem_range_iter_leaf_get(it, &leaf); + if (!success) { + return false; + } + struct mem_range range = (struct mem_range){ + .vpn_start = leaf.vpn_start, + .entry_start = leaf.ent, + .npages = leaf_level_to_npages(leaf.ent.level), + }; + + while (true) { + bool success = mem_range_iter_leaf_get(it, &leaf); + if (!success) { + *range_out = range; + return true; + } + + if (pt_contiguous(&range, leaf.vpn_start, &leaf.ent)) { + range.npages += leaf_level_to_npages(leaf.ent.level); + } else { + mem_range_iter_leaf_unget(it, leaf); + *range_out = range; + return true; + } + } +} + +// --- tests --- +void pt_leaf_iter_test(void) { + struct ppn l4; + ASSERT(ram_alloc_frame_zeroed(&l4, RAM_PAGE_NORMAL)); + struct ppn l3; + ASSERT(ram_alloc_frame_zeroed(&l3, RAM_PAGE_NORMAL)); + + struct pt_entry ent; + ent.level = 4; + ent.present = true; + ent.writable = true; + ent.supervisor = false; + ent.writethrough = false; + ent.cache_disable = false; + ent.accessed = false; + ent.dirty = false; + ent.global = false; + ent.ppn = l3; + ent.hugepage = false; + ent.pat_bit = false; + + *(uint64_t*)pa_to_pointer(pa_from_ppn_with_offset(l4, 10 << 3)) = pt_entry_pack(&ent); + + ent.level = 3; + ent.ppn = ppn_from_pagenum(0); // 
should not matter + ent.hugepage = true; + + *(uint64_t*)pa_to_pointer(pa_from_ppn_with_offset(l3, 5 << 3)) = pt_entry_pack(&ent); + +#if 0 + struct pt_leaf leaf; + struct pt_leaf_iter it; + pt_leaf_iter_init(&it, l4); + while (true) { + if (!pt_leaf_iter_next(&it, &leaf)) { + break; + } + pt_entry_print(&leaf.ent); + } +#endif + +#if 0 + // --- leaf iter test --- + struct pt_leaf leaf; + struct pt_leaf_iter it; + pt_leaf_iter_init_current(&it); + while (true) { + if (!pt_leaf_iter_next(&it, &leaf)) { + break; + } + printf("va: %p\n", va_to_canonical(va_from_vpn(leaf.vpn_start))); + pt_entry_print(&leaf.ent); + } +#endif + +#if 1 + // --- mem iter test --- + struct mem_range range; + struct mem_range_iter it; + mem_range_iter_init_current(&it); + while (true) { + if (!mem_range_iter_next(&it, &range)) { + break; + } + mem_range_print(&range); + } +#endif +} From 3941fb0dfae5fd87a31320b8e39c9c5f6911d483 Mon Sep 17 00:00:00 2001 From: uosfz Date: Wed, 14 May 2025 19:17:05 +0200 Subject: [PATCH 09/32] remove old mem_range finder; put stuff in header --- include/x86_64/mem.h | 47 +++++++++++++----- src/kernel.c | 10 ---- src/x86_64/mem.c | 110 ++----------------------------------------- 3 files changed, 39 insertions(+), 128 deletions(-) diff --git a/include/x86_64/mem.h b/include/x86_64/mem.h index 5971f12..76a9061 100644 --- a/include/x86_64/mem.h +++ b/include/x86_64/mem.h @@ -47,6 +47,8 @@ struct pt_entry { // TODO MPK, NX }; +#define PT_IS_LEAF(ent) ((ent).level == 1 || (ent).hugepage) + uint64_t pt_entry_pack(const struct pt_entry *ent_in); void pt_entry_unpack(uint64_t ent_in, uint8_t level, struct pt_entry *ent_out); void pt_entry_print(const struct pt_entry *ent); @@ -70,7 +72,29 @@ void pt_free(struct ppn root); // TODO this is just for testing void pt_create_minimal(void); -// --- range finder --- +// --- iterators --- + +struct pt_leaf { + struct vpn vpn_start; + struct pt_entry ent; +}; + +struct access_level { + struct ppn ppn; + unsigned int index; +}; + 
+struct pt_leaf_iter { + bool done; + unsigned int depth; + struct access_level levels[4]; +}; + +void pt_leaf_iter_init(struct pt_leaf_iter *it, struct ppn cr3); +void pt_leaf_iter_init_current(struct pt_leaf_iter *it); +bool pt_leaf_iter_next(struct pt_leaf_iter *it, struct pt_leaf *leaf_out); + + struct mem_range { struct vpn vpn_start; @@ -78,20 +102,19 @@ struct mem_range { uint64_t npages; }; -void mem_range_print(const struct mem_range *mr); - -struct mem_range_buf { - struct mem_range *ptr; - uint64_t next_entry; - uint64_t num_entries; +struct mem_range_iter { + struct pt_leaf_iter leaf_it; + bool have_held_leaf; + struct pt_leaf held_leaf; }; -#define FOR_MEM_RANGE_IN(var, buf) for(struct mem_range *var = (buf)->ptr; \ - var < (buf)->ptr + (buf)->next_entry; \ - var++) -void pt_get_ranges(struct mem_range_buf *buf_out, struct ppn cr3); -void pt_get_ranges_current(struct mem_range_buf *buf_out); +void mem_range_iter_init(struct mem_range_iter *it, struct ppn cr3); +void mem_range_iter_init_current(struct mem_range_iter *it); +bool mem_range_iter_next(struct mem_range_iter *it, struct mem_range *range_out); + + +// TODO this is just for testing void pt_leaf_iter_test(void); #endif diff --git a/src/kernel.c b/src/kernel.c index 20afde7..3cff929 100644 --- a/src/kernel.c +++ b/src/kernel.c @@ -79,8 +79,6 @@ void check_initrd() { void console_init(void); -struct mem_range range_ptr[10]; - /****************************************** * Entry point, called by BOOTBOOT Loader * ******************************************/ @@ -162,14 +160,6 @@ void _start() { lapic_set_timer(); #endif -#if 1 - struct mem_range_buf buf_out = { .ptr = range_ptr, .next_entry = 0, .num_entries = 10 }; - pt_get_ranges_current(&buf_out); - FOR_MEM_RANGE_IN(curr_range, &buf_out) { - mem_range_print(curr_range); - } -#endif - #if 0 // get mmapents for (MMapEnt *mmap_ent = &bootboot.mmap; diff --git a/src/x86_64/mem.c b/src/x86_64/mem.c index f8aced3..771a710 100644 --- a/src/x86_64/mem.c 
+++ b/src/x86_64/mem.c @@ -418,8 +418,6 @@ void pt_free(struct ppn root) { TODO(); } -#define NUM_RANGES 30 -struct mem_range ranges[NUM_RANGES]; void pt_create_minimal(void) { // stack is currently mapped on the very last page @@ -456,16 +454,6 @@ void pt_create_minimal(void) { // we cannot access direct-mapped memory anymore! uint64_t value = pa_to_value(pa_from_ppn(new_cr3)); printf("new cr3: %p\n", value); - // check out that things are mapped sensibly - struct mem_range_buf buf = { - .ptr = ranges, - .next_entry = 0, - .num_entries = NUM_RANGES, - }; - pt_get_ranges(&buf, new_cr3); - FOR_MEM_RANGE_IN(m, &buf) { - mem_range_print(m); - } // set_cr3(value); printf("hello from new page table!\n"); @@ -483,7 +471,7 @@ void pt_create_minimal(void) { printf("done!\n"); } -// --- range finder --- +// --- new iterators --- void mem_range_print(const struct mem_range *mr) { uint64_t virt_canonical = va_to_canonical(va_from_vpn(mr->vpn_start)); @@ -515,92 +503,10 @@ static bool pt_contiguous(struct mem_range *mr, struct vpn vpn, struct pt_entry && ent->pat_bit == mr->entry_start.pat_bit; } -#define LEAF(ent) ((ent).level == 1 || (ent).hugepage) - -static void pt_get_ranges_rec(struct ppn ppn, int level, uint64_t virt_prev, - struct mem_range_buf *buf_out, struct mem_range *curr_range) -{ - struct pt_entry ent; - for (uint64_t i = 0; i < 512; i++) { - uint64_t entry = *(uint64_t*)pa_to_pointer(pa_from_ppn_with_offset(ppn, i << 3)); - pt_entry_unpack(entry, level, &ent); - if (!ent.present) { - continue; - } - - uint64_t virt_part = i << (12 + 9*(level - 1)); - uint64_t virt_new = virt_prev | virt_part; - - if (LEAF(ent)) { - struct vpn curr_vpn = vpn_from_aligned_va(va_from_value(virt_new)); - // for huge pages this is > 1 - uint64_t num_pages_covered = 1ull << (9*(level - 1)); - if (curr_range->npages == 0) { - curr_range->vpn_start = curr_vpn; - curr_range->entry_start = ent; - curr_range->npages = num_pages_covered; - } else { - if (pt_contiguous(curr_range, curr_vpn, 
&ent)) { - curr_range->npages += num_pages_covered; - } else { - // copy last range and start new one - ASSERT(buf_out->next_entry < buf_out->num_entries); - buf_out->ptr[buf_out->next_entry++] = *curr_range; - - curr_range->vpn_start = curr_vpn; - curr_range->entry_start = ent; - curr_range->npages = num_pages_covered; - } - } - } else { - pt_get_ranges_rec(ent.ppn, level - 1, virt_new, buf_out, curr_range); - } - } -} - -#define CR4_LA57 12 - -void pt_get_ranges(struct mem_range_buf *buf_out, struct ppn cr3) { - uint64_t cr4 = get_cr4(); - - // find out if we have 5 levels or 4 - if (cr4 & (1ull << CR4_LA57)) { - PANIC("we don't support 5-level page tables"); - } - - struct mem_range curr_range = { .npages = 0 }; - pt_get_ranges_rec(cr3, NUM_LEVELS, 0, buf_out, &curr_range); - // last range - ASSERT(buf_out->next_entry < buf_out->num_entries); - buf_out->ptr[buf_out->next_entry++] = curr_range; -} - -void pt_get_ranges_current(struct mem_range_buf *buf_out) { - pt_get_ranges(buf_out, get_cr3_ppn()); -} - -// --- new iterators --- - -struct access_level { - struct ppn ppn; - unsigned int index; -}; - -struct pa access_level_pa(struct access_level *ac) { +static struct pa access_level_pa(struct access_level *ac) { return pa_from_ppn_with_offset(ac->ppn, ac->index << 3); } -struct pt_leaf_iter { - bool done; - unsigned int depth; - struct access_level levels[4]; -}; - -struct pt_leaf { - struct vpn vpn_start; - struct pt_entry ent; -}; - void pt_leaf_iter_init(struct pt_leaf_iter *it, struct ppn cr3) { it->done = false; it->depth = 0; @@ -611,7 +517,7 @@ void pt_leaf_iter_init_current(struct pt_leaf_iter *it) { pt_leaf_iter_init(it, get_cr3_ppn()); } -struct vpn pt_leaf_iter_vpn(struct pt_leaf_iter *it) { +static struct vpn pt_leaf_iter_vpn(struct pt_leaf_iter *it) { uint64_t value = 0; for (unsigned int i = 0; i <= it->depth; i++) { int level = 4 - (int)i; @@ -637,8 +543,6 @@ static void pt_leaf_iter_advance(struct pt_leaf_iter *it) { } } -#define LEAF(ent) 
((ent).level == 1 || (ent).hugepage) - bool pt_leaf_iter_next(struct pt_leaf_iter *it, struct pt_leaf *leaf_out) { ASSERT(it != NULL && leaf_out != NULL); @@ -656,7 +560,7 @@ bool pt_leaf_iter_next(struct pt_leaf_iter *it, struct pt_leaf *leaf_out) { return false; } } else { - if (LEAF(ent)) { + if (PT_IS_LEAF(ent)) { *leaf_out = (struct pt_leaf){ .vpn_start = pt_leaf_iter_vpn(it), .ent = ent }; pt_leaf_iter_advance(it); return true; @@ -668,12 +572,6 @@ bool pt_leaf_iter_next(struct pt_leaf_iter *it, struct pt_leaf *leaf_out) { } } -struct mem_range_iter { - struct pt_leaf_iter leaf_it; - bool have_held_leaf; - struct pt_leaf held_leaf; -}; - static uint64_t leaf_level_to_npages(int level) { switch (level) { case 1: return 1; From 2a030d3c81d7f347f8f9787c353ec2b17cfb9a6b Mon Sep 17 00:00:00 2001 From: uosfz Date: Thu, 15 May 2025 16:16:24 +0200 Subject: [PATCH 10/32] first functional pt switch with some issues --- include/x86_64/mem.h | 2 ++ src/kernel.c | 6 +++--- src/x86_64/mem.c | 45 ++++++++++++++++++++++++++------------------ 3 files changed, 32 insertions(+), 21 deletions(-) diff --git a/include/x86_64/mem.h b/include/x86_64/mem.h index 76a9061..d131e87 100644 --- a/include/x86_64/mem.h +++ b/include/x86_64/mem.h @@ -102,6 +102,8 @@ struct mem_range { uint64_t npages; }; +void mem_range_print(const struct mem_range *mr); + struct mem_range_iter { struct pt_leaf_iter leaf_it; bool have_held_leaf; diff --git a/src/kernel.c b/src/kernel.c index 3cff929..5fcc951 100644 --- a/src/kernel.c +++ b/src/kernel.c @@ -156,7 +156,7 @@ void _start() { __asm__("sti"); -#if 1 +#if 0 // TODO disable for now lapic_set_timer(); #endif @@ -228,8 +228,8 @@ void _start() { } #endif - // pt_create_minimal(); - pt_leaf_iter_test(); + pt_create_minimal(); + // pt_leaf_iter_test(); // hang for now PANIC("end of kernel"); diff --git a/src/x86_64/mem.c b/src/x86_64/mem.c index 771a710..6b857a6 100644 --- a/src/x86_64/mem.c +++ b/src/x86_64/mem.c @@ -435,40 +435,49 @@ void 
pt_create_minimal(void) { struct pa curr_pa; bool success = pt_translate_current(curr_va, &curr_pa); if (success) { - printf("WARN: virt. address %p mapped\n", va_to_canonical(curr_va)); + // printf("WARN: virt. address %p mapped\n", va_to_canonical(curr_va)); pt_map_single(vpn_from_aligned_va(curr_va), ppn_from_aligned_pa(curr_pa), true /* writable */, true /* supervisor */, false /* not global */, new_cr3 ); - printf("WARN: virt. address %p mapped; done\n", va_to_canonical(curr_va)); + // printf("WARN: virt. address %p mapped; done\n", va_to_canonical(curr_va)); } else { count += 1; if ((count & 0xff) == 0) { - printf("WARN: virt. address %p not mapped (count %d)\n", va_to_canonical(curr_va), count); + // printf("WARN: virt. address %p not mapped (count %d)\n", va_to_canonical(curr_va), count); } } } printf("WARN: done\n"); - // switch to new page table, whose address is in new_cr3 - // we cannot access direct-mapped memory anymore! uint64_t value = pa_to_value(pa_from_ppn(new_cr3)); printf("new cr3: %p\n", value); - // set_cr3(value); + + // compare old and new to make sure we don't do any bullshit + printf("old ranges:\n"); + struct mem_range range; + struct mem_range_iter it; + mem_range_iter_init_current(&it); + while (true) { + if (!mem_range_iter_next(&it, &range)) { + break; + } + mem_range_print(&range); + } + + printf("new ranges:\n"); + mem_range_iter_init(&it, new_cr3); + while (true) { + if (!mem_range_iter_next(&it, &range)) { + break; + } + mem_range_print(&range); + } + + // switch + set_cr3(value); printf("hello from new page table!\n"); - - // do some random work to waste time - size_t w = 0; - size_t x = 0; - while (x < 1ull << 63) { - w += 1; - if (w == 1ull << 63) { - printf("plus!\n"); - x += 1; - } - } - printf("done!\n"); } // --- new iterators --- From 3dc1e36c2fdbe9035aa513bbccf08782f8fe3e54 Mon Sep 17 00:00:00 2001 From: uosfz Date: Wed, 21 May 2025 19:36:08 +0200 Subject: [PATCH 11/32] add -d int to qemu scripts --- debug-run-qemu.sh | 
2 +- run-qemu.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/debug-run-qemu.sh b/debug-run-qemu.sh index 7c521d4..5e004db 100755 --- a/debug-run-qemu.sh +++ b/debug-run-qemu.sh @@ -1,3 +1,3 @@ #!/bin/sh -qemu-system-x86_64 -drive if=ide,format=raw,file=build/disk.img -drive if=virtio,media=disk,format=raw,file=drive.img "$@" -S -gdb tcp::3117 +qemu-system-x86_64 -drive if=ide,format=raw,file=build/disk.img -drive if=virtio,media=disk,format=raw,file=drive.img -d int "$@" -S -gdb tcp::3117 diff --git a/run-qemu.sh b/run-qemu.sh index 7cf7857..2a31f73 100755 --- a/run-qemu.sh +++ b/run-qemu.sh @@ -1,3 +1,3 @@ #!/bin/sh -qemu-system-x86_64 -drive if=ide,format=raw,file=build/disk.img -drive if=virtio,media=disk,format=raw,file=drive.img "$@" +qemu-system-x86_64 -drive if=ide,format=raw,file=build/disk.img -drive if=virtio,media=disk,format=raw,file=drive.img -d int "$@" From 93cfbfc2f14b53d6035f74c02ec2785f942fc10e Mon Sep 17 00:00:00 2001 From: uosfz Date: Wed, 21 May 2025 21:03:41 +0200 Subject: [PATCH 12/32] custom identity map; hugepage mapping; fixed lapic bug after pt switch --- include/x86_64/address.h | 2 ++ include/x86_64/asm.h | 2 ++ src/kernel.c | 4 +++ src/x86_64/address.c | 11 ++++--- src/x86_64/apic.c | 25 ++++++--------- src/x86_64/mem.c | 66 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 90 insertions(+), 20 deletions(-) diff --git a/include/x86_64/address.h b/include/x86_64/address.h index e5cc0b9..3882456 100644 --- a/include/x86_64/address.h +++ b/include/x86_64/address.h @@ -45,4 +45,6 @@ struct ppn ppn_from_aligned_pa(struct pa pa); struct ppn ppn_from_unaligned_pa(struct pa pa); uint64_t ppn_to_pagenum(struct ppn ppn); +void set_identity_mapping(struct vpn first_page); + #endif diff --git a/include/x86_64/asm.h b/include/x86_64/asm.h index c184eeb..086d016 100644 --- a/include/x86_64/asm.h +++ b/include/x86_64/asm.h @@ -23,6 +23,8 @@ uint64_t get_cr3(void); void set_cr3(uint64_t value); uint64_t get_cr4(void); 
+#define X86_ASM_INT(value) __asm__("int $" #value :: ) + static inline uint64_t readmsr(uint32_t msr) { diff --git a/src/kernel.c b/src/kernel.c index 5fcc951..5c66a73 100644 --- a/src/kernel.c +++ b/src/kernel.c @@ -231,6 +231,10 @@ void _start() { pt_create_minimal(); // pt_leaf_iter_test(); + X86_ASM_INT(0xfe); + X86_ASM_INT(0xfe); + X86_ASM_INT(0xfe); + // hang for now PANIC("end of kernel"); } diff --git a/src/x86_64/address.c b/src/x86_64/address.c index d122371..78a01a1 100644 --- a/src/x86_64/address.c +++ b/src/x86_64/address.c @@ -1,10 +1,7 @@ #include "x86_64/address.h" #include "std.h" -#define MEM_SIZE (16ull * 1024 * 1024 * 1024) -#define IDMAP_END 0x400000000ull -#define IDMAP_START (IDMAP_END - MEM_SIZE) -#define PHYS_TO_IDMAPPED(addr) ((addr) + IDMAP_START) +uint64_t identity_mapping_start = 0; struct va va_from_value(uint64_t value) { ASSERT(value < (1ull << 48)); @@ -82,7 +79,7 @@ uint64_t pa_to_value(struct pa pa) { } void *pa_to_pointer(struct pa pa) { - return (void*)PHYS_TO_IDMAPPED(pa.value); + return (void*)(pa_to_value(pa) + identity_mapping_start); } uint64_t pa_offset(struct pa pa) { @@ -106,3 +103,7 @@ struct ppn ppn_from_unaligned_pa(struct pa pa) { uint64_t ppn_to_pagenum(struct ppn ppn) { return ppn.pagenum; } + +void set_identity_mapping(struct vpn first_page) { + identity_mapping_start = va_to_value(va_from_vpn(first_page)); +} diff --git a/src/x86_64/apic.c b/src/x86_64/apic.c index 40c14bc..1e3da13 100644 --- a/src/x86_64/apic.c +++ b/src/x86_64/apic.c @@ -1,12 +1,8 @@ #include "x86_64/apic.h" +#include "x86_64/address.h" #include "std.h" -#define MEM_SIZE (16ull * 1024 * 1024 * 1024) -#define IDMAP_END 0x400000000ull -#define IDMAP_START (IDMAP_END - MEM_SIZE) -#define PHYS_TO_IDMAPPED(addr) ((addr) + IDMAP_START) - -#define LAPIC_BASE 0xFEE00000 +#define LAPIC_BASE_PHYS 0xFEE00000 #define SPURIOUS_VECTOR_APIC_ENABLE 0x100 @@ -47,31 +43,30 @@ struct lapic { struct lapic_register timer_divisor; struct lapic_register 
reserved4[1]; }; - STATIC_ASSERT(sizeof (struct lapic) == 0x400); -static struct lapic *lapic = (struct lapic *)PHYS_TO_IDMAPPED(LAPIC_BASE); +#define LAPIC_ADDR ((struct lapic *)pa_to_pointer(pa_from_value(LAPIC_BASE_PHYS))) void lapic_init(void) { - ASSERT((lapic->lapic_version.value & 0xff) <= 0x15); + ASSERT((LAPIC_ADDR->lapic_version.value & 0xff) <= 0x15); - lapic->spurious_vector.value = SPURIOUS_VECTOR_APIC_ENABLE | 0xff; + LAPIC_ADDR->spurious_vector.value = SPURIOUS_VECTOR_APIC_ENABLE | 0xff; } unsigned lapic_get_id(void) { - return lapic->lapic_id.value >> 24; + return LAPIC_ADDR->lapic_id.value >> 24; } void lapic_eoi(void) { - lapic->eoi.value = 0; + LAPIC_ADDR->eoi.value = 0; } void lapic_set_timer(void) { - lapic->timer_divisor.value = 0b1010; - lapic->lvt_timer.value = 0xFE | (1 << 17); - lapic->timer_initial.value = 0x1000000; + LAPIC_ADDR->timer_divisor.value = 0b1010; + LAPIC_ADDR->lvt_timer.value = 0xFE | (1 << 17); + LAPIC_ADDR->timer_initial.value = 0x1000000; } diff --git a/src/x86_64/mem.c b/src/x86_64/mem.c index 6b857a6..82b203c 100644 --- a/src/x86_64/mem.c +++ b/src/x86_64/mem.c @@ -413,6 +413,61 @@ bool pt_map_range_current(struct vpn virt, struct ppn phys, uint64_t num_pages, return pt_map_range(virt, phys, num_pages, writable, supervisor, global, get_cr3_ppn()); } +bool pt_map_huge(struct vpn virt, struct ppn phys, + bool writable, bool supervisor, bool global, + struct ppn cr3) +{ + ASSERT((va_to_value(va_from_vpn(virt)) & ((1ull << 30) - 1)) == 0); + ASSERT((pa_to_value(pa_from_ppn(phys)) & ((1ull << 30) - 1)) == 0); + + // l4 page table is mapped and is at address cr3 + // get l3 page table + uint64_t idx = (vpn_to_pagenum(virt) >> 27) & 0x1ff; + uint64_t *entry_ptr = pa_to_pointer(pa_from_ppn_with_offset(cr3, idx << 3)); + + struct pt_entry ent; + pt_entry_unpack(*entry_ptr, 4, &ent); + + if (!ent.present) { + ASSERT(ram_alloc_frame_zeroed(&ent.ppn, RAM_PAGE_NORMAL)); + ent.level = 4; + ent.present = true; + // maximum privileges 
in upper level + ent.writable = true; + ent.supervisor = false; + ent.writethrough = false; + ent.cache_disable = false; + ent.accessed = false; + ent.dirty = false; + ent.global = false; // TODO should this be true for lower global mappings? probably + ent.hugepage = false; + ent.pat_bit = false; + // add into page table + *entry_ptr = pt_entry_pack(&ent); + } + + struct ppn l3_pt = ent.ppn; + idx = (vpn_to_pagenum(virt) >> 18) & 0x1ff; + entry_ptr = pa_to_pointer(pa_from_ppn_with_offset(l3_pt, idx << 3)); + pt_entry_unpack(*entry_ptr, 3, &ent); + ASSERT(!ent.present); + + ent.level = 3; + ent.present = true; + ent.writable = writable; + ent.supervisor = supervisor; + ent.writethrough = false; + ent.cache_disable = false; + ent.accessed = false; + ent.dirty = false; + ent.global = global; + ent.hugepage = true; + ent.pat_bit = false; + + *entry_ptr = pt_entry_pack(&ent); + return true; +} + void pt_free(struct ppn root) { // this assumes single ownership TODO(); @@ -450,6 +505,17 @@ void pt_create_minimal(void) { } printf("WARN: done\n"); + // map physical memory starting at 1 GiB (skip 1st hugepage) + for (uint64_t i = 0; i < 16; i++) { + pt_map_huge( + vpn_from_aligned_va(va_from_value((i + 1) << 30)), + ppn_from_aligned_pa(pa_from_value(i << 30)), + true, true, false, + new_cr3 + ); + } + set_identity_mapping(vpn_from_aligned_va(va_from_value(1ull << 30))); + uint64_t value = pa_to_value(pa_from_ppn(new_cr3)); printf("new cr3: %p\n", value); From 604e9bc32d9f327f10180907df39ca4b6c6587af Mon Sep 17 00:00:00 2001 From: uosfz Date: Wed, 21 May 2025 21:30:24 +0200 Subject: [PATCH 13/32] cleanup with pt init and traverse --- src/x86_64/mem.c | 106 +++++++++++++++++++++++++++++------------------ 1 file changed, 65 insertions(+), 41 deletions(-) diff --git a/src/x86_64/mem.c b/src/x86_64/mem.c index 82b203c..54afcba 100644 --- a/src/x86_64/mem.c +++ b/src/x86_64/mem.c @@ -207,6 +207,45 @@ void pt_entry_unpack(uint64_t ent_in, uint8_t level, struct pt_entry *ent_out) { 
} } +void pt_entry_init_nonleaf(struct pt_entry *ent, uint8_t level, struct ppn ppn) { + ASSERT(level == 4 || level == 3 || level == 2); + + ent->level = level; + ent->present = true; + // maximum privileges in upper level + ent->writable = true; + ent->supervisor = false; + ent->writethrough = false; + ent->cache_disable = false; + ent->accessed = false; + ent->dirty = false; + ent->global = false; // ignored + ent->ppn = ppn; + ent->hugepage = false; + ent->pat_bit = false; + +} + +void pt_entry_init_leaf(struct pt_entry *ent, uint8_t level, + bool writable, bool supervisor, bool global, + struct ppn ppn) +{ + ASSERT(level == 3 || level == 2 || level == 1); + + ent->level = level; + ent->present = true; + ent->writable = writable; + ent->supervisor = supervisor; + ent->writethrough = false; + ent->cache_disable = false; + ent->accessed = false; + ent->dirty = false; + ent->global = global; + ent->ppn = ppn; + ent->hugepage = (level == 3 || level == 2); + ent->pat_bit = false; +} + static uint8_t pt_entry_to_pat_index(const struct pt_entry *ent) { return ((uint8_t)ent->pat_bit) << 2 | ((uint8_t)ent->cache_disable) << 1 @@ -269,11 +308,30 @@ static struct ppn get_cr3_ppn(void) { return ppn_from_aligned_pa(pa_from_value(cr3_val & 0x000ffffffffff000ull)); } +struct ppn traverse_with_alloc(struct ppn ppn, int level, uint64_t idx) { + ASSERT(idx < 512); + ASSERT(level == 4 || level == 3 || level == 2); + + uint64_t *entry_ptr = (uint64_t*)pa_to_pointer(pa_from_ppn_with_offset(ppn, idx << 3)); + + struct pt_entry ent; + pt_entry_unpack(*entry_ptr, level, &ent); + + if (!ent.present) { + struct ppn ppn; + ASSERT(ram_alloc_frame_zeroed(&ppn, RAM_PAGE_NORMAL)); + pt_entry_init_nonleaf(&ent, level, ppn); + // add into page table + *entry_ptr = pt_entry_pack(&ent); + } + return ent.ppn; +} + + #define NUM_LEVELS 4 // TODO for inspection, we need to accumulate permissions over all levels // (upper supervisor bit will mean lower PTs are also supervisor, even without their bit 
set) - static uint64_t *pt_get_leaf_ptr(struct vpn vpn, struct ppn cr3, bool alloc, int *level_out) { uint64_t va_value = va_to_value(va_from_vpn(vpn)); int level = NUM_LEVELS; @@ -417,53 +475,19 @@ bool pt_map_huge(struct vpn virt, struct ppn phys, bool writable, bool supervisor, bool global, struct ppn cr3) { - ASSERT((va_to_value(va_from_vpn(virt)) & ((1ull << 30) - 1)) == 0); - ASSERT((pa_to_value(pa_from_ppn(phys)) & ((1ull << 30) - 1)) == 0); + ASSERT((vpn_to_pagenum(virt) & ((1ull << 18) - 1)) == 0); + ASSERT((ppn_to_pagenum(phys) & ((1ull << 18) - 1)) == 0); - // l4 page table is mapped and is at address cr3 - // get l3 page table uint64_t idx = (vpn_to_pagenum(virt) >> 27) & 0x1ff; - uint64_t *entry_ptr = pa_to_pointer(pa_from_ppn_with_offset(cr3, idx << 3)); + struct ppn l3_pt = traverse_with_alloc(cr3, 4, idx); - struct pt_entry ent; - pt_entry_unpack(*entry_ptr, 4, &ent); - - if (!ent.present) { - ASSERT(ram_alloc_frame_zeroed(&ent.ppn, RAM_PAGE_NORMAL)); - ent.level = 4; - ent.present = true; - // maximum privileges in upper level - ent.writable = true; - ent.supervisor = false; - ent.writethrough = false; - ent.cache_disable = false; - ent.accessed = false; - ent.dirty = false; - ent.global = false; // TODO should this be true for lower global mappings? 
probably - ent.hugepage = false; - ent.pat_bit = false; - // add into page table - *entry_ptr = pt_entry_pack(&ent); - } - - struct ppn l3_pt = ent.ppn; idx = (vpn_to_pagenum(virt) >> 18) & 0x1ff; - entry_ptr = pa_to_pointer(pa_from_ppn_with_offset(l3_pt, idx << 3)); + uint64_t *entry_ptr = pa_to_pointer(pa_from_ppn_with_offset(l3_pt, idx << 3)); + struct pt_entry ent; pt_entry_unpack(*entry_ptr, 3, &ent); ASSERT(!ent.present); - ent.level = 3; - ent.present = true; - ent.writable = writable; - ent.supervisor = supervisor; - ent.writethrough = false; - ent.cache_disable = false; - ent.accessed = false; - ent.dirty = false; - ent.global = global; - ent.hugepage = true; - ent.pat_bit = false; - + pt_entry_init_leaf(&ent, 3, writable, supervisor, global, phys); *entry_ptr = pt_entry_pack(&ent); return true; } From 69457798d1f86b64c872dd0ca95f423c9df67183 Mon Sep 17 00:00:00 2001 From: uosfz Date: Thu, 22 May 2025 16:55:33 +0200 Subject: [PATCH 14/32] distribute mem.c to different source files; some more paging convenience and fixes --- Makefile | 3 +- include/x86_64/address.h | 5 + include/x86_64/cpu.h | 12 + include/x86_64/{mem.h => paging.h} | 51 ++-- src/kernel.c | 9 +- src/x86_64/address.c | 18 ++ src/x86_64/cpu.c | 113 ++++++++ src/x86_64/{mem.c => paging.c} | 441 ++++++++--------------------- 8 files changed, 302 insertions(+), 350 deletions(-) create mode 100644 include/x86_64/cpu.h rename include/x86_64/{mem.h => paging.h} (76%) create mode 100644 src/x86_64/cpu.c rename src/x86_64/{mem.c => paging.c} (64%) diff --git a/Makefile b/Makefile index a15b8a5..4533d8b 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,8 @@ KERNEL_SOURCES_x86_64 := \ src/x86_64/apic.c \ src/x86_64/loadcs.S \ src/x86_64/uart.c \ - src/x86_64/mem.c \ + src/x86_64/cpu.c \ + src/x86_64/paging.c \ src/x86_64/asm.c \ src/x86_64/address.c \ # end of x86_64 specific kernel sources list diff --git a/include/x86_64/address.h b/include/x86_64/address.h index 3882456..0e8c281 100644 --- 
a/include/x86_64/address.h +++ b/include/x86_64/address.h @@ -1,6 +1,7 @@ #ifndef KARLOS_ADDRESS_H #define KARLOS_ADDRESS_H +#include <stdbool.h> #include <stdint.h> // do not use these fields directly! use *_to_* functions. @@ -32,10 +33,13 @@ struct vpn vpn_from_pagenum(uint64_t pagenum); struct vpn vpn_from_aligned_va(struct va va); struct vpn vpn_from_unaligned_va(struct va va); uint64_t vpn_to_pagenum(struct vpn vpn); +unsigned int vpn_level_idx(struct vpn vpn, unsigned int level); +bool vpn_is_huge_1gb(struct vpn vpn); struct pa pa_from_value(uint64_t value); struct pa pa_from_ppn(struct ppn ppn); struct pa pa_from_ppn_with_offset(struct ppn ppn, uint64_t offset); +struct pa pa_from_pt_with_idx(struct ppn ppn, unsigned int idx); uint64_t pa_to_value(struct pa pa); void *pa_to_pointer(struct pa pa); uint64_t pa_offset(struct pa pa); @@ -44,6 +48,7 @@ struct ppn ppn_from_pagenum(uint64_t pagenum); struct ppn ppn_from_aligned_pa(struct pa pa); struct ppn ppn_from_unaligned_pa(struct pa pa); uint64_t ppn_to_pagenum(struct ppn ppn); +bool ppn_is_huge_1gb(struct ppn ppn); void set_identity_mapping(struct vpn first_page); diff --git a/include/x86_64/cpu.h b/include/x86_64/cpu.h new file mode 100644 index 0000000..ea39430 --- /dev/null +++ b/include/x86_64/cpu.h @@ -0,0 +1,12 @@ +#include <stdint.h> + +void init_gdt(); + +// make sure the function you register is __attribute__ ((interrupt)) and does a lapic_eoi() +// also make sure to call load_idt afterwards +void interrupt_handler_register(uint8_t num, void (*func)(void *)); +void load_idt(); + +// don't call this, only register for testing +__attribute__ ((interrupt)) +void basic_interrupt_handler(void *ptr); diff --git a/include/x86_64/mem.h b/include/x86_64/paging.h similarity index 76% rename from include/x86_64/mem.h rename to include/x86_64/paging.h index d131e87..36ba708 100644 --- a/include/x86_64/mem.h +++ b/include/x86_64/paging.h @@ -1,33 +1,20 @@ -#ifndef KARLOS_MEM_H -#define KARLOS_MEM_H +#ifndef KARLOS_PAGING_H +#define 
KARLOS_PAGING_H #include #include #include "x86_64/address.h" -void init_gdt(); -void init_idt(); +// --- init --- -// --- paging --- -// -// We don't use the PAT functionality. We set all bits (writethrough, cache_disable, PAT) to 0. -// For access protection we only use MTRRs. +void init_paging(); -void init_paging(void); - -enum page_attr { - PA_UC = 0x00, - PA_WC = 0x01, - PA_WT = 0x04, - PA_WP = 0x05, - PA_WB = 0x06, - PA_UCMINUS = 0x07 -}; +// --- entries --- struct pt_entry { // fields not in actual entry - uint8_t level; // possible values are 1..=4 + unsigned int level; // possible values are 1..=4 // fields that are level-independent bool present; @@ -50,12 +37,20 @@ struct pt_entry { #define PT_IS_LEAF(ent) ((ent).level == 1 || (ent).hugepage) uint64_t pt_entry_pack(const struct pt_entry *ent_in); -void pt_entry_unpack(uint64_t ent_in, uint8_t level, struct pt_entry *ent_out); +void pt_entry_unpack(uint64_t ent_in, unsigned int level, struct pt_entry *ent_out); +void pt_entry_init_nonleaf(struct pt_entry *ent, unsigned int level, struct ppn ppn); +void pt_entry_init_leaf(struct pt_entry *ent, unsigned int level, + bool writable, bool supervisor, bool global, + struct ppn ppn); void pt_entry_print(const struct pt_entry *ent); +// --- traversal --- + bool pt_translate(struct va va, struct ppn cr3, struct pa *pa_out); bool pt_translate_current(struct va va, struct pa *pa_out); +// --- mapping --- + bool pt_map_single(struct vpn virt, struct ppn phys, bool writable, bool supervisor, bool global, struct ppn cr3); @@ -66,10 +61,13 @@ bool pt_map_range(struct vpn virt, struct ppn phys, uint64_t num_pages, struct ppn cr3); bool pt_map_range_current(struct vpn virt, struct ppn phys, uint64_t num_pages, bool writable, bool supervisor, bool global); - +bool pt_map_huge_1gb(struct vpn virt, struct ppn phys, + bool writable, bool supervisor, bool global, + struct ppn cr3); void pt_free(struct ppn root); -// TODO this is just for testing +// --- page table creation 
--- + void pt_create_minimal(void); // --- iterators --- @@ -81,7 +79,7 @@ struct pt_leaf { struct access_level { struct ppn ppn; - unsigned int index; + unsigned int idx; }; struct pt_leaf_iter { @@ -94,8 +92,6 @@ void pt_leaf_iter_init(struct pt_leaf_iter *it, struct ppn cr3); void pt_leaf_iter_init_current(struct pt_leaf_iter *it); bool pt_leaf_iter_next(struct pt_leaf_iter *it, struct pt_leaf *leaf_out); - - struct mem_range { struct vpn vpn_start; struct pt_entry entry_start; @@ -114,9 +110,4 @@ void mem_range_iter_init(struct mem_range_iter *it, struct ppn cr3); void mem_range_iter_init_current(struct mem_range_iter *it); bool mem_range_iter_next(struct mem_range_iter *it, struct mem_range *range_out); - - -// TODO this is just for testing -void pt_leaf_iter_test(void); - #endif diff --git a/src/kernel.c b/src/kernel.c index 5c66a73..61037d5 100644 --- a/src/kernel.c +++ b/src/kernel.c @@ -33,7 +33,8 @@ #include "x86_64/apic.h" #include "x86_64/asm.h" #include "x86_64/address.h" -#include "x86_64/mem.h" +#include "x86_64/cpu.h" +#include "x86_64/paging.h" #include "bootboot.h" #include "ram.h" @@ -126,9 +127,11 @@ void _start() { } #endif - // memory stuff init_gdt(); - init_idt(); + + interrupt_handler_register(0xfe, basic_interrupt_handler); + load_idt(); + lapic_init(); ram_init(); diff --git a/src/x86_64/address.c b/src/x86_64/address.c index 78a01a1..a0321c0 100644 --- a/src/x86_64/address.c +++ b/src/x86_64/address.c @@ -60,6 +60,16 @@ uint64_t vpn_to_pagenum(struct vpn vpn) { return vpn.pagenum; } +unsigned int vpn_level_idx(struct vpn vpn, unsigned int level) { + ASSERT(level >= 1 && level <= 4); + + return (vpn_to_pagenum(vpn) >> ((level - 1)*9)) & 0x1ffull; +} + +bool vpn_is_huge_1gb(struct vpn vpn) { + return (vpn_to_pagenum(vpn) & ((1ull << 18) - 1)) == 0; +} + struct pa pa_from_value(uint64_t value) { ASSERT(value < (1ull << 36)); return (struct pa){ .value = value }; @@ -74,6 +84,10 @@ struct pa pa_from_ppn_with_offset(struct ppn ppn, uint64_t 
offset) { return pa_from_value((ppn.pagenum << 12) + offset); } +struct pa pa_from_pt_with_idx(struct ppn ppn, unsigned int idx) { + return pa_from_ppn_with_offset(ppn, (uint64_t)idx << 3); +} + uint64_t pa_to_value(struct pa pa) { return pa.value; } @@ -104,6 +118,10 @@ uint64_t ppn_to_pagenum(struct ppn ppn) { return ppn.pagenum; } +bool ppn_is_huge_1gb(struct ppn ppn) { + return (ppn_to_pagenum(ppn) & ((1ull << 18) - 1)) == 0; +} + void set_identity_mapping(struct vpn first_page) { identity_mapping_start = va_to_value(va_from_vpn(first_page)); } diff --git a/src/x86_64/cpu.c b/src/x86_64/cpu.c new file mode 100644 index 0000000..4246493 --- /dev/null +++ b/src/x86_64/cpu.c @@ -0,0 +1,113 @@ +#include "x86_64/cpu.h" +#include "x86_64/apic.h" +#include "std.h" + +// --- segmentation --- + +extern void loadcs(uint16_t ss, uint16_t cs); + +static uint64_t gdt[3]; + +static void write_segment_descriptor(uint64_t *entry, uint8_t dpl, uint8_t executable) { + uint8_t access_byte = (1 << 7) // present bit + | (dpl << 5) | (1 << 4) // S + | (executable << 3) | (0 << 2) // DC + | (1 << 1) // RW + | (0 << 0); // A + + uint8_t flags = (1 << 3) // G + | (0 << 2) // DB + | (1 << 1) // L + | (0 << 0); // reserved + + *entry = 0xffff // limit + | ((uint64_t)0x0000 << 16) // base + | ((uint64_t)0x00 << 32) // base + | ((uint64_t)access_byte << 40) // access byte + | ((uint64_t)0xf << 48) // limit + | ((uint64_t)flags << 52) // flags + | ((uint64_t)0x00 << 56); // base +} + +#define CODE_SEGMENT 1 +#define DATA_SEGMENT 2 + +void init_gdt() { + gdt[0] = 0; + write_segment_descriptor(&gdt[CODE_SEGMENT], 0, 1); + write_segment_descriptor(&gdt[DATA_SEGMENT], 0, 0); + + uint8_t gdtr[10]; + + *(uint16_t *)gdtr = sizeof(gdt) - 1; + *(uint64_t **)(gdtr + 2) = (uint64_t *)gdt; + + __asm__("lgdt (%0)" ::"r"(gdtr)); + + __asm__("mov %0, %%es" ::"r"(DATA_SEGMENT << 3)); + __asm__("mov %0, %%ds" ::"r"(DATA_SEGMENT << 3)); + __asm__("mov %0, %%fs" ::"r"(DATA_SEGMENT << 3)); + __asm__("mov %0, 
%%gs" ::"r"(DATA_SEGMENT << 3)); + loadcs(DATA_SEGMENT << 3, CODE_SEGMENT << 3); +} + +// --- interrupts --- + +struct int_desc_entry { + uint16_t offset1; + uint16_t selector; + uint8_t ist; + uint8_t attr; + uint16_t offset2; + uint32_t offset3; + uint32_t pad; +}; +STATIC_ASSERT(sizeof(struct int_desc_entry) == 16); + +#define PRIV_KERNEL 0 +#define PRIV_USER 3 + +static void write_segment_selector(uint16_t *ss, uint16_t index) { + *ss = (uint16_t)PRIV_KERNEL + | 0 // use GDT + | (index << 3); +} + +#define GATE_TYPE_INTERRUPT 0xe + +static void write_int_desc_entry(struct int_desc_entry *e, uint64_t offset) { + e->offset1 = (uint16_t)(offset & 0xffff); + write_segment_selector(&e->selector, CODE_SEGMENT); + e->ist = 0; + e->attr = (uint8_t)GATE_TYPE_INTERRUPT + | (uint8_t)(PRIV_KERNEL << 5) + | (uint8_t)(1 << 7); // present + e->offset2 = (uint16_t) ((offset >> 16) & 0xffff); + e->offset3 = (uint32_t) ((offset >> 32) & 0xffffffff); + e->pad = 0; +} + +__attribute__ ((interrupt)) +void basic_interrupt_handler(void *ptr) { + static uint64_t interrupt_counter = 0; + + printf("Hello Interrupt %lu!\n", interrupt_counter); + interrupt_counter++; + lapic_eoi(); +} + +#define NUM_INTERRUPTS 256 +static struct int_desc_entry idt[NUM_INTERRUPTS]; + +void interrupt_handler_register(uint8_t num, void (*func)(void *)) { + write_int_desc_entry(&idt[num], (uint64_t)(intptr_t)func); +} + +void load_idt() { + uint8_t idtr[10]; + + *(uint16_t *)idtr = sizeof(idt) - 1; + *(uint64_t **)(idtr + 2) = (uint64_t *)idt; + + __asm__("lidt (%0)" ::"r"(idtr)); +} diff --git a/src/x86_64/mem.c b/src/x86_64/paging.c similarity index 64% rename from src/x86_64/mem.c rename to src/x86_64/paging.c index 54afcba..cce1f13 100644 --- a/src/x86_64/mem.c +++ b/src/x86_64/paging.c @@ -1,118 +1,9 @@ -#include - -#include "x86_64/asm.h" -#include "x86_64/mem.h" - #include "std.h" #include "ram.h" -#include "x86_64/apic.h" - -extern void loadcs(uint16_t ss, uint16_t cs); - -static uint64_t gdt[3]; - 
-static void write_segment_descriptor(uint64_t *entry, uint8_t dpl, uint8_t executable) { - uint8_t access_byte = (1 << 7) // present bit - | (dpl << 5) | (1 << 4) // S - | (executable << 3) | (0 << 2) // DC - | (1 << 1) // RW - | (0 << 0); // A - - uint8_t flags = (1 << 3) // G - | (0 << 2) // DB - | (1 << 1) // L - | (0 << 0); // reserved - - *entry = 0xffff // limit - | ((uint64_t)0x0000 << 16) // base - | ((uint64_t)0x00 << 32) // base - | ((uint64_t)access_byte << 40) // access byte - | ((uint64_t)0xf << 48) // limit - | ((uint64_t)flags << 52) // flags - | ((uint64_t)0x00 << 56); // base -} - -#define CODE_SEGMENT 1 -#define DATA_SEGMENT 2 - -void init_gdt() { - gdt[0] = 0; - write_segment_descriptor(&gdt[CODE_SEGMENT], 0, 1); - write_segment_descriptor(&gdt[DATA_SEGMENT], 0, 0); - - uint8_t gdtr[10]; - - *(uint16_t *)gdtr = sizeof(gdt) - 1; - *(uint64_t **)(gdtr + 2) = (uint64_t *)gdt; - - __asm__("lgdt (%0)" ::"r"(gdtr)); - - __asm__("mov %0, %%es" ::"r"(DATA_SEGMENT << 3)); - __asm__("mov %0, %%ds" ::"r"(DATA_SEGMENT << 3)); - __asm__("mov %0, %%fs" ::"r"(DATA_SEGMENT << 3)); - __asm__("mov %0, %%gs" ::"r"(DATA_SEGMENT << 3)); - loadcs(DATA_SEGMENT << 3, CODE_SEGMENT << 3); -} - -struct int_desc_entry { - uint16_t offset1; - uint16_t selector; - uint8_t ist; - uint8_t attr; - uint16_t offset2; - uint32_t offset3; - uint32_t pad; -}; -STATIC_ASSERT(sizeof(struct int_desc_entry) == 16); - -#define PRIV_KERNEL 0 -#define PRIV_USER 3 - -static void write_segment_selector(uint16_t *ss, uint16_t index) { - *ss = (uint16_t)PRIV_KERNEL - | 0 // use GDT - | (index << 3); -} - -#define GATE_TYPE_INTERRUPT 0xe - -static void write_int_desc_entry(struct int_desc_entry *e, uint64_t offset) { - e->offset1 = (uint16_t)(offset & 0xffff); - write_segment_selector(&e->selector, CODE_SEGMENT); - e->ist = 0; - e->attr = (uint8_t)GATE_TYPE_INTERRUPT - | (uint8_t)(PRIV_KERNEL << 5) - | (uint8_t)(1 << 7); // present - e->offset2 = (uint16_t) ((offset >> 16) & 0xffff); - 
e->offset3 = (uint32_t) ((offset >> 32) & 0xffffffff); - e->pad = 0; -} - -static uint64_t interrupt_counter; - -__attribute__ ((interrupt)) -void basic_interrupt_handler(void *ptr) { - printf("Hello Interrupt %lu!\n", interrupt_counter); - interrupt_counter++; - lapic_eoi(); -} - -#define NUM_INTERRUPTS 256 -static struct int_desc_entry idt[NUM_INTERRUPTS]; - -void init_idt() { - write_int_desc_entry(&idt[0xFE], (uint64_t)(intptr_t)basic_interrupt_handler); - - uint8_t idtr[10]; - - *(uint16_t *)idtr = sizeof(idt) - 1; - *(uint64_t **)(idtr + 2) = (uint64_t *)idt; - - __asm__("lidt (%0)" ::"r"(idtr)); -} - -// --- paging --- +#include "x86_64/asm.h" +#include "x86_64/paging.h" +// --- init --- void init_paging() { // assert paging enabled (PG) ASSERT((get_cr0() >> 31) & 1ull); @@ -124,19 +15,30 @@ void init_paging() { // TODO check that everything is setup correctly // - CR0.WP // - Long-Mode Active (EFER.LMA) - // - PAT index 000 points to default strategy + // - PAT idx 000 points to default strategy // - See if NX bits are used and decide if we want to // - See of MPK is used and decide if we want to // - SMEP/SMAP? // - ... 
} +// --- pat --- + +enum page_attr { + PA_UC = 0x00, + PA_WC = 0x01, + PA_WT = 0x04, + PA_WP = 0x05, + PA_WB = 0x06, + PA_UCMINUS = 0x07 +}; + #define MSR_PAT 0x277 -enum page_attr get_pa(uint8_t index) { - ASSERT(index < 8); +static enum page_attr get_pa(unsigned int idx) { + ASSERT(idx < 8); uint64_t value = readmsr(MSR_PAT); - value = (value >> (index << 3)) & 0x7; + value = (value >> (idx << 3)) & 0x7; ASSERT(value == PA_UC || value == PA_WC || value == PA_WT @@ -146,6 +48,33 @@ enum page_attr get_pa(uint8_t index) { return value; } +static uint8_t pt_entry_to_pat_idx(const struct pt_entry *ent) { + return ((uint8_t)ent->pat_bit) << 2 + | ((uint8_t)ent->cache_disable) << 1 + | ((uint8_t)ent->writethrough); +} + +static const char *page_attr_to_str(enum page_attr attr) { + switch (attr) { + case PA_UC: + return "UC (uncacheable)"; + case PA_WC: + return "WC (write-combining)"; + case PA_WT: + return "WT (write-through)"; + case PA_WP: + return "WP (write-protect)"; + case PA_WB: + return "WB (write-back)"; + case PA_UCMINUS: + return "UC- (uncacheable minus)"; + default: + UNREACHABLE(); + } +} + +// --- entries --- + uint64_t pt_entry_pack(const struct pt_entry *ent_in) { uint64_t retval = (uint64_t)ent_in->present | ((uint64_t)ent_in->writable) << 1 @@ -176,7 +105,7 @@ uint64_t pt_entry_pack(const struct pt_entry *ent_in) { } } -void pt_entry_unpack(uint64_t ent_in, uint8_t level, struct pt_entry *ent_out) { +void pt_entry_unpack(uint64_t ent_in, unsigned int level, struct pt_entry *ent_out) { ent_out->level = level; ent_out->present = (ent_in & (0x1ull << 0)) != 0; ent_out->writable = (ent_in & (0x1ull << 1)) != 0; @@ -207,7 +136,7 @@ void pt_entry_unpack(uint64_t ent_in, uint8_t level, struct pt_entry *ent_out) { } } -void pt_entry_init_nonleaf(struct pt_entry *ent, uint8_t level, struct ppn ppn) { +void pt_entry_init_nonleaf(struct pt_entry *ent, unsigned int level, struct ppn ppn) { ASSERT(level == 4 || level == 3 || level == 2); ent->level = level; @@ 
-223,10 +152,9 @@ void pt_entry_init_nonleaf(struct pt_entry *ent, uint8_t level, struct ppn ppn) ent->ppn = ppn; ent->hugepage = false; ent->pat_bit = false; - } -void pt_entry_init_leaf(struct pt_entry *ent, uint8_t level, +void pt_entry_init_leaf(struct pt_entry *ent, unsigned int level, bool writable, bool supervisor, bool global, struct ppn ppn) { @@ -246,31 +174,6 @@ void pt_entry_init_leaf(struct pt_entry *ent, uint8_t level, ent->pat_bit = false; } -static uint8_t pt_entry_to_pat_index(const struct pt_entry *ent) { - return ((uint8_t)ent->pat_bit) << 2 - | ((uint8_t)ent->cache_disable) << 1 - | ((uint8_t)ent->writethrough); -} - -static const char *page_attr_to_str(enum page_attr attr) { - switch (attr) { - case PA_UC: - return "UC (uncacheable)"; - case PA_WC: - return "WC (write-combining)"; - case PA_WT: - return "WT (write-through)"; - case PA_WP: - return "WP (write-protect)"; - case PA_WB: - return "WB (write-back)"; - case PA_UCMINUS: - return "UC- (uncacheable minus)"; - default: - UNREACHABLE(); - } -} - void pt_entry_print(const struct pt_entry *ent) { printf("pt_entry {\n"); printf(" level: %d\n", (int)ent->level); @@ -298,47 +201,27 @@ void pt_entry_print(const struct pt_entry *ent) { } // TODO right now we assume PAT is present - printf(" caching: %s\n", page_attr_to_str(get_pa(pt_entry_to_pat_index(ent)))); + printf(" caching: %s\n", page_attr_to_str(get_pa(pt_entry_to_pat_idx(ent)))); printf("}\n"); } +// --- convenience --- + static struct ppn get_cr3_ppn(void) { uint64_t cr3_val = get_cr3(); return ppn_from_aligned_pa(pa_from_value(cr3_val & 0x000ffffffffff000ull)); } -struct ppn traverse_with_alloc(struct ppn ppn, int level, uint64_t idx) { - ASSERT(idx < 512); - ASSERT(level == 4 || level == 3 || level == 2); - - uint64_t *entry_ptr = (uint64_t*)pa_to_pointer(pa_from_ppn_with_offset(ppn, idx << 3)); - - struct pt_entry ent; - pt_entry_unpack(*entry_ptr, level, &ent); - - if (!ent.present) { - struct ppn ppn; - 
ASSERT(ram_alloc_frame_zeroed(&ppn, RAM_PAGE_NORMAL)); - pt_entry_init_nonleaf(&ent, level, ppn); - // add into page table - *entry_ptr = pt_entry_pack(&ent); - } - return ent.ppn; -} - - -#define NUM_LEVELS 4 +// --- traversal --- // TODO for inspection, we need to accumulate permissions over all levels // (upper supervisor bit will mean lower PTs are also supervisor, even without their bit set) -static uint64_t *pt_get_leaf_ptr(struct vpn vpn, struct ppn cr3, bool alloc, int *level_out) { - uint64_t va_value = va_to_value(va_from_vpn(vpn)); - int level = NUM_LEVELS; +static uint64_t *pt_get_leaf_ptr(struct vpn vpn, struct ppn cr3, bool alloc, unsigned int *level_out) { + unsigned int level = 4; struct ppn ppn = cr3; while (1) { - uint64_t idx = (va_value >> (12 + (level - 1)*9)) & 0x1ffull; - uint64_t *entry_ptr = (uint64_t*)pa_to_pointer(pa_from_ppn_with_offset(ppn, idx << 3)); + uint64_t *entry_ptr = (uint64_t*)pa_to_pointer(pa_from_pt_with_idx(ppn, vpn_level_idx(vpn, level))); struct pt_entry ent; pt_entry_unpack(*entry_ptr, level, &ent); @@ -350,20 +233,9 @@ static uint64_t *pt_get_leaf_ptr(struct vpn vpn, struct ppn cr3, bool alloc, int if (!ent.present) { if (alloc) { - ASSERT(ram_alloc_frame_zeroed(&ent.ppn, RAM_PAGE_NORMAL)); - ent.level = level; - ent.present = true; - // maximum privileges in upper level - ent.writable = true; - ent.supervisor = false; - ent.writethrough = false; - ent.cache_disable = false; - ent.accessed = false; - ent.dirty = false; - ent.global = false; // TODO should this be true for lower global mappings? 
probably - ent.hugepage = false; - ent.pat_bit = false; - // add into page table + struct ppn new_ppn; + ASSERT(ram_alloc_frame_zeroed(&new_ppn, RAM_PAGE_NORMAL)); + pt_entry_init_nonleaf(&ent, level, new_ppn); *entry_ptr = pt_entry_pack(&ent); } else { return NULL; @@ -376,20 +248,22 @@ static uint64_t *pt_get_leaf_ptr(struct vpn vpn, struct ppn cr3, bool alloc, int } bool pt_translate(struct va va, struct ppn cr3, struct pa *pa_out) { - int leaf_level = -1; + unsigned int leaf_level = 42; uint64_t *leaf_ptr = pt_get_leaf_ptr(vpn_from_unaligned_va(va), cr3, false, &leaf_level); if (leaf_ptr == NULL) { return false; } + ASSERT(leaf_level != 42); + struct pt_entry ent; - pt_entry_unpack(*leaf_ptr, 1, &ent); // TODO may be > 1 for hugepage, but - // doesn't really matter because we only use address + pt_entry_unpack(*leaf_ptr, leaf_level, &ent); uint64_t phys_base; switch (leaf_level) { case 1: *pa_out = pa_from_ppn_with_offset(ent.ppn, va_offset(va)); break; + // TODO we don't really have an interface for hugepages so this is uglier than it needs to be case 2: phys_base = pa_to_value(pa_from_ppn(ent.ppn)) + (va_to_value(va) & 0x1fffff); *pa_out = pa_from_value(phys_base); @@ -408,39 +282,45 @@ bool pt_translate_current(struct va va, struct pa *pa_out) { return pt_translate(va, get_cr3_ppn(), pa_out); } +static struct ppn traverse_with_alloc(struct ppn ppn, unsigned int level, unsigned int idx) { + ASSERT(idx < 512); + ASSERT(level == 4 || level == 3 || level == 2); + + uint64_t *entry_ptr = (uint64_t*)pa_to_pointer(pa_from_pt_with_idx(ppn, idx)); + + struct pt_entry ent; + pt_entry_unpack(*entry_ptr, level, &ent); + + if (!ent.present) { + struct ppn ppn; + ASSERT(ram_alloc_frame_zeroed(&ppn, RAM_PAGE_NORMAL)); + pt_entry_init_nonleaf(&ent, level, ppn); + *entry_ptr = pt_entry_pack(&ent); + } + return ent.ppn; +} + +// --- mapping --- + bool pt_map_single(struct vpn virt, struct ppn phys, bool writable, bool supervisor, bool global, struct ppn cr3) { - int 
leaf_level = -1; + unsigned int leaf_level = 42; uint64_t *leaf_ptr = pt_get_leaf_ptr(virt, cr3, true, &leaf_level); if (leaf_ptr == NULL) { return false; } - ASSERT(leaf_level == 1); // no hugepage yet; we expect everything to be alloced + ASSERT(leaf_level != 42); + // check for and avoid double mapping + struct pt_entry old_ent; + pt_entry_unpack(*leaf_ptr, leaf_level, &old_ent); + if (leaf_level != 1 || old_ent.present) { + return false; + } struct pt_entry ent; - ent.level = 1; - ent.present = true; - ent.writable = writable; - ent.supervisor = supervisor; - // disable caching bits because we want to use MTRRs - ent.writethrough = false; - ent.cache_disable = false; - ent.accessed = false; - ent.dirty = false; - ent.global = global; - ent.ppn = phys; - - ent.hugepage = false; - // disable caching bits because we want to use MTRRs - ent.pat_bit = false; - - // additional sanity check to avoid double mapping - struct pt_entry old_ent; - pt_entry_unpack(*leaf_ptr, 1, &old_ent); - ASSERT(!old_ent.present); - + pt_entry_init_leaf(&ent, 1, writable, supervisor, global, phys); *leaf_ptr = pt_entry_pack(&ent); return true; } @@ -451,14 +331,17 @@ bool pt_map_single_current(struct vpn virt, struct ppn phys, return pt_map_single(virt, phys, writable, supervisor, global, get_cr3_ppn()); } +// if a single mapping fails, PT is left in half-mapped state +// TODO detect and map huge pages bool pt_map_range(struct vpn virt, struct ppn phys, uint64_t num_pages, bool writable, bool supervisor, bool global, struct ppn cr3) { for (uint64_t i = 0; i < num_pages; i++) { - // TODO error handling: what to do if it fails in the middle? 
- // TODO huge pages - pt_map_single(virt, phys, writable, supervisor, global, cr3); + bool success = pt_map_single(virt, phys, writable, supervisor, global, cr3); + if (!success) { + return false; + } virt = vpn_from_pagenum(vpn_to_pagenum(virt) + 1); phys = ppn_from_pagenum(ppn_to_pagenum(phys) + 1); } @@ -471,18 +354,16 @@ bool pt_map_range_current(struct vpn virt, struct ppn phys, uint64_t num_pages, return pt_map_range(virt, phys, num_pages, writable, supervisor, global, get_cr3_ppn()); } -bool pt_map_huge(struct vpn virt, struct ppn phys, +bool pt_map_huge_1gb(struct vpn virt, struct ppn phys, bool writable, bool supervisor, bool global, struct ppn cr3) { - ASSERT((vpn_to_pagenum(virt) & ((1ull << 18) - 1)) == 0); - ASSERT((ppn_to_pagenum(phys) & ((1ull << 18) - 1)) == 0); + ASSERT(vpn_is_huge_1gb(virt)); + ASSERT(ppn_is_huge_1gb(phys)); - uint64_t idx = (vpn_to_pagenum(virt) >> 27) & 0x1ff; - struct ppn l3_pt = traverse_with_alloc(cr3, 4, idx); + struct ppn l3_pt = traverse_with_alloc(cr3, 4, vpn_level_idx(virt, 4)); - idx = (vpn_to_pagenum(virt) >> 18) & 0x1ff; - uint64_t *entry_ptr = pa_to_pointer(pa_from_ppn_with_offset(l3_pt, idx << 3)); + uint64_t *entry_ptr = pa_to_pointer(pa_from_pt_with_idx(l3_pt, vpn_level_idx(virt, 3))); struct pt_entry ent; pt_entry_unpack(*entry_ptr, 3, &ent); ASSERT(!ent.present); @@ -497,9 +378,11 @@ void pt_free(struct ppn root) { TODO(); } -void pt_create_minimal(void) { - // stack is currently mapped on the very last page +// --- page table creation --- +#define PT_CREATE_DEBUG + +void pt_create_minimal(void) { // get a top level page table struct ppn new_cr3; ASSERT(ram_alloc_frame_zeroed(&new_cr3, RAM_PAGE_NORMAL)); @@ -507,31 +390,22 @@ void pt_create_minimal(void) { // copy mappings we had previously for everything important // we do single mapping for now because we don't know if this is physically continuous, // even though it is virtually - unsigned int count = 0; // this start value comes from the bootboot docs for 
(uint64_t va_value = 0xfffffffff8000000ull; va_value != 0ull; va_value += 0x1000) { struct va curr_va = va_from_canonical(va_value); struct pa curr_pa; - bool success = pt_translate_current(curr_va, &curr_pa); - if (success) { - // printf("WARN: virt. address %p mapped\n", va_to_canonical(curr_va)); + bool mapped = pt_translate_current(curr_va, &curr_pa); + if (mapped) { pt_map_single(vpn_from_aligned_va(curr_va), ppn_from_aligned_pa(curr_pa), true /* writable */, true /* supervisor */, false /* not global */, new_cr3 ); - // printf("WARN: virt. address %p mapped; done\n", va_to_canonical(curr_va)); - } else { - count += 1; - if ((count & 0xff) == 0) { - // printf("WARN: virt. address %p not mapped (count %d)\n", va_to_canonical(curr_va), count); - } } } - printf("WARN: done\n"); - // map physical memory starting at 1 GiB (skip 1st hugepage) + // 16 GiB identity map with hugepages starting at 1 GiB (skip 1st hugepage) for (uint64_t i = 0; i < 16; i++) { - pt_map_huge( + pt_map_huge_1gb( vpn_from_aligned_va(va_from_value((i + 1) << 30)), ppn_from_aligned_pa(pa_from_value(i << 30)), true, true, false, @@ -540,6 +414,7 @@ void pt_create_minimal(void) { } set_identity_mapping(vpn_from_aligned_va(va_from_value(1ull << 30))); +#ifdef PT_CREATE_DEBUG uint64_t value = pa_to_value(pa_from_ppn(new_cr3)); printf("new cr3: %p\n", value); @@ -563,14 +438,17 @@ void pt_create_minimal(void) { } mem_range_print(&range); } +#endif // switch set_cr3(value); +#ifdef PT_CREATE_DEBUG printf("hello from new page table!\n"); +#endif } -// --- new iterators --- +// --- iterators --- void mem_range_print(const struct mem_range *mr) { uint64_t virt_canonical = va_to_canonical(va_from_vpn(mr->vpn_start)); @@ -585,7 +463,7 @@ void mem_range_print(const struct mem_range *mr) { mr->entry_start.writable ? 'w' : 'r', mr->entry_start.supervisor ? 
's' : 'u', // TODO right now we assume PAT is present - page_attr_to_str(get_pa(pt_entry_to_pat_index(&mr->entry_start))), + page_attr_to_str(get_pa(pt_entry_to_pat_idx(&mr->entry_start))), mr->entry_start.global ? "global:y" : "global:n", mr->npages, mr->npages << 12); @@ -603,13 +481,13 @@ static bool pt_contiguous(struct mem_range *mr, struct vpn vpn, struct pt_entry } static struct pa access_level_pa(struct access_level *ac) { - return pa_from_ppn_with_offset(ac->ppn, ac->index << 3); + return pa_from_pt_with_idx(ac->ppn, ac->idx); } void pt_leaf_iter_init(struct pt_leaf_iter *it, struct ppn cr3) { it->done = false; it->depth = 0; - it->levels[0] = (struct access_level){ .ppn = cr3, .index = 0 }; + it->levels[0] = (struct access_level){ .ppn = cr3, .idx = 0 }; } void pt_leaf_iter_init_current(struct pt_leaf_iter *it) { @@ -619,17 +497,17 @@ void pt_leaf_iter_init_current(struct pt_leaf_iter *it) { static struct vpn pt_leaf_iter_vpn(struct pt_leaf_iter *it) { uint64_t value = 0; for (unsigned int i = 0; i <= it->depth; i++) { - int level = 4 - (int)i; + unsigned int level = 4 - i; int shift = 9 * (level - 1); - value |= (uint64_t)(it->levels[i].index) << shift; + value |= (uint64_t)(it->levels[i].idx) << shift; } return vpn_from_pagenum(value); } static void pt_leaf_iter_advance(struct pt_leaf_iter *it) { while (true) { - it->levels[it->depth].index += 1; - if (it->levels[it->depth].index == 512) { + it->levels[it->depth].idx += 1; + if (it->levels[it->depth].idx == 512) { if (it->depth > 0) { it->depth -= 1; } else { @@ -665,13 +543,13 @@ bool pt_leaf_iter_next(struct pt_leaf_iter *it, struct pt_leaf *leaf_out) { return true; } else { it->depth += 1; - it->levels[it->depth] = (struct access_level){ .ppn = ent.ppn, .index = 0 }; + it->levels[it->depth] = (struct access_level){ .ppn = ent.ppn, .idx = 0 }; } } } } -static uint64_t leaf_level_to_npages(int level) { +static uint64_t leaf_level_to_npages(unsigned int level) { switch (level) { case 1: return 1; case 
2: return 1 << 9; @@ -733,72 +611,3 @@ bool mem_range_iter_next(struct mem_range_iter *it, struct mem_range *range_out) } } } - -// --- tests --- -void pt_leaf_iter_test(void) { - struct ppn l4; - ASSERT(ram_alloc_frame_zeroed(&l4, RAM_PAGE_NORMAL)); - struct ppn l3; - ASSERT(ram_alloc_frame_zeroed(&l3, RAM_PAGE_NORMAL)); - - struct pt_entry ent; - ent.level = 4; - ent.present = true; - ent.writable = true; - ent.supervisor = false; - ent.writethrough = false; - ent.cache_disable = false; - ent.accessed = false; - ent.dirty = false; - ent.global = false; - ent.ppn = l3; - ent.hugepage = false; - ent.pat_bit = false; - - *(uint64_t*)pa_to_pointer(pa_from_ppn_with_offset(l4, 10 << 3)) = pt_entry_pack(&ent); - - ent.level = 3; - ent.ppn = ppn_from_pagenum(0); // should not matter - ent.hugepage = true; - - *(uint64_t*)pa_to_pointer(pa_from_ppn_with_offset(l3, 5 << 3)) = pt_entry_pack(&ent); - -#if 0 - struct pt_leaf leaf; - struct pt_leaf_iter it; - pt_leaf_iter_init(&it, l4); - while (true) { - if (!pt_leaf_iter_next(&it, &leaf)) { - break; - } - pt_entry_print(&leaf.ent); - } -#endif - -#if 0 - // --- leaf iter test --- - struct pt_leaf leaf; - struct pt_leaf_iter it; - pt_leaf_iter_init_current(&it); - while (true) { - if (!pt_leaf_iter_next(&it, &leaf)) { - break; - } - printf("va: %p\n", va_to_canonical(va_from_vpn(leaf.vpn_start))); - pt_entry_print(&leaf.ent); - } -#endif - -#if 1 - // --- mem iter test --- - struct mem_range range; - struct mem_range_iter it; - mem_range_iter_init_current(&it); - while (true) { - if (!mem_range_iter_next(&it, &range)) { - break; - } - mem_range_print(&range); - } -#endif -} From 64d4476d752e27677fa813cf8c745502c77b1373 Mon Sep 17 00:00:00 2001 From: uosfz Date: Fri, 23 May 2025 15:02:10 +0200 Subject: [PATCH 15/32] map_ranges with hugepage detection; mem_range_iter doesn't work? 
--- include/x86_64/address.h | 6 ++ src/x86_64/address.c | 28 ++++++ src/x86_64/paging.c | 180 ++++++++++++++++++++++++++------------- 3 files changed, 156 insertions(+), 58 deletions(-) diff --git a/include/x86_64/address.h b/include/x86_64/address.h index 0e8c281..94bb86a 100644 --- a/include/x86_64/address.h +++ b/include/x86_64/address.h @@ -28,17 +28,22 @@ struct va va_from_vpn_with_offset(struct vpn vpn, uint64_t offset); uint64_t va_to_value(struct va va); uint64_t va_to_canonical(struct va va); uint64_t va_offset(struct va va); +uint64_t va_offset_huge_2mb(struct va va); +uint64_t va_offset_huge_1gb(struct va va); struct vpn vpn_from_pagenum(uint64_t pagenum); struct vpn vpn_from_aligned_va(struct va va); struct vpn vpn_from_unaligned_va(struct va va); uint64_t vpn_to_pagenum(struct vpn vpn); unsigned int vpn_level_idx(struct vpn vpn, unsigned int level); +bool vpn_is_huge_2mb(struct vpn vpn); bool vpn_is_huge_1gb(struct vpn vpn); struct pa pa_from_value(uint64_t value); struct pa pa_from_ppn(struct ppn ppn); struct pa pa_from_ppn_with_offset(struct ppn ppn, uint64_t offset); +struct pa pa_from_ppn_huge_2mb_with_offset(struct ppn ppn, uint64_t offset); +struct pa pa_from_ppn_huge_1gb_with_offset(struct ppn ppn, uint64_t offset); struct pa pa_from_pt_with_idx(struct ppn ppn, unsigned int idx); uint64_t pa_to_value(struct pa pa); void *pa_to_pointer(struct pa pa); @@ -48,6 +53,7 @@ struct ppn ppn_from_pagenum(uint64_t pagenum); struct ppn ppn_from_aligned_pa(struct pa pa); struct ppn ppn_from_unaligned_pa(struct pa pa); uint64_t ppn_to_pagenum(struct ppn ppn); +bool ppn_is_huge_2mb(struct ppn ppn); bool ppn_is_huge_1gb(struct ppn ppn); void set_identity_mapping(struct vpn first_page); diff --git a/src/x86_64/address.c b/src/x86_64/address.c index a0321c0..c41e89b 100644 --- a/src/x86_64/address.c +++ b/src/x86_64/address.c @@ -42,6 +42,14 @@ uint64_t va_offset(struct va va) { return va.value & 0xfff; } +uint64_t va_offset_huge_2mb(struct va va) { + return 
va.value & 0x1fffff; +} + +uint64_t va_offset_huge_1gb(struct va va) { + return va.value & 0x3fffffff; +} + struct vpn vpn_from_pagenum(uint64_t pagenum) { ASSERT(pagenum < (1ull << 36)); return (struct vpn){ .pagenum = pagenum }; @@ -66,6 +74,10 @@ unsigned int vpn_level_idx(struct vpn vpn, unsigned int level) { return (vpn_to_pagenum(vpn) >> ((level - 1)*9)) & 0x1ffull; } +bool vpn_is_huge_2mb(struct vpn vpn) { + return (vpn_to_pagenum(vpn) & ((1ull << 9) - 1)) == 0; +} + bool vpn_is_huge_1gb(struct vpn vpn) { return (vpn_to_pagenum(vpn) & ((1ull << 18) - 1)) == 0; } @@ -84,6 +96,18 @@ struct pa pa_from_ppn_with_offset(struct ppn ppn, uint64_t offset) { return pa_from_value((ppn.pagenum << 12) + offset); } +struct pa pa_from_ppn_huge_2mb_with_offset(struct ppn ppn, uint64_t offset) { + ASSERT(ppn_is_huge_2mb(ppn)); + ASSERT(offset < (1ull << 21)); + return pa_from_value((ppn.pagenum << 12) + offset); +} + +struct pa pa_from_ppn_huge_1gb_with_offset(struct ppn ppn, uint64_t offset) { + ASSERT(ppn_is_huge_1gb(ppn)); + ASSERT(offset < (1ull << 30)); + return pa_from_value((ppn.pagenum << 12) + offset); +} + struct pa pa_from_pt_with_idx(struct ppn ppn, unsigned int idx) { return pa_from_ppn_with_offset(ppn, (uint64_t)idx << 3); } @@ -118,6 +142,10 @@ uint64_t ppn_to_pagenum(struct ppn ppn) { return ppn.pagenum; } +bool ppn_is_huge_2mb(struct ppn ppn) { + return (ppn_to_pagenum(ppn) & ((1ull << 9) - 1)) == 0; +} + bool ppn_is_huge_1gb(struct ppn ppn) { return (ppn_to_pagenum(ppn) & ((1ull << 18) - 1)) == 0; } diff --git a/src/x86_64/paging.c b/src/x86_64/paging.c index cce1f13..9371dc0 100644 --- a/src/x86_64/paging.c +++ b/src/x86_64/paging.c @@ -56,20 +56,13 @@ static uint8_t pt_entry_to_pat_idx(const struct pt_entry *ent) { static const char *page_attr_to_str(enum page_attr attr) { switch (attr) { - case PA_UC: - return "UC (uncacheable)"; - case PA_WC: - return "WC (write-combining)"; - case PA_WT: - return "WT (write-through)"; - case PA_WP: - return "WP 
(write-protect)"; - case PA_WB: - return "WB (write-back)"; - case PA_UCMINUS: - return "UC- (uncacheable minus)"; - default: - UNREACHABLE(); + case PA_UC: return "UC (uncacheable)"; + case PA_WC: return "WC (write-combining)"; + case PA_WT: return "WT (write-through)"; + case PA_WP: return "WP (write-protect)"; + case PA_WB: return "WB (write-back)"; + case PA_UCMINUS: return "UC- (uncacheable minus)"; + default: UNREACHABLE(); } } @@ -258,19 +251,15 @@ bool pt_translate(struct va va, struct ppn cr3, struct pa *pa_out) { struct pt_entry ent; pt_entry_unpack(*leaf_ptr, leaf_level, &ent); - uint64_t phys_base; switch (leaf_level) { case 1: *pa_out = pa_from_ppn_with_offset(ent.ppn, va_offset(va)); break; - // TODO we don't really have an interface for hugepages so this is uglier than it needs to be case 2: - phys_base = pa_to_value(pa_from_ppn(ent.ppn)) + (va_to_value(va) & 0x1fffff); - *pa_out = pa_from_value(phys_base); + *pa_out = pa_from_ppn_huge_2mb_with_offset(ent.ppn, va_offset_huge_2mb(va)); break; case 3: - phys_base = pa_to_value(pa_from_ppn(ent.ppn)) + (va_to_value(va) & 0x3fffffff); - *pa_out = pa_from_value(phys_base); + *pa_out = pa_from_ppn_huge_1gb_with_offset(ent.ppn, va_offset_huge_1gb(va)); break; default: UNREACHABLE(); @@ -331,19 +320,119 @@ bool pt_map_single_current(struct vpn virt, struct ppn phys, return pt_map_single(virt, phys, writable, supervisor, global, get_cr3_ppn()); } +static struct pa access_level_pa(const struct access_level *ac) { + return pa_from_pt_with_idx(ac->ppn, ac->idx); +} + +struct traversal { + unsigned int depth; + struct access_level levels[4]; +}; + +#define TRAVERSAL_LEVEL(trav) (4 - (trav)->depth) +#define TRAVERSAL_NPAGES_COVERED(trav) (1ull << (9 * (TRAVERSAL_LEVEL(trav) - 1))) + +static void traversal_init(struct traversal *trav, struct vpn virt, struct ppn cr3) { + trav->depth = 0; + trav->levels[0].ppn = cr3; + trav->levels[0].idx = vpn_level_idx(virt, 4); +} + +static void traversal_descend(struct traversal 
*trav, struct vpn virt) { + ASSERT(trav->depth < 3); + unsigned int old_depth = trav->depth; + unsigned int new_depth = old_depth + 1; + trav->levels[new_depth].ppn = traverse_with_alloc(trav->levels[old_depth].ppn, + 4 - old_depth, + trav->levels[old_depth].idx); + trav->levels[new_depth].idx = vpn_level_idx(virt, 4 - new_depth); + trav->depth = new_depth; +} + +static uint64_t *traversal_entry_ptr(const struct traversal *trav) { + return pa_to_pointer(access_level_pa(&trav->levels[trav->depth])); +} + +static void traversal_map(struct traversal *trav, struct ppn phys, + bool writable, bool supervisor, bool global) +{ + switch (trav->depth) { + case 0: UNREACHABLE(); // there are no 512 GB pages + case 1: ASSERT(ppn_is_huge_1gb(phys)); break; + case 2: ASSERT(ppn_is_huge_2mb(phys)); break; + case 3: break; + default: UNREACHABLE(); + } + + uint64_t *entry_ptr = traversal_entry_ptr(trav); + struct pt_entry ent; + pt_entry_unpack(*entry_ptr, TRAVERSAL_LEVEL(trav), &ent); + ASSERT(!ent.present); + pt_entry_init_leaf(&ent, TRAVERSAL_LEVEL(trav), writable, supervisor, global, phys); + *entry_ptr = pt_entry_pack(&ent); +} + +// returns whether we're done or not. TODO not sure if we need this return value. 
+static bool traversal_advance(struct traversal *trav) { + while (true) { + trav->levels[trav->depth].idx += 1; + if (trav->levels[trav->depth].idx == 512) { + if (trav->depth > 0) { + trav->depth -= 1; + } else { + return true; + } + } else { + return false; + } + } +} + +static bool can_map_huge_1gb(struct ppn phys, uint64_t num_pages) { + return ppn_is_huge_1gb(phys) && num_pages >= (1ull << 18); +} + +static bool can_map_huge_2mb(struct ppn phys, uint64_t num_pages) { + return ppn_is_huge_2mb(phys) && num_pages >= (1ull << 9); +} + // if a single mapping fails, PT is left in half-mapped state -// TODO detect and map huge pages bool pt_map_range(struct vpn virt, struct ppn phys, uint64_t num_pages, bool writable, bool supervisor, bool global, struct ppn cr3) { - for (uint64_t i = 0; i < num_pages; i++) { - bool success = pt_map_single(virt, phys, writable, supervisor, global, cr3); - if (!success) { - return false; + struct traversal trav; + traversal_init(&trav, virt, cr3); + // we always need at least 2 levels + traversal_descend(&trav, virt); + if (!vpn_is_huge_1gb(virt) || !can_map_huge_1gb(phys, num_pages)) { + traversal_descend(&trav, virt); + } + if (!vpn_is_huge_2mb(virt) || !can_map_huge_2mb(phys, num_pages)) { + traversal_descend(&trav, virt); + } + + while (true) { + traversal_map(&trav, phys, writable, supervisor, global); + + // advance + uint64_t np = TRAVERSAL_NPAGES_COVERED(&trav); + virt = vpn_from_pagenum(vpn_to_pagenum(virt) + np); + phys = ppn_from_pagenum(ppn_to_pagenum(phys) + np); + num_pages -= np; + if (num_pages == 0) { + break; + } + ASSERT(!traversal_advance(&trav)); // TODO check this at the beginning + if (trav.depth < 1) { + traversal_descend(&trav, virt); + } + if (trav.depth < 2 && !can_map_huge_1gb(phys, num_pages)) { + traversal_descend(&trav, virt); + } + if (trav.depth < 3 && !can_map_huge_2mb(phys, num_pages)) { + traversal_descend(&trav, virt); } - virt = vpn_from_pagenum(vpn_to_pagenum(virt) + 1); - phys = 
ppn_from_pagenum(ppn_to_pagenum(phys) + 1); } return true; } @@ -354,25 +443,6 @@ bool pt_map_range_current(struct vpn virt, struct ppn phys, uint64_t num_pages, return pt_map_range(virt, phys, num_pages, writable, supervisor, global, get_cr3_ppn()); } -bool pt_map_huge_1gb(struct vpn virt, struct ppn phys, - bool writable, bool supervisor, bool global, - struct ppn cr3) -{ - ASSERT(vpn_is_huge_1gb(virt)); - ASSERT(ppn_is_huge_1gb(phys)); - - struct ppn l3_pt = traverse_with_alloc(cr3, 4, vpn_level_idx(virt, 4)); - - uint64_t *entry_ptr = pa_to_pointer(pa_from_pt_with_idx(l3_pt, vpn_level_idx(virt, 3))); - struct pt_entry ent; - pt_entry_unpack(*entry_ptr, 3, &ent); - ASSERT(!ent.present); - - pt_entry_init_leaf(&ent, 3, writable, supervisor, global, phys); - *entry_ptr = pt_entry_pack(&ent); - return true; -} - void pt_free(struct ppn root) { // this assumes single ownership TODO(); @@ -403,15 +473,13 @@ void pt_create_minimal(void) { } } + unsigned int npages_in_gb = 1ull << 18; // 16 GiB identity map with hugepages starting at 1 GiB (skip 1st hugepage) - for (uint64_t i = 0; i < 16; i++) { - pt_map_huge_1gb( - vpn_from_aligned_va(va_from_value((i + 1) << 30)), - ppn_from_aligned_pa(pa_from_value(i << 30)), - true, true, false, - new_cr3 - ); - } + pt_map_range(vpn_from_pagenum(npages_in_gb), + ppn_from_pagenum(0), + 16 * npages_in_gb, + true, true, false, + new_cr3); set_identity_mapping(vpn_from_aligned_va(va_from_value(1ull << 30))); #ifdef PT_CREATE_DEBUG @@ -480,10 +548,6 @@ static bool pt_contiguous(struct mem_range *mr, struct vpn vpn, struct pt_entry && ent->pat_bit == mr->entry_start.pat_bit; } -static struct pa access_level_pa(struct access_level *ac) { - return pa_from_pt_with_idx(ac->ppn, ac->idx); -} - void pt_leaf_iter_init(struct pt_leaf_iter *it, struct ppn cr3) { it->done = false; it->depth = 0; From eec22e26c9ef78b02e478f7383306460a0953a84 Mon Sep 17 00:00:00 2001 From: uosfz Date: Fri, 23 May 2025 15:49:49 +0200 Subject: [PATCH 16/32] identity 
map in upper half of AS; fix pa_to_pointer error for high addresses --- src/x86_64/address.c | 3 ++- src/x86_64/paging.c | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/x86_64/address.c b/src/x86_64/address.c index c41e89b..1523d33 100644 --- a/src/x86_64/address.c +++ b/src/x86_64/address.c @@ -117,7 +117,8 @@ uint64_t pa_to_value(struct pa pa) { } void *pa_to_pointer(struct pa pa) { - return (void*)(pa_to_value(pa) + identity_mapping_start); + struct va va = va_from_value(pa_to_value(pa) + identity_mapping_start); + return (void *)va_to_canonical(va); } uint64_t pa_offset(struct pa pa) { diff --git a/src/x86_64/paging.c b/src/x86_64/paging.c index 9371dc0..8b303a0 100644 --- a/src/x86_64/paging.c +++ b/src/x86_64/paging.c @@ -473,14 +473,14 @@ void pt_create_minimal(void) { } } + // 16 GiB identity map with hugepages starting at low end of upper half of AS unsigned int npages_in_gb = 1ull << 18; - // 16 GiB identity map with hugepages starting at 1 GiB (skip 1st hugepage) - pt_map_range(vpn_from_pagenum(npages_in_gb), + struct vpn vpn_idmap_start = vpn_from_aligned_va(va_from_value(1ull << 47)); + pt_map_range(vpn_idmap_start, ppn_from_pagenum(0), 16 * npages_in_gb, true, true, false, new_cr3); - set_identity_mapping(vpn_from_aligned_va(va_from_value(1ull << 30))); #ifdef PT_CREATE_DEBUG uint64_t value = pa_to_value(pa_from_ppn(new_cr3)); @@ -510,6 +510,7 @@ void pt_create_minimal(void) { // switch set_cr3(value); + set_identity_mapping(vpn_idmap_start); #ifdef PT_CREATE_DEBUG printf("hello from new page table!\n"); From 679f7d5636c26cb403272480b16f208bdb4b5df6 Mon Sep 17 00:00:00 2001 From: uosfz Date: Fri, 23 May 2025 17:35:47 +0200 Subject: [PATCH 17/32] prettier pt creation; we copy ranges instead of each mapping individually --- include/x86_64/paging.h | 24 +++--- src/x86_64/paging.c | 171 +++++++++++++++++++--------------------- 2 files changed, 96 insertions(+), 99 deletions(-) diff --git a/include/x86_64/paging.h 
b/include/x86_64/paging.h index 36ba708..5565f96 100644 --- a/include/x86_64/paging.h +++ b/include/x86_64/paging.h @@ -44,13 +44,21 @@ void pt_entry_init_leaf(struct pt_entry *ent, unsigned int level, struct ppn ppn); void pt_entry_print(const struct pt_entry *ent); -// --- traversal --- +// --- traversal and mapping --- + +struct access_level { + struct ppn ppn; + unsigned int idx; +}; + +struct traversal { + unsigned int depth; + struct access_level levels[4]; +}; bool pt_translate(struct va va, struct ppn cr3, struct pa *pa_out); bool pt_translate_current(struct va va, struct pa *pa_out); -// --- mapping --- - bool pt_map_single(struct vpn virt, struct ppn phys, bool writable, bool supervisor, bool global, struct ppn cr3); @@ -77,19 +85,14 @@ struct pt_leaf { struct pt_entry ent; }; -struct access_level { - struct ppn ppn; - unsigned int idx; -}; - struct pt_leaf_iter { bool done; - unsigned int depth; - struct access_level levels[4]; + struct traversal trav; }; void pt_leaf_iter_init(struct pt_leaf_iter *it, struct ppn cr3); void pt_leaf_iter_init_current(struct pt_leaf_iter *it); +void pt_leaf_iter_init_custom_start(struct pt_leaf_iter *it, struct ppn cr3, struct vpn virt); bool pt_leaf_iter_next(struct pt_leaf_iter *it, struct pt_leaf *leaf_out); struct mem_range { @@ -108,6 +111,7 @@ struct mem_range_iter { void mem_range_iter_init(struct mem_range_iter *it, struct ppn cr3); void mem_range_iter_init_current(struct mem_range_iter *it); +void mem_range_iter_init_custom_start(struct mem_range_iter *it, struct ppn cr3, struct vpn virt); bool mem_range_iter_next(struct mem_range_iter *it, struct mem_range *range_out); #endif diff --git a/src/x86_64/paging.c b/src/x86_64/paging.c index 8b303a0..ec337e9 100644 --- a/src/x86_64/paging.c +++ b/src/x86_64/paging.c @@ -206,7 +206,11 @@ static struct ppn get_cr3_ppn(void) { return ppn_from_aligned_pa(pa_from_value(cr3_val & 0x000ffffffffff000ull)); } -// --- traversal --- +// --- traversal and mapping --- + +static 
struct pa access_level_pa(const struct access_level *ac) { + return pa_from_pt_with_idx(ac->ppn, ac->idx); +} // TODO for inspection, we need to accumulate permissions over all levels // (upper supervisor bit will mean lower PTs are also supervisor, even without their bit set) @@ -271,26 +275,6 @@ bool pt_translate_current(struct va va, struct pa *pa_out) { return pt_translate(va, get_cr3_ppn(), pa_out); } -static struct ppn traverse_with_alloc(struct ppn ppn, unsigned int level, unsigned int idx) { - ASSERT(idx < 512); - ASSERT(level == 4 || level == 3 || level == 2); - - uint64_t *entry_ptr = (uint64_t*)pa_to_pointer(pa_from_pt_with_idx(ppn, idx)); - - struct pt_entry ent; - pt_entry_unpack(*entry_ptr, level, &ent); - - if (!ent.present) { - struct ppn ppn; - ASSERT(ram_alloc_frame_zeroed(&ppn, RAM_PAGE_NORMAL)); - pt_entry_init_nonleaf(&ent, level, ppn); - *entry_ptr = pt_entry_pack(&ent); - } - return ent.ppn; -} - -// --- mapping --- - bool pt_map_single(struct vpn virt, struct ppn phys, bool writable, bool supervisor, bool global, struct ppn cr3) @@ -320,15 +304,6 @@ bool pt_map_single_current(struct vpn virt, struct ppn phys, return pt_map_single(virt, phys, writable, supervisor, global, get_cr3_ppn()); } -static struct pa access_level_pa(const struct access_level *ac) { - return pa_from_pt_with_idx(ac->ppn, ac->idx); -} - -struct traversal { - unsigned int depth; - struct access_level levels[4]; -}; - #define TRAVERSAL_LEVEL(trav) (4 - (trav)->depth) #define TRAVERSAL_NPAGES_COVERED(trav) (1ull << (9 * (TRAVERSAL_LEVEL(trav) - 1))) @@ -338,19 +313,47 @@ static void traversal_init(struct traversal *trav, struct vpn virt, struct ppn c trav->levels[0].idx = vpn_level_idx(virt, 4); } -static void traversal_descend(struct traversal *trav, struct vpn virt) { +static uint64_t *traversal_entry_ptr(const struct traversal *trav) { + return pa_to_pointer(access_level_pa(&trav->levels[trav->depth])); +} + +static void traversal_push(struct traversal *trav, struct ppn 
ppn, struct vpn virt) { ASSERT(trav->depth < 3); unsigned int old_depth = trav->depth; unsigned int new_depth = old_depth + 1; - trav->levels[new_depth].ppn = traverse_with_alloc(trav->levels[old_depth].ppn, - 4 - old_depth, - trav->levels[old_depth].idx); + trav->levels[new_depth].ppn = ppn; trav->levels[new_depth].idx = vpn_level_idx(virt, 4 - new_depth); trav->depth = new_depth; } -static uint64_t *traversal_entry_ptr(const struct traversal *trav) { - return pa_to_pointer(access_level_pa(&trav->levels[trav->depth])); +// return if we descended or not +static bool traversal_descend_single_noalloc(struct traversal *trav, struct vpn virt) { + ASSERT(trav->depth < 3); + + uint64_t *entry_ptr = traversal_entry_ptr(trav); + struct pt_entry ent; + pt_entry_unpack(*entry_ptr, TRAVERSAL_LEVEL(trav), &ent); + if (ent.present) { + traversal_push(trav, ent.ppn, virt); + return true; + } else { + return false; + } +} + +static void traversal_descend_single_with_alloc(struct traversal *trav, struct vpn virt) { + ASSERT(trav->depth < 3); + + uint64_t *entry_ptr = traversal_entry_ptr(trav); + struct pt_entry ent; + pt_entry_unpack(*entry_ptr, TRAVERSAL_LEVEL(trav), &ent); + if (!ent.present) { + struct ppn ppn; + ASSERT(ram_alloc_frame_zeroed(&ppn, RAM_PAGE_NORMAL)); + pt_entry_init_nonleaf(&ent, TRAVERSAL_LEVEL(trav), ppn); + *entry_ptr = pt_entry_pack(&ent); + } + traversal_push(trav, ent.ppn, virt); } static void traversal_map(struct traversal *trav, struct ppn phys, @@ -372,8 +375,8 @@ static void traversal_map(struct traversal *trav, struct ppn phys, *entry_ptr = pt_entry_pack(&ent); } -// returns whether we're done or not. TODO not sure if we need this return value. -static bool traversal_advance(struct traversal *trav) { +// returns whether we're done or not. 
+static bool traversal_advance_idx(struct traversal *trav) { while (true) { trav->levels[trav->depth].idx += 1; if (trav->levels[trav->depth].idx == 512) { @@ -404,12 +407,12 @@ bool pt_map_range(struct vpn virt, struct ppn phys, uint64_t num_pages, struct traversal trav; traversal_init(&trav, virt, cr3); // we always need at least 2 levels - traversal_descend(&trav, virt); + traversal_descend_single_with_alloc(&trav, virt); if (!vpn_is_huge_1gb(virt) || !can_map_huge_1gb(phys, num_pages)) { - traversal_descend(&trav, virt); + traversal_descend_single_with_alloc(&trav, virt); } if (!vpn_is_huge_2mb(virt) || !can_map_huge_2mb(phys, num_pages)) { - traversal_descend(&trav, virt); + traversal_descend_single_with_alloc(&trav, virt); } while (true) { @@ -417,21 +420,21 @@ bool pt_map_range(struct vpn virt, struct ppn phys, uint64_t num_pages, // advance uint64_t np = TRAVERSAL_NPAGES_COVERED(&trav); - virt = vpn_from_pagenum(vpn_to_pagenum(virt) + np); - phys = ppn_from_pagenum(ppn_to_pagenum(phys) + np); num_pages -= np; if (num_pages == 0) { break; } - ASSERT(!traversal_advance(&trav)); // TODO check this at the beginning + virt = vpn_from_pagenum(vpn_to_pagenum(virt) + np); + phys = ppn_from_pagenum(ppn_to_pagenum(phys) + np); + ASSERT(!traversal_advance_idx(&trav)); if (trav.depth < 1) { - traversal_descend(&trav, virt); + traversal_descend_single_with_alloc(&trav, virt); } if (trav.depth < 2 && !can_map_huge_1gb(phys, num_pages)) { - traversal_descend(&trav, virt); + traversal_descend_single_with_alloc(&trav, virt); } if (trav.depth < 3 && !can_map_huge_2mb(phys, num_pages)) { - traversal_descend(&trav, virt); + traversal_descend_single_with_alloc(&trav, virt); } } return true; @@ -458,19 +461,15 @@ void pt_create_minimal(void) { ASSERT(ram_alloc_frame_zeroed(&new_cr3, RAM_PAGE_NORMAL)); // copy mappings we had previously for everything important - // we do single mapping for now because we don't know if this is physically continuous, - // even though it is 
virtually // this start value comes from the bootboot docs - for (uint64_t va_value = 0xfffffffff8000000ull; va_value != 0ull; va_value += 0x1000) { - struct va curr_va = va_from_canonical(va_value); - struct pa curr_pa; - bool mapped = pt_translate_current(curr_va, &curr_pa); - if (mapped) { - pt_map_single(vpn_from_aligned_va(curr_va), ppn_from_aligned_pa(curr_pa), - true /* writable */, true /* supervisor */, false /* not global */, - new_cr3 - ); - } + struct mem_range range; + struct mem_range_iter it; + mem_range_iter_init_custom_start(&it, get_cr3_ppn(), + vpn_from_aligned_va(va_from_canonical(0xfffffffff8000000ull))); + while (mem_range_iter_next(&it, &range)) { + pt_map_range(range.vpn_start, range.entry_start.ppn, range.npages, + true, true, false, + new_cr3); } // 16 GiB identity map with hugepages starting at low end of upper half of AS @@ -488,8 +487,6 @@ void pt_create_minimal(void) { // compare old and new to make sure we don't do any bullshit printf("old ranges:\n"); - struct mem_range range; - struct mem_range_iter it; mem_range_iter_init_current(&it); while (true) { if (!mem_range_iter_next(&it, &range)) { @@ -551,38 +548,30 @@ static bool pt_contiguous(struct mem_range *mr, struct vpn vpn, struct pt_entry void pt_leaf_iter_init(struct pt_leaf_iter *it, struct ppn cr3) { it->done = false; - it->depth = 0; - it->levels[0] = (struct access_level){ .ppn = cr3, .idx = 0 }; + traversal_init(&it->trav, vpn_from_pagenum(0), cr3); } void pt_leaf_iter_init_current(struct pt_leaf_iter *it) { pt_leaf_iter_init(it, get_cr3_ppn()); } -static struct vpn pt_leaf_iter_vpn(struct pt_leaf_iter *it) { - uint64_t value = 0; - for (unsigned int i = 0; i <= it->depth; i++) { - unsigned int level = 4 - i; - int shift = 9 * (level - 1); - value |= (uint64_t)(it->levels[i].idx) << shift; +void pt_leaf_iter_init_custom_start(struct pt_leaf_iter *it, struct ppn cr3, struct vpn virt) { + it->done = false; + traversal_init(&it->trav, virt, cr3); + // "incorporate" virt into 
this traversal so we don't have to store it + while(traversal_descend_single_noalloc(&it->trav, virt)) { + // continue } - return vpn_from_pagenum(value); } -static void pt_leaf_iter_advance(struct pt_leaf_iter *it) { - while (true) { - it->levels[it->depth].idx += 1; - if (it->levels[it->depth].idx == 512) { - if (it->depth > 0) { - it->depth -= 1; - } else { - it->done = true; - return; - } - } else { - return; - } +static struct vpn pt_leaf_iter_vpn(struct pt_leaf_iter *it) { + uint64_t value = 0; + for (unsigned int i = 0; i <= it->trav.depth; i++) { + unsigned int level = 4 - i; + int shift = 9 * (level - 1); + value |= (uint64_t)(it->trav.levels[i].idx) << shift; } + return vpn_from_pagenum(value); } bool pt_leaf_iter_next(struct pt_leaf_iter *it, struct pt_leaf *leaf_out) { @@ -593,22 +582,21 @@ bool pt_leaf_iter_next(struct pt_leaf_iter *it, struct pt_leaf *leaf_out) { } while (true) { - uint64_t *entry_ptr = pa_to_pointer(access_level_pa(&it->levels[it->depth])); + uint64_t *entry_ptr = traversal_entry_ptr(&it->trav); struct pt_entry ent; - pt_entry_unpack(*entry_ptr, 4 - it->depth, &ent); + pt_entry_unpack(*entry_ptr, TRAVERSAL_LEVEL(&it->trav), &ent); if (!ent.present) { - pt_leaf_iter_advance(it); + it->done = traversal_advance_idx(&it->trav); if (it->done) { return false; } } else { if (PT_IS_LEAF(ent)) { *leaf_out = (struct pt_leaf){ .vpn_start = pt_leaf_iter_vpn(it), .ent = ent }; - pt_leaf_iter_advance(it); + it->done = traversal_advance_idx(&it->trav); return true; } else { - it->depth += 1; - it->levels[it->depth] = (struct access_level){ .ppn = ent.ppn, .idx = 0 }; + traversal_push(&it->trav, ent.ppn, vpn_from_pagenum(0)); } } } @@ -632,6 +620,11 @@ void mem_range_iter_init_current(struct mem_range_iter *it) { mem_range_iter_init(it, get_cr3_ppn()); } +void mem_range_iter_init_custom_start(struct mem_range_iter *it, struct ppn cr3, struct vpn virt) { + pt_leaf_iter_init_custom_start(&it->leaf_it, cr3, virt); + it->have_held_leaf = false; +} + 
static bool mem_range_iter_leaf_get(struct mem_range_iter *it, struct pt_leaf *leaf_out) { if (it->have_held_leaf) { it->have_held_leaf = false; From 45d6fea05b5c154d9b003cd83dc21342df973b43 Mon Sep 17 00:00:00 2001 From: uosfz Date: Fri, 23 May 2025 18:49:18 +0200 Subject: [PATCH 18/32] more traversal convenience; comment for naming conventions --- include/x86_64/paging.h | 19 +++++++++++++++- src/x86_64/paging.c | 50 ++++++++++++++++++++++++----------------- 2 files changed, 48 insertions(+), 21 deletions(-) diff --git a/include/x86_64/paging.h b/include/x86_64/paging.h index 5565f96..2e24ef5 100644 --- a/include/x86_64/paging.h +++ b/include/x86_64/paging.h @@ -1,6 +1,22 @@ #ifndef KARLOS_PAGING_H #define KARLOS_PAGING_H +// Naming conventions used in paging code: +// +// A _pt_ (page table) can refer to a single level or to an entire hierarchy. +// We mostly use it to refer to a single level. +// +// Pts have 4 _levels_. +// The highest level is level 4. There is exactly one level 4 pt in a hierarchy. +// The lowest level is level 1. +// +// Pts contain _entries_. Each entry is also associated with a level. +// It's the level this entry is located in, NOT the level it points to. +// This means entry levels also range from 4 to 1. +// +// A _leaf entry_ is an entry that points to data pages. +// This includes all entries on level 1, as well as hugepage entries on level 2 and 3. 
+ #include #include @@ -34,7 +50,8 @@ struct pt_entry { // TODO MPK, NX }; -#define PT_IS_LEAF(ent) ((ent).level == 1 || (ent).hugepage) +#define PT_ENTRY_IS_LEAF(ent) ((ent).present && ((ent).level == 1 || (ent).hugepage)) +#define PT_ENTRY_IS_NONLEAF(ent) ((ent).present && (ent).level > 1 && !(ent).hugepage) uint64_t pt_entry_pack(const struct pt_entry *ent_in); void pt_entry_unpack(uint64_t ent_in, unsigned int level, struct pt_entry *ent_out); diff --git a/src/x86_64/paging.c b/src/x86_64/paging.c index ec337e9..3421400 100644 --- a/src/x86_64/paging.c +++ b/src/x86_64/paging.c @@ -17,7 +17,7 @@ void init_paging() { // - Long-Mode Active (EFER.LMA) // - PAT idx 000 points to default strategy // - See if NX bits are used and decide if we want to - // - See of MPK is used and decide if we want to + // - See if MPK is used and decide if we want to // - SMEP/SMAP? // - ... } @@ -317,11 +317,29 @@ static uint64_t *traversal_entry_ptr(const struct traversal *trav) { return pa_to_pointer(access_level_pa(&trav->levels[trav->depth])); } -static void traversal_push(struct traversal *trav, struct ppn ppn, struct vpn virt) { - ASSERT(trav->depth < 3); +static struct vpn traversal_vpn(const struct traversal *trav) { + uint64_t value = 0; + for (unsigned int i = 0; i <= trav->depth; i++) { + unsigned int level = 4 - i; + int shift = 9 * (level - 1); + value |= (uint64_t)(trav->levels[i].idx) << shift; + } + return vpn_from_pagenum(value); +} + +static bool traversal_have_leaf(const struct traversal *trav) { + uint64_t *entry_ptr = traversal_entry_ptr(trav); + struct pt_entry ent; + pt_entry_unpack(*entry_ptr, TRAVERSAL_LEVEL(trav), &ent); + return PT_ENTRY_IS_LEAF(ent); +} + +static void traversal_push(struct traversal *trav, struct pt_entry *ent, struct vpn virt) { + ASSERT(PT_ENTRY_IS_NONLEAF(*ent)); + unsigned int old_depth = trav->depth; unsigned int new_depth = old_depth + 1; - trav->levels[new_depth].ppn = ppn; + trav->levels[new_depth].ppn = ent->ppn; 
trav->levels[new_depth].idx = vpn_level_idx(virt, 4 - new_depth); trav->depth = new_depth; } @@ -333,8 +351,9 @@ static bool traversal_descend_single_noalloc(struct traversal *trav, struct vpn uint64_t *entry_ptr = traversal_entry_ptr(trav); struct pt_entry ent; pt_entry_unpack(*entry_ptr, TRAVERSAL_LEVEL(trav), &ent); + if (ent.present) { - traversal_push(trav, ent.ppn, virt); + traversal_push(trav, &ent, virt); return true; } else { return false; @@ -347,13 +366,14 @@ static void traversal_descend_single_with_alloc(struct traversal *trav, struct v uint64_t *entry_ptr = traversal_entry_ptr(trav); struct pt_entry ent; pt_entry_unpack(*entry_ptr, TRAVERSAL_LEVEL(trav), &ent); + if (!ent.present) { struct ppn ppn; ASSERT(ram_alloc_frame_zeroed(&ppn, RAM_PAGE_NORMAL)); pt_entry_init_nonleaf(&ent, TRAVERSAL_LEVEL(trav), ppn); *entry_ptr = pt_entry_pack(&ent); } - traversal_push(trav, ent.ppn, virt); + traversal_push(trav, &ent, virt); } static void traversal_map(struct traversal *trav, struct ppn phys, @@ -559,21 +579,11 @@ void pt_leaf_iter_init_custom_start(struct pt_leaf_iter *it, struct ppn cr3, str it->done = false; traversal_init(&it->trav, virt, cr3); // "incorporate" virt into this traversal so we don't have to store it - while(traversal_descend_single_noalloc(&it->trav, virt)) { + while(!traversal_have_leaf(&it->trav) && traversal_descend_single_noalloc(&it->trav, virt)) { // continue } } -static struct vpn pt_leaf_iter_vpn(struct pt_leaf_iter *it) { - uint64_t value = 0; - for (unsigned int i = 0; i <= it->trav.depth; i++) { - unsigned int level = 4 - i; - int shift = 9 * (level - 1); - value |= (uint64_t)(it->trav.levels[i].idx) << shift; - } - return vpn_from_pagenum(value); -} - bool pt_leaf_iter_next(struct pt_leaf_iter *it, struct pt_leaf *leaf_out) { ASSERT(it != NULL && leaf_out != NULL); @@ -591,12 +601,12 @@ bool pt_leaf_iter_next(struct pt_leaf_iter *it, struct pt_leaf *leaf_out) { return false; } } else { - if (PT_IS_LEAF(ent)) { - *leaf_out = 
(struct pt_leaf){ .vpn_start = pt_leaf_iter_vpn(it), .ent = ent }; + if (PT_ENTRY_IS_LEAF(ent)) { + *leaf_out = (struct pt_leaf){ .vpn_start = traversal_vpn(&it->trav), .ent = ent }; it->done = traversal_advance_idx(&it->trav); return true; } else { - traversal_push(&it->trav, ent.ppn, vpn_from_pagenum(0)); + traversal_push(&it->trav, &ent, vpn_from_pagenum(0)); } } } From 908b5d1d5f3c93ae899bd3a61345226afba4c359 Mon Sep 17 00:00:00 2001 From: uosfz Date: Wed, 28 May 2025 21:39:43 +0200 Subject: [PATCH 19/32] create and switch to new stack --- src/x86_64/paging.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/x86_64/paging.c b/src/x86_64/paging.c index 3421400..7cf670e 100644 --- a/src/x86_64/paging.c +++ b/src/x86_64/paging.c @@ -1,5 +1,6 @@ #include "std.h" #include "ram.h" +#include "x86_64/address.h" #include "x86_64/asm.h" #include "x86_64/paging.h" @@ -494,13 +495,21 @@ void pt_create_minimal(void) { // 16 GiB identity map with hugepages starting at low end of upper half of AS unsigned int npages_in_gb = 1ull << 18; + unsigned int npages_idmap = 16 * npages_in_gb; struct vpn vpn_idmap_start = vpn_from_aligned_va(va_from_value(1ull << 47)); pt_map_range(vpn_idmap_start, ppn_from_pagenum(0), - 16 * npages_in_gb, + npages_idmap, true, true, false, new_cr3); + // new stack with 4 GiB size and guard pages around + // TODO one stack for each core + struct vpn stack_virt = vpn_from_pagenum(vpn_to_pagenum(vpn_idmap_start) + npages_idmap + 1); + struct ppn stack_phys; + ASSERT(ram_alloc_frame_zeroed(&stack_phys, RAM_PAGE_NORMAL)); + pt_map_single(stack_virt, stack_phys, true, true, false, new_cr3); + #ifdef PT_CREATE_DEBUG uint64_t value = pa_to_value(pa_from_ppn(new_cr3)); printf("new cr3: %p\n", value); @@ -529,9 +538,18 @@ void pt_create_minimal(void) { set_cr3(value); set_identity_mapping(vpn_idmap_start); + // stack switch + // it doesn't really make sense to copy stack contents because rbp and pointers + // to local 
variables will point to the wrong location. + // this means we won't leave this function for now. + uint64_t new_sp = va_to_canonical(va_from_vpn(stack_virt)) + 4096; + __asm__("mov %0,%%rsp" ::"r"(new_sp)); + #ifdef PT_CREATE_DEBUG printf("hello from new page table!\n"); #endif + + PANIC("end of pt_create_minimal"); } // --- iterators --- From 59fabd78389898940fc975b2195ab309f6210eae Mon Sep 17 00:00:00 2001 From: uosfz Date: Thu, 29 May 2025 00:53:31 +0200 Subject: [PATCH 20/32] make pt_create noreturn --- include/x86_64/paging.h | 1 + src/kernel.c | 25 ------------------------- src/x86_64/paging.c | 1 + 3 files changed, 2 insertions(+), 25 deletions(-) diff --git a/include/x86_64/paging.h b/include/x86_64/paging.h index 2e24ef5..605236c 100644 --- a/include/x86_64/paging.h +++ b/include/x86_64/paging.h @@ -93,6 +93,7 @@ void pt_free(struct ppn root); // --- page table creation --- +__attribute__((noreturn)) void pt_create_minimal(void); // --- iterators --- diff --git a/src/kernel.c b/src/kernel.c index 61037d5..28063b7 100644 --- a/src/kernel.c +++ b/src/kernel.c @@ -214,32 +214,7 @@ void _start() { printf("code: %p -> %p\n", &check_initrd, code_phys); #endif -#if 1 - uint64_t cr0 = get_cr0(); - printf("wp=%d\n", (cr0 >> 16) & 1); -#endif - -#if 0 - struct mem_range range; - struct mem_range_iter mi; - mem_range_iter_init_current(&mi); - while (true) { - if (!mem_range_iter_next(&mi, &range)) { - break; - } - mem_range_print(&range); - } -#endif - pt_create_minimal(); - // pt_leaf_iter_test(); - - X86_ASM_INT(0xfe); - X86_ASM_INT(0xfe); - X86_ASM_INT(0xfe); - - // hang for now - PANIC("end of kernel"); } /************************** diff --git a/src/x86_64/paging.c b/src/x86_64/paging.c index 7cf670e..5f6e369 100644 --- a/src/x86_64/paging.c +++ b/src/x86_64/paging.c @@ -476,6 +476,7 @@ void pt_free(struct ppn root) { #define PT_CREATE_DEBUG +__attribute__((noreturn)) void pt_create_minimal(void) { // get a top level page table struct ppn new_cr3; From 
96fd7dfdf7531c9a6229ece9c84ebc31c3aac7b1 Mon Sep 17 00:00:00 2001 From: uosfz Date: Thu, 29 May 2025 14:01:19 +0200 Subject: [PATCH 21/32] start making headers arch-agnostic --- include/{x86_64 => }/address.h | 8 +++---- include/cpu.h | 9 ++++++++ include/paging.h | 38 ++++++++++++++++++++++++++++++++++ include/ram.h | 3 +-- include/serial.h | 6 ++++++ include/x86_64/apic.h | 4 ++-- include/x86_64/asm.h | 4 ++-- include/x86_64/cpu.h | 12 ----------- include/x86_64/paging.h | 9 +++----- include/x86_64/uart.h | 4 ++-- src/kernel.c | 19 +++++++---------- src/std.c | 4 ++-- src/x86_64/address.c | 2 +- src/x86_64/apic.c | 6 +++++- src/x86_64/cpu.c | 29 +++++++++++++------------- src/x86_64/paging.c | 4 ++-- src/x86_64/uart.c | 4 ++++ 17 files changed, 103 insertions(+), 62 deletions(-) rename include/{x86_64 => }/address.h (92%) create mode 100644 include/cpu.h create mode 100644 include/paging.h create mode 100644 include/serial.h delete mode 100644 include/x86_64/cpu.h diff --git a/include/x86_64/address.h b/include/address.h similarity index 92% rename from include/x86_64/address.h rename to include/address.h index 94bb86a..d3cafc6 100644 --- a/include/x86_64/address.h +++ b/include/address.h @@ -6,19 +6,19 @@ // do not use these fields directly! use *_to_* functions. 
struct va { - uint64_t value; // 48-bit + uint64_t value; }; struct vpn { - uint64_t pagenum; // 36-bit + uint64_t pagenum; }; struct pa { - uint64_t value; // 48 - 12 = 36-bit + uint64_t value; }; struct ppn { - uint64_t pagenum; // 36 - 12 = 24-bit + uint64_t pagenum; }; struct va va_from_value(uint64_t value); diff --git a/include/cpu.h b/include/cpu.h new file mode 100644 index 0000000..987afd6 --- /dev/null +++ b/include/cpu.h @@ -0,0 +1,9 @@ +#ifndef KARLOS_CPU_H +#define KARLOS_CPU_H + +void cpu_init(void); +unsigned int cpu_get_core_id(void); + +void interrupt_handler_register(unsigned int num, void (*func)(void)); + +#endif diff --git a/include/paging.h b/include/paging.h new file mode 100644 index 0000000..236ab70 --- /dev/null +++ b/include/paging.h @@ -0,0 +1,38 @@ +#ifndef KARLOS_PAGING_H +#define KARLOS_PAGING_H + +// restricted, architecture-agnostic interface. +// For full interface see include//paging.h + +// TODO remove mentions of cr3 and replace with top_pt or root or something +// I guess we only have to do this here + +#include "address.h" + +// --- init --- + +void init_paging(); + +// --- traversal and mapping --- + +bool pt_translate(struct va va, struct ppn cr3, struct pa *pa_out); +bool pt_translate_current(struct va va, struct pa *pa_out); + +bool pt_map_single(struct vpn virt, struct ppn phys, + bool writable, bool supervisor, bool global, + struct ppn cr3); +bool pt_map_single_current(struct vpn virt, struct ppn phys, + bool writable, bool supervisor, bool global); +bool pt_map_range(struct vpn virt, struct ppn phys, uint64_t num_pages, + bool writable, bool supervisor, bool global, + struct ppn cr3); +bool pt_map_range_current(struct vpn virt, struct ppn phys, uint64_t num_pages, + bool writable, bool supervisor, bool global); +void pt_free(struct ppn cr3); + +// --- page table creation --- + +__attribute__((noreturn)) +void pt_create_minimal(void); + +#endif diff --git a/include/ram.h b/include/ram.h index cbb5182..caff726 100644 --- 
a/include/ram.h +++ b/include/ram.h @@ -5,8 +5,7 @@ #include #include - -#include +#include enum frame_size { RAM_PAGE_NORMAL = 0, diff --git a/include/serial.h b/include/serial.h new file mode 100644 index 0000000..caadddc --- /dev/null +++ b/include/serial.h @@ -0,0 +1,6 @@ +#ifndef KARLOS_SERIAL_H +#define KARLOS_SERIAL_H + +void serial_write_char(char c); + +#endif diff --git a/include/x86_64/apic.h b/include/x86_64/apic.h index c773d7a..e4e79ea 100644 --- a/include/x86_64/apic.h +++ b/include/x86_64/apic.h @@ -1,5 +1,5 @@ -#ifndef KARLOS_APIC_H -#define KARLOS_APIC_H +#ifndef KARLOS_X86_64_APIC_H +#define KARLOS_X86_64_APIC_H void lapic_init(void); unsigned lapic_get_id(void); diff --git a/include/x86_64/asm.h b/include/x86_64/asm.h index 086d016..be89bfb 100644 --- a/include/x86_64/asm.h +++ b/include/x86_64/asm.h @@ -1,5 +1,5 @@ -#ifndef KARLOS_ASM_H -#define KARLOS_ASM_H +#ifndef KARLOS_X86_64_ASM_H +#define KARLOS_X86_64_ASM_H #include diff --git a/include/x86_64/cpu.h b/include/x86_64/cpu.h deleted file mode 100644 index ea39430..0000000 --- a/include/x86_64/cpu.h +++ /dev/null @@ -1,12 +0,0 @@ -#include - -void init_gdt(); - -// make sure the function you register is __attribute__ ((interrupt)) and does a lapic_eoi() -// also make sure to call load_idt afterwards -void interrupt_handler_register(uint8_t num, void (*func)(void *)); -void load_idt(); - -// don't call this, only register for testing -__attribute__ ((interrupt)) -void basic_interrupt_handler(void *ptr); diff --git a/include/x86_64/paging.h b/include/x86_64/paging.h index 605236c..8e10424 100644 --- a/include/x86_64/paging.h +++ b/include/x86_64/paging.h @@ -1,5 +1,5 @@ -#ifndef KARLOS_PAGING_H -#define KARLOS_PAGING_H +#ifndef KARLOS_X86_64_PAGING_H +#define KARLOS_X86_64_PAGING_H // Naming conventions used in paging code: // @@ -20,7 +20,7 @@ #include #include -#include "x86_64/address.h" +#include "address.h" // --- init --- @@ -86,9 +86,6 @@ bool pt_map_range(struct vpn virt, struct ppn 
phys, uint64_t num_pages, struct ppn cr3); bool pt_map_range_current(struct vpn virt, struct ppn phys, uint64_t num_pages, bool writable, bool supervisor, bool global); -bool pt_map_huge_1gb(struct vpn virt, struct ppn phys, - bool writable, bool supervisor, bool global, - struct ppn cr3); void pt_free(struct ppn root); // --- page table creation --- diff --git a/include/x86_64/uart.h b/include/x86_64/uart.h index b601f95..a2966a7 100644 --- a/include/x86_64/uart.h +++ b/include/x86_64/uart.h @@ -1,5 +1,5 @@ -#ifndef KARLOS_UART_H -#define KARLOS_UART_H +#ifndef KARLOS_X86_64_UART_H +#define KARLOS_X86_64_UART_H void uart_write_char(char c); diff --git a/src/kernel.c b/src/kernel.c index 28063b7..758e025 100644 --- a/src/kernel.c +++ b/src/kernel.c @@ -30,17 +30,13 @@ #include #include -#include "x86_64/apic.h" -#include "x86_64/asm.h" -#include "x86_64/address.h" -#include "x86_64/cpu.h" -#include "x86_64/paging.h" - #include "bootboot.h" #include "ram.h" #include "pci.h" #include "std.h" #include "tar.h" +#include "cpu.h" +#include "paging.h" /* imported virtual addresses, see linker script */ extern BOOTBOOT bootboot; // see bootboot.h @@ -80,6 +76,10 @@ void check_initrd() { void console_init(void); +void basic_interrupt_handler(void) { + printf("hello from interrupt!\n"); +} + /****************************************** * Entry point, called by BOOTBOOT Loader * ******************************************/ @@ -127,15 +127,12 @@ void _start() { } #endif - init_gdt(); - + cpu_init(); interrupt_handler_register(0xfe, basic_interrupt_handler); - load_idt(); - lapic_init(); ram_init(); - printf("LAPIC ID: %d\n", lapic_get_id()); + printf("Core ID: %d\n", cpu_get_core_id()); #if 0 struct tar_header hd; diff --git a/src/std.c b/src/std.c index bd304ab..a190214 100644 --- a/src/std.c +++ b/src/std.c @@ -2,7 +2,7 @@ #include #include "std.h" -#include "x86_64/uart.h" +#include "serial.h" #define BUFFER_SIZE 1024 @@ -88,7 +88,7 @@ static unsigned int 
current_buffer_position = 0; static void linebuf_flush(void) { for (unsigned int i = 0; i < current_buffer_position; i++) { - uart_write_char(linebuf[i]); + serial_write_char(linebuf[i]); // extern void visual_putc(char c); // visual_putc(linebuf[i]); } diff --git a/src/x86_64/address.c b/src/x86_64/address.c index 1523d33..2f7ea47 100644 --- a/src/x86_64/address.c +++ b/src/x86_64/address.c @@ -1,4 +1,4 @@ -#include "x86_64/address.h" +#include "address.h" #include "std.h" uint64_t identity_mapping_start = 0; diff --git a/src/x86_64/apic.c b/src/x86_64/apic.c index 1e3da13..18ce313 100644 --- a/src/x86_64/apic.c +++ b/src/x86_64/apic.c @@ -1,5 +1,5 @@ #include "x86_64/apic.h" -#include "x86_64/address.h" +#include "address.h" #include "std.h" #define LAPIC_BASE_PHYS 0xFEE00000 @@ -59,6 +59,10 @@ unsigned lapic_get_id(void) return LAPIC_ADDR->lapic_id.value >> 24; } +unsigned int cpu_get_core_id(void) { + return lapic_get_id(); +} + void lapic_eoi(void) { LAPIC_ADDR->eoi.value = 0; diff --git a/src/x86_64/cpu.c b/src/x86_64/cpu.c index 4246493..3ed1620 100644 --- a/src/x86_64/cpu.c +++ b/src/x86_64/cpu.c @@ -1,5 +1,4 @@ -#include "x86_64/cpu.h" -#include "x86_64/apic.h" +#include "cpu.h" #include "std.h" // --- segmentation --- @@ -87,22 +86,9 @@ static void write_int_desc_entry(struct int_desc_entry *e, uint64_t offset) { e->pad = 0; } -__attribute__ ((interrupt)) -void basic_interrupt_handler(void *ptr) { - static uint64_t interrupt_counter = 0; - - printf("Hello Interrupt %lu!\n", interrupt_counter); - interrupt_counter++; - lapic_eoi(); -} - #define NUM_INTERRUPTS 256 static struct int_desc_entry idt[NUM_INTERRUPTS]; -void interrupt_handler_register(uint8_t num, void (*func)(void *)) { - write_int_desc_entry(&idt[num], (uint64_t)(intptr_t)func); -} - void load_idt() { uint8_t idtr[10]; @@ -111,3 +97,16 @@ void load_idt() { __asm__("lidt (%0)" ::"r"(idtr)); } + +void cpu_init(void) { + init_gdt(); + for (int i = 0; i < 256; i++) { + // TODO + 
write_int_desc_entry(&idt[i], 0); + } + load_idt(); +} + +void interrupt_handler_register(unsigned int num, void (*func)(void)) { + TODO(); +} diff --git a/src/x86_64/paging.c b/src/x86_64/paging.c index 5f6e369..00f2df3 100644 --- a/src/x86_64/paging.c +++ b/src/x86_64/paging.c @@ -1,6 +1,6 @@ #include "std.h" #include "ram.h" -#include "x86_64/address.h" +#include "address.h" #include "x86_64/asm.h" #include "x86_64/paging.h" @@ -467,7 +467,7 @@ bool pt_map_range_current(struct vpn virt, struct ppn phys, uint64_t num_pages, return pt_map_range(virt, phys, num_pages, writable, supervisor, global, get_cr3_ppn()); } -void pt_free(struct ppn root) { +void pt_free(struct ppn cr3) { // this assumes single ownership TODO(); } diff --git a/src/x86_64/uart.c b/src/x86_64/uart.c index c384b7c..ce3445e 100644 --- a/src/x86_64/uart.c +++ b/src/x86_64/uart.c @@ -12,3 +12,7 @@ void uart_write_char(char c) { out8(COM1_BASE_PORT, c); } + +void serial_write_char(char c) { + uart_write_char(c); +} From d59c6ce14d35f58e7030964e68e0e4a50f505567 Mon Sep 17 00:00:00 2001 From: uosfz Date: Thu, 29 May 2025 14:55:27 +0200 Subject: [PATCH 22/32] ISR stubs that push their number and jump to common isr --- src/kernel.c | 4 +- src/x86_64/cpu.c | 321 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 321 insertions(+), 4 deletions(-) diff --git a/src/kernel.c b/src/kernel.c index 758e025..45a41b1 100644 --- a/src/kernel.c +++ b/src/kernel.c @@ -128,7 +128,9 @@ void _start() { #endif cpu_init(); - interrupt_handler_register(0xfe, basic_interrupt_handler); + // interrupt_handler_register(0xfe, basic_interrupt_handler); + __asm__("int $0xfe" :: ); + __asm__("int $0xf7" :: ); ram_init(); diff --git a/src/x86_64/cpu.c b/src/x86_64/cpu.c index 3ed1620..b716c7a 100644 --- a/src/x86_64/cpu.c +++ b/src/x86_64/cpu.c @@ -1,3 +1,4 @@ +#include "x86_64/apic.h" #include "cpu.h" #include "std.h" @@ -98,15 +99,329 @@ void load_idt() { __asm__("lidt (%0)" ::"r"(idtr)); } +void 
common_isr_2(uint64_t num) { + printf("Hello from interrupt %d!\n", num); + lapic_eoi(); +} + +// TODO This is done manually because we need to know how the stack looks. +__attribute__((naked)) +void common_isr(void) { + __asm__("push %%rbp" :: ); + __asm__("mov %%rsp,%%rbp" :: ); + __asm__("push %%r11" :: ); + __asm__("push %%r10" :: ); + __asm__("push %%r9" :: ); + __asm__("push %%r8" :: ); + __asm__("push %%rdi" :: ); + __asm__("push %%rsi" :: ); + __asm__("push %%rcx" :: ); + __asm__("push %%rdx" :: ); + __asm__("push %%rax" :: ); + + // get interrupt number and call "real" handler + __asm__("mov 0x8(%%rbp),%%rdi" :: ); + __asm__("call common_isr_2" :: ); + + __asm__("pop %%rax" :: ); + __asm__("pop %%rdx" :: ); + __asm__("pop %%rcx" :: ); + __asm__("pop %%rsi" :: ); + __asm__("pop %%rdi" :: ); + __asm__("pop %%r8" :: ); + __asm__("pop %%r9" :: ); + __asm__("pop %%r10" :: ); + __asm__("pop %%r11" :: ); + __asm__("pop %%rbp" :: ); + // remove interrupt number pushed in the isr stub + __asm__("add $0x8,%%rsp" :: ); + __asm__("iretq" :: ); +} + +// this has to be set manually. 
+#define ISR_STUB_SIZE 16 + +// absolutely horrifying stringify thing +// https://gcc.gnu.org/onlinedocs/cpp/Stringizing.html +#define MY_XSTR(s) MY_STR(s) +#define MY_STR(s) #s + +// TODO removed cli for now +#define ISR_STUB(n) __asm__( \ + ".align " MY_XSTR(ISR_STUB_SIZE) "\n\t" \ + "push $" #n "\n\t" \ + "jmp common_isr\n\t" \ +) + +__attribute__((aligned(ISR_STUB_SIZE))) +__attribute__((naked)) +void inttable(void) { + ISR_STUB(0); + ISR_STUB(1); + ISR_STUB(2); + ISR_STUB(3); + ISR_STUB(4); + ISR_STUB(5); + ISR_STUB(6); + ISR_STUB(7); + ISR_STUB(8); + ISR_STUB(9); + ISR_STUB(10); + ISR_STUB(11); + ISR_STUB(12); + ISR_STUB(13); + ISR_STUB(14); + ISR_STUB(15); + ISR_STUB(16); + ISR_STUB(17); + ISR_STUB(18); + ISR_STUB(19); + ISR_STUB(20); + ISR_STUB(21); + ISR_STUB(22); + ISR_STUB(23); + ISR_STUB(24); + ISR_STUB(25); + ISR_STUB(26); + ISR_STUB(27); + ISR_STUB(28); + ISR_STUB(29); + ISR_STUB(30); + ISR_STUB(31); + ISR_STUB(32); + ISR_STUB(33); + ISR_STUB(34); + ISR_STUB(35); + ISR_STUB(36); + ISR_STUB(37); + ISR_STUB(38); + ISR_STUB(39); + ISR_STUB(40); + ISR_STUB(41); + ISR_STUB(42); + ISR_STUB(43); + ISR_STUB(44); + ISR_STUB(45); + ISR_STUB(46); + ISR_STUB(47); + ISR_STUB(48); + ISR_STUB(49); + ISR_STUB(50); + ISR_STUB(51); + ISR_STUB(52); + ISR_STUB(53); + ISR_STUB(54); + ISR_STUB(55); + ISR_STUB(56); + ISR_STUB(57); + ISR_STUB(58); + ISR_STUB(59); + ISR_STUB(60); + ISR_STUB(61); + ISR_STUB(62); + ISR_STUB(63); + ISR_STUB(64); + ISR_STUB(65); + ISR_STUB(66); + ISR_STUB(67); + ISR_STUB(68); + ISR_STUB(69); + ISR_STUB(70); + ISR_STUB(71); + ISR_STUB(72); + ISR_STUB(73); + ISR_STUB(74); + ISR_STUB(75); + ISR_STUB(76); + ISR_STUB(77); + ISR_STUB(78); + ISR_STUB(79); + ISR_STUB(80); + ISR_STUB(81); + ISR_STUB(82); + ISR_STUB(83); + ISR_STUB(84); + ISR_STUB(85); + ISR_STUB(86); + ISR_STUB(87); + ISR_STUB(88); + ISR_STUB(89); + ISR_STUB(90); + ISR_STUB(91); + ISR_STUB(92); + ISR_STUB(93); + ISR_STUB(94); + ISR_STUB(95); + ISR_STUB(96); + ISR_STUB(97); + ISR_STUB(98); 
+ ISR_STUB(99); + ISR_STUB(100); + ISR_STUB(101); + ISR_STUB(102); + ISR_STUB(103); + ISR_STUB(104); + ISR_STUB(105); + ISR_STUB(106); + ISR_STUB(107); + ISR_STUB(108); + ISR_STUB(109); + ISR_STUB(110); + ISR_STUB(111); + ISR_STUB(112); + ISR_STUB(113); + ISR_STUB(114); + ISR_STUB(115); + ISR_STUB(116); + ISR_STUB(117); + ISR_STUB(118); + ISR_STUB(119); + ISR_STUB(120); + ISR_STUB(121); + ISR_STUB(122); + ISR_STUB(123); + ISR_STUB(124); + ISR_STUB(125); + ISR_STUB(126); + ISR_STUB(127); + ISR_STUB(128); + ISR_STUB(129); + ISR_STUB(130); + ISR_STUB(131); + ISR_STUB(132); + ISR_STUB(133); + ISR_STUB(134); + ISR_STUB(135); + ISR_STUB(136); + ISR_STUB(137); + ISR_STUB(138); + ISR_STUB(139); + ISR_STUB(140); + ISR_STUB(141); + ISR_STUB(142); + ISR_STUB(143); + ISR_STUB(144); + ISR_STUB(145); + ISR_STUB(146); + ISR_STUB(147); + ISR_STUB(148); + ISR_STUB(149); + ISR_STUB(150); + ISR_STUB(151); + ISR_STUB(152); + ISR_STUB(153); + ISR_STUB(154); + ISR_STUB(155); + ISR_STUB(156); + ISR_STUB(157); + ISR_STUB(158); + ISR_STUB(159); + ISR_STUB(160); + ISR_STUB(161); + ISR_STUB(162); + ISR_STUB(163); + ISR_STUB(164); + ISR_STUB(165); + ISR_STUB(166); + ISR_STUB(167); + ISR_STUB(168); + ISR_STUB(169); + ISR_STUB(170); + ISR_STUB(171); + ISR_STUB(172); + ISR_STUB(173); + ISR_STUB(174); + ISR_STUB(175); + ISR_STUB(176); + ISR_STUB(177); + ISR_STUB(178); + ISR_STUB(179); + ISR_STUB(180); + ISR_STUB(181); + ISR_STUB(182); + ISR_STUB(183); + ISR_STUB(184); + ISR_STUB(185); + ISR_STUB(186); + ISR_STUB(187); + ISR_STUB(188); + ISR_STUB(189); + ISR_STUB(190); + ISR_STUB(191); + ISR_STUB(192); + ISR_STUB(193); + ISR_STUB(194); + ISR_STUB(195); + ISR_STUB(196); + ISR_STUB(197); + ISR_STUB(198); + ISR_STUB(199); + ISR_STUB(200); + ISR_STUB(201); + ISR_STUB(202); + ISR_STUB(203); + ISR_STUB(204); + ISR_STUB(205); + ISR_STUB(206); + ISR_STUB(207); + ISR_STUB(208); + ISR_STUB(209); + ISR_STUB(210); + ISR_STUB(211); + ISR_STUB(212); + ISR_STUB(213); + ISR_STUB(214); + ISR_STUB(215); + 
ISR_STUB(216); + ISR_STUB(217); + ISR_STUB(218); + ISR_STUB(219); + ISR_STUB(220); + ISR_STUB(221); + ISR_STUB(222); + ISR_STUB(223); + ISR_STUB(224); + ISR_STUB(225); + ISR_STUB(226); + ISR_STUB(227); + ISR_STUB(228); + ISR_STUB(229); + ISR_STUB(230); + ISR_STUB(231); + ISR_STUB(232); + ISR_STUB(233); + ISR_STUB(234); + ISR_STUB(235); + ISR_STUB(236); + ISR_STUB(237); + ISR_STUB(238); + ISR_STUB(239); + ISR_STUB(240); + ISR_STUB(241); + ISR_STUB(242); + ISR_STUB(243); + ISR_STUB(244); + ISR_STUB(245); + ISR_STUB(246); + ISR_STUB(247); + ISR_STUB(248); + ISR_STUB(249); + ISR_STUB(250); + ISR_STUB(251); + ISR_STUB(252); + ISR_STUB(253); + ISR_STUB(254); + ISR_STUB(255); +} + void cpu_init(void) { init_gdt(); for (int i = 0; i < 256; i++) { - // TODO - write_int_desc_entry(&idt[i], 0); + write_int_desc_entry(&idt[i], (uint64_t)(intptr_t)((char*)&inttable + ISR_STUB_SIZE * i)); } load_idt(); } void interrupt_handler_register(unsigned int num, void (*func)(void)) { - TODO(); + // TODO } From 8e7198348d8e83899aae661a19604c0d687b4f46 Mon Sep 17 00:00:00 2001 From: uosfz Date: Wed, 4 Jun 2025 19:56:10 +0200 Subject: [PATCH 23/32] all ISRs except 8,10-14 push an additional dummy error code --- src/x86_64/cpu.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/x86_64/cpu.c b/src/x86_64/cpu.c index b716c7a..72278e1 100644 --- a/src/x86_64/cpu.c +++ b/src/x86_64/cpu.c @@ -133,8 +133,8 @@ void common_isr(void) { __asm__("pop %%r10" :: ); __asm__("pop %%r11" :: ); __asm__("pop %%rbp" :: ); - // remove interrupt number pushed in the isr stub - __asm__("add $0x8,%%rsp" :: ); + // remove interrupt number and error code pushed in the isr stub (16 bytes) + __asm__("add $0x10,%%rsp" :: ); __asm__("iretq" :: ); } @@ -147,8 +147,15 @@ void common_isr(void) { #define MY_STR(s) #s // TODO removed cli for now +#define ISR_STUB_NO_PUSH_ERROR(n) __asm__( \ + ".align " MY_XSTR(ISR_STUB_SIZE) "\n\t" \ + "push $" #n "\n\t" \ + "jmp common_isr\n\t" \ 
+) + #define ISR_STUB(n) __asm__( \ ".align " MY_XSTR(ISR_STUB_SIZE) "\n\t" \ + "push $0 \n\t" \ "push $" #n "\n\t" \ "jmp common_isr\n\t" \ ) @@ -164,13 +171,13 @@ void inttable(void) { ISR_STUB(5); ISR_STUB(6); ISR_STUB(7); - ISR_STUB(8); + ISR_STUB_NO_PUSH_ERROR(8); ISR_STUB(9); - ISR_STUB(10); - ISR_STUB(11); - ISR_STUB(12); - ISR_STUB(13); - ISR_STUB(14); + ISR_STUB_NO_PUSH_ERROR(10); + ISR_STUB_NO_PUSH_ERROR(11); + ISR_STUB_NO_PUSH_ERROR(12); + ISR_STUB_NO_PUSH_ERROR(13); + ISR_STUB_NO_PUSH_ERROR(14); ISR_STUB(15); ISR_STUB(16); ISR_STUB(17); From 96564e78572365bea374a0e5d383e7935b2288eb Mon Sep 17 00:00:00 2001 From: uosfz Date: Thu, 5 Jun 2025 16:46:34 +0200 Subject: [PATCH 24/32] update which interrupts push error code --- src/x86_64/cpu.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/x86_64/cpu.c b/src/x86_64/cpu.c index 72278e1..61c4daf 100644 --- a/src/x86_64/cpu.c +++ b/src/x86_64/cpu.c @@ -160,6 +160,7 @@ void common_isr(void) { "jmp common_isr\n\t" \ ) +// error code or not: see https://wiki.osdev.org/Exceptions __attribute__((aligned(ISR_STUB_SIZE))) __attribute__((naked)) void inttable(void) { @@ -180,11 +181,11 @@ void inttable(void) { ISR_STUB_NO_PUSH_ERROR(14); ISR_STUB(15); ISR_STUB(16); - ISR_STUB(17); + ISR_STUB_NO_PUSH_ERROR(17); ISR_STUB(18); ISR_STUB(19); ISR_STUB(20); - ISR_STUB(21); + ISR_STUB_NO_PUSH_ERROR(21); ISR_STUB(22); ISR_STUB(23); ISR_STUB(24); @@ -192,8 +193,8 @@ void inttable(void) { ISR_STUB(26); ISR_STUB(27); ISR_STUB(28); - ISR_STUB(29); - ISR_STUB(30); + ISR_STUB_NO_PUSH_ERROR(29); + ISR_STUB_NO_PUSH_ERROR(30); ISR_STUB(31); ISR_STUB(32); ISR_STUB(33); From 9b7d81ceeb348e52594b69abdb8ced8afbf3d472 Mon Sep 17 00:00:00 2001 From: richard Date: Thu, 5 Jun 2025 21:19:14 +0200 Subject: [PATCH 25/32] PS/2-driver with basic functionality --- include/x86_64/ps2_driver.h | 7 +++ src/x86_64/ps2_driver.c | 97 +++++++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+) create mode 100644 
include/x86_64/ps2_driver.h create mode 100644 src/x86_64/ps2_driver.c diff --git a/include/x86_64/ps2_driver.h b/include/x86_64/ps2_driver.h new file mode 100644 index 0000000..d2b7ec6 --- /dev/null +++ b/include/x86_64/ps2_driver.h @@ -0,0 +1,7 @@ +#include +#include +#include + +uint8_t read_ps2_data(); +uint8_t ps2_status(); +uint8_t init_ps2(); diff --git a/src/x86_64/ps2_driver.c b/src/x86_64/ps2_driver.c new file mode 100644 index 0000000..35b99e6 --- /dev/null +++ b/src/x86_64/ps2_driver.c @@ -0,0 +1,97 @@ +#include + +#include "std.h" +#include "x86_64/ps2_driver.h" +#include "x86_64/asm.h" + +#define STATUSPORT 0x64 +#define CMDPORT 0x64 +#define DATAPORT 0x60 + +#define DISABLEPO 0xAD +#define DISABLEPT 0xA7 +#define PS2TEST 0xAA +#define CONFIGBYTE 0x20 +#define TESTPO 0xAB +#define TESTPT 0xA9 +#define ENABLEPO 0xAE +#define ENABLEPT 0xA8 + +uint8_t ps2_status() { + uint8_t value; + value = in8(STATUSPORT); + return value; +} + +void send_ps2_cmd(uint8_t cmd) { + out8(CMDPORT, cmd); +} + +uint8_t read_ps2_data() { + uint8_t value; + while((~ps2_status()) & 0x1) { + } + value = in8(DATAPORT); + return value; +} + +uint8_t ps2_cmd(uint8_t cmd) { + send_ps2_cmd(cmd); + return read_ps2_data(); +} + +void write_ps2_data(uint8_t data) { + out8(DATAPORT, data); +} +void ps2_instructions(uint8_t cmd, uint8_t data) { + send_ps2_cmd(cmd); + write_ps2_data(data); +} + +uint8_t init_ps2() { + uint8_t test; + uint8_t config_byte; + uint8_t device_check; + uint8_t port_test; + /* Disable devices */ + send_ps2_cmd(DISABLEPO); + send_ps2_cmd(DISABLEPT); + + /* Flush Outputbuffer */ + + /* Set Configbyte */ + + /* Selftest */ + test = ps2_cmd(PS2TEST); + if (test != 0x55){ + return test; + } + + /* Check 2nd port */ + send_ps2_cmd(ENABLEPT); + config_byte = ps2_cmd(CONFIGBYTE); + device_check = ~(config_byte & 0x20); + if (device_check) { + send_ps2_cmd(DISABLEPT); + } + + /* Test Ports */ + port_test = ps2_cmd(TESTPO); + if (port_test == 0x00) { + } + else { + } + if 
(device_check) { + port_test = ps2_cmd(TESTPT); + if (port_test == 0x00) { + } + else { + } + } + + /* Enable devices */ + send_ps2_cmd(ENABLEPO); + send_ps2_cmd(ENABLEPT); + + return 0; +} From c9b40f9d8db9be374c0dd4d0386019b718925e9c Mon Sep 17 00:00:00 2001 From: uosfz Date: Wed, 18 Jun 2025 19:12:38 +0200 Subject: [PATCH 26/32] make lapic address volatile to prevent reordering --- src/x86_64/apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/x86_64/apic.c b/src/x86_64/apic.c index 18ce313..bd9bd2e 100644 --- a/src/x86_64/apic.c +++ b/src/x86_64/apic.c @@ -45,7 +45,7 @@ struct lapic { }; STATIC_ASSERT(sizeof (struct lapic) == 0x400); -#define LAPIC_ADDR ((struct lapic *)pa_to_pointer(pa_from_value(LAPIC_BASE_PHYS))) +#define LAPIC_ADDR ((volatile struct lapic *)pa_to_pointer(pa_from_value(LAPIC_BASE_PHYS))) void lapic_init(void) { From 688a897e217a88d1fc080f2bea9579b8d0c9b2ee Mon Sep 17 00:00:00 2001 From: uosfz Date: Wed, 18 Jun 2025 20:45:08 +0200 Subject: [PATCH 27/32] allow registering interrupt handlers --- include/cpu.h | 2 +- include/x86_64/apic.h | 2 ++ src/kernel.c | 8 ++++---- src/x86_64/apic.c | 11 +++++++++++ src/x86_64/cpu.c | 33 ++++++++++++++++++++++++++++----- 5 files changed, 46 insertions(+), 10 deletions(-) diff --git a/include/cpu.h b/include/cpu.h index 987afd6..c462252 100644 --- a/include/cpu.h +++ b/include/cpu.h @@ -4,6 +4,6 @@ void cpu_init(void); unsigned int cpu_get_core_id(void); -void interrupt_handler_register(unsigned int num, void (*func)(void)); +void interrupt_handler_register(unsigned int vector, void (*handler)(void)); #endif diff --git a/include/x86_64/apic.h b/include/x86_64/apic.h index e4e79ea..ddd84d4 100644 --- a/include/x86_64/apic.h +++ b/include/x86_64/apic.h @@ -6,4 +6,6 @@ unsigned lapic_get_id(void); void lapic_eoi(void); void lapic_set_timer(void); +void pic_disable(void); + #endif diff --git a/src/kernel.c b/src/kernel.c index 45a41b1..dae2f99 100644 --- a/src/kernel.c +++ 
b/src/kernel.c @@ -77,7 +77,7 @@ void check_initrd() { void console_init(void); void basic_interrupt_handler(void) { - printf("hello from interrupt!\n"); + printf("Hello from interrupt!\n"); } /****************************************** @@ -128,12 +128,12 @@ void _start() { #endif cpu_init(); - // interrupt_handler_register(0xfe, basic_interrupt_handler); + ram_init(); + + interrupt_handler_register(0xfe, basic_interrupt_handler); __asm__("int $0xfe" :: ); __asm__("int $0xf7" :: ); - ram_init(); - printf("Core ID: %d\n", cpu_get_core_id()); #if 0 diff --git a/src/x86_64/apic.c b/src/x86_64/apic.c index bd9bd2e..9e02fc3 100644 --- a/src/x86_64/apic.c +++ b/src/x86_64/apic.c @@ -1,3 +1,4 @@ +#include "x86_64/asm.h" #include "x86_64/apic.h" #include "address.h" #include "std.h" @@ -74,3 +75,13 @@ void lapic_set_timer(void) LAPIC_ADDR->lvt_timer.value = 0xFE | (1 << 17); LAPIC_ADDR->timer_initial.value = 0x1000000; } + +void pic_disable(void) { + // https://wiki.osdev.org/8259_PIC#Programming_the_PIC_chips +#define PIC1 0x20 +#define PIC2 0xA0 +#define PIC1_DATA (PIC1 + 1) +#define PIC2_DATA (PIC2 + 1) + out8(PIC1_DATA, 0xff); + out8(PIC2_DATA, 0xff); +} diff --git a/src/x86_64/cpu.c b/src/x86_64/cpu.c index 61c4daf..088881f 100644 --- a/src/x86_64/cpu.c +++ b/src/x86_64/cpu.c @@ -61,7 +61,7 @@ struct int_desc_entry { uint16_t offset2; uint32_t offset3; uint32_t pad; -}; +} __attribute__((aligned(16))); // TODO not sure if this is necessary STATIC_ASSERT(sizeof(struct int_desc_entry) == 16); #define PRIV_KERNEL 0 @@ -99,8 +99,32 @@ void load_idt() { __asm__("lidt (%0)" ::"r"(idtr)); } +#define NUM_INTERRUPT_HANDLERS 32 +struct { + uint8_t vector; + void (*handler)(void); +} interrupt_handlers[NUM_INTERRUPT_HANDLERS]; +unsigned int num_interrupt_handlers = 0; + +void interrupt_handler_register(unsigned int vector, void (*handler)(void)) { + ASSERT(vector <= 0xff); + ASSERT(num_interrupt_handlers < NUM_INTERRUPT_HANDLERS); + unsigned int i = num_interrupt_handlers++; + 
interrupt_handlers[i].vector = vector; + interrupt_handlers[i].handler = handler; +} + +#define DEBUG_INTERRUPT + void common_isr_2(uint64_t num) { - printf("Hello from interrupt %d!\n", num); +#ifdef DEBUG_INTERRUPT + printf("DEBUG: Interrupt %lu\n", num); +#endif + for (unsigned int i = 0; i < num_interrupt_handlers; i++) { + if (interrupt_handlers[i].vector == num) { + interrupt_handlers[i].handler(); + } + } lapic_eoi(); } @@ -428,8 +452,7 @@ void cpu_init(void) { write_int_desc_entry(&idt[i], (uint64_t)(intptr_t)((char*)&inttable + ISR_STUB_SIZE * i)); } load_idt(); -} -void interrupt_handler_register(unsigned int num, void (*func)(void)) { - // TODO + pic_disable(); + lapic_init(); } From a85119931a83739e79fb55fc67c03fcf5f5c2090 Mon Sep 17 00:00:00 2001 From: uosfz Date: Wed, 25 Jun 2025 19:11:04 +0200 Subject: [PATCH 28/32] cld --- src/x86_64/cpu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/x86_64/cpu.c b/src/x86_64/cpu.c index 088881f..5765a8e 100644 --- a/src/x86_64/cpu.c +++ b/src/x86_64/cpu.c @@ -143,6 +143,10 @@ void common_isr(void) { __asm__("push %%rdx" :: ); __asm__("push %%rax" :: ); + // https://wiki.osdev.org/Interrupt_Service_Routines + // "C code following the sysV ABI requires DF to be clear on function entry" + __asm__("cld"); + // get interrupt number and call "real" handler __asm__("mov 0x8(%%rbp),%%rdi" :: ); __asm__("call common_isr_2" :: ); From fa059fc96cbb5777ca4c1bc7c569fdc51eec714d Mon Sep 17 00:00:00 2001 From: uosfz Date: Wed, 25 Jun 2025 20:33:53 +0200 Subject: [PATCH 29/32] refactored ps2 code --- Makefile | 1 + include/x86_64/ps2_driver.h | 8 +- src/x86_64/ps2_driver.c | 171 +++++++++++++++++++++++------------- 3 files changed, 115 insertions(+), 65 deletions(-) diff --git a/Makefile b/Makefile index a15b8a5..3bec223 100644 --- a/Makefile +++ b/Makefile @@ -14,6 +14,7 @@ KERNEL_SOURCES_x86_64 := \ src/x86_64/mem.c \ src/x86_64/asm.c \ src/x86_64/address.c \ + src/x86_64/ps2_driver.c \ # end of x86_64 specific 
kernel sources list # Architecture-agnostic kernel sources. diff --git a/include/x86_64/ps2_driver.h b/include/x86_64/ps2_driver.h index d2b7ec6..a6df555 100644 --- a/include/x86_64/ps2_driver.h +++ b/include/x86_64/ps2_driver.h @@ -2,6 +2,8 @@ #include #include -uint8_t read_ps2_data(); -uint8_t ps2_status(); -uint8_t init_ps2(); +uint8_t ps2_read_data(); +void ps2_write_data(uint8_t data); +uint8_t ps2_cmd_response(uint8_t cmd); +void ps2_cmd_with_data(uint8_t cmd, uint8_t data); +void ps2_init(); diff --git a/src/x86_64/ps2_driver.c b/src/x86_64/ps2_driver.c index 35b99e6..5c2008c 100644 --- a/src/x86_64/ps2_driver.c +++ b/src/x86_64/ps2_driver.c @@ -1,97 +1,144 @@ #include #include "std.h" -#include "x86_64/ps2_driver.h" #include "x86_64/asm.h" +#include "x86_64/ps2_driver.h" -#define STATUSPORT 0x64 -#define CMDPORT 0x64 -#define DATAPORT 0x60 +#define PORT_STATUS 0x64 +#define PORT_CMD 0x64 +#define PORT_DATA 0x60 -#define DISABLEPO 0xAD -#define DISABLEPT 0xA7 -#define PS2TEST 0xAA -#define CONFIGBYTE 0x20 -#define TESTPO 0xAB -#define TESTPT 0xA9 -#define ENABLEPO 0xAE -#define ENABLEPT 0xA8 +#define CMD_CONFIGBYTE_READ 0x20 +#define CMD_CONFIGBYTE_WRITE 0x60 /* write command is 0x60; 0x20 is the read command */ +#define CMD_PORT2_DISABLE 0xA7 +#define CMD_PORT2_ENABLE 0xA8 +#define CMD_PORT2_TEST 0xA9 +#define CMD_TEST 0xAA +#define CMD_PORT1_TEST 0xAB +#define CMD_PORT1_DISABLE 0xAD +#define CMD_PORT1_ENABLE 0xAE -uint8_t ps2_status() { - uint8_t value; - value = in8(STATUSPORT); - return value; +#define STATUS_BIT_OUTPUT_FULL 0 +#define STATUS_BIT_INPUT_FULL 1 +#define STATUS_BIT_SYSTEM 2 +#define STATUS_BIT_COMMAND_DATA 3 +#define STATUS_BIT_ERR_TIMEOUT 6 +#define STATUS_BIT_ERR_PARITY 7 + +#define CONFIG_BIT_PORT1_INT_ENABLED (1u << 0) /* CONFIG_BIT_* are masks: ps2_init applies them directly with &= ~ and |= */ +#define CONFIG_BIT_PORT2_INT_ENABLED (1u << 1) +#define CONFIG_BIT_SYSTEM (1u << 2) +#define CONFIG_BIT_PORT1_CLOCK_DISABLED (1u << 4) +#define CONFIG_BIT_PORT2_CLOCK_DISABLED (1u << 5) +#define CONFIG_BIT_PORT1_TRANSLATION (1u << 6) + +uint8_t +ps2_read_status() { + return in8(PORT_STATUS); } -void send_ps2_cmd(uint8_t 
cmd) { - out8(CMDPORT, cmd); +bool +ps2_get_status_bit(uint8_t bit) { + return (ps2_read_status() >> bit) & 1; } -uint8_t read_ps2_data() { - uint8_t value; - while((~ps2_status()) & 0x1) { +bool +ps2_can_read() { + return ps2_get_status_bit(STATUS_BIT_OUTPUT_FULL); +} + +bool +ps2_can_write() { + return !ps2_get_status_bit(STATUS_BIT_INPUT_FULL); +} + +void ps2_write_cmd(uint8_t cmd) { + while (!ps2_can_write()) { + // wait } - value = in8(DATAPORT); - return value; + out8(PORT_CMD, cmd); } -uint8_t ps2_cmd(uint8_t cmd) { - send_ps2_cmd(cmd); - return read_ps2_data(); +uint8_t ps2_read_data() { + while(!ps2_can_read()) { + // wait + } + return in8(PORT_DATA); } -void write_ps2_data(uint8_t data) { - out8(DATAPORT, data); -} -void ps2_instructions(uint8_t cmd, uint8_t data) { - send_ps2_cmd(cmd); - write_ps2_data(data); +void ps2_empty_output_buffer() { + if (ps2_can_read()) { + in8(PORT_DATA); + } } -uint8_t init_ps2() { - uint8_t test; +void ps2_write_data(uint8_t data) { + while (!ps2_can_write()) { + // wait + } + out8(PORT_DATA, data); +} + +uint8_t ps2_cmd_response(uint8_t cmd) { + ps2_write_cmd(cmd); + return ps2_read_data(); +} + +void ps2_cmd_with_data(uint8_t cmd, uint8_t data) { + ps2_write_cmd(cmd); + ps2_write_data(data); +} + +void ps2_init() { uint8_t config_byte; - uint8_t device_check; - uint8_t port_test; + /* Disable devices */ - send_ps2_cmd(DISABLEPO); - send_ps2_cmd(DISABLEPT); + ps2_write_cmd(CMD_PORT1_DISABLE); + ps2_write_cmd(CMD_PORT2_DISABLE); /* Flush Outputbuffer */ + ps2_empty_output_buffer(); /* Set Configbyte */ + config_byte = ps2_cmd_response(CMD_CONFIGBYTE_READ); + config_byte &= ~CONFIG_BIT_PORT1_INT_ENABLED; + config_byte &= ~CONFIG_BIT_PORT1_CLOCK_DISABLED; + config_byte &= ~CONFIG_BIT_PORT1_TRANSLATION; + ps2_cmd_with_data(CMD_CONFIGBYTE_WRITE, config_byte); /* Selftest */ - test = ps2_cmd(PS2TEST); - if (test != 0x55){ - return test; - } + uint8_t test = ps2_cmd_response(CMD_TEST); + ASSERT(test == 0x55); - /* Check 2nd port 
*/ - send_ps2_cmd(ENABLEPT); - config_byte = ps2_cmd(CONFIGBYTE); - device_check = ~(config_byte & 0x20); - if (device_check) { - send_ps2_cmd(DISABLEPT); + /* Query for 2nd port */ + ps2_write_cmd(CMD_PORT2_ENABLE); + config_byte = ps2_cmd_response(CMD_CONFIGBYTE_READ); + bool port_2_enabled = ((config_byte >> 5) & 1) == 0; + if (port_2_enabled) { + ps2_write_cmd(CMD_PORT2_DISABLE); + config_byte = ps2_cmd_response(CMD_CONFIGBYTE_READ); + config_byte &= ~CONFIG_BIT_PORT2_INT_ENABLED; + config_byte &= ~CONFIG_BIT_PORT2_CLOCK_DISABLED; + ps2_cmd_with_data(CMD_CONFIGBYTE_WRITE, config_byte); } /* Test Ports */ - port_test = ps2_cmd(TESTPO); - if (port_test == 0x00) { - } - else { - } - if (device_check) { - port_test = ps2_cmd(TESTPT); - if (port_test == 0x00) { - } - else { - } + uint8_t port_test = ps2_cmd_response(CMD_PORT1_TEST); + ASSERT(port_test == 0x00); + if (port_2_enabled) { + port_test = ps2_cmd_response(CMD_PORT2_TEST); + ASSERT(port_test == 0x00); } /* Enable devices */ - send_ps2_cmd(ENABLEPO); - send_ps2_cmd(ENABLEPT); + ps2_write_cmd(CMD_PORT1_ENABLE); + if (port_2_enabled) { + ps2_write_cmd(CMD_PORT2_ENABLE); + } - return 0; + /* Enable interrupts */ + // TODO + + /* Reset devices */ + // TODO } From 024333bba73a954951e9c53cfc3cf777439f7264 Mon Sep 17 00:00:00 2001 From: uosfz Date: Wed, 25 Jun 2025 20:34:15 +0200 Subject: [PATCH 30/32] kernel polls ps2 device --- src/kernel.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/kernel.c b/src/kernel.c index 4b1d744..89c82f8 100644 --- a/src/kernel.c +++ b/src/kernel.c @@ -34,6 +34,7 @@ #include "x86_64/asm.h" #include "x86_64/address.h" #include "x86_64/mem.h" +#include "x86_64/ps2_driver.h" #include "bootboot.h" #include "ram.h" @@ -226,6 +227,13 @@ void _start() { printf("wp=%d\n", (cr0 >> 16) & 1); #endif + ps2_init(); + while (1) { + uint8_t data = ps2_read_data(); + putu8x(data); + putln(); + } + // hang for now PANIC("end of kernel"); } From 940224c5e05d646c88985736c12b980a33033a49 
Mon Sep 17 00:00:00 2001 From: uosfz Date: Wed, 25 Jun 2025 20:51:10 +0200 Subject: [PATCH 31/32] enable ps2 interrupts (doesn't work at the moment) --- src/kernel.c | 8 +++++--- src/x86_64/ps2_driver.c | 7 ++++++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/kernel.c b/src/kernel.c index cfa5402..57e68ed 100644 --- a/src/kernel.c +++ b/src/kernel.c @@ -217,9 +217,11 @@ void _start() { ps2_init(); while (1) { - uint8_t data = ps2_read_data(); - putu8x(data); - putln(); + // do nothing. PS2 controller should send interrupts. + + // uint8_t data = ps2_read_data(); + // putu8x(data); + // putln(); } pt_create_minimal(); diff --git a/src/x86_64/ps2_driver.c b/src/x86_64/ps2_driver.c index 5c2008c..8ba4e35 100644 --- a/src/x86_64/ps2_driver.c +++ b/src/x86_64/ps2_driver.c @@ -137,7 +137,12 @@ void ps2_init() { } /* Enable interrupts */ - // TODO + config_byte = ps2_cmd_response(CMD_CONFIGBYTE_READ); + config_byte |= CONFIG_BIT_PORT1_INT_ENABLED; + if (port_2_enabled) { + config_byte |= CONFIG_BIT_PORT2_INT_ENABLED; + } + ps2_cmd_with_data(CMD_CONFIGBYTE_WRITE, config_byte); /* Reset devices */ // TODO From c8a47cfd91c130aab06230ed8962d23b8c553771 Mon Sep 17 00:00:00 2001 From: uosfz Date: Wed, 2 Jul 2025 18:30:03 +0200 Subject: [PATCH 32/32] loop when emptying output buffer (probably doesn't make sense but it also doesn't make things worse) --- src/x86_64/ps2_driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/x86_64/ps2_driver.c b/src/x86_64/ps2_driver.c index 8ba4e35..9c19ac6 100644 --- a/src/x86_64/ps2_driver.c +++ b/src/x86_64/ps2_driver.c @@ -67,7 +67,7 @@ uint8_t ps2_read_data() { } void ps2_empty_output_buffer() { - if (ps2_can_read()) { + while (ps2_can_read()) { in8(PORT_DATA); } }