From 5607bf4b5d5deed34778a339a6a81276957690bb Mon Sep 17 00:00:00 2001
From: uosfz
Date: Wed, 1 Oct 2025 22:09:35 +0200
Subject: [PATCH] sync primitives; local linebuf on every core; spin-locked output

---
 Makefile          |  2 +-
 include/cpu.h     | 19 ++++++---
 include/sync.h    | 32 ++++++++--------
 src/kernel.c      | 10 ++---
 src/std.c         | 25 ++++++++----
 src/sync.c        | 97 ++++++++++++++++++++++++++++++++++++++++++++++
 src/x86_64/cpu.c  | 81 +++++++++++----------------------------
 src/x86_64/sync.c | 98 -----------------------------------------------
 8 files changed, 173 insertions(+), 191 deletions(-)
 create mode 100644 src/sync.c
 delete mode 100644 src/x86_64/sync.c

diff --git a/Makefile b/Makefile
index a609db2..3da3ad3 100644
--- a/Makefile
+++ b/Makefile
@@ -19,7 +19,6 @@ KERNEL_SOURCES_x86_64 := \
 	src/x86_64/asm.c \
 	src/x86_64/address.c \
 	src/x86_64/ps2_driver.c \
-	src/x86_64/sync.c \
 	# end of x86_64 specific kernel sources list
 
 # Architecture-agnostic kernel sources.
@@ -35,6 +34,7 @@ KERNEL_SOURCES := \
 	src/tar.c \
 	src/time.c \
 	src/std.c \
+	src/sync.c \
 	$(KERNEL_SOURCES_$(ARCH)) \
 	# end of kernel sources list

diff --git a/include/cpu.h b/include/cpu.h
index 228c3b9..4123bdf 100644
--- a/include/cpu.h
+++ b/include/cpu.h
@@ -1,6 +1,9 @@
 #ifndef KARLOS_CPU_H
 #define KARLOS_CPU_H
 
+#include <stdbool.h>
+#include <stdint.h>
+
 void cpu_init_bsp(void);
 void cpu_init_ap(void);
 
@@ -10,17 +13,23 @@ void cpu_set_timer(void);
 
-void write_gs_base(uint64_t base);
-
-struct cpu_info{
+#define LINEBUF_SIZE 256
+struct cpu_info {
+	// The first member holds the struct's own address, so local_cpu_data()
+	// can fetch the struct with a single load from %gs:0.
 	struct cpu_info* cpu_info;
 	volatile bool started;
 	uint32_t APIC_ID;
 	uint8_t ID;
+
+	// output
+	char linebuf[LINEBUF_SIZE];
+	unsigned int linebuf_pos;
 };
 
-static inline uint8_t current_core_id(void) {
-	struct cpu_info *self;
-	__asm__("mov %%gs:0, %0" : "=r"(self));
-	return self->ID;
+static inline struct cpu_info *local_cpu_data(void) {
+	struct cpu_info *ptr;
+	__asm__ ("mov %%gs:0, %0" : "=r" (ptr));
+	return ptr;
 }
 
 void interrupt_handler_register(unsigned int vector, void (*handler)(void));
diff --git a/include/sync.h b/include/sync.h
index 27cedff..c22fe34 100644
--- a/include/sync.h
+++ b/include/sync.h
@@ -6,33 +6,35 @@
 #include <stdatomic.h>
 #include <stdint.h>
 
+#ifdef __x86_64__
 #define CPU_RELAX() __asm__ volatile("pause")
-#define COMPILER_BARRIER() atomic_signal_fence(memory_order_seq_cst)
-
+#else
+#define CPU_RELAX()
+#endif
 
 typedef struct {
-	uint32_t threshold;
-	_Atomic uint32_t count;
-	_Atomic uint8_t global_sense;
+	uint32_t threshold;
+	atomic_int count;
 } barrier_t;
 
-typedef struct { atomic_flag flag; } spinlock_t;
+typedef struct {
+	atomic_flag flag;
+	int int_state;
+} spinlock_t;
 typedef struct { atomic_flag flag; } mutex_t;
-typedef struct { _Atomic int32_t count; } semaphore_t;
 
-
-uint32_t atomic_read_and_add(_Atomic uint32_t *addr, uint32_t value);
-uint32_t atomic_increment(_Atomic uint32_t *addr);
-
-bool barrier_init(barrier_t *bar, uint32_t n);
-void barrier_wait(barrier_t *bar, uint8_t *local_sense);
-
-void spin_lock_init(spinlock_t *l);
+typedef struct { atomic_int count; } semaphore_t;
+
+#define SPIN_INITIALIZER (spinlock_t) { .flag = ATOMIC_FLAG_INIT }
+void spin_init(spinlock_t *l);
 bool spin_try_lock(spinlock_t *l);
 void spin_lock(spinlock_t *l);
 void spin_unlock(spinlock_t *l);
 
+/// Initializes the barrier. `n` is the number of threads that will use the barrier.
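+/// Returns false if `bar` is NULL or `n` is 0. The barrier is reusable: once
+/// all `n` threads have left a round, the same barrier can be waited on again.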
+bool barrier_init(barrier_t *bar, uint32_t n);
+void barrier_wait(barrier_t *bar);
+
 void sem_init(semaphore_t *s, int32_t initial);
 bool sem_try_down(semaphore_t *s);
 void sem_down(semaphore_t *s);
diff --git a/src/kernel.c b/src/kernel.c
index c66ca09..68ca623 100644
--- a/src/kernel.c
+++ b/src/kernel.c
@@ -62,14 +62,12 @@ void _start() {
 	unsigned info[4];
 	__get_cpuid(0x1, &info[0], &info[1], &info[2], &info[3]);
 	unsigned procid = info[1] >> 24;
-	if (procid != bootboot.bspid) {
+	if (procid == bootboot.bspid) {
+		cpu_init_bsp();
+		interrupt_handler_register(0x08, double_fault_handler);
+	} else {
 		cpu_init_ap();
 	}
-	else{
-		cpu_init_bsp();
-	}
-
-	interrupt_handler_register(0x08, double_fault_handler);
 
 	__asm__("sti");
 
diff --git a/src/std.c b/src/std.c
index fab5e35..b760aa1 100644
--- a/src/std.c
+++ b/src/std.c
@@ -1,8 +1,11 @@
 #include <stdarg.h>
 #include <stdbool.h>
 
+#include "cpu.h"
 #include "std.h"
 #include "serial.h"
+#include "sync.h"
+#include "x86_64/interrupt.h"
 
 #define BUFFER_SIZE 1024
 
@@ -66,23 +69,29 @@ bool strcontains(const char *s, char c) {
 /* --- Line buffering --- */
 // DO NOT USE THESE FUNCTIONS ANYWHERE! USE THE PUBLIC API (EVEN IN THIS FILE)!
 
-static char linebuf[BUFFER_SIZE];
-static unsigned int current_buffer_position = 0;
 
 static void linebuf_flush(void) {
-	for (unsigned int i = 0; i < current_buffer_position; i++) {
-		serial_write_char(linebuf[i]);
+	static spinlock_t linebuf_lock = SPIN_INITIALIZER;
+
+	spin_lock(&linebuf_lock);
+
+	struct cpu_info *mycpu = local_cpu_data();
+	for (unsigned int i = 0; i < mycpu->linebuf_pos; i++) {
+		serial_write_char(mycpu->linebuf[i]);
 		extern void visual_putc(char c);
-		visual_putc(linebuf[i]);
+		visual_putc(mycpu->linebuf[i]);
 	}
-	current_buffer_position = 0;
+	mycpu->linebuf_pos = 0;
+
+	spin_unlock(&linebuf_lock);
 }
 
 static void linebuf_putc(char c) {
-	if (current_buffer_position == BUFFER_SIZE) {
+	struct cpu_info *mycpu = local_cpu_data();
+	if (mycpu->linebuf_pos == LINEBUF_SIZE) {
 		linebuf_flush();
 	}
-	linebuf[current_buffer_position++] = c;
+	mycpu->linebuf[mycpu->linebuf_pos++] = c;
 }
 
 /* --- Low-level output functions --- */
diff --git a/src/sync.c b/src/sync.c
new file mode 100644
index 0000000..72047af
--- /dev/null
+++ b/src/sync.c
@@ -0,0 +1,97 @@
+#include "sync.h"
+#include "std.h"
+#include <stdatomic.h>
+#include "x86_64/interrupt.h"
+
+void spin_init(spinlock_t *lock) {
+	atomic_flag_clear_explicit(&lock->flag, memory_order_release);
+}
+
+// If successful, the lock is held and interrupts on this core are disabled.
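+// Interrupts stay disabled for as long as the lock is held; otherwise an
+// interrupt handler that takes the same lock would deadlock spinning
+// against its own core.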
+bool spin_try_lock(spinlock_t *lock) {
+	int int_state = int_disable();
+	bool val = atomic_flag_test_and_set_explicit(&lock->flag, memory_order_acquire);
+	if (!val) { // got the lock
+		// only the core that holds the lock may write its interrupt state
+		lock->int_state = int_state;
+		return true;
+	} else {
+		int_restore(int_state);
+		return false;
+	}
+}
+
+void spin_lock(spinlock_t *lock) {
+	int int_state = int_disable();
+	while (atomic_flag_test_and_set_explicit(&lock->flag, memory_order_acquire)) {
+		CPU_RELAX();
+	}
+	lock->int_state = int_state;
+}
+
+void spin_unlock(spinlock_t *lock) {
+	// read int_state before releasing: right after the clear, another core
+	// may acquire the lock and overwrite it
+	int int_state = lock->int_state;
+	atomic_flag_clear_explicit(&lock->flag, memory_order_release);
+	int_restore(int_state);
+}
+
+bool barrier_init(barrier_t *bar, uint32_t n) {
+	if (!bar || n == 0) return false;
+	bar->threshold = n;
+	bar->count = 0;
+	atomic_thread_fence(memory_order_release);
+	return true;
+}
+
+void barrier_wait(barrier_t *bar) {
+	// the count is negative while threads from the previous round are still
+	// draining; wait until they have all left before joining this round
+	while (atomic_load_explicit(&bar->count, memory_order_acquire) < 0) {
+		CPU_RELAX();
+	}
+
+	int new_val = atomic_fetch_add_explicit(&bar->count, 1, memory_order_acq_rel) + 1;
+	ASSERT(new_val > 0);
+
+	if ((uint32_t)new_val == bar->threshold) {
+		// last to arrive: flip the count to -(n - 1) to release the waiters;
+		// each of them adds 1 on the way out, so the count returns to 0
+		atomic_store_explicit(&bar->count, -(int)bar->threshold + 1, memory_order_release);
+	} else {
+		// wait until the final thread flips the count negative
+		while (atomic_load_explicit(&bar->count, memory_order_acquire) > 0) {
+			CPU_RELAX();
+		}
+		atomic_fetch_add_explicit(&bar->count, 1, memory_order_release);
+	}
+}
+
+void sem_init(semaphore_t *s, int32_t initial) {
+	atomic_store_explicit(&s->count, initial, memory_order_release);
+}
+
+bool sem_try_down(semaphore_t *s) {
+	int32_t old = atomic_load_explicit(&s->count, memory_order_relaxed);
+	while (old > 0) {
+		if (atomic_compare_exchange_weak_explicit(
+			&s->count, &old, old - 1, // on failure, `old` is reloaded with the current value
+			memory_order_acquire,
+			memory_order_relaxed
+		)) {
+			return true;
+		}
+		CPU_RELAX();
+	}
+	return false;
+}
+
+void sem_down(semaphore_t *s) {
+	for (;;) {
+		if (sem_try_down(s)) {
+			return;
+		}
+		CPU_RELAX();
+	}
+}
+
+void sem_up(semaphore_t *s) {
+	atomic_fetch_add_explicit(&s->count, 1, memory_order_release);
+}
+
diff --git a/src/x86_64/cpu.c b/src/x86_64/cpu.c
index 5d8825b..dd40c17 100644
--- a/src/x86_64/cpu.c
+++ b/src/x86_64/cpu.c
@@ -1,22 +1,22 @@
+#include <stdatomic.h>
 #include "x86_64/apic.h"
 #include "cpu.h"
 #include "std.h"
-#include "bootboot.h"
 #include "ram.h"
 #include "framebuffer.h"
 #include "console.h"
-#include "paging.h"
 #include "sync.h"
 #include "x86_64/cmos.h"
+#include "x86_64/asm.h"
 #include "x86_64/ps2_driver.h"
 
 // Per CPU Data
 #define MSR_IA32_GS_BASE 0xC0000101
 
 struct cpu_info cpu_data[256];
-volatile uint8_t cpu_bsp_ready = 0;
-uint32_t cpu_increment = 0;
+atomic_bool cpu_bsp_ready = false;
+atomic_uint cpu_increment = 0;
 
 // --- segmentation ---
 extern void loadcs(uint16_t ss, uint16_t cs);
@@ -63,7 +63,6 @@ void init_gdt() {
 	__asm__("mov %0, %%es" ::"r"(DATA_SEGMENT << 3));
 	__asm__("mov %0, %%ds" ::"r"(DATA_SEGMENT << 3));
 	__asm__("mov %0, %%fs" ::"r"(DATA_SEGMENT << 3));
-	__asm__("mov %0, %%gs" ::"r"(DATA_SEGMENT << 3));
+	// %gs is deliberately not reloaded here: loading the selector would
+	// clear the base that was written via MSR_IA32_GS_BASE
 	loadcs(DATA_SEGMENT << 3, CODE_SEGMENT << 3);
 }
 
@@ -467,6 +466,16 @@ void inttable(void) {
 }
 
 void cpu_init_bsp(void) {
+	// Fill out this core's per-CPU struct
+	uint8_t core_id = atomic_fetch_add_explicit(&cpu_increment, 1, memory_order_relaxed);
+	uint32_t lapic = cpu_get_core_id(); // TODO do I have to initialize lapic to get this?
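+	// Publish the struct via the GS base before the first printf: printf now
+	// buffers into the per-core linebuf reached through local_cpu_data().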
+	cpu_data[core_id].cpu_info = &cpu_data[core_id];
+	cpu_data[core_id].started = 1;
+	cpu_data[core_id].APIC_ID = lapic;
+	cpu_data[core_id].ID = core_id;
+	cpu_data[core_id].linebuf_pos = 0;
+	writemsr(MSR_IA32_GS_BASE, (uint64_t) cpu_data[core_id].cpu_info);
+
 	// Early setup
 	console_init();
 	init_gdt();
@@ -479,87 +488,43 @@
 	pic8259_disable(); // mask legacy PICs
 	lapic_init();      // enable local APIC
-
-	// Memory & video
 	ram_init();
 	fb_init();
 	console_clear();
 
-	// Device init
 	rtc_init();
 	ps2_init();
 
+	printf("Startup ID %d. My local struct is at %p and my ID in this struct is %d\n", core_id, local_cpu_data(), local_cpu_data()->ID);
 
-
-	//Fill out struct per CPU core
-	uint32_t lapic = cpu_get_core_id();
-	cpu_data[cpu_increment].cpu_info = &cpu_data[cpu_increment];
-	cpu_data[cpu_increment].started = 1;
-	cpu_data[cpu_increment].APIC_ID = lapic;
-	cpu_data[cpu_increment].ID = cpu_increment;
-
-
-	write_gs_base((uint64_t) cpu_data[cpu_increment].cpu_info);
-
-
-
-
-
-	cpu_bsp_ready = 1;
-
-
+	atomic_store_explicit(&cpu_bsp_ready, true, memory_order_release);
 }
 
 void cpu_init_ap(void) {
-	while (!cpu_bsp_ready) {
-		__asm__ volatile("pause");
+	while (!atomic_load_explicit(&cpu_bsp_ready, memory_order_acquire)) {
+		CPU_RELAX();
 	}
 
-
-
-	// Get atomic inrement on counter variable
-	uint8_t core_id = atomic_increment(&cpu_increment);
-	//uint8_t core_id = __sync_add_and_fetch(&cpu_increment, 1);
-	uint32_t lapic = cpu_get_core_id();
-
-	//Fill out struct per Core
+	uint8_t core_id = atomic_fetch_add_explicit(&cpu_increment, 1, memory_order_relaxed);
+	uint32_t lapic = cpu_get_core_id();
 	cpu_data[core_id].cpu_info = &cpu_data[core_id];
 	cpu_data[core_id].started = 1;
 	cpu_data[core_id].APIC_ID = lapic;
 	cpu_data[core_id].ID = core_id;
+	cpu_data[core_id].linebuf_pos = 0;
+	writemsr(MSR_IA32_GS_BASE, (uint64_t) cpu_data[core_id].cpu_info);
 
-	write_gs_base((uint64_t) cpu_data[core_id].cpu_info);
-	printf("Startup ID, %d", core_id);
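+	// GS base is set above, so local_cpu_data() and the per-core line buffer
+	// behind printf work from this point on.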
+	printf("Startup ID %d. My local struct is at %p and my ID in this struct is %d\n", core_id, local_cpu_data(), local_cpu_data()->ID);
 
 	init_gdt();
 	load_idt(); // IDT table already filled by BSP in low memory
 	lapic_init();
-
 }
 
-void write_gs_base(uint64_t base)
-{
-	uint32_t low = (uint32_t)(base & 0xFFFFFFFF);
-	uint32_t high = (uint32_t)(base >> 32);
-
-	__asm__ volatile (
-		"mov %[msr], %%ecx\n\t"
-		"mov %[lo], %%eax\n\t"
-		"mov %[hi], %%edx\n\t"
-		"wrmsr\n\t"
-		:
-		: [msr] "i"(MSR_IA32_GS_BASE),
-		  [lo] "r"(low),
-		  [hi] "r"(high)
-		: "rax", "rdx", "rcx", "memory"
-	);
-}
-
-
 void cpu_set_timer(void) {
 	lapic_set_timer();
 }
diff --git a/src/x86_64/sync.c b/src/x86_64/sync.c
deleted file mode 100644
index 3b20e57..0000000
--- a/src/x86_64/sync.c
+++ /dev/null
@@ -1,98 +0,0 @@
-#include "sync.h"
-
-
-uint32_t atomic_read_and_add(_Atomic uint32_t *addr, uint32_t value) {
-	uint32_t prev = atomic_fetch_add_explicit(addr, value, memory_order_acq_rel);
-	return prev + value;
-}
-
-uint32_t atomic_increment(_Atomic uint32_t *addr) {
-	return atomic_read_and_add(addr, 1u);
-}
-
-
-bool barrier_init(barrier_t *bar, uint32_t n) {
-	if (!bar || n == 0) return false;
-	bar->threshold = n;
-	atomic_store_explicit(&bar->count, 0u, memory_order_relaxed);
-	atomic_store_explicit(&bar->global_sense, 0u, memory_order_relaxed);
-	COMPILER_BARRIER();
-	return true;
-}
-
-void barrier_wait(barrier_t *bar, uint8_t *local_sense) {
-	uint8_t s = (uint8_t)((*local_sense ^ 1u) & 1u);
-	*local_sense = s;
-
-	uint32_t v = atomic_fetch_add_explicit(&bar->count, 1u, memory_order_acq_rel) + 1u;
-
-	if (v == bar->threshold) {
-		atomic_store_explicit(&bar->count, 0u, memory_order_relaxed);
-		atomic_store_explicit(&bar->global_sense, s, memory_order_release);
-	} else {
-		while (atomic_load_explicit(&bar->global_sense, memory_order_acquire) != s) {
-			CPU_RELAX();
-		}
-	}
-	COMPILER_BARRIER();
-}
-
-
-void spin_lock_init(spinlock_t *l) {
-	atomic_flag_clear_explicit(&l->flag, memory_order_relaxed);
-	COMPILER_BARRIER();
-}
-
-bool spin_try_lock(spinlock_t *l) {
-	return !atomic_flag_test_and_set_explicit(&l->flag, memory_order_acquire);
-}
-
-void spin_lock(spinlock_t *l) {
-	while (atomic_flag_test_and_set_explicit(&l->flag, memory_order_acquire)) {
-		CPU_RELAX();
-	}
-	COMPILER_BARRIER();
-}
-
-void spin_unlock(spinlock_t *l) {
-	COMPILER_BARRIER();
-	atomic_flag_clear_explicit(&l->flag, memory_order_release);
-}
-
-void sem_init(semaphore_t *s, int32_t initial) {
-	atomic_store_explicit(&s->count, initial, memory_order_relaxed);
-	COMPILER_BARRIER();
-}
-
-bool sem_try_down(semaphore_t *s) {
-	int32_t old = atomic_load_explicit(&s->count, memory_order_relaxed);
-	while (old > 0) {
-		if (atomic_compare_exchange_weak_explicit(
-			&s->count, &old, old - 1,
-			memory_order_acquire,
-			memory_order_relaxed
-		)) {
-			return true;
-		}
-		CPU_RELAX();
-	}
-	return false;
-}
-
-void sem_down(semaphore_t *s) {
-	for (;;) {
-		if (sem_try_down(s)) {
-			COMPILER_BARRIER();
-			return;
-		}
-		while (atomic_load_explicit(&s->count, memory_order_relaxed) <= 0) {
-			CPU_RELAX();
-		}
-	}
-}
-
-void sem_up(semaphore_t *s) {
-	atomic_fetch_add_explicit(&s->count, 1, memory_order_release);
-	COMPILER_BARRIER();
-}