sync primitives; local linebuf on every core; spin-locked output

uosfz 2025-10-01 22:09:35 +02:00
parent 49a3734cb1
commit 5607bf4b5d
Signed by: uosfz
SSH key fingerprint: SHA256:FlktuluyhTQg3jHZNLKwxOOC5hbfrUXM0tz3IA3lGJo
8 changed files with 173 additions and 191 deletions

View file

@@ -19,7 +19,6 @@ KERNEL_SOURCES_x86_64 := \
     src/x86_64/asm.c \
     src/x86_64/address.c \
     src/x86_64/ps2_driver.c \
-    src/x86_64/sync.c \
     # end of x86_64 specific kernel sources list

 # Architecture-agnostic kernel sources.
@@ -35,6 +34,7 @@ KERNEL_SOURCES := \
     src/tar.c \
     src/time.c \
     src/std.c \
+    src/sync.c \
     $(KERNEL_SOURCES_$(ARCH)) \
     # end of kernel sources list

View file

@@ -1,6 +1,9 @@
 #ifndef KARLOS_CPU_H
 #define KARLOS_CPU_H

+#include <stdint.h>
+#include <stdbool.h>

 void cpu_init_bsp(void);
 void cpu_init_ap(void);
@@ -10,17 +13,23 @@ void cpu_set_timer(void);
 void write_gs_base(uint64_t base);

-struct cpu_info{
+#define LINEBUF_SIZE 256
+
+struct cpu_info {
+    // first member of struct is pointer to itself, for easier loading
     struct cpu_info* cpu_info;
     volatile bool started;
     uint32_t APIC_ID;
     uint8_t ID;
+    // output
+    char linebuf[LINEBUF_SIZE];
+    unsigned int linebuf_pos;
 };

-static inline uint8_t current_core_id(void) {
-    struct cpu_info *self;
-    __asm__("mov %%gs:0, %0" : "=r"(self));
-    return self->ID;
+static inline struct cpu_info *local_cpu_data(void) {
+    struct cpu_info *ptr;
+    __asm__ ("mov %%gs:0, %0" : "=r" (ptr));
+    return ptr;
 }

 void interrupt_handler_register(unsigned int vector, void (*handler)(void));
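Note: the per-core lookup works because the first member of struct cpu_info is a pointer to the struct itself and IA32_GS_BASE is pointed at that struct, so local_cpu_data() is a single mov %gs:0 load; the MSR never has to be read back. A minimal usage sketch (report_core is a hypothetical helper, not part of the commit):

    #include "cpu.h"
    #include "std.h"

    static void report_core(void) {
        struct cpu_info *me = local_cpu_data();  // one "mov %gs:0, %0" load
        me->linebuf_pos = 0;                     // per-core field, no lock needed
        printf("core %d, struct at %p\n", me->ID, (void *)me);
    }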

View file

@@ -6,33 +6,35 @@
 #include <stdbool.h>
 #include <stdatomic.h>

+#ifdef __x86_64__
 #define CPU_RELAX() __asm__ volatile("pause")
-#define COMPILER_BARRIER() atomic_signal_fence(memory_order_seq_cst)
+#else
+#define CPU_RELAX()
+#endif

 typedef struct {
     uint32_t threshold;
-    _Atomic uint32_t count;
-    _Atomic uint8_t global_sense;
+    atomic_int count;
 } barrier_t;

-typedef struct { atomic_flag flag; } spinlock_t;
+typedef struct {
+    atomic_flag flag;
+    int int_state;
+} spinlock_t;

 typedef struct { atomic_flag flag; } mutex_t;

-typedef struct { _Atomic int32_t count; } semaphore_t;
+typedef struct { atomic_int count; } semaphore_t;

+#define SPIN_INITIALIZER (spinlock_t) { .flag = ATOMIC_FLAG_INIT }

-uint32_t atomic_read_and_add(_Atomic uint32_t *addr, uint32_t value);
-uint32_t atomic_increment(_Atomic uint32_t *addr);
-
-bool barrier_init(barrier_t *bar, uint32_t n);
-void barrier_wait(barrier_t *bar, uint8_t *local_sense);
-
-void spin_lock_init(spinlock_t *l);
+void spin_init(spinlock_t *l);
 bool spin_try_lock(spinlock_t *l);
 void spin_lock(spinlock_t *l);
 void spin_unlock(spinlock_t *l);

+/// Initializes the barrier. `n` is the number of threads that will use the barrier.
+bool barrier_init(barrier_t *bar, uint32_t n);
+void barrier_wait(barrier_t *bar);

 void sem_init(semaphore_t *s, int32_t initial);
 bool sem_try_down(semaphore_t *s);
 void sem_down(semaphore_t *s);
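Note: two behavioural changes are visible in the header alone. A spinlock now carries the interrupt state saved when the lock was taken (int_state), so a lock/unlock pair can be used from code that must not be interrupted while the lock is held, and barrier_wait no longer needs the old per-thread sense flag. A small usage sketch under those assumptions (worker, NCORES and phase_barrier are hypothetical names, not from the commit):

    #include "sync.h"

    #define NCORES 4

    static spinlock_t stats_lock = SPIN_INITIALIZER;
    static barrier_t  phase_barrier;   // barrier_init(&phase_barrier, NCORES) runs once at boot
    static unsigned long events;

    void worker(void) {
        spin_lock(&stats_lock);        // disables interrupts on this core, then spins for the flag
        events++;
        spin_unlock(&stats_lock);      // clears the flag and restores the saved interrupt state

        barrier_wait(&phase_barrier);  // no local_sense argument anymore
    }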

View file

@@ -62,14 +62,12 @@ void _start() {
     unsigned info[4];
     __get_cpuid(0x1, &info[0], &info[1], &info[2], &info[3]);
     unsigned procid = info[1] >> 24;
-    if (procid != bootboot.bspid) {
+    if (procid == bootboot.bspid) {
+        cpu_init_bsp();
+        interrupt_handler_register(0x08, double_fault_handler);
+    } else {
         cpu_init_ap();
     }
-    else{
-        cpu_init_bsp();
-    }
-    interrupt_handler_register(0x08, double_fault_handler);

     __asm__("sti");

View file

@@ -1,8 +1,11 @@
 #include <stdarg.h>
 #include <stddef.h>

+#include "cpu.h"
 #include "std.h"
 #include "serial.h"
+#include "sync.h"
+#include "x86_64/interrupt.h"

 #define BUFFER_SIZE 1024
@@ -66,23 +69,29 @@ bool strcontains(const char *s, char c) {

 /* --- Line buffering --- */
 // DO NOT USE THESE FUNCTIONS ANYWHERE! USE THE PUBLIC API (EVEN IN THIS FILE)!

-static char linebuf[BUFFER_SIZE];
-static unsigned int current_buffer_position = 0;
-
 static void linebuf_flush(void) {
-    for (unsigned int i = 0; i < current_buffer_position; i++) {
-        serial_write_char(linebuf[i]);
+    static spinlock_t linebuf_lock = SPIN_INITIALIZER;
+
+    spin_lock(&linebuf_lock);
+    struct cpu_info *mycpu = local_cpu_data();
+    for (unsigned int i = 0; i < mycpu->linebuf_pos; i++) {
+        serial_write_char(mycpu->linebuf[i]);
         extern void visual_putc(char c);
-        visual_putc(linebuf[i]);
+        visual_putc(mycpu->linebuf[i]);
     }
-    current_buffer_position = 0;
+    mycpu->linebuf_pos = 0;
+    spin_unlock(&linebuf_lock);
 }

 static void linebuf_putc(char c) {
-    if (current_buffer_position == BUFFER_SIZE) {
+    struct cpu_info *mycpu = local_cpu_data();
+    if (mycpu->linebuf_pos == LINEBUF_SIZE) {
         linebuf_flush();
     }
-    linebuf[current_buffer_position++] = c;
+    mycpu->linebuf[mycpu->linebuf_pos++] = c;
 }

 /* --- Low-level output functions --- */
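Note: the single global linebuf is gone. Every core accumulates output in its own cpu_info buffer and only linebuf_flush takes the shared lock, so concurrent printing from several cores can interleave only at flush boundaries instead of per character; and because spin_lock also disables interrupts, a flush cannot be re-entered by an interrupt handler printing on the same core. A sketch of how a public wrapper in this file might drive the two helpers (putchar and the flush-on-newline policy are assumptions, not taken from the commit):

    static void putchar(char c) {
        linebuf_putc(c);        // append to this core's private buffer
        if (c == '\n') {
            linebuf_flush();    // drain to serial + framebuffer under linebuf_lock
        }
    }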

src/sync.c (new file, 97 lines)
View file

@@ -0,0 +1,97 @@
+#include "sync.h"
+#include "std.h"
+#include <stdatomic.h>
+#include "x86_64/interrupt.h"
+
+void spin_init(spinlock_t *lock) {
+    atomic_flag_clear_explicit(&lock->flag, memory_order_release);
+}
+
+// if successful, interrupts are disabled.
+bool spin_try_lock(spinlock_t *lock) {
+    int int_state = int_disable();
+    bool val = atomic_flag_test_and_set_explicit(&lock->flag, memory_order_acquire);
+    if (val == 0) { // got the lock
+        // current core holding the lock may set its interrupt state
+        lock->int_state = int_state;
+        return true;
+    } else {
+        int_restore(int_state);
+        return false;
+    }
+}
+
+void spin_lock(spinlock_t *lock) {
+    int int_state = int_disable();
+    while (atomic_flag_test_and_set_explicit(&lock->flag, memory_order_acquire)) {
+        CPU_RELAX();
+    }
+    lock->int_state = int_state;
+}
+
+void spin_unlock(spinlock_t *lock) {
+    int int_state = lock->int_state;
+    atomic_flag_clear_explicit(&lock->flag, memory_order_release);
+    int_restore(int_state);
+}
+
+bool barrier_init(barrier_t *bar, uint32_t n) {
+    if (!bar || n == 0) return false;
+    bar->threshold = n;
+    bar->count = 0;
+    atomic_thread_fence(memory_order_release);
+    return true;
+}
+
+void barrier_wait(barrier_t *bar) {
+    // wait until previous threads have passed out of barrier
+    while (atomic_load_explicit(&bar->count, memory_order_acquire) < 0) {
+        CPU_RELAX();
+    }
+    int new_val = atomic_fetch_add_explicit(&bar->count, 1, memory_order_acq_rel) + 1;
+    ASSERT(new_val > 0);
+    if ((uint32_t)new_val == bar->threshold) {
+        atomic_store_explicit(&bar->count, -(int)bar->threshold + 1, memory_order_release);
+    } else {
+        // wait until the final thread inverts the value to `-n`
+        while (atomic_load_explicit(&bar->count, memory_order_acquire) > 0) {
+            CPU_RELAX();
+        }
+        atomic_fetch_add_explicit(&bar->count, 1, memory_order_release);
+    }
+}
+
+void sem_init(semaphore_t *s, int initial) {
+    atomic_store_explicit(&s->count, initial, memory_order_release);
+}
+
+bool sem_try_down(semaphore_t *s) {
+    int32_t old = atomic_load_explicit(&s->count, memory_order_relaxed);
+    while (old > 0) {
+        if (atomic_compare_exchange_weak_explicit(
+                &s->count, &old, old - 1, // `old` is overwritten here
+                memory_order_acquire,
+                memory_order_relaxed
+            )) {
+            return true;
+        }
+        CPU_RELAX();
+    }
+    return false;
+}
+
+void sem_down(semaphore_t *s) {
+    for (;;) {
+        if (sem_try_down(s)) {
+            return;
+        }
+        CPU_RELAX();
+    }
+}
+
+void sem_up(semaphore_t *s) {
+    atomic_fetch_add_explicit(&s->count, 1, memory_order_release);
+}
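Note: the new barrier is a counting barrier with a negative drain phase rather than the old sense-reversal scheme. Cores increment count as they arrive; the last arrival overwrites it with -(threshold - 1), and each waiter adds 1 on its way out, so the count returns to 0 exactly when the last waiter has left and the barrier is immediately reusable. The stored sentinel is -(n - 1), not the -n mentioned in the in-code comment, which is what makes the arithmetic come out to zero. The initial `count < 0` spin keeps a core that re-enters the barrier early from disturbing a round that is still draining. Worked trace for threshold n = 3 (values of bar->count):

    // arrive A: 0 -> 1          A spins while count > 0
    // arrive B: 1 -> 2          B spins while count > 0
    // arrive C: 2 -> 3 == n     C stores -(n - 1) = -2 and passes through
    // exit  A: -2 -> -1         fetch_add(+1) on the way out
    // exit  B: -1 -> 0          barrier is back in its initial state, ready for reuse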

View file

@@ -1,22 +1,22 @@
+#include <stdatomic.h>
 #include "x86_64/apic.h"
 #include "cpu.h"
 #include "std.h"
-#include "bootboot.h"
 #include "ram.h"
 #include "framebuffer.h"
 #include "console.h"
-#include "paging.h"
 #include "sync.h"
 #include "x86_64/cmos.h"
+#include "x86_64/asm.h"
 #include "x86_64/ps2_driver.h"

 // Per CPU Data
 #define MSR_IA32_GS_BASE 0xC0000101

 struct cpu_info cpu_data[256];
-volatile uint8_t cpu_bsp_ready = 0;
-uint32_t cpu_increment = 0;
+atomic_bool cpu_bsp_ready = false;
+atomic_uint cpu_increment = 0;

 // --- segmentation ---
 extern void loadcs(uint16_t ss, uint16_t cs);
@@ -63,7 +63,6 @@ void init_gdt() {
     __asm__("mov %0, %%es" ::"r"(DATA_SEGMENT << 3));
     __asm__("mov %0, %%ds" ::"r"(DATA_SEGMENT << 3));
     __asm__("mov %0, %%fs" ::"r"(DATA_SEGMENT << 3));
-    __asm__("mov %0, %%gs" ::"r"(DATA_SEGMENT << 3));
     loadcs(DATA_SEGMENT << 3, CODE_SEGMENT << 3);
 }
@@ -467,6 +466,16 @@ void inttable(void) {
 }

 void cpu_init_bsp(void) {
+    //Fill out struct per CPU core
+    uint8_t core_id = atomic_fetch_add_explicit(&cpu_increment, 1, memory_order_relaxed);
+    uint32_t lapic = cpu_get_core_id(); // TODO do I have to initialize lapic to get this?
+    cpu_data[core_id].cpu_info = &cpu_data[core_id];
+    cpu_data[core_id].started = 1;
+    cpu_data[core_id].APIC_ID = lapic;
+    cpu_data[core_id].ID = core_id;
+    cpu_data[core_id].linebuf_pos = 0;
+    writemsr(MSR_IA32_GS_BASE, (uint64_t) cpu_data[core_id].cpu_info);
+
     // Early setup
     console_init();
     init_gdt();
@@ -479,87 +488,43 @@ void cpu_init_bsp(void) {
     pic8259_disable(); // mask legacy PICs
     lapic_init();      // enable local APIC

     // Memory & video
     ram_init();
     fb_init();
     console_clear();

     // Device init
     rtc_init();
     ps2_init();

-    //Fill out struct per CPU core
-    uint32_t lapic = cpu_get_core_id();
-    cpu_data[cpu_increment].cpu_info = &cpu_data[cpu_increment];
-    cpu_data[cpu_increment].started = 1;
-    cpu_data[cpu_increment].APIC_ID = lapic;
-    cpu_data[cpu_increment].ID = cpu_increment;
-    write_gs_base((uint64_t) cpu_data[cpu_increment].cpu_info);
-
-    cpu_bsp_ready = 1;
+    printf("Startup ID %d. My local struct is at %p and my ID in this struct is %d\n", core_id, local_cpu_data(), local_cpu_data()->ID);
+    atomic_store_explicit(&cpu_bsp_ready, true, memory_order_release);
 }

 void cpu_init_ap(void) {
-    while (!cpu_bsp_ready) {
-        __asm__ volatile("pause");
+    while (!atomic_load_explicit(&cpu_bsp_ready, memory_order_acquire)) {
+        CPU_RELAX();
     }

-    // Get atomic inrement on counter variable
-    uint8_t core_id = atomic_increment(&cpu_increment);
-    //uint8_t core_id = __sync_add_and_fetch(&cpu_increment, 1);
-    uint32_t lapic = cpu_get_core_id();
-
     //Fill out struct per Core
+    uint8_t core_id = atomic_fetch_add_explicit(&cpu_increment, 1, memory_order_relaxed);
+    uint32_t lapic = cpu_get_core_id();
     cpu_data[core_id].cpu_info = &cpu_data[core_id];
     cpu_data[core_id].started = 1;
     cpu_data[core_id].APIC_ID = lapic;
     cpu_data[core_id].ID = core_id;
+    cpu_data[core_id].linebuf_pos = 0;
+    writemsr(MSR_IA32_GS_BASE, (uint64_t) cpu_data[core_id].cpu_info);

-    write_gs_base((uint64_t) cpu_data[core_id].cpu_info);
-    printf("Startup ID, %d", core_id);
+    printf("Startup ID %d. My local struct is at %p and my ID in this struct is %d\n", core_id, local_cpu_data(), local_cpu_data()->ID);

     init_gdt();
     load_idt(); // IDT table already filled by BSP in low memory
     lapic_init();
 }

-void write_gs_base(uint64_t base)
-{
-    uint32_t low = (uint32_t)(base & 0xFFFFFFFF);
-    uint32_t high = (uint32_t)(base >> 32);
-
-    __asm__ volatile (
-        "mov %[msr], %%ecx\n\t"
-        "mov %[lo], %%eax\n\t"
-        "mov %[hi], %%edx\n\t"
-        "wrmsr\n\t"
-        :
-        : [msr] "i"(MSR_IA32_GS_BASE),
-          [lo] "r"(low),
-          [hi] "r"(high)
-        : "rax", "rdx", "rcx", "memory"
-    );
-}
-
 void cpu_set_timer(void) {
     lapic_set_timer();
 }
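Note: the GS base is now installed with writemsr(MSR_IA32_GS_BASE, ...) before init_gdt runs, and init_gdt no longer reloads %gs. That ordering matters: loading a flat data selector into %gs resets the hidden GS base to the descriptor base (0), which would wipe the per-core pointer that local_cpu_data() depends on. Assuming writemsr from x86_64/asm.h is a plain wrmsr wrapper, the operation amounts to the sketch below (not the repo's code):

    // Hypothetical wrmsr wrapper: ecx = MSR index, edx:eax = 64-bit value.
    static inline void wrmsr_sketch(uint32_t msr, uint64_t value) {
        uint32_t lo = (uint32_t)value;
        uint32_t hi = (uint32_t)(value >> 32);
        __asm__ volatile("wrmsr" :: "c"(msr), "a"(lo), "d"(hi) : "memory");
    }

    // wrmsr_sketch(MSR_IA32_GS_BASE, (uint64_t)&cpu_data[core_id]);
    // From here on, "mov %gs:0, %reg" yields &cpu_data[core_id] on this core.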

View file

@@ -1,98 +0,0 @@
-#include "sync.h"
-
-uint32_t atomic_read_and_add(_Atomic uint32_t *addr, uint32_t value) {
-    uint32_t prev = atomic_fetch_add_explicit(addr, value, memory_order_acq_rel);
-    return prev + value;
-}
-
-uint32_t atomic_increment(_Atomic uint32_t *addr) {
-    return atomic_read_and_add(addr, 1u);
-}
-
-bool barrier_init(barrier_t *bar, uint32_t n) {
-    if (!bar || n == 0) return false;
-    bar->threshold = n;
-    atomic_store_explicit(&bar->count, 0u, memory_order_relaxed);
-    atomic_store_explicit(&bar->global_sense, 0u, memory_order_relaxed);
-    COMPILER_BARRIER();
-    return true;
-}
-
-void barrier_wait(barrier_t *bar, uint8_t *local_sense) {
-    uint8_t s = (uint8_t)((*local_sense ^ 1u) & 1u);
-    *local_sense = s;
-
-    uint32_t v = atomic_fetch_add_explicit(&bar->count, 1u, memory_order_acq_rel) + 1u;
-
-    if (v == bar->threshold) {
-        atomic_store_explicit(&bar->count, 0u, memory_order_relaxed);
-        atomic_store_explicit(&bar->global_sense, s, memory_order_release);
-    } else {
-        while (atomic_load_explicit(&bar->global_sense, memory_order_acquire) != s) {
-            CPU_RELAX();
-        }
-    }
-    COMPILER_BARRIER();
-}
-
-void spin_lock_init(spinlock_t *l) {
-    atomic_flag_clear_explicit(&l->flag, memory_order_relaxed);
-    COMPILER_BARRIER();
-}
-
-bool spin_try_lock(spinlock_t *l) {
-    return !atomic_flag_test_and_set_explicit(&l->flag, memory_order_acquire);
-}
-
-void spin_lock(spinlock_t *l) {
-    while (atomic_flag_test_and_set_explicit(&l->flag, memory_order_acquire)) {
-        CPU_RELAX();
-    }
-    COMPILER_BARRIER();
-}
-
-void spin_unlock(spinlock_t *l) {
-    COMPILER_BARRIER();
-    atomic_flag_clear_explicit(&l->flag, memory_order_release);
-}
-
-void sem_init(semaphore_t *s, int32_t initial) {
-    atomic_store_explicit(&s->count, initial, memory_order_relaxed);
-    COMPILER_BARRIER();
-}
-
-bool sem_try_down(semaphore_t *s) {
-    int32_t old = atomic_load_explicit(&s->count, memory_order_relaxed);
-    while (old > 0) {
-        if (atomic_compare_exchange_weak_explicit(
-                &s->count, &old, old - 1,
-                memory_order_acquire,
-                memory_order_relaxed
-            )) {
-            return true;
-        }
-        CPU_RELAX();
-    }
-    return false;
-}
-
-void sem_down(semaphore_t *s) {
-    for (;;) {
-        if (sem_try_down(s)) {
-            COMPILER_BARRIER();
-            return;
-        }
-        while (atomic_load_explicit(&s->count, memory_order_relaxed) <= 0) {
-            CPU_RELAX();
-        }
-    }
-}
-
-void sem_up(semaphore_t *s) {
-    atomic_fetch_add_explicit(&s->count, 1, memory_order_release);
-    COMPILER_BARRIER();
-}