https://github.com/PoutineSyropErable/MapleKernel The BareBones dir. debug2 branch. e5cf6d69e3f9f053 commit hash.
How is a proper 32PM -> 16RM mode switch done on modern x86_64? My 16 bit code is being executed as 32 bit (after a far jump) and undefined behavior happens. Is this a build issue (compile, link) and a special technique I don't know is needed to build multi cpu mode programs, a qemu bug (shouldn't be). Or is it a logic issue in my code/wrong order of things?
Background detail:
I want to learn the entire boot chain process. And cpu mode switches are thing that will be needed.
Now, if you write an entire bootloader, and you want to have more then 1mb of program size, then you will need to write a small filesystem/drive driver to read the disk and load the code into memory at runtime.
But that's annoying, so a hack, is to switch to 16 bit mode use the uefi interupts and read disk at a time. There's more details on the plan, but they aren't important.
That's how I want to do it, since it's a pedagocial exercise.
To simplify this, I've decided to have a 32 bit kernel, boatloaded by grub.
And in it, I will do a quick 32 -> 16 -> 32. Nothing is done in 16 bit mode. (Eventually, I'll make it call a trivial add16(uint16_t a, uint16_t b) function. For now, it's hardwired to return a specific value. Later, 16 bit code to interact with uefi can be inserted and then built from there.
The mode switching is the hard thing. Once I got that down, then I'll learn the rest.
So, here's the build script
#!/bin/bash
# build_debug.sh
set -eou pipefail
# Define directories
BUILD_DIR="build"
ISO_DIR="isodir"
# Create necessary directories
mkdir -p "$BUILD_DIR" "$ISO_DIR/boot/grub"
# Assemble the bootloader assembly with debug info
nasm -f elf32 -g -F dwarf boot32.s -o "$BUILD_DIR/boot32.o"
nasm -f elf32 -g -F dwarf boot_intel.asm -o "$BUILD_DIR/boot.o"
nasm -f elf32 -g -F dwarf add16_wrapper32.s -o "$BUILD_DIR/add16_wrapper32.o"
nasm -f elf -g -F dwarf add16_wrapper16.s -o "$BUILD_DIR/add16_wrapper16.o"
# ===== a raw binary that will be dd into the .bin ?
# nasm -f bin add16_wrapper16.s -o "$BUILD_DIR/add16_wrapper16.bin"
# Compile the kernel
i686-elf-gcc -c kernel.c -o "$BUILD_DIR/kernel.o" -std=gnu99 -ffreestanding -O2 -Wall -Wextra -g
i686-elf-gcc -c virtual_memory.c -o "$BUILD_DIR/virtual_memory.o" -std=gnu99 -ffreestanding -O2 -Wall -Wextra -g
i686-elf-gcc -c idt.c -o "$BUILD_DIR/idt.o" -std=gnu99 -ffreestanding -O2 -Wall -Wextra -g
ia16-elf-gcc -c ./add16.c -o "$BUILD_DIR/add16.o" -std=gnu99 -ffreestanding -O2 -Wall -Wextra -g
printf "\n\n=======Start of linking========\n\n\n"
i686-elf-gcc -T linker_debug.ld -o "$BUILD_DIR/myos.bin" -ffreestanding -O2 -nostdlib \
"$BUILD_DIR/boot32.o" \
"$BUILD_DIR/boot.o" \
"$BUILD_DIR/kernel.o" \
"$BUILD_DIR/virtual_memory.o" \
"$BUILD_DIR/idt.o" \
"$BUILD_DIR/add16_wrapper16.o" \
"$BUILD_DIR/add16_wrapper32.o" \
"$BUILD_DIR/add16.o" \
-lgcc -g
printf "\n\n=======End of linking========\n\n\n"
# dd if="$BUILD_DIR/add16_wrapper16.bin" \
# of="$BUILD_DIR/myos.bin" \
# bs=1 seek=$((0xB080)) conv=notrunc
# Copy the kernel binary and GRUB configuration to the ISO directory
cp "$BUILD_DIR/myos.bin" "$ISO_DIR/boot/myos.bin"
cp grub.cfg "$ISO_DIR/boot/grub/grub.cfg"
# Create the ISO image
grub-mkrescue -o "$BUILD_DIR/myos.iso" "$ISO_DIR"
echo "ISO created successfully: $BUILD_DIR/myos.iso"
# Create flat binary from ELF (for QEMU boot)
printf "\n\n=======Start of Qemu========\n\n\n"
# Start QEMU in debug mode (paused, waiting for GDB)
qemu-system-i386 \
-cdrom "$BUILD_DIR/myos.iso" \
-s -S \
-no-reboot \
-d in_asm,int,cpu_reset \
-D qemu_instr.log \
-serial stdio &
printf "\n\n=======End of Qemu========\n\n\n"
QEMU_PID=$!
# Give QEMU a second to start up
sleep 1
# Launch VNC viewer
vncviewer localhost:5900
# After you close the VNC viewer, kill QEMU
kill $QEMU_PID 2>/dev/null
The linker_debug.ld script:
/* Entry point of the kernel */
ENTRY(_start)
/* Sections layout */
SECTIONS
{
/* -----------------------------
16-bit real mode code & data
----------------------------- */
. = 0x7000; /* start at 28 KiB, safely below 1 MB */
/* Main text section */
.multiboot_start BLOCK(4K) : ALIGN(4K)
{
*(.multiboot) /* multiboot header */
}
/* 16-bit stack */
.stack16 (NOLOAD) : ALIGN(16)
{
stack16_start = .;
. += 16*1024; /* 16 KiB stack */
stack16_end = .;
}
. += 16;
/* Arguments / result buffer for 16-bit wrapper */
.args16 (NOLOAD) : ALIGN(4)
{
args16_start = .;
. += 16; /* 16 bytes for arguments + result */
args16_end = .;
}
. += 16;
.call_add16_section : ALIGN(16)
{
add1632_start = .;
KEEP(*(.text.add1632)) /* 32-bit wrapper code */
add1632_end = .;
}
.add16_wrapper : ALIGN(16)
{
add1616_start = .;
KEEP(*(.text.add1616)) /* 32-bit wrapper code */
add1616_end = .;
}
/* 16-bit wrapper + add16 function */
.add16_function : ALIGN(16)
{
KEEP(*(.text.add16)) /* add16.c */
}
Important address:
.resume32 : ALIGN(16)
{
KEEP(*(.text.resume32)) /* 32-bit wrapper code */
}
/* -----------------------------
32-bit protected mode code
----------------------------- */
. = 0x200000; /* 2 MiB start for 32-bit kernel */
/* Main text section */
.text BLOCK(4K) : ALIGN(4K)
{
/* *(.multiboot) /* multiboot header */ */
*(.text) /* 32-bit kernel code */
}
/* Read-only data */
.rodata BLOCK(4K) : ALIGN(4K)
{
*(.rodata)
}
/* Initialized data */
.data BLOCK(4K) : ALIGN(4K)
{
*(.data)
}
/* Uninitialized data + main stack */
.bss BLOCK(4K) : ALIGN(4K)
{
*(COMMON)
*(.bss)
}
/* Catch-all for any other sections */
}
The 32bit wrapper, it's called in kernel.c
;add16_wrapper32.s
BITS 32
global call_add16
global resume32
extern add1616_start
extern stack16_start
extern stack16_end
extern args16_start
section .text.add1632
call_add16:
; Save 32-bit registers and flags
pushad
pushfd
push ds
push es
push fs
push gs
mov eax, 0xdeadfac1
; rdi = arg1
; rsi = arg2
; Save the stack pointer in the first 1mb (first 64kb in fact)
; So its accessible in 16 bit, and can be restored on the way back to 32 bit
mov [args16_start], esp ;
cli
; --- Switch to 16-bit mode ---
mov eax, cr0
and eax, 0xFFFFFFFE
mov cr0, eax
; setup 16 bit data segment
mov ax, 0
mov ds, ax
; Set up 16-bit stack
mov ax, word stack16_start
shr ax, 4 ; segment = address >> 4
mov ss, ax
mov esp, 0x4000
; 1024 = 0x0400 = 1.00 KB
; 2048 = 0x0800 = 1.00 KB
; 4096 = 0x1000 = 1.00 KB
; 8192 = 0x2000 = 1.00 KB
; 16384 = 0x4000 = 1.00 KB
; Far jump to 16-bit wrapper
jmp far 00:add1616_start
halt_loop:
hlt
jmp halt_loop
;
; reset:
; cli ; disable interrupts
; xor eax, eax
; lidt [eax] ; load IDT base = 0, limit = 0 (invalid)
; int 3 ; trigger interrupt -> #GP -> #DF -> triple fault
section .text.resume32
; resume32 will be called by the 16-bit code when done
resume32:
; Restore segment registers
mov eax, 0xbad32
pop gs
pop fs
pop es
pop ds
; Restore general-purpose registers and flags
popfd
popad
; Retrieve result
; movzx eax, word [args16_start + 8]
mov eax, 15
ret
halt_loop2:
hlt
jmp halt_loop2
The 16 bit that would call another function (Stripped for simplicity) and then return to 32.
; add16_wrapper16.s
BITS 16
global add1616_start
extern resume32
extern args16_start
section .text.add1616
add1616_start:
mov bx, 0xFAC2
; To find it easier in the binary
; --- Switch back to protected mode ---
mov eax, cr0
or eax, 1
mov cr0, eax
mov esp, [args16_start]
mov ax, 0x18
mov ss, ax
mov ds, ax
jmp far 0x10:resume32
halt_loop3:
; This should never be reached
hlt
jmp halt_loop3
The execution:
# terminal one
./build_debug.sh
# terminal 2
gdb build/myos.bin
(gdb) target remote :1234
(gdb) b kernel_main
(gdb) c
# Return to the vga grub window, press enter a bunch to select the first entry.
(gdb) b before
(gdb) c
(gdb) b*0xb040 (call_add16)
(gdb) c
(gdb) b *0xb080 (add1616_start)
(gdb) c
(gdb) disassemble
# It's running the wrong disassembly.
# It seems to be reading the 16 bit code as 32 bit encoding.
# So the first instruction is half fucked, and the rest completly wrong.
# Qemu logs show the same behavior.
# And esp isn't properly read from the args.
Program Print
===== Start of Kernel=====
The address of stack16_start: 7010
The address of stack16_end: B010
The address of args16_start: B020
The address of args16_end: B030
The address of add1632: B040
The address of call_add16: B040
The address of add1616: B080
The address of resume32: B0B0
The value of cs: 16
The value of ss: 24
The value of ds: 24
The value of es: 24
The value of fs: 24
The value of gs: 24
gdt base address = 10B0
gdt size limit = 32
The gdtr values:
gdt[0].low = 0
gdt[0].high = 0
gdt[1].low = 0
gdt[1].high = 0
gdt[2].low = FFFF
gdt[2].high = CF9A00
gdt[3].low = FFFF
gdt[3].high = CF9300
Before the main execution
the end of the qemu log:
0x0000b040: 60 pushal
----------------
IN:
0x0000b041: 9c pushfl
----------------
IN:
0x0000b042: 1e pushl %ds
----------------
IN:
0x0000b043: 06 pushl %es
Important address:
----------------
IN:
0x0000b044: 0f a0 pushl %fs
----------------
IN:
0x0000b046: 0f a8 pushl %gs
----------------
IN:
0x0000b048: b8 c1 fa ad de movl $0xdeadfac1, %eax
----------------
IN:
0x0000b04d: 89 25 20 b0 00 00 movl %esp, 0xb020
----------------
IN:
0x0000b053: fa cli
----------------
IN:
0x0000b054: 0f 20 c0 movl %cr0, %eax
----------------
IN:
0x0000b057: 83 e0 fe andl $0xfffffffe, %eax
----------------
IN:
0x0000b05a: 0f 22 c0 movl %eax, %cr0
----------------
IN:
0x0000b05d: 66 b8 00 00 movw $0, %ax
----------------
IN:
0x0000b061: 8e d8 movl %eax, %ds
----------------
IN:
0x0000b063: 66 b8 10 70 movw $0x7010, %ax
----------------
IN:
0x0000b067: 66 c1 e8 04 shrw $4, %ax
----------------
IN:
0x0000b06b: 8e d0 movl %eax, %ss
----------------
IN:
0x0000b06d: bc 00 40 00 00 movl $0x4000, %esp
----------------
IN:
0x0000b072: ea 80 b0 00 00 00 00 ljmpl $0x0:$0xb080
----------------
IN:
0x0000b080: bb c2 fa 0f 20 movl $0x200ffac2, %ebx
0x0000b085: c0 66 83 c8 shlb $0xc8, -0x7d(%esi)
0x0000b089: 01 0f addl %ecx, (%edi)
0x0000b08b: 22 c0 andb %al, %al
0x0000b08d: 66 8b 26 movw (%esi), %sp
0x0000b090: 20 b0 b8 18 00 8e andb %dh, -0x71ffe748(%eax)
0x0000b096: d0 8e d8 ea b0 b0 rorb -0x4f4f1528(%esi)
0x0000b09c: 10 00 adcb %al, (%eax)
0x0000b09e: f4 hlt
----------------
IN:
0x0000b090: 20 b0 b8 18 00 8e andb %dh, -0x71ffe748(%eax)
----------------
IN:
0x0000b096: d0 8e d8 ea b0 b0 rorb -0x4f4f1528(%esi)
0x0000b09c: 10 00 adcb %al, (%eax)
0x0000b09e: f4 hlt
----------------
IN:
0x0000b096: d0 8e d8 ea b0 b0 rorb -0x4f4f1528(%esi)
----------------
IN:
0x0000b09c: 10 00 adcb %al, (%eax)
0x0000b09e: f4 hlt
What you see is the previous state. I tried to use a flatbinary for the 16 bit code (-elf bin). I tried to dd it into the binary file. It didn't worked.
And a bunch of variation. I searched for ways to just include a raw binary in a linker script. or an elf16 format.