diff --git a/packages/cjit/src/Runtime.zig b/packages/cjit/src/Runtime.zig
index d5a2b44..37f4098 100644
--- a/packages/cjit/src/Runtime.zig
+++ b/packages/cjit/src/Runtime.zig
@@ -1,37 +1,28 @@
 const std = @import("std");
+const builtin = @import("builtin");
 const Self = @This();
 
+const tokens = @import("tokens.zig");
 const types = @import("types.zig");
 const Sections = @import("Sections.zig");
 const StackValue = @import("StackValue.zig");
 
+const Tokenizer = tokens.Tokenizer;
 const Type = types.Type;
 
 pub const virtual_stack_size = 256;
 
-state: State = .init,
 allocator: std.mem.Allocator,
 arena: std.heap.ArenaAllocator,
-includes: std.DoublyLinkedList = .{},
 symbols_needed: std.StringHashMapUnmanaged(ExternSymbol) = .{},
 symbols_provided: std.StringHashMapUnmanaged(InternSymbol) = .{},
 symbols_located: std.StringHashMapUnmanaged(LocatedSymbol) = .{},
 relocation_table: std.ArrayList(Relocation) = .{},
 /// Bounded, preallocated with the capacity of `virtual_stack_size`.
 virtual_stack: std.ArrayList(StackValue),
+tokenizer: Tokenizer,
 sections: Sections = .{},
 
-pub const State = enum(u8) {
-    init,
-    compiled,
-    linked,
-};
-
-pub const Include = struct {
-    code: []const u8,
-    node: std.DoublyLinkedList.Node = .{},
-};
-
 pub const ExternSymbol = struct {
     name: []const u8,
     c_type: Type,
@@ -49,7 +40,6 @@ pub const Location = union(enum) {
     text: usize,
     data: usize,
     rodata: usize,
-    bss: usize,
 };
 
 pub const LocatedSymbol = struct {
@@ -62,6 +52,16 @@ pub const LocatedSymbol = struct {
 pub const Relocation = struct {
     addr: usize,
     location: Location,
+    /// How the resolved target address is encoded at `addr`.
+    type: RelocationType,
+};
+
+/// Encoding used when a relocation is patched during `link`.
+pub const RelocationType = enum {
+    /// Absolute, pointer-sized address stored in the rodata section.
+    global_offset_table,
+    /// Signed 32-bit displacement stored in the text section, relative to the end of that field.
+    rip_disp32,
 };
 
 pub fn init(allocator: std.mem.Allocator) !Self {
@@ -70,11 +70,13 @@ pub fn init(allocator: std.mem.Allocator) !Self {
     errdefer arena.deinit();
 
     const virtual_stack_buffer = try arena_allocator.alloc(StackValue, virtual_stack_size);
+    const tokenizer = try Tokenizer.init(arena_allocator);
 
     return .{
         .allocator = allocator,
         .arena = arena,
         .virtual_stack = .initBuffer(virtual_stack_buffer),
+        .tokenizer = tokenizer,
     };
 }
 
@@ -84,30 +86,21 @@ pub fn deinit(self: *Self) void {
     self.symbols_located.deinit(self.allocator);
     self.relocation_table.deinit(self.allocator);
     self.arena.deinit();
+    // TODO: deinit sections (stage-dependent)
     self.* = undefined;
 }
 
-pub fn include(self: *Self, code: []const u8) !void {
-    std.debug.assert(self.state == .init);
-    if (code.len == 0) return;
-
-    const include_ptr = try self.create(Include);
-    include_ptr.* = .{ .code = code };
-
-    self.includes.append(&include_ptr.node);
-}
-
-pub fn compile(self: *Self, code: []const u8) !void {
-    std.debug.assert(self.state == .init);
-    _ = code;
-
-    self.state = .compiled;
-    std.debug.panic("Not implemented", .{});
+/// Tokenizes `code`, using `filename` as the source name handed to the tokenizer.
+/// Code generation is not implemented yet: the first token produced triggers a panic.
+pub fn compile(self: *Self, filename: []const u8, code: []const u8) !void {
+    self.tokenizer.setSource(filename, code);
+    while (try self.tokenizer.nextToken(self.arena.allocator())) |token| {
+        _ = token;
+        std.debug.panic("Not implemented", .{});
+    }
 }
 
 pub fn setSymbol(self: *Self, comptime T: type, name: []const u8, ptr: *const T) !void {
-    std.debug.assert(self.state == .compiled);
-
     const symbol = self.symbols.getPtr(name) orelse return error.SymbolNotFound;
     if (!types.isCompatible(T, symbol.c_type)) return error.IncompatibleType;
 
@@ -115,15 +108,50 @@ pub fn setSymbol(self: *Self, comptime T: type, name: []const u8, ptr: *const T)
 }
 
+/// Moves the sections into page-aligned memory, patches every pending
+/// relocation against the final section addresses, then applies each
+/// section's target protection.
+///
+/// Returns `error.RelocationError` when a RIP-relative target cannot be
+/// reached with a signed 32-bit displacement.
 pub fn link(self: *Self) !void {
-    std.debug.assert(self.state == .compiled);
+    try self.sections.relocateSections(self.allocator);
 
-    self.state = .linked;
-    std.debug.panic("Not implemented", .{});
+    const text_base = @intFromPtr(self.sections.text.data.items.ptr);
+    const data_base = @intFromPtr(self.sections.data.data.items.ptr);
+    const rodata_base = @intFromPtr(self.sections.rodata.data.items.ptr);
+    const endian = builtin.cpu.arch.endian();
+
+    for (self.relocation_table.items) |relocation| {
+        const target_addr = switch (relocation.location) {
+            .text => |offset| text_base + offset,
+            .data => |offset| data_base + offset,
+            .rodata => |offset| rodata_base + offset,
+        };
+
+        switch (relocation.type) {
+            .global_offset_table => {
+                // `writeInt` takes a pointer to a fixed-size array, hence `[addr..][0..N]`.
+                const slot = self.sections.rodata.data.items[relocation.addr..][0..@sizeOf(usize)];
+                std.mem.writeInt(usize, slot, target_addr, endian);
+            },
+            .rip_disp32 => {
+                // `rip` is the address right after the 4-byte displacement field
+                // (assumes nothing follows it in the instruction); encode `target - rip`.
+                const rip = text_base + relocation.addr + 4;
+                const disp64: isize = @bitCast(target_addr -% rip);
+                const disp32 = std.math.cast(i32, disp64) orelse return error.RelocationError;
+                const slot = self.sections.text.data.items[relocation.addr..][0..4];
+                std.mem.writeInt(i32, slot, disp32, endian);
+            },
+        }
+    }
+
+    self.relocation_table.clearAndFree(self.allocator);
+
+    try self.sections.protectSections();
 }
 
 pub fn getSymbol(self: *const Self, comptime T: type, name: []const u8) ?*const T {
-    std.debug.assert(self.state == .linked);
-
     const symbol = self.symbols.get(name) orelse return null;
     return @ptrCast(@alignCast(symbol.ptr));
 }
diff --git a/packages/cjit/src/Sections.zig b/packages/cjit/src/Sections.zig
index 3211a27..e83a410 100644
--- a/packages/cjit/src/Sections.zig
+++ b/packages/cjit/src/Sections.zig
@@ -1,14 +1,21 @@
 const std = @import("std");
+const builtin = @import("builtin");
+const Self = @This();
 
-text: Section = .{ .read_only = true, .executable = true },
-data: Section = .{ .read_only = false, .executable = false },
-rodata: Section = .{ .read_only = true, .executable = false },
-bss: usize = 0,
+text: Section = .{ .protection = .executable },
+data: Section = .{ .protection = .read_write },
+rodata: Section = .{ .protection = .read_only },
 
 pub const Section = struct {
     data: std.ArrayList(u8) = .{},
-    read_only: bool,
-    executable: bool,
+    protection: Protection,
+
+    /// Memory protection applied to the section's pages by `protectSections`.
+    pub const Protection = enum {
+        executable,
+        read_only,
+        read_write,
+    };
 
     pub fn writeValue(self: *Section, value: anytype, allocator: std.mem.Allocator) !void {
         const T = @TypeOf(value);
@@ -32,4 +39,167 @@ pub const Section = struct {
 
         try self.data.appendNTimes(allocator, 0, padding);
     }
+
+    /// Number of whole pages needed to hold the section's current contents.
+    pub fn pageCount(self: Section) usize {
+        const page_size = std.heap.pageSize();
+        const section_size = self.data.items.len;
+        return @divFloor(section_size + page_size - 1, page_size);
+    }
 };
+
+/// Copies every section into one freshly mapped read-write region, laid out as
+/// text, data, rodata with each part padded to whole pages, then frees the old
+/// buffers through `allocator`. Afterwards each section's `data` spans its
+/// page-rounded part of the mapping and must no longer be grown or freed through
+/// `allocator`.
+///
+/// Returns `error.OutOfMemory` when the operating system refuses the mapping.
+pub fn relocateSections(self: *Self, allocator: std.mem.Allocator) !void {
+    const page_size = std.heap.pageSize();
+
+    const text_pages = self.text.pageCount();
+    const text_bytes = text_pages * page_size;
+
+    const data_pages = self.data.pageCount();
+    const data_bytes = data_pages * page_size;
+
+    const rodata_pages = self.rodata.pageCount();
+    const rodata_bytes = rodata_pages * page_size;
+
+    const total_pages = text_pages + data_pages + rodata_pages;
+    const total_bytes = text_bytes + data_bytes + rodata_bytes;
+    std.debug.assert(total_bytes == total_pages * page_size);
+
+    const ptr: [*]u8 = sw: switch (builtin.os.tag) {
+        .windows => {
+            const windows = std.os.windows;
+            const ntdll = windows.ntdll;
+
+            var base_addr: ?*anyopaque = null;
+            var size: windows.SIZE_T = total_bytes;
+            const status = ntdll.NtAllocateVirtualMemory(
+                windows.GetCurrentProcess(),
+                @ptrCast(&base_addr),
+                0,
+                &size,
+                windows.MEM_COMMIT | windows.MEM_RESERVE,
+                windows.PAGE_READWRITE,
+            );
+
+            if (status == .SUCCESS) {
+                break :sw @ptrCast(base_addr);
+            } else {
+                return error.OutOfMemory;
+            }
+        },
+        .linux => {
+            const linux = std.os.linux;
+
+            const rc = linux.mmap(
+                null,
+                total_bytes,
+                linux.PROT.READ | linux.PROT.WRITE,
+                .{ .TYPE = .PRIVATE, .ANONYMOUS = true },
+                -1,
+                0,
+            );
+            const status: linux.E = .init(rc);
+
+            if (status == .SUCCESS) {
+                break :sw @ptrFromInt(rc);
+            } else {
+                return error.OutOfMemory;
+            }
+        },
+        else => @compileError("Operating system " ++ @tagName(builtin.os.tag) ++ " not supported"),
+    };
+
+    const text_slice = ptr[0..text_bytes];
+    const data_slice = ptr[text_bytes .. text_bytes + data_bytes];
+    const rodata_slice = ptr[text_bytes + data_bytes .. text_bytes + data_bytes + rodata_bytes];
+
+    // Copy the contents across before the old buffers are released.
+    @memcpy(text_slice[0..self.text.data.items.len], self.text.data.items);
+    @memcpy(data_slice[0..self.data.data.items.len], self.data.data.items);
+    @memcpy(rodata_slice[0..self.rodata.data.items.len], self.rodata.data.items);
+
+    self.text.data.clearAndFree(allocator);
+    self.data.data.clearAndFree(allocator);
+    self.rodata.data.clearAndFree(allocator);
+
+    self.text.data = .{ .items = text_slice, .capacity = text_bytes };
+    self.data.data = .{ .items = data_slice, .capacity = data_bytes };
+    self.rodata.data = .{ .items = rodata_slice, .capacity = rodata_bytes };
+}
+
+/// Applies each section's `protection` to the pages it occupies. Expects the
+/// sections to live in the page-aligned mapping created by `relocateSections`.
+///
+/// Returns `error.ProtectionError` when the operating system rejects a change.
+pub fn protectSections(self: *Self) !void {
+    const sections = [_]*Section{ &self.text, &self.data, &self.rodata };
+
+    for (sections) |section| {
+        // An empty section occupies no pages, so there is nothing to protect.
+        if (section.data.capacity == 0) continue;
+
+        switch (builtin.os.tag) {
+            .windows => {
+                const windows = std.os.windows;
+                const ntdll = windows.ntdll;
+
+                // Keep code readable as well as executable.
+                const protection = switch (section.protection) {
+                    .executable => windows.PAGE_EXECUTE_READ,
+                    .read_only => windows.PAGE_READONLY,
+                    .read_write => windows.PAGE_READWRITE,
+                };
+
+                var base_addr: ?*anyopaque = section.data.items.ptr;
+                var size: windows.SIZE_T = section.data.capacity;
+                var old_protection: u32 = undefined;
+
+                const status = ntdll.NtProtectVirtualMemory(
+                    windows.GetCurrentProcess(),
+                    &base_addr,
+                    &size,
+                    protection,
+                    &old_protection,
+                );
+
+                if (status != .SUCCESS) {
+                    return error.ProtectionError;
+                }
+            },
+            .linux => {
+                const linux = std.os.linux;
+
+                // Keep code readable as well as executable.
+                const protection = switch (section.protection) {
+                    .executable => linux.PROT.READ | linux.PROT.EXEC,
+                    .read_only => linux.PROT.READ,
+                    .read_write => linux.PROT.READ | linux.PROT.WRITE,
+                };
+
+                const rc = linux.mprotect(
+                    section.data.items.ptr,
+                    section.data.capacity,
+                    protection,
+                );
+                const status: linux.E = .init(rc);
+
+                if (status != .SUCCESS) {
+                    return error.ProtectionError;
+                }
+            },
+            else => @compileError("Operating system " ++ @tagName(builtin.os.tag) ++ " not supported"),
+        }
+    }
+}
+
+/// Intended to release the section memory; not implemented yet and always panics.
+pub fn freeSections(self: *Self) void {
+    _ = self;
+    std.debug.panic("Not implemented", .{});
+}
diff --git a/packages/cjit/src/tokens/Tokenizer.zig b/packages/cjit/src/tokens/Tokenizer.zig
index de528dd..98c3bdf 100644
--- a/packages/cjit/src/tokens/Tokenizer.zig
+++ b/packages/cjit/src/tokens/Tokenizer.zig
@@ -9,21 +9,21 @@ const Utf8Iterator = @import("Utf8Iterator.zig");
 pub const max_string_length = 4096;
 pub const max_wide_string_length = 4096;
 
-filename: []const u8,
-it: Utf8Iterator,
+filename: []const u8 = &.{},
+it: Utf8Iterator = .init(&.{}),
 defines: std.StringHashMapUnmanaged([]Token) = .{},
 /// Bounded, preallocated with the capacity of `max_string_length`.
 string: std.ArrayList(u8),
 /// Bounded, preallocated with the capacity of `max_wide_string_length`.
 wide_string: std.ArrayList(u32),
 
-pub fn init(filename: []const u8, code: []const u8, arena_allocator: std.mem.Allocator) !Self {
+/// Creates a tokenizer with no source attached; both string buffers are
+/// allocated once from `arena_allocator` at their maximum capacity.
+pub fn init(arena_allocator: std.mem.Allocator) !Self {
     const string_buffer = try arena_allocator.alloc(u8, max_string_length);
-    const wide_string_buffer = try arena_allocator.alloc(u8, max_wide_string_length);
+    const wide_string_buffer = try arena_allocator.alloc(u32, max_wide_string_length);
 
     return .{
-        .filename = filename,
-        .it = .init(code),
         .string = .initBuffer(string_buffer),
         .wide_string = .initBuffer(wide_string_buffer),
     };
diff --git a/packages/cjit/src/x86_64.zig b/packages/cjit/src/x86_64.zig
index 27d3d93..a25f68d 100644
--- a/packages/cjit/src/x86_64.zig
+++ b/packages/cjit/src/x86_64.zig
@@ -437,5 +437,9 @@ pub fn allocRodataDisp32(rt: *Runtime, data: []const u8) !void {
     try rt.sections.rodata.writeBytes(data, rt.allocator);
     try disp32(rt, 0);
 
-    try rt.relocation_table.append(rt.allocator, .{ .addr = addr, .location = location });
+    try rt.relocation_table.append(rt.allocator, .{
+        .addr = addr,
+        .location = location,
+        .type = .rip_disp32,
+    });
 }