diff --git a/packages/cjit/src/Runtime.zig b/packages/cjit/src/Runtime.zig index 27b56bf..d5a2b44 100644 --- a/packages/cjit/src/Runtime.zig +++ b/packages/cjit/src/Runtime.zig @@ -3,6 +3,12 @@ const Self = @This(); const types = @import("types.zig"); +const Sections = @import("Sections.zig"); +const StackValue = @import("StackValue.zig"); +const Type = types.Type; + +pub const virtual_stack_size = 256; + state: State = .init, allocator: std.mem.Allocator, arena: std.heap.ArenaAllocator, @@ -10,6 +16,10 @@ includes: std.DoublyLinkedList = .{}, symbols_needed: std.StringHashMapUnmanaged(ExternSymbol) = .{}, symbols_provided: std.StringHashMapUnmanaged(InternSymbol) = .{}, symbols_located: std.StringHashMapUnmanaged(LocatedSymbol) = .{}, +relocation_table: std.ArrayList(Relocation) = .{}, +/// Bounded, preallocated with the capacity of `virtual_stack_size`. +virtual_stack: std.ArrayList(StackValue), +sections: Sections = .{}, pub const State = enum(u8) { init, @@ -24,40 +34,55 @@ pub const Include = struct { pub const ExternSymbol = struct { name: []const u8, - c_type: types.Type, + c_type: Type, ptr: ?*anyopaque, }; pub const InternSymbol = struct { name: []const u8, public: bool, - c_type: types.Type, + c_type: Type, location: Location, +}; - pub const Location = union(enum) { - text: usize, - data: usize, - rodata: usize, - bss: usize, - }; +pub const Location = union(enum) { + text: usize, + data: usize, + rodata: usize, + bss: usize, }; pub const LocatedSymbol = struct { name: []const u8, public: bool, - c_type: types.Type, + c_type: Type, ptr: ?*anyopaque, }; -pub fn init(allocator: std.mem.Allocator) Self { +pub const Relocation = struct { + addr: usize, + location: Location, +}; + +pub fn init(allocator: std.mem.Allocator) !Self { + var arena: std.heap.ArenaAllocator = .init(allocator); + const arena_allocator = arena.allocator(); + errdefer arena.deinit(); + + const virtual_stack_buffer = try arena_allocator.alloc(StackValue, virtual_stack_size); + return .{ 
.allocator = allocator, - .arena = .init(allocator), + .arena = arena, + .virtual_stack = .initBuffer(virtual_stack_buffer), }; } pub fn deinit(self: *Self) void { - self.symbols.deinit(self.allocator); + self.symbols_needed.deinit(self.allocator); + self.symbols_provided.deinit(self.allocator); + self.symbols_located.deinit(self.allocator); + self.relocation_table.deinit(self.allocator); self.arena.deinit(); self.* = undefined; } diff --git a/packages/cjit/src/Sections.zig b/packages/cjit/src/Sections.zig index 1a7103f..3211a27 100644 --- a/packages/cjit/src/Sections.zig +++ b/packages/cjit/src/Sections.zig @@ -3,7 +3,7 @@ const std = @import("std"); text: Section = .{ .read_only = true, .executable = true }, data: Section = .{ .read_only = false, .executable = false }, rodata: Section = .{ .read_only = true, .executable = false }, -bss: usize, +bss: usize = 0, pub const Section = struct { data: std.ArrayList(u8) = .{}, @@ -14,12 +14,13 @@ pub const Section = struct { const T = @TypeOf(value); std.debug.assert(std.meta.hasUniqueRepresentation(T)); const bytes = std.mem.asBytes(&value); - const alignment = @alignOf(T); - - try self.alignForward(alignment, allocator); try self.writeBytes(bytes, allocator); } + pub fn writeByte(self: *Section, byte: u8, allocator: std.mem.Allocator) !void { + try self.data.append(allocator, byte); + } + pub fn writeBytes(self: *Section, data: []const u8, allocator: std.mem.Allocator) !void { try self.data.appendSlice(allocator, data); } diff --git a/packages/cjit/src/StackValue.zig b/packages/cjit/src/StackValue.zig new file mode 100644 index 0000000..6d6a053 --- /dev/null +++ b/packages/cjit/src/StackValue.zig @@ -0,0 +1,20 @@ +const std = @import("std"); +const Self = @This(); + +const types = @import("types.zig"); +const x86_64 = @import("x86_64.zig"); + +const Register = x86_64.Register; +const Type = types.Type; + +pub const Value = union(enum) { + register: Register, + constant: u64, + symbol: []const u8, + /// Displacement in 
bytes from current value of base pointer register. + stack: i32, + cpu_flags: void, +}; + +c_type: Type, +value: Value, diff --git a/packages/cjit/src/root.zig b/packages/cjit/src/root.zig index cff2153..a8e5133 100644 --- a/packages/cjit/src/root.zig +++ b/packages/cjit/src/root.zig @@ -2,12 +2,16 @@ const std = @import("std"); const builtin = @import("builtin"); pub const Runtime = @import("Runtime.zig"); +pub const Sections = @import("Sections.zig"); +pub const StackValue = @import("StackValue.zig"); +pub const tokens = @import("tokens.zig"); pub const types = @import("types.zig"); +pub const x86_64 = @import("x86_64.zig"); pub const call: std.builtin.CallingConvention = switch (builtin.cpu.arch) { - .aarch64 => @compileError("TODO"), + .aarch64 => .{ .aarch64_aapcs = .{} }, .x86_64 => .{ .x86_64_sysv = .{} }, - else => unreachable, + else => @compileError("Architecture " ++ @tagName(builtin.cpu.arch) ++ " not supported"), }; test { diff --git a/packages/cjit/src/tokens.zig b/packages/cjit/src/tokens.zig index 734d1f1..a0eb9d2 100644 --- a/packages/cjit/src/tokens.zig +++ b/packages/cjit/src/tokens.zig @@ -129,7 +129,7 @@ pub const Token = union(enum) { punctuator: Punctuator, }; -pub fn isIdentifierStart(code_point: u21) void { +pub fn isIdentifierStart(code_point: u21) bool { // zig fmt: off return code_point >= 'A' and code_point <= 'Z' or code_point == '_' @@ -138,7 +138,7 @@ pub fn isIdentifierStart(code_point: u21) void { // zig fmt: on } -pub fn isIdentifierMiddle(code_point: u21) void { +pub fn isIdentifierMiddle(code_point: u21) bool { // zig fmt: off return code_point >= '0' and code_point <= '9' or code_point >= 'A' and code_point <= 'Z' diff --git a/packages/cjit/src/types.zig b/packages/cjit/src/types.zig index 921ad87..f74678c 100644 --- a/packages/cjit/src/types.zig +++ b/packages/cjit/src/types.zig @@ -28,6 +28,70 @@ pub const Type = union(enum) { array: *const Array, function: *const Function, pointer: *const Pointer, + + pub fn sizeOf(self: 
Type) ?usize { + return switch (self) { + .signed_char => 1, + .signed_short => 2, + .signed_int => 4, + .signed_long => 8, + .signed_long_long => 8, + + .unsigned_char => 1, + .unsigned_short => 2, + .unsigned_int => 4, + .unsigned_long => 8, + .unsigned_long_long => 8, + + .float => 4, + .double => 8, + .long_double => 8, + + .void => null, + .noreturn => null, + .char => 1, + .bool => 1, + + .@"enum" => 4, + .@"struct" => |s| s.size, + .@"union" => |u| u.size, + .array => |a| if (a.length) |l| (if (sizeOf(a.child)) |c| l * c else null) else 8, + .function => null, + .pointer => 8, + }; + } + + pub fn alignOf(self: Type) ?u16 { + return switch (self) { + .signed_char => 1, + .signed_short => 2, + .signed_int => 4, + .signed_long => 8, + .signed_long_long => 8, + + .unsigned_char => 1, + .unsigned_short => 2, + .unsigned_int => 4, + .unsigned_long => 8, + .unsigned_long_long => 8, + + .float => 4, + .double => 8, + .long_double => 8, + + .void => null, + .noreturn => null, + .char => 1, + .bool => 1, + + .@"enum" => 4, + .@"struct" => |s| s.@"align", + .@"union" => |u| u.@"align", + .array => |a| if (a.length != null) alignOf(a.child) else 8, + .function => null, + .pointer => 8, + }; + } }; pub const Enum = struct { @@ -87,70 +151,6 @@ pub const Pointer = struct { @"volatile": bool, }; -pub fn sizeOf(@"type": Type) ?usize { - return switch (@"type") { - .signed_char => 1, - .signed_short => 2, - .signed_int => 4, - .signed_long => 8, - .signed_long_long => 8, - - .unsigned_char => 1, - .unsigned_short => 2, - .unsigned_int => 4, - .unsigned_long => 8, - .unsigned_long_long => 8, - - .float => 4, - .double => 8, - .long_double => 8, - - .void => null, - .noreturn => null, - .char => 1, - .bool => 1, - - .@"enum" => 4, - .@"struct" => |s| s.size, - .@"union" => |u| u.size, - .array => |a| if (a.length) |l| (if (sizeOf(a.child)) |c| l * c else null) else 8, - .function => null, - .pointer => 8, - }; -} - -pub fn alignOf(@"type": Type) ?u16 { - return switch 
(@"type") { - .signed_char => 1, - .signed_short => 2, - .signed_int => 4, - .signed_long => 8, - .signed_long_long => 8, - - .unsigned_char => 1, - .unsigned_short => 2, - .unsigned_int => 4, - .unsigned_long => 8, - .unsigned_long_long => 8, - - .float => 4, - .double => 8, - .long_double => 8, - - .void => null, - .noreturn => null, - .char => 1, - .bool => 1, - - .@"enum" => 4, - .@"struct" => |s| s.@"align", - .@"union" => |u| u.@"align", - .array => |a| if (a.length != null) alignOf(a.child) else 8, - .function => null, - .pointer => 8, - }; -} - pub fn isCompatible(comptime ZigType: type, c_type: Type) bool { return switch (@typeInfo(ZigType)) { .type => false, diff --git a/packages/cjit/src/x86_64.zig b/packages/cjit/src/x86_64.zig new file mode 100644 index 0000000..27d3d93 --- /dev/null +++ b/packages/cjit/src/x86_64.zig @@ -0,0 +1,441 @@ +const std = @import("std"); + +const tokens = @import("tokens.zig"); +const types = @import("types.zig"); + +const Location = Runtime.Location; +const Punctuator = tokens.Punctuator; +const Runtime = @import("Runtime.zig"); +const StackValue = @import("StackValue.zig"); +const Type = types.Type; + +pub const Register = union(enum) { + gpr: Gpr, + xmm: Xmm, +}; + +pub const Gpr = enum(u4) { + rax = 0, + rcx = 1, + rdx = 2, + rbx = 3, + rsp = 4, + rbp = 5, + rsi = 6, + rdi = 7, + r8 = 8, + r9 = 9, + r10 = 10, + r11 = 11, + r12 = 12, + r13 = 13, + r14 = 14, + r15 = 15, + + pub fn reg(self: Gpr) u3 { + return @truncate(@intFromEnum(self)); + } + + pub fn x(self: Gpr) bool { + return (@intFromEnum(self) & 0b1000) != 0; + } +}; + +pub const Xmm = enum(u4) { + xmm0 = 0, + xmm1 = 1, + xmm2 = 2, + xmm3 = 3, + xmm4 = 4, + xmm5 = 5, + xmm6 = 6, + xmm7 = 7, + xmm8 = 8, + xmm9 = 9, + xmm10 = 10, + xmm11 = 11, + xmm12 = 12, + xmm13 = 13, + xmm14 = 14, + xmm15 = 15, + + pub fn reg(self: Xmm) u3 { + return @truncate(@intFromEnum(self)); + } + + pub fn x(self: Xmm) bool { + return (@intFromEnum(self) & 0b1000) != 0; + } +}; + +// --- EMIT 
HELPERS ------------------------------------------------------------ + +pub const Rex = packed struct(u8) { + b: bool = false, + x: bool = false, + r: bool = false, + w: bool = false, + /// MUST always be the default value + prefix: u4 = 0b0100, +}; + +pub const ModRM = packed struct(u8) { + rm: u3, + reg: u3, + mod: u2, +}; + +pub const SIB = packed struct(u8) { + base: u3, + index: u3, + scale: u2, +}; + +pub fn op16(rt: *Runtime) !void { + try rt.sections.text.writeByte(0x66, rt.allocator); +} + +pub fn rex(rt: *Runtime, value: Rex) !void { + try rt.sections.text.writeByte(@bitCast(value), rt.allocator); +} + +pub fn op(rt: *Runtime, value: u8) !void { + try rt.sections.text.writeByte(value, rt.allocator); +} + +pub fn op2(rt: *Runtime, v0: u8, v1: u8) !void { + const bytes: [2]u8 = .{ v0, v1 }; + try rt.sections.text.writeBytes(&bytes, rt.allocator); +} + +pub fn op3(rt: *Runtime, v0: u8, v1: u8, v2: u8) !void { + const bytes: [3]u8 = .{ v0, v1, v2 }; + try rt.sections.text.writeBytes(&bytes, rt.allocator); +} + +pub fn modrm(rt: *Runtime, value: ModRM) !void { + try rt.sections.text.writeByte(@bitCast(value), rt.allocator); +} + +pub fn sib(rt: *Runtime, value: SIB) !void { + try rt.sections.text.writeByte(@bitCast(value), rt.allocator); +} + +pub fn imm8(rt: *Runtime, value: u8) !void { + try rt.sections.text.writeByte(value, rt.allocator); +} + +pub fn imm16(rt: *Runtime, value: u16) !void { + var bytes: [2]u8 = undefined; + std.mem.writeInt(u16, &bytes, value, .little); + try rt.sections.text.writeBytes(&bytes, rt.allocator); +} + +pub fn imm32(rt: *Runtime, value: u32) !void { + var bytes: [4]u8 = undefined; + std.mem.writeInt(u32, &bytes, value, .little); + try rt.sections.text.writeBytes(&bytes, rt.allocator); +} + +pub fn imm64(rt: *Runtime, value: u64) !void { + var bytes: [8]u8 = undefined; + std.mem.writeInt(u64, &bytes, value, .little); + try rt.sections.text.writeBytes(&bytes, rt.allocator); +} + +pub fn disp8(rt: *Runtime, value: i8) !void { + try 
rt.sections.text.writeByte(@bitCast(value), rt.allocator); +} + +pub fn disp32(rt: *Runtime, value: i32) !void { + var bytes: [4]u8 = undefined; + std.mem.writeInt(i32, &bytes, value, .little); + try rt.sections.text.writeBytes(&bytes, rt.allocator); +} + +// ----------------------------------------------------------------------------- + +pub const Operation = enum { + add, + bit_and, + bit_not, + bit_or, + bit_xor, + bool_and, + bool_not, + bool_or, + cmp_eq, + cmp_gt, + cmp_gte, + cmp_lt, + cmp_lte, + cmp_neq, + div, + mod, + mul, + neg, + sar, + shl, + shr, + sub, +}; + +pub fn opFloat(rt: *Runtime, operation: Operation) !void { + _ = rt; + _ = operation; +} + +pub fn opInt(rt: *Runtime, operation: Operation) !void { + _ = rt; + _ = operation; +} + +pub fn cvtIntToFloat(rt: *Runtime) !void { + _ = rt; +} + +pub fn cvtFloatToInt(rt: *Runtime) !void { + _ = rt; +} + +pub fn cvtFloatToFloat(rt: *Runtime, target: Type) !void { + const top = vsTop(rt); + + switch (top.c_type) { + .float => switch (target) { + .float => { + // do nothing + }, + .double, .long_double => { + // CVTSS2SD xmm1, xmm2/m32 + // F3 0F 5A /r + }, + else => unreachable, + }, + .double, .long_double => switch (target) { + .float => { + // CVTSD2SS xmm1, xmm2/m64 + // F2 0F 5A /r + }, + .double, .long_double => { + // do nothing + }, + else => unreachable, + }, + else => unreachable, + } +} + +// --- LOAD AND STORE ---------------------------------------------------------- + +/// Load value into register. The value must be 1, 2, 4 or 8 bytes long. 
+pub fn load(rt: *Runtime, dst_register: Register, src_value: *const StackValue) !void { + const size = src_value.c_type.sizeOf().?; + std.debug.assert(size == 1 or size == 2 or size == 4 or size == 8); + + switch (src_value.value) { + .register => |src_register| { + if (std.meta.eql(dst_register, src_register)) return; + }, + .constant => |constant| switch (dst_register) { + .gpr => |dest_gpr| switch (size) { + 1 => { + // MOV r8, imm8 + // B0+ rb ib + if (@intFromEnum(dest_gpr) >= 4) { + // NOTE spl, bpl, sil and dil need an empty REX prefix, + // otherwise ah, ch, dh and bh would be used. + try rex(rt, .{ .b = dest_gpr.x() }); + } + try op(rt, 0xB0 | @as(u8, dest_gpr.reg())); + try imm8(rt, @truncate(constant)); + }, + 2 => { + // MOV r16, imm16 + // B8+ rw iw + try op16(rt); + if (@intFromEnum(dest_gpr) >= 8) { + try rex(rt, .{ .b = dest_gpr.x() }); + } + try op(rt, 0xB8 | @as(u8, dest_gpr.reg())); + try imm16(rt, @truncate(constant)); + }, + 4 => { + // MOV r32, imm32 + // B8+ rd id + if (@intFromEnum(dest_gpr) >= 8) { + try rex(rt, .{ .b = dest_gpr.x() }); + } + try op(rt, 0xB8 | @as(u8, dest_gpr.reg())); + try imm32(rt, @truncate(constant)); + }, + 8 => { + // MOV r64, imm64 + // REX.W + B8+ rd io + try rex(rt, .{ .b = dest_gpr.x(), .w = true }); + try op(rt, 0xB8 | @as(u8, dest_gpr.reg())); + try imm64(rt, constant); + }, + else => unreachable, + }, + .xmm => |dest_xmm| { + var bytes: [8]u8 = undefined; + std.mem.writeInt(u64, &bytes, @truncate(constant), .little); + const data = bytes[0..size]; + + // MOVD xmm, r/m32 + // 66 0F 6E /r + + // MOVQ xmm, r/m64 + // 66 REX.W 0F 6E /r + + try op(rt, 0x66); + if (@intFromEnum(dest_xmm) >= 8 or size == 8) { + try rex(rt, .{ .r = dest_xmm.x(), .w = size == 8 }); + } + try op2(rt, 0x0F, 0x6E); + // [rip + disp32] + try modrm(rt, .{ .mod = 0b00, .reg = dest_xmm.reg(), .rm = Gpr.rbp.reg() }); + try allocRodataDisp32(rt, data); + }, + }, + .symbol => {}, + .stack => |disp| { + const disp_small = std.math.minInt(i8) <= disp and disp <= std.math.maxInt(i8); 
+ const rex_prefix: Rex = .{ + .r = switch (dst_register) { + .gpr => |dest_gpr| dest_gpr.x(), + .xmm => |dest_xmm| dest_xmm.x(), + }, + .w = size == 8, + }; + + const mod: u2 = if (disp_small) 0b01 else 0b10; + const reg: u3 = switch (dst_register) { + .gpr => |dest_gpr| dest_gpr.reg(), + .xmm => |dest_xmm| dest_xmm.reg(), + }; + + switch (dst_register) { + .gpr => |dest_gpr| { + switch (size) { + 1 => { + // MOV r8, r/m8 + // 8A /r + if (@intFromEnum(dest_gpr) >= 4) { + // NOTE spl, bpl, sil and dil need an empty REX prefix, + // otherwise ah, ch, dh and bh would be used. + try rex(rt, rex_prefix); + } + try op(rt, 0x8A); + }, + 2 => { + // MOV r16, r/m16 + // 8B /r + try op16(rt); + if (@intFromEnum(dest_gpr) >= 8) { + try rex(rt, rex_prefix); + } + try op(rt, 0x8B); + }, + 4 => { + // MOV r32, r/m32 + // 8B /r + if (@intFromEnum(dest_gpr) >= 8) { + try rex(rt, rex_prefix); + } + try op(rt, 0x8B); + }, + 8 => { + // MOV r64, r/m64 + // REX.W + 8B /r + try rex(rt, rex_prefix); + try op(rt, 0x8B); + }, + else => unreachable, + } + }, + .xmm => |dest_xmm| { + // MOVD xmm, r/m32 + // 66 0F 6E /r + + // MOVQ xmm, r/m64 + // 66 REX.W 0F 6E /r + + try op(rt, 0x66); + if (@intFromEnum(dest_xmm) >= 8 or size == 8) { + try rex(rt, rex_prefix); + } + try op2(rt, 0x0F, 0x6E); + }, + } + // [rbp + disp8/32] + try modrm(rt, .{ .mod = mod, .reg = reg, .rm = Gpr.rbp.reg() }); + if (disp_small) { + try disp8(rt, @intCast(disp)); + } else { + try disp32(rt, disp); + } + }, + .cpu_flags => {}, + } +} + +/// Store register into value. +pub fn store(rt: *Runtime, dst_value: *const StackValue, src_register: Register) !void { + const size = dst_value.c_type.sizeOf().?; + std.debug.assert(size == 1 or size == 2 or size == 4 or size == 8); +} + +// --- STACK OPERATIONS -------------------------------------------------------- + +/// Caller asserts that the stack is not empty. 
+pub fn vsTop(rt: *Runtime) *StackValue { + const vs = rt.virtual_stack.items; + return &vs[vs.len - 1]; +} + +/// Caller asserts that the stack has at least two values. +pub fn vsSwap(rt: *Runtime) void { + const vs = rt.virtual_stack.items; + std.mem.swap(StackValue, &vs[vs.len - 1], &vs[vs.len - 2]); +} + +/// Ensure the top of the stack is in an XMM register. Returns the id of the +/// register. Caller asserts that the stack is not empty. +pub fn vsEnsureXmm(rt: *Runtime) Xmm { + const top = vsTop(rt); + switch (top.value) { + .register => {}, + .constant => {}, + .symbol => {}, + .stack => {}, + .cpu_flags => {}, + } +} + +/// Ensure the top of the stack is in a GPR register. Returns the id of the +/// register. Caller asserts that the stack is not empty. +pub fn vsEnsureGpr(rt: *Runtime) Gpr { + const top = vsTop(rt); + switch (top.value) { + .register => {}, + .constant => {}, + .symbol => {}, + .stack => {}, + .cpu_flags => {}, + } +} + +// --- DATA ALLOCATIONS -------------------------------------------------------- + +/// Reserve `data.len` bytes in rodata and fill it with `data`, then add a +/// placeholder disp32 part of an instruction and a relocation entry for it. +pub fn allocRodataDisp32(rt: *Runtime, data: []const u8) !void { + const addr = rt.sections.text.data.items.len; + const location: Location = .{ .rodata = rt.sections.rodata.data.items.len }; + + try rt.sections.rodata.writeBytes(data, rt.allocator); + try disp32(rt, 0); + try rt.relocation_table.append(rt.allocator, .{ .addr = addr, .location = location }); +} diff --git a/packages/cjit/test/root.zig b/packages/cjit/test/root.zig index c26525f..5bffa90 100644 --- a/packages/cjit/test/root.zig +++ b/packages/cjit/test/root.zig @@ -6,7 +6,7 @@ fn add(a: i32, b: i32) callconv(cjit.call) i32 { } test { - var rt: cjit.Runtime = .init(std.testing.allocator); + var rt: cjit.Runtime = try .init(std.testing.allocator); defer rt.deinit(); try rt.compile(