From 0b2370704120cd3f14f068703544c4ea07a26d7c Mon Sep 17 00:00:00 2001 From: Szymon Nowakowski Date: Mon, 19 Jan 2026 14:01:40 +0100 Subject: [PATCH] Dream of my own C compiler --- castle.code-workspace | 16 +- packages/cjit/build.zig | 44 +++++ packages/cjit/build.zig.zon | 11 ++ packages/cjit/src/Runtime.zig | 108 +++++++++++ packages/cjit/src/Sections.zig | 34 ++++ packages/cjit/src/root.zig | 15 ++ packages/cjit/src/tokens.zig | 149 +++++++++++++++ packages/cjit/src/types.zig | 201 +++++++++++++++++++ packages/cjit/test/root.zig | 32 ++++ packages/tcc/src/CType.zig | 55 ++++++ packages/tcc/src/Elf.zig | 50 +++++ packages/tcc/src/Gen.zig | 17 ++ packages/tcc/src/SValue.zig | 57 ++++++ packages/tcc/src/Section.zig | 22 +++ packages/tcc/src/Sym.zig | 65 +++++++ packages/tcc/src/tcc.zig | 17 ++ packages/tcc/src/x86_64.zig | 339 +++++++++++++++++++++++++++++++++ 17 files changed, 1231 insertions(+), 1 deletion(-) create mode 100644 packages/cjit/build.zig create mode 100644 packages/cjit/build.zig.zon create mode 100644 packages/cjit/src/Runtime.zig create mode 100644 packages/cjit/src/Sections.zig create mode 100644 packages/cjit/src/root.zig create mode 100644 packages/cjit/src/tokens.zig create mode 100644 packages/cjit/src/types.zig create mode 100644 packages/cjit/test/root.zig create mode 100644 packages/tcc/src/CType.zig create mode 100644 packages/tcc/src/Elf.zig create mode 100644 packages/tcc/src/Gen.zig create mode 100644 packages/tcc/src/SValue.zig create mode 100644 packages/tcc/src/Section.zig create mode 100644 packages/tcc/src/Sym.zig create mode 100644 packages/tcc/src/tcc.zig create mode 100644 packages/tcc/src/x86_64.zig diff --git a/castle.code-workspace b/castle.code-workspace index 99964b0..4488bd8 100644 --- a/castle.code-workspace +++ b/castle.code-workspace @@ -1,30 +1,44 @@ { "folders": [ { + "name": "cjit", + "path": "packages/cjit" + }, + { + "name": "js", "path": "packages/js" }, { + "name": "media", "path": "packages/media" }, { + 
"name": "myid", "path": "packages/myid" }, { + "name": "sciter", "path": "packages/sciter" }, { + "name": "tcc", "path": "packages/tcc" }, { + "name": "vecmath", "path": "packages/vecmath" }, { + "name": "x11", "path": "packages/x11" - }, + } ], "settings": { "files.exclude": { "**/.zig-cache": true, }, + "files.associations": { + "**/packages/tcc/vendor/*.{def,h}": "c", + }, }, } diff --git a/packages/cjit/build.zig b/packages/cjit/build.zig new file mode 100644 index 0000000..424f2e8 --- /dev/null +++ b/packages/cjit/build.zig @@ -0,0 +1,44 @@ +const std = @import("std"); + +pub fn build(b: *std.Build) void { + const target = b.standardTargetOptions(.{ + .whitelist = &.{ + .{ + .cpu_arch = .x86_64, + .os_tag = .windows, + .abi = .gnu, + }, + .{ + .cpu_arch = .x86_64, + .os_tag = .linux, + .abi = .gnu, + }, + }, + }); + const optimize = b.standardOptimizeOption(.{}); + + const module = b.addModule("cjit", .{ + .target = target, + .optimize = optimize, + .root_source_file = b.path("src/root.zig"), + }); + + const module_internal_test = b.addTest(.{ .root_module = module }); + const run_internal_test = b.addRunArtifact(module_internal_test); + + const module_external_test = b.addTest(.{ + .root_module = b.createModule(.{ + .target = b.resolveTargetQuery(.{}), + .optimize = .Debug, + .root_source_file = b.path("test/root.zig"), + .imports = &.{ + .{ .name = "cjit", .module = module }, + }, + }), + }); + const run_external_test = b.addRunArtifact(module_external_test); + + const step_test = b.step("test", "Run tests"); + step_test.dependOn(&run_internal_test.step); + step_test.dependOn(&run_external_test.step); +} diff --git a/packages/cjit/build.zig.zon b/packages/cjit/build.zig.zon new file mode 100644 index 0000000..f9759f1 --- /dev/null +++ b/packages/cjit/build.zig.zon @@ -0,0 +1,11 @@ +.{ + .name = .cjit, + .version = "0.0.0", + .minimum_zig_version = "0.15.2", + .paths = .{ + "src", + "build.zig", + "build.zig.zon", + }, + .fingerprint = 0xaec0accc19243440, +} 
diff --git a/packages/cjit/src/Runtime.zig b/packages/cjit/src/Runtime.zig new file mode 100644 index 0000000..27b56bf --- /dev/null +++ b/packages/cjit/src/Runtime.zig @@ -0,0 +1,108 @@ +const std = @import("std"); +const Self = @This(); + +const types = @import("types.zig"); + +state: State = .init, +allocator: std.mem.Allocator, +arena: std.heap.ArenaAllocator, +includes: std.DoublyLinkedList = .{}, +symbols_needed: std.StringHashMapUnmanaged(ExternSymbol) = .{}, +symbols_provided: std.StringHashMapUnmanaged(InternSymbol) = .{}, +symbols_located: std.StringHashMapUnmanaged(LocatedSymbol) = .{}, + +pub const State = enum(u8) { + init, + compiled, + linked, +}; + +pub const Include = struct { + code: []const u8, + node: std.DoublyLinkedList.Node = .{}, +}; + +pub const ExternSymbol = struct { + name: []const u8, + c_type: types.Type, + ptr: ?*anyopaque, +}; + +pub const InternSymbol = struct { + name: []const u8, + public: bool, + c_type: types.Type, + location: Location, + + pub const Location = union(enum) { + text: usize, + data: usize, + rodata: usize, + bss: usize, + }; +}; + +pub const LocatedSymbol = struct { + name: []const u8, + public: bool, + c_type: types.Type, + ptr: ?*anyopaque, +}; + +pub fn init(allocator: std.mem.Allocator) Self { + return .{ + .allocator = allocator, + .arena = .init(allocator), + }; +} + +pub fn deinit(self: *Self) void { + inline for (.{ &self.symbols_needed, &self.symbols_provided, &self.symbols_located }) |map| map.deinit(self.allocator); // there is no `symbols` field; release every symbol table + self.arena.deinit(); + self.* = undefined; +} + +pub fn include(self: *Self, code: []const u8) !void { + std.debug.assert(self.state == .init); + if (code.len == 0) return; + + const include_ptr = try self.create(Include); + include_ptr.* = .{ .code = code }; + + self.includes.append(&include_ptr.node); +} + +pub fn compile(self: *Self, code: []const u8) !void { + std.debug.assert(self.state == .init); + + _ = code; + self.state = .compiled; + std.debug.panic("Not implemented", .{}); +} + +pub fn setSymbol(self: *Self, comptime T: type, name: []const u8, ptr: *const T) 
!void { + std.debug.assert(self.state == .compiled); + + const symbol = self.symbols_needed.getPtr(name) orelse return error.SymbolNotFound; + if (!types.isCompatible(T, symbol.c_type)) return error.IncompatibleType; + + symbol.ptr = @ptrCast(@constCast(ptr)); // stored untyped; the table keeps a mutable opaque pointer +} + +pub fn link(self: *Self) !void { + std.debug.assert(self.state == .compiled); + + self.state = .linked; + std.debug.panic("Not implemented", .{}); +} + +pub fn getSymbol(self: *const Self, comptime T: type, name: []const u8) ?*const T { + std.debug.assert(self.state == .linked); + + const symbol = self.symbols_located.get(name) orelse return null; + return @ptrCast(@alignCast(symbol.ptr)); +} + +fn create(self: *Self, comptime T: type) error{OutOfMemory}!*T { + return self.arena.allocator().create(T); +} diff --git a/packages/cjit/src/Sections.zig b/packages/cjit/src/Sections.zig new file mode 100644 index 0000000..1a7103f --- /dev/null +++ b/packages/cjit/src/Sections.zig @@ -0,0 +1,34 @@ +const std = @import("std"); + +text: Section = .{ .read_only = true, .executable = true }, +data: Section = .{ .read_only = false, .executable = false }, +rodata: Section = .{ .read_only = true, .executable = false }, +bss: usize, + +pub const Section = struct { + data: std.ArrayList(u8) = .{}, + read_only: bool, + executable: bool, + + pub fn writeValue(self: *Section, value: anytype, allocator: std.mem.Allocator) !void { + const T = @TypeOf(value); + std.debug.assert(std.meta.hasUniqueRepresentation(T)); + const bytes = std.mem.asBytes(&value); + const alignment = @alignOf(T); + + try self.alignForward(alignment, allocator); + try self.writeBytes(bytes, allocator); + } + + pub fn writeBytes(self: *Section, data: []const u8, allocator: std.mem.Allocator) !void { + try self.data.appendSlice(allocator, data); + } + + pub fn alignForward(self: *Section, alignment: u16, allocator: std.mem.Allocator) !void { + const ptr = self.data.items.len; + const ptr_aligned = std.mem.alignForward(usize, ptr, alignment); + const padding = ptr_aligned - ptr; +
+ try self.data.appendNTimes(allocator, 0, padding); + } +}; diff --git a/packages/cjit/src/root.zig b/packages/cjit/src/root.zig new file mode 100644 index 0000000..cff2153 --- /dev/null +++ b/packages/cjit/src/root.zig @@ -0,0 +1,15 @@ +const std = @import("std"); +const builtin = @import("builtin"); + +pub const Runtime = @import("Runtime.zig"); +pub const types = @import("types.zig"); + +pub const call: std.builtin.CallingConvention = switch (builtin.cpu.arch) { + .aarch64 => @compileError("TODO"), + .x86_64 => .{ .x86_64_sysv = .{} }, + else => unreachable, +}; + +test { + std.testing.refAllDeclsRecursive(@This()); +} diff --git a/packages/cjit/src/tokens.zig b/packages/cjit/src/tokens.zig new file mode 100644 index 0000000..734d1f1 --- /dev/null +++ b/packages/cjit/src/tokens.zig @@ -0,0 +1,149 @@ +pub const Keyword = enum { + _Alignas, + _Alignof, + _Atomic, + _Bool, + _Complex, + _Generic, + _Imaginary, + _Noreturn, + _Static_assert, + _Thread_local, + auto, + @"break", + case, + char, + @"const", + @"continue", + default, + do, + double, + @"else", + @"enum", + @"extern", + float, + @"for", + goto, + @"if", + @"inline", + int, + long, + register, + restrict, + @"return", + short, + signed, + sizeof, + static, + @"struct", + @"switch", + typedef, + @"union", + unsigned, + void, + @"volatile", + @"while", +}; + +pub const Identifier = struct { + name: []const u8, +}; + +pub const Constant = union(enum) { + int: i32, + long: i64, + long_long: i64, + unsigned_int: u32, + unsigned_long: u64, + unsigned_long_long: u64, + float: f32, + double: f64, + character: u8, + wide_character: u32, +}; + +pub const StringLiteral = struct { + value: []const u8, +}; + +pub const Punctuator = enum { + // three characters + @"...", + @"<<=", + @">>=", + // two characters + @"--", + @"-=", + @"->", + @"!=", + @"*=", + @"/=", + @"&&", + @"&=", + @"##", + @"%=", + @"^=", + @"++", + @"+=", + @"<<", + @"<=", + @"==", + @">=", + @">>", + @"|=", + @"||", + // single character + @"-", 
+ @",", + @";", + @":", + @"!", + @"?", + @".", + @"(", + @")", + @"[", + @"]", + @"{", + @"}", + @"*", + @"/", + @"&", + @"#", + @"%", + @"^", + @"+", + @"<", + @"=", + @">", + @"|", + @"~", +}; + +pub const Token = union(enum) { + keyword: Keyword, + identifier: []const u8, + constant: Constant, + string_literal: []const u8, + wide_string_literal: []const u32, + punctuator: Punctuator, +}; + +pub fn isIdentifierStart(code_point: u21) bool { + // zig fmt: off + return code_point >= 'A' and code_point <= 'Z' + or code_point == '_' + or code_point >= 'a' and code_point <= 'z' + or code_point >= 128; + // zig fmt: on +} + +pub fn isIdentifierMiddle(code_point: u21) bool { + // zig fmt: off + return code_point >= '0' and code_point <= '9' + or code_point >= 'A' and code_point <= 'Z' + or code_point == '_' + or code_point >= 'a' and code_point <= 'z' + or code_point >= 128; + // zig fmt: on +} diff --git a/packages/cjit/src/types.zig b/packages/cjit/src/types.zig new file mode 100644 index 0000000..921ad87 --- /dev/null +++ b/packages/cjit/src/types.zig @@ -0,0 +1,201 @@ +const std = @import("std"); + +pub const Type = union(enum) { + signed_char: void, + signed_short: void, + signed_int: void, + signed_long: void, + signed_long_long: void, + + unsigned_char: void, + unsigned_short: void, + unsigned_int: void, + unsigned_long: void, + unsigned_long_long: void, + + float: void, + double: void, + long_double: void, + + void: void, + noreturn: void, + char: void, + bool: void, + + @"enum": *const Enum, + @"struct": *const Struct, + @"union": *const Union, + array: *const Array, + function: *const Function, + pointer: *const Pointer, +}; + +pub const Enum = struct { + name: []const u8, + constants: []const EnumConstant, +}; + +pub const EnumConstant = struct { + name: []const u8, + value: i32, + inferred: bool, +}; + +pub const Struct = struct { + name: []const u8, + fields: []const StructField, + size: usize, + @"align": u16, +}; + +pub const StructField = struct { +
name: []const u8, + type: Type, + offset: usize, +}; + +pub const Union = struct { + name: []const u8, + fields: []const UnionField, + size: usize, + @"align": u16, +}; + +pub const UnionField = struct { + name: []const u8, + type: Type, +}; + +pub const Array = struct { + child: Type, + length: ?usize, +}; + +pub const Function = struct { + arguments: []const FunctionArgument, + @"return": Type, +}; + +pub const FunctionArgument = struct { + name: []const u8, + type: Type, +}; + +pub const Pointer = struct { + child: Type, + @"const": bool, + @"volatile": bool, +}; + +pub fn sizeOf(@"type": Type) ?usize { + return switch (@"type") { + .signed_char => 1, + .signed_short => 2, + .signed_int => 4, + .signed_long => 8, + .signed_long_long => 8, + + .unsigned_char => 1, + .unsigned_short => 2, + .unsigned_int => 4, + .unsigned_long => 8, + .unsigned_long_long => 8, + + .float => 4, + .double => 8, + .long_double => 8, + + .void => null, + .noreturn => null, + .char => 1, + .bool => 1, + + .@"enum" => 4, + .@"struct" => |s| s.size, + .@"union" => |u| u.size, + .array => |a| if (a.length) |l| (if (sizeOf(a.child)) |c| l * c else null) else 8, + .function => null, + .pointer => 8, + }; +} + +pub fn alignOf(@"type": Type) ?u16 { + return switch (@"type") { + .signed_char => 1, + .signed_short => 2, + .signed_int => 4, + .signed_long => 8, + .signed_long_long => 8, + + .unsigned_char => 1, + .unsigned_short => 2, + .unsigned_int => 4, + .unsigned_long => 8, + .unsigned_long_long => 8, + + .float => 4, + .double => 8, + .long_double => 8, + + .void => null, + .noreturn => null, + .char => 1, + .bool => 1, + + .@"enum" => 4, + .@"struct" => |s| s.@"align", + .@"union" => |u| u.@"align", + .array => |a| if (a.length != null) alignOf(a.child) else 8, + .function => null, + .pointer => 8, + }; +} + +pub fn isCompatible(comptime ZigType: type, c_type: Type) bool { + return switch (@typeInfo(ZigType)) { + .type => false, + .void => c_type == .void, + .bool => c_type == .bool, + 
.noreturn => c_type == .noreturn, + .int => |zig_int| switch (c_type) { + .signed_char => zig_int.signedness == .signed and zig_int.bits == 8, + .signed_short => zig_int.signedness == .signed and zig_int.bits == 16, + .signed_int => zig_int.signedness == .signed and zig_int.bits == 32, + .signed_long => zig_int.signedness == .signed and zig_int.bits == 64, + .signed_long_long => zig_int.signedness == .signed and zig_int.bits == 64, + + .unsigned_char => zig_int.signedness == .unsigned and zig_int.bits == 8, + .unsigned_short => zig_int.signedness == .unsigned and zig_int.bits == 16, + .unsigned_int => zig_int.signedness == .unsigned and zig_int.bits == 32, + .unsigned_long => zig_int.signedness == .unsigned and zig_int.bits == 64, + .unsigned_long_long => zig_int.signedness == .unsigned and zig_int.bits == 64, + + .char => zig_int.bits == 8, + + else => false, + }, + .float => |zig_float| switch (c_type) { + .float => zig_float.bits == 32, + .double, .long_double => zig_float.bits == 64, + else => false, + }, + .pointer => @compileError("TODO"), + .array => @compileError("TODO"), + .@"struct" => @compileError("TODO"), + .comptime_float => false, + .comptime_int => false, + .undefined => true, + .null => @compileError("TODO"), + .optional => @compileError("TODO"), + .error_union => false, + .error_set => false, + .@"enum" => @compileError("TODO"), + .@"union" => @compileError("TODO"), + .@"fn" => false, + .@"opaque" => @compileError("TODO"), + .frame => false, + .@"anyframe" => false, + .vector => false, + .enum_literal => false, + }; +} diff --git a/packages/cjit/test/root.zig b/packages/cjit/test/root.zig new file mode 100644 index 0000000..c26525f --- /dev/null +++ b/packages/cjit/test/root.zig @@ -0,0 +1,32 @@ +const std = @import("std"); +const cjit = @import("cjit"); + +fn add(a: i32, b: i32) callconv(cjit.call) i32 { + return a + b; +} + +test { + var rt: cjit.Runtime = .init(std.testing.allocator); + defer rt.deinit(); + + try rt.compile( + \\int 
add(int a, int b); + \\ + \\int add_one(int x) + \\{ + \\ return add(x, 1); + \\} + ); + + try rt.setSymbol(fn (i32, i32) callconv(cjit.call) i32, "add", &add); + try rt.link(); + + const add_one = rt.getSymbol(fn (i32) callconv(cjit.call) i32, "add_one").?; + + try std.testing.expectEqual(-9, add_one(-10)); + try std.testing.expectEqual(11, add_one(10)); + + const add_ptr = rt.getSymbol(fn (i32, i32) callconv(cjit.call) i32, "add").?; + + try std.testing.expectEqual(&add, add_ptr); +} diff --git a/packages/tcc/src/CType.zig b/packages/tcc/src/CType.zig new file mode 100644 index 0000000..b4f009f --- /dev/null +++ b/packages/tcc/src/CType.zig @@ -0,0 +1,55 @@ +const std = @import("std"); +const Self = @This(); + +const Sym = @import("Sym.zig"); + +pub const Type = packed struct(u32) { + basic_type: enum(u4) { + void, + byte, + short, + int, + llong, + ptr, + func, + @"struct", + float, + double, + ldouble, + bool, + qlong, + qfloat, + }, + unsigned: bool = false, + defsign: bool = false, + array: bool = false, + bitfield: bool = false, + constant: bool = false, + @"volatile": bool = false, + vla: bool = false, + long: bool = false, + @"extern": bool = false, + static: bool = false, + typedef: bool = false, + @"inline": bool = false, + _unused: u4 = 0, + extra: Extra = .empty, + + pub const Extra = packed struct(u12) { + bit_pos: u6, + bit_size: u6, + + pub const empty: Extra = @bitCast(@as(u12, 0)); + pub const @"union": Extra = @bitCast(@as(u12, 1)); + pub const @"enum": Extra = @bitCast(@as(u12, 2)); + pub const enum_val: Extra = @bitCast(@as(u12, 3)); + }; + + pub fn isFloat(self: Type) bool { + const bt = self.basic_type; + return bt == .ldouble or bt == .double or bt == .float or bt == .qfloat; + } +}; + +t: Type, +ref: *Sym, diff --git a/packages/tcc/src/Elf.zig b/packages/tcc/src/Elf.zig new file mode 100644 index 0000000..e0e7936 --- /dev/null +++ b/packages/tcc/src/Elf.zig @@ -0,0 +1,50 @@ +const std = @import("std"); +const Self = @This(); + +const Section = @import("Section.zig"); +
+cur_text_section: *Section, + +// --- x86_64 RELOCATIONS --- + +pub const R_X86_64_NONE = 0; +pub const R_X86_64_64 = 1; +pub const R_X86_64_PC32 = 2; +pub const R_X86_64_GOT32 = 3; +pub const R_X86_64_PLT32 = 4; +pub const R_X86_64_COPY = 5; +pub const R_X86_64_GLOB_DAT = 6; +pub const R_X86_64_JUMP_SLOT = 7; +pub const R_X86_64_RELATIVE = 8; +pub const R_X86_64_GOTPCREL = 9; +pub const R_X86_64_32 = 10; +pub const R_X86_64_32S = 11; +pub const R_X86_64_16 = 12; +pub const R_X86_64_PC16 = 13; +pub const R_X86_64_8 = 14; +pub const R_X86_64_PC8 = 15; +pub const R_X86_64_DTPMOD64 = 16; +pub const R_X86_64_DTPOFF64 = 17; +pub const R_X86_64_TPOFF64 = 18; +pub const R_X86_64_TLSGD = 19; +pub const R_X86_64_TLSLD = 20; +pub const R_X86_64_DTPOFF32 = 21; +pub const R_X86_64_GOTTPOFF = 22; +pub const R_X86_64_TPOFF32 = 23; +pub const R_X86_64_PC64 = 24; +pub const R_X86_64_GOTOFF64 = 25; +pub const R_X86_64_GOTPC32 = 26; +pub const R_X86_64_GOT64 = 27; +pub const R_X86_64_GOTPCREL64 = 28; +pub const R_X86_64_GOTPC64 = 29; +pub const R_X86_64_GOTPLT64 = 30; +pub const R_X86_64_PLTOFF64 = 31; +pub const R_X86_64_SIZE32 = 32; +pub const R_X86_64_SIZE64 = 33; +pub const R_X86_64_GOTPC32_TLSDESC = 34; +pub const R_X86_64_TLSDESC_CALL = 35; +pub const R_X86_64_TLSDESC = 36; +pub const R_X86_64_IRELATIVE = 37; +pub const R_X86_64_RELATIVE64 = 38; +pub const R_X86_64_GOTPCRELX = 41; +pub const R_X86_64_REX_GOTPCRELX = 42; diff --git a/packages/tcc/src/Gen.zig b/packages/tcc/src/Gen.zig new file mode 100644 index 0000000..73ee1a7 --- /dev/null +++ b/packages/tcc/src/Gen.zig @@ -0,0 +1,17 @@ +const std = @import("std"); +const Self = @This(); + +const Elf = @import("Elf.zig"); +const Section = @import("Section.zig"); +const Sym = @import("Sym.zig"); + +nocode_wanted: u32, +ind: u32, + +pub fn greloca(self: *Self, elf: *Elf, section: *Section, maybe_sym: ?*Sym, offset: u64, relocation: u32, addend: i64) void { + if (self.nocode_wanted > 0 and section == elf.cur_text_section) { + 
return; + } + + // TODO +} diff --git a/packages/tcc/src/SValue.zig b/packages/tcc/src/SValue.zig new file mode 100644 index 0000000..a33bc33 --- /dev/null +++ b/packages/tcc/src/SValue.zig @@ -0,0 +1,57 @@ +const std = @import("std"); +const Self = @This(); + +const CType = @import("CType.zig"); +const Sym = @import("Sym.zig"); + +type: CType, +r: Register, +r2: Register = .{ .location = Register.Location.@"const" }, +c: CValue = std.mem.zeroes(CValue), +sym: ?*Sym = null, + +pub const Register = packed struct(u16) { + location: Location = .{}, + _unused: u2 = 0, + flags: Flags = .{}, + + pub const Location = packed struct(u6) { + value: u6 = 0, + + pub inline fn isRegister(self: @This()) bool { + return self.value < @"const".value; + } + + pub const @"const": @This() = .{ .value = 0x30 }; + pub const llocal: @This() = .{ .value = 0x31 }; + pub const local: @This() = .{ .value = 0x32 }; + pub const cmp: @This() = .{ .value = 0x33 }; + pub const jmp: @This() = .{ .value = 0x34 }; + pub const jmpi: @This() = .{ .value = 0x35 }; + }; + + pub const Flags = packed struct(u8) { + lval: bool = false, + sym: bool = false, + mustcast: bool = false, + mustbound: bool = false, + lval_type: packed struct(u3) { + byte: bool = false, + short: bool = false, + unsigned: bool = false, + } = .{}, + bounded: bool = false, + }; +}; + +pub const CValue = extern union { + ld: c_longdouble, + d: f64, + f: f32, + i: u64, + str: extern struct { + size: u32, + data: ?*anyopaque, + }, + tab: [4]u32, +}; diff --git a/packages/tcc/src/Section.zig b/packages/tcc/src/Section.zig new file mode 100644 index 0000000..85d3919 --- /dev/null +++ b/packages/tcc/src/Section.zig @@ -0,0 +1,22 @@ +const std = @import("std"); +const Self = @This(); + +const Sym = @import("Sym.zig"); + +/// Current data offset. +data_offset: usize, +/// Allocated section data. 
+data: []u8, + +pub fn realloc(self: *Self, new_size: usize) void { + var size = self.data.len; + if (size == 0) { + size = 1; + } + while (size < new_size) { + size *= 2; + } + const data: [*]u8 = @ptrCast(std.c.realloc(self.data.ptr, size).?); // realloc yields *anyopaque; cast explicitly + @memset(data[self.data.len..size], 0); + self.data = data[0..size]; +} diff --git a/packages/tcc/src/Sym.zig b/packages/tcc/src/Sym.zig new file mode 100644 index 0000000..d768254 --- /dev/null +++ b/packages/tcc/src/Sym.zig @@ -0,0 +1,65 @@ +const std = @import("std"); +const Self = @This(); + +const CType = @import("CType.zig"); +const SValue = @import("SValue.zig"); + +v: u32, +r: SValue.Register, +a: SymAttr, +u: extern union { + s: extern struct { + c: u32, + u: extern union { + sym_scope: u32, + jnext: u32, + f: FuncAttr, + auxtype: u32, + }, + }, + enum_val: u64, + d: ?*u32, +}, +type: CType, +w: extern union { + next: ?*Self, + asm_label: u32, +}, +prev: ?*Self, +prev_tok: ?*Self, + +pub const SymAttr = packed struct(u16) { + /// log2(align) + 1 (0 means unspecified) + aligned: u5 = 0, + @"packed": bool = false, + weak: bool = false, + visibility: Visibility = .default, + dllexport: bool = false, + dllimport: bool = false, + _unused: u5 = 0, +}; + +pub const Visibility = enum(u2) { + default = 0, + internal = 1, + hidden = 2, + protected = 3, +}; + +pub const FuncAttr = packed struct(u32) { + func_call: enum(u3) { + cdecl = 0, + stdcall = 1, + fastcall1 = 2, + fastcall2 = 3, + fastcall3 = 4, + fastcallw = 5, + }, + func_type: enum(u2) { + new = 1, + old = 2, + ellipsis = 3, + }, + func_args: u8, + _unused: u19, +}; diff --git a/packages/tcc/src/tcc.zig b/packages/tcc/src/tcc.zig new file mode 100644 index 0000000..fb03401 --- /dev/null +++ b/packages/tcc/src/tcc.zig @@ -0,0 +1,17 @@ +const std = @import("std"); + +pub const include_stack_size = 32; +pub const ifdef_stack_size = 64; +pub const vstack_size = 256; +pub const string_max_size = 1024; +pub const pack_stack_size = 8; + +pub const tok_hash_size = 16384; +pub const 
tok_alloc_incr = 512; +pub const tok_max_size = 4; + +pub fn err(comptime fmt: []const u8, args: anytype) noreturn { + // TODO Full implementation of original tcc_error + std.log.err(fmt, args); + std.posix.exit(1); +} diff --git a/packages/tcc/src/x86_64.zig b/packages/tcc/src/x86_64.zig new file mode 100644 index 0000000..53a5ff6 --- /dev/null +++ b/packages/tcc/src/x86_64.zig @@ -0,0 +1,339 @@ +const std = @import("std"); +const Self = @This(); + +const tcc = @import("tcc.zig"); + +const CType = @import("CType.zig"); +const Elf = @import("Elf.zig"); +const Gen = @import("Gen.zig"); +const SValue = @import("SValue.zig"); +const Sym = @import("Sym.zig"); + +/// Number of available registers. +pub const register_count = 25; +/// Whether function parameters must be evaluated in reverse order. +pub const invert_function_parameters = true; +/// Pointer size in bytes +pub const pointer_size = 8; +/// `long double` size in bytes +pub const long_double_size = 16; +/// `long double` alignment in bytes +pub const long_double_alignment = 16; +/// Maximum alignment for `aligned` attribute +pub const max_align = 16; + +pub const RegisterClass = packed struct(u32) { + /// Generic int register. + int: bool = false, + /// Generic float register. + float: bool = false, + rax: bool = false, + rcx: bool = false, + rdx: bool = false, + /// Only for long double. + st0: bool = false, + r8: bool = false, + r9: bool = false, + r10: bool = false, + r11: bool = false, + xmm0: bool = false, + xmm1: bool = false, + xmm2: bool = false, + xmm3: bool = false, + xmm4: bool = false, + xmm5: bool = false, + xmm6: bool = false, + xmm7: bool = false, + _pad: u14 = 0, + + pub const empty: RegisterClass = .{}; + /// Function return: integer register. + pub const iret: RegisterClass = .{ .rax = true }; + /// Function return: second integer register. + pub const lret: RegisterClass = .{ .rdx = true }; + /// Function return: float register. 
+ pub const fret: RegisterClass = .{ .xmm0 = true }; + /// Function return: second float register. + pub const qret: RegisterClass = .{ .xmm1 = true }; +}; + +pub const Register = packed struct(u8) { + r: enum(u5) { + rax = 0, + rcx = 1, + rdx = 2, + rsp = 4, + rsi = 6, + rdi = 7, + + r8 = 8, + r9 = 9, + r10 = 10, + r11 = 11, + + xmm0 = 16, + xmm1 = 17, + xmm2 = 18, + xmm3 = 19, + xmm4 = 20, + xmm5 = 21, + xmm6 = 22, + xmm7 = 23, + + st0 = 24, + }, + mem: bool = false, + _unused: u2 = 0, + + /// Function return: integer register. + pub const iret: Register = .{ .r = .rax }; + /// Function return: second integer register. + pub const lret: Register = .{ .r = .rdx }; + /// Function return: float register. + pub const fret: Register = .{ .r = .xmm0 }; + /// Function return: second float register. + pub const qret: Register = .{ .r = .xmm1 }; + + pub inline fn fromInt(value: u8) Register { + return @bitCast(value); + } + + pub const mem_mask = 0x20; +}; + +pub inline fn rexBase(value: u8) u8 { + return (value >> 3) & 0b1; +} + +pub inline fn regValue(value: u8) u8 { + return value & 0b111; +} + +pub const register_classes = [register_count]RegisterClass{ + .{ .int = true, .rax = true }, + .{ .int = true, .rcx = true }, + .{ .int = true, .rdx = true }, + .empty, + .empty, + .empty, + .empty, + .empty, + .{ .r8 = true }, + .{ .r9 = true }, + .{ .r10 = true }, + .{ .r11 = true }, + .empty, + .empty, + .empty, + .empty, + .{ .float = true, .xmm0 = true }, + .{ .float = true, .xmm1 = true }, + .{ .float = true, .xmm2 = true }, + .{ .float = true, .xmm3 = true }, + .{ .float = true, .xmm4 = true }, + .{ .float = true, .xmm5 = true }, + .{ .xmm6 = true }, + .{ .xmm7 = true }, + .{ .st0 = true }, +}; + +func_sub_sp_offset: u64, +func_ret_seb: i32, + +pub fn g(gen: *Gen, elf: *Elf, c: u8) void { + if (gen.nocode_wanted > 0) { + return; + } + + const ind1 = gen.ind + 1; + if (ind1 > elf.cur_text_section.data.len) { + elf.cur_text_section.realloc(ind1); + } +
elf.cur_text_section.data[gen.ind] = c; + gen.ind = ind1; +} + +pub fn o(gen: *Gen, elf: *Elf, c: u32) void { + var rest = c; // parameters are immutable, so shift a local copy + while (rest > 0) : (rest >>= 8) { + g(gen, elf, @truncate(rest)); + } +} + +pub fn genLE16(gen: *Gen, elf: *Elf, v: u16) void { + g(gen, elf, @truncate(v)); + g(gen, elf, @truncate(v >> 8)); +} + +pub fn genLE32(gen: *Gen, elf: *Elf, v: u32) void { + g(gen, elf, @truncate(v)); + g(gen, elf, @truncate(v >> 8)); + g(gen, elf, @truncate(v >> 16)); + g(gen, elf, @truncate(v >> 24)); +} + +pub fn genLE64(gen: *Gen, elf: *Elf, v: u64) void { + g(gen, elf, @truncate(v)); + g(gen, elf, @truncate(v >> 8)); + g(gen, elf, @truncate(v >> 16)); + g(gen, elf, @truncate(v >> 24)); + g(gen, elf, @truncate(v >> 32)); + g(gen, elf, @truncate(v >> 40)); + g(gen, elf, @truncate(v >> 48)); + g(gen, elf, @truncate(v >> 56)); +} + +pub fn orex(gen: *Gen, elf: *Elf, ll: u8, _r: SValue.Register, _r2: SValue.Register, b: u32) void { + const r: u8 = if (_r.location.isRegister()) _r.location.value else 0; + const r2: u8 = if (_r2.location.isRegister()) _r2.location.value else 0; + if (ll > 0 or rexBase(r) > 0 or rexBase(r2) > 0) { + o(gen, elf, 0x40 | rexBase(r) | (rexBase(r2) << 2) | (ll << 3)); + } + o(gen, elf, b); +} + +/// Output a symbol and patch all calls to it. +pub fn gsymAddr(elf: *Elf, _t: u32, a: u32) void { + var t = _t; + while (t > 0) { + const ptr = (elf.cur_text_section.data.ptr + t)[0..4]; + const n = std.mem.readInt(u32, ptr, .little); + std.mem.writeInt(u32, ptr, a -% t -% 4, .little); // rel32 displacement; wraps for backward targets + t = n; + } +} + +pub fn gsym(gen: *Gen, elf: *Elf, t: u32) void { + gsymAddr(elf, t, gen.ind); +} + +pub fn is64Type(t: CType.Type) bool { + return t.basic_type == .ptr or t.basic_type == .func or t.basic_type == .llong; +} + +/// Instruction + 4 bytes of data. Return the address of the data. +pub fn oad(gen: *Gen, elf: *Elf, c: u32, s: u32) u32 { + if (gen.nocode_wanted > 0) { + return s; + } + + o(gen, elf, c); + const t = gen.ind; + genLE32(gen, elf, s); + return t; +} + +/// Generate jmp to a label. 
+pub inline fn gjmp2(gen: *Gen, elf: *Elf, instr: u32, lbl: u32) void { + _ = oad(gen, elf, instr, lbl); +} + +pub fn genAddr32(gen: *Gen, elf: *Elf, r: SValue.Register, sym: ?*Sym, _c: u32) void { + var c = _c; + if (r.flags.sym) { + gen.greloca(elf, elf.cur_text_section, sym, gen.ind, Elf.R_X86_64_32S, c); + c = 0; + } + genLE32(gen, elf, c); +} + +pub fn genAddr64(gen: *Gen, elf: *Elf, r: SValue.Register, sym: ?*Sym, _c: u64) void { + var c = _c; + if (r.flags.sym) { + gen.greloca(elf, elf.cur_text_section, sym, gen.ind, Elf.R_X86_64_64, @bitCast(c)); + c = 0; + } + genLE64(gen, elf, c); +} + +pub fn genAddrPC32(gen: *Gen, elf: *Elf, r: SValue.Register, sym: ?*Sym, _c: u32) void { + var c = _c; + if (r.flags.sym) { + gen.greloca(elf, elf.cur_text_section, sym, gen.ind, Elf.R_X86_64_PC32, @as(i64, c) - 4); + c = 4; + } + genLE32(gen, elf, c -% 4); // wraps instead of trapping when c < 4 +} + +pub fn genGotPCRel(gen: *Gen, elf: *Elf, r: SValue.Register, sym: ?*Sym, c: u32) void { + gen.greloca(elf, elf.cur_text_section, sym, gen.ind, Elf.R_X86_64_GOTPCREL, -4); + genLE32(gen, elf, 0); + if (c > 0) { + orex(gen, elf, 1, r, .{}, 0x81); + o(gen, elf, 0xC0 + regValue(r.location.value)); + genLE32(gen, elf, c); + } +} + +pub fn genModRMImpl(gen: *Gen, elf: *Elf, _op_reg: u8, r: SValue.Register, sym: ?*Sym, c: u32, is_got: bool) void { + const op_reg = regValue(_op_reg) << 3; + if (r.location.value == SValue.Register.Location.@"const".value) { + // constant memory reference + if (!r.flags.sym) { + o(gen, elf, 0x04 | op_reg); // [sib] | destreg + _ = oad(gen, elf, 0x25, c); // disp32 + } else { + o(gen, elf, 0x05 | op_reg); // (%rip)+disp32 | destreg + if (is_got) { + genGotPCRel(gen, elf, r, sym, c); + } else { + genAddrPC32(gen, elf, r, sym, c); + } + } + } else if (r.location.value == SValue.Register.Location.local.value) { + const c_byte: u8 = @truncate(c); + if (@as(i32, @bitCast(c)) == @as(i32, @as(i8, @bitCast(c_byte)))) { + // short reference + o(gen, elf, 0x45 | op_reg); + g(gen, elf, c_byte); + } else { + _ = oad(gen, elf, 0x85 | op_reg, c); + } + } else if (Register.fromInt(r.location.value).mem) { + if (c > 0) { +
g(gen, elf, 0x80 | op_reg | regValue(r.location.value)); + genLE32(gen, elf, c); + } else { + g(gen, elf, 0x00 | op_reg | regValue(r.location.value)); + } + } else { + g(gen, elf, 0x00 | op_reg | regValue(r.location.value)); + } +} + +pub fn genModRM(gen: *Gen, elf: *Elf, op_reg: u8, r: SValue.Register, sym: ?*Sym, c: u32) void { + genModRMImpl(gen, elf, op_reg, r, sym, c, false); +} + +pub fn genModRM64(gen: *Gen, elf: *Elf, opcode: u32, op_reg: u8, r: SValue.Register, sym: ?*Sym, c: u32) void { + const is_got = (op_reg & @intFromEnum(Register.mem)) != 0 and !sym.?.type.t.static; + orex(gen, elf, 1, r, op_reg, opcode); + genModRMImpl(gen, elf, op_reg, r, sym, c, is_got); +} + +pub fn load(r: SValue.Register, sv: *SValue) void { + var fr = sv.r; + var ft = sv.type.t; + var fc: u32 = @truncate(sv.c.i); + + if (fc != sv.c.i and fr.flags.sym) { + tcc.err("64-bit addend in load", .{}); + } + + ft.defsign = false; + ft.@"volatile" = false; + ft.constant = false; + + if (fr.location == .@"const" and fr.flags.sym and fr.flags.lval and !sv.sym.?.type.t.static) { + var tr = r; + tr.location.value |= Register.mem_mask; + if (ft.isFloat()) { + tr = get_reg(.int); + tr.location.value |= Register.mem_mask; + } + genModRM64(gen, elf, 0x8b, tr, fr, sv.sym, 0); + + fr = tr; + fr.flags.lval = true; + } +}