Dream of my own C compiler

This commit is contained in:
2026-01-19 14:01:40 +01:00
parent b0deb958d2
commit 0b23707041
17 changed files with 1231 additions and 1 deletions

44
packages/cjit/build.zig Normal file
View File

@@ -0,0 +1,44 @@
const std = @import("std");
/// Build-script entry point.
///
/// Declares the public `cjit` module (rooted at `src/root.zig`) and a `test`
/// step that runs two test binaries: the module's own tests, and the tests in
/// `test/root.zig`, which import the module under the name `cjit`.
pub fn build(b: *std.Build) void {
    // Only these target triples are accepted by the target option.
    const supported_targets = [_]std.Target.Query{
        .{ .cpu_arch = .x86_64, .os_tag = .windows, .abi = .gnu },
        .{ .cpu_arch = .x86_64, .os_tag = .linux, .abi = .gnu },
    };
    const target = b.standardTargetOptions(.{ .whitelist = &supported_targets });
    const optimize = b.standardOptimizeOption(.{});

    const cjit_module = b.addModule("cjit", .{
        .target = target,
        .optimize = optimize,
        .root_source_file = b.path("src/root.zig"),
    });

    // Tests declared inside the module itself.
    const internal_tests = b.addTest(.{ .root_module = cjit_module });
    const run_internal = b.addRunArtifact(internal_tests);

    // Tests that consume the module through its public import name.
    // They are always built for the host in Debug mode.
    const external_module = b.createModule(.{
        .target = b.resolveTargetQuery(.{}),
        .optimize = .Debug,
        .root_source_file = b.path("test/root.zig"),
        .imports = &.{
            .{ .name = "cjit", .module = cjit_module },
        },
    });
    const external_tests = b.addTest(.{ .root_module = external_module });
    const run_external = b.addRunArtifact(external_tests);

    const test_step = b.step("test", "Run tests");
    for ([_]*std.Build.Step{ &run_internal.step, &run_external.step }) |run_step| {
        test_step.dependOn(run_step);
    }
}

View File

@@ -0,0 +1,11 @@
// Package manifest for the `cjit` package.
.{
.name = .cjit,
.version = "0.0.0",
.minimum_zig_version = "0.15.2",
// Files and directories that belong to the package.
// NOTE(review): `test` is not listed although build.zig references
// `test/root.zig` — confirm this is intentional.
.paths = .{
"src",
"build.zig",
"build.zig.zon",
},
.fingerprint = 0xaec0accc19243440,
}

View File

@@ -0,0 +1,108 @@
const std = @import("std");
const Self = @This();
const types = @import("types.zig");
/// Current lifecycle stage; the public methods assert on it before doing work.
state: State = .init,
/// Allocator passed to `init`; it also backs `arena`.
allocator: std.mem.Allocator,
/// Arena used by `create`; released as a whole in `deinit`.
arena: std.heap.ArenaAllocator,
/// Intrusive list of `Include` nodes, appended to by `include`.
includes: std.DoublyLinkedList = .{},
/// Extern symbols keyed by name.
/// NOTE(review): presumably the symbols the compiled C code imports — confirm.
symbols_needed: std.StringHashMapUnmanaged(ExternSymbol) = .{},
/// Symbols keyed by name, each with a section-relative `Location`.
/// NOTE(review): presumably the symbols the compiled C code defines — confirm.
symbols_provided: std.StringHashMapUnmanaged(InternSymbol) = .{},
/// Symbols keyed by name, each carrying a pointer.
/// NOTE(review): presumably filled in by linking — confirm.
symbols_located: std.StringHashMapUnmanaged(LocatedSymbol) = .{},
/// Lifecycle stages of a runtime, in the order the API moves through them.
pub const State = enum(u8) {
/// Default state; `include` and `compile` assert it.
init,
/// Set by `compile`; `setSymbol` and `link` assert it.
compiled,
/// Set by `link`; `getSymbol` asserts it.
linked,
};
/// One registered include, stored as an intrusive list element.
pub const Include = struct {
/// Source text as passed to `include`; the slice is stored, not copied.
code: []const u8,
/// List hook used to chain this entry into `includes`.
node: std.DoublyLinkedList.Node = .{},
};
/// Value type of the `symbols_needed` map.
pub const ExternSymbol = struct {
name: []const u8,
/// C-side type of the symbol.
c_type: types.Type,
/// Address of the symbol, or null.
/// NOTE(review): presumably null until the host supplies one — confirm.
ptr: ?*anyopaque,
};
/// Value type of the `symbols_provided` map.
pub const InternSymbol = struct {
name: []const u8,
/// NOTE(review): presumably whether the symbol is visible outside the
/// compiled code — confirm.
public: bool,
/// C-side type of the symbol.
c_type: types.Type,
location: Location,
/// Which section the symbol lives in, with a `usize` payload.
/// NOTE(review): the payload is presumably a byte offset into that
/// section — confirm.
pub const Location = union(enum) {
text: usize,
data: usize,
rodata: usize,
bss: usize,
};
};
/// Value type of the `symbols_located` map: a symbol together with a pointer.
pub const LocatedSymbol = struct {
name: []const u8,
public: bool,
/// C-side type of the symbol.
c_type: types.Type,
/// Address of the symbol, or null.
ptr: ?*anyopaque,
};
/// Creates a runtime in the `.init` state.
///
/// `allocator` is stored and also backs the internal arena. Release the
/// runtime with `deinit`.
pub fn init(allocator: std.mem.Allocator) Self {
    const arena = std.heap.ArenaAllocator.init(allocator);
    return Self{
        .allocator = allocator,
        .arena = arena,
    };
}
/// Releases everything owned by the runtime and invalidates `self`.
///
/// Frees the three symbol tables and then the arena. The tables are freed
/// with `self.allocator`.
/// NOTE(review): this assumes the tables are populated with `self.allocator`
/// (no insertion code exists yet) — keep the two in sync.
pub fn deinit(self: *Self) void {
    // There is no `symbols` field; each of the three tables must be freed.
    self.symbols_needed.deinit(self.allocator);
    self.symbols_provided.deinit(self.allocator);
    self.symbols_located.deinit(self.allocator);
    self.arena.deinit();
    self.* = undefined;
}
/// Registers `code` as an include. Only valid in the `.init` state.
///
/// Empty input is ignored. The slice is stored as-is (not copied), so it must
/// stay valid for as long as the runtime uses it. Fails with
/// `error.OutOfMemory` if the list entry cannot be allocated.
pub fn include(self: *Self, code: []const u8) !void {
    std.debug.assert(self.state == .init);
    if (code.len != 0) {
        const entry = try self.create(Include);
        entry.* = .{ .code = code };
        self.includes.append(&entry.node);
    }
}
/// Compiles `code`. Only valid in the `.init` state.
///
/// Currently a stub: it marks the runtime as `.compiled` and then panics with
/// "Not implemented", so it never returns.
pub fn compile(self: *Self, code: []const u8) !void {
std.debug.assert(self.state == .init);
// The source text is not used yet.
_ = code;
self.state = .compiled;
std.debug.panic("Not implemented", .{});
}
/// Supplies the address of an extern symbol. Only valid in the `.compiled`
/// state.
///
/// `T` is the Zig-side type of the symbol and `ptr` its address.
///
/// Errors:
/// - `error.SymbolNotFound`: no entry named `name` in `symbols_needed`.
/// - `error.IncompatibleType`: `types.isCompatible` rejects `T` for the
///   symbol's C type.
pub fn setSymbol(
    self: *Self,
    comptime T: type,
    name: []const u8,
    ptr: *const T,
) error{ SymbolNotFound, IncompatibleType }!void {
    std.debug.assert(self.state == .compiled);
    // Extern symbols live in `symbols_needed`; there is no `symbols` field.
    const symbol = self.symbols_needed.getPtr(name) orelse return error.SymbolNotFound;
    if (!types.isCompatible(T, symbol.c_type)) return error.IncompatibleType;
    // `ExternSymbol.ptr` is a mutable opaque pointer and `@ptrCast` cannot
    // drop `const`, so erase the type first and strip constness explicitly.
    const erased: *const anyopaque = @ptrCast(ptr);
    symbol.ptr = @constCast(erased);
}
/// Links the compiled code. Only valid in the `.compiled` state.
///
/// Currently a stub: it marks the runtime as `.linked` and then panics with
/// "Not implemented", so it never returns.
pub fn link(self: *Self) !void {
std.debug.assert(self.state == .compiled);
self.state = .linked;
std.debug.panic("Not implemented", .{});
}
/// Looks up a symbol by name. Only valid in the `.linked` state.
///
/// Returns the symbol's address cast to `*const T`, or null when there is no
/// entry named `name` in `symbols_located` or its pointer is null.
///
/// NOTE(review): `T` is not checked against the symbol's C type here; the
/// caller is trusted to pass a matching type.
pub fn getSymbol(self: *const Self, comptime T: type, name: []const u8) ?*const T {
    std.debug.assert(self.state == .linked);
    // Located symbols live in `symbols_located`; there is no `symbols` field.
    const symbol = self.symbols_located.get(name) orelse return null;
    return @ptrCast(@alignCast(symbol.ptr));
}
/// Allocates one uninitialized `T` from the runtime's arena.
/// The memory is released when the arena is deinitialized.
fn create(self: *Self, comptime T: type) error{OutOfMemory}!*T {
    const arena_allocator = self.arena.allocator();
    return arena_allocator.create(T);
}

View File

@@ -0,0 +1,34 @@
const std = @import("std");
/// Section flagged read-only and executable.
text: Section = .{ .read_only = true, .executable = true },
/// Section flagged writable and non-executable.
data: Section = .{ .read_only = false, .executable = false },
/// Section flagged read-only and non-executable.
rodata: Section = .{ .read_only = true, .executable = false },
/// NOTE(review): presumably the byte size of the zero-initialized section,
/// which needs no stored contents — confirm.
bss: usize,
/// A growable byte buffer for one output section, plus its protection flags.
///
/// Every write takes the allocator explicitly; use the same allocator for all
/// calls on one section, including `deinit`.
pub const Section = struct {
    /// Bytes emitted so far.
    data: std.ArrayList(u8) = .empty,
    read_only: bool,
    executable: bool,

    /// Frees the byte buffer and invalidates the section.
    pub fn deinit(self: *Section, allocator: std.mem.Allocator) void {
        self.data.deinit(allocator);
        self.* = undefined;
    }

    /// Appends the in-memory bytes of `value`, first padding with zeroes up
    /// to `@alignOf(@TypeOf(value))`.
    ///
    /// Types without a unique byte representation (for example ones with
    /// padding) are rejected at compile time, so indeterminate bytes cannot
    /// be written in any build mode.
    pub fn writeValue(self: *Section, value: anytype, allocator: std.mem.Allocator) !void {
        const T = @TypeOf(value);
        comptime {
            if (!std.meta.hasUniqueRepresentation(T)) {
                @compileError("Section.writeValue: " ++ @typeName(T) ++ " has no unique byte representation");
            }
        }
        try self.alignForward(@alignOf(T), allocator);
        try self.writeBytes(std.mem.asBytes(&value), allocator);
    }

    /// Appends `data` verbatim, with no alignment padding.
    pub fn writeBytes(self: *Section, data: []const u8, allocator: std.mem.Allocator) !void {
        try self.data.appendSlice(allocator, data);
    }

    /// Pads the section with zero bytes until its length is a multiple of
    /// `alignment`, which must be a non-zero power of two
    /// (`std.mem.alignForward` asserts this).
    pub fn alignForward(self: *Section, alignment: u16, allocator: std.mem.Allocator) !void {
        const current_len = self.data.items.len;
        const aligned_len = std.mem.alignForward(usize, current_len, alignment);
        try self.data.appendNTimes(allocator, 0, aligned_len - current_len);
    }
};

View File

@@ -0,0 +1,15 @@
const std = @import("std");
const builtin = @import("builtin");
pub const Runtime = @import("Runtime.zig");
pub const types = @import("types.zig");
/// Calling convention for functions shared between Zig and this package
/// (use as `callconv(call)`).
///
/// The choice depends only on the CPU architecture. Unsupported architectures
/// fail at compile time with a message naming the architecture.
/// NOTE(review): x86_64 selects the System V convention regardless of the OS,
/// including Windows — confirm this is intended.
pub const call: std.builtin.CallingConvention = switch (builtin.cpu.arch) {
    .aarch64 => @compileError("TODO"),
    .x86_64 => .{ .x86_64_sysv = .{} },
    else => |arch| @compileError("cjit: unsupported CPU architecture: " ++ @tagName(arch)),
};
// References every declaration reachable from this file, recursively, so that
// the declarations are analyzed and nested tests are picked up.
test {
std.testing.refAllDeclsRecursive(@This());
}

View File

@@ -0,0 +1,149 @@
/// C keywords, one tag per keyword, spelled exactly as in C source.
/// The list appears to match the C11 keyword set.
/// Tags that collide with Zig keywords use the `@"..."` identifier syntax.
pub const Keyword = enum {
_Alignas,
_Alignof,
_Atomic,
_Bool,
_Complex,
_Generic,
_Imaginary,
_Noreturn,
_Static_assert,
_Thread_local,
auto,
@"break",
case,
char,
@"const",
@"continue",
default,
do,
double,
@"else",
@"enum",
@"extern",
float,
@"for",
goto,
@"if",
@"inline",
int,
long,
register,
restrict,
@"return",
short,
signed,
sizeof,
static,
@"struct",
@"switch",
typedef,
@"union",
unsigned,
void,
@"volatile",
@"while",
};
/// An identifier, holding its spelling.
/// NOTE(review): `Token.identifier` stores a bare `[]const u8` rather than
/// this struct — confirm which representation is intended.
pub const Identifier = struct {
name: []const u8,
};
/// A numeric or character constant, tagged by its C type.
/// `long` and `long_long` (and their unsigned forms) both use 64-bit payloads.
pub const Constant = union(enum) {
int: i32,
long: i64,
long_long: i64,
unsigned_int: u32,
unsigned_long: u64,
unsigned_long_long: u64,
float: f32,
double: f64,
// Character constants: one byte for `character`, 32 bits for `wide_character`.
character: u8,
wide_character: u32,
};
/// A string literal, holding its bytes.
/// NOTE(review): `Token.string_literal` stores a bare `[]const u8` rather than
/// this struct — confirm which representation is intended.
pub const StringLiteral = struct {
value: []const u8,
};
/// C punctuators, one tag per spelling, grouped by length (longest first).
/// Includes the preprocessor operators `#` and `##`; digraphs are not listed.
pub const Punctuator = enum {
// three characters
@"...",
@"<<=",
@">>=",
// two characters
@"--",
@"-=",
@"->",
@"!=",
@"*=",
@"/=",
@"&&",
@"&=",
@"##",
@"%=",
@"^=",
@"++",
@"+=",
@"<<",
@"<=",
@"==",
@">=",
@">>",
@"|=",
@"||",
// single character
@"-",
@",",
@";",
@":",
@"!",
@"?",
@".",
@"(",
@")",
@"[",
@"]",
@"{",
@"}",
@"*",
@"/",
@"&",
@"#",
@"%",
@"^",
@"+",
@"<",
@"=",
@">",
@"|",
@"~",
};
/// A single lexical token, tagged by category.
pub const Token = union(enum) {
keyword: Keyword,
/// Spelling of the identifier.
identifier: []const u8,
constant: Constant,
/// String literal as bytes.
string_literal: []const u8,
/// Wide string literal as 32-bit units.
wide_string_literal: []const u32,
punctuator: Punctuator,
};
/// Reports whether `code_point` may begin an identifier.
///
/// Accepts ASCII letters, `_`, and every code point at or above 128
/// (all non-ASCII code points are accepted without further classification).
pub fn isIdentifierStart(code_point: u21) bool {
    return switch (code_point) {
        'A'...'Z', 'a'...'z', '_' => true,
        else => code_point >= 128,
    };
}
/// Reports whether `code_point` may appear after the first character of an
/// identifier.
///
/// Accepts ASCII letters, ASCII digits, `_`, and every code point at or above
/// 128 (all non-ASCII code points are accepted without further
/// classification).
pub fn isIdentifierMiddle(code_point: u21) bool {
    return switch (code_point) {
        '0'...'9', 'A'...'Z', 'a'...'z', '_' => true,
        else => code_point >= 128,
    };
}

201
packages/cjit/src/types.zig Normal file
View File

@@ -0,0 +1,201 @@
const std = @import("std");
/// A C type. Scalar types carry no payload; compound types point at a
/// descriptor struct.
pub const Type = union(enum) {
// Signed integer types.
signed_char: void,
signed_short: void,
signed_int: void,
signed_long: void,
signed_long_long: void,
// Unsigned integer types.
unsigned_char: void,
unsigned_short: void,
unsigned_int: void,
unsigned_long: void,
unsigned_long_long: void,
// Floating-point types.
float: void,
double: void,
long_double: void,
// Other payload-free types. `char` is a tag of its own, separate from
// `signed_char` and `unsigned_char`.
void: void,
noreturn: void,
char: void,
bool: void,
// Compound types, described by the pointed-to struct.
@"enum": *const Enum,
@"struct": *const Struct,
@"union": *const Union,
array: *const Array,
function: *const Function,
pointer: *const Pointer,
};
/// Descriptor of a C enum type: its name and its constants.
pub const Enum = struct {
name: []const u8,
constants: []const EnumConstant,
};
/// One constant of a C enum.
pub const EnumConstant = struct {
name: []const u8,
value: i32,
/// NOTE(review): presumably true when `value` was not written explicitly
/// in the source but derived from the preceding constant — confirm.
inferred: bool,
};
/// Descriptor of a C struct type with a precomputed layout.
pub const Struct = struct {
name: []const u8,
fields: []const StructField,
/// Reported by `sizeOf` for this type.
size: usize,
/// Reported by `alignOf` for this type.
@"align": u16,
};
/// One field of a C struct.
pub const StructField = struct {
name: []const u8,
type: Type,
/// NOTE(review): presumably the byte offset from the start of the
/// struct — confirm.
offset: usize,
};
/// Descriptor of a C union type with a precomputed layout.
pub const Union = struct {
name: []const u8,
fields: []const UnionField,
/// Reported by `sizeOf` for this type.
size: usize,
/// Reported by `alignOf` for this type.
@"align": u16,
};
/// One field of a C union. Unlike `StructField`, it carries no offset.
pub const UnionField = struct {
name: []const u8,
type: Type,
};
/// Descriptor of a C array type.
pub const Array = struct {
/// Element type.
child: Type,
/// Element count, or null when the length is not known.
/// `sizeOf` and `alignOf` report 8 for arrays with a null length.
length: ?usize,
};
/// Descriptor of a C function type: its arguments and its return type.
pub const Function = struct {
arguments: []const FunctionArgument,
@"return": Type,
};
/// One argument of a C function type.
pub const FunctionArgument = struct {
name: []const u8,
type: Type,
};
/// Descriptor of a C pointer type.
pub const Pointer = struct {
/// Pointed-to type.
child: Type,
// NOTE(review): presumably the qualifiers of the pointed-to type — confirm.
@"const": bool,
@"volatile": bool,
};
/// Returns the size in bytes of a C type, or null when it has none.
///
/// - `void`, `noreturn` and function types have no size.
/// - Structs and unions report their stored `size`.
/// - An array of known length is `length * sizeOf(child)`, or null when the
///   element type has no size.
/// - An array of unknown length reports 8, the same as a pointer.
pub fn sizeOf(@"type": Type) ?usize {
    switch (@"type") {
        .signed_char, .unsigned_char, .char, .bool => return 1,
        .signed_short, .unsigned_short => return 2,
        .signed_int, .unsigned_int, .float, .@"enum" => return 4,
        .signed_long,
        .signed_long_long,
        .unsigned_long,
        .unsigned_long_long,
        .double,
        .long_double,
        .pointer,
        => return 8,
        .void, .noreturn, .function => return null,
        .@"struct" => |record| return record.size,
        .@"union" => |record| return record.size,
        .array => |array| {
            const length = array.length orelse return 8;
            const element_size = sizeOf(array.child) orelse return null;
            return length * element_size;
        },
    }
}
/// Returns the alignment in bytes of a C type, or null when it has none.
///
/// - `void`, `noreturn` and function types have no alignment.
/// - Structs and unions report their stored `align`.
/// - An array of known length has the alignment of its element type.
/// - An array of unknown length reports 8, the same as a pointer.
pub fn alignOf(@"type": Type) ?u16 {
    switch (@"type") {
        .signed_char, .unsigned_char, .char, .bool => return 1,
        .signed_short, .unsigned_short => return 2,
        .signed_int, .unsigned_int, .float, .@"enum" => return 4,
        .signed_long,
        .signed_long_long,
        .unsigned_long,
        .unsigned_long_long,
        .double,
        .long_double,
        .pointer,
        => return 8,
        .void, .noreturn, .function => return null,
        .@"struct" => |record| return record.@"align",
        .@"union" => |record| return record.@"align",
        .array => |array| {
            if (array.length == null) return 8;
            return alignOf(array.child);
        },
    }
}
/// Reports whether values of the Zig type `ZigType` can stand in for values of
/// the C type `c_type`.
///
/// Integers must match in width and, except for plain `char`, in signedness.
/// Floats must match in width; `long double` is treated as 64-bit. Zig type
/// kinds marked `TODO` are not supported yet and fail at compile time.
pub fn isCompatible(comptime ZigType: type, c_type: Type) bool {
    return switch (@typeInfo(ZigType)) {
        .type => false,
        .void => c_type == .void,
        .bool => c_type == .bool,
        .noreturn => c_type == .noreturn,
        .int => |zig_int| switch (c_type) {
            .signed_char => zig_int.signedness == .signed and zig_int.bits == 8,
            .signed_short => zig_int.signedness == .signed and zig_int.bits == 16,
            .signed_int => zig_int.signedness == .signed and zig_int.bits == 32,
            .signed_long => zig_int.signedness == .signed and zig_int.bits == 64,
            .signed_long_long => zig_int.signedness == .signed and zig_int.bits == 64,
            .unsigned_char => zig_int.signedness == .unsigned and zig_int.bits == 8,
            .unsigned_short => zig_int.signedness == .unsigned and zig_int.bits == 16,
            .unsigned_int => zig_int.signedness == .unsigned and zig_int.bits == 32,
            .unsigned_long => zig_int.signedness == .unsigned and zig_int.bits == 64,
            .unsigned_long_long => zig_int.signedness == .unsigned and zig_int.bits == 64,
            // Plain `char` accepts either signedness; only the width matters.
            .char => zig_int.bits == 8,
            else => false,
        },
        .float => |zig_float| switch (c_type) {
            .float => zig_float.bits == 32,
            .double => zig_float.bits == 64,
            .long_double => zig_float.bits == 64,
            // Any non-floating C type is incompatible with a Zig float.
            else => false,
        },
        .pointer => @compileError("TODO"),
        .array => @compileError("TODO"),
        .@"struct" => @compileError("TODO"),
        .comptime_float => false,
        .comptime_int => false,
        .undefined => true,
        .null => @compileError("TODO"),
        .optional => @compileError("TODO"),
        .error_union => false,
        .error_set => false,
        .@"enum" => @compileError("TODO"),
        .@"union" => @compileError("TODO"),
        // NOTE(review): function types are always rejected, so a caller that
        // passes a Zig `fn` type for a C function symbol gets `false` —
        // confirm whether `.function` should be matched here.
        .@"fn" => false,
        .@"opaque" => @compileError("TODO"),
        .frame => false,
        .@"anyframe" => false,
        .vector => false,
        .enum_literal => false,
    };
}

View File

@@ -0,0 +1,32 @@
const std = @import("std");
const cjit = @import("cjit");
/// Host-side helper handed to the runtime under the symbol name "add" by the
/// test below. Uses `cjit.call` as its calling convention and returns `a + b`.
fn add(a: i32, b: i32) callconv(cjit.call) i32 {
return a + b;
}
// End-to-end check: compile C code that calls a host-provided `add`, link it,
// then call the compiled `add_one` and look the host symbol up again.
test {
    var rt: cjit.Runtime = .init(std.testing.allocator);
    defer rt.deinit();

    try rt.compile(
        \\int add(int a, int b);
        \\
        \\int add_one(int x)
        \\{
        \\ return add(x, 1);
        \\}
    );
    try rt.setSymbol(fn (i32, i32) callconv(cjit.call) i32, "add", &add);
    try rt.link();

    const add_one = rt.getSymbol(fn (i32) callconv(cjit.call) i32, "add_one").?;
    try std.testing.expectEqual(-9, add_one(-10));
    try std.testing.expectEqual(11, add_one(10));

    // `expectEqual` takes the expected value first, so failure messages
    // report the two sides correctly.
    const add_ptr = rt.getSymbol(fn (i32, i32) callconv(cjit.call) i32, "add").?;
    try std.testing.expectEqual(&add, add_ptr);
}