From 0a3d82a5625e007d517b129a4dac209d01ddc5e7 Mon Sep 17 00:00:00 2001 From: Szymon Nowakowski Date: Sat, 24 Jan 2026 22:43:57 +0100 Subject: [PATCH] cjit: Some standard headers, builtins --- packages/cjit/src/includes/builtin.h | 31 +++++ packages/cjit/src/includes/stdalign.h | 7 + packages/cjit/src/includes/stdbool.h | 7 + packages/cjit/src/includes/stddef.h | 9 ++ packages/cjit/src/includes/stdint.h | 116 +++++++++++++++++ packages/cjit/src/includes/stdnoreturn.h | 3 + packages/cjit/src/tokens.zig | 9 +- packages/cjit/src/tokens/Builtin.zig | 156 +++++++++++++++++++++++ packages/cjit/src/tokens/Punctuator.zig | 6 +- packages/cjit/src/tokens/Token.zig | 14 +- packages/cjit/src/tokens/Tokenizer.zig | 6 +- packages/cjit/src/x86_64.zig | 1 - 12 files changed, 350 insertions(+), 15 deletions(-) create mode 100644 packages/cjit/src/includes/builtin.h create mode 100644 packages/cjit/src/includes/stdalign.h create mode 100644 packages/cjit/src/includes/stdbool.h create mode 100644 packages/cjit/src/includes/stddef.h create mode 100644 packages/cjit/src/includes/stdint.h create mode 100644 packages/cjit/src/includes/stdnoreturn.h create mode 100644 packages/cjit/src/tokens/Builtin.zig diff --git a/packages/cjit/src/includes/builtin.h b/packages/cjit/src/includes/builtin.h new file mode 100644 index 0000000..a827ebc --- /dev/null +++ b/packages/cjit/src/includes/builtin.h @@ -0,0 +1,31 @@ +#pragma once + +#define abs(value) __builtin_abs(value) +#define byteswap(value) __builtin_byteswap(value) +#define ceil(value) __builtin_ceil(value) +#define clz(value) __builtin_clz(value) +#define containerof(ptr, type, member) __builtin_containerof(ptr, type, member) +#define cos(value) __builtin_cos(value) +#define ctz(value) __builtin_ctz(value) +#define embedfile(path) __builtin_embedfile(path) +#define exp(value) __builtin_exp(value) +#define exp2(value) __builtin_exp2(value) +#define floor(value) __builtin_floor(value) +#define frameaddress() __builtin_frameaddress() +#define 
log(value) __builtin_log(value) +#define log10(value) __builtin_log10(value) +#define log2(value) __builtin_log2(value) +#define max(...) __builtin_max(__VA_ARGS__) +#define memcpy(dest, src, count) __builtin_memcpy(dest, src, count) +#define memmove(dest, src, count) __builtin_memmove(dest, src, count) +#define memset(dest, ch, count) __builtin_memset(dest, ch, count) +#define min(...) __builtin_min(__VA_ARGS__) +#define popcount(value) __builtin_popcount(value) +#define returnaddress() __builtin_returnaddress() +#define round(value) __builtin_round(value) +#define sin(value) __builtin_sin(value) +#define sqrt(value) __builtin_sqrt(value) +#define tan(value) __builtin_tan(value) +#define trunc(value) __builtin_trunc(value) +#define typename(type) __builtin_typename(type) +#define typeof(...) __builtin_typeof(__VA_ARGS__) diff --git a/packages/cjit/src/includes/stdalign.h b/packages/cjit/src/includes/stdalign.h new file mode 100644 index 0000000..f5e415c --- /dev/null +++ b/packages/cjit/src/includes/stdalign.h @@ -0,0 +1,7 @@ +#pragma once + +#define alignas _Alignas +#define alignof _Alignof + +#define __alignas_is_defined 1 +#define __alignof_is_defined 1 diff --git a/packages/cjit/src/includes/stdbool.h b/packages/cjit/src/includes/stdbool.h new file mode 100644 index 0000000..2d46c11 --- /dev/null +++ b/packages/cjit/src/includes/stdbool.h @@ -0,0 +1,7 @@ +#pragma once + +#define bool _Bool + +#define true 1 +#define false 0 +#define __bool_true_false_are_defined 1 diff --git a/packages/cjit/src/includes/stddef.h b/packages/cjit/src/includes/stddef.h new file mode 100644 index 0000000..f563ffa --- /dev/null +++ b/packages/cjit/src/includes/stddef.h @@ -0,0 +1,9 @@ +#pragma once + +typedef long ptrdiff_t; +typedef long max_align_t; +typedef unsigned long size_t; + +#define NULL ((void *)0) + +#define offsetof(type, member) __builtin_offsetof(type, member) diff --git a/packages/cjit/src/includes/stdint.h b/packages/cjit/src/includes/stdint.h new file mode 100644 index 
0000000..353a488 --- /dev/null +++ b/packages/cjit/src/includes/stdint.h @@ -0,0 +1,116 @@ +#pragma once + +typedef signed char int8_t; +typedef short int16_t; +typedef int int32_t; +typedef long int64_t; + +typedef int8_t int_fast8_t; +typedef int16_t int_fast16_t; +typedef int32_t int_fast32_t; +typedef int64_t int_fast64_t; + +typedef int8_t int_least8_t; +typedef int16_t int_least16_t; +typedef int32_t int_least32_t; +typedef int64_t int_least64_t; + +typedef int64_t intmax_t; +typedef int64_t intptr_t; + +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; +typedef unsigned long uint64_t; + +typedef uint8_t uint_fast8_t; +typedef uint16_t uint_fast16_t; +typedef uint32_t uint_fast32_t; +typedef uint64_t uint_fast64_t; + +typedef uint8_t uint_least8_t; +typedef uint16_t uint_least16_t; +typedef uint32_t uint_least32_t; +typedef uint64_t uint_least64_t; + +typedef uint64_t uintmax_t; +typedef uint64_t uintptr_t; + +#define INT8_MIN (-0x80) +#define INT16_MIN (-0x8000) +#define INT32_MIN (-0x7FFFFFFF - 1) +#define INT64_MIN (-0x7FFFFFFFFFFFFFFFL - 1) + +#define INT_FAST8_MIN INT8_MIN +#define INT_FAST16_MIN INT16_MIN +#define INT_FAST32_MIN INT32_MIN +#define INT_FAST64_MIN INT64_MIN + +#define INT_LEAST8_MIN INT8_MIN +#define INT_LEAST16_MIN INT16_MIN +#define INT_LEAST32_MIN INT32_MIN +#define INT_LEAST64_MIN INT64_MIN + +#define INTPTR_MIN INT64_MIN +#define INTMAX_MIN INT64_MIN + +#define INT8_MAX (0x7F) +#define INT16_MAX (0x7FFF) +#define INT32_MAX (0x7FFFFFFF) +#define INT64_MAX (0x7FFFFFFFFFFFFFFFL) + +#define INT_FAST8_MAX INT8_MAX +#define INT_FAST16_MAX INT16_MAX +#define INT_FAST32_MAX INT32_MAX +#define INT_FAST64_MAX INT64_MAX + +#define INT_LEAST8_MAX INT8_MAX +#define INT_LEAST16_MAX INT16_MAX +#define INT_LEAST32_MAX INT32_MAX +#define INT_LEAST64_MAX INT64_MAX + +#define INTPTR_MAX INT64_MAX +#define INTMAX_MAX INT64_MAX + +#define UINT8_MAX (0xFF) +#define UINT16_MAX (0xFFFF) +#define UINT32_MAX 
(0xFFFFFFFFU) +#define UINT64_MAX (0xFFFFFFFFFFFFFFFFUL) + +#define UINT_FAST8_MAX UINT8_MAX +#define UINT_FAST16_MAX UINT16_MAX +#define UINT_FAST32_MAX UINT32_MAX +#define UINT_FAST64_MAX UINT64_MAX + +#define UINT_LEAST8_MAX UINT8_MAX +#define UINT_LEAST16_MAX UINT16_MAX +#define UINT_LEAST32_MAX UINT32_MAX +#define UINT_LEAST64_MAX UINT64_MAX + +#define UINTPTR_MAX UINT64_MAX +#define UINTMAX_MAX UINT64_MAX + +#define INT8_C(value) value +#define INT16_C(value) value +#define INT32_C(value) value +#define INT64_C(value) value ## L + +#define INTMAX_C(value) value ## L + +#define UINT8_C(value) value +#define UINT16_C(value) value +#define UINT32_C(value) value ## U +#define UINT64_C(value) value ## UL + +#define UINTMAX_C(value) value ## UL + +#define PTRDIFF_MIN (-0x7FFFFFFFFFFFFFFFL - 1) +#define PTRDIFF_MAX (0x7FFFFFFFFFFFFFFFL) + +#define SIZE_MAX (0xFFFFFFFFFFFFFFFFUL) + +#define WINT_MIN (-0x7FFFFFFF - 1) +#define WINT_MAX (0x7FFFFFFF) + +#define WCHAR_MIN (0) +#define WCHAR_MAX (0x10FFFF) diff --git a/packages/cjit/src/includes/stdnoreturn.h b/packages/cjit/src/includes/stdnoreturn.h new file mode 100644 index 0000000..104ca32 --- /dev/null +++ b/packages/cjit/src/includes/stdnoreturn.h @@ -0,0 +1,3 @@ +#pragma once + +#define noreturn _Noreturn diff --git a/packages/cjit/src/tokens.zig b/packages/cjit/src/tokens.zig index 3dc9fbe..572a4d3 100644 --- a/packages/cjit/src/tokens.zig +++ b/packages/cjit/src/tokens.zig @@ -1,6 +1,7 @@ -pub const Constant = @import("tokens/Constant.zig"); -pub const Keyword = @import("tokens/Keyword.zig"); -pub const Punctuator = @import("tokens/Punctuator.zig"); -pub const Token = @import("tokens/Token.zig"); +pub const Builtin = @import("tokens/Builtin.zig").Builtin; +pub const Constant = @import("tokens/Constant.zig").Constant; +pub const Keyword = @import("tokens/Keyword.zig").Keyword; +pub const Punctuator = @import("tokens/Punctuator.zig").Punctuator; +pub const Token = @import("tokens/Token.zig").Token; pub const 
Tokenizer = @import("tokens/Tokenizer.zig"); pub const Utf8Iterator = @import("tokens/Utf8Iterator.zig"); diff --git a/packages/cjit/src/tokens/Builtin.zig b/packages/cjit/src/tokens/Builtin.zig new file mode 100644 index 0000000..2e67140 --- /dev/null +++ b/packages/cjit/src/tokens/Builtin.zig @@ -0,0 +1,156 @@ +const std = @import("std"); + +pub const Builtin = enum { + /// Usage: `__builtin_abs(value)` + /// + /// `value` can be any integer or real type. Equivalent to + /// `value < 0 ? -value : value`. Noop for unsigned integer types. + __builtin_abs, + /// Usage: `__builtin_byteswap(value)` + /// + /// `value` can be any integer type. + __builtin_byteswap, + /// Usage: `__builtin_ceil(value)` + /// + /// `value` can be any real type. + __builtin_ceil, + /// Usage: `__builtin_clz(value)` + /// + /// Count leading zeroes. `value` can be any integer type. The return type + /// is `int`. + __builtin_clz, + /// Usage: `__builtin_containerof(ptr, type, member)` + /// + /// `ptr` must be a pointer to a struct or a union. `type` must be a type. + /// `member` must be an identifier. Given `ptr` is a pointer to a given + /// member of `type`, returns a pointer to the entire container. + __builtin_containerof, + /// Usage: `__builtin_cos(value)` + /// + /// `value` can be any real type. + __builtin_cos, + /// Usage: `__builtin_ctz(value)` + /// + /// Count trailing zeroes. `value` can be any integer type. The return type + /// is `int`. + __builtin_ctz, + /// Usage: `__builtin_embedfile(path)` + /// + /// `path` must be a string literal. The return type is `const char *`. The + /// data is null-terminated. + __builtin_embedfile, + /// Usage: `__builtin_exp(value)` + /// + /// `value` can be any real type. Calculates e^value. + __builtin_exp, + /// Usage: `__builtin_exp2(value)` + /// + /// `value` can be any real type. Calculates 2^value. + __builtin_exp2, + /// Usage: `__builtin_floor(value)` + /// + /// `value` can be any real type. 
+ __builtin_floor, + /// Usage: `__builtin_frameaddress()` + /// + /// Returns the value of base pointer. The return type is equivalent to + /// `uintptr_t`. + __builtin_frameaddress, + /// Usage: `__builtin_log(value)` + /// + /// `value` can be any real type. Calculates natural logarithm (base e). + __builtin_log, + /// Usage: `__builtin_log10(value)` + /// + /// `value` can be any real type. Calculates base 10 logarithm. + __builtin_log10, + /// Usage: `__builtin_log2(value)` + /// + /// `value` can be any real type. Calculates base 2 logarithm. + __builtin_log2, + /// Usage: `__builtin_max(...)` + /// + /// The arguments can be any integer or real types. NaN values are ignored. + __builtin_max, + /// Usage: `__builtin_memcpy(dest, src, count)` + /// + /// `dest` and `src` must be pointers. The pointers are reinterpreted as + /// pointers to `char`. `count` is coerced to the equivalent of `size_t`. + /// `dest` must be a non-const pointer. The regions must not overlap. + __builtin_memcpy, + /// Usage: `__builtin_memmove(dest, src, count)` + /// + /// `dest` and `src` must be pointers. The pointers are reinterpreted as + /// pointers to `char`. `count` is coerced to the equivalent of `size_t`. + /// `dest` must be a non-const pointer. The regions may overlap. + __builtin_memmove, + /// Usage: `__builtin_memset(dest, ch, count)` + /// + /// `dest` must be a pointer. The pointer is reinterpreted as pointer to + /// `char`. `ch` is cast to `unsigned char`. `dest` must be a non-const + /// pointer. + __builtin_memset, + /// Usage: `__builtin_min(...)` + /// + /// The arguments can be any integer or real types. NaN values are ignored. + __builtin_min, + /// Usage: `__builtin_offsetof(type, member)` + /// + /// `type` must be a type. `member` must be an identifier. + __builtin_offsetof, + /// Usage: `__builtin_popcount(value)` + /// + /// `value` can be any integer type. The return type is `int`. 
+ __builtin_popcount, + /// Usage: `__builtin_returnaddress()` + /// + /// Returns the address of the instruction to run after current function + /// returns. The return type is equivalent to `uintptr_t`. + __builtin_returnaddress, + /// Usage: `__builtin_round(value)` + /// + /// `value` can be any real type. + __builtin_round, + /// Usage: `__builtin_sin(value)` + /// + /// `value` can be any real type. + __builtin_sin, + /// Usage: `__builtin_sqrt(value)` + /// + /// `value` can be any real type. + __builtin_sqrt, + /// Usage: `__builtin_tan(value)` + /// + /// `value` can be any real type. + __builtin_tan, + /// Usage: `__builtin_trunc(value)` + /// + /// `value` can be any real type. + __builtin_trunc, + /// Usage: `__builtin_typename(type)` + /// + /// `type` must be a type. The return type is `const char *`. + __builtin_typename, + /// Usage: `__builtin_typeof(...)` + __builtin_typeof, + + pub const map: std.StaticStringMap(Builtin) = blk: { + const fields = @typeInfo(Builtin).@"enum".fields; + + var kvs_list: [fields.len]struct { []const u8, Builtin } = undefined; + for (fields, 0..) 
|field, i| { + kvs_list[i] = .{ field.name, @field(Builtin, field.name) }; + } + + break :blk .initComptime(kvs_list); + }; + + pub fn isBuiltin(identifier: []const u8) ?Builtin { + if (std.mem.startsWith(u8, identifier, "__builtin_")) { + @branchHint(.unlikely); + return map.get(identifier); + } else { + return null; + } + } +}; diff --git a/packages/cjit/src/tokens/Punctuator.zig b/packages/cjit/src/tokens/Punctuator.zig index db38634..93f8c70 100644 --- a/packages/cjit/src/tokens/Punctuator.zig +++ b/packages/cjit/src/tokens/Punctuator.zig @@ -57,14 +57,14 @@ pub const Punctuator = enum(u32) { pub const line_continuation_crlf = strToInt3("\\\r\n"); }; -pub fn strToInt1(str: *const u8[1]) u32 { +fn strToInt1(str: *const [1]u8) u32 { return @as(u8, @bitCast(str.*)); } -pub fn strToInt2(str: *const u8[2]) u32 { +fn strToInt2(str: *const [2]u8) u32 { return @as(u16, @bitCast(str.*)); } -pub fn strToInt3(str: *const u8[3]) u32 { +fn strToInt3(str: *const [3]u8) u32 { return @as(u24, @bitCast(str.*)); } diff --git a/packages/cjit/src/tokens/Token.zig b/packages/cjit/src/tokens/Token.zig index 9c865a2..7f19366 100644 --- a/packages/cjit/src/tokens/Token.zig +++ b/packages/cjit/src/tokens/Token.zig @@ -1,14 +1,16 @@ const std = @import("std"); -pub const Constant = @import("Constant.zig"); -pub const Keyword = @import("Keyword.zig"); -pub const Punctuator = @import("Punctuator.zig"); +const Builtin = @import("Builtin.zig").Builtin; +const Constant = @import("Constant.zig").Constant; +const Keyword = @import("Keyword.zig").Keyword; +const Punctuator = @import("Punctuator.zig").Punctuator; pub const Token = union(enum) { - keyword: Keyword, - identifier: []const u8, + builtin: Builtin, constant: Constant, + keyword: Keyword, + punctuator: Punctuator, + identifier: []const u8, string_literal: [:0]const u8, wide_string_literal: [:0]const u32, - punctuator: Punctuator, }; diff --git a/packages/cjit/src/tokens/Tokenizer.zig b/packages/cjit/src/tokens/Tokenizer.zig index 
98c3bdf..4c564ef 100644 --- a/packages/cjit/src/tokens/Tokenizer.zig +++ b/packages/cjit/src/tokens/Tokenizer.zig @@ -1,6 +1,7 @@ const std = @import("std"); const Self = @This(); +const Builtin = @import("Builtin.zig").Builtin; const Keyword = @import("Keyword.zig").Keyword; const Punctuator = @import("Punctuator.zig").Punctuator; const Token = @import("Token.zig").Token; @@ -84,10 +85,13 @@ pub fn nextToken(self: *Self, arena_allocator: std.mem.Allocator) !?Token { const identifier = self.str[identifier_start..self.it.ptr]; + // TODO Preprocessor + if (Keyword.isKeyword(identifier)) |keyword| { return .{ .keyword = keyword }; + } else if (Builtin.isBuiltin(identifier)) |builtin| { + return .{ .builtin = builtin }; } else { - // TODO Preprocessor - return .{ .identifier = arena_allocator.dupe(u8, identifier) }; + return .{ .identifier = try arena_allocator.dupe(u8, identifier) }; } }, diff --git a/packages/cjit/src/x86_64.zig b/packages/cjit/src/x86_64.zig index a25f68d..0de4f8a 100644 --- a/packages/cjit/src/x86_64.zig +++ b/packages/cjit/src/x86_64.zig @@ -4,7 +4,6 @@ const tokens = @import("tokens.zig"); const types = @import("types.zig"); const Location = Runtime.Location; -const Punctuator = tokens.Punctuator; const Runtime = @import("Runtime.zig"); const StackValue = @import("StackValue.zig"); const Type = types.Type;