cjit update
This commit is contained in:
@@ -1,149 +1,6 @@
|
||||
/// Reserved words of the C language, one tag per keyword.
///
/// Tags whose spelling collides with a Zig keyword are written with the
/// `@"..."` quoted-identifier syntax. Declaration order is significant because
/// the enum has no explicit tag values.
pub const Keyword = enum {
    // Underscore-prefixed keywords.
    _Alignas,
    _Alignof,
    _Atomic,
    _Bool,
    _Complex,
    _Generic,
    _Imaginary,
    _Noreturn,
    _Static_assert,
    _Thread_local,

    // Lower-case keywords, in alphabetical order.
    auto,
    @"break",
    case,
    char,
    @"const",
    @"continue",
    default,
    do,
    double,
    @"else",
    @"enum",
    @"extern",
    float,
    @"for",
    goto,
    @"if",
    @"inline",
    int,
    long,
    register,
    restrict,
    @"return",
    short,
    signed,
    sizeof,
    static,
    @"struct",
    @"switch",
    typedef,
    @"union",
    unsigned,
    void,
    @"volatile",
    @"while",
};
|
||||
|
||||
/// An identifier token, carrying the identifier's spelling.
pub const Identifier = struct {
    /// Bytes making up the identifier.
    name: []const u8,
};
|
||||
|
||||
/// A numeric or character constant, tagged by the C type it was written as.
pub const Constant = union(enum) {
    // Signed integer constants.
    int: i32,
    long: i64,
    long_long: i64,

    // Unsigned integer constants.
    unsigned_int: u32,
    unsigned_long: u64,
    unsigned_long_long: u64,

    // Floating-point constants.
    float: f32,
    double: f64,

    // Character constants.
    character: u8,
    wide_character: u32,
};
|
||||
|
||||
/// A string literal token, carrying the literal's bytes.
pub const StringLiteral = struct {
    /// Bytes of the string literal.
    value: []const u8,
};
|
||||
|
||||
/// C punctuators, one tag per spelling.
///
/// Tags are grouped by spelling length, longest first. Declaration order is
/// significant because the enum has no explicit tag values.
pub const Punctuator = enum {
    // Spellings of length three.
    @"...",
    @"<<=",
    @">>=",

    // Spellings of length two.
    @"--",
    @"-=",
    @"->",
    @"!=",
    @"*=",
    @"/=",
    @"&&",
    @"&=",
    @"##",
    @"%=",
    @"^=",
    @"++",
    @"+=",
    @"<<",
    @"<=",
    @"==",
    @">=",
    @">>",
    @"|=",
    @"||",

    // Spellings of length one.
    @"-",
    @",",
    @";",
    @":",
    @"!",
    @"?",
    @".",
    @"(",
    @")",
    @"[",
    @"]",
    @"{",
    @"}",
    @"*",
    @"/",
    @"&",
    @"#",
    @"%",
    @"^",
    @"+",
    @"<",
    @"=",
    @">",
    @"|",
    @"~",
};
|
||||
|
||||
/// A single lexical token, tagged by its category.
pub const Token = union(enum) {
    /// A reserved word.
    keyword: Keyword,
    /// Spelling of an identifier.
    identifier: []const u8,
    /// A numeric or character constant.
    constant: Constant,
    /// Bytes of a narrow string literal.
    string_literal: []const u8,
    /// Elements of a wide string literal, one `u32` each.
    wide_string_literal: []const u32,
    /// An operator or separator.
    punctuator: Punctuator,
};
|
||||
|
||||
/// Reports whether `code_point` may begin an identifier: an ASCII letter, an
/// underscore, or any code point with value 128 or above.
pub fn isIdentifierStart(code_point: u21) bool {
    return switch (code_point) {
        'A'...'Z', '_', 'a'...'z' => true,
        // Everything outside ASCII is accepted; the rest of ASCII is not.
        else => code_point >= 128,
    };
}
|
||||
|
||||
/// Reports whether `code_point` may appear after the first character of an
/// identifier: an ASCII digit, an ASCII letter, an underscore, or any code
/// point with value 128 or above.
pub fn isIdentifierMiddle(code_point: u21) bool {
    return switch (code_point) {
        '0'...'9', 'A'...'Z', '_', 'a'...'z' => true,
        // Everything outside ASCII is accepted; the rest of ASCII is not.
        else => code_point >= 128,
    };
}
|
||||
// Re-exports of the token types, one per file under `tokens/`.
//
// `Constant.zig`, `Keyword.zig`, `Punctuator.zig` and `Token.zig` each declare
// their type as a named `pub const` inside the file, so the declaration is
// selected explicitly; importing the bare file would export the file's
// namespace instead of the type. `Tokenizer.zig` and `Utf8Iterator.zig` are
// files-as-structs (`const Self = @This()`), so the file itself is the type.
pub const Constant = @import("tokens/Constant.zig").Constant;
pub const Keyword = @import("tokens/Keyword.zig").Keyword;
pub const Punctuator = @import("tokens/Punctuator.zig").Punctuator;
pub const Token = @import("tokens/Token.zig").Token;
pub const Tokenizer = @import("tokens/Tokenizer.zig");
pub const Utf8Iterator = @import("tokens/Utf8Iterator.zig");
|
||||
|
||||
14
packages/cjit/src/tokens/Constant.zig
Normal file
14
packages/cjit/src/tokens/Constant.zig
Normal file
@@ -0,0 +1,14 @@
|
||||
const std = @import("std");
|
||||
|
||||
/// A numeric or character constant, tagged by the C type it was written as.
pub const Constant = union(enum) {
    // Signed integer constants.
    int: i32,
    long: i64,
    long_long: i64,

    // Unsigned integer constants.
    unsigned_int: u32,
    unsigned_long: u64,
    unsigned_long_long: u64,

    // Floating-point constants.
    float: f32,
    double: f64,

    // Character constants.
    character: u8,
    wide_character: i32,
};
|
||||
63
packages/cjit/src/tokens/Keyword.zig
Normal file
63
packages/cjit/src/tokens/Keyword.zig
Normal file
@@ -0,0 +1,63 @@
|
||||
const std = @import("std");
|
||||
|
||||
/// Reserved words of the C language, one tag per keyword.
///
/// Tags whose spelling collides with a Zig keyword are written with the
/// `@"..."` quoted-identifier syntax. Each tag name is exactly the keyword's
/// source spelling, which is what `map` relies on.
pub const Keyword = enum {
    // Underscore-prefixed keywords.
    _Alignas,
    _Alignof,
    _Atomic,
    _Bool,
    _Complex,
    _Generic,
    _Imaginary,
    _Noreturn,
    _Static_assert,
    _Thread_local,

    // Lower-case keywords, in alphabetical order.
    auto,
    @"break",
    case,
    char,
    @"const",
    @"continue",
    default,
    do,
    double,
    @"else",
    @"enum",
    @"extern",
    float,
    @"for",
    goto,
    @"if",
    @"inline",
    int,
    long,
    register,
    restrict,
    @"return",
    short,
    signed,
    sizeof,
    static,
    @"struct",
    @"switch",
    typedef,
    @"union",
    unsigned,
    void,
    @"volatile",
    @"while",

    /// Compile-time table from keyword spelling to its tag, derived from the
    /// enum's own field names so the two cannot drift apart.
    pub const map: std.StaticStringMap(Keyword) = table: {
        const enum_fields = @typeInfo(Keyword).@"enum".fields;

        var entries: [enum_fields.len]struct { []const u8, Keyword } = undefined;
        for (enum_fields, 0..) |enum_field, index| {
            const tag = @field(Keyword, enum_field.name);
            entries[index] = .{ enum_field.name, tag };
        }

        break :table .initComptime(entries);
    };

    /// Returns the keyword spelled exactly by `identifier`, or `null` when
    /// `identifier` is not a keyword.
    pub fn isKeyword(identifier: []const u8) ?Keyword {
        return map.get(identifier);
    }
};
|
||||
70
packages/cjit/src/tokens/Punctuator.zig
Normal file
70
packages/cjit/src/tokens/Punctuator.zig
Normal file
@@ -0,0 +1,70 @@
|
||||
const std = @import("std");
|
||||
|
||||
/// C punctuators, one tag per spelling.
///
/// Each tag's integer value is produced by passing its spelling to the
/// `strToInt1` / `strToInt2` / `strToInt3` helper matching the spelling's
/// length, so a tag can be recovered with `@enumFromInt` from an integer built
/// the same way out of source bytes.
pub const Punctuator = enum(u32) {
    // Spellings of length three.
    @"..." = strToInt3("..."),
    @"<<=" = strToInt3("<<="),
    @">>=" = strToInt3(">>="),

    // Spellings of length two.
    @"--" = strToInt2("--"),
    @"-=" = strToInt2("-="),
    @"->" = strToInt2("->"),
    @"!=" = strToInt2("!="),
    @"*=" = strToInt2("*="),
    @"/=" = strToInt2("/="),
    @"&&" = strToInt2("&&"),
    @"&=" = strToInt2("&="),
    @"##" = strToInt2("##"),
    @"%=" = strToInt2("%="),
    @"^=" = strToInt2("^="),
    @"++" = strToInt2("++"),
    @"+=" = strToInt2("+="),
    @"<<" = strToInt2("<<"),
    @"<=" = strToInt2("<="),
    @"==" = strToInt2("=="),
    @">=" = strToInt2(">="),
    @">>" = strToInt2(">>"),
    @"|=" = strToInt2("|="),
    @"||" = strToInt2("||"),

    // Spellings of length one.
    @"-" = strToInt1("-"),
    @"," = strToInt1(","),
    @";" = strToInt1(";"),
    @":" = strToInt1(":"),
    @"!" = strToInt1("!"),
    @"?" = strToInt1("?"),
    @"." = strToInt1("."),
    @"(" = strToInt1("("),
    @")" = strToInt1(")"),
    @"[" = strToInt1("["),
    @"]" = strToInt1("]"),
    @"{" = strToInt1("{"),
    @"}" = strToInt1("}"),
    @"*" = strToInt1("*"),
    @"/" = strToInt1("/"),
    @"&" = strToInt1("&"),
    @"#" = strToInt1("#"),
    @"%" = strToInt1("%"),
    @"^" = strToInt1("^"),
    @"+" = strToInt1("+"),
    @"<" = strToInt1("<"),
    @"=" = strToInt1("="),
    @">" = strToInt1(">"),
    @"|" = strToInt1("|"),
    @"~" = strToInt1("~"),

    /// Packed form of a backslash followed by LF.
    pub const line_continuation_lf = strToInt2("\\\n");
    /// Packed form of a backslash followed by CR LF.
    pub const line_continuation_crlf = strToInt3("\\\r\n");
};
|
||||
|
||||
/// Widens the single byte of `str` to a `u32`.
///
/// The parameter is a pointer to a one-element array so that a one-character
/// string literal can be passed directly.
pub fn strToInt1(str: *const [1]u8) u32 {
    return str[0];
}
|
||||
|
||||
/// Packs the two bytes of `str` into a `u32` by reinterpreting them as a `u16`.
///
/// The reinterpretation is a `@bitCast`, so which byte ends up in the low bits
/// follows the target's byte order. The parameter is a pointer to a
/// two-element array so that a two-character string literal can be passed
/// directly.
pub fn strToInt2(str: *const [2]u8) u32 {
    return @as(u16, @bitCast(str.*));
}
|
||||
|
||||
/// Packs the three bytes of `str` into a `u32` by reinterpreting them as a
/// `u24`.
///
/// The reinterpretation is a `@bitCast`, so which byte ends up in the low bits
/// follows the target's byte order. The parameter is a pointer to a
/// three-element array so that a three-character string literal can be passed
/// directly.
pub fn strToInt3(str: *const [3]u8) u32 {
    return @as(u24, @bitCast(str.*));
}
|
||||
14
packages/cjit/src/tokens/Token.zig
Normal file
14
packages/cjit/src/tokens/Token.zig
Normal file
@@ -0,0 +1,14 @@
|
||||
const std = @import("std");
|
||||
|
||||
// Each sibling file declares its type as a named `pub const` inside the file,
// so the declaration is selected explicitly. Importing the bare file would
// bind the file's namespace, and the union fields below would then not have
// the enum/union types that the tokenizer stores in them.
pub const Constant = @import("Constant.zig").Constant;
pub const Keyword = @import("Keyword.zig").Keyword;
pub const Punctuator = @import("Punctuator.zig").Punctuator;

/// A single lexical token, tagged by its category.
pub const Token = union(enum) {
    /// A reserved word.
    keyword: Keyword,
    /// Spelling of an identifier.
    identifier: []const u8,
    /// A numeric or character constant.
    constant: Constant,
    /// Bytes of a narrow string literal, zero-terminated.
    string_literal: [:0]const u8,
    /// Elements of a wide string literal, zero-terminated, one `u32` each.
    wide_string_literal: [:0]const u32,
    /// An operator or separator.
    punctuator: Punctuator,
};
|
||||
260
packages/cjit/src/tokens/Tokenizer.zig
Normal file
260
packages/cjit/src/tokens/Tokenizer.zig
Normal file
@@ -0,0 +1,260 @@
|
||||
const std = @import("std");
|
||||
const Self = @This();
|
||||
|
||||
const Keyword = @import("Keyword.zig").Keyword;
|
||||
const Punctuator = @import("Punctuator.zig").Punctuator;
|
||||
const Token = @import("Token.zig").Token;
|
||||
const Utf8Iterator = @import("Utf8Iterator.zig");
|
||||
|
||||
/// Capacity, in elements, of the scratch buffer behind `string`.
pub const max_string_length = 4096;
/// Capacity, in elements, of the scratch buffer behind `wide_string`.
pub const max_wide_string_length = 4096;

/// Name of the source currently being tokenized.
filename: []const u8,
/// Cursor over the source text.
it: Utf8Iterator,
/// Token lists keyed by name.
/// NOTE(review): presumably preprocessor macro definitions; not used by the
/// code visible here, so confirm against the rest of the project.
defines: std.StringHashMapUnmanaged([]Token) = .{},
/// Bounded, preallocated with the capacity of `max_string_length`.
string: std.ArrayList(u8),
/// Bounded, preallocated with the capacity of `max_wide_string_length`.
wide_string: std.ArrayList(u32),
|
||||
|
||||
/// Creates a tokenizer positioned at the start of `code`.
///
/// Allocates the two fixed-capacity scratch buffers (`max_string_length`
/// bytes and `max_wide_string_length` `u32` elements) from `arena_allocator`.
/// Nothing here frees them, so the allocator is expected to reclaim them.
///
/// Returns an allocator error if either buffer cannot be allocated.
pub fn init(filename: []const u8, code: []const u8, arena_allocator: std.mem.Allocator) !Self {
    const string_buffer = try arena_allocator.alloc(u8, max_string_length);
    // The element type must be `u32` to back `wide_string`, which is a list of
    // `u32`; a `u8` buffer here would not match the list's element type.
    const wide_string_buffer = try arena_allocator.alloc(u32, max_wide_string_length);

    return .{
        .filename = filename,
        .it = .init(code),
        .string = .initBuffer(string_buffer),
        .wide_string = .initBuffer(wide_string_buffer),
    };
}
|
||||
|
||||
/// Points the tokenizer at a new source: replaces the iterator with a fresh
/// one over `code` and records `filename`. All other fields are left as they
/// are.
pub fn setSource(self: *Self, filename: []const u8, code: []const u8) void {
    self.it = .init(code);
    self.filename = filename;
}
|
||||
|
||||
/// Scans the next token from the current source.
///
/// Returns `null` when only whitespace and line continuations remain.
/// Identifier spellings are copied into `arena_allocator`; keywords and
/// punctuators carry no allocation.
///
/// Errors:
/// - whatever the iterator reports for malformed UTF-8,
/// - an allocator error if the identifier copy fails,
/// - `error.InvalidToken` for input that matches no implemented token kind.
///   This currently includes string, character, wide string and wide character
///   literals, whose parsing is still TODO.
pub fn nextToken(self: *Self, arena_allocator: std.mem.Allocator) !?Token {
    try self.skipWhitespace();

    // TODO Skip C and C++ style comments
    // TODO Preprocessor directives

    const cp = try self.peekCodepointSkipLineContinuation() orelse return null;

    switch (cp) {
        // Identifier start
        'A'...'Z', '_', 'a'...'z', 128...std.math.maxInt(u21) => {
            // This is an identifier, with the possible exception of:
            // - wide string: L"
            // - wide char: L'
            // - any keyword

            if (cp == 'L') {
                // Remember the position so the `L` can be re-read as the start
                // of an ordinary identifier if no quote follows it.
                const state = self.it.save();

                self.it.advanceCodepoint(cp);
                const cp2 = try self.peekCodepointSkipLineContinuation() orelse 0;

                switch (cp2) {
                    // Wide string
                    '\"' => {
                        self.it.advanceCodepoint(cp2);
                        self.wide_string.clearRetainingCapacity();
                        // TODO Parse wide string
                        // Until that exists, report the literal as invalid
                        // rather than falling into identifier scanning with
                        // the iterator already past the `L`.
                        return error.InvalidToken;
                    },
                    // Wide char
                    '\'' => {
                        self.it.advanceCodepoint(cp2);
                        // TODO Parse wide char
                        return error.InvalidToken;
                    },
                    // Identifier or keyword
                    else => self.it.restore(state),
                }
            }

            const identifier_start = self.it.ptr;
            self.it.advanceCodepoint(cp);

            while (try self.peekCodepointSkipLineContinuation()) |next_cp| {
                if (!isIdentifierMiddle(next_cp)) break;
                self.it.advanceCodepoint(next_cp);
            }

            // NOTE(review): this is a slice of the raw source, so a line
            // continuation skipped in the middle of an identifier stays part
            // of the spelling.
            const identifier = self.it.str[identifier_start..self.it.ptr];

            if (Keyword.isKeyword(identifier)) |keyword| {
                return .{ .keyword = keyword };
            }

            // TODO Preprocessor
            return .{ .identifier = try arena_allocator.dupe(u8, identifier) };
        },
        // String
        '\"' => {
            self.it.advanceCodepoint(cp);
            self.string.clearRetainingCapacity();
            // TODO Parse string
            // Until that exists, report the literal as invalid rather than
            // matching punctuators against the bytes after the quote.
            return error.InvalidToken;
        },
        // Char
        '\'' => {
            self.it.advanceCodepoint(cp);
            // TODO Parse char
            return error.InvalidToken;
        },
        // Everything else is handled below.
        else => {},
    }

    // Higher code points should've been already handled. The code below may
    // assume that `cp` is an ASCII character.
    std.debug.assert(cp < 128);

    // TODO Numeric constants

    // `cp` was just peeked, so at least one byte is left and the window is
    // never null here.
    const cp3 = self.it.peekThreeBytes().?;

    // Longest match first: three bytes, then two, then one. The masks select
    // the same bits that `Punctuator`'s tag values occupy for each length.
    switch (cp3 & 0x00_FF_FF_FF) {
        inline @intFromEnum(Punctuator.@"..."),
        @intFromEnum(Punctuator.@"<<="),
        @intFromEnum(Punctuator.@">>="),
        => |p| {
            self.it.ptr += 3;
            self.it.col += 3;
            return .{
                .punctuator = @enumFromInt(p),
            };
        },
        else => {},
    }

    switch (cp3 & 0x00_00_FF_FF) {
        inline @intFromEnum(Punctuator.@"--"),
        @intFromEnum(Punctuator.@"-="),
        @intFromEnum(Punctuator.@"->"),
        @intFromEnum(Punctuator.@"!="),
        @intFromEnum(Punctuator.@"*="),
        @intFromEnum(Punctuator.@"/="),
        @intFromEnum(Punctuator.@"&&"),
        @intFromEnum(Punctuator.@"&="),
        @intFromEnum(Punctuator.@"##"),
        @intFromEnum(Punctuator.@"%="),
        @intFromEnum(Punctuator.@"^="),
        @intFromEnum(Punctuator.@"++"),
        @intFromEnum(Punctuator.@"+="),
        @intFromEnum(Punctuator.@"<<"),
        @intFromEnum(Punctuator.@"<="),
        @intFromEnum(Punctuator.@"=="),
        @intFromEnum(Punctuator.@">="),
        @intFromEnum(Punctuator.@">>"),
        @intFromEnum(Punctuator.@"|="),
        @intFromEnum(Punctuator.@"||"),
        => |p| {
            self.it.ptr += 2;
            self.it.col += 2;
            return .{
                .punctuator = @enumFromInt(p),
            };
        },
        else => {},
    }

    switch (cp3 & 0x00_00_00_FF) {
        inline @intFromEnum(Punctuator.@"-"),
        @intFromEnum(Punctuator.@","),
        @intFromEnum(Punctuator.@";"),
        @intFromEnum(Punctuator.@":"),
        @intFromEnum(Punctuator.@"!"),
        @intFromEnum(Punctuator.@"?"),
        @intFromEnum(Punctuator.@"."),
        @intFromEnum(Punctuator.@"("),
        @intFromEnum(Punctuator.@")"),
        @intFromEnum(Punctuator.@"["),
        @intFromEnum(Punctuator.@"]"),
        @intFromEnum(Punctuator.@"{"),
        @intFromEnum(Punctuator.@"}"),
        @intFromEnum(Punctuator.@"*"),
        @intFromEnum(Punctuator.@"/"),
        @intFromEnum(Punctuator.@"&"),
        @intFromEnum(Punctuator.@"#"),
        @intFromEnum(Punctuator.@"%"),
        @intFromEnum(Punctuator.@"^"),
        @intFromEnum(Punctuator.@"+"),
        @intFromEnum(Punctuator.@"<"),
        @intFromEnum(Punctuator.@"="),
        @intFromEnum(Punctuator.@">"),
        @intFromEnum(Punctuator.@"|"),
        @intFromEnum(Punctuator.@"~"),
        => |p| {
            self.it.ptr += 1;
            self.it.col += 1;
            return .{
                .punctuator = @enumFromInt(p),
            };
        },
        else => {},
    }

    return error.InvalidToken;
}
|
||||
|
||||
/// Skips every line continuation at the current position, then peeks the code
/// point that follows without consuming it. Returns `null` at end of input and
/// forwards any error from the iterator's `peekCodepoint`.
fn peekCodepointSkipLineContinuation(self: *Self) !?u21 {
    var skipped_one = self.skipLineContinuation();
    while (skipped_one) {
        skipped_one = self.skipLineContinuation();
    }
    return self.it.peekCodepoint();
}
|
||||
|
||||
/// Skips one line continuation at the current position, if there is one.
///
/// A line continuation is a backslash followed immediately by LF or CRLF.
/// When one is found, the iterator is moved past it, its line counter is
/// incremented and its column is reset to 1. Returns whether a continuation
/// was skipped.
fn skipLineContinuation(self: *Self) bool {
    if (self.it.peekThreeBytes()) |window| {
        @branchHint(.likely);

        // The two forms differ in their second byte, so at most one matches.
        const is_lf = window & 0x00_00_FF_FF == Punctuator.line_continuation_lf;
        const is_crlf = window & 0x00_FF_FF_FF == Punctuator.line_continuation_crlf;

        if (is_lf or is_crlf) {
            @branchHint(.unlikely);
            const continuation_len: usize = if (is_lf) 2 else 3;
            self.it.ptr += continuation_len;
            self.it.line += 1;
            self.it.col = 1;
            return true;
        }
    }

    return false;
}
|
||||
|
||||
/// Advances past whitespace and any line continuations between whitespace
/// characters, stopping at the first other code point or at end of input.
///
/// Forwards any error from peeking the next code point.
fn skipWhitespace(self: *Self) !void {
    while (try self.peekCodepointSkipLineContinuation()) |cp| {
        switch (cp) {
            // <Character Tabulation> (HT, TAB)
            0x0009,
            // <End of Line> (EOL, LF, NL)
            0x000A,
            // <Line Tabulation> (VT)
            0x000B,
            // <Form Feed> (FF)
            0x000C,
            // <Carriage Return> (CR)
            0x000D,
            // Space (SP)
            0x0020,
            // `advanceCodepoint` returns `void`, so it must not be prefixed
            // with `try`.
            => self.it.advanceCodepoint(cp),
            else => return,
        }
    }
}
|
||||
|
||||
/// Reports whether `code_point` may appear after the first character of an
/// identifier: an ASCII digit, an ASCII letter, an underscore, or any code
/// point with value 128 or above.
fn isIdentifierMiddle(code_point: u21) bool {
    return switch (code_point) {
        '0'...'9', 'A'...'Z', '_', 'a'...'z' => true,
        // Everything outside ASCII is accepted; the rest of ASCII is not.
        else => code_point >= 128,
    };
}
|
||||
103
packages/cjit/src/tokens/Utf8Iterator.zig
Normal file
103
packages/cjit/src/tokens/Utf8Iterator.zig
Normal file
@@ -0,0 +1,103 @@
|
||||
const std = @import("std");
|
||||
const Self = @This();
|
||||
|
||||
/// The text being iterated.
str: []const u8,
/// Byte offset of the current position within `str`.
ptr: usize,
/// Line number of the current position; starts at 1.
line: usize,
/// Column number of the current position; starts at 1.
col: usize,
|
||||
|
||||
/// Snapshot of the iterator's position, produced by `save` and consumed by
/// `restore`. It holds the position fields only, not the text.
pub const State = struct {
    /// Saved byte offset.
    ptr: usize,
    /// Saved line number.
    line: usize,
    /// Saved column number.
    col: usize,
};
|
||||
|
||||
/// Creates an iterator at the first byte of `str`, on line 1, column 1.
pub fn init(str: []const u8) Self {
    const start: Self = .{ .str = str, .ptr = 0, .line = 1, .col = 1 };
    return start;
}
|
||||
|
||||
/// Captures the current byte offset, line and column so they can later be
/// passed to `restore`.
pub fn save(self: Self) State {
    const snapshot: State = .{ .ptr = self.ptr, .line = self.line, .col = self.col };
    return snapshot;
}
|
||||
|
||||
/// Rewinds (or fast-forwards) the iterator to a position captured by `save`.
/// Only the position fields are written; the text is left untouched.
pub fn restore(self: *Self, state: State) void {
    self.col = state.col;
    self.line = state.line;
    self.ptr = state.ptr;
}
|
||||
|
||||
/// Returns the byte at the current position without consuming it, or `null`
/// at end of input.
pub fn peekByte(self: *Self) ?u8 {
    return if (self.ptr < self.str.len) self.str[self.ptr] else null;
}
|
||||
|
||||
/// Decodes the UTF-8 code point at the current position without consuming it.
///
/// Returns `null` at end of input. Returns `error.InvalidUtf8` when the lead
/// byte is not a valid sequence start, when the sequence runs past the end of
/// the text, or when the sequence fails to decode.
pub fn peekCodepoint(self: Self) !?u21 {
    if (self.ptr >= self.str.len) return null;

    const rest = self.str[self.ptr..];
    const sequence_len = std.unicode.utf8ByteSequenceLength(rest[0]) catch return error.InvalidUtf8;
    // A truncated sequence at the very end of the text is invalid.
    if (sequence_len > rest.len) return error.InvalidUtf8;

    return std.unicode.utf8Decode(rest[0..sequence_len]) catch return error.InvalidUtf8;
}
|
||||
|
||||
/// Returns up to three bytes from the current position packed into an
/// integer, without consuming them, or `null` at end of input.
///
/// When fewer than three bytes remain, the missing ones are zero. The bytes
/// are reinterpreted as a `u24` with `@bitCast`, so which byte occupies the
/// low bits follows the target's byte order.
pub fn peekThreeBytes(self: Self) ?u32 {
    const remaining = self.str.len - self.ptr;
    if (remaining == 0) return null;

    var window: [3]u8 = .{ 0, 0, 0 };
    const available = @min(remaining, window.len);
    @memcpy(window[0..available], self.str[self.ptr..][0..available]);

    return @as(u24, @bitCast(window));
}
|
||||
|
||||
/// Advances the iterator by `bytes` bytes, counting each as one column.
///
/// The line counter is not touched, so the skipped bytes must be single-byte
/// characters other than a newline. Asserts that the move stays within the
/// text.
pub fn advanceAsciiBytes(self: *Self, bytes: usize) void {
    // Bounds are checked against the byte offset `self.ptr`; `self.str.ptr` is
    // the slice's data pointer and cannot be compared with a length.
    std.debug.assert(self.ptr + bytes <= self.str.len);

    self.ptr += bytes;
    self.col += bytes;
}
|
||||
|
||||
/// Consumes the code point `cp`, which must be the value just returned by
/// `peekCodepoint` (this is asserted).
///
/// Moves the byte offset by the UTF-8 length of `cp`. A newline increments the
/// line counter and puts the column back at 1; any other code point moves the
/// column forward by one.
pub fn advanceCodepoint(self: *Self, cp: u21) void {
    // A failed or end-of-input peek compares unequal to `cp`, so both trip
    // the assertion.
    std.debug.assert((self.peekCodepoint() catch null) == cp);

    self.ptr += std.unicode.utf8CodepointSequenceLength(cp) catch unreachable;

    if (cp == '\n') {
        self.line += 1;
        self.col = 1;
    } else {
        self.col += 1;
    }
}
|
||||
Reference in New Issue
Block a user