Files
castle/packages/web/src/http/Parser.zig

681 lines
22 KiB
Zig

//! HTTP/1.1 parser.
//!
//! This parser is *streaming*, meaning it can gracefully consume partial HTTP
//! request bytes. An instance of this parser is meant for parsing a singular
//! request. Once the request is fully completed, a new instance of the parser
//! should be initialized.
//!
//! During a single ingestion, the parser can return one of the following:
//!
//! - method of type `Method`, i.e. HTTP method (aka verb)
//! - pathname of type `[]const u8`
//! - header of type `Header`, i.e. a field name with a value
//! - end_of_headers of type `void`, i.e. a marker which informs the user of
//! this parser that there will be no more headers; this result can be used by
//! the user to make decisions about further processing of the request based
//! on the full knowledge of all the headers
//! - body of type `[]const u8`, i.e. a slice to the request body (or
//! zero-length slice if there is no request body)
//!
//! The first result returned from the parser will always be the method,
//! followed by the pathname. Then, zero or more headers will follow, terminated
//! with the end_of_headers marker. The parser will always finish with a single
//! body result.
//!
//! Parser methods stop processing at the first result. Therefore, if any result
//! is returned, the provided bytes might have been only partially consumed and
//! the methods must be repeatedly called until all of the bytes are consumed.
//! When the body is returned, the parser is finished and should be no longer
//! used. If the body was returned, but the bytes were not fully consumed, it
//! means that the remainder belongs to a subsequent HTTP request.
//!
//! When an error is returned from the parser, the HTTP request should be
//! considered malformed. You may choose to respond to it, but the request must
//! no longer be parsed and the connection should be closed.
//!
//! The parser is not involved in any HTTP semantics, only its syntax. It is up
//! to the user of this parser to respect all of the HTTP standards (if they
//! even choose to). For example, none of the header field values are verified.
//! The only exception is `Content-Length`. The parser must know the value to
//! determine the length of the request body. If the value fails to parse as a
//! decimal non-negative integer, a syntax error is returned. Note that
//! according to [RFC 9110, Section 8.6: HTTP Semantics](https://datatracker.ietf.org/doc/html/rfc9110#section-8.6),
//! `Content-Length` header field value consisting of the same decimal value
//! repeated as a comma-separated list (e.g. `Content-Length: 42, 42`) MAY be
//! accepted. This parser chooses not to accept it.
const std = @import("std");
const Parser = @This();
const FieldName = @import("FieldName.zig").FieldName;
const Header = @import("Header.zig");
const Method = @import("Method.zig").Method;
/// Errors reported by the parser. Any of them means the request could not be
/// parsed and parsing must not be continued.
pub const Error = error{
    /// The bytes at the start of the request do not spell one of the methods
    /// the parser recognizes, or the method is not followed by a space.
    MethodNotSupported,
    /// The version token of the request line is not exactly `HTTP/1.1`
    /// followed by a carriage return.
    HttpVersionNotSupported,
    /// A line ending is malformed, or a `Content-Length` value is not a
    /// decimal integer or conflicts with an earlier `Content-Length` header.
    SyntaxError,
};
/// One item produced by the parser while it walks through a request.
pub const Result = union(enum) {
    /// The request method.
    method: Method,
    /// The request target, as a slice of the input bytes.
    pathname: []const u8,
    /// A single header (field name plus value).
    header: Header,
    /// Marker emitted once the blank line that ends the header section is seen.
    end_of_headers,
    /// The request body; zero-length when the request has none.
    body: []const u8,

    /// Wraps `method` in a `.method` result.
    pub fn initMethod(method: Method) Result {
        return Result{ .method = method };
    }

    /// Wraps `pathname` in a `.pathname` result.
    pub fn initPathname(pathname: []const u8) Result {
        return Result{ .pathname = pathname };
    }

    /// Wraps `header` in a `.header` result.
    pub fn initHeader(header: Header) Result {
        return Result{ .header = header };
    }

    /// Wraps `body` in a `.body` result.
    pub fn initBody(body: []const u8) Result {
        return Result{ .body = body };
    }
};
/// Outcome of a single `consume` (or `consumeVec`) call.
pub const ConsumeResult = struct {
    /// Number of input bytes the call used up. Bytes past this count were not
    /// looked at and must be passed in again.
    consumed: usize,
    /// The item that was completed by this call, or `null` when the consumed
    /// bytes did not complete one.
    result: ?Result,
};
/// Position of the parser inside a request.
///
/// The `method_*` tags spell out the prefix of the method matched so far, and
/// the `version_*` tags do the same for the `HTTP/1.1` token. Tags that carry a
/// payload hold the slice (or header) accumulated so far.
pub const State = union(enum) {
    /// Nothing has been consumed yet.
    init,

    // Method prefixes, one character matched.
    method_c,
    method_d,
    method_g,
    method_h,
    method_o,
    method_p,
    method_t,

    // Two characters matched.
    method_co,
    method_de,
    method_ge,
    method_he,
    method_op,
    method_pa,
    method_po,
    method_pu,
    method_tr,

    // Three characters matched.
    method_con,
    method_del,
    method_hea,
    method_opt,
    method_pat,
    method_pos,
    method_tra,

    // Four characters matched.
    method_conn,
    method_dele,
    method_opti,
    method_patc,
    method_trac,

    // Five characters matched.
    method_conne,
    method_delet,
    method_optio,

    // Six characters matched.
    method_connec,
    method_option,

    /// A full method was matched; a space is expected next.
    method_complete,

    /// Pathname bytes collected so far.
    pathname: []const u8,
    /// The space after the pathname was seen; the version token is expected.
    pathname_complete,

    version_h,
    version_ht,
    version_htt,
    version_http,
    @"version_http/",
    @"version_http/1",
    @"version_http/1.",
    /// `HTTP/1.1` was matched; a carriage return is expected next.
    version_complete,
    /// The carriage return of the request line was seen; a line feed is expected.
    start_line_end,

    /// At the first byte of a header line (or of the blank line ending the headers).
    header_name_start,
    /// Header name bytes collected so far.
    header_name: []const u8,
    /// Header whose (still untrimmed) value is being collected.
    header_value: Header,
    /// The carriage return of a header line was seen; a line feed is expected.
    header_line_end,
    /// The carriage return of the blank line was seen; a line feed is expected.
    headers_end,

    /// Body bytes collected so far.
    body: []const u8,
    /// The body was returned; the parser must not be fed any more bytes.
    done,

    /// Builds a `.pathname` state holding `pathname`.
    pub fn initPathname(pathname: []const u8) State {
        return State{ .pathname = pathname };
    }

    /// Builds a `.header_name` state holding `name`.
    pub fn initHeaderName(name: []const u8) State {
        return State{ .header_name = name };
    }

    /// Builds a `.header_value` state holding `header`.
    pub fn initHeaderValue(header: Header) State {
        return State{ .header_value = header };
    }

    /// Builds a `.body` state holding `body`.
    pub fn initBody(body: []const u8) State {
        return State{ .body = body };
    }
};
/// Current position of the parser inside the request.
state: State,
/// Value of the `Content-Length` header once one has been parsed; `null` until
/// then. Used to decide how many body bytes to collect.
content_length: ?usize,
/// Creates a parser positioned at the very beginning of a request, with no
/// `Content-Length` recorded yet.
pub fn init() Parser {
    const fresh: Parser = .{
        .content_length = null,
        .state = .init,
    };
    return fresh;
}
/// Feeds `chars` to the parser and stops at the first completed result.
///
/// Returns how many bytes of `chars` were used together with the completed
/// result, or `null` as the result when all of `chars` was used without
/// completing anything. When a result is returned, the rest of `chars` has not
/// been looked at and must be passed in again.
///
/// Slices stored in the parser state and handed out in results point into
/// `chars` and are grown in place, so the parser relies on the bytes of one
/// request being laid out contiguously in memory across calls.
pub fn consume(self: *Parser, chars: []const u8) Error!ConsumeResult {
    var offset: usize = 0;
    while (offset < chars.len) {
        switch (self.state) {
            .body => |collected| {
                // Take as many bytes as are available, but never more than
                // the body still needs.
                const expected_len = self.content_length.?;
                const available = chars.len - offset;
                const missing = expected_len - collected.len;
                const step = @min(available, missing);
                const grown = extendSliceBy(collected, step);
                offset += step;

                if (grown.len < expected_len) {
                    self.state = .initBody(grown);
                } else {
                    self.state = .done;
                    return .{
                        .consumed = offset,
                        .result = .initBody(grown),
                    };
                }
            },
            else => {
                // TODO fix
                // if (chars.len - i >= vec_len) {
                //     const vec_res = try self.consumeVec(chars[i..][0..vec_len]);
                //     i += vec_res.consumed;
                //     if (vec_res.result) |result| {
                //         return .{
                //             .consumed = i,
                //             .result = result,
                //         };
                //     }
                //     if (vec_res.consumed > 0) {
                //         continue;
                //     }
                // }
                const produced = try self.consumeChar(&chars[offset]);
                offset += 1;
                if (produced) |result| {
                    return .{
                        .consumed = offset,
                        .result = result,
                    };
                }
            },
        }
    }

    // Falling out of the loop means every byte was used.
    std.debug.assert(offset == chars.len);
    return .{
        .consumed = chars.len,
        .result = null,
    };
}
/// Advances the state machine by exactly one byte.
///
/// `char_ptr` must point into the request buffer: the slices kept in the state
/// and returned in results alias that buffer and are grown one byte at a time,
/// which relies on the following request bytes being located right after
/// `char_ptr` in memory.
///
/// Returns the result completed by this byte, or `null` when the byte only
/// moved the parser to another state.
///
/// Errors:
/// - `error.MethodNotSupported` when the method is not recognized or is not
///   followed by a space.
/// - `error.HttpVersionNotSupported` when the version token is not `HTTP/1.1`
///   followed by a carriage return.
/// - `error.SyntaxError` on a malformed line ending or an invalid or
///   conflicting `Content-Length` value.
///
/// Must not be called once the parser has reached the `.done` state.
fn consumeChar(self: *Parser, char_ptr: *const u8) Error!?Result {
    const char = char_ptr.*;
    // One-byte view of the current character, aliasing the input.
    const char_slice: *const [1]u8 = @ptrCast(char_ptr);
    // Empty slice positioned right after the current character; used as the
    // starting point of a slice that begins with the next byte.
    const next_char_slice = @as([*]const u8, @ptrCast(char_ptr))[1..1];
    switch (self.state) {
        .init => switch (char) {
            'C' => self.state = .method_c,
            'D' => self.state = .method_d,
            'G' => self.state = .method_g,
            'H' => self.state = .method_h,
            'O' => self.state = .method_o,
            'P' => self.state = .method_p,
            'T' => self.state = .method_t,
            else => return error.MethodNotSupported,
        },
        .method_c => switch (char) {
            'O' => self.state = .method_co,
            else => return error.MethodNotSupported,
        },
        .method_d => switch (char) {
            'E' => self.state = .method_de,
            else => return error.MethodNotSupported,
        },
        .method_g => switch (char) {
            'E' => self.state = .method_ge,
            else => return error.MethodNotSupported,
        },
        .method_h => switch (char) {
            'E' => self.state = .method_he,
            else => return error.MethodNotSupported,
        },
        .method_o => switch (char) {
            'P' => self.state = .method_op,
            else => return error.MethodNotSupported,
        },
        .method_p => switch (char) {
            'A' => self.state = .method_pa,
            'O' => self.state = .method_po,
            'U' => self.state = .method_pu,
            else => return error.MethodNotSupported,
        },
        .method_t => switch (char) {
            'R' => self.state = .method_tr,
            else => return error.MethodNotSupported,
        },
        .method_co => switch (char) {
            'N' => self.state = .method_con,
            else => return error.MethodNotSupported,
        },
        .method_de => switch (char) {
            'L' => self.state = .method_del,
            else => return error.MethodNotSupported,
        },
        .method_ge => switch (char) {
            'T' => {
                self.state = .method_complete;
                return .initMethod(.GET);
            },
            else => return error.MethodNotSupported,
        },
        .method_he => switch (char) {
            'A' => self.state = .method_hea,
            else => return error.MethodNotSupported,
        },
        .method_op => switch (char) {
            'T' => self.state = .method_opt,
            else => return error.MethodNotSupported,
        },
        .method_pa => switch (char) {
            'T' => self.state = .method_pat,
            else => return error.MethodNotSupported,
        },
        .method_po => switch (char) {
            'S' => self.state = .method_pos,
            else => return error.MethodNotSupported,
        },
        .method_pu => switch (char) {
            'T' => {
                self.state = .method_complete;
                return .initMethod(.PUT);
            },
            else => return error.MethodNotSupported,
        },
        .method_tr => switch (char) {
            'A' => self.state = .method_tra,
            else => return error.MethodNotSupported,
        },
        .method_con => switch (char) {
            'N' => self.state = .method_conn,
            else => return error.MethodNotSupported,
        },
        .method_del => switch (char) {
            'E' => self.state = .method_dele,
            else => return error.MethodNotSupported,
        },
        .method_hea => switch (char) {
            'D' => {
                self.state = .method_complete;
                return .initMethod(.HEAD);
            },
            else => return error.MethodNotSupported,
        },
        .method_opt => switch (char) {
            'I' => self.state = .method_opti,
            else => return error.MethodNotSupported,
        },
        .method_pat => switch (char) {
            'C' => self.state = .method_patc,
            else => return error.MethodNotSupported,
        },
        .method_pos => switch (char) {
            'T' => {
                self.state = .method_complete;
                return .initMethod(.POST);
            },
            else => return error.MethodNotSupported,
        },
        .method_tra => switch (char) {
            'C' => self.state = .method_trac,
            else => return error.MethodNotSupported,
        },
        .method_conn => switch (char) {
            'E' => self.state = .method_conne,
            else => return error.MethodNotSupported,
        },
        .method_dele => switch (char) {
            'T' => self.state = .method_delet,
            else => return error.MethodNotSupported,
        },
        .method_opti => switch (char) {
            'O' => self.state = .method_optio,
            else => return error.MethodNotSupported,
        },
        .method_patc => switch (char) {
            'H' => {
                self.state = .method_complete;
                return .initMethod(.PATCH);
            },
            else => return error.MethodNotSupported,
        },
        .method_trac => switch (char) {
            'E' => {
                self.state = .method_complete;
                return .initMethod(.TRACE);
            },
            else => return error.MethodNotSupported,
        },
        .method_conne => switch (char) {
            'C' => self.state = .method_connec,
            else => return error.MethodNotSupported,
        },
        .method_delet => switch (char) {
            'E' => {
                self.state = .method_complete;
                return .initMethod(.DELETE);
            },
            else => return error.MethodNotSupported,
        },
        .method_optio => switch (char) {
            'N' => self.state = .method_option,
            else => return error.MethodNotSupported,
        },
        .method_connec => switch (char) {
            'T' => {
                self.state = .method_complete;
                return .initMethod(.CONNECT);
            },
            else => return error.MethodNotSupported,
        },
        .method_option => switch (char) {
            'S' => {
                self.state = .method_complete;
                return .initMethod(.OPTIONS);
            },
            else => return error.MethodNotSupported,
        },
        .method_complete => switch (char) {
            // The pathname starts with the byte after the separating space.
            ' ' => self.state = .initPathname(next_char_slice),
            else => return error.MethodNotSupported,
        },
        .pathname => |pathname| switch (char) {
            ' ' => {
                self.state = .pathname_complete;
                return .initPathname(pathname);
            },
            else => self.state = .initPathname(extendSlice(pathname)),
        },
        .pathname_complete => switch (char) {
            'H' => self.state = .version_h,
            else => return error.HttpVersionNotSupported,
        },
        .version_h => switch (char) {
            'T' => self.state = .version_ht,
            else => return error.HttpVersionNotSupported,
        },
        .version_ht => switch (char) {
            'T' => self.state = .version_htt,
            else => return error.HttpVersionNotSupported,
        },
        .version_htt => switch (char) {
            'P' => self.state = .version_http,
            else => return error.HttpVersionNotSupported,
        },
        .version_http => switch (char) {
            '/' => self.state = .@"version_http/",
            else => return error.HttpVersionNotSupported,
        },
        .@"version_http/" => switch (char) {
            '1' => self.state = .@"version_http/1",
            else => return error.HttpVersionNotSupported,
        },
        .@"version_http/1" => switch (char) {
            '.' => self.state = .@"version_http/1.",
            else => return error.HttpVersionNotSupported,
        },
        .@"version_http/1." => switch (char) {
            '1' => self.state = .version_complete,
            else => return error.HttpVersionNotSupported,
        },
        .version_complete => switch (char) {
            '\r' => self.state = .start_line_end,
            else => return error.HttpVersionNotSupported,
        },
        .start_line_end => switch (char) {
            '\n' => self.state = .header_name_start,
            else => return error.SyntaxError,
        },
        .header_name_start => switch (char) {
            // A carriage return at the start of a line begins the blank line
            // that terminates the header section.
            '\r' => {
                self.state = .headers_end;
                return .end_of_headers;
            },
            else => self.state = .initHeaderName(char_slice),
        },
        .header_name => |name| switch (char) {
            // The value starts with the byte after the colon.
            ':' => self.state = .initHeaderValue(.init(.init(name), next_char_slice)),
            else => self.state = .initHeaderName(extendSlice(name)),
        },
        .header_value => |untrimmed_header| switch (char) {
            '\r' => {
                self.state = .header_line_end;
                const header: Header = .init(
                    untrimmed_header.name,
                    std.mem.trim(u8, untrimmed_header.value, " \t"),
                );
                if (header.isNamedKnown(.@"Content-Length")) {
                    const content_length = std.fmt.parseInt(usize, header.value, 10) catch return error.SyntaxError;
                    if (self.content_length) |current_content_length| {
                        @branchHint(.unlikely);
                        // Accept multiple `Content-Length` headers as long as
                        // they have the exact same value.
                        if (content_length != current_content_length) {
                            return error.SyntaxError;
                        }
                    } else {
                        self.content_length = content_length;
                    }
                }
                return .initHeader(header);
            },
            else => self.state = .initHeaderValue(extendHeader(untrimmed_header)),
        },
        .header_line_end => switch (char) {
            '\n' => self.state = .header_name_start,
            else => return error.SyntaxError,
        },
        .headers_end => switch (char) {
            '\n' => {
                // Without a (non-zero) `Content-Length` there is no body to
                // wait for, so the empty body is reported right away.
                const content_length = self.content_length orelse 0;
                if (content_length == 0) {
                    self.state = .done;
                    return .initBody(&.{});
                } else {
                    self.state = .initBody(next_char_slice);
                }
            },
            else => return error.SyntaxError,
        },
        .body => |body| {
            const content_length = self.content_length.?;
            const new_body = extendSlice(body);
            if (new_body.len >= content_length) {
                self.state = .done;
                return .initBody(new_body);
            } else {
                self.state = .initBody(new_body);
            }
        },
        // The parser is finished once the body has been returned.
        .done => unreachable,
    }
    return null;
}
/// Returns a slice that starts where `slice` starts and is one byte longer.
///
/// No bounds are checked: the caller must know that the byte right after
/// `slice` is valid memory.
fn extendSlice(slice: []const u8) []const u8 {
    const grown_len = slice.len + 1;
    return slice.ptr[0..grown_len];
}
/// Returns a slice that starts where `slice` starts and is `n` bytes longer.
///
/// No bounds are checked: the caller must know that the `n` bytes right after
/// `slice` are valid memory.
fn extendSliceBy(slice: []const u8, n: usize) []const u8 {
    const grown_len = slice.len + n;
    return slice.ptr[0..grown_len];
}
/// Returns a header with the same name as `header` and a value that is one
/// byte longer (see `extendSlice` for the memory requirement).
fn extendHeader(header: Header) Header {
    const longer_value = extendSlice(header.value);
    return Header{
        .name = header.name,
        .value = longer_value,
    };
}
// --- SIMD --------------------------------------------------------------------
/// Number of bytes processed per SIMD step: the vector length suggested by the
/// standard library for `u8` on the compilation target. Compilation fails here
/// if the standard library has no suggestion for the target.
const vec_len = std.simd.suggestVectorLength(u8).?;
/// Vector of `vec_len` bytes.
const Vec = @Vector(vec_len, u8);
/// A byte prefix prepared for comparison against a vector of input bytes.
const Pattern = struct {
    /// The prefix bytes, padded with zeroes up to `vec_len`.
    value: Vec,
    /// `0xFF` in the lanes covered by the prefix, `0x00` in the padding lanes.
    mask: Vec,
    /// Length of the prefix in bytes.
    len: u32,

    /// Builds the pattern for `prefix`. Fails to compile when `prefix` does not
    /// fit into a single vector.
    pub fn init(comptime prefix: []const u8) Pattern {
        if (prefix.len > vec_len) {
            @compileError("Prefix length is too high");
        }
        // Start fully padded, then fill in the lanes the prefix occupies.
        var expected = [_]u8{0x00} ** vec_len;
        var significant = [_]u8{0x00} ** vec_len;
        for (prefix, 0..) |byte, lane| {
            expected[lane] = byte;
            significant[lane] = 0xFF;
        }
        return .{
            .value = expected,
            .mask = significant,
            .len = prefix.len,
        };
    }

    /// Reports whether `vec` starts with the prefix. Lanes past the prefix are
    /// masked out and therefore ignored.
    inline fn check(self: Pattern, vec: Vec) bool {
        const masked = vec & self.mask;
        const lanes_equal = masked == self.value;
        return @reduce(.And, lanes_equal);
    }
};
/// Prefix patterns used by the SIMD code path.
const patterns = struct {
    /// One pattern per supported method, each including the space that
    /// separates the method from the pathname. Every declaration is named
    /// after the `Method` member it stands for.
    pub const methods = struct {
        // NOTE These patterns are arranged in a specific order, such that the
        // first ones are the most common (based on vibes only).
        pub const GET: Pattern = .init("GET ");
        pub const POST: Pattern = .init("POST ");
        pub const HEAD: Pattern = .init("HEAD ");
        pub const PUT: Pattern = .init("PUT ");
        pub const DELETE: Pattern = .init("DELETE ");
        pub const PATCH: Pattern = .init("PATCH ");
        pub const OPTIONS: Pattern = .init("OPTIONS ");
        pub const CONNECT: Pattern = .init("CONNECT ");
        pub const TRACE: Pattern = .init("TRACE ");
    };

    /// The version token together with the line ending of the request line.
    pub const @"version_http/1.1": Pattern = .init("HTTP/1.1\r\n");
};
/// Reports whether any lane of `vec` is a space character.
inline fn hasSpace(vec: Vec) bool {
    const spaces: Vec = @splat(' ');
    return @reduce(.Or, vec == spaces);
}
/// Reports whether any lane of `vec` is a carriage return or a line feed.
inline fn hasCRLF(vec: Vec) bool {
    const carriage_returns: Vec = @splat('\r');
    const line_feeds: Vec = @splat('\n');
    const any_cr = @reduce(.Or, vec == carriage_returns);
    const any_lf = @reduce(.Or, vec == line_feeds);
    return any_cr or any_lf;
}
/// Tries to advance the parser by up to `vec_len` bytes in one step.
///
/// `vec_ptr` must point at the next `vec_len` unconsumed bytes of the request
/// buffer; as with `consumeChar`, slices kept in the state are grown in place
/// and alias that buffer.
///
/// May return with `.consumed == 0`, in which case the parsing should be
/// retried with non-SIMD method.
///
/// Errors:
/// - `error.MethodNotSupported` when the parser is at the start of the request
///   and the vector does not begin with a supported method followed by a
///   space.
/// - `error.HttpVersionNotSupported` when the version is expected and the
///   vector does not begin with `HTTP/1.1` followed by CR LF.
pub fn consumeVec(self: *Parser, vec_ptr: *const [vec_len]u8) Error!ConsumeResult {
    const vec: Vec = vec_ptr.*;
    // Result used whenever the vector cannot be handled as a whole.
    const delegate_to_consume_char: ConsumeResult = .{
        .consumed = 0,
        .result = null,
    };
    switch (self.state) {
        .init => {
            inline for (@typeInfo(patterns.methods).@"struct".decls) |decl| {
                const pattern: Pattern = @field(patterns.methods, decl.name);
                if (pattern.check(vec)) {
                    self.state = .method_complete;
                    return .{
                        // The pattern ends with the separating space, but the
                        // `.method_complete` state is the one that consumes
                        // it (and starts the pathname right after it), so the
                        // space is left in the input.
                        .consumed = pattern.len - 1,
                        .result = .initMethod(@field(Method, decl.name)),
                    };
                }
            }
            return error.MethodNotSupported;
        },
        .pathname => |pathname| {
            if (hasSpace(vec)) {
                // The pathname ends inside this vector.
                return delegate_to_consume_char;
            }
            self.state = .initPathname(extendSliceBy(pathname, vec_len));
            return .{
                .consumed = vec_len,
                .result = null,
            };
        },
        .pathname_complete => {
            if (patterns.@"version_http/1.1".check(vec)) {
                // The pattern covers the version and the CR LF, so the next
                // byte is the start of a header line.
                self.state = .header_name_start;
                return .{
                    .consumed = patterns.@"version_http/1.1".len,
                    .result = null,
                };
            } else {
                return error.HttpVersionNotSupported;
            }
        },
        .header_value => |header| {
            if (hasCRLF(vec)) {
                // The header line ends inside this vector.
                return delegate_to_consume_char;
            }
            self.state = .initHeaderValue(.{
                .name = header.name,
                .value = extendSliceBy(header.value, vec_len),
            });
            return .{
                .consumed = vec_len,
                .result = null,
            };
        },
        else => return delegate_to_consume_char,
    }
}