web: Great cleanup WIP

This commit is contained in:
2026-03-09 16:59:29 +01:00
parent 6315589fa1
commit 9a4932e629
7 changed files with 972 additions and 680 deletions

View File

@@ -1,22 +1,525 @@
//! HTTP/1.1 parser.
//!
//! This parser is *streaming*, meaning it can gracefully consume partial HTTP
//! request bytes. An instance of this parser is meant for parsing a singular
//! request. Once the request is fully completed, a new instance of the parser
//! should be initialized.
//!
//! During a single ingestion, the parser can return one of the following:
//!
//! - route of type `Route`, i.e. HTTP method (aka verb) with pathname
//! - header of type `Header`, i.e. a field name with a value
//! - end_of_headers of type `void`, i.e. a marker which informs the user of
//! this parser that there will be no more headers; this moment can be used by
//! the user to make decisions about further processing of the request based
//! on the full knowledge of all the headers
//! - body of type `[]const u8`, i.e. a slice to the request body (or
//! zero-length slice if there is no request body)
//!
//! The first result returned from the parser will always be the route. Then,
//! zero or more headers will follow, terminated by the end_of_headers marker.
//! The parser will always finish with a single body result.
//!
//! Parser methods stop processing at the first result. Therefore, if any result
//! is returned, the provided bytes might have been only partially consumed and
//! the methods must be repeatedly called until all of the bytes are consumed.
//! When the body is returned, the parser is finished and should be no longer
//! used. If the body was returned, but the bytes were not fully consumed, it
//! means that the remainder belongs to a subsequent HTTP request.
//!
//! When an error is returned from the parser, the HTTP request should be
//! considered malformed. You may choose to respond to it, but the request must
//! no longer be parsed and the connection should be closed.
//!
//! The parser is not involved in any HTTP semantics, only its syntax. It is up
//! to the user of this parser to respect all of the HTTP standards (if they
//! even choose to). For example, none of the header field values are verified.
//! The only exception is `Content-Length`. The parser must know the value to
//! determine the length of the request body. If the value fails to parse as a
//! decimal non-negative integer, a syntax error is returned. Note that
//! according to [RFC 9110, Section 8.6: HTTP Semantics](https://datatracker.ietf.org/doc/html/rfc9110#section-8.6),
//! `Content-Length` header field value consisting of the same decimal value
//! repeated as a comma-separated list (e.g. `Content-Length: 42, 42`) MAY be
//! accepted. This parser chooses not to accept it.
const std = @import("std");
const Parser = @This();
const FieldName = @import("FieldName.zig").FieldName;
const Header = @import("Header.zig");
const Method = @import("Method.zig").Method;
const Response = @import("../Response.zig");
const RequestHandler = @import("../RequestHandler.zig");
const RequestRouter = @import("../RequestRouter.zig");
const Route = @import("Route.zig");
// Error set used as the failure type of `consume`, `consumeVec` and
// `consumeChar`.
const Error = error{
pub const Error = error{
MethodNotSupported,
HttpVersionNotSupported,
MissingLineFeed,
InvalidContentLength,
RouterError,
HandlerError,
SyntaxError,
};
// Byte vector whose length is the one `std.simd.suggestVectorLength` suggests
// for `u8` on the compilation target.
const Vec: type = @Vector(std.simd.suggestVectorLength(u8).?, u8);
/// A single item produced by the parser while it ingests request bytes.
pub const Result = union(enum) {
    /// HTTP method together with the request pathname.
    route: Route,
    /// One header field.
    header: Header,
    /// Marker emitted once no more headers will follow.
    end_of_headers,
    /// The request body; zero-length when the request has none.
    body: []const u8,

    /// Wraps `route` in the `route` variant.
    pub fn initRoute(route: Route) Result {
        return Result{ .route = route };
    }

    /// Wraps `header` in the `header` variant.
    pub fn initHeader(header: Header) Result {
        return Result{ .header = header };
    }

    /// Wraps `body` in the `body` variant.
    pub fn initBody(body: []const u8) Result {
        return Result{ .body = body };
    }
};
/// Outcome of a single `consume` call.
pub const ConsumeResult = struct {
/// Number of bytes of the input that the call processed.
consumed: usize,
/// Result produced by the call, or `null` when the whole input was processed
/// without producing one.
result: ?Result,
};
/// Position of the parser inside the request. Variants that carry a payload
/// hold the data accumulated so far for the element being parsed.
pub const State = union(enum) {
    /// Nothing has been consumed yet.
    init,

    // Partially matched method names; the suffix spells the bytes seen so far.
    method_c,
    method_d,
    method_g,
    method_h,
    method_o,
    method_p,
    method_t,
    method_co,
    method_de,
    method_ge,
    method_he,
    method_op,
    method_pa,
    method_po,
    method_pu,
    method_tr,
    method_con,
    method_del,
    method_hea,
    method_opt,
    method_pat,
    method_pos,
    method_tra,
    method_conn,
    method_dele,
    method_opti,
    method_patc,
    method_trac,
    method_conne,
    method_delet,
    method_optio,
    method_connec,
    method_option,
    /// A full method name was matched; a space is expected next.
    method_complete: Method,

    /// Method plus the pathname bytes collected so far.
    pathname: Route,
    pathname_complete,

    // Partially matched `HTTP/1.1` version token.
    version_h,
    version_ht,
    version_htt,
    version_http,
    @"version_http/",
    @"version_http/1",
    @"version_http/1.",
    version_complete,

    /// Carriage return after the version was seen; a line feed is expected.
    start_line_end,
    /// At the first byte of a header line (or of the blank line ending them).
    header_name_start,
    /// Header name bytes collected so far.
    header_name: []const u8,
    /// Header name plus the untrimmed value bytes collected so far.
    header_value: Header,
    /// Carriage return ending a header line was seen; a line feed is expected.
    header_line_end,
    /// Carriage return of the blank line was seen; a line feed is expected.
    headers_end,
    /// Body bytes collected so far.
    body: []const u8,
    /// The body was returned; the parser is finished.
    done,

    /// Builds the `method_complete` state for `method`.
    pub fn initMethodComplete(method: Method) State {
        return State{ .method_complete = method };
    }

    /// Builds the `pathname` state holding `route`.
    pub fn initPathname(route: Route) State {
        return State{ .pathname = route };
    }

    /// Builds the `header_name` state holding `name`.
    pub fn initHeaderName(name: []const u8) State {
        return State{ .header_name = name };
    }

    /// Builds the `header_value` state holding `header`.
    pub fn initHeaderValue(header: Header) State {
        return State{ .header_value = header };
    }

    /// Builds the `body` state holding `body`.
    pub fn initBody(body: []const u8) State {
        return State{ .body = body };
    }
};
/// Current position in the parsing state machine.
state: State,
/// Parsed value of the `Content-Length` header; `null` until such a header has
/// been seen. A missing value is treated as a zero-length body.
content_length: ?usize,
/// Creates a parser in its initial state, with no `Content-Length` recorded
/// yet.
pub fn init() Parser {
    const fresh: Parser = .{
        .content_length = null,
        .state = .init,
    };
    return fresh;
}
/// Feeds `chars` to the parser and stops at the first result.
///
/// The returned `consumed` is the number of bytes of `chars` that were used;
/// when a result is returned it may be smaller than `chars.len`, in which case
/// the caller has to call again with the remainder. `result` is `null` when
/// all of `chars` was consumed without completing anything.
///
/// While in the body state the bytes are taken in bulk, up to the recorded
/// content length; every other state is advanced one byte at a time through
/// `consumeChar`, whose errors are propagated.
pub fn consume(self: *Parser, chars: []const u8) Error!ConsumeResult {
    var offset: usize = 0;
    while (offset < chars.len) {
        switch (self.state) {
            .body => |partial| {
                const expected_len = self.content_length.?;
                const available = chars.len - offset;
                const missing = expected_len - partial.len;
                const take = @min(available, missing);
                const grown = extendSliceBy(partial, take);
                offset += take;
                if (grown.len < expected_len) {
                    // Still incomplete: remember what we have and keep going.
                    self.state = .initBody(grown);
                    continue;
                }
                self.state = .done;
                return .{ .consumed = offset, .result = .initBody(grown) };
            },
            else => {
                // TODO fix
                // if (chars.len - offset >= vec_len) {
                //     const vec_res = try self.consumeVec(chars[offset..][0..vec_len]);
                //     offset += vec_res.consumed;
                //     if (vec_res.result) |result| {
                //         return .{
                //             .consumed = offset,
                //             .result = result,
                //         };
                //     }
                //     if (vec_res.consumed > 0) {
                //         continue;
                //     }
                // }
                const produced = try self.consumeChar(&chars[offset]);
                offset += 1;
                if (produced) |result| {
                    return .{ .consumed = offset, .result = result };
                }
            },
        }
    }
    std.debug.assert(offset == chars.len);
    return .{ .consumed = chars.len, .result = null };
}
/// Feeds a single byte into the state machine.
///
/// `char_ptr` has to point into the caller's request buffer rather than at a
/// copy of the byte: the slices kept in the state (pathname, header name,
/// header value, body) start at or right after that address and are grown one
/// byte at a time, so they alias the buffer.
///
/// Returns the `Result` completed by this byte, or `null` when the byte only
/// advanced the state. Fails with:
/// - `error.MethodNotSupported` when the bytes do not spell CONNECT, DELETE,
///   GET, HEAD, OPTIONS, PATCH, POST, PUT or TRACE followed by a space,
/// - `error.HttpVersionNotSupported` when the version is not `HTTP/1.1`
///   followed by a carriage return,
/// - `error.SyntaxError` when a line feed is missing after a carriage return,
///   when `Content-Length` does not parse as a decimal `usize`, or when
///   repeated `Content-Length` headers disagree.
///
/// Must not be called once the state is `.done`.
fn consumeChar(self: *Parser, char_ptr: *const u8) Error!?Result {
    const char = char_ptr.*;
    // One-byte view of the current byte; used as the start of a header name.
    const char_slice: *const [1]u8 = @ptrCast(char_ptr);
    // Zero-length slice positioned right after the current byte; used as the
    // start of an element that begins with the *next* byte.
    const next_char_slice = @as([*]const u8, @ptrCast(char_ptr))[1..1];
    switch (self.state) {
        .init => switch (char) {
            'C' => self.state = .method_c,
            'D' => self.state = .method_d,
            'G' => self.state = .method_g,
            'H' => self.state = .method_h,
            'O' => self.state = .method_o,
            'P' => self.state = .method_p,
            'T' => self.state = .method_t,
            else => return error.MethodNotSupported,
        },
        .method_c => switch (char) {
            'O' => self.state = .method_co,
            else => return error.MethodNotSupported,
        },
        .method_d => switch (char) {
            'E' => self.state = .method_de,
            else => return error.MethodNotSupported,
        },
        .method_g => switch (char) {
            'E' => self.state = .method_ge,
            else => return error.MethodNotSupported,
        },
        .method_h => switch (char) {
            'E' => self.state = .method_he,
            else => return error.MethodNotSupported,
        },
        .method_o => switch (char) {
            'P' => self.state = .method_op,
            else => return error.MethodNotSupported,
        },
        .method_p => switch (char) {
            'A' => self.state = .method_pa,
            'O' => self.state = .method_po,
            'U' => self.state = .method_pu,
            else => return error.MethodNotSupported,
        },
        .method_t => switch (char) {
            'R' => self.state = .method_tr,
            else => return error.MethodNotSupported,
        },
        .method_co => switch (char) {
            'N' => self.state = .method_con,
            else => return error.MethodNotSupported,
        },
        .method_de => switch (char) {
            'L' => self.state = .method_del,
            else => return error.MethodNotSupported,
        },
        .method_ge => switch (char) {
            'T' => self.state = .initMethodComplete(.GET),
            else => return error.MethodNotSupported,
        },
        .method_he => switch (char) {
            'A' => self.state = .method_hea,
            else => return error.MethodNotSupported,
        },
        .method_op => switch (char) {
            'T' => self.state = .method_opt,
            else => return error.MethodNotSupported,
        },
        .method_pa => switch (char) {
            'T' => self.state = .method_pat,
            else => return error.MethodNotSupported,
        },
        .method_po => switch (char) {
            'S' => self.state = .method_pos,
            else => return error.MethodNotSupported,
        },
        .method_pu => switch (char) {
            'T' => self.state = .initMethodComplete(.PUT),
            else => return error.MethodNotSupported,
        },
        .method_tr => switch (char) {
            'A' => self.state = .method_tra,
            else => return error.MethodNotSupported,
        },
        .method_con => switch (char) {
            'N' => self.state = .method_conn,
            else => return error.MethodNotSupported,
        },
        .method_del => switch (char) {
            'E' => self.state = .method_dele,
            else => return error.MethodNotSupported,
        },
        .method_hea => switch (char) {
            'D' => self.state = .initMethodComplete(.HEAD),
            else => return error.MethodNotSupported,
        },
        .method_opt => switch (char) {
            'I' => self.state = .method_opti,
            else => return error.MethodNotSupported,
        },
        .method_pat => switch (char) {
            'C' => self.state = .method_patc,
            else => return error.MethodNotSupported,
        },
        .method_pos => switch (char) {
            'T' => self.state = .initMethodComplete(.POST),
            else => return error.MethodNotSupported,
        },
        .method_tra => switch (char) {
            'C' => self.state = .method_trac,
            else => return error.MethodNotSupported,
        },
        .method_conn => switch (char) {
            'E' => self.state = .method_conne,
            else => return error.MethodNotSupported,
        },
        .method_dele => switch (char) {
            'T' => self.state = .method_delet,
            else => return error.MethodNotSupported,
        },
        .method_opti => switch (char) {
            'O' => self.state = .method_optio,
            else => return error.MethodNotSupported,
        },
        .method_patc => switch (char) {
            'H' => self.state = .initMethodComplete(.PATCH),
            else => return error.MethodNotSupported,
        },
        .method_trac => switch (char) {
            'E' => self.state = .initMethodComplete(.TRACE),
            else => return error.MethodNotSupported,
        },
        .method_conne => switch (char) {
            'C' => self.state = .method_connec,
            else => return error.MethodNotSupported,
        },
        .method_delet => switch (char) {
            'E' => self.state = .initMethodComplete(.DELETE),
            else => return error.MethodNotSupported,
        },
        .method_optio => switch (char) {
            'N' => self.state = .method_option,
            else => return error.MethodNotSupported,
        },
        .method_connec => switch (char) {
            'T' => self.state = .initMethodComplete(.CONNECT),
            else => return error.MethodNotSupported,
        },
        .method_option => switch (char) {
            'S' => self.state = .initMethodComplete(.OPTIONS),
            else => return error.MethodNotSupported,
        },
        .method_complete => |method| switch (char) {
            // The pathname starts with the byte after the separating space.
            ' ' => self.state = .initPathname(.init(method, next_char_slice)),
            else => return error.MethodNotSupported,
        },
        .pathname => |route| switch (char) {
            ' ' => {
                self.state = .pathname_complete;
                return .initRoute(route);
            },
            else => self.state = .initPathname(extendRoute(route)),
        },
        .pathname_complete => switch (char) {
            'H' => self.state = .version_h,
            else => return error.HttpVersionNotSupported,
        },
        .version_h => switch (char) {
            'T' => self.state = .version_ht,
            else => return error.HttpVersionNotSupported,
        },
        .version_ht => switch (char) {
            'T' => self.state = .version_htt,
            else => return error.HttpVersionNotSupported,
        },
        .version_htt => switch (char) {
            'P' => self.state = .version_http,
            else => return error.HttpVersionNotSupported,
        },
        .version_http => switch (char) {
            '/' => self.state = .@"version_http/",
            else => return error.HttpVersionNotSupported,
        },
        .@"version_http/" => switch (char) {
            '1' => self.state = .@"version_http/1",
            else => return error.HttpVersionNotSupported,
        },
        .@"version_http/1" => switch (char) {
            '.' => self.state = .@"version_http/1.",
            else => return error.HttpVersionNotSupported,
        },
        .@"version_http/1." => switch (char) {
            '1' => self.state = .version_complete,
            else => return error.HttpVersionNotSupported,
        },
        .version_complete => switch (char) {
            '\r' => self.state = .start_line_end,
            else => return error.HttpVersionNotSupported,
        },
        .start_line_end => switch (char) {
            '\n' => self.state = .header_name_start,
            else => return error.SyntaxError,
        },
        .header_name_start => switch (char) {
            // A carriage return where a header name would start is the blank
            // line that terminates the header section.
            '\r' => {
                self.state = .headers_end;
                return .end_of_headers;
            },
            else => self.state = .initHeaderName(char_slice),
        },
        .header_name => |name| switch (char) {
            // The value starts with the byte after the colon.
            ':' => self.state = .initHeaderValue(.init(.init(name), next_char_slice)),
            else => self.state = .initHeaderName(extendSlice(name)),
        },
        .header_value => |untrimmed_header| switch (char) {
            '\r' => {
                self.state = .header_line_end;
                const header: Header = .init(
                    untrimmed_header.name,
                    std.mem.trim(u8, untrimmed_header.value, " \t"),
                );
                if (header.isNamedKnown(.@"Content-Length")) {
                    const content_length = std.fmt.parseInt(usize, header.value, 10) catch return error.SyntaxError;
                    if (self.content_length) |current_content_length| {
                        @branchHint(.unlikely);
                        // Accept multiple `Content-Length` headers as long as
                        // they have the exact same value.
                        if (content_length != current_content_length) {
                            return error.SyntaxError;
                        }
                    } else {
                        self.content_length = content_length;
                    }
                }
                return .initHeader(header);
            },
            else => self.state = .initHeaderValue(extendHeader(untrimmed_header)),
        },
        .header_line_end => switch (char) {
            '\n' => self.state = .header_name_start,
            else => return error.SyntaxError,
        },
        .headers_end => switch (char) {
            '\n' => {
                // A missing `Content-Length` means there is no body.
                const content_length = self.content_length orelse 0;
                if (content_length == 0) {
                    self.state = .done;
                    return .initBody(&.{});
                } else {
                    // The body starts with the byte after this line feed.
                    self.state = .initBody(next_char_slice);
                }
            },
            else => return error.SyntaxError,
        },
        .body => |body| {
            const content_length = self.content_length.?;
            const new_body = extendSlice(body);
            if (new_body.len >= content_length) {
                self.state = .done;
                return .initBody(new_body);
            } else {
                self.state = .initBody(new_body);
            }
        },
        .done => unreachable,
    }
    return null;
}
/// Returns `slice` grown by one element at its end, keeping the same start.
/// The caller is responsible for the extra element being valid memory.
fn extendSlice(slice: []const u8) []const u8 {
    const grown_len = slice.len + 1;
    return slice.ptr[0..grown_len];
}
/// Returns `slice` grown by `n` elements at its end, keeping the same start.
/// The caller is responsible for the extra elements being valid memory.
fn extendSliceBy(slice: []const u8, n: usize) []const u8 {
    const grown_len = slice.len + n;
    return slice.ptr[0..grown_len];
}
/// Returns a copy of `route` whose pathname is one byte longer; the method is
/// carried over unchanged.
fn extendRoute(route: Route) Route {
    const longer_pathname = extendSlice(route.pathname);
    return Route{
        .method = route.method,
        .pathname = longer_pathname,
    };
}
/// Returns a copy of `header` whose value is one byte longer; the name is
/// carried over unchanged.
fn extendHeader(header: Header) Header {
    const longer_value = extendSlice(header.value);
    return Header{
        .name = header.name,
        .value = longer_value,
    };
}
// --- SIMD --------------------------------------------------------------------
// Byte vector whose length is the one `std.simd.suggestVectorLength` suggests
// for `u8` on the compilation target.
const Vec = @Vector(std.simd.suggestVectorLength(u8).?, u8);
// Number of `u8` lanes in `Vec`.
const vec_len = @typeInfo(Vec).vector.len;
const Pattern = struct {
@@ -85,159 +588,8 @@ inline fn hasCRLF(vec: Vec) bool {
return @reduce(.Or, has_cr | has_lf);
}
/// Position of the parser inside the request. Variants that carry a payload
/// hold the data accumulated so far for the element being parsed.
const State = union(enum) {
    /// Nothing has been consumed yet.
    init,
    // TODO Add all methods here and in `consumeChar` (they are covered by
    // `consumeVec`, though)
    method_d,
    method_g,
    method_h,
    method_p,
    method_de,
    method_ge,
    method_he,
    method_pa,
    method_po,
    method_pu,
    method_del,
    method_hea,
    method_pat,
    method_pos,
    method_dele,
    method_patc,
    method_delet,
    /// A full method name was matched; a space is expected next.
    method_complete: struct { method: Method },
    /// Method plus the pathname bytes collected so far.
    pathname_state: struct { method: Method, pathname: []const u8 },
    pathname_complete,
    // Partially matched `HTTP/1.1` version token.
    version_h,
    version_ht,
    version_htt,
    version_http,
    @"version_http/",
    @"version_http/1",
    @"version_http/1.",
    version_complete,
    start_line_end,
    header_name_start,
    /// Header name bytes collected so far.
    header_name: []const u8,
    /// Header name plus the untrimmed value bytes collected so far.
    header_value: struct { name: []const u8, value: []const u8 },
    header_line_end,
    headers_end,
    /// Body bytes collected so far.
    body: []const u8,

    /// Builds the `method_complete` state for `method`.
    pub fn methodComplete(method: Method) State {
        return State{ .method_complete = .{ .method = method } };
    }

    /// Builds the `pathname_state` state from a method and the pathname bytes
    /// collected so far.
    pub fn pathname(method: Method, p: []const u8) State {
        return State{ .pathname_state = .{ .method = method, .pathname = p } };
    }

    /// Builds the `header_value` state from a header name and the value bytes
    /// collected so far.
    pub fn headerValue(name: []const u8, value: []const u8) State {
        return State{ .header_value = .{ .name = name, .value = value } };
    }
};
/// Outcome of a `consume` / `consumeVec` call.
const ConsumeResult = struct {
/// Number of bytes of the input that the call processed.
consumed: usize,
/// Whether the request has been fully parsed (the body was handed to the
/// request handler).
done: bool,
};
/// Outcome of a `consumeChar` call: `done` once the body has been handed to
/// the request handler, `not_done` otherwise.
const ConsumeCharResult = enum {
not_done,
done,
};
/// Router asked (via `rawRoute`) for a handler once the route has been parsed.
request_router: RequestRouter,
/// Passed to every `rawHeader` / `rawBody` call on the request handler.
response: *Response,
/// Current position in the parsing state machine.
state: State,
/// Parsed value of the `Content-Length` header; stays 0 when none was seen.
content_length: usize,
/// Handler returned by the router; `null` until the route has been parsed.
request_handler: ?RequestHandler = null,
/// Error returned by the router; set right before `error.RouterError` is
/// returned and undefined otherwise.
last_router_error: anyerror = undefined,
/// Error returned by the handler; set right before `error.HandlerError` is
/// returned and undefined otherwise.
last_handler_error: anyerror = undefined,
/// Creates a parser in its initial state that will route through
/// `request_router` and pass `response` to the handler callbacks. The content
/// length starts at 0.
pub fn init(request_router: RequestRouter, response: *Response) Parser {
    const fresh: Parser = .{
        .state = .init,
        .content_length = 0,
        .response = response,
        .request_router = request_router,
    };
    return fresh;
}
/// Feeds `chars` to the parser.
///
/// While in the body state the bytes are taken in bulk, up to the recorded
/// content length, and the call returns right away; once the body is complete
/// it is handed to the request handler through `rawBody`. Every other state is
/// advanced one byte at a time through `consumeChar`.
///
/// Returns how many bytes were consumed and whether the request is done.
/// A failing handler is remembered in `last_handler_error` and reported as
/// `error.HandlerError`; errors from `consumeChar` are propagated.
pub fn consume(self: *Parser, chars: []const u8) Error!ConsumeResult {
    var offset: usize = 0;
    while (offset < chars.len) {
        switch (self.state) {
            .body => |partial| {
                const missing = self.content_length - partial.len;
                const take = @min(chars.len - offset, missing);
                const grown = partial.ptr[0 .. partial.len + take];
                self.state = .{ .body = grown };
                offset += take;
                if (grown.len < self.content_length) {
                    return .{ .consumed = offset, .done = false };
                }
                self.request_handler.?.rawBody(self.response, grown) catch |err| {
                    self.last_handler_error = err;
                    return error.HandlerError;
                };
                return .{ .consumed = offset, .done = true };
            },
            else => {
                // TODO Fix
                // if (chars.len - offset >= vec_len) {
                //     const vec_res = try self.consumeVec(chars[offset..][0..vec_len]);
                //     offset += vec_res.consumed;
                //     if (vec_res.done) {
                //         return .{
                //             .consumed = offset,
                //             .done = true,
                //         };
                //     }
                //     if (vec_res.consumed > 0) {
                //         continue;
                //     }
                // }
                const step = try self.consumeChar(&chars[offset]);
                offset += 1;
                switch (step) {
                    .done => return .{ .consumed = offset, .done = true },
                    .not_done => {},
                }
            },
        }
    }
    return .{ .consumed = chars.len, .done = false };
}
/// May return with `.consumed == 0`, in which case the parsing should be
/// retried with non-SIMD method.
pub fn consumeVec(self: *Parser, vec_ptr: *const [vec_len]u8) Error!ConsumeResult {
const vec: Vec = vec_ptr.*;
switch (self.state) {
@@ -305,197 +657,3 @@ pub fn consumeVec(self: *Parser, vec_ptr: *const [vec_len]u8) Error!ConsumeResul
},
}
}
/// Feeds a single byte into the state machine.
///
/// `c_ptr` has to point into the caller's request buffer rather than at a copy
/// of the byte: the slices kept in the state (pathname, header name, header
/// value, body) start at or right after that address and are grown one byte at
/// a time.
///
/// Side effects: asks `request_router` for a handler once the pathname is
/// complete, calls `rawHeader` on that handler for every header and `rawBody`
/// once the body is complete. A failing router/handler is remembered in
/// `last_router_error` / `last_handler_error` and reported as
/// `error.RouterError` / `error.HandlerError`.
///
/// Returns `.done` once the body has been handed to the handler, `.not_done`
/// otherwise.
pub fn consumeChar(self: *Parser, c_ptr: *const u8) Error!ConsumeCharResult {
const c = c_ptr.*;
// One-byte slice over the current byte; used as the start of a header name.
const c_slice = @as([*]const u8, @ptrCast(c_ptr))[0..1];
switch (self.state) {
// Only DELETE, GET, HEAD, PATCH, POST and PUT are recognized here.
.init => switch (c) {
'D' => self.state = .method_d,
'G' => self.state = .method_g,
'H' => self.state = .method_h,
'P' => self.state = .method_p,
else => return error.MethodNotSupported,
},
.method_d => switch (c) {
'E' => self.state = .method_de,
else => return error.MethodNotSupported,
},
.method_g => switch (c) {
'E' => self.state = .method_ge,
else => return error.MethodNotSupported,
},
.method_h => switch (c) {
'E' => self.state = .method_he,
else => return error.MethodNotSupported,
},
.method_p => switch (c) {
'A' => self.state = .method_pa,
'O' => self.state = .method_po,
'U' => self.state = .method_pu,
else => return error.MethodNotSupported,
},
.method_de => switch (c) {
'L' => self.state = .method_del,
else => return error.MethodNotSupported,
},
.method_ge => switch (c) {
'T' => self.state = .methodComplete(.GET),
else => return error.MethodNotSupported,
},
.method_he => switch (c) {
'A' => self.state = .method_hea,
else => return error.MethodNotSupported,
},
.method_pa => switch (c) {
'T' => self.state = .method_pat,
else => return error.MethodNotSupported,
},
.method_po => switch (c) {
'S' => self.state = .method_pos,
else => return error.MethodNotSupported,
},
.method_pu => switch (c) {
'T' => self.state = .methodComplete(.PUT),
else => return error.MethodNotSupported,
},
.method_del => switch (c) {
'E' => self.state = .method_dele,
else => return error.MethodNotSupported,
},
.method_hea => switch (c) {
'D' => self.state = .methodComplete(.HEAD),
else => return error.MethodNotSupported,
},
.method_pat => switch (c) {
'C' => self.state = .method_patc,
else => return error.MethodNotSupported,
},
.method_pos => switch (c) {
'T' => self.state = .methodComplete(.POST),
else => return error.MethodNotSupported,
},
.method_dele => switch (c) {
'T' => self.state = .method_delet,
else => return error.MethodNotSupported,
},
.method_patc => switch (c) {
'H' => self.state = .methodComplete(.PATCH),
else => return error.MethodNotSupported,
},
.method_delet => switch (c) {
'E' => self.state = .methodComplete(.DELETE),
else => return error.MethodNotSupported,
},
.method_complete => |s| switch (c) {
// The pathname starts as a zero-length slice right after the space.
' ' => self.state = .pathname(s.method, @as([*]const u8, @ptrCast(c_ptr))[1..1]),
else => return error.MethodNotSupported,
},
.pathname_state => |s| switch (c) {
' ' => {
self.state = .pathname_complete;
// The route is complete: ask the router for the handler of this request.
self.request_handler = self.request_router.rawRoute(.init(s.method, s.pathname)) catch |err| {
self.last_router_error = err;
return error.RouterError;
};
},
else => self.state = .pathname(s.method, s.pathname.ptr[0 .. s.pathname.len + 1]),
},
.pathname_complete => switch (c) {
'H' => self.state = .version_h,
else => return error.HttpVersionNotSupported,
},
.version_h => switch (c) {
'T' => self.state = .version_ht,
else => return error.HttpVersionNotSupported,
},
.version_ht => switch (c) {
'T' => self.state = .version_htt,
else => return error.HttpVersionNotSupported,
},
.version_htt => switch (c) {
'P' => self.state = .version_http,
else => return error.HttpVersionNotSupported,
},
.version_http => switch (c) {
'/' => self.state = .@"version_http/",
else => return error.HttpVersionNotSupported,
},
.@"version_http/" => switch (c) {
'1' => self.state = .@"version_http/1",
else => return error.HttpVersionNotSupported,
},
.@"version_http/1" => switch (c) {
'.' => self.state = .@"version_http/1.",
else => return error.HttpVersionNotSupported,
},
.@"version_http/1." => switch (c) {
'1' => self.state = .version_complete,
else => return error.HttpVersionNotSupported,
},
.version_complete => switch (c) {
'\r' => self.state = .start_line_end,
else => return error.HttpVersionNotSupported,
},
.start_line_end => switch (c) {
'\n' => self.state = .header_name_start,
else => return error.MissingLineFeed,
},
.header_name_start => switch (c) {
// A carriage return where a header name would start is the blank line that
// terminates the header section.
'\r' => self.state = .headers_end,
else => self.state = .{ .header_name = c_slice },
},
.header_name => |name| switch (c) {
':' => {
// The value starts as a zero-length slice right after the colon.
self.state = .headerValue(name, @as([*]const u8, @ptrCast(c_ptr))[1..1]);
},
else => self.state = .{ .header_name = name.ptr[0 .. name.len + 1] },
},
.header_value => |s| switch (c) {
'\r' => {
self.state = .header_line_end;
// Spaces and tabs around the value are trimmed before it is used.
const header: Header = .init(s.name, std.mem.trim(u8, s.value, " \t"));
if (header.isKnown(.@"Content-Length")) {
self.content_length = std.fmt.parseInt(usize, header.value, 10) catch return error.InvalidContentLength;
}
// Unwraps the handler, which is set when the route is parsed.
self.request_handler.?.rawHeader(self.response, header) catch |err| {
self.last_handler_error = err;
return error.HandlerError;
};
},
else => self.state = .headerValue(s.name, s.value.ptr[0 .. s.value.len + 1]),
},
.header_line_end => switch (c) {
'\n' => self.state = .header_name_start,
else => return error.MissingLineFeed,
},
.headers_end => switch (c) {
'\n' => {
// No body expected: report an empty body and finish.
if (self.content_length == 0) {
self.request_handler.?.rawBody(self.response, &.{}) catch |err| {
self.last_handler_error = err;
return error.HandlerError;
};
return .done;
}
// The body starts as a zero-length slice right after this line feed.
self.state = .{ .body = @as([*]const u8, @ptrCast(c_ptr))[1..1] };
},
else => return error.MissingLineFeed,
},
.body => |body| {
const new_body = body.ptr[0 .. body.len + 1];
self.state = .{ .body = new_body };
if (new_body.len >= self.content_length) {
self.request_handler.?.rawBody(self.response, new_body) catch |err| {
self.last_handler_error = err;
return error.HandlerError;
};
return .done;
}
},
}
return .not_done;
}