681 lines
22 KiB
Zig
681 lines
22 KiB
Zig
//! HTTP/1.1 parser.
|
|
//!
|
|
//! This parser is *streaming*, meaning it can gracefully consume partial HTTP
|
|
//! request bytes. An instance of this parser is meant for parsing a singular
|
|
//! request. Once the request is fully completed, a new instance of the parser
|
|
//! should be initialized.
|
|
//!
|
|
//! During a single ingestion, the parser can return one of the following:
|
|
//!
|
|
//! - method of type `Method`, i.e. HTTP method (aka verb)
|
|
//! - pathname of type `[]const u8`
|
|
//! - header of type `Header`, i.e. a field name with a value
|
|
//! - end_of_headers of type `void`, i.e. a marker which informs the user of
|
|
//! this parser that there will be no more headers; this result can be used by
|
|
//! the user to make decisions about further processing of the request based
|
|
//! on the full knowledge of all the headers
|
|
//! - body of type `[]const u8`, i.e. a slice to the request body (or
|
|
//! zero-length slice if there is no request body)
|
|
//!
|
|
//! The first result returned from the parser will always be the method,
//! followed by the pathname. Then,
|
|
//! zero or more headers will follow, terminated with the end_of_headers marker. The
|
|
//! parser will always finish with a single body result.
|
|
//!
|
|
//! Parser methods stop processing at the first result. Therefore, if any result
|
|
//! is returned, the provided bytes might have been only partially consumed and
|
|
//! the methods must be repeatedly called until all of the bytes are consumed.
|
|
//! When the body is returned, the parser is finished and should be no longer
|
|
//! used. If the body was returned, but the bytes were not fully consumed, it
|
|
//! means that the remainder belongs to a subsequent HTTP request.
|
|
//!
|
|
//! When an error is returned from the parser, the HTTP request should be
|
|
//! considered malformed. You may choose to respond to it, but the request must
|
|
//! no longer be parsed and the connection should be closed.
|
|
//!
|
|
//! The parser is not involved in any HTTP semantics, only its syntax. It is up
|
|
//! to the user of this parser to respect all of the HTTP standards (if they
|
|
//! even choose to). For example, none of the header field values are verified.
|
|
//! The only exception is `Content-Length`. The parser must know the value to
|
|
//! determine the length of the request body. If the value fails to parse as a
|
|
//! decimal non-negative integer, a syntax error is returned. Note that
|
|
//! according to [RFC 9110, Section 8.6: HTTP Semantics](https://datatracker.ietf.org/doc/html/rfc9110#section-8.6),
|
|
//! `Content-Length` header field value consisting of the same decimal value
|
|
//! repeated as a comma-separated list (e.g. `Content-Length: 42, 42`) MAY be
|
|
//! accepted. This parser chooses not to accept it.
|
|
|
|
const std = @import("std");
|
|
const Parser = @This();
|
|
|
|
const FieldName = @import("FieldName.zig").FieldName;
|
|
const Header = @import("Header.zig");
|
|
const Method = @import("Method.zig").Method;
|
|
|
|
/// Errors the parser can report. Any of them means the request is malformed
/// and parsing must not continue.
pub const Error = error{
    /// The request line does not start with one of the recognized methods.
    MethodNotSupported,
    /// The request line does not carry the exact `HTTP/1.1` version token.
    HttpVersionNotSupported,
    /// Any other violation of the expected request syntax.
    SyntaxError,
};
|
|
|
|
/// A single item produced by the parser.
pub const Result = union(enum) {
    /// The HTTP method (verb) of the request.
    method: Method,
    /// The bytes found between the method and the HTTP version on the request
    /// line.
    pathname: []const u8,
    /// One header: a field name together with its value.
    header: Header,
    /// Marker emitted once no further headers will follow.
    end_of_headers,
    /// The request body; zero-length when the request has none.
    body: []const u8,

    /// Wraps `method` into a `.method` result.
    pub fn initMethod(method: Method) Result {
        return Result{ .method = method };
    }

    /// Wraps `pathname` into a `.pathname` result.
    pub fn initPathname(pathname: []const u8) Result {
        return Result{ .pathname = pathname };
    }

    /// Wraps `header` into a `.header` result.
    pub fn initHeader(header: Header) Result {
        return Result{ .header = header };
    }

    /// Wraps `body` into a `.body` result.
    pub fn initBody(body: []const u8) Result {
        return Result{ .body = body };
    }
};
|
|
|
|
/// Outcome of feeding a chunk of bytes to the parser.
pub const ConsumeResult = struct {
    /// How many of the provided bytes were processed by this call.
    consumed: usize,
    /// The item produced by this call, or `null` when the processed bytes did
    /// not complete one.
    result: ?Result,
};
|
|
|
|
/// Position of the parser inside a request. Payload-carrying states hold the
/// slice (or header) accumulated so far for the token being read.
pub const State = union(enum) {
    /// Nothing has been read yet.
    init,

    // Method recognition: one state per prefix of a supported method name.
    method_c,
    method_d,
    method_g,
    method_h,
    method_o,
    method_p,
    method_t,
    method_co,
    method_de,
    method_ge,
    method_he,
    method_op,
    method_pa,
    method_po,
    method_pu,
    method_tr,
    method_con,
    method_del,
    method_hea,
    method_opt,
    method_pat,
    method_pos,
    method_tra,
    method_conn,
    method_dele,
    method_opti,
    method_patc,
    method_trac,
    method_conne,
    method_delet,
    method_optio,
    method_connec,
    method_option,
    /// A full method name was read; the separating space is expected next.
    method_complete,

    /// Reading the pathname; the payload is the part collected so far.
    pathname: []const u8,
    /// The pathname was terminated by a space; the version token follows.
    pathname_complete,

    // Version recognition: one state per prefix of `HTTP/1.1`.
    version_h,
    version_ht,
    version_htt,
    version_http,
    @"version_http/",
    @"version_http/1",
    @"version_http/1.",
    /// `HTTP/1.1` was read; `\r` is expected next.
    version_complete,
    /// `\r` of the request line was read; `\n` is expected next.
    start_line_end,

    /// At the first byte of a header line (or of the blank line that ends the
    /// headers).
    header_name_start,
    /// Reading a header name; the payload is the part collected so far.
    header_name: []const u8,
    /// Reading a header value; the payload carries the name and the untrimmed
    /// value collected so far.
    header_value: Header,
    /// `\r` of a header line was read; `\n` is expected next.
    header_line_end,
    /// `\r` of the blank line was read; `\n` is expected next.
    headers_end,

    /// Reading the body; the payload is the part collected so far.
    body: []const u8,
    /// The request has been fully parsed.
    done,

    /// Builds a `.pathname` state holding `pathname`.
    pub fn initPathname(pathname: []const u8) State {
        return State{ .pathname = pathname };
    }

    /// Builds a `.header_name` state holding `name`.
    pub fn initHeaderName(name: []const u8) State {
        return State{ .header_name = name };
    }

    /// Builds a `.header_value` state holding `header`.
    pub fn initHeaderValue(header: Header) State {
        return State{ .header_value = header };
    }

    /// Builds a `.body` state holding `body`.
    pub fn initBody(body: []const u8) State {
        return State{ .body = body };
    }
};
|
|
|
|
/// Current position in the request state machine.
state: State,
/// Value of the `Content-Length` header once one has been parsed; `null`
/// until then.
content_length: ?usize,
|
|
|
|
/// Creates a parser positioned at the very start of a request, with no
/// `Content-Length` recorded yet.
pub fn init() Parser {
    const fresh: Parser = .{
        .content_length = null,
        .state = .init,
    };
    return fresh;
}
|
|
|
|
/// Feeds `chars` to the parser and stops at the first produced result.
///
/// Returns how many bytes were processed together with the result, if any.
/// When `result` is `null`, all of `chars` was consumed without completing an
/// item. When a result is returned, `consumed` may be smaller than
/// `chars.len`, and the caller is expected to call again with the remainder.
///
/// NOTE(review): the slices kept in the parser state are widened in place
/// rather than copied, so successive calls presumably need to pass
/// consecutive parts of one contiguous buffer; confirm with callers.
pub fn consume(self: *Parser, chars: []const u8) Error!ConsumeResult {
    var offset: usize = 0;
    while (offset < chars.len) {
        switch (self.state) {
            .body => |collected| {
                // Take as much of the body as this chunk offers, but never
                // more than what `Content-Length` still allows.
                const total = self.content_length.?;
                const available = chars.len - offset;
                const missing = total - collected.len;
                const step = @min(available, missing);

                const grown = extendSliceBy(collected, step);
                offset += step;

                if (grown.len < total) {
                    self.state = .initBody(grown);
                    continue;
                }

                self.state = .done;
                return .{
                    .consumed = offset,
                    .result = .initBody(grown),
                };
            },
            else => {
                // TODO: re-enable the `consumeVec` fast path for chunks of at
                // least `vec_len` bytes once it is fixed; until then every
                // non-body byte goes through `consumeChar`.
                const byte_ptr = &chars[offset];
                const produced = try self.consumeChar(byte_ptr);
                offset += 1;

                if (produced) |result| {
                    return .{
                        .consumed = offset,
                        .result = result,
                    };
                }
            },
        }
    }

    std.debug.assert(offset == chars.len);
    return .{
        .consumed = chars.len,
        .result = null,
    };
}
|
|
|
|
/// Advances the state machine by exactly one byte.
///
/// `char_ptr` points at the byte to process. The slices stored in the parser
/// state (pathname, header name, header value, body) are created from this
/// pointer and later widened in place, so no bytes are copied.
///
/// Returns a `Result` when this byte completes an item, `null` when more
/// input is needed, and an `Error` when the byte is not valid in the current
/// state. Must not be called once the state is `.done`.
fn consumeChar(self: *Parser, char_ptr: *const u8) Error!?Result {
    const char = char_ptr.*;
    // One-byte view of the current byte; used to start a header name.
    const char_slice: *const [1]u8 = @ptrCast(char_ptr);
    // Empty slice positioned right after the current byte; used to start a
    // token whose first byte has not been seen yet.
    const next_char_slice = @as([*]const u8, @ptrCast(char_ptr))[1..1];

    switch (self.state) {
        .init => switch (char) {
            'C' => self.state = .method_c,
            'D' => self.state = .method_d,
            'G' => self.state = .method_g,
            'H' => self.state = .method_h,
            'O' => self.state = .method_o,
            'P' => self.state = .method_p,
            'T' => self.state = .method_t,
            else => return error.MethodNotSupported,
        },
        .method_c => switch (char) {
            'O' => self.state = .method_co,
            else => return error.MethodNotSupported,
        },
        .method_d => switch (char) {
            'E' => self.state = .method_de,
            else => return error.MethodNotSupported,
        },
        .method_g => switch (char) {
            'E' => self.state = .method_ge,
            else => return error.MethodNotSupported,
        },
        .method_h => switch (char) {
            'E' => self.state = .method_he,
            else => return error.MethodNotSupported,
        },
        .method_o => switch (char) {
            'P' => self.state = .method_op,
            else => return error.MethodNotSupported,
        },
        .method_p => switch (char) {
            'A' => self.state = .method_pa,
            'O' => self.state = .method_po,
            'U' => self.state = .method_pu,
            else => return error.MethodNotSupported,
        },
        .method_t => switch (char) {
            'R' => self.state = .method_tr,
            else => return error.MethodNotSupported,
        },
        .method_co => switch (char) {
            'N' => self.state = .method_con,
            else => return error.MethodNotSupported,
        },
        .method_de => switch (char) {
            'L' => self.state = .method_del,
            else => return error.MethodNotSupported,
        },
        .method_ge => switch (char) {
            'T' => {
                self.state = .method_complete;
                return .initMethod(.GET);
            },
            else => return error.MethodNotSupported,
        },
        .method_he => switch (char) {
            'A' => self.state = .method_hea,
            else => return error.MethodNotSupported,
        },
        .method_op => switch (char) {
            'T' => self.state = .method_opt,
            else => return error.MethodNotSupported,
        },
        .method_pa => switch (char) {
            'T' => self.state = .method_pat,
            else => return error.MethodNotSupported,
        },
        .method_po => switch (char) {
            'S' => self.state = .method_pos,
            else => return error.MethodNotSupported,
        },
        .method_pu => switch (char) {
            'T' => {
                self.state = .method_complete;
                return .initMethod(.PUT);
            },
            else => return error.MethodNotSupported,
        },
        .method_tr => switch (char) {
            'A' => self.state = .method_tra,
            else => return error.MethodNotSupported,
        },
        .method_con => switch (char) {
            'N' => self.state = .method_conn,
            else => return error.MethodNotSupported,
        },
        .method_del => switch (char) {
            'E' => self.state = .method_dele,
            else => return error.MethodNotSupported,
        },
        .method_hea => switch (char) {
            'D' => {
                self.state = .method_complete;
                return .initMethod(.HEAD);
            },
            else => return error.MethodNotSupported,
        },
        .method_opt => switch (char) {
            'I' => self.state = .method_opti,
            else => return error.MethodNotSupported,
        },
        .method_pat => switch (char) {
            'C' => self.state = .method_patc,
            else => return error.MethodNotSupported,
        },
        .method_pos => switch (char) {
            'T' => {
                self.state = .method_complete;
                return .initMethod(.POST);
            },
            else => return error.MethodNotSupported,
        },
        .method_tra => switch (char) {
            'C' => self.state = .method_trac,
            else => return error.MethodNotSupported,
        },
        .method_conn => switch (char) {
            'E' => self.state = .method_conne,
            else => return error.MethodNotSupported,
        },
        .method_dele => switch (char) {
            'T' => self.state = .method_delet,
            else => return error.MethodNotSupported,
        },
        .method_opti => switch (char) {
            'O' => self.state = .method_optio,
            else => return error.MethodNotSupported,
        },
        .method_patc => switch (char) {
            'H' => {
                self.state = .method_complete;
                return .initMethod(.PATCH);
            },
            else => return error.MethodNotSupported,
        },
        .method_trac => switch (char) {
            'E' => {
                self.state = .method_complete;
                return .initMethod(.TRACE);
            },
            else => return error.MethodNotSupported,
        },
        .method_conne => switch (char) {
            'C' => self.state = .method_connec,
            else => return error.MethodNotSupported,
        },
        .method_delet => switch (char) {
            'E' => {
                self.state = .method_complete;
                return .initMethod(.DELETE);
            },
            else => return error.MethodNotSupported,
        },
        .method_optio => switch (char) {
            'N' => self.state = .method_option,
            else => return error.MethodNotSupported,
        },
        .method_connec => switch (char) {
            'T' => {
                self.state = .method_complete;
                return .initMethod(.CONNECT);
            },
            else => return error.MethodNotSupported,
        },
        .method_option => switch (char) {
            'S' => {
                self.state = .method_complete;
                return .initMethod(.OPTIONS);
            },
            else => return error.MethodNotSupported,
        },
        .method_complete => switch (char) {
            // The pathname starts at the byte following the space.
            ' ' => self.state = .initPathname(next_char_slice),
            else => return error.MethodNotSupported,
        },
        .pathname => |pathname| switch (char) {
            ' ' => {
                self.state = .pathname_complete;
                return .initPathname(pathname);
            },
            else => self.state = .initPathname(extendSlice(pathname)),
        },
        .pathname_complete => switch (char) {
            'H' => self.state = .version_h,
            else => return error.HttpVersionNotSupported,
        },
        .version_h => switch (char) {
            'T' => self.state = .version_ht,
            else => return error.HttpVersionNotSupported,
        },
        .version_ht => switch (char) {
            'T' => self.state = .version_htt,
            else => return error.HttpVersionNotSupported,
        },
        .version_htt => switch (char) {
            'P' => self.state = .version_http,
            else => return error.HttpVersionNotSupported,
        },
        .version_http => switch (char) {
            '/' => self.state = .@"version_http/",
            else => return error.HttpVersionNotSupported,
        },
        .@"version_http/" => switch (char) {
            '1' => self.state = .@"version_http/1",
            else => return error.HttpVersionNotSupported,
        },
        .@"version_http/1" => switch (char) {
            '.' => self.state = .@"version_http/1.",
            else => return error.HttpVersionNotSupported,
        },
        .@"version_http/1." => switch (char) {
            '1' => self.state = .version_complete,
            else => return error.HttpVersionNotSupported,
        },
        .version_complete => switch (char) {
            '\r' => self.state = .start_line_end,
            else => return error.HttpVersionNotSupported,
        },
        .start_line_end => switch (char) {
            '\n' => self.state = .header_name_start,
            else => return error.SyntaxError,
        },
        .header_name_start => switch (char) {
            // A line starting with `\r` is the blank line ending the headers.
            '\r' => {
                self.state = .headers_end;
                return .end_of_headers;
            },
            else => self.state = .initHeaderName(char_slice),
        },
        .header_name => |name| switch (char) {
            // The value starts at the byte following the colon.
            ':' => self.state = .initHeaderValue(.init(.init(name), next_char_slice)),
            else => self.state = .initHeaderName(extendSlice(name)),
        },
        .header_value => |untrimmed_header| switch (char) {
            '\r' => {
                self.state = .header_line_end;
                const header: Header = .init(
                    untrimmed_header.name,
                    std.mem.trim(u8, untrimmed_header.value, " \t"),
                );

                if (header.isNamedKnown(.@"Content-Length")) {
                    const content_length = try parseContentLength(header.value);
                    if (self.content_length) |current_content_length| {
                        @branchHint(.unlikely);
                        // Accept multiple `Content-Length` headers as long as
                        // they have the exact same value.
                        if (content_length != current_content_length) {
                            return error.SyntaxError;
                        }
                    } else {
                        self.content_length = content_length;
                    }
                }

                return .initHeader(header);
            },
            else => self.state = .initHeaderValue(extendHeader(untrimmed_header)),
        },
        .header_line_end => switch (char) {
            '\n' => self.state = .header_name_start,
            else => return error.SyntaxError,
        },
        .headers_end => switch (char) {
            '\n' => {
                const content_length = self.content_length orelse 0;
                if (content_length == 0) {
                    // No body expected: finish right away with an empty one.
                    self.state = .done;
                    return .initBody(&.{});
                } else {
                    // The body starts at the byte following this `\n`.
                    self.state = .initBody(next_char_slice);
                }
            },
            else => return error.SyntaxError,
        },
        .body => |body| {
            const content_length = self.content_length.?;
            const new_body = extendSlice(body);
            if (new_body.len >= content_length) {
                self.state = .done;
                return .initBody(new_body);
            } else {
                self.state = .initBody(new_body);
            }
        },
        .done => unreachable,
    }

    return null;
}

/// Parses a `Content-Length` field value as one or more ASCII decimal digits.
///
/// `std.fmt.parseInt` on its own also accepts a leading sign and `_` digit
/// separators, so the digits are validated first. Returns `error.SyntaxError`
/// for an empty value, any non-digit byte, or a number that does not fit in
/// `usize`.
fn parseContentLength(value: []const u8) Error!usize {
    if (value.len == 0) return error.SyntaxError;
    for (value) |digit| {
        if (!std.ascii.isDigit(digit)) return error.SyntaxError;
    }
    return std.fmt.parseInt(usize, value, 10) catch return error.SyntaxError;
}
|
|
|
|
/// Returns `slice` widened by one element at its end. The start pointer is
/// unchanged; the caller guarantees that the element right after `slice` is
/// valid memory.
fn extendSlice(slice: []const u8) []const u8 {
    const widened_len = slice.len + 1;
    return slice.ptr[0..widened_len];
}
|
|
|
|
/// Returns `slice` widened by `n` elements at its end. The start pointer is
/// unchanged; the caller guarantees that the `n` elements right after `slice`
/// are valid memory.
fn extendSliceBy(slice: []const u8, n: usize) []const u8 {
    const widened_len = slice.len + n;
    return slice.ptr[0..widened_len];
}
|
|
|
|
/// Returns a header with the same name as `header` and its value widened by
/// one byte at the end.
fn extendHeader(header: Header) Header {
    const widened_value = extendSlice(header.value);
    return .{
        .name = header.name,
        .value = widened_value,
    };
}
|
|
|
|
// --- SIMD --------------------------------------------------------------------
|
|
|
|
/// Byte vector used by the SIMD helpers. Its width is the one suggested by
/// the standard library for the target; when the target reports no
/// suggestion, 16 lanes are used so that the file still compiles there.
const Vec = @Vector(std.simd.suggestVectorLength(u8) orelse 16, u8);
/// Number of `u8` lanes in `Vec`.
const vec_len = @typeInfo(Vec).vector.len;
|
|
|
|
/// A fixed byte prefix that can be compared against the start of a `Vec` in
/// one vector operation.
const Pattern = struct {
    /// The prefix bytes, zero-padded up to `vec_len`.
    value: Vec,
    /// `0xFF` for lanes covered by the prefix, `0x00` for the rest.
    mask: Vec,
    /// Length of the prefix in bytes.
    len: u32,

    /// Builds the pattern for `prefix`. Fails to compile when `prefix` is
    /// longer than `vec_len`.
    pub fn init(comptime prefix: []const u8) Pattern {
        if (prefix.len > vec_len) {
            @compileError("Prefix length is too high");
        }

        // Start from all-zero lanes, then fill in the lanes the prefix covers.
        var value_lanes = [_]u8{0x00} ** vec_len;
        var mask_lanes = [_]u8{0x00} ** vec_len;
        for (prefix, 0..) |byte, lane| {
            value_lanes[lane] = byte;
            mask_lanes[lane] = 0xFF;
        }

        return .{
            .value = value_lanes,
            .mask = mask_lanes,
            .len = prefix.len,
        };
    }

    /// Reports whether `vec` starts with the prefix. Lanes past the prefix
    /// are masked out and therefore ignored.
    inline fn check(self: Pattern, vec: Vec) bool {
        const masked = vec & self.mask;
        const lanes_equal = masked == self.value;
        return @reduce(.And, lanes_equal);
    }
};
|
|
|
|
/// Prefix patterns used by the SIMD path.
const patterns = struct {
    /// One pattern per supported method. Each includes the space that
    /// separates the method from the pathname.
    pub const methods = struct {
        // NOTE These patterns are arranged in a specific order, such that the
        // first ones are the most common (based on vibes only).

        pub const GET: Pattern = .init("GET ");
        pub const POST: Pattern = .init("POST ");
        pub const HEAD: Pattern = .init("HEAD ");

        pub const PUT: Pattern = .init("PUT ");
        pub const DELETE: Pattern = .init("DELETE ");
        pub const PATCH: Pattern = .init("PATCH ");

        pub const OPTIONS: Pattern = .init("OPTIONS ");
        pub const CONNECT: Pattern = .init("CONNECT ");
        pub const TRACE: Pattern = .init("TRACE ");
    };

    /// The version token together with the line terminator of the request
    /// line.
    pub const @"version_http/1.1": Pattern = .init("HTTP/1.1\r\n");
};
|
|
|
|
/// Reports whether any lane of `vec` is a space character.
inline fn hasSpace(vec: Vec) bool {
    const spaces: Vec = @splat(' ');
    return @reduce(.Or, vec == spaces);
}
|
|
|
|
/// Reports whether any lane of `vec` is a carriage return or a line feed.
inline fn hasCRLF(vec: Vec) bool {
    const carriage_returns: Vec = @splat('\r');
    const line_feeds: Vec = @splat('\n');
    return @reduce(.Or, vec == carriage_returns) or @reduce(.Or, vec == line_feeds);
}
|
|
|
|
/// Tries to advance the parser by looking at `vec_len` bytes at once.
///
/// May return with `.consumed == 0`, in which case the parsing should be
/// retried with non-SIMD method.
///
/// Only a few states have a vector fast path:
/// - `.init`: matches a whole method name plus the following space.
/// - `.pathname`: absorbs the whole vector when it contains no space.
/// - `.pathname_complete`: matches `HTTP/1.1\r\n` in one step.
/// - `.header_value`: absorbs the whole vector when it contains neither `\r`
///   nor `\n`.
pub fn consumeVec(self: *Parser, vec_ptr: *const [vec_len]u8) Error!ConsumeResult {
    const vec: Vec = vec_ptr.*;
    switch (self.state) {
        .init => {
            inline for (@typeInfo(patterns.methods).@"struct".decls) |decl| {
                const pattern: Pattern = @field(patterns.methods, decl.name);
                if (pattern.check(vec)) {
                    // The pattern already covers the separating space, so the
                    // pathname begins right after the matched prefix. Going
                    // through `.method_complete` would wrongly expect that
                    // space a second time.
                    const prefix_len: usize = pattern.len;
                    self.state = .initPathname(vec_ptr[prefix_len..prefix_len]);
                    return .{
                        .consumed = pattern.len,
                        .result = .initMethod(@field(Method, decl.name)),
                    };
                }
            }

            return error.MethodNotSupported;
        },
        .pathname => |pathname| {
            if (hasSpace(vec)) {
                // Delegate to `consumeChar`.
                return .{
                    .consumed = 0,
                    .result = null,
                };
            }

            self.state = .initPathname(extendSliceBy(pathname, vec_len));
            return .{
                .consumed = vec_len,
                .result = null,
            };
        },
        .pathname_complete => {
            if (patterns.@"version_http/1.1".check(vec)) {
                self.state = .header_name_start;
                return .{
                    .consumed = patterns.@"version_http/1.1".len,
                    .result = null,
                };
            } else {
                return error.HttpVersionNotSupported;
            }
        },
        .header_value => |header| {
            if (hasCRLF(vec)) {
                // Delegate to `consumeChar`.
                return .{
                    .consumed = 0,
                    .result = null,
                };
            }

            self.state = .initHeaderValue(.{
                .name = header.name,
                .value = extendSliceBy(header.value, vec_len),
            });
            return .{
                .consumed = vec_len,
                .result = null,
            };
        },
        else => {
            // Delegate to `consumeChar`.
            return .{
                .consumed = 0,
                .result = null,
            };
        },
    }
}
|