//! HTTP/1.1 parser.
//!
//! This parser is *streaming*, meaning it can gracefully consume partial HTTP
//! request bytes. An instance of this parser is meant for parsing a single
//! request. Once the request is fully completed, a new instance of the parser
//! should be initialized.
//!
//! During a single ingestion, the parser can return one of the following:
//!
//! - method of type `Method`, i.e. HTTP method (aka verb)
//! - pathname of type `[]const u8`
//! - header of type `Header`, i.e. a field name with a value
//! - end_of_headers of type `void`, i.e. a marker which informs the user of
//!   this parser that there will be no more headers; this result can be used by
//!   the user to make decisions about further processing of the request based
//!   on the full knowledge of all the headers
//! - body of type `[]const u8`, i.e. a slice to the request body (or
//!   zero-length slice if there is no request body)
//!
//! The first result returned from the parser will always be the method,
//! followed by the pathname. Then, zero or more headers will follow, terminated
//! with the end_of_headers marker. The parser will always finish with a single
//! body result.
//!
//! Parser methods stop processing at the first result. Therefore, if any result
//! is returned, the provided bytes might have been only partially consumed and
//! the methods must be repeatedly called until all of the bytes are consumed.
//! When the body is returned, the parser is finished and should no longer be
//! used. If the body was returned, but the bytes were not fully consumed, it
//! means that the remainder belongs to a subsequent HTTP request.
//!
//! When an error is returned from the parser, the HTTP request should be
//! considered malformed. You may choose to respond to it, but the request must
//! no longer be parsed and the connection should be closed.
//!
//! The parser is not involved in any HTTP semantics, only its syntax. It is up
//! to the user of this parser to respect all of the HTTP standards (if they
//! even choose to). For example, none of the header field values are verified.
//! The only exception is `Content-Length`. The parser must know the value to
//! determine the length of the request body. If the value fails to parse as a
//! decimal non-negative integer, a syntax error is returned. Note that
//! according to [RFC 9110, Section 8.6: HTTP Semantics](https://datatracker.ietf.org/doc/html/rfc9110#section-8.6),
//! `Content-Length` header field value consisting of the same decimal value
//! repeated as a comma-separated list (e.g. `Content-Length: 42, 42`) MAY be
//! accepted. This parser chooses not to accept it.

const std = @import("std");

const Parser = @This();
const FieldName = @import("FieldName.zig").FieldName;
const Header = @import("Header.zig");
const Method = @import("Method.zig").Method;

/// Errors the parser can report. After any of them the request must be treated
/// as malformed and the parser instance must not be fed further bytes.
pub const Error = error{
    MethodNotSupported,
    HttpVersionNotSupported,
    SyntaxError,
};

/// A single parsing result handed back to the caller.
///
/// Slices stored in results point into the bytes that were passed to
/// `consume`; the parser never copies input.
pub const Result = union(enum) {
    method: Method,
    pathname: []const u8,
    header: Header,
    end_of_headers: void,
    body: []const u8,

    pub fn initMethod(method: Method) Result {
        return .{ .method = method };
    }

    pub fn initPathname(pathname: []const u8) Result {
        return .{ .pathname = pathname };
    }

    pub fn initHeader(header: Header) Result {
        return .{ .header = header };
    }

    pub fn initBody(body: []const u8) Result {
        return .{ .body = body };
    }
};

/// Outcome of a single `consume`/`consumeVec` call.
pub const ConsumeResult = struct {
    /// Number of bytes of the provided input that were processed.
    consumed: usize,
    /// The result produced by those bytes, or `null` if more input is needed.
    result: ?Result,
};

/// Parser state machine. The `method_*` and `version_*` states track how much
/// of a method name / the `HTTP/1.1` literal has been matched so far. States
/// carrying a slice accumulate a view over the input seen so far.
pub const State = union(enum) {
    init: void,

    method_c: void,
    method_d: void,
    method_g: void,
    method_h: void,
    method_o: void,
    method_p: void,
    method_t: void,

    method_co: void,
    method_de: void,
    method_ge: void,
    method_he: void,
    method_op: void,
    method_pa: void,
    method_po: void,
    method_pu: void,
    method_tr: void,

    method_con: void,
    method_del: void,
    method_hea: void,
    method_opt: void,
    method_pat: void,
    method_pos: void,
    method_tra: void,

    method_conn: void,
    method_dele: void,
    method_opti: void,
    method_patc: void,
    method_trac: void,

    method_conne: void,
    method_delet: void,
    method_optio: void,

    method_connec: void,
    method_option: void,

    method_complete: void,

    pathname: []const u8,
    pathname_complete: void,

    version_h: void,
    version_ht: void,
    version_htt: void,
    version_http: void,
    @"version_http/": void,
    @"version_http/1": void,
    @"version_http/1.": void,
    version_complete: void,

    start_line_end: void,

    header_name_start: void,
    header_name: []const u8,
    header_value: Header,
    header_line_end: void,
    headers_end: void,

    body: []const u8,

    done: void,

    pub fn initPathname(pathname: []const u8) State {
        return .{ .pathname = pathname };
    }

    pub fn initHeaderName(name: []const u8) State {
        return .{ .header_name = name };
    }

    pub fn initHeaderValue(header: Header) State {
        return .{ .header_value = header };
    }

    pub fn initBody(body: []const u8) State {
        return .{ .body = body };
    }
};

state: State,
/// Value of the `Content-Length` header, if one has been seen.
content_length: ?usize,

/// Creates a parser ready to receive the first byte of a request.
pub fn init() Parser {
    return .{
        .state = .init,
        .content_length = null,
    };
}

/// Feeds `chars` to the parser.
///
/// Processing stops at the first produced result, so `consumed` may be smaller
/// than `chars.len`; call again with the remaining bytes. When no result is
/// produced, all of `chars` has been consumed and `result` is `null`.
///
/// Must not be called again after a body result has been returned: the `done`
/// state is treated as unreachable.
pub fn consume(self: *Parser, chars: []const u8) Error!ConsumeResult {
    var i: usize = 0;
    while (i < chars.len) {
        switch (self.state) {
            .body => |body| {
                // The body is taken in bulk: grab as much as is available, but
                // never more than what `Content-Length` still allows.
                const content_length = self.content_length.?;
                const to_consume = @min(chars.len - i, content_length - body.len);
                const new_body = extendSliceBy(body, to_consume);
                i += to_consume;
                if (new_body.len >= content_length) {
                    self.state = .done;
                    return .{
                        .consumed = i,
                        .result = .initBody(new_body),
                    };
                } else {
                    self.state = .initBody(new_body);
                }
            },
            else => {
                // TODO fix
                // if (chars.len - i >= vec_len) {
                //     const vec_res = try self.consumeVec(chars[i..][0..vec_len]);
                //     i += vec_res.consumed;
                //     if (vec_res.result) |result| {
                //         return .{
                //             .consumed = i,
                //             .result = result,
                //         };
                //     }
                //     if (vec_res.consumed > 0) {
                //         continue;
                //     }
                // }
                const maybe_result = try self.consumeChar(&chars[i]);
                i += 1;
                if (maybe_result) |result| {
                    return .{
                        .consumed = i,
                        .result = result,
                    };
                }
            },
        }
    }
    std.debug.assert(i == chars.len);
    return .{
        .consumed = chars.len,
        .result = null,
    };
}

/// Advances the state machine by exactly one byte.
///
/// `char_ptr` must point into the caller's input buffer: accumulated slices
/// (pathname, header name/value, body) are built by extending views that start
/// at this pointer or at the byte right after it.
fn consumeChar(self: *Parser, char_ptr: *const u8) Error!?Result {
    const char = char_ptr.*;
    // One-byte view of the current character.
    const char_slice: *const [1]u8 = @ptrCast(char_ptr);
    // Zero-length view positioned at the *next* character; used as the seed
    // for slices that begin after a delimiter.
    const next_char_slice = @as([*]const u8, @ptrCast(char_ptr))[1..1];

    switch (self.state) {
        .init => switch (char) {
            'C' => self.state = .method_c,
            'D' => self.state = .method_d,
            'G' => self.state = .method_g,
            'H' => self.state = .method_h,
            'O' => self.state = .method_o,
            'P' => self.state = .method_p,
            'T' => self.state = .method_t,
            else => return error.MethodNotSupported,
        },

        .method_c => switch (char) {
            'O' => self.state = .method_co,
            else => return error.MethodNotSupported,
        },
        .method_d => switch (char) {
            'E' => self.state = .method_de,
            else => return error.MethodNotSupported,
        },
        .method_g => switch (char) {
            'E' => self.state = .method_ge,
            else => return error.MethodNotSupported,
        },
        .method_h => switch (char) {
            'E' => self.state = .method_he,
            else => return error.MethodNotSupported,
        },
        .method_o => switch (char) {
            'P' => self.state = .method_op,
            else => return error.MethodNotSupported,
        },
        .method_p => switch (char) {
            'A' => self.state = .method_pa,
            'O' => self.state = .method_po,
            'U' => self.state = .method_pu,
            else => return error.MethodNotSupported,
        },
        .method_t => switch (char) {
            'R' => self.state = .method_tr,
            else => return error.MethodNotSupported,
        },

        .method_co => switch (char) {
            'N' => self.state = .method_con,
            else => return error.MethodNotSupported,
        },
        .method_de => switch (char) {
            'L' => self.state = .method_del,
            else => return error.MethodNotSupported,
        },
        .method_ge => switch (char) {
            'T' => {
                self.state = .method_complete;
                return .initMethod(.GET);
            },
            else => return error.MethodNotSupported,
        },
        .method_he => switch (char) {
            'A' => self.state = .method_hea,
            else => return error.MethodNotSupported,
        },
        .method_op => switch (char) {
            'T' => self.state = .method_opt,
            else => return error.MethodNotSupported,
        },
        .method_pa => switch (char) {
            'T' => self.state = .method_pat,
            else => return error.MethodNotSupported,
        },
        .method_po => switch (char) {
            'S' => self.state = .method_pos,
            else => return error.MethodNotSupported,
        },
        .method_pu => switch (char) {
            'T' => {
                self.state = .method_complete;
                return .initMethod(.PUT);
            },
            else => return error.MethodNotSupported,
        },
        .method_tr => switch (char) {
            'A' => self.state = .method_tra,
            else => return error.MethodNotSupported,
        },

        .method_con => switch (char) {
            'N' => self.state = .method_conn,
            else => return error.MethodNotSupported,
        },
        .method_del => switch (char) {
            'E' => self.state = .method_dele,
            else => return error.MethodNotSupported,
        },
        .method_hea => switch (char) {
            'D' => {
                self.state = .method_complete;
                return .initMethod(.HEAD);
            },
            else => return error.MethodNotSupported,
        },
        .method_opt => switch (char) {
            'I' => self.state = .method_opti,
            else => return error.MethodNotSupported,
        },
        .method_pat => switch (char) {
            'C' => self.state = .method_patc,
            else => return error.MethodNotSupported,
        },
        .method_pos => switch (char) {
            'T' => {
                self.state = .method_complete;
                return .initMethod(.POST);
            },
            else => return error.MethodNotSupported,
        },
        .method_tra => switch (char) {
            'C' => self.state = .method_trac,
            else => return error.MethodNotSupported,
        },

        .method_conn => switch (char) {
            'E' => self.state = .method_conne,
            else => return error.MethodNotSupported,
        },
        .method_dele => switch (char) {
            'T' => self.state = .method_delet,
            else => return error.MethodNotSupported,
        },
        .method_opti => switch (char) {
            'O' => self.state = .method_optio,
            else => return error.MethodNotSupported,
        },
        .method_patc => switch (char) {
            'H' => {
                self.state = .method_complete;
                return .initMethod(.PATCH);
            },
            else => return error.MethodNotSupported,
        },
        .method_trac => switch (char) {
            'E' => {
                self.state = .method_complete;
                return .initMethod(.TRACE);
            },
            else => return error.MethodNotSupported,
        },

        .method_conne => switch (char) {
            'C' => self.state = .method_connec,
            else => return error.MethodNotSupported,
        },
        .method_delet => switch (char) {
            'E' => {
                self.state = .method_complete;
                return .initMethod(.DELETE);
            },
            else => return error.MethodNotSupported,
        },
        .method_optio => switch (char) {
            'N' => self.state = .method_option,
            else => return error.MethodNotSupported,
        },

        .method_connec => switch (char) {
            'T' => {
                self.state = .method_complete;
                return .initMethod(.CONNECT);
            },
            else => return error.MethodNotSupported,
        },
        .method_option => switch (char) {
            'S' => {
                self.state = .method_complete;
                return .initMethod(.OPTIONS);
            },
            else => return error.MethodNotSupported,
        },

        .method_complete => switch (char) {
            // The pathname starts right after the separating space.
            ' ' => self.state = .initPathname(next_char_slice),
            else => return error.MethodNotSupported,
        },

        .pathname => |pathname| switch (char) {
            ' ' => {
                self.state = .pathname_complete;
                return .initPathname(pathname);
            },
            else => self.state = .initPathname(extendSlice(pathname)),
        },
        .pathname_complete => switch (char) {
            'H' => self.state = .version_h,
            else => return error.HttpVersionNotSupported,
        },

        .version_h => switch (char) {
            'T' => self.state = .version_ht,
            else => return error.HttpVersionNotSupported,
        },
        .version_ht => switch (char) {
            'T' => self.state = .version_htt,
            else => return error.HttpVersionNotSupported,
        },
        .version_htt => switch (char) {
            'P' => self.state = .version_http,
            else => return error.HttpVersionNotSupported,
        },
        .version_http => switch (char) {
            '/' => self.state = .@"version_http/",
            else => return error.HttpVersionNotSupported,
        },
        .@"version_http/" => switch (char) {
            '1' => self.state = .@"version_http/1",
            else => return error.HttpVersionNotSupported,
        },
        .@"version_http/1" => switch (char) {
            '.' => self.state = .@"version_http/1.",
            else => return error.HttpVersionNotSupported,
        },
        .@"version_http/1." => switch (char) {
            '1' => self.state = .version_complete,
            else => return error.HttpVersionNotSupported,
        },
        .version_complete => switch (char) {
            '\r' => self.state = .start_line_end,
            else => return error.HttpVersionNotSupported,
        },

        .start_line_end => switch (char) {
            '\n' => self.state = .header_name_start,
            else => return error.SyntaxError,
        },

        .header_name_start => switch (char) {
            // An empty line terminates the header section.
            '\r' => {
                self.state = .headers_end;
                return .end_of_headers;
            },
            else => self.state = .initHeaderName(char_slice),
        },
        .header_name => |name| switch (char) {
            ':' => self.state = .initHeaderValue(.init(.init(name), next_char_slice)),
            else => self.state = .initHeaderName(extendSlice(name)),
        },
        .header_value => |untrimmed_header| switch (char) {
            '\r' => {
                self.state = .header_line_end;
                // Optional whitespace around the field value is not part of it.
                const header: Header = .init(
                    untrimmed_header.name,
                    std.mem.trim(u8, untrimmed_header.value, " \t"),
                );
                if (header.isNamedKnown(.@"Content-Length")) {
                    const content_length = try parseContentLength(header.value);
                    if (self.content_length) |current_content_length| {
                        @branchHint(.unlikely);
                        // Accept multiple `Content-Length` headers as long as
                        // they have the exact same value.
                        if (content_length != current_content_length) {
                            return error.SyntaxError;
                        }
                    } else {
                        self.content_length = content_length;
                    }
                }
                return .initHeader(header);
            },
            else => self.state = .initHeaderValue(extendHeader(untrimmed_header)),
        },
        .header_line_end => switch (char) {
            '\n' => self.state = .header_name_start,
            else => return error.SyntaxError,
        },
        .headers_end => switch (char) {
            '\n' => {
                const content_length = self.content_length orelse 0;
                if (content_length == 0) {
                    // No body expected: finish immediately with an empty one.
                    self.state = .done;
                    return .initBody(&.{});
                } else {
                    self.state = .initBody(next_char_slice);
                }
            },
            else => return error.SyntaxError,
        },

        .body => |body| {
            const content_length = self.content_length.?;
            const new_body = extendSlice(body);
            if (new_body.len >= content_length) {
                self.state = .done;
                return .initBody(new_body);
            } else {
                self.state = .initBody(new_body);
            }
        },

        // The parser must not be used after the body has been returned.
        .done => unreachable,
    }
    return null;
}

/// Parses a `Content-Length` field value.
///
/// The value must consist solely of ASCII decimal digits. `std.fmt.parseInt`
/// on its own would also accept a leading sign and `_` digit separators, which
/// are not valid in this header, so the characters are validated first.
fn parseContentLength(value: []const u8) Error!usize {
    if (value.len == 0) return error.SyntaxError;
    for (value) |digit| {
        if (!std.ascii.isDigit(digit)) return error.SyntaxError;
    }
    return std.fmt.parseInt(usize, value, 10) catch return error.SyntaxError;
}

/// Grows `slice` by one element past its current end. The caller guarantees
/// that the following byte belongs to the same input buffer.
fn extendSlice(slice: []const u8) []const u8 {
    return slice.ptr[0 .. slice.len + 1];
}

/// Grows `slice` by `n` elements past its current end. The caller guarantees
/// that the following `n` bytes belong to the same input buffer.
fn extendSliceBy(slice: []const u8, n: usize) []const u8 {
    return slice.ptr[0 .. slice.len + n];
}

/// Returns `header` with its value view grown by one byte.
fn extendHeader(header: Header) Header {
    return .{
        .name = header.name,
        .value = extendSlice(header.value),
    };
}

// --- SIMD --------------------------------------------------------------------

const Vec = @Vector(std.simd.suggestVectorLength(u8).?, u8);
const vec_len = @typeInfo(Vec).vector.len;

/// A byte prefix prepared for vector comparison: `value` holds the prefix
/// padded with zeroes, `mask` selects only the prefix lanes.
const Pattern = struct {
    value: Vec,
    mask: Vec,
    len: u32,

    pub fn init(comptime prefix: []const u8) Pattern {
        if (prefix.len > vec_len) {
            @compileError("Prefix length is too high");
        }
        var value: [vec_len]u8 = undefined;
        var mask: [vec_len]u8 = undefined;
        for (0..vec_len) |i| {
            if (i < prefix.len) {
                value[i] = prefix[i];
                mask[i] = 0xFF;
            } else {
                value[i] = 0x00;
                mask[i] = 0x00;
            }
        }
        return .{
            .value = value,
            .mask = mask,
            .len = prefix.len,
        };
    }

    /// Returns whether `vec` starts with the pattern's prefix.
    inline fn check(self: Pattern, vec: Vec) bool {
        return @reduce(.And, vec & self.mask == self.value);
    }
};

const patterns = struct {
    pub const methods = struct {
        // NOTE These patterns are arranged in a specific order, such that the
        // first ones are the most common (based on vibes only).
        pub const GET = Pattern.init("GET ");
        pub const POST = Pattern.init("POST ");
        pub const HEAD = Pattern.init("HEAD ");
        pub const PUT = Pattern.init("PUT ");
        pub const DELETE = Pattern.init("DELETE ");
        pub const PATCH = Pattern.init("PATCH ");
        pub const OPTIONS = Pattern.init("OPTIONS ");
        pub const CONNECT = Pattern.init("CONNECT ");
        pub const TRACE = Pattern.init("TRACE ");
    };

    pub const @"version_http/1.1" = Pattern.init("HTTP/1.1\r\n");
};

/// Returns whether any lane of `vec` is a space.
inline fn hasSpace(vec: Vec) bool {
    const has_space = vec == @as(Vec, @splat(' '));
    return @reduce(.Or, has_space);
}

/// Returns whether any lane of `vec` is a carriage return or a line feed.
inline fn hasCRLF(vec: Vec) bool {
    const has_cr = vec == @as(Vec, @splat('\r'));
    const has_lf = vec == @as(Vec, @splat('\n'));
    return @reduce(.Or, has_cr | has_lf);
}

/// May return with `.consumed == 0`, in which case the parsing should be
/// retried with non-SIMD method.
///
/// Tries to advance the parser using a whole `vec_len`-byte chunk at once.
/// `vec_ptr` must point into the caller's input buffer, because accumulated
/// slices (pathname, header value) are extended in place over that buffer.
///
/// Only a few states have a vectorized fast path; in every other state, and
/// whenever the chunk contains a delimiter that needs byte-precise handling,
/// nothing is consumed and the caller is expected to fall back to the
/// byte-by-byte path.
pub fn consumeVec(self: *Parser, vec_ptr: *const [vec_len]u8) Error!ConsumeResult {
    const vec: Vec = vec_ptr.*;
    // Signals "nothing consumed, use the byte-by-byte path instead".
    const delegate: ConsumeResult = .{
        .consumed = 0,
        .result = null,
    };

    switch (self.state) {
        .init => {
            inline for (@typeInfo(patterns.methods).@"struct".decls) |decl| {
                const pattern: Pattern = @field(patterns.methods, decl.name);
                if (pattern.check(vec)) {
                    // The pattern already includes the space that separates
                    // the method from the pathname, so continue directly with
                    // an empty pathname that starts right after it.
                    self.state = .initPathname(
                        @as([*]const u8, vec_ptr)[pattern.len..pattern.len],
                    );
                    return .{
                        .consumed = pattern.len,
                        .result = .initMethod(@field(Method, decl.name)),
                    };
                }
            }
            return error.MethodNotSupported;
        },
        .pathname => |pathname| {
            if (hasSpace(vec)) {
                // The pathname ends somewhere in this chunk.
                return delegate;
            }
            self.state = .initPathname(extendSliceBy(pathname, vec_len));
            return .{
                .consumed = vec_len,
                .result = null,
            };
        },
        .pathname_complete => {
            if (patterns.@"version_http/1.1".check(vec)) {
                self.state = .header_name_start;
                return .{
                    .consumed = patterns.@"version_http/1.1".len,
                    .result = null,
                };
            }
            // Let the byte-by-byte path report the precise error.
            return delegate;
        },
        .header_value => |header| {
            if (hasCRLF(vec)) {
                // The header value (possibly) ends somewhere in this chunk.
                return delegate;
            }
            self.state = .initHeaderValue(.{
                .name = header.name,
                .value = extendSliceBy(header.value, vec_len),
            });
            return .{
                .consumed = vec_len,
                .result = null,
            };
        },
        else => return delegate,
    }
}