From 9a4932e629deb645f4fca92a294dbea9997e727c Mon Sep 17 00:00:00 2001 From: Szymon Nowakowski Date: Mon, 9 Mar 2026 16:59:29 +0100 Subject: [PATCH] web: Great cleanup WIP --- packages/web/src/ShortString.zig | 33 + packages/web/src/http.zig | 3 + packages/web/src/http/FieldName.zig | 44 ++ packages/web/src/http/Header.zig | 333 +-------- packages/web/src/http/KnownFieldName.zig | 367 ++++++++++ packages/web/src/http/Parser.zig | 870 +++++++++++++---------- packages/web/src/{ => http}/Route.zig | 2 +- 7 files changed, 972 insertions(+), 680 deletions(-) create mode 100644 packages/web/src/ShortString.zig create mode 100644 packages/web/src/http/FieldName.zig create mode 100644 packages/web/src/http/KnownFieldName.zig rename packages/web/src/{ => http}/Route.zig (82%) diff --git a/packages/web/src/ShortString.zig b/packages/web/src/ShortString.zig new file mode 100644 index 0000000..868ff45 --- /dev/null +++ b/packages/web/src/ShortString.zig @@ -0,0 +1,33 @@ +const std = @import("std"); +const ShortString = @This(); + +len: u8, +data: [15]u8, + +pub fn init(string: []const u8) ShortString { + std.debug.assert(string.len <= 15); + + const len: u8 = @intCast(string.len); + var data: [15]u8 = undefined; + + @memcpy(data[0..len], string); + + return .{ + .len = len, + .data = data, + }; +} + +/// Check whether `string` can be converted into a `ShortString` and converts +/// it. Returns `null` if conversion is not possible. 
+pub fn isShortString(string: []const u8) ?ShortString { + return if (string.len <= 15) init(string) else null; +} + +pub fn slice(self: *const ShortString) []const u8 { + return self.data[0..self.len]; +} + +pub fn eql(a: ShortString, b: ShortString) bool { + return std.mem.eql(u8, a.slice(), b.slice()); +} diff --git a/packages/web/src/http.zig b/packages/web/src/http.zig index db520fb..9b50e69 100644 --- a/packages/web/src/http.zig +++ b/packages/web/src/http.zig @@ -1,6 +1,9 @@ const std = @import("std"); +pub const FieldName = @import("http/FieldName.zig").FieldName; pub const Header = @import("http/Header.zig"); +pub const KnownFieldName = @import("http/KnownFieldName.zig").KnownFieldName; pub const Method = @import("http/Method.zig").Method; pub const Parser = @import("http/Parser.zig"); +pub const Route = @import("http/Route.zig"); pub const status = @import("http/status.zig"); diff --git a/packages/web/src/http/FieldName.zig b/packages/web/src/http/FieldName.zig new file mode 100644 index 0000000..7bcfa14 --- /dev/null +++ b/packages/web/src/http/FieldName.zig @@ -0,0 +1,44 @@ +const std = @import("std"); + +const KnownFieldName = @import("KnownFieldName.zig").KnownFieldName; +const ShortString = @import("../ShortString.zig"); + +pub const FieldName = union(enum) { + known: KnownFieldName, + short: ShortString, + long: []const u8, + + pub fn init(name: []const u8) FieldName { + if (KnownFieldName.isKnownFieldName(name)) |known| { + return initKnown(known); + } else if (ShortString.isShortString(name)) |short| { + return initShort(short); + } else { + return initOther(name); + } + } + + pub fn initKnown(known: KnownFieldName) FieldName { + return .{ .known = known }; + } + + pub fn initShort(short: ShortString) FieldName { + return .{ .short = short }; + } + + pub fn initOther(other: []const u8) FieldName { + return .{ .long = other }; + } + + pub fn eql(a: FieldName, b: FieldName) bool { + const tag_a = std.meta.activeTag(a); + const tag_b = 
std.meta.activeTag(b); + if (tag_a != tag_b) return false; + + return switch (a) { + .known => |x| x == b.known, + .short => |x| ShortString.eql(x, b.short), + .long => |x| std.mem.eql(u8, x, b.long), + }; + } +}; diff --git a/packages/web/src/http/Header.zig b/packages/web/src/http/Header.zig index 06f39ed..7be446a 100644 --- a/packages/web/src/http/Header.zig +++ b/packages/web/src/http/Header.zig @@ -1,339 +1,26 @@ const std = @import("std"); const Header = @This(); -name: Name, +const FieldName = @import("FieldName.zig").FieldName; +const KnownFieldName = @import("KnownFieldName.zig").KnownFieldName; + +name: FieldName, value: []const u8, -pub fn init(name: []const u8, value: []const u8) Header { +pub fn init(name: FieldName, value: []const u8) Header { return .{ - .name = .init(name), + .name = name, .value = value, }; } -pub fn initKnown(known: Name.Known, value: []const u8) Header { - return .{ - .name = .initKnown(known), - .value = value, - }; +pub fn isNamed(self: Header, name: FieldName) bool { + return FieldName.eql(self.name, name); } -pub fn isKnown(self: Header, known: Name.Known) bool { +pub fn isNamedKnown(self: Header, known: KnownFieldName) bool { return switch (self.name) { .known => |x| x == known, - .other => false, + else => false, }; } - -pub const Name = union(enum) { - known: Known, - other: []const u8, - - pub fn initKnown(known: Known) Name { - return .{ .known = known }; - } - - pub fn initOther(other: []const u8) Name { - return .{ .other = other }; - } - - pub fn init(name: []const u8) Name { - return if (Known.isKnown(name)) |known| .initKnown(known) else .initOther(name); - } - - pub const Known = enum { - @"A-IM", - Accept, - @"Accept-Additions", - @"Accept-CH", - @"Accept-Charset", - @"Accept-Datetime", - @"Accept-Encoding", - @"Accept-Features", - @"Accept-Language", - @"Accept-Patch", - @"Accept-Post", - @"Accept-Query", - @"Accept-Ranges", - @"Accept-Signature", - @"Access-Control", - @"Access-Control-Allow-Credentials", - 
@"Access-Control-Allow-Headers", - @"Access-Control-Allow-Methods", - @"Access-Control-Allow-Origin", - @"Access-Control-Expose-Headers", - @"Access-Control-Max-Age", - @"Access-Control-Request-Headers", - @"Access-Control-Request-Method", - @"Activate-Storage-Access", - Age, - Allow, - ALPN, - @"Alt-Svc", - @"Alt-Used", - Alternates, - @"AMP-Cache-Transform", - @"Apply-To-Redirect-Ref", - @"Authentication-Control", - @"Authentication-Info", - Authorization, - @"Available-Dictionary", - @"C-Ext", - @"C-Man", - @"C-Opt", - @"C-PEP", - @"C-PEP-Info", - @"Cache-Control", - @"Cache-Group-Invalidation", - @"Cache-Groups", - @"Cache-Status", - @"Cal-Managed-ID", - @"CalDAV-Timezones", - @"Capsule-Protocol", - @"CDN-Cache-Control", - @"CDN-Loop", - @"Cert-Not-After", - @"Cert-Not-Before", - @"Clear-Site-Data", - @"Client-Cert", - @"Client-Cert-Chain", - Close, - @"CMCD-Object", - @"CMCD-Request", - @"CMCD-Session", - @"CMCD-Status", - @"CMSD-Dynamic", - @"CMSD-Static", - @"Concealed-Auth-Export", - @"Configuration-Context", - Connection, - @"Content-Base", - @"Content-Digest", - @"Content-Disposition", - @"Content-Encoding", - @"Content-ID", - @"Content-Language", - @"Content-Length", - @"Content-Location", - @"Content-MD5", - @"Content-Range", - @"Content-Script-Type", - @"Content-Security-Policy", - @"Content-Security-Policy-Report-Only", - @"Content-Style-Type", - @"Content-Type", - @"Content-Version", - Cookie, - Cookie2, - @"Cross-Origin-Embedder-Policy", - @"Cross-Origin-Embedder-Policy-Report-Only", - @"Cross-Origin-Opener-Policy", - @"Cross-Origin-Opener-Policy-Report-Only", - @"Cross-Origin-Resource-Policy", - @"CTA-Common-Access-Token", - DASL, - Date, - DAV, - @"Default-Style", - @"Delta-Base", - Deprecation, - Depth, - @"Derived-From", - Destination, - @"Detached-JWS", - @"Differential-ID", - @"Dictionary-ID", - Digest, - DPoP, - @"DPoP-Nonce", - @"Early-Data", - @"EDIINT-Features", - ETag, - Expect, - @"Expect-CT", - Expires, - Ext, - Forwarded, - From, - 
GetProfile, - Hobareg, - Host, - @"HTTP2-Settings", - If, - @"If-Match", - @"If-Modified-Since", - @"If-None-Match", - @"If-Range", - @"If-Schedule-Tag-Match", - @"If-Unmodified-Since", - IM, - @"Include-Referred-Token-Binding-ID", - Incremental, - Isolation, - @"Keep-Alive", - Label, - @"Last-Event-ID", - @"Last-Modified", - Link, - @"Link-Template", - Location, - @"Lock-Token", - Man, - @"Max-Forwards", - @"Memento-Datetime", - Meter, - @"Method-Check", - @"Method-Check-Expires", - @"MIME-Version", - Negotiate, - NEL, - @"OData-EntityId", - @"OData-Isolation", - @"OData-MaxVersion", - @"OData-Version", - Opt, - @"Optional-WWW-Authenticate", - @"Ordering-Type", - Origin, - @"Origin-Agent-Cluster", - OSCORE, - @"OSLC-Core-Version", - Overwrite, - P3P, - PEP, - @"PEP-Info", - @"Permissions-Policy", - @"PICS-Label", - @"Ping-From", - @"Ping-To", - Position, - Pragma, - Prefer, - @"Preference-Applied", - Priority, - ProfileObject, - Protocol, - @"Protocol-Info", - @"Protocol-Query", - @"Protocol-Request", - @"Proxy-Authenticate", - @"Proxy-Authentication-Info", - @"Proxy-Authorization", - @"Proxy-Features", - @"Proxy-Instruction", - @"Proxy-Status", - Public, - @"Public-Key-Pins", - @"Public-Key-Pins-Report-Only", - Range, - @"Redirect-Ref", - Referer, - @"Referer-Root", - @"Referrer-Policy", - Refresh, - @"Repeatability-Client-ID", - @"Repeatability-First-Sent", - @"Repeatability-Request-ID", - @"Repeatability-Result", - @"Replay-Nonce", - @"Reporting-Endpoints", - @"Repr-Digest", - @"Retry-After", - Safe, - @"Schedule-Reply", - @"Schedule-Tag", - @"Sec-Fetch-Dest", - @"Sec-Fetch-Mode", - @"Sec-Fetch-Site", - @"Sec-Fetch-Storage-Access", - @"Sec-Fetch-User", - @"Sec-GPC", - @"Sec-Purpose", - @"Sec-Token-Binding", - @"Sec-WebSocket-Accept", - @"Sec-WebSocket-Extensions", - @"Sec-WebSocket-Key", - @"Sec-WebSocket-Protocol", - @"Sec-WebSocket-Version", - @"Security-Scheme", - Server, - @"Server-Timing", - @"Set-Cookie", - @"Set-Cookie2", - @"Set-Txn", - SetProfile, - 
Signature, - @"Signature-Input", - SLUG, - SoapAction, - @"Status-URI", - @"Strict-Transport-Security", - Sunset, - @"Surrogate-Capability", - @"Surrogate-Control", - TCN, - TE, - Timeout, - @"Timing-Allow-Origin", - Topic, - Traceparent, - Tracestate, - Trailer, - @"Transfer-Encoding", - TTL, - Upgrade, - Urgency, - URI, - @"Use-As-Dictionary", - @"User-Agent", - @"Variant-Vary", - Vary, - Via, - @"Want-Content-Digest", - @"Want-Digest", - @"Want-Repr-Digest", - Warning, - @"WWW-Authenticate", - @"X-Content-Type-Options", - @"X-Frame-Options", - - /// Maps **lowercased** header names to enum values. - pub const map: std.StaticStringMap(Known) = blk: { - @setEvalBranchQuota(20000); - const fields = @typeInfo(Known).@"enum".fields; - - var kvs_list: [fields.len]struct { []const u8, Known } = undefined; - for (fields, 0..) |field, i| { - var name_buf: [field.name.len]u8 = undefined; - _ = std.ascii.lowerString(&name_buf, field.name); - const name = name_buf; - kvs_list[i] = .{ &name, @field(Known, field.name) }; - } - - break :blk .initComptime(kvs_list); - }; - - /// The maximum length of all known header names. Any header name longer - /// than this cannot be a known header name. 
- pub const max_known_name_len = blk: { - const fields = @typeInfo(Known).@"enum".fields; - - var max_len: usize = 0; - for (fields) |field| { - max_len = @max(max_len, field.name.len); - } - break :blk max_len; - }; - - pub fn isKnown(name: []const u8) ?Known { - if (name.len > max_known_name_len) { - @branchHint(.unlikely); - return null; - } - - var name_lowercase_buf: [max_known_name_len]u8 = undefined; - const name_lowercase = std.ascii.lowerString(&name_lowercase_buf, name); - return map.get(name_lowercase); - } - }; -}; diff --git a/packages/web/src/http/KnownFieldName.zig b/packages/web/src/http/KnownFieldName.zig new file mode 100644 index 0000000..6710bb9 --- /dev/null +++ b/packages/web/src/http/KnownFieldName.zig @@ -0,0 +1,367 @@ +const std = @import("std"); + +pub const KnownFieldName = enum { + + // --- STANDARD FIELD NAMES ------------------------------------------------ + + // These are all names listed under: + // + // https://www.iana.org/assignments/http-fields/http-fields.xhtml + // + // Some of them might be obsoleted or deprecated; they are included here + // nonetheless. + // + // When the list was retrieved, its "Last Updated" date was 2026-03-06. 
+ + @"A-IM", + Accept, + @"Accept-Additions", + @"Accept-CH", + @"Accept-Charset", + @"Accept-Datetime", + @"Accept-Encoding", + @"Accept-Features", + @"Accept-Language", + @"Accept-Patch", + @"Accept-Post", + @"Accept-Query", + @"Accept-Ranges", + @"Accept-Signature", + @"Access-Control", + @"Access-Control-Allow-Credentials", + @"Access-Control-Allow-Headers", + @"Access-Control-Allow-Methods", + @"Access-Control-Allow-Origin", + @"Access-Control-Expose-Headers", + @"Access-Control-Max-Age", + @"Access-Control-Request-Headers", + @"Access-Control-Request-Method", + @"Activate-Storage-Access", + Age, + Allow, + ALPN, + @"Alt-Svc", + @"Alt-Used", + Alternates, + @"AMP-Cache-Transform", + @"Apply-To-Redirect-Ref", + @"Authentication-Control", + @"Authentication-Info", + Authorization, + @"Available-Dictionary", + @"C-Ext", + @"C-Man", + @"C-Opt", + @"C-PEP", + @"C-PEP-Info", + @"Cache-Control", + @"Cache-Group-Invalidation", + @"Cache-Groups", + @"Cache-Status", + @"Cal-Managed-ID", + @"CalDAV-Timezones", + @"Capsule-Protocol", + @"CDN-Cache-Control", + @"CDN-Loop", + @"Cert-Not-After", + @"Cert-Not-Before", + @"Clear-Site-Data", + @"Client-Cert", + @"Client-Cert-Chain", + Close, + @"CMCD-Object", + @"CMCD-Request", + @"CMCD-Session", + @"CMCD-Status", + @"CMSD-Dynamic", + @"CMSD-Static", + @"Concealed-Auth-Export", + @"Configuration-Context", + Connection, + @"Content-Base", + @"Content-Digest", + @"Content-Disposition", + @"Content-Encoding", + @"Content-ID", + @"Content-Language", + @"Content-Length", + @"Content-Location", + @"Content-MD5", + @"Content-Range", + @"Content-Script-Type", + @"Content-Security-Policy", + @"Content-Security-Policy-Report-Only", + @"Content-Style-Type", + @"Content-Type", + @"Content-Version", + Cookie, + Cookie2, + @"Cross-Origin-Embedder-Policy", + @"Cross-Origin-Embedder-Policy-Report-Only", + @"Cross-Origin-Opener-Policy", + @"Cross-Origin-Opener-Policy-Report-Only", + @"Cross-Origin-Resource-Policy", + @"CTA-Common-Access-Token", 
+ DASL, + Date, + DAV, + @"Default-Style", + @"Delta-Base", + Deprecation, + Depth, + @"Derived-From", + Destination, + @"Detached-JWS", + @"Differential-ID", + @"Dictionary-ID", + Digest, + DPoP, + @"DPoP-Nonce", + @"Early-Data", + @"EDIINT-Features", + ETag, + Expect, + @"Expect-CT", + Expires, + Ext, + Forwarded, + From, + GetProfile, + Hobareg, + Host, + @"HTTP2-Settings", + If, + @"If-Match", + @"If-Modified-Since", + @"If-None-Match", + @"If-Range", + @"If-Schedule-Tag-Match", + @"If-Unmodified-Since", + IM, + @"Include-Referred-Token-Binding-ID", + Incremental, + Isolation, + @"Keep-Alive", + Label, + @"Last-Event-ID", + @"Last-Modified", + Link, + @"Link-Template", + Location, + @"Lock-Token", + Man, + @"Max-Forwards", + @"Memento-Datetime", + Meter, + @"Method-Check", + @"Method-Check-Expires", + @"MIME-Version", + Negotiate, + NEL, + @"OData-EntityId", + @"OData-Isolation", + @"OData-MaxVersion", + @"OData-Version", + Opt, + @"Optional-WWW-Authenticate", + @"Ordering-Type", + Origin, + @"Origin-Agent-Cluster", + OSCORE, + @"OSLC-Core-Version", + Overwrite, + P3P, + PEP, + @"PEP-Info", + @"Permissions-Policy", + @"PICS-Label", + @"Ping-From", + @"Ping-To", + Position, + Pragma, + Prefer, + @"Preference-Applied", + Priority, + ProfileObject, + Protocol, + @"Protocol-Info", + @"Protocol-Query", + @"Protocol-Request", + @"Proxy-Authenticate", + @"Proxy-Authentication-Info", + @"Proxy-Authorization", + @"Proxy-Features", + @"Proxy-Instruction", + @"Proxy-Status", + Public, + @"Public-Key-Pins", + @"Public-Key-Pins-Report-Only", + Range, + @"Redirect-Ref", + Referer, + @"Referer-Root", + @"Referrer-Policy", + Refresh, + @"Repeatability-Client-ID", + @"Repeatability-First-Sent", + @"Repeatability-Request-ID", + @"Repeatability-Result", + @"Replay-Nonce", + @"Reporting-Endpoints", + @"Repr-Digest", + @"Retry-After", + Safe, + @"Schedule-Reply", + @"Schedule-Tag", + @"Sec-Fetch-Dest", + @"Sec-Fetch-Mode", + @"Sec-Fetch-Site", + @"Sec-Fetch-Storage-Access", + 
@"Sec-Fetch-User", + @"Sec-GPC", + @"Sec-Purpose", + @"Sec-Token-Binding", + @"Sec-WebSocket-Accept", + @"Sec-WebSocket-Extensions", + @"Sec-WebSocket-Key", + @"Sec-WebSocket-Protocol", + @"Sec-WebSocket-Version", + @"Security-Scheme", + Server, + @"Server-Timing", + @"Set-Cookie", + @"Set-Cookie2", + @"Set-Txn", + SetProfile, + Signature, + @"Signature-Input", + SLUG, + SoapAction, + @"Status-URI", + @"Strict-Transport-Security", + Sunset, + @"Surrogate-Capability", + @"Surrogate-Control", + TCN, + TE, + Timeout, + @"Timing-Allow-Origin", + Topic, + Traceparent, + Tracestate, + Trailer, + @"Transfer-Encoding", + TTL, + Upgrade, + Urgency, + URI, + @"Use-As-Dictionary", + @"User-Agent", + @"Variant-Vary", + Vary, + Via, + @"Want-Content-Digest", + @"Want-Digest", + @"Want-Repr-Digest", + Warning, + @"WWW-Authenticate", + @"X-Content-Type-Options", + @"X-Frame-Options", + + // --- NON-STANDARD FIELD NAMES -------------------------------------------- + + // These names include, but are not limited to: + // + // - Cloudflare HTTP headers + // https://developers.cloudflare.com/fundamentals/reference/http-headers/ + // - Entries from MDN marked as "non-standard", but not "deprecated" + // https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Attribution-Reporting-Register-Trigger + + @"Cf-Cache-Status", + @"CF-Connecting-IP", + @"CF-Connecting-IPv6", + @"CF-Connecting-O2O", + @"CF-EW-Via", + @"CF-IPCountry", + @"CF-Pseudo-IPv4", + @"Cf-Ray", + @"CF-Visitor", + @"CF-Worker", + @"Idempotency-Key", + @"True-Client-IP", + @"X-Accel-Buffering", + @"X-Accel-Charset", + @"X-Accel-Limit-Rate", + @"X-Accel-Redirect", + @"X-API-Key", + @"X-Correlation-ID", + @"X-DNS-Prefetch-Control", + @"X-Forwarded-For", + @"X-Forwarded-Host", + @"X-Forwarded-Proto", + @"X-Permitted-Cross-Domain-Policies", + @"X-Powered-By", + @"X-Request-ID", + @"X-Robots-Tag", + + // --- EXPERIMENTAL FIELD NAMES -------------------------------------------- + + @"Sec-CH-Device-Memory", + 
@"Sec-CH-DPR", + @"Sec-CH-Prefers-Color-Scheme", + @"Sec-CH-Prefers-Reduced-Motion", + @"Sec-CH-Prefers-Reduced-Transparency", + @"Sec-CH-UA-Arch", + @"Sec-CH-UA-Bitness", + @"Sec-CH-UA-Form-Factors", + @"Sec-CH-UA-Full-Version", + @"Sec-CH-UA-Full-Version-List", + @"Sec-CH-UA-Mobile", + @"Sec-CH-UA-Model", + @"Sec-CH-UA-Platform-Version", + @"Sec-CH-UA-Platform", + @"Sec-CH-UA-WoW64", + @"Sec-CH-UA", + @"Sec-CH-Viewport-Height", + @"Sec-CH-Viewport-Width", + @"Sec-CH-Width", + + /// Maps **lowercased** header names to enum values. + pub const map: std.StaticStringMap(KnownFieldName) = blk: { + @setEvalBranchQuota(20000); + const fields = @typeInfo(KnownFieldName).@"enum".fields; + + var kvs_list: [fields.len]struct { []const u8, KnownFieldName } = undefined; + for (fields, 0..) |field, i| { + var name_buf: [field.name.len]u8 = undefined; + _ = std.ascii.lowerString(&name_buf, field.name); + const name = name_buf; + kvs_list[i] = .{ &name, @field(KnownFieldName, field.name) }; + } + + break :blk .initComptime(kvs_list); + }; + + /// The maximum length of all known header names. Any header name longer + /// than this cannot be a known header name. + pub const max_known_field_name_len = blk: { + const fields = @typeInfo(KnownFieldName).@"enum".fields; + + var max_len: usize = 0; + for (fields) |field| { + max_len = @max(max_len, field.name.len); + } + break :blk max_len; + }; + + pub fn isKnownFieldName(name: []const u8) ?KnownFieldName { + if (name.len > max_known_field_name_len) { + @branchHint(.unlikely); + return null; + } + + var name_lowercase_buf: [max_known_field_name_len]u8 = undefined; + const name_lowercase = std.ascii.lowerString(&name_lowercase_buf, name); + return map.get(name_lowercase); + } +}; diff --git a/packages/web/src/http/Parser.zig b/packages/web/src/http/Parser.zig index 54836a7..96fecf6 100644 --- a/packages/web/src/http/Parser.zig +++ b/packages/web/src/http/Parser.zig @@ -1,22 +1,525 @@ +//! HTTP/1.1 parser. +//! +//! 
This parser is *streaming*, meaning it can gracefully consume partial HTTP +//! request bytes. An instance of this parser is meant for parsing a single +//! request. Once the request is fully completed, a new instance of the parser +//! should be initialized. +//! +//! During a single ingestion, the parser can return one of the following: +//! +//! - route of type `Route`, i.e. HTTP method (aka verb) with pathname +//! - header of type `Header`, i.e. a field name with a value +//! - end_of_headers of type `void`, i.e. a marker which informs the user of +//! this parser that there will be no more headers; this moment can be used by +//! the user to make decisions about further processing of the request based +//! on the full knowledge of all the headers +//! - body of type `[]const u8`, i.e. a slice to the request body (or +//! zero-length slice if there is no request body) +//! +//! The first result returned from the parser will always be the route. Then, +//! zero or more headers will follow, terminated with an end_of_headers marker. The +//! parser will always finish with a single body result. +//! +//! Parser methods stop processing at the first result. Therefore, if any result +//! is returned, the provided bytes might have been only partially consumed and +//! the methods must be repeatedly called until all of the bytes are consumed. +//! When the body is returned, the parser is finished and should no longer be +//! used. If the body was returned, but the bytes were not fully consumed, it +//! means that the remainder belongs to a subsequent HTTP request. +//! +//! When an error is returned from the parser, the HTTP request should be +//! considered malformed. You may choose to respond to it, but the request must +//! no longer be parsed and the connection should be closed. +//! +//! The parser is not involved in any HTTP semantics, only its syntax. It is up +//! to the user of this parser to respect all of the HTTP standards (if they +//! even choose to). 
For example, none of the header field values are verified. +//! The only exception is `Content-Length`. The parser must know the value to +//! determine the length of the request body. If the value fails to parse as a +//! decimal non-negative integer, a syntax error is returned. Note that +//! according to [RFC 9110, Section 8.6: HTTP Semantics](https://datatracker.ietf.org/doc/html/rfc9110#section-8.6), +//! `Content-Length` header field value consisting of the same decimal value +//! repeated as a comma-separated list (e.g. `Content-Length: 42, 42`) MAY be +//! accepted. This parser chooses not to accept it. + const std = @import("std"); const Parser = @This(); +const FieldName = @import("FieldName.zig").FieldName; const Header = @import("Header.zig"); const Method = @import("Method.zig").Method; -const Response = @import("../Response.zig"); -const RequestHandler = @import("../RequestHandler.zig"); -const RequestRouter = @import("../RequestRouter.zig"); +const Route = @import("Route.zig"); -const Error = error{ +pub const Error = error{ MethodNotSupported, HttpVersionNotSupported, - MissingLineFeed, - InvalidContentLength, - RouterError, - HandlerError, + SyntaxError, }; -const Vec: type = @Vector(std.simd.suggestVectorLength(u8).?, u8); +pub const Result = union(enum) { + route: Route, + header: Header, + end_of_headers: void, + body: []const u8, + + pub fn initRoute(route: Route) Result { + return .{ .route = route }; + } + + pub fn initHeader(header: Header) Result { + return .{ .header = header }; + } + + pub fn initBody(body: []const u8) Result { + return .{ .body = body }; + } +}; + +pub const ConsumeResult = struct { + consumed: usize, + result: ?Result, +}; + +pub const State = union(enum) { + init: void, + method_c: void, + method_d: void, + method_g: void, + method_h: void, + method_o: void, + method_p: void, + method_t: void, + method_co: void, + method_de: void, + method_ge: void, + method_he: void, + method_op: void, + method_pa: void, + method_po: 
void, + method_pu: void, + method_tr: void, + method_con: void, + method_del: void, + method_hea: void, + method_opt: void, + method_pat: void, + method_pos: void, + method_tra: void, + method_conn: void, + method_dele: void, + method_opti: void, + method_patc: void, + method_trac: void, + method_conne: void, + method_delet: void, + method_optio: void, + method_connec: void, + method_option: void, + method_complete: Method, + pathname: Route, + pathname_complete: void, + version_h: void, + version_ht: void, + version_htt: void, + version_http: void, + @"version_http/": void, + @"version_http/1": void, + @"version_http/1.": void, + version_complete: void, + start_line_end: void, + header_name_start: void, + header_name: []const u8, + header_value: Header, + header_line_end: void, + headers_end: void, + body: []const u8, + done: void, + + pub fn initMethodComplete(method: Method) State { + return .{ .method_complete = method }; + } + + pub fn initPathname(route: Route) State { + return .{ .pathname = route }; + } + + pub fn initHeaderName(name: []const u8) State { + return .{ .header_name = name }; + } + + pub fn initHeaderValue(header: Header) State { + return .{ .header_value = header }; + } + + pub fn initBody(body: []const u8) State { + return .{ .body = body }; + } +}; + +state: State, +content_length: ?usize, + +pub fn init() Parser { + return .{ + .state = .init, + .content_length = null, + }; +} + +pub fn consume(self: *Parser, chars: []const u8) Error!ConsumeResult { + var i: usize = 0; + while (i < chars.len) { + switch (self.state) { + .body => |body| { + const content_length = self.content_length.?; + const to_consume = @min(chars.len - i, content_length - body.len); + + const new_body = extendSliceBy(body, to_consume); + i += to_consume; + + if (new_body.len >= content_length) { + self.state = .done; + return .{ + .consumed = i, + .result = .initBody(new_body), + }; + } else { + self.state = .initBody(new_body); + } + }, + else => { + // TODO fix + // if 
(chars.len - i >= vec_len) { + // const vec_res = try self.consumeVec(chars[i..][0..vec_len]); + // i += vec_res.consumed; + + // if (vec_res.result) |result| { + // return .{ + // .consumed = i, + // .result = result, + // }; + // } + + // if (vec_res.consumed > 0) { + // continue; + // } + // } + + const maybe_result = try self.consumeChar(&chars[i]); + i += 1; + + if (maybe_result) |result| { + return .{ + .consumed = i, + .result = result, + }; + } + }, + } + } + + std.debug.assert(i == chars.len); + return .{ + .consumed = chars.len, + .result = null, + }; +} + +fn consumeChar(self: *Parser, char_ptr: *const u8) Error!?Result { + const char = char_ptr.*; + const char_slice: *const [1]u8 = @ptrCast(char_ptr); + const next_char_slice = @as([*]const u8, @ptrCast(char_ptr))[1..1]; + + switch (self.state) { + .init => switch (char) { + 'C' => self.state = .method_c, + 'D' => self.state = .method_d, + 'G' => self.state = .method_g, + 'H' => self.state = .method_h, + 'O' => self.state = .method_o, + 'P' => self.state = .method_p, + 'T' => self.state = .method_t, + else => return error.MethodNotSupported, + }, + .method_c => switch (char) { + 'O' => self.state = .method_co, + else => return error.MethodNotSupported, + }, + .method_d => switch (char) { + 'E' => self.state = .method_de, + else => return error.MethodNotSupported, + }, + .method_g => switch (char) { + 'E' => self.state = .method_ge, + else => return error.MethodNotSupported, + }, + .method_h => switch (char) { + 'E' => self.state = .method_he, + else => return error.MethodNotSupported, + }, + .method_o => switch (char) { + 'P' => self.state = .method_op, + else => return error.MethodNotSupported, + }, + .method_p => switch (char) { + 'A' => self.state = .method_pa, + 'O' => self.state = .method_po, + 'U' => self.state = .method_pu, + else => return error.MethodNotSupported, + }, + .method_t => switch (char) { + 'R' => self.state = .method_tr, + else => return error.MethodNotSupported, + }, + .method_co => switch (char) 
{ + 'N' => self.state = .method_con, + else => return error.MethodNotSupported, + }, + .method_de => switch (char) { + 'L' => self.state = .method_del, + else => return error.MethodNotSupported, + }, + .method_ge => switch (char) { + 'T' => self.state = .initMethodComplete(.GET), + else => return error.MethodNotSupported, + }, + .method_he => switch (char) { + 'A' => self.state = .method_hea, + else => return error.MethodNotSupported, + }, + .method_op => switch (char) { + 'T' => self.state = .method_opt, + else => return error.MethodNotSupported, + }, + .method_pa => switch (char) { + 'T' => self.state = .method_pat, + else => return error.MethodNotSupported, + }, + .method_po => switch (char) { + 'S' => self.state = .method_pos, + else => return error.MethodNotSupported, + }, + .method_pu => switch (char) { + 'T' => self.state = .initMethodComplete(.PUT), + else => return error.MethodNotSupported, + }, + .method_tr => switch (char) { + 'A' => self.state = .method_tra, + else => return error.MethodNotSupported, + }, + .method_con => switch (char) { + 'N' => self.state = .method_conn, + else => return error.MethodNotSupported, + }, + .method_del => switch (char) { + 'E' => self.state = .method_dele, + else => return error.MethodNotSupported, + }, + .method_hea => switch (char) { + 'D' => self.state = .initMethodComplete(.HEAD), + else => return error.MethodNotSupported, + }, + .method_opt => switch (char) { + 'I' => self.state = .method_opti, + else => return error.MethodNotSupported, + }, + .method_pat => switch (char) { + 'C' => self.state = .method_patc, + else => return error.MethodNotSupported, + }, + .method_pos => switch (char) { + 'T' => self.state = .initMethodComplete(.POST), + else => return error.MethodNotSupported, + }, + .method_tra => switch (char) { + 'C' => self.state = .method_trac, + else => return error.MethodNotSupported, + }, + .method_conn => switch (char) { + 'E' => self.state = .method_conne, + else => return error.MethodNotSupported, + }, + 
.method_dele => switch (char) { + 'T' => self.state = .method_delet, + else => return error.MethodNotSupported, + }, + .method_opti => switch (char) { + 'O' => self.state = .method_optio, + else => return error.MethodNotSupported, + }, + .method_patc => switch (char) { + 'H' => self.state = .initMethodComplete(.PATCH), + else => return error.MethodNotSupported, + }, + .method_trac => switch (char) { + 'E' => self.state = .initMethodComplete(.TRACE), + else => return error.MethodNotSupported, + }, + .method_conne => switch (char) { + 'C' => self.state = .method_connec, + else => return error.MethodNotSupported, + }, + .method_delet => switch (char) { + 'E' => self.state = .initMethodComplete(.DELETE), + else => return error.MethodNotSupported, + }, + .method_optio => switch (char) { + 'N' => self.state = .method_option, + else => return error.MethodNotSupported, + }, + .method_connec => switch (char) { + 'T' => self.state = .initMethodComplete(.CONNECT), + else => return error.MethodNotSupported, + }, + .method_option => switch (char) { + 'S' => self.state = .initMethodComplete(.OPTIONS), + else => return error.MethodNotSupported, + }, + .method_complete => |method| switch (char) { + ' ' => self.state = .initPathname(.init(method, next_char_slice)), + else => return error.MethodNotSupported, + }, + .pathname => |route| switch (char) { + ' ' => { + self.state = .pathname_complete; + return .initRoute(route); + }, + else => self.state = .initPathname(extendRoute(route)), + }, + .pathname_complete => switch (char) { + 'H' => self.state = .version_h, + else => return error.HttpVersionNotSupported, + }, + .version_h => switch (char) { + 'T' => self.state = .version_ht, + else => return error.HttpVersionNotSupported, + }, + .version_ht => switch (char) { + 'T' => self.state = .version_htt, + else => return error.HttpVersionNotSupported, + }, + .version_htt => switch (char) { + 'P' => self.state = .version_http, + else => return error.HttpVersionNotSupported, + }, + 
.version_http => switch (char) { + '/' => self.state = .@"version_http/", + else => return error.HttpVersionNotSupported, + }, + .@"version_http/" => switch (char) { + '1' => self.state = .@"version_http/1", + else => return error.HttpVersionNotSupported, + }, + .@"version_http/1" => switch (char) { + '.' => self.state = .@"version_http/1.", + else => return error.HttpVersionNotSupported, + }, + .@"version_http/1." => switch (char) { + '1' => self.state = .version_complete, + else => return error.HttpVersionNotSupported, + }, + .version_complete => switch (char) { + '\r' => self.state = .start_line_end, + else => return error.HttpVersionNotSupported, + }, + .start_line_end => switch (char) { + '\n' => self.state = .header_name_start, + else => return error.SyntaxError, + }, + .header_name_start => switch (char) { + '\r' => { + self.state = .headers_end; + return .end_of_headers; + }, + else => self.state = .initHeaderName(char_slice), + }, + .header_name => |name| switch (char) { + ':' => self.state = .initHeaderValue(.init(.init(name), next_char_slice)), + else => self.state = .initHeaderName(extendSlice(name)), + }, + .header_value => |untrimmed_header| switch (char) { + '\r' => { + self.state = .header_line_end; + const header: Header = .init( + untrimmed_header.name, + std.mem.trim(u8, untrimmed_header.value, " \t"), + ); + + if (header.isNamedKnown(.@"Content-Length")) { + const content_length = std.fmt.parseInt(usize, header.value, 10) catch return error.SyntaxError; + if (self.content_length) |current_content_length| { + @branchHint(.unlikely); + // Accept multiple `Content-Length` headers as long as + // they have the exact same value. 
+ if (content_length != current_content_length) { + return error.SyntaxError; + } + } else { + self.content_length = content_length; + } + } + + return .initHeader(header); + }, + else => self.state = .initHeaderValue(extendHeader(untrimmed_header)), + }, + .header_line_end => switch (char) { + '\n' => self.state = .header_name_start, + else => return error.SyntaxError, + }, + .headers_end => switch (char) { + '\n' => { + const content_length = self.content_length orelse 0; + if (content_length == 0) { + self.state = .done; + return .initBody(&.{}); + } else { + self.state = .initBody(next_char_slice); + } + }, + else => return error.SyntaxError, + }, + .body => |body| { + const content_length = self.content_length.?; + const new_body = extendSlice(body); + if (new_body.len >= content_length) { + self.state = .done; + return .initBody(new_body); + } else { + self.state = .initBody(new_body); + } + }, + .done => unreachable, + } + + return null; +} + +fn extendSlice(slice: []const u8) []const u8 { + return slice.ptr[0 .. slice.len + 1]; +} + +fn extendSliceBy(slice: []const u8, n: usize) []const u8 { + return slice.ptr[0 .. 
slice.len + n]; +} + +fn extendRoute(route: Route) Route { + return .{ + .method = route.method, + .pathname = extendSlice(route.pathname), + }; +} + +fn extendHeader(header: Header) Header { + return .{ + .name = header.name, + .value = extendSlice(header.value), + }; +} + +// --- SIMD -------------------------------------------------------------------- + +const Vec = @Vector(std.simd.suggestVectorLength(u8).?, u8); const vec_len = @typeInfo(Vec).vector.len; const Pattern = struct { @@ -85,159 +588,8 @@ inline fn hasCRLF(vec: Vec) bool { return @reduce(.Or, has_cr | has_lf); } -const State = union(enum) { - pub fn methodComplete(method: Method) State { - return .{ - .method_complete = .{ - .method = method, - }, - }; - } - - pub fn pathname(method: Method, p: []const u8) State { - return .{ - .pathname_state = .{ - .method = method, - .pathname = p, - }, - }; - } - - pub fn headerValue(name: []const u8, value: []const u8) State { - return .{ - .header_value = .{ - .name = name, - .value = value, - }, - }; - } - - init: void, - // TODO Add all methods here and in `consumeChar` (they are covered by - // `consumeVec`, though) - method_d: void, - method_g: void, - method_h: void, - method_p: void, - method_de: void, - method_ge: void, - method_he: void, - method_pa: void, - method_po: void, - method_pu: void, - method_del: void, - method_hea: void, - method_pat: void, - method_pos: void, - method_dele: void, - method_patc: void, - method_delet: void, - method_complete: struct { method: Method }, - pathname_state: struct { method: Method, pathname: []const u8 }, - pathname_complete: void, - version_h: void, - version_ht: void, - version_htt: void, - version_http: void, - @"version_http/": void, - @"version_http/1": void, - @"version_http/1.": void, - version_complete: void, - start_line_end: void, - header_name_start: void, - header_name: []const u8, - header_value: struct { name: []const u8, value: []const u8 }, - header_line_end: void, - headers_end: void, - body: 
[]const u8, -}; - -const ConsumeResult = struct { - consumed: usize, - done: bool, -}; - -const ConsumeCharResult = enum { - not_done, - done, -}; - -request_router: RequestRouter, -response: *Response, -state: State, -content_length: usize, - -request_handler: ?RequestHandler = null, -last_router_error: anyerror = undefined, -last_handler_error: anyerror = undefined, - -pub fn init(request_router: RequestRouter, response: *Response) Parser { - return .{ - .request_router = request_router, - .response = response, - .state = .init, - .content_length = 0, - }; -} - -pub fn consume(self: *Parser, chars: []const u8) Error!ConsumeResult { - var i: usize = 0; - while (i < chars.len) { - switch (self.state) { - .body => |body| { - const to_consume = @min(chars.len - i, self.content_length - body.len); - const new_body = body.ptr[0 .. body.len + to_consume]; - self.state = .{ .body = new_body }; - i += to_consume; - - const done = new_body.len >= self.content_length; - - if (done) { - self.request_handler.?.rawBody(self.response, new_body) catch |err| { - self.last_handler_error = err; - return error.HandlerError; - }; - } - - return .{ - .consumed = i, - .done = done, - }; - }, - else => { - // TODO Fix - // if (chars.len - i >= vec_len) { - // const vec_res = try self.consumeVec(chars[i..][0..vec_len]); - // i += vec_res.consumed; - - // if (vec_res.done) { - // return .{ - // .consumed = i, - // .done = true, - // }; - // } - - // if (vec_res.consumed > 0) { - // continue; - // } - // } - - const char_res = try self.consumeChar(&chars[i]); - i += 1; - if (char_res == .done) return .{ - .consumed = i, - .done = true, - }; - }, - } - } - - return .{ - .consumed = chars.len, - .done = false, - }; -} - +/// May return with `.consumed == 0`, in which case the parsing should be +/// retried with non-SIMD method. 
pub fn consumeVec(self: *Parser, vec_ptr: *const [vec_len]u8) Error!ConsumeResult { const vec: Vec = vec_ptr.*; switch (self.state) { @@ -305,197 +657,3 @@ pub fn consumeVec(self: *Parser, vec_ptr: *const [vec_len]u8) Error!ConsumeResul }, } } - -pub fn consumeChar(self: *Parser, c_ptr: *const u8) Error!ConsumeCharResult { - const c = c_ptr.*; - const c_slice = @as([*]const u8, @ptrCast(c_ptr))[0..1]; - switch (self.state) { - .init => switch (c) { - 'D' => self.state = .method_d, - 'G' => self.state = .method_g, - 'H' => self.state = .method_h, - 'P' => self.state = .method_p, - else => return error.MethodNotSupported, - }, - .method_d => switch (c) { - 'E' => self.state = .method_de, - else => return error.MethodNotSupported, - }, - .method_g => switch (c) { - 'E' => self.state = .method_ge, - else => return error.MethodNotSupported, - }, - .method_h => switch (c) { - 'E' => self.state = .method_he, - else => return error.MethodNotSupported, - }, - .method_p => switch (c) { - 'A' => self.state = .method_pa, - 'O' => self.state = .method_po, - 'U' => self.state = .method_pu, - else => return error.MethodNotSupported, - }, - .method_de => switch (c) { - 'L' => self.state = .method_del, - else => return error.MethodNotSupported, - }, - .method_ge => switch (c) { - 'T' => self.state = .methodComplete(.GET), - else => return error.MethodNotSupported, - }, - .method_he => switch (c) { - 'A' => self.state = .method_hea, - else => return error.MethodNotSupported, - }, - .method_pa => switch (c) { - 'T' => self.state = .method_pat, - else => return error.MethodNotSupported, - }, - .method_po => switch (c) { - 'S' => self.state = .method_pos, - else => return error.MethodNotSupported, - }, - .method_pu => switch (c) { - 'T' => self.state = .methodComplete(.PUT), - else => return error.MethodNotSupported, - }, - .method_del => switch (c) { - 'E' => self.state = .method_dele, - else => return error.MethodNotSupported, - }, - .method_hea => switch (c) { - 'D' => self.state = 
.methodComplete(.HEAD), - else => return error.MethodNotSupported, - }, - .method_pat => switch (c) { - 'C' => self.state = .method_patc, - else => return error.MethodNotSupported, - }, - .method_pos => switch (c) { - 'T' => self.state = .methodComplete(.POST), - else => return error.MethodNotSupported, - }, - .method_dele => switch (c) { - 'T' => self.state = .method_delet, - else => return error.MethodNotSupported, - }, - .method_patc => switch (c) { - 'H' => self.state = .methodComplete(.PATCH), - else => return error.MethodNotSupported, - }, - .method_delet => switch (c) { - 'E' => self.state = .methodComplete(.DELETE), - else => return error.MethodNotSupported, - }, - .method_complete => |s| switch (c) { - ' ' => self.state = .pathname(s.method, @as([*]const u8, @ptrCast(c_ptr))[1..1]), - else => return error.MethodNotSupported, - }, - .pathname_state => |s| switch (c) { - ' ' => { - self.state = .pathname_complete; - self.request_handler = self.request_router.rawRoute(.init(s.method, s.pathname)) catch |err| { - self.last_router_error = err; - return error.RouterError; - }; - }, - else => self.state = .pathname(s.method, s.pathname.ptr[0 .. s.pathname.len + 1]), - }, - .pathname_complete => switch (c) { - 'H' => self.state = .version_h, - else => return error.HttpVersionNotSupported, - }, - .version_h => switch (c) { - 'T' => self.state = .version_ht, - else => return error.HttpVersionNotSupported, - }, - .version_ht => switch (c) { - 'T' => self.state = .version_htt, - else => return error.HttpVersionNotSupported, - }, - .version_htt => switch (c) { - 'P' => self.state = .version_http, - else => return error.HttpVersionNotSupported, - }, - .version_http => switch (c) { - '/' => self.state = .@"version_http/", - else => return error.HttpVersionNotSupported, - }, - .@"version_http/" => switch (c) { - '1' => self.state = .@"version_http/1", - else => return error.HttpVersionNotSupported, - }, - .@"version_http/1" => switch (c) { - '.' 
=> self.state = .@"version_http/1.", - else => return error.HttpVersionNotSupported, - }, - .@"version_http/1." => switch (c) { - '1' => self.state = .version_complete, - else => return error.HttpVersionNotSupported, - }, - .version_complete => switch (c) { - '\r' => self.state = .start_line_end, - else => return error.HttpVersionNotSupported, - }, - .start_line_end => switch (c) { - '\n' => self.state = .header_name_start, - else => return error.MissingLineFeed, - }, - .header_name_start => switch (c) { - '\r' => self.state = .headers_end, - else => self.state = .{ .header_name = c_slice }, - }, - .header_name => |name| switch (c) { - ':' => { - self.state = .headerValue(name, @as([*]const u8, @ptrCast(c_ptr))[1..1]); - }, - else => self.state = .{ .header_name = name.ptr[0 .. name.len + 1] }, - }, - .header_value => |s| switch (c) { - '\r' => { - self.state = .header_line_end; - const header: Header = .init(s.name, std.mem.trim(u8, s.value, " \t")); - - if (header.isKnown(.@"Content-Length")) { - self.content_length = std.fmt.parseInt(usize, header.value, 10) catch return error.InvalidContentLength; - } - - self.request_handler.?.rawHeader(self.response, header) catch |err| { - self.last_handler_error = err; - return error.HandlerError; - }; - }, - else => self.state = .headerValue(s.name, s.value.ptr[0 .. s.value.len + 1]), - }, - .header_line_end => switch (c) { - '\n' => self.state = .header_name_start, - else => return error.MissingLineFeed, - }, - .headers_end => switch (c) { - '\n' => { - if (self.content_length == 0) { - self.request_handler.?.rawBody(self.response, &.{}) catch |err| { - self.last_handler_error = err; - return error.HandlerError; - }; - return .done; - } - self.state = .{ .body = @as([*]const u8, @ptrCast(c_ptr))[1..1] }; - }, - else => return error.MissingLineFeed, - }, - .body => |body| { - const new_body = body.ptr[0 .. 
body.len + 1]; - self.state = .{ .body = new_body }; - if (new_body.len >= self.content_length) { - self.request_handler.?.rawBody(self.response, new_body) catch |err| { - self.last_handler_error = err; - return error.HandlerError; - }; - return .done; - } - }, - } - - return .not_done; -} diff --git a/packages/web/src/Route.zig b/packages/web/src/http/Route.zig similarity index 82% rename from packages/web/src/Route.zig rename to packages/web/src/http/Route.zig index 9a0ea70..7219255 100644 --- a/packages/web/src/Route.zig +++ b/packages/web/src/http/Route.zig @@ -1,7 +1,7 @@ const std = @import("std"); const Route = @This(); -const Method = @import("http/Method.zig").Method; +const Method = @import("Method.zig").Method; method: Method, pathname: []const u8,