//! Module for string interning. A string can be converted to a stable integer
//! constant, called an *atom*. The value of an atom for a given string is
//! guaranteed to be stable throughout a program's runtime, but not across
//! different runs. There can be no more than 2¹⁶ atoms.
//!
//! Use this module to convert string IDs into numbers, so that they can be
//! compared more easily. The users of this module should import it with
//! `@import("Atom.zig").Atom` and use the methods available in the `Atom` type.
//!
//! The module keeps global state: call `init` once before using any of the
//! string-related `Atom` methods and `deinit` when done. All accesses to the
//! global tables are serialized with a mutex.

const std = @import("std");

pub const Atom = enum(u16) {
    // VOLATILE Synchronize explicit values with `init` implementation.

    /// Atom representing an empty string, i.e. `""`.
    empty,
    _,

    /// Cast an integer into an atom. This can produce an invalid atom.
    pub fn fromInt(value: u16) Atom {
        return @enumFromInt(value);
    }

    /// Cast an index into an atom. This can produce an invalid atom. The caller
    /// asserts that the index is not greater than the max atom value.
    pub fn fromIndex(index: usize) Atom {
        return @enumFromInt(@as(u16, @intCast(index)));
    }

    /// Cast an index into an atom. This can produce an invalid atom. Returns an
    /// error if the index is greater than the max atom value.
    pub fn fromIndexSafe(index: usize) error{Overflow}!Atom {
        return @enumFromInt(std.math.cast(u16, index) orelse return error.Overflow);
    }

    /// Turn a string into an atom. Returns either an existing atom or makes a
    /// new one, if necessary. This will always produce a valid atom. Will not
    /// return any error if the atom already exists.
    ///
    /// The string is copied; the caller keeps ownership of `string`.
    /// Returns `error.OutOfAtoms` when all 2¹⁶ atom values are taken and
    /// `error.OutOfMemory` when a new atom cannot be allocated. On error the
    /// tables are left unchanged.
    pub fn fromString(string: []const u8) error{ OutOfMemory, OutOfAtoms }!Atom {
        mutex.lock();
        defer mutex.unlock();

        std.debug.assert(initialized);

        const entry = try map.getOrPut(allocator, string);
        if (entry.found_existing) return entry.value_ptr.*;

        // The freshly inserted slot still uses the caller's `string` as its
        // key, so it can be rolled back with that same key on failure.
        errdefer _ = map.remove(string);

        // The next atom value is the next free index of `array`.
        const atom = Atom.fromIndexSafe(array.items.len) catch |err| switch (err) {
            error.Overflow => return error.OutOfAtoms,
        };

        // Do every fallible step before touching the tables, so that the
        // updates below cannot fail halfway.
        try array.ensureUnusedCapacity(allocator, 1);
        const owned_string = try toOwnedString(string);

        // Swap the borrowed key for the owned copy; the contents are equal, so
        // the hash of the slot is unaffected.
        entry.key_ptr.* = owned_string;
        entry.value_ptr.* = atom;
        array.appendAssumeCapacity(owned_string);
        return atom;
    }

    /// Turn a string into an atom, if the string has been already registered as
    /// an atom. Returns `null` otherwise. This will always produce a valid
    /// atom.
    pub fn fromStringIfExists(string: []const u8) ?Atom {
        mutex.lock();
        defer mutex.unlock();

        std.debug.assert(initialized);

        return map.get(string);
    }

    /// Cast an atom into an integer.
    pub fn toInt(self: Atom) u16 {
        return @intFromEnum(self);
    }

    /// Cast an atom into a string. The caller asserts that the atom is valid.
    /// The returned string is null-terminated and is not owned by the caller;
    /// it is released by `deinit`.
    pub fn toString(self: Atom) [:0]const u8 {
        // `Mutex.lock` cannot fail, and this function does not return an error
        // union, so the lock is taken without `try`.
        mutex.lock();
        defer mutex.unlock();

        std.debug.assert(initialized);

        return array.items[self.toInt()];
    }
};

/// Flag for debug purposes, to catch misuses of the API.
var initialized: bool = false;

/// Allocator used for `map` and `array`. Also used as a child allocator for
/// `string_arena`.
var allocator: std.mem.Allocator = undefined;

/// Allocator for all string values. All keys of `map` and all items of `array`
/// are allocated with this arena, except for the empty string registered by
/// `init`, which is a string literal. The strings are allocated with a null
/// terminator for interoperability with libraries.
var string_arena: std.heap.ArenaAllocator = undefined;

/// Maps a string value to an atom value.
var map: std.StringHashMapUnmanaged(Atom) = undefined;

/// Maps an atom value to a string.
var array: std.ArrayList([:0]const u8) = undefined;

/// Protects all reads and writes to `map` and `array`.
var mutex: std.Thread.Mutex = .{};

/// Initialize the global atom tables. Must be called once before any atom is
/// created from or converted to a string, and must be paired with `deinit`.
///
/// `_allocator` backs both lookup tables and the string arena. If an allocation
/// fails, the error is returned and the module stays uninitialized, so `init`
/// can be called again.
pub fn init(_allocator: std.mem.Allocator) !void {
    mutex.lock();
    defer mutex.unlock();

    std.debug.assert(!initialized);

    // Build the tables in locals first, so that a failed allocation neither
    // leaks memory nor leaves the globals in a half-initialized state.
    var new_map: std.StringHashMapUnmanaged(Atom) = .{};
    errdefer new_map.deinit(_allocator);
    var new_array: std.ArrayList([:0]const u8) = .empty;
    errdefer new_array.deinit(_allocator);

    // VOLATILE Synchronize with explicit values on top of `Atom` type.
    try new_map.put(_allocator, "", .empty);
    try new_array.append(_allocator, "");

    // Nothing below can fail; publish the fully built state.
    allocator = _allocator;
    string_arena = .init(_allocator);
    map = new_map;
    array = new_array;
    initialized = true;
}

/// Release all memory held by the atom tables and return the module to the
/// uninitialized state. Every string previously returned by `Atom.toString`
/// becomes invalid.
pub fn deinit() void {
    mutex.lock();
    defer mutex.unlock();

    std.log.scoped(.deinit).debug("Deinitializing atoms", .{});
    std.debug.assert(initialized);

    string_arena.deinit();
    map.deinit(allocator);
    array.deinit(allocator);

    allocator = undefined;
    string_arena = undefined;
    map = undefined;
    array = undefined;
    initialized = false;
}

/// Dump all atoms in a readable format. Use for debugging. Does not flush the
/// writer.
///
/// Writes one line per atom: the atom value as eight zero-padded uppercase hex
/// digits, a space, and the string. The writer is taken by pointer because
/// `std.Io.Writer` methods mutate the writer in place. Returns the first write
/// error, if any.
pub fn dump(writer: *std.Io.Writer) !void {
    mutex.lock();
    defer mutex.unlock();

    std.debug.assert(initialized);

    for (array.items, 0..) |string, i| {
        const atom: u32 = @intCast(i);
        // Right-align with zero fill, so that e.g. atom 1 is printed as
        // `0x00000001`.
        try writer.print("0x{X:0>8} {s}\n", .{ atom, string });
    }
}

/// Copy `string` into `string_arena` with a null terminator appended. The copy
/// lives until `deinit`.
fn toOwnedString(string: []const u8) ![:0]const u8 {
    return string_arena.allocator().dupeZ(u8, string);
}