168 lines
5.2 KiB
Zig
168 lines
5.2 KiB
Zig
//! Module for string interning. A string can be converted to a stable integer
|
|
//! constant, called an *atom*. The value of an atom for a given string is
|
|
//! guaranteed to be stable throughout a program's runtime, but not across
|
|
//! different runs. There can be no more than 2¹⁶ atoms.
|
|
//!
|
|
//! Use this module to convert string IDs into numbers, so that they can be
|
|
//! compared more easily. The users of this module should import it with
|
|
//! `@import("Atom.zig").Atom` and use the methods available in the `Atom` type.
|
|
|
|
const std = @import("std");
|
|
|
|
/// Handle for an interned string, backed by a `u16`.
///
/// The enum is non-exhaustive: apart from the named `empty` value, every other
/// value is handed out at runtime by `fromString`. The integer value of an atom
/// is its index into the module-level `array` of strings.
///
/// All functions that touch the global table take the module-level `mutex` and
/// assert (in safe builds) that `init` has been called.
pub const Atom = enum(u16) {
    // VOLATILE Synchronize explicit values with `init` implementation.

    /// Atom representing an empty string, i.e. `""`.
    empty,
    _,

    /// Cast an integer into an atom. This can produce an invalid atom, i.e. one
    /// that was never registered through `fromString`.
    pub fn fromInt(value: u16) Atom {
        return @enumFromInt(value);
    }

    /// Cast an index into an atom. This can produce an invalid atom. The caller
    /// asserts that the index is not greater than the max atom value; a larger
    /// index is a safety-checked illegal `@intCast`.
    pub fn fromIndex(index: usize) Atom {
        return @enumFromInt(@as(u16, @intCast(index)));
    }

    /// Cast an index into an atom. This can produce an invalid atom. Returns
    /// `error.Overflow` if the index is greater than the max atom value.
    pub fn fromIndexSafe(index: usize) error{Overflow}!Atom {
        return @enumFromInt(std.math.cast(u16, index) orelse return error.Overflow);
    }

    /// Turn a string into an atom. Returns either an existing atom or makes a
    /// new one, if necessary. This will always produce a valid atom. Will not
    /// return `error.OutOfAtoms` if the atom already exists.
    ///
    /// The string is copied; the caller keeps ownership of `string`.
    ///
    /// Errors:
    /// - `error.OutOfMemory` if growing the table or copying the string fails.
    /// - `error.OutOfAtoms` if a new atom is needed but all `u16` values are
    ///   already in use.
    pub fn fromString(string: []const u8) error{ OutOfMemory, OutOfAtoms }!Atom {
        mutex.lock();
        defer mutex.unlock();

        std.debug.assert(initialized);

        const entry = try map.getOrPut(allocator, string);

        if (entry.found_existing) {
            return entry.value_ptr.*;
        } else {
            // `getOrPut` inserted a slot keyed by the caller's borrowed slice.
            // If anything below fails, drop that slot so the map never keeps a
            // key it does not own or a value that was never assigned.
            errdefer _ = map.remove(string);
            const atom = Atom.fromIndexSafe(array.items.len) catch |err| switch (err) {
                error.Overflow => return error.OutOfAtoms,
            };

            // Reserve the array slot before copying the string so that the
            // final append cannot fail once the map entry has been filled in.
            try array.ensureUnusedCapacity(allocator, 1);
            const owned_string = try toOwnedString(string);

            // Replace the borrowed key with the owned copy.
            entry.key_ptr.* = owned_string;
            entry.value_ptr.* = atom;

            array.appendAssumeCapacity(owned_string);
            return atom;
        }
    }

    /// Turn a string into an atom, if the string has been already registered as
    /// an atom. Returns `null` otherwise. This will always produce a valid
    /// atom. Never allocates.
    pub fn fromStringIfExists(string: []const u8) ?Atom {
        mutex.lock();
        defer mutex.unlock();

        std.debug.assert(initialized);

        return map.get(string);
    }

    /// Cast an atom into an integer.
    pub fn toInt(self: Atom) u16 {
        return @intFromEnum(self);
    }

    /// Cast an atom into a string. The caller asserts that the atom is valid;
    /// an atom that was never registered indexes past the end of the table.
    /// The returned slice is null-terminated.
    pub fn toString(self: Atom) [:0]const u8 {
        // `Mutex.lock` returns `void`, so it must not be wrapped in `try`
        // (this function has no error union in its return type).
        mutex.lock();
        defer mutex.unlock();

        std.debug.assert(initialized);

        return array.items[self.toInt()];
    }
};
|
|
|
|
/// Flag for debug purposes, to catch misuses of the API. Set by `init`,
/// cleared by `deinit`, and checked with `std.debug.assert` by the other
/// functions in this file.
|
|
var initialized: bool = false;
|
|
|
|
/// Allocator used for `map` and `array`. Also used as a child allocator for
|
|
/// `string_arena`. Assigned by `init` and reset to `undefined` by `deinit`.
|
|
var allocator: std.mem.Allocator = undefined;
|
|
|
|
/// Allocator for all string values. The keys of `map` and the elements of
|
|
/// `array` that are created by `Atom.fromString` are allocated with this arena
|
|
/// (the empty string registered by `init` is a literal). The strings are
/// allocated with a null terminator for interoperability with libraries.
|
|
var string_arena: std.heap.ArenaAllocator = undefined;
|
|
|
|
/// Maps a string value to an atom value.
|
|
var map: std.StringHashMapUnmanaged(Atom) = undefined;
|
|
|
|
/// Maps an atom value to a string. The atom's integer value is the index.
|
|
var array: std.ArrayList([:0]const u8) = undefined;
|
|
|
|
/// Protects all reads and writes to `map` and `array`.
|
|
var mutex: std.Thread.Mutex = .{};
|
|
|
|
/// Initialize the global atom table. Must be called once before any other
/// function of this module, and must be paired with `deinit`.
///
/// `_allocator` backs the lookup map and the atom array, and is the child
/// allocator of the arena that holds the interned strings.
///
/// Registers the empty string as `Atom.empty`.
///
/// Returns an error if allocating the initial entries fails. In that case
/// everything allocated so far is released and the module stays
/// uninitialized, so `init` may be called again.
pub fn init(_allocator: std.mem.Allocator) !void {
    mutex.lock();
    defer mutex.unlock();

    std.debug.assert(!initialized);

    allocator = _allocator;
    string_arena = .init(_allocator);
    map = .{};
    array = .empty;

    // Roll back to the uninitialized state if registering the built-in atoms
    // fails, instead of leaking and leaving a half-built table behind.
    errdefer {
        array.deinit(allocator);
        map.deinit(allocator);
        string_arena.deinit();

        array = undefined;
        map = undefined;
        string_arena = undefined;
        allocator = undefined;
    }

    // VOLATILE Synchronize with explicit values on top of `Atom` type.
    try map.put(allocator, "", .empty);
    try array.append(allocator, "");

    // Only flag the module as usable once every step has succeeded.
    initialized = true;
}
|
|
|
|
/// Tear down the global atom table and release all memory owned by it: the
/// interned strings, the lookup map and the atom array. Every atom and every
/// string previously returned by `Atom.toString` becomes invalid.
///
/// The caller asserts that `init` has been called. After this returns, `init`
/// may be called again.
pub fn deinit() void {
    mutex.lock();
    defer mutex.unlock();

    const log = std.log.scoped(.deinit);
    log.debug("Deinitializing atoms", .{});

    std.debug.assert(initialized);

    // Release the storage while `allocator` is still valid.
    string_arena.deinit();
    map.deinit(allocator);
    array.deinit(allocator);

    // Poison the globals so that use after `deinit` is caught in safe builds.
    initialized = false;
    array = undefined;
    map = undefined;
    string_arena = undefined;
    allocator = undefined;
}
|
|
|
|
/// Dump all atoms in a readable format, one per line, as the atom value in
/// zero-padded uppercase hexadecimal followed by its string. Use for
/// debugging. Does not flush the writer.
///
/// The writer is taken by pointer: the methods of `std.Io.Writer` operate on
/// `*Writer`, so a by-value copy cannot be written to.
///
/// Returns any error produced by the writer.
pub fn dump(writer: *std.Io.Writer) !void {
    mutex.lock();
    defer mutex.unlock();

    std.debug.assert(initialized);

    for (array.items, 0..) |string, i| {
        const atom: u32 = @intCast(i);
        // `0>8`: pad with '0' on the left (right-aligned) to 8 digits. A
        // left-aligned pad would append zeros and change the printed value.
        try writer.print("0x{X:0>8} {s}\n", .{ atom, string });
    }
}
|
|
|
|
/// Copy `string` into the string arena and return the copy as a
/// null-terminated slice. The copy is released when the arena is deinitialized.
/// Returns an error if the allocation fails.
fn toOwnedString(string: []const u8) ![:0]const u8 {
    const arena_allocator = string_arena.allocator();
    return try arena_allocator.dupeZ(u8, string);
}
|