//! Symbol table for interned string identifiers.
//!
//! [`SymbolTable`] keeps a bidirectional mapping between strings and compact
//! [`Symbol`] ids: a `HashMap` answers "string -> id" and a `Vec` answers
//! "id -> string". Interning the same string twice yields the same id, so
//! symbols can be compared, hashed and copied as plain integers.
//!
//! Overview of the API:
//! - [`SymbolTable::intern`]: add a string (if new) and return its `Symbol`.
//! - [`SymbolTable::get`]: look up the `Symbol` of a string without interning.
//! - [`SymbolTable::resolve`] / [`SymbolTable::try_resolve`]: get the string
//!   behind a `Symbol`.
//! - [`SymbolTable::intern_pair`] / [`SymbolTable::intern_coords`]: helpers
//!   for interning `(category, item)` data.
//
// Provenance: introduced as `src/model/symbol.rs` by commit
// 377d417e5af07f52c4ba359b0a5aec9e79dcd9f5, "feat(model): add symbol table
// module" (Edward Langley, 2026-04-05; co-authored by
// fiddlerwoaroof/git-smart-commit). The same commit registers the module with
// `pub mod symbol;` in `src/model/mod.rs`.

use std::collections::HashMap;

/// An interned string identifier. Copy-cheap, O(1) hash and equality.
///
/// A `Symbol` is only meaningful together with the [`SymbolTable`] that
/// produced it; it carries no reference back to that table.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Symbol(u64);

/// Bidirectional string ↔ Symbol mapping.
///
/// Ids are handed out densely in insertion order, so a symbol's numeric value
/// is the index of its string in `to_str`.
#[derive(Debug, Clone, Default)]
pub struct SymbolTable {
    /// String -> id lookup used by `intern` and `get`.
    to_id: HashMap<String, Symbol>,
    /// Id -> string lookup; the entry at index `i` belongs to `Symbol(i)`.
    to_str: Vec<String>,
}

impl SymbolTable {
    /// Create an empty table (equivalent to `SymbolTable::default()`).
    // NOTE(review): the lint is presumably silenced because `new` has no
    // non-test callers yet — confirm, and drop the attribute once it is used.
    #[allow(dead_code)]
    pub fn new() -> Self {
        Self::default()
    }

    /// Intern a string, returning its Symbol. Returns existing Symbol if
    /// already interned.
    pub fn intern(&mut self, s: &str) -> Symbol {
        if let Some(&existing) = self.to_id.get(s) {
            return existing;
        }
        // The next free id is the current length of the id -> string vector.
        // `usize` -> `u64` is lossless on every supported target.
        let id = Symbol(self.to_str.len() as u64);
        self.to_str.push(s.to_string());
        self.to_id.insert(s.to_string(), id);
        id
    }

    /// Look up the Symbol for a string without interning.
    ///
    /// Returns `None` when `s` has never been interned in this table.
    pub fn get(&self, s: &str) -> Option<Symbol> {
        self.to_id.get(s).copied()
    }

    /// Resolve a Symbol back to its string, or `None` if this table holds no
    /// string at that id (for example a symbol minted by a larger table).
    ///
    /// A symbol from a different table whose id happens to be in range
    /// resolves to whatever string this table stored at that id.
    pub fn try_resolve(&self, sym: Symbol) -> Option<&str> {
        // Symbols are only ever constructed by `intern` from a `Vec` length,
        // so the value always fits in `usize` and this cast cannot truncate.
        self.to_str.get(sym.0 as usize).map(String::as_str)
    }

    /// Resolve a Symbol back to its string.
    ///
    /// # Panics
    ///
    /// Panics if `sym` does not index a string in this table. Use
    /// [`SymbolTable::try_resolve`] to handle that case without panicking.
    pub fn resolve(&self, sym: Symbol) -> &str {
        self.try_resolve(sym)
            .expect("Symbol does not belong to this SymbolTable")
    }

    /// Intern a (category, item) pair.
    ///
    /// The category is interned before the item; both share one id space.
    pub fn intern_pair(&mut self, cat: &str, item: &str) -> (Symbol, Symbol) {
        (self.intern(cat), self.intern(item))
    }

    /// Intern a full coordinate list.
    ///
    /// Each `(category, item)` entry is interned with
    /// [`SymbolTable::intern_pair`]; the output preserves input order.
    pub fn intern_coords(&mut self, coords: &[(String, String)]) -> Vec<(Symbol, Symbol)> {
        coords
            .iter()
            .map(|(cat, item)| self.intern_pair(cat, item))
            .collect()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn intern_returns_same_id() {
        let mut t = SymbolTable::new();
        let a = t.intern("hello");
        let b = t.intern("hello");
        assert_eq!(a, b);
    }

    #[test]
    fn different_strings_different_ids() {
        let mut t = SymbolTable::new();
        let a = t.intern("hello");
        let b = t.intern("world");
        assert_ne!(a, b);
    }

    #[test]
    fn resolve_roundtrips() {
        let mut t = SymbolTable::new();
        let s = t.intern("test");
        assert_eq!(t.resolve(s), "test");
    }

    #[test]
    fn get_does_not_intern() {
        let mut t = SymbolTable::new();
        assert_eq!(t.get("missing"), None);
        let s = t.intern("present");
        assert_eq!(t.get("present"), Some(s));
        assert_eq!(t.get("missing"), None);
    }

    #[test]
    fn try_resolve_handles_unknown_symbol() {
        let mut big = SymbolTable::new();
        big.intern("first");
        let second = big.intern("second");
        let empty = SymbolTable::new();
        assert_eq!(big.try_resolve(second), Some("second"));
        assert_eq!(empty.try_resolve(second), None);
    }

    #[test]
    fn intern_coords_reuses_symbols() {
        let mut t = SymbolTable::new();
        let coords = vec![
            ("Region".to_string(), "East".to_string()),
            ("Region".to_string(), "West".to_string()),
        ];
        let out = t.intern_coords(&coords);
        assert_eq!(out.len(), 2);
        assert_eq!(out[0].0, out[1].0);
        assert_ne!(out[0].1, out[1].1);
        assert_eq!(t.resolve(out[1].1), "West");
    }
}