Files
improvise/src/command/parse.rs
Edward Langley d3a1a57c78 refactor: improve dot separator parsing in command parser
Change split_on_dot() to require dot to be a standalone word
surrounded by whitespace or at line boundaries, rather than any
dot character.

This prevents accidental splitting on dots within identifiers or
quoted strings, making the command syntax more predictable.

The new logic checks both preceding and following bytes to ensure
the dot is truly isolated before treating it as a separator.

Co-Authored-By: fiddlerwoaroof/git-smart-commit (unsloth/Qwen3.5-35B-A3B-GGUF:Q5_K_M)
2026-04-05 01:07:08 -07:00

185 lines
5.3 KiB
Rust

//! Quasi-lisp prefix command parser.
//!
//! Syntax: `word arg1 arg2 ...`
//! Multiple commands on one line are separated by a standalone `.`
//! (whitespace or a line boundary on each side): `cmd-a x . cmd-b y`
//! Coordinate pairs use `/`: `Category/Item`
//! Quoted strings supported: `"Profit = Revenue - Cost"`
use super::cmd::{default_registry, Cmd, CmdRegistry};
/// Parses `line` into zero or more commands using the registry returned by
/// `default_registry()`.
///
/// This is a convenience wrapper: it builds the default registry and hands
/// both it and `line` to [`parse_line_with`], returning that call's result
/// unchanged.
///
/// # Errors
///
/// Propagates whatever error [`parse_line_with`] reports for `line`.
pub fn parse_line(line: &str) -> Result<Vec<Box<dyn Cmd>>, String> {
    parse_line_with(&default_registry(), line)
}
/// Parses `line` into commands, resolving each command word through `registry`.
///
/// The line is trimmed first. A line that is then empty, or that starts with
/// `#` or `//`, yields an empty vector. Any other line is cut into segments by
/// `split_on_dot`; each segment is trimmed and tokenized, and segments that
/// produce no tokens are skipped. For the rest, the first token is the command
/// word and the remaining tokens are its arguments, both passed to
/// `registry.parse`.
///
/// # Errors
///
/// Returns the first error produced by `registry.parse`; later segments are
/// not parsed once one fails.
pub fn parse_line_with(registry: &CmdRegistry, line: &str) -> Result<Vec<Box<dyn Cmd>>, String> {
    let trimmed = line.trim();
    let is_comment = trimmed.starts_with('#') || trimmed.starts_with("//");
    if trimmed.is_empty() || is_comment {
        return Ok(Vec::new());
    }

    let mut parsed = Vec::new();
    for raw_segment in split_on_dot(trimmed) {
        // A blank segment tokenizes to nothing, so the empty-token case below
        // also covers empty and whitespace-only segments.
        let tokens = tokenize(raw_segment.trim());
        if let Some((word, args)) = tokens.split_first() {
            parsed.push(registry.parse(word, args)?);
        }
    }
    Ok(parsed)
}
/// Splits `line` into command segments at standalone `.` separators.
///
/// A `.` is a separator only when it is outside double quotes and each of its
/// neighbours is either a whitespace character or a line boundary. Whitespace
/// is judged with `char::is_whitespace`, the same definition `tokenize` uses
/// to delimit words, so a dot that the tokenizer would see as its own word is
/// always treated as a separator here (including when it is flanked by
/// non-ASCII spaces such as U+00A0 or U+3000).
///
/// Every `"` toggles the quoted state; dots seen while quoted never split.
///
/// The returned slices borrow from `line`, exclude the separator dot itself,
/// and are not trimmed. At least one segment is always returned (the whole
/// line when no separator is found; `""` for an empty line).
fn split_on_dot(line: &str) -> Vec<&str> {
    let mut segments = Vec::new();
    let mut start = 0;
    let mut in_quote = false;
    for (i, c) in line.char_indices() {
        match c {
            '"' => in_quote = !in_quote,
            '.' if !in_quote => {
                // `i` and `after` are both char boundaries, so slicing is safe.
                let after = i + c.len_utf8();
                // Look at whole neighbouring chars rather than raw bytes so
                // multi-byte whitespace is recognised; a missing neighbour
                // (start/end of line) counts as a boundary.
                let before_ws = line[..i]
                    .chars()
                    .next_back()
                    .map_or(true, char::is_whitespace);
                let after_ws = line[after..]
                    .chars()
                    .next()
                    .map_or(true, char::is_whitespace);
                if before_ws && after_ws {
                    segments.push(&line[start..i]);
                    start = after;
                }
            }
            _ => {}
        }
    }
    // Whatever follows the last separator (possibly empty) is the final segment.
    segments.push(&line[start..]);
    segments
}
/// Breaks a command segment into tokens.
///
/// Tokens are separated by runs of whitespace. A token that begins with `"`
/// is a quoted string: it runs up to the next `"` (or to the end of the input
/// if the quote is never closed) and the quote characters themselves are
/// dropped, so whitespace inside is kept and `""` yields an empty token. Any
/// other token runs up to the next whitespace character; a `"` appearing in
/// the middle of such a token is kept as an ordinary character.
fn tokenize(input: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    // `rest` always starts at a token (or is empty): leading whitespace is
    // stripped after every token is taken.
    let mut rest = input.trim_start();
    while !rest.is_empty() {
        if let Some(quoted) = rest.strip_prefix('"') {
            // Quoted token: take everything up to the closing quote, or the
            // whole remainder when the quote is unterminated.
            let (body, tail) = match quoted.find('"') {
                Some(close) => (&quoted[..close], &quoted[close + 1..]),
                None => (quoted, ""),
            };
            tokens.push(body.to_string());
            rest = tail.trim_start();
        } else {
            // Bare word: take everything up to the next whitespace char.
            let end = rest.find(char::is_whitespace).unwrap_or(rest.len());
            tokens.push(rest[..end].to_string());
            rest = rest[end..].trim_start();
        }
    }
    tokens
}
#[cfg(test)]
mod tests {
    //! Unit tests for the command-line parser: end-to-end checks through
    //! `parse_line`, plus direct checks of the `split_on_dot` and `tokenize`
    //! helpers.

    use super::*;

    #[test]
    fn parse_add_category() {
        let cmds = parse_line("add-category Region").unwrap();
        assert_eq!(cmds.len(), 1);
        assert_eq!(cmds[0].name(), "add-category");
    }

    #[test]
    fn parse_add_item() {
        let cmds = parse_line("add-item Region East").unwrap();
        assert_eq!(cmds.len(), 1);
        assert_eq!(cmds[0].name(), "add-item");
    }

    #[test]
    fn parse_set_cell_number() {
        let cmds = parse_line("set-cell 100 Region/East Measure/Revenue").unwrap();
        assert_eq!(cmds.len(), 1);
        assert_eq!(cmds[0].name(), "set-cell");
    }

    #[test]
    fn parse_set_cell_text() {
        let cmds = parse_line("set-cell hello Region/East").unwrap();
        assert_eq!(cmds.len(), 1);
        assert_eq!(cmds[0].name(), "set-cell");
    }

    #[test]
    fn parse_multiple_commands_dot_separated() {
        let cmds = parse_line("add-category Region . add-item Region East").unwrap();
        assert_eq!(cmds.len(), 2);
        assert_eq!(cmds[0].name(), "add-category");
        assert_eq!(cmds[1].name(), "add-item");
    }

    #[test]
    fn parse_quoted_string() {
        let cmds = parse_line(r#"add-formula Measure "Profit = Revenue - Cost""#).unwrap();
        assert_eq!(cmds.len(), 1);
        assert_eq!(cmds[0].name(), "add-formula");
    }

    #[test]
    fn parse_set_axis() {
        let cmds = parse_line("set-axis Payee row").unwrap();
        assert_eq!(cmds[0].name(), "set-axis");
    }

    #[test]
    fn parse_set_axis_none() {
        let cmds = parse_line("set-axis Date none").unwrap();
        assert_eq!(cmds[0].name(), "set-axis");
    }

    #[test]
    fn parse_clear_cell() {
        let cmds = parse_line("clear-cell Region/East Measure/Revenue").unwrap();
        assert_eq!(cmds.len(), 1);
        assert_eq!(cmds[0].name(), "clear-cell");
    }

    #[test]
    fn parse_comments_and_blank_lines() {
        assert!(parse_line("").unwrap().is_empty());
        assert!(parse_line("# comment").unwrap().is_empty());
        assert!(parse_line("// comment").unwrap().is_empty());
    }

    #[test]
    fn parse_unknown_command_errors() {
        assert!(parse_line("frobnicate foo").is_err());
    }

    #[test]
    fn parse_missing_args_errors() {
        assert!(parse_line("add-category").is_err());
        assert!(parse_line("set-cell 100").is_err());
    }

    // --- split_on_dot -----------------------------------------------------

    /// A dot splits only when it stands alone between whitespace.
    #[test]
    fn split_on_dot_requires_standalone_dot() {
        assert_eq!(split_on_dot("a . b"), vec!["a ", " b"]);
        // Dots embedded in identifiers or numbers are left alone.
        assert_eq!(split_on_dot("Region.East 1.5"), vec!["Region.East 1.5"]);
        // Whitespace on one side only is not enough.
        assert_eq!(split_on_dot("a. b"), vec!["a. b"]);
        assert_eq!(split_on_dot("a .b"), vec!["a .b"]);
    }

    /// The start and end of the line count as boundaries for the dot.
    #[test]
    fn split_on_dot_at_line_boundaries() {
        assert_eq!(split_on_dot(". a"), vec!["", " a"]);
        assert_eq!(split_on_dot("a ."), vec!["a ", ""]);
        assert_eq!(split_on_dot(""), vec![""]);
    }

    /// A standalone dot inside a quoted string is not a separator.
    #[test]
    fn split_on_dot_ignores_dot_inside_quotes() {
        assert_eq!(
            split_on_dot(r#"x "a . b" . y"#),
            vec![r#"x "a . b" "#, " y"]
        );
    }

    // --- tokenize ---------------------------------------------------------

    /// Bare words are split on runs of whitespace.
    #[test]
    fn tokenize_splits_on_whitespace() {
        assert_eq!(tokenize("  a   b  "), vec!["a", "b"]);
        assert!(tokenize("   ").is_empty());
    }

    /// Quoted strings become one token with the quotes removed.
    #[test]
    fn tokenize_keeps_quoted_string_together() {
        assert_eq!(
            tokenize(r#"add-formula Measure "Profit = Revenue - Cost""#),
            vec!["add-formula", "Measure", "Profit = Revenue - Cost"]
        );
        assert_eq!(tokenize(r#"x "" y"#), vec!["x", "", "y"]);
    }
}