refactor(io): move persistence and import into improvise-io (improvise-8zh)
Relocate the two I/O module trees into the improvise-io sub-crate scaffolded in the previous commit: git mv src/persistence -> crates/improvise-io/src/persistence git mv src/import -> crates/improvise-io/src/import The grammar file `improv.pest` moves alongside `persistence/mod.rs`; the `#[grammar = "persistence/improv.pest"]` attribute resolves relative to the new crate root and keeps working unchanged. No path edits inside the moved code: the `crate::model::*`, `crate::view::*`, `crate::workbook::*`, `crate::format::*`, and `crate::formula::*` imports inside persistence and import all continue to resolve because improvise-io's lib.rs re-exports those modules from improvise-core and improvise-formula, mirroring the pattern improvise-core uses for `formula`. Verified no `crate::ui::*`, `crate::command::*`, `crate::draw::*` imports exist in the moved code (per improvise-8zh acceptance criterion #3). Main-crate `src/lib.rs` now re-exports `import` and `persistence` from improvise-io, keeping every `crate::persistence::*` and `crate::import::*` path in the 4 consumer files (ui/app.rs, ui/effect.rs, ui/import_wizard_ui.rs, main.rs) resolving unchanged — no downstream edits needed. `examples/gen-grammar.rs` had `include_str!("../src/persistence/improv.pest")`; updated the relative path to the new location under `crates/improvise-io/src/persistence/`. Verification: - cargo check --workspace --examples: clean - cargo test --workspace: 616 passing (219 main + 190 core + 65 formula + 142 io) - cargo clippy --workspace --tests: clean - cargo build -p improvise-io: standalone build succeeds, confirming no UI/command leakage into the IO crate (improvise-8zh acceptance #2, #3) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
292
crates/improvise-io/src/import/analyzer.rs
Normal file
292
crates/improvise-io/src/import/analyzer.rs
Normal file
@ -0,0 +1,292 @@
|
||||
use chrono::{Datelike, NaiveDate};
|
||||
use serde_json::Value;
|
||||
use std::collections::HashSet;
|
||||
|
||||
/// Role inferred for an imported field from the shape of its values.
#[derive(Debug, Clone, PartialEq)]
pub enum FieldKind {
    /// Small number of distinct string values → dimension/category
    Category,
    /// Numeric values → measure
    Measure,
    /// Date/time strings → time category
    TimeCategory,
    /// Many unique strings (IDs, names) → label/identifier
    Label,
}
|
||||
|
||||
/// Date components that can be extracted from a date field.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DateComponent {
    /// Calendar year, rendered as "YYYY" (see `extract_date_component`).
    Year,
    /// Year plus month, rendered as "YYYY-MM".
    Month,
    /// Year plus quarter, rendered as "YYYY-Qn".
    Quarter,
}
|
||||
|
||||
/// One analyzed field together with its proposed interpretation.
///
/// Produced by `analyze_records`; `accepted` starts out `true` and
/// `date_components` starts out empty — presumably toggled later in the
/// import wizard UI (not visible here).
#[derive(Debug, Clone)]
pub struct FieldProposal {
    /// Field (column) name as it appears in the source records.
    pub field: String,
    /// Inferred role for this field.
    pub kind: FieldKind,
    /// Distinct string values observed; empty for Measure and mixed fields.
    pub distinct_values: Vec<String>,
    /// Whether this proposal is currently accepted.
    pub accepted: bool,
    /// Detected chrono format string (e.g., "%m/%d/%Y"). Only set for TimeCategory.
    pub date_format: Option<String>,
    /// Which date components to extract as new categories.
    pub date_components: Vec<DateComponent>,
}
|
||||
|
||||
impl FieldProposal {
|
||||
pub fn kind_label(&self) -> &'static str {
|
||||
match self.kind {
|
||||
FieldKind::Category => "Category (dimension)",
|
||||
FieldKind::Measure => "Measure (numeric)",
|
||||
FieldKind::TimeCategory => "Time Category",
|
||||
FieldKind::Label => "Label (per-row, drill-view only)",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Common date formats to try, in order of preference.
///
/// Order matters: an ambiguous sample like "02/04/2025" parses under both
/// "%m/%d/%Y" and "%d/%m/%Y", and `detect_date_format` returns the first
/// format that fits, so US month-first wins ties.
const DATE_FORMATS: &[&str] = &[
    "%Y-%m-%d",  // 2025-04-02
    "%m/%d/%Y",  // 04/02/2025
    "%m/%d/%y",  // 04/02/25
    "%d/%m/%Y",  // 02/04/2025
    "%Y%m%d",    // 20250402
    "%b %d, %Y", // Apr 02, 2025
    "%B %d, %Y", // April 02, 2025
    "%d-%b-%Y",  // 02-Apr-2025
];
|
||||
|
||||
/// Try to detect a chrono date format from sample values.
|
||||
/// Returns the first format that successfully parses all non-empty samples.
|
||||
pub fn detect_date_format(samples: &[&str]) -> Option<String> {
|
||||
let samples: Vec<&str> = samples.iter().copied().filter(|s| !s.is_empty()).collect();
|
||||
if samples.is_empty() {
|
||||
return None;
|
||||
}
|
||||
// Try up to 10 samples for efficiency
|
||||
let test_samples: Vec<&str> = samples.into_iter().take(10).collect();
|
||||
for fmt in DATE_FORMATS {
|
||||
if test_samples
|
||||
.iter()
|
||||
.all(|s| NaiveDate::parse_from_str(s, fmt).is_ok())
|
||||
{
|
||||
return Some(fmt.to_string());
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Parse a date string and extract a component value.
|
||||
pub fn extract_date_component(
|
||||
value: &str,
|
||||
format: &str,
|
||||
component: DateComponent,
|
||||
) -> Option<String> {
|
||||
let date = NaiveDate::parse_from_str(value, format).ok()?;
|
||||
Some(match component {
|
||||
DateComponent::Year => format!("{}", date.format("%Y")),
|
||||
DateComponent::Month => format!("{}", date.format("%Y-%m")),
|
||||
DateComponent::Quarter => {
|
||||
let q = (date.month0() / 3) + 1;
|
||||
format!("{}-Q{}", date.format("%Y"), q)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Maximum number of distinct string values for a field to be proposed as a
/// Category; above this it becomes a Label.
const CATEGORY_THRESHOLD: usize = 20;
|
||||
|
||||
pub fn analyze_records(records: &[Value]) -> Vec<FieldProposal> {
|
||||
if records.is_empty() {
|
||||
return vec![];
|
||||
}
|
||||
|
||||
// Collect all field names
|
||||
let mut fields: Vec<String> = Vec::new();
|
||||
for record in records {
|
||||
if let Value::Object(map) = record {
|
||||
for key in map.keys() {
|
||||
if !fields.contains(key) {
|
||||
fields.push(key.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fields
|
||||
.into_iter()
|
||||
.map(|field| {
|
||||
let values: Vec<&Value> = records.iter().filter_map(|r| r.get(&field)).collect();
|
||||
|
||||
let all_numeric = values.iter().all(|v| v.is_number());
|
||||
let all_string = values.iter().all(|v| v.is_string());
|
||||
|
||||
if all_numeric {
|
||||
return FieldProposal {
|
||||
field,
|
||||
kind: FieldKind::Measure,
|
||||
distinct_values: vec![],
|
||||
accepted: true,
|
||||
date_format: None,
|
||||
date_components: vec![],
|
||||
};
|
||||
}
|
||||
|
||||
if all_string {
|
||||
let distinct: HashSet<&str> = values.iter().filter_map(|v| v.as_str()).collect();
|
||||
let distinct_vec: Vec<String> = distinct.into_iter().map(String::from).collect();
|
||||
let n = distinct_vec.len();
|
||||
|
||||
// Try chrono-based date detection
|
||||
let samples: Vec<&str> = distinct_vec.iter().map(|s| s.as_str()).collect();
|
||||
let date_format = detect_date_format(&samples);
|
||||
|
||||
if date_format.is_some() {
|
||||
return FieldProposal {
|
||||
field,
|
||||
kind: FieldKind::TimeCategory,
|
||||
distinct_values: distinct_vec,
|
||||
accepted: true,
|
||||
date_format,
|
||||
date_components: vec![],
|
||||
};
|
||||
}
|
||||
|
||||
if n <= CATEGORY_THRESHOLD {
|
||||
return FieldProposal {
|
||||
field,
|
||||
kind: FieldKind::Category,
|
||||
distinct_values: distinct_vec,
|
||||
accepted: true,
|
||||
date_format: None,
|
||||
date_components: vec![],
|
||||
};
|
||||
}
|
||||
|
||||
return FieldProposal {
|
||||
field,
|
||||
kind: FieldKind::Label,
|
||||
distinct_values: distinct_vec,
|
||||
accepted: true,
|
||||
date_format: None,
|
||||
date_components: vec![],
|
||||
};
|
||||
}
|
||||
|
||||
// Mixed or other: treat as label
|
||||
FieldProposal {
|
||||
field,
|
||||
kind: FieldKind::Label,
|
||||
distinct_values: vec![],
|
||||
accepted: true,
|
||||
date_format: None,
|
||||
date_components: vec![],
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Extract nested array from JSON by dot-path
|
||||
pub fn extract_array_at_path<'a>(value: &'a Value, path: &str) -> Option<&'a Vec<Value>> {
|
||||
if path.is_empty() {
|
||||
return value.as_array();
|
||||
}
|
||||
let mut current = value;
|
||||
for part in path.split('.') {
|
||||
current = current.get(part)?;
|
||||
}
|
||||
current.as_array()
|
||||
}
|
||||
|
||||
/// Find candidate paths to arrays in JSON
|
||||
pub fn find_array_paths(value: &Value) -> Vec<String> {
|
||||
let mut paths = Vec::new();
|
||||
find_array_paths_inner(value, "", &mut paths);
|
||||
paths
|
||||
}
|
||||
|
||||
fn find_array_paths_inner(value: &Value, prefix: &str, paths: &mut Vec<String>) {
|
||||
match value {
|
||||
Value::Array(_) => {
|
||||
paths.push(prefix.to_string());
|
||||
}
|
||||
Value::Object(map) => {
|
||||
for (key, val) in map {
|
||||
let path = if prefix.is_empty() {
|
||||
key.clone()
|
||||
} else {
|
||||
format!("{prefix}.{key}")
|
||||
};
|
||||
find_array_paths_inner(val, &path, paths);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
// Unit tests: date-format detection, date-component extraction, and
// end-to-end field analysis over JSON records.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn detect_iso_date_format() {
        let samples = vec!["2025-01-15", "2025-02-28", "2024-12-01"];
        assert_eq!(detect_date_format(&samples), Some("%Y-%m-%d".to_string()));
    }

    #[test]
    fn detect_us_date_format() {
        let samples = vec!["03/31/2026", "01/15/2025", "12/25/2024"];
        assert_eq!(detect_date_format(&samples), Some("%m/%d/%Y".to_string()));
    }

    #[test]
    fn detect_short_year_format() {
        // Two-digit years are ambiguous with four-digit format, so %m/%d/%Y
        // matches first. This is expected — the user can override in the wizard.
        let samples = vec!["03/31/26", "01/15/25"];
        assert!(detect_date_format(&samples).is_some());
    }

    #[test]
    fn detect_no_date_format() {
        let samples = vec!["hello", "world"];
        assert_eq!(detect_date_format(&samples), None);
    }

    #[test]
    fn extract_year_component() {
        let result = extract_date_component("03/31/2026", "%m/%d/%Y", DateComponent::Year);
        assert_eq!(result, Some("2026".to_string()));
    }

    #[test]
    fn extract_month_component() {
        let result = extract_date_component("03/31/2026", "%m/%d/%Y", DateComponent::Month);
        assert_eq!(result, Some("2026-03".to_string()));
    }

    #[test]
    fn extract_quarter_component() {
        let result = extract_date_component("03/31/2026", "%m/%d/%Y", DateComponent::Quarter);
        assert_eq!(result, Some("2026-Q1".to_string()));
    }

    #[test]
    fn extract_quarter_q4() {
        // December is month0 == 11 → 11/3 + 1 == Q4.
        let result = extract_date_component("12/15/2025", "%m/%d/%Y", DateComponent::Quarter);
        assert_eq!(result, Some("2025-Q4".to_string()));
    }

    #[test]
    fn analyze_detects_time_category_with_format() {
        let records: Vec<Value> = vec![
            serde_json::json!({"Date": "01/15/2025", "Amount": 100}),
            serde_json::json!({"Date": "02/20/2025", "Amount": 200}),
        ];
        let proposals = analyze_records(&records);
        let date_prop = proposals.iter().find(|p| p.field == "Date").unwrap();
        assert_eq!(date_prop.kind, FieldKind::TimeCategory);
        assert_eq!(date_prop.date_format, Some("%m/%d/%Y".to_string()));
    }
}
|
||||
300
crates/improvise-io/src/import/csv_parser.rs
Normal file
300
crates/improvise-io/src/import/csv_parser.rs
Normal file
@ -0,0 +1,300 @@
|
||||
use std::path::Path;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use csv::ReaderBuilder;
|
||||
use serde_json::Value;
|
||||
|
||||
/// True when `path` has a `.csv` extension, compared case-insensitively.
/// Paths with no extension at all return false.
pub fn csv_path_p(path: &Path) -> bool {
    match path.extension() {
        Some(ext) => ext.eq_ignore_ascii_case("csv"),
        None => false,
    }
}
|
||||
|
||||
/// Parse a CSV file and return records as serde_json::Value array
|
||||
pub fn parse_csv(path: &Path) -> Result<Vec<Value>> {
|
||||
let mut reader = ReaderBuilder::new()
|
||||
.has_headers(true)
|
||||
.flexible(true)
|
||||
.trim(csv::Trim::All)
|
||||
.from_path(path)
|
||||
.with_context(|| format!("Failed to open CSV file: {}", path.display()))?;
|
||||
|
||||
// Detect if first row looks like headers (strings) or data (mixed)
|
||||
let has_headers = reader.headers().is_ok();
|
||||
|
||||
let mut records = Vec::new();
|
||||
let mut headers = Vec::new();
|
||||
|
||||
if has_headers {
|
||||
headers = reader
|
||||
.headers()
|
||||
.with_context(|| "Failed to read CSV headers")?
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
}
|
||||
|
||||
for result in reader.records() {
|
||||
let record = result.with_context(|| "Failed to read CSV record")?;
|
||||
let mut map = serde_json::Map::new();
|
||||
|
||||
for (i, field) in record.iter().enumerate() {
|
||||
let json_value: Value = parse_csv_field(field);
|
||||
if has_headers {
|
||||
if let Some(header) = headers.get(i) {
|
||||
map.insert(header.clone(), json_value);
|
||||
}
|
||||
} else {
|
||||
map.insert(i.to_string(), json_value);
|
||||
}
|
||||
}
|
||||
|
||||
if !map.is_empty() {
|
||||
records.push(Value::Object(map));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(records)
|
||||
}
|
||||
|
||||
/// Parse multiple CSV files and merge into a single JSON array.
|
||||
/// Each record gets a "File" field set to the filename stem (e.g., "sales" from "sales.csv").
|
||||
pub fn merge_csvs(paths: &[impl AsRef<Path>]) -> Result<Vec<Value>> {
|
||||
let mut all_records = Vec::new();
|
||||
for path in paths {
|
||||
let path = path.as_ref();
|
||||
let stem = path
|
||||
.file_stem()
|
||||
.and_then(|s| s.to_str())
|
||||
.unwrap_or("unknown")
|
||||
.to_string();
|
||||
let records = parse_csv(path)?;
|
||||
for mut record in records {
|
||||
if let Value::Object(ref mut map) = record {
|
||||
map.insert("File".to_string(), Value::String(stem.clone()));
|
||||
}
|
||||
all_records.push(record);
|
||||
}
|
||||
}
|
||||
Ok(all_records)
|
||||
}
|
||||
|
||||
fn parse_csv_field(field: &str) -> Value {
|
||||
if field.is_empty() {
|
||||
return Value::Null;
|
||||
}
|
||||
|
||||
// Try to parse as number (integer or float)
|
||||
if let Ok(num) = field.parse::<i64>() {
|
||||
return Value::Number(serde_json::Number::from(num));
|
||||
}
|
||||
|
||||
if let Ok(num) = field.parse::<f64>() {
|
||||
return Value::Number(
|
||||
serde_json::Number::from_f64(num).unwrap_or(serde_json::Number::from(0)),
|
||||
);
|
||||
}
|
||||
|
||||
// Otherwise treat as string
|
||||
Value::String(field.to_string())
|
||||
}
|
||||
|
||||
// Unit tests: CSV→JSON conversion, multi-file merging, and RFC 4180
// quoting/escaping edge cases.
#[cfg(test)]
mod tests {
    use super::*;
    use std::{fs, path::PathBuf};
    use tempfile::tempdir;

    // Writes `content` to a temp file; the TempDir is returned alongside the
    // path because the directory (and file) is deleted when it drops.
    fn create_temp_csv(content: &str) -> (PathBuf, tempfile::TempDir) {
        let dir = tempdir().unwrap();
        let path = dir.path().join("test.csv");
        fs::write(&path, content).unwrap();
        (path, dir)
    }

    #[test]
    fn parse_simple_csv() {
        let (path, _dir) =
            create_temp_csv("Region,Product,Revenue\nEast,Shirts,1000\nWest,Shirts,800");
        let records = parse_csv(&path).unwrap();

        assert_eq!(records.len(), 2);
        assert_eq!(records[0]["Region"], Value::String("East".to_string()));
        assert_eq!(records[0]["Product"], Value::String("Shirts".to_string()));
        assert_eq!(
            records[0]["Revenue"],
            Value::Number(serde_json::Number::from(1000))
        );
    }

    #[test]
    fn parse_csv_with_floats() {
        let (path, _dir) =
            create_temp_csv("Region,Revenue,Cost\nEast,1000.50,600.25\nWest,800.75,500.00");
        let records = parse_csv(&path).unwrap();

        assert_eq!(records.len(), 2);
        assert!(records[0]["Revenue"].is_f64());
        assert_eq!(
            records[0]["Revenue"],
            Value::Number(serde_json::Number::from_f64(1000.50).unwrap())
        );
    }

    #[test]
    fn parse_csv_with_quoted_fields() {
        let (path, _dir) =
            create_temp_csv("Product,Description,Price\n\"Shirts\",\"A nice shirt\",10.00");
        let records = parse_csv(&path).unwrap();

        assert_eq!(records.len(), 1);
        assert_eq!(records[0]["Product"], Value::String("Shirts".to_string()));
        assert_eq!(
            records[0]["Description"],
            Value::String("A nice shirt".to_string())
        );
    }

    #[test]
    fn parse_csv_with_empty_values() {
        // Empty fields become JSON null, not empty strings.
        let (path, _dir) = create_temp_csv("Region,Product,Revenue\nEast,,1000\nWest,Shirts,");
        let records = parse_csv(&path).unwrap();

        assert_eq!(records.len(), 2);
        assert_eq!(records[0]["Product"], Value::Null);
        assert_eq!(records[1]["Revenue"], Value::Null);
    }

    #[test]
    fn parse_csv_mixed_types() {
        // Note: booleans are NOT parsed — "true"/"false" stay strings.
        let (path, _dir) =
            create_temp_csv("Name,Count,Price,Active\nWidget,5,9.99,true\nGadget,3,19.99,false");
        let records = parse_csv(&path).unwrap();

        assert_eq!(records.len(), 2);
        assert_eq!(records[0]["Name"], Value::String("Widget".to_string()));
        assert_eq!(
            records[0]["Count"],
            Value::Number(serde_json::Number::from(5))
        );
        assert!(records[0]["Price"].is_f64());
        assert_eq!(records[0]["Active"], Value::String("true".to_string()));
    }

    #[test]
    fn merge_csvs_adds_file_field_from_stem() {
        let dir = tempdir().unwrap();
        let sales = dir.path().join("sales.csv");
        let expenses = dir.path().join("expenses.csv");
        fs::write(&sales, "Region,Revenue\nEast,100\nWest,200").unwrap();
        fs::write(&expenses, "Region,Revenue\nEast,50\nWest,75").unwrap();

        let records = merge_csvs(&[sales, expenses]).unwrap();
        assert_eq!(records.len(), 4);
        assert_eq!(records[0]["File"], Value::String("sales".to_string()));
        assert_eq!(records[1]["File"], Value::String("sales".to_string()));
        assert_eq!(records[2]["File"], Value::String("expenses".to_string()));
        assert_eq!(records[3]["File"], Value::String("expenses".to_string()));
        // Original fields preserved
        assert_eq!(records[0]["Region"], Value::String("East".to_string()));
        assert_eq!(
            records[2]["Revenue"],
            Value::Number(serde_json::Number::from(50))
        );
    }

    #[test]
    fn merge_csvs_single_file_works() {
        let dir = tempdir().unwrap();
        let path = dir.path().join("data.csv");
        fs::write(&path, "Name,Value\nA,1").unwrap();

        let records = merge_csvs(&[path]).unwrap();
        assert_eq!(records.len(), 1);
        assert_eq!(records[0]["File"], Value::String("data".to_string()));
        assert_eq!(records[0]["Name"], Value::String("A".to_string()));
    }

    // ── RFC 4180 edge cases ───────────────────────────────────────────

    #[test]
    fn rfc4180_embedded_comma_in_quoted_field() {
        let (path, _dir) =
            create_temp_csv("Name,Address,Value\n\"Smith, John\",\"123 Main St, Apt 4\",100");
        let records = parse_csv(&path).unwrap();
        assert_eq!(records.len(), 1);
        assert_eq!(records[0]["Name"], Value::String("Smith, John".to_string()));
        assert_eq!(
            records[0]["Address"],
            Value::String("123 Main St, Apt 4".to_string())
        );
    }

    #[test]
    fn rfc4180_escaped_quotes_in_field() {
        // RFC 4180: doubled quotes ("") inside a quoted field represent a literal quote
        let (path, _dir) =
            create_temp_csv("Name,Description,Value\nWidget,\"A \"\"great\"\" product\",10");
        let records = parse_csv(&path).unwrap();
        assert_eq!(records.len(), 1);
        assert_eq!(
            records[0]["Description"],
            Value::String("A \"great\" product".to_string())
        );
    }

    #[test]
    fn rfc4180_newline_in_quoted_field() {
        // RFC 4180: quoted fields may contain newlines
        let (path, _dir) = create_temp_csv("Name,Notes,Value\n\"Widget\",\"Line 1\nLine 2\",10");
        let records = parse_csv(&path).unwrap();
        assert_eq!(records.len(), 1);
        assert_eq!(
            records[0]["Notes"],
            Value::String("Line 1\nLine 2".to_string())
        );
    }

    #[test]
    fn rfc4180_embedded_comma_and_quotes_combined() {
        let (path, _dir) =
            create_temp_csv("Name,Desc\n\"Smith, \"\"Jr.\"\"\",\"Said \"\"hello, world\"\"\"");
        let records = parse_csv(&path).unwrap();
        assert_eq!(records.len(), 1);
        assert_eq!(
            records[0]["Name"],
            Value::String("Smith, \"Jr.\"".to_string())
        );
        assert_eq!(
            records[0]["Desc"],
            Value::String("Said \"hello, world\"".to_string())
        );
    }

    #[test]
    fn parse_checking_csv_format() {
        // Simulates the format of /Users/edwlan/Downloads/Checking1.csv
        let (path, _dir) = create_temp_csv(
            "Date,Amount,Flag,CheckNo,Description\n\
             \"03/31/2026\",\"-50.00\",\"*\",\"\",\"VENMO PAYMENT 260331\"\n\
             \"03/31/2026\",\"-240.00\",\"*\",\"\",\"ROBINHOOD DEBITS XXXXX3795\"",
        );
        let records = parse_csv(&path).unwrap();

        assert_eq!(records.len(), 2);
        assert_eq!(records[0]["Date"], Value::String("03/31/2026".to_string()));
        assert_eq!(
            records[0]["Amount"],
            Value::Number(serde_json::Number::from_f64(-50.00).unwrap())
        );
        assert_eq!(records[0]["Flag"], Value::String("*".to_string()));
        assert_eq!(records[0]["CheckNo"], Value::Null);
        assert_eq!(
            records[0]["Description"],
            Value::String("VENMO PAYMENT 260331".to_string())
        );
        assert_eq!(
            records[1]["Amount"],
            Value::Number(serde_json::Number::from_f64(-240.00).unwrap())
        );
    }
}
|
||||
3
crates/improvise-io/src/import/mod.rs
Normal file
3
crates/improvise-io/src/import/mod.rs
Normal file
@ -0,0 +1,3 @@
|
||||
//! Data-import pipeline: field-kind analysis, CSV parsing, and the
//! interactive import wizard.
pub mod analyzer;
pub mod csv_parser;
pub mod wizard;
|
||||
1117
crates/improvise-io/src/import/wizard.rs
Normal file
1117
crates/improvise-io/src/import/wizard.rs
Normal file
File diff suppressed because it is too large
Load Diff
@ -9,7 +9,8 @@
|
||||
//! Re-exports the core modules under their conventional names so code in
//! this crate can keep using `crate::model::*`, `crate::view::*`,
//! `crate::workbook::*`, `crate::format::*`, and `crate::formula::*` paths.
//!
//! The `import` and `persistence` trees moved here from the main crate;
//! their `crate::…` imports resolve through the re-exports below.
pub use improvise_core::{format, model, view, workbook};
pub use improvise_formula as formula;

pub mod import;
pub mod persistence;
||||
123
crates/improvise-io/src/persistence/improv.pest
Normal file
123
crates/improvise-io/src/persistence/improv.pest
Normal file
@ -0,0 +1,123 @@
|
||||
// ── .improv file grammar (v2025-04-09) ───────────────────────────────────────
//
// Line-oriented, markdown-flavoured format for multi-dimensional models.
// Sections may appear in any order.
//
// Names: bare alphanumeric or pipe-quoted |like this|.
// Inside pipes, backslash escapes: \| for literal pipe, \\ for backslash,
// \n for newline.
// Values: pipe-quoted |text| or bare numbers.

file = {
    SOI ~
    version_line ~
    model_name ~
    initial_view? ~
    section* ~
    EOI
}

version_line = { "v2025-04-09" ~ NEWLINE ~ blank_lines }
model_name = { "# " ~ rest_of_line ~ NEWLINE ~ blank_lines }
initial_view = { "Initial View: " ~ rest_of_line ~ NEWLINE ~ blank_lines }

// Silent rule: the section kinds start with distinct literals, so ordered
// choice is unambiguous here.
section = _{
    category_section
    | formulas_section
    | data_section
    | view_section
}

// ── Category ─────────────────────────────────────────────────────────────────

category_section = {
    "## Category: " ~ rest_of_line ~ NEWLINE ~ blank_lines ~
    category_entry*
}

category_entry = _{ group_hierarchy | grouped_item | item_list }

// Comma-separated bare items (no group): `- Food, Gas, Total`
item_list = {
    "- " ~ name ~ ("," ~ " "* ~ name)* ~ NEWLINE ~ blank_lines
}

// Single item with group bracket: `- Jan[Q1]`
grouped_item = {
    "- " ~ name ~ "[" ~ name ~ "]" ~ NEWLINE ~ blank_lines
}

// Hierarchy line: `> Name[Group]`
group_hierarchy = {
    "> " ~ name ~ "[" ~ name ~ "]" ~ NEWLINE ~ blank_lines
}

// ── Formulas ─────────────────────────────────────────────────────────────────

formulas_section = {
    "## Formulas" ~ NEWLINE ~ blank_lines ~
    formula_line*
}

formula_line = {
    "- " ~ rest_of_line ~ NEWLINE ~ blank_lines
}

// ── Data ─────────────────────────────────────────────────────────────────────

data_section = {
    "## Data" ~ NEWLINE ~ blank_lines ~
    data_line*
}

data_line = {
    coord_list ~ " = " ~ cell_value ~ NEWLINE ~ blank_lines
}

coord_list = { coord ~ (", " ~ coord)* }
coord = { name ~ "=" ~ name }

// Ordered choice: numeric literal first, then pipe-quoted text, then the
// bare_value catch-all (which consumes to end of line).
cell_value = _{ number | pipe_quoted | bare_value }

number = @{
    "-"? ~ ASCII_DIGIT+ ~ ("." ~ ASCII_DIGIT+)? ~ (("e" | "E") ~ ("+" | "-")? ~ ASCII_DIGIT+)?
}

bare_value = @{ (!NEWLINE ~ ANY)+ }

// ── View ─────────────────────────────────────────────────────────────────────

view_section = {
    "## View: " ~ rest_of_line ~ NEWLINE ~ blank_lines ~
    view_entry*
}

view_entry = _{ format_line | hidden_line | collapsed_line | axis_line }

axis_line = {
    name ~ ": " ~ axis_kind ~ (", " ~ name)? ~ NEWLINE ~ blank_lines
}

axis_kind = @{ "row" | "column" | "page" | "none" }

format_line = { "format: " ~ rest_of_line ~ NEWLINE ~ blank_lines }
hidden_line = { "hidden: " ~ name ~ "/" ~ name ~ NEWLINE ~ blank_lines }
collapsed_line = { "collapsed: " ~ name ~ "/" ~ name ~ NEWLINE ~ blank_lines }

// ── Names ────────────────────────────────────────────────────────────────────
//
// A name is either pipe-quoted or a bare identifier.
// Pipe-quoted: |Income, Gross| — backslash escapes inside:
//   \| = literal pipe, \\ = literal backslash, \n = newline
// Bare: no = , | [ ] / : # or newlines.

name = _{ pipe_quoted | bare_name }

pipe_quoted = { "|" ~ pipe_inner ~ "|" }
pipe_inner = @{ ("\\" ~ ANY | !"|" ~ ANY)* }

bare_name = @{ ('A'..'Z' | 'a'..'z' | "_") ~ ('A'..'Z' | 'a'..'z' | '0'..'9' | "_" | "-")* }

// ── Shared ───────────────────────────────────────────────────────────────────

rest_of_line = @{ (!NEWLINE ~ ANY)* }
blank_lines = _{ NEWLINE* }
|
||||
2410
crates/improvise-io/src/persistence/mod.rs
Normal file
2410
crates/improvise-io/src/persistence/mod.rs
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user