Add CSV import functionality

- Use csv crate for robust CSV parsing (handles quoted fields, empty values, \r\n)
- Extend --import command to auto-detect format by file extension (.csv or .json)
- Reuse existing ImportPipeline and analyzer for field type detection
- Detect categories automatically from string fields and measures from numeric fields
- Update help text and welcome screen to mention CSV support

All 201 tests pass.
This commit is contained in:
Edward Langley
2026-04-01 01:32:19 -07:00
parent 2cf1123bcb
commit 23e26f0e06
6 changed files with 256 additions and 38 deletions

159
src/import/csv_parser.rs Normal file
View File

@ -0,0 +1,159 @@
use anyhow::{Context, Result};
use csv::ReaderBuilder;
use serde_json::Value;
/// Parse a CSV file into a list of JSON objects, one per data row.
///
/// The first row is always treated as the header row and supplies the object
/// keys. Surrounding whitespace is trimmed from headers and fields, and every
/// field is converted to a JSON value by `parse_csv_field`.
///
/// Rows may have differing lengths: fields missing at the end of a row are
/// simply absent from its object, fields beyond the last header are dropped,
/// and rows that produce no keyed fields are skipped. If a header name is
/// repeated, the last column with that name wins.
///
/// # Errors
///
/// Returns an error if the file cannot be opened, or if the header row or any
/// data record cannot be read (for example because of invalid UTF-8).
pub fn parse_csv(path: &str) -> Result<Vec<Value>> {
    let mut reader = ReaderBuilder::new()
        .has_headers(true)
        .flexible(true)
        .trim(csv::Trim::All)
        .from_path(path)
        .with_context(|| format!("Failed to open CSV file: {path}"))?;

    // Read the header row exactly once and surface any failure to the caller.
    // Swallowing the error here would silently key every row by column index
    // and lose the real column names.
    let headers: Vec<String> = reader
        .headers()
        .with_context(|| "Failed to read CSV headers")?
        .iter()
        .map(str::to_string)
        .collect();

    let mut records = Vec::new();
    for result in reader.records() {
        let record = result.with_context(|| "Failed to read CSV record")?;

        // `zip` stops at the shorter side: short rows omit trailing keys and
        // fields without a matching header are ignored.
        let map: serde_json::Map<String, Value> = headers
            .iter()
            .zip(record.iter())
            .map(|(header, field)| (header.clone(), parse_csv_field(field)))
            .collect();

        if !map.is_empty() {
            records.push(Value::Object(map));
        }
    }

    Ok(records)
}
/// Convert a single (already trimmed) CSV field into a JSON value.
///
/// * An empty field becomes `null`.
/// * Text that parses as an `i64` becomes an integer number.
/// * Text that parses as a finite `f64` becomes a floating-point number.
/// * Everything else is kept verbatim as a string.
///
/// `str::parse::<f64>` also accepts spellings such as `NaN`, `inf` and
/// `infinity`, and maps overflowing literals like `1e999` to infinity. JSON
/// cannot represent those values, so such fields are preserved as strings
/// instead of being coerced to a number.
fn parse_csv_field(field: &str) -> Value {
    if field.is_empty() {
        return Value::Null;
    }

    // Try the integer form first so whole numbers keep their exact value.
    if let Ok(int) = field.parse::<i64>() {
        return Value::Number(serde_json::Number::from(int));
    }

    // `Number::from_f64` yields `None` for NaN/infinity; in that case (and
    // when the text is not numeric at all) fall back to the original text.
    field
        .parse::<f64>()
        .ok()
        .and_then(serde_json::Number::from_f64)
        .map_or_else(|| Value::String(field.to_string()), Value::Number)
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// Writes `content` to `test.csv` inside a fresh temporary directory.
    ///
    /// Returns the file path and the directory guard. Callers keep the guard
    /// bound for the duration of the test, because tempfile removes the
    /// directory when the guard is dropped.
    fn create_temp_csv(content: &str) -> (String, tempfile::TempDir) {
        let dir = tempdir().unwrap();
        let file = dir.path().join("test.csv");
        fs::write(&file, content).unwrap();
        (file.to_string_lossy().into_owned(), dir)
    }

    /// Shorthand for a JSON string value.
    fn text(s: &str) -> Value {
        Value::String(s.to_string())
    }

    /// Shorthand for a JSON integer value.
    fn int(n: i64) -> Value {
        Value::Number(serde_json::Number::from(n))
    }

    /// Shorthand for a JSON floating-point value; `x` must be finite.
    fn float(x: f64) -> Value {
        Value::Number(serde_json::Number::from_f64(x).unwrap())
    }

    #[test]
    fn parse_simple_csv() {
        let (csv_path, _guard) =
            create_temp_csv("Region,Product,Revenue\nEast,Shirts,1000\nWest,Shirts,800");
        let rows = parse_csv(&csv_path).unwrap();

        assert_eq!(rows.len(), 2);
        let first = &rows[0];
        assert_eq!(first["Region"], text("East"));
        assert_eq!(first["Product"], text("Shirts"));
        assert_eq!(first["Revenue"], int(1000));
    }

    #[test]
    fn parse_csv_with_floats() {
        let (csv_path, _guard) =
            create_temp_csv("Region,Revenue,Cost\nEast,1000.50,600.25\nWest,800.75,500.00");
        let rows = parse_csv(&csv_path).unwrap();

        assert_eq!(rows.len(), 2);
        let revenue = &rows[0]["Revenue"];
        assert!(revenue.is_f64());
        assert_eq!(*revenue, float(1000.50));
    }

    #[test]
    fn parse_csv_with_quoted_fields() {
        let (csv_path, _guard) =
            create_temp_csv("Product,Description,Price\n\"Shirts\",\"A nice shirt\",10.00");
        let rows = parse_csv(&csv_path).unwrap();

        assert_eq!(rows.len(), 1);
        // Quotes delimit the field and must not appear in the parsed value.
        assert_eq!(rows[0]["Product"], text("Shirts"));
        assert_eq!(rows[0]["Description"], text("A nice shirt"));
    }

    #[test]
    fn parse_csv_with_empty_values() {
        let (csv_path, _guard) =
            create_temp_csv("Region,Product,Revenue\nEast,,1000\nWest,Shirts,");
        let rows = parse_csv(&csv_path).unwrap();

        assert_eq!(rows.len(), 2);
        // Empty fields, whether mid-row or trailing, become JSON null.
        assert_eq!(rows[0]["Product"], Value::Null);
        assert_eq!(rows[1]["Revenue"], Value::Null);
    }

    #[test]
    fn parse_csv_mixed_types() {
        let (csv_path, _guard) = create_temp_csv(
            "Name,Count,Price,Active\nWidget,5,9.99,true\nGadget,3,19.99,false",
        );
        let rows = parse_csv(&csv_path).unwrap();

        assert_eq!(rows.len(), 2);
        let widget = &rows[0];
        assert_eq!(widget["Name"], text("Widget"));
        assert_eq!(widget["Count"], int(5));
        assert!(widget["Price"].is_f64());
        // Booleans are not special-cased: they stay strings.
        assert_eq!(widget["Active"], text("true"));
    }

    #[test]
    fn parse_checking_csv_format() {
        // Shaped like a bank checking-account export: every field is quoted,
        // amounts are signed decimals, and the check-number column is empty.
        let (csv_path, _guard) = create_temp_csv(
            "Date,Amount,Flag,CheckNo,Description\n\
             \"03/31/2026\",\"-50.00\",\"*\",\"\",\"VENMO PAYMENT 260331\"\n\
             \"03/31/2026\",\"-240.00\",\"*\",\"\",\"ROBINHOOD DEBITS XXXXX3795\"",
        );
        let rows = parse_csv(&csv_path).unwrap();

        assert_eq!(rows.len(), 2);
        let first = &rows[0];
        assert_eq!(first["Date"], text("03/31/2026"));
        assert_eq!(first["Amount"], float(-50.00));
        assert_eq!(first["Flag"], text("*"));
        assert_eq!(first["CheckNo"], Value::Null);
        assert_eq!(first["Description"], text("VENMO PAYMENT 260331"));
        assert_eq!(rows[1]["Amount"], float(-240.00));
    }
}

View File

@ -1,2 +1,3 @@
pub mod analyzer;
pub mod csv_parser;
pub mod wizard;