use std::path::Path;

use anyhow::{Context, Result};
use csv::ReaderBuilder;
use serde_json::Value;

/// Returns `true` when `path` has a `csv` extension (ASCII case-insensitive).
///
/// Only the extension is inspected; the file is never opened.
pub fn csv_path_p(path: &Path) -> bool {
    path.extension()
        .is_some_and(|ext| ext.eq_ignore_ascii_case("csv"))
}

/// Parse a CSV file and return records as serde_json::Value array
///
/// The first row is always consumed as the header row, and every following
/// row becomes one JSON object keyed by header name. Field values are
/// converted by `parse_csv_field`.
///
/// Details worth knowing:
/// - The reader is configured with `flexible(true)` and `Trim::All`.
/// - A field whose column index has no matching header is dropped.
/// - If the header row cannot be read (the reader reports an error for it),
///   the error is ignored and fields are keyed by their zero-based column
///   index ("0", "1", ...) instead.
/// - Rows that produce no key/value pairs are skipped.
///
/// # Errors
/// Returns an error if the file cannot be opened or if any data record
/// fails to parse.
pub fn parse_csv(path: &Path) -> Result<Vec<Value>> {
    let mut reader = ReaderBuilder::new()
        .has_headers(true)
        .flexible(true)
        .trim(csv::Trim::All)
        .from_path(path)
        .with_context(|| format!("Failed to open CSV file: {}", path.display()))?;

    // No content-based header detection happens here: `None` only means the
    // header row itself could not be read, in which case we fall back to
    // positional keys rather than failing the whole file.
    let headers: Option<Vec<String>> = reader
        .headers()
        .ok()
        .map(|row| row.iter().map(String::from).collect());

    let mut records = Vec::new();

    for result in reader.records() {
        let record = result.with_context(|| "Failed to read CSV record")?;
        let mut map = serde_json::Map::new();

        for (i, field) in record.iter().enumerate() {
            let key = match &headers {
                Some(names) => match names.get(i) {
                    Some(name) => name.clone(),
                    // Row is longer than the header row: drop the extra field.
                    None => continue,
                },
                None => i.to_string(),
            };
            map.insert(key, parse_csv_field(field));
        }

        if !map.is_empty() {
            records.push(Value::Object(map));
        }
    }

    Ok(records)
}

/// Parse multiple CSV files and merge into a single JSON array.
/// Each record gets a "File" field set to the filename stem (e.g., "sales" from "sales.csv").
pub fn merge_csvs(paths: &[impl AsRef]) -> Result> { let mut all_records = Vec::new(); for path in paths { let path = path.as_ref(); let stem = path .file_stem() .and_then(|s| s.to_str()) .unwrap_or("unknown") .to_string(); let records = parse_csv(path)?; for mut record in records { if let Value::Object(ref mut map) = record { map.insert("File".to_string(), Value::String(stem.clone())); } all_records.push(record); } } Ok(all_records) } fn parse_csv_field(field: &str) -> Value { if field.is_empty() { return Value::Null; } // Try to parse as number (integer or float) if let Ok(num) = field.parse::() { return Value::Number(serde_json::Number::from(num)); } if let Ok(num) = field.parse::() { return Value::Number( serde_json::Number::from_f64(num).unwrap_or(serde_json::Number::from(0)), ); } // Otherwise treat as string Value::String(field.to_string()) } #[cfg(test)] mod tests { use super::*; use std::{fs, path::PathBuf}; use tempfile::tempdir; fn create_temp_csv(content: &str) -> (PathBuf, tempfile::TempDir) { let dir = tempdir().unwrap(); let path = dir.path().join("test.csv"); fs::write(&path, content).unwrap(); (path, dir) } #[test] fn parse_simple_csv() { let (path, _dir) = create_temp_csv("Region,Product,Revenue\nEast,Shirts,1000\nWest,Shirts,800"); let records = parse_csv(&path).unwrap(); assert_eq!(records.len(), 2); assert_eq!(records[0]["Region"], Value::String("East".to_string())); assert_eq!(records[0]["Product"], Value::String("Shirts".to_string())); assert_eq!( records[0]["Revenue"], Value::Number(serde_json::Number::from(1000)) ); } #[test] fn parse_csv_with_floats() { let (path, _dir) = create_temp_csv("Region,Revenue,Cost\nEast,1000.50,600.25\nWest,800.75,500.00"); let records = parse_csv(&path).unwrap(); assert_eq!(records.len(), 2); assert!(records[0]["Revenue"].is_f64()); assert_eq!( records[0]["Revenue"], Value::Number(serde_json::Number::from_f64(1000.50).unwrap()) ); } #[test] fn parse_csv_with_quoted_fields() { let (path, _dir) = 
create_temp_csv("Product,Description,Price\n\"Shirts\",\"A nice shirt\",10.00"); let records = parse_csv(&path).unwrap(); assert_eq!(records.len(), 1); assert_eq!(records[0]["Product"], Value::String("Shirts".to_string())); assert_eq!( records[0]["Description"], Value::String("A nice shirt".to_string()) ); } #[test] fn parse_csv_with_empty_values() { let (path, _dir) = create_temp_csv("Region,Product,Revenue\nEast,,1000\nWest,Shirts,"); let records = parse_csv(&path).unwrap(); assert_eq!(records.len(), 2); assert_eq!(records[0]["Product"], Value::Null); assert_eq!(records[1]["Revenue"], Value::Null); } #[test] fn parse_csv_mixed_types() { let (path, _dir) = create_temp_csv("Name,Count,Price,Active\nWidget,5,9.99,true\nGadget,3,19.99,false"); let records = parse_csv(&path).unwrap(); assert_eq!(records.len(), 2); assert_eq!(records[0]["Name"], Value::String("Widget".to_string())); assert_eq!( records[0]["Count"], Value::Number(serde_json::Number::from(5)) ); assert!(records[0]["Price"].is_f64()); assert_eq!(records[0]["Active"], Value::String("true".to_string())); } #[test] fn merge_csvs_adds_file_field_from_stem() { let dir = tempdir().unwrap(); let sales = dir.path().join("sales.csv"); let expenses = dir.path().join("expenses.csv"); fs::write(&sales, "Region,Revenue\nEast,100\nWest,200").unwrap(); fs::write(&expenses, "Region,Revenue\nEast,50\nWest,75").unwrap(); let records = merge_csvs(&[sales, expenses]).unwrap(); assert_eq!(records.len(), 4); assert_eq!(records[0]["File"], Value::String("sales".to_string())); assert_eq!(records[1]["File"], Value::String("sales".to_string())); assert_eq!(records[2]["File"], Value::String("expenses".to_string())); assert_eq!(records[3]["File"], Value::String("expenses".to_string())); // Original fields preserved assert_eq!(records[0]["Region"], Value::String("East".to_string())); assert_eq!( records[2]["Revenue"], Value::Number(serde_json::Number::from(50)) ); } #[test] fn merge_csvs_single_file_works() { let dir = 
tempdir().unwrap(); let path = dir.path().join("data.csv"); fs::write(&path, "Name,Value\nA,1").unwrap(); let records = merge_csvs(&[path]).unwrap(); assert_eq!(records.len(), 1); assert_eq!(records[0]["File"], Value::String("data".to_string())); assert_eq!(records[0]["Name"], Value::String("A".to_string())); } #[test] fn parse_checking_csv_format() { // Simulates the format of /Users/edwlan/Downloads/Checking1.csv let (path, _dir) = create_temp_csv( "Date,Amount,Flag,CheckNo,Description\n\ \"03/31/2026\",\"-50.00\",\"*\",\"\",\"VENMO PAYMENT 260331\"\n\ \"03/31/2026\",\"-240.00\",\"*\",\"\",\"ROBINHOOD DEBITS XXXXX3795\"", ); let records = parse_csv(&path).unwrap(); assert_eq!(records.len(), 2); assert_eq!(records[0]["Date"], Value::String("03/31/2026".to_string())); assert_eq!( records[0]["Amount"], Value::Number(serde_json::Number::from_f64(-50.00).unwrap()) ); assert_eq!(records[0]["Flag"], Value::String("*".to_string())); assert_eq!(records[0]["CheckNo"], Value::Null); assert_eq!( records[0]["Description"], Value::String("VENMO PAYMENT 260331".to_string()) ); assert_eq!( records[1]["Amount"], Value::Number(serde_json::Number::from_f64(-240.00).unwrap()) ); } }