//! CSV parsing and merging utilities: read CSV files into `serde_json::Value`
//! records, and merge several files with a per-record source-file tag.
use std::path::Path;
|
|
|
|
use anyhow::{Context, Result};
|
|
use csv::ReaderBuilder;
|
|
use serde_json::Value;
|
|
|
|
/// Predicate: does `path` have a `.csv` extension (case-insensitive)?
///
/// A path with no extension at all (e.g. a bare `"csv"` filename) is not
/// considered a CSV path.
pub fn csv_path_p(path: &Path) -> bool {
    match path.extension() {
        Some(ext) => ext.eq_ignore_ascii_case("csv"),
        None => false,
    }
}
|
|
|
|
/// Parse a CSV file and return records as serde_json::Value array
|
|
pub fn parse_csv(path: &Path) -> Result<Vec<Value>> {
|
|
let mut reader = ReaderBuilder::new()
|
|
.has_headers(true)
|
|
.flexible(true)
|
|
.trim(csv::Trim::All)
|
|
.from_path(path)
|
|
.with_context(|| format!("Failed to open CSV file: {}", path.display()))?;
|
|
|
|
// Detect if first row looks like headers (strings) or data (mixed)
|
|
let has_headers = reader.headers().is_ok();
|
|
|
|
let mut records = Vec::new();
|
|
let mut headers = Vec::new();
|
|
|
|
if has_headers {
|
|
headers = reader
|
|
.headers()
|
|
.with_context(|| "Failed to read CSV headers")?
|
|
.iter()
|
|
.map(|s| s.to_string())
|
|
.collect();
|
|
}
|
|
|
|
for result in reader.records() {
|
|
let record = result.with_context(|| "Failed to read CSV record")?;
|
|
let mut map = serde_json::Map::new();
|
|
|
|
for (i, field) in record.iter().enumerate() {
|
|
let json_value: Value = parse_csv_field(field);
|
|
if has_headers {
|
|
if let Some(header) = headers.get(i) {
|
|
map.insert(header.clone(), json_value);
|
|
}
|
|
} else {
|
|
map.insert(i.to_string(), json_value);
|
|
}
|
|
}
|
|
|
|
if !map.is_empty() {
|
|
records.push(Value::Object(map));
|
|
}
|
|
}
|
|
|
|
Ok(records)
|
|
}
|
|
|
|
/// Parse multiple CSV files and merge into a single JSON array.
|
|
/// Each record gets a "File" field set to the filename stem (e.g., "sales" from "sales.csv").
|
|
pub fn merge_csvs(paths: &[impl AsRef<Path>]) -> Result<Vec<Value>> {
|
|
let mut all_records = Vec::new();
|
|
for path in paths {
|
|
let path = path.as_ref();
|
|
let stem = path
|
|
.file_stem()
|
|
.and_then(|s| s.to_str())
|
|
.unwrap_or("unknown")
|
|
.to_string();
|
|
let records = parse_csv(path)?;
|
|
for mut record in records {
|
|
if let Value::Object(ref mut map) = record {
|
|
map.insert("File".to_string(), Value::String(stem.clone()));
|
|
}
|
|
all_records.push(record);
|
|
}
|
|
}
|
|
Ok(all_records)
|
|
}
|
|
|
|
fn parse_csv_field(field: &str) -> Value {
|
|
if field.is_empty() {
|
|
return Value::Null;
|
|
}
|
|
|
|
// Try to parse as number (integer or float)
|
|
if let Ok(num) = field.parse::<i64>() {
|
|
return Value::Number(serde_json::Number::from(num));
|
|
}
|
|
|
|
if let Ok(num) = field.parse::<f64>() {
|
|
return Value::Number(
|
|
serde_json::Number::from_f64(num).unwrap_or(serde_json::Number::from(0)),
|
|
);
|
|
}
|
|
|
|
// Otherwise treat as string
|
|
Value::String(field.to_string())
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use std::{fs, path::PathBuf};
    use tempfile::tempdir;

    // Write `content` to a `test.csv` inside a fresh temp directory.
    // The TempDir is returned alongside the path so the caller keeps it
    // alive — dropping it would delete the file before the test reads it.
    fn create_temp_csv(content: &str) -> (PathBuf, tempfile::TempDir) {
        let dir = tempdir().unwrap();
        let path = dir.path().join("test.csv");
        fs::write(&path, content).unwrap();
        (path, dir)
    }

    // Headers become object keys; integer-looking fields become JSON numbers.
    #[test]
    fn parse_simple_csv() {
        let (path, _dir) =
            create_temp_csv("Region,Product,Revenue\nEast,Shirts,1000\nWest,Shirts,800");
        let records = parse_csv(&path).unwrap();

        assert_eq!(records.len(), 2);
        assert_eq!(records[0]["Region"], Value::String("East".to_string()));
        assert_eq!(records[0]["Product"], Value::String("Shirts".to_string()));
        assert_eq!(
            records[0]["Revenue"],
            Value::Number(serde_json::Number::from(1000))
        );
    }

    // Decimal fields parse as f64-backed JSON numbers.
    #[test]
    fn parse_csv_with_floats() {
        let (path, _dir) =
            create_temp_csv("Region,Revenue,Cost\nEast,1000.50,600.25\nWest,800.75,500.00");
        let records = parse_csv(&path).unwrap();

        assert_eq!(records.len(), 2);
        assert!(records[0]["Revenue"].is_f64());
        assert_eq!(
            records[0]["Revenue"],
            Value::Number(serde_json::Number::from_f64(1000.50).unwrap())
        );
    }

    // Double-quoted fields are unwrapped by the csv reader.
    #[test]
    fn parse_csv_with_quoted_fields() {
        let (path, _dir) =
            create_temp_csv("Product,Description,Price\n\"Shirts\",\"A nice shirt\",10.00");
        let records = parse_csv(&path).unwrap();

        assert_eq!(records.len(), 1);
        assert_eq!(records[0]["Product"], Value::String("Shirts".to_string()));
        assert_eq!(
            records[0]["Description"],
            Value::String("A nice shirt".to_string())
        );
    }

    // Empty fields map to JSON null (see parse_csv_field).
    #[test]
    fn parse_csv_with_empty_values() {
        let (path, _dir) = create_temp_csv("Region,Product,Revenue\nEast,,1000\nWest,Shirts,");
        let records = parse_csv(&path).unwrap();

        assert_eq!(records.len(), 2);
        assert_eq!(records[0]["Product"], Value::Null);
        assert_eq!(records[1]["Revenue"], Value::Null);
    }

    // Type coercion per column: string, i64, f64 — and "true"/"false" stay
    // strings (no boolean coercion is implemented).
    #[test]
    fn parse_csv_mixed_types() {
        let (path, _dir) =
            create_temp_csv("Name,Count,Price,Active\nWidget,5,9.99,true\nGadget,3,19.99,false");
        let records = parse_csv(&path).unwrap();

        assert_eq!(records.len(), 2);
        assert_eq!(records[0]["Name"], Value::String("Widget".to_string()));
        assert_eq!(
            records[0]["Count"],
            Value::Number(serde_json::Number::from(5))
        );
        assert!(records[0]["Price"].is_f64());
        assert_eq!(records[0]["Active"], Value::String("true".to_string()));
    }

    // merge_csvs concatenates in input order and tags each record with the
    // filename stem under the "File" key, leaving original fields intact.
    #[test]
    fn merge_csvs_adds_file_field_from_stem() {
        let dir = tempdir().unwrap();
        let sales = dir.path().join("sales.csv");
        let expenses = dir.path().join("expenses.csv");
        fs::write(&sales, "Region,Revenue\nEast,100\nWest,200").unwrap();
        fs::write(&expenses, "Region,Revenue\nEast,50\nWest,75").unwrap();

        let records = merge_csvs(&[sales, expenses]).unwrap();
        assert_eq!(records.len(), 4);
        assert_eq!(records[0]["File"], Value::String("sales".to_string()));
        assert_eq!(records[1]["File"], Value::String("sales".to_string()));
        assert_eq!(records[2]["File"], Value::String("expenses".to_string()));
        assert_eq!(records[3]["File"], Value::String("expenses".to_string()));
        // Original fields preserved
        assert_eq!(records[0]["Region"], Value::String("East".to_string()));
        assert_eq!(
            records[2]["Revenue"],
            Value::Number(serde_json::Number::from(50))
        );
    }

    // A single-element slice is a valid (degenerate) merge input.
    #[test]
    fn merge_csvs_single_file_works() {
        let dir = tempdir().unwrap();
        let path = dir.path().join("data.csv");
        fs::write(&path, "Name,Value\nA,1").unwrap();

        let records = merge_csvs(&[path]).unwrap();
        assert_eq!(records.len(), 1);
        assert_eq!(records[0]["File"], Value::String("data".to_string()));
        assert_eq!(records[0]["Name"], Value::String("A".to_string()));
    }

    // Bank-export style CSV: every field quoted. Quoted negative amounts still
    // coerce to numbers, and a quoted empty field ("") still maps to null.
    #[test]
    fn parse_checking_csv_format() {
        // Simulates the format of /Users/edwlan/Downloads/Checking1.csv
        let (path, _dir) = create_temp_csv(
            "Date,Amount,Flag,CheckNo,Description\n\
             \"03/31/2026\",\"-50.00\",\"*\",\"\",\"VENMO PAYMENT 260331\"\n\
             \"03/31/2026\",\"-240.00\",\"*\",\"\",\"ROBINHOOD DEBITS XXXXX3795\"",
        );
        let records = parse_csv(&path).unwrap();

        assert_eq!(records.len(), 2);
        assert_eq!(records[0]["Date"], Value::String("03/31/2026".to_string()));
        assert_eq!(
            records[0]["Amount"],
            Value::Number(serde_json::Number::from_f64(-50.00).unwrap())
        );
        assert_eq!(records[0]["Flag"], Value::String("*".to_string()));
        assert_eq!(records[0]["CheckNo"], Value::Null);
        assert_eq!(
            records[0]["Description"],
            Value::String("VENMO PAYMENT 260331".to_string())
        );
        assert_eq!(
            records[1]["Amount"],
            Value::Number(serde_json::Number::from_f64(-240.00).unwrap())
        );
    }
}
|