feat(io): enhance CSV import with warnings and improved headless parsing

The CSV parser now provides `parse_csv_with_warnings`, which returns
short-row warnings alongside the parsed records.

Short rows are now padded with `Value::Null` instead of being silently
dropped.

`ImportJsonHeadless` now uses `parse_csv_with_warnings` and surfaces
warnings in the status message.

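`ImportJsonHeadless` now parses the input file exactly once: JSON
records are extracted from the already-parsed value via
`json_import_records`, and CSV records are reused directly, instead of
re-reading and re-parsing the file.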

Add regression tests for short row handling and headless import.

Co-Authored-By: fiddlerwoaroof/git-smart-commit (unsloth/gemma-4-26B-A4B-it-GGUF:UD-Q5_K_XL)
This commit is contained in:
Edward Langley
2026-06-09 21:43:13 -07:00
parent f04fe517ae
commit f0b9227d8f
2 changed files with 203 additions and 40 deletions
+127 -37
View File
@@ -768,6 +768,33 @@ impl Effect for LoadModel {
}
}
/// Pull the import records out of a JSON value that has already been parsed.
///
/// Lookup order:
/// 1. a non-empty `array_path`, resolved with `extract_array_at_path`;
/// 2. the root value itself, when it is a JSON array;
/// 3. the first path reported by `find_array_paths`.
///
/// The function only inspects `value` and clones the selected array, so
/// [`ImportJsonHeadless`] can parse the file a single time and keep using the
/// same parsed value as the pipeline's `raw` (improvise-oaq).
///
/// # Errors
///
/// Returns a human-readable message when the explicit path does not resolve
/// to an array, when no array path is detected at all, or when the first
/// detected path cannot be extracted.
fn json_import_records(
    value: &serde_json::Value,
    array_path: Option<&str>,
) -> Result<Vec<serde_json::Value>, String> {
    use crate::import::analyzer::{extract_array_at_path, find_array_paths};

    // An absent path and an empty path are treated the same: no explicit choice.
    let explicit = array_path.unwrap_or("");
    if !explicit.is_empty() {
        return match extract_array_at_path(value, explicit) {
            Some(found) => Ok(found.clone()),
            None => Err(format!("No array at path '{explicit}'")),
        };
    }

    // A top-level array is the record list itself.
    if let serde_json::Value::Array(items) = value {
        return Ok(items.clone());
    }

    // Otherwise fall back to the first array path the analyzer reports.
    let candidates = find_array_paths(value);
    let Some(first_path) = candidates.first() else {
        return Err("No array found in JSON".to_string());
    };
    match extract_array_at_path(value, first_path) {
        Some(found) => Ok(found.clone()),
        None => Err("Could not extract records array".to_string()),
    }
}
/// Headless JSON/CSV import: read file, analyze, build model, replace current.
#[derive(Debug)]
pub struct ImportJsonHeadless {
@@ -777,9 +804,7 @@ pub struct ImportJsonHeadless {
}
impl Effect for ImportJsonHeadless {
fn apply(&self, app: &mut App) {
use crate::import::analyzer::{
FieldKind, analyze_records, extract_array_at_path, find_array_paths,
};
use crate::import::analyzer::{FieldKind, analyze_records};
use crate::import::wizard::ImportPipeline;
let is_csv = self
@@ -787,9 +812,17 @@ impl Effect for ImportJsonHeadless {
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("csv"));
let records = if is_csv {
match crate::import::csv_parser::parse_csv(&self.path) {
Ok(recs) => recs,
// Parse the file exactly once: `records` feeds analysis, `raw` is the
// parsed value reused for the pipeline (improvise-oaq). CSV warnings
// (short rows, improvise-k8i) are threaded into the status message.
let mut warnings: Vec<String> = vec![];
let (records, raw) = if is_csv {
match crate::import::csv_parser::parse_csv_with_warnings(&self.path) {
Ok((recs, w)) => {
warnings = w;
let raw = serde_json::Value::Array(recs.clone());
(recs, raw)
}
Err(e) => {
app.view_state.status_msg = format!("CSV error: {e}");
return;
@@ -810,29 +843,10 @@ impl Effect for ImportJsonHeadless {
return;
}
};
if let Some(ap) = self.array_path.as_deref().filter(|s| !s.is_empty()) {
match extract_array_at_path(&value, ap) {
Some(arr) => arr.clone(),
None => {
app.view_state.status_msg = format!("No array at path '{ap}'");
return;
}
}
} else if let Some(arr) = value.as_array() {
arr.clone()
} else {
let paths = find_array_paths(&value);
if let Some(first) = paths.first() {
match extract_array_at_path(&value, first) {
Some(arr) => arr.clone(),
None => {
app.view_state.status_msg = "Could not extract records array".to_string();
return;
}
}
} else {
app.view_state.status_msg = "No array found in JSON".to_string();
match json_import_records(&value, self.array_path.as_deref()) {
Ok(recs) => (recs, value),
Err(msg) => {
app.view_state.status_msg = msg;
return;
}
}
@@ -840,13 +854,6 @@ impl Effect for ImportJsonHeadless {
let proposals = analyze_records(&records);
let raw = if is_csv {
serde_json::Value::Array(records.clone())
} else {
serde_json::from_str(&std::fs::read_to_string(&self.path).unwrap_or_default())
.unwrap_or(serde_json::Value::Array(records.clone()))
};
let pipeline = ImportPipeline {
raw,
array_paths: vec![],
@@ -870,7 +877,11 @@ impl Effect for ImportJsonHeadless {
match pipeline.build_model() {
Ok(new_workbook) => {
app.model_state.workbook = new_workbook;
app.view_state.status_msg = "Imported successfully".to_string();
app.view_state.status_msg = if warnings.is_empty() {
"Imported successfully".to_string()
} else {
format!("Imported with warnings: {}", warnings.join("; "))
};
}
Err(e) => {
app.view_state.status_msg = format!("Import error: {e}");
@@ -1857,4 +1868,83 @@ mod tests {
assert_eq!(Panel::Category.mode(), AppMode::CategoryPanel);
assert_eq!(Panel::View.mode(), AppMode::ViewPanel);
}
// ── Headless import ─────────────────────────────────────────────────
/// Regression test for improvise-k8i.
///
/// ImportJsonHeadless used to parse CSVs with `parse_csv`, which sends
/// short-row warnings to stderr, where they cannot be seen inside the TUI.
/// A short row must show up in the status message and name the affected
/// column; the import must never look like a silent, clean success.
#[test]
fn import_headless_csv_short_rows_surface_warning_in_status() {
    let tmp = tempfile::tempdir().unwrap();
    let csv_path = tmp.path().join("data.csv");
    // The last data row has only two fields, so its `Revenue` cell is missing.
    let contents = "Region,Product,Revenue\nEast,Shirts,100\nWest,Pants,200\nEast,Pants\n";
    std::fs::write(&csv_path, contents).unwrap();

    let mut app = test_app();
    let effect = ImportJsonHeadless {
        path: csv_path,
        model_name: None,
        array_path: None,
    };
    effect.apply(&mut app);

    let status = &app.view_state.status_msg;
    assert!(
        status.contains("Revenue"),
        "status must name the short column, got: {}",
        status
    );
}
/// A CSV whose rows all match the header width must produce the plain
/// success status, with no warning text appended.
#[test]
fn import_headless_csv_full_rows_report_plain_success() {
    let tmp = tempfile::tempdir().unwrap();
    let csv_path = tmp.path().join("data.csv");
    let contents = "Region,Revenue\nEast,100\nWest,200\n";
    std::fs::write(&csv_path, contents).unwrap();

    let mut app = test_app();
    let effect = ImportJsonHeadless {
        path: csv_path,
        model_name: None,
        array_path: None,
    };
    effect.apply(&mut app);

    assert_eq!(app.view_state.status_msg, "Imported successfully");
}
// ── json_import_records (improvise-oaq helper) ──────────────────────
/// With no explicit path, a top-level JSON array is returned as the records.
#[test]
fn json_import_records_root_array() {
    let parsed = serde_json::json!([{"x": 1}, {"x": 2}]);
    let record_count = json_import_records(&parsed, None).unwrap().len();
    assert_eq!(record_count, 2);
}
/// An explicit `array_path` selects the array stored under that key.
#[test]
fn json_import_records_explicit_path() {
    let parsed = serde_json::json!({"data": [{"x": 1}], "meta": {"v": 1}});
    let record_count = json_import_records(&parsed, Some("data")).unwrap().len();
    assert_eq!(record_count, 1);
}
/// With no explicit path and a non-array root, the only nested array is
/// found automatically and returned.
#[test]
fn json_import_records_auto_detects_single_path() {
    let parsed = serde_json::json!({"rows": [{"x": 1}, {"x": 2}, {"x": 3}]});
    let record_count = json_import_records(&parsed, None).unwrap().len();
    assert_eq!(record_count, 3);
}
/// Both failure modes yield `Err`: a document with no array anywhere, and
/// an explicit path that does not resolve to an array.
#[test]
fn json_import_records_errors_when_no_array() {
    // No array anywhere in the document.
    let no_arrays = serde_json::json!({"meta": {"v": 1}});
    let auto_detected = json_import_records(&no_arrays, None);
    assert!(auto_detected.is_err());

    // The root is an array, but the requested path does not exist.
    let root_array = serde_json::json!([{"x": 1}]);
    let bad_path = json_import_records(&root_array, Some("nope"));
    assert!(bad_path.is_err());
}
}