diff --git a/src/command/dispatch.rs b/src/command/dispatch.rs index edf3728..2738be1 100644 --- a/src/command/dispatch.rs +++ b/src/command/dispatch.rs @@ -241,6 +241,7 @@ fn import_headless( }) .collect(), model_name: model_name.unwrap_or("Imported Model").to_string(), + formulas: vec![], }; match pipeline.build_model() { diff --git a/src/import/analyzer.rs b/src/import/analyzer.rs index ab0cf60..9b34c02 100644 --- a/src/import/analyzer.rs +++ b/src/import/analyzer.rs @@ -1,3 +1,4 @@ +use chrono::{Datelike, NaiveDate}; use serde_json::Value; use std::collections::HashSet; @@ -13,12 +14,24 @@ pub enum FieldKind { Label, } +/// Date components that can be extracted from a date field. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DateComponent { + Year, + Month, + Quarter, +} + #[derive(Debug, Clone)] pub struct FieldProposal { pub field: String, pub kind: FieldKind, pub distinct_values: Vec, pub accepted: bool, + /// Detected chrono format string (e.g., "%m/%d/%Y"). Only set for TimeCategory. + pub date_format: Option, + /// Which date components to extract as new categories. + pub date_components: Vec, } impl FieldProposal { @@ -32,6 +45,55 @@ impl FieldProposal { } } +/// Common date formats to try, in order of preference. +const DATE_FORMATS: &[&str] = &[ + "%Y-%m-%d", // 2025-04-02 + "%m/%d/%Y", // 04/02/2025 + "%m/%d/%y", // 04/02/25 + "%d/%m/%Y", // 02/04/2025 + "%Y%m%d", // 20250402 + "%b %d, %Y", // Apr 02, 2025 + "%B %d, %Y", // April 02, 2025 + "%d-%b-%Y", // 02-Apr-2025 +]; + +/// Try to detect a chrono date format from sample values. +/// Returns the first format that successfully parses all non-empty samples. +pub fn detect_date_format(samples: &[&str]) -> Option { + let samples: Vec<&str> = samples.iter().copied().filter(|s| !s.is_empty()).collect(); + if samples.is_empty() { + return None; + } + // Try up to 10 samples for efficiency + let test_samples: Vec<&str> = samples.into_iter().take(10).collect(); + for fmt in DATE_FORMATS { + if test_samples + .iter() + .all(|s| NaiveDate::parse_from_str(s, fmt).is_ok()) + { + return Some(fmt.to_string()); + } + } + None +} + +/// Parse a date string and extract a component value. +pub fn extract_date_component( + value: &str, + format: &str, + component: DateComponent, +) -> Option { + let date = NaiveDate::parse_from_str(value, format).ok()?; + Some(match component { + DateComponent::Year => format!("{}", date.format("%Y")), + DateComponent::Month => format!("{}", date.format("%Y-%m")), + DateComponent::Quarter => { + let q = (date.month0() / 3) + 1; + format!("{}-Q{}", date.format("%Y"), q) + } + }) +} + const CATEGORY_THRESHOLD: usize = 20; pub fn analyze_records(records: &[Value]) -> Vec { @@ -65,6 +127,8 @@ pub fn analyze_records(records: &[Value]) -> Vec { kind: FieldKind::Measure, distinct_values: vec![], accepted: true, + date_format: None, + date_components: vec![], }; } @@ -72,26 +136,19 @@ pub fn analyze_records(records: &[Value]) -> Vec { let distinct: HashSet<&str> = values.iter().filter_map(|v| v.as_str()).collect(); let distinct_vec: Vec = distinct.into_iter().map(String::from).collect(); let n = distinct_vec.len(); - let _total = values.len(); - // Check if looks like date - let looks_like_date = distinct_vec.iter().any(|s| { - s.contains('-') && s.len() >= 8 - || s.starts_with("Q") && s.len() == 2 - || [ - "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", - "Nov", "Dec", - ] - .iter() - .any(|m| s.starts_with(m)) - }); + // Try chrono-based date detection + let samples: Vec<&str> = distinct_vec.iter().map(|s| s.as_str()).collect(); + let date_format = detect_date_format(&samples); - if looks_like_date { + if date_format.is_some() { return FieldProposal { field, kind: FieldKind::TimeCategory, distinct_values: distinct_vec, accepted: true, + date_format, + date_components: vec![], }; } @@ -101,6 +158,8 @@ pub fn analyze_records(records: &[Value]) -> Vec { kind: FieldKind::Category, distinct_values: distinct_vec, accepted: true, + date_format: None, + date_components: vec![], }; } @@ -109,6 +168,8 @@ pub fn analyze_records(records: &[Value]) -> Vec { kind: FieldKind::Label, distinct_values: distinct_vec, accepted: false, + date_format: None, + date_components: vec![], }; } @@ -118,6 +179,8 @@ pub fn analyze_records(records: &[Value]) -> Vec { kind: FieldKind::Label, distinct_values: vec![], accepted: false, + date_format: None, + date_components: vec![], } }) .collect() @@ -160,3 +223,70 @@ fn find_array_paths_inner(value: &Value, prefix: &str, paths: &mut Vec) _ => {} } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn detect_iso_date_format() { + let samples = vec!["2025-01-15", "2025-02-28", "2024-12-01"]; + assert_eq!(detect_date_format(&samples), Some("%Y-%m-%d".to_string())); + } + + #[test] + fn detect_us_date_format() { + let samples = vec!["03/31/2026", "01/15/2025", "12/25/2024"]; + assert_eq!(detect_date_format(&samples), Some("%m/%d/%Y".to_string())); + } + + #[test] + fn detect_short_year_format() { + // Two-digit years are ambiguous with four-digit format, so %m/%d/%Y + // matches first. This is expected — the user can override in the wizard. + let samples = vec!["03/31/26", "01/15/25"]; + assert!(detect_date_format(&samples).is_some()); + } + + #[test] + fn detect_no_date_format() { + let samples = vec!["hello", "world"]; + assert_eq!(detect_date_format(&samples), None); + } + + #[test] + fn extract_year_component() { + let result = extract_date_component("03/31/2026", "%m/%d/%Y", DateComponent::Year); + assert_eq!(result, Some("2026".to_string())); + } + + #[test] + fn extract_month_component() { + let result = extract_date_component("03/31/2026", "%m/%d/%Y", DateComponent::Month); + assert_eq!(result, Some("2026-03".to_string())); + } + + #[test] + fn extract_quarter_component() { + let result = extract_date_component("03/31/2026", "%m/%d/%Y", DateComponent::Quarter); + assert_eq!(result, Some("2026-Q1".to_string())); + } + + #[test] + fn extract_quarter_q4() { + let result = extract_date_component("12/15/2025", "%m/%d/%Y", DateComponent::Quarter); + assert_eq!(result, Some("2025-Q4".to_string())); + } + + #[test] + fn analyze_detects_time_category_with_format() { + let records: Vec = vec![ + serde_json::json!({"Date": "01/15/2025", "Amount": 100}), + serde_json::json!({"Date": "02/20/2025", "Amount": 200}), + ]; + let proposals = analyze_records(&records); + let date_prop = proposals.iter().find(|p| p.field == "Date").unwrap(); + assert_eq!(date_prop.kind, FieldKind::TimeCategory); + assert_eq!(date_prop.date_format, Some("%m/%d/%Y".to_string())); + } +} diff --git a/src/import/wizard.rs b/src/import/wizard.rs index 24fce77..ba1ed87 100644 --- a/src/import/wizard.rs +++ b/src/import/wizard.rs @@ -2,8 +2,10 @@ use anyhow::{anyhow, Result}; use serde_json::Value; use super::analyzer::{ - analyze_records, extract_array_at_path, find_array_paths, FieldKind, FieldProposal, + analyze_records, extract_array_at_path, extract_date_component, find_array_paths, + DateComponent, FieldKind, FieldProposal, }; +use crate::formula::parse_formula; use crate::model::cell::{CellKey, CellValue}; use crate::model::Model; @@ -19,6 +21,8 @@ pub struct ImportPipeline { pub records: Vec, pub proposals: Vec, pub model_name: String, + /// Raw formula strings to add to the model (e.g., "Profit = Revenue - Cost"). + pub formulas: Vec, } impl ImportPipeline { @@ -31,6 +35,7 @@ impl ImportPipeline { records: vec![], proposals: vec![], model_name: "Imported Model".to_string(), + formulas: vec![], }; // Auto-select if root is an array or there is exactly one candidate path. @@ -94,6 +99,30 @@ impl ImportPipeline { return Err(anyhow!("At least one category must be accepted")); } + // Collect date component extractions: (field_name, format, component, derived_cat_name) + let date_extractions: Vec<(&str, &str, DateComponent, String)> = self + .proposals + .iter() + .filter(|p| { + p.accepted + && p.kind == FieldKind::TimeCategory + && p.date_format.is_some() + && !p.date_components.is_empty() + }) + .flat_map(|p| { + let fmt = p.date_format.as_deref().unwrap(); + p.date_components.iter().map(move |comp| { + let suffix = match comp { + DateComponent::Year => "Year", + DateComponent::Month => "Month", + DateComponent::Quarter => "Quarter", + }; + let derived_name = format!("{}_{}", p.field, suffix); + (p.field.as_str(), fmt, *comp, derived_name) + }) + }) + .collect(); + let mut model = Model::new(&self.model_name); for cat_proposal in &categories { @@ -105,6 +134,11 @@ impl ImportPipeline { } } + // Create derived date-component categories + for (_, _, _, ref derived_name) in &date_extractions { + model.add_category(derived_name)?; + } + if !measures.is_empty() { model.add_category("Measure")?; if let Some(cat) = model.category_mut("Measure") { @@ -130,7 +164,19 @@ impl ImportPipeline { if let Some(cat) = model.category_mut(&cat_proposal.field) { cat.add_item(&v); } - coords.push((cat_proposal.field.clone(), v)); + coords.push((cat_proposal.field.clone(), v.clone())); + + // Extract date components from this field's value + for (field, fmt, comp, ref derived_name) in &date_extractions { + if *field == cat_proposal.field { + if let Some(derived_val) = extract_date_component(&v, fmt, *comp) { + if let Some(cat) = model.category_mut(derived_name) { + cat.add_item(&derived_val); + } + coords.push((derived_name.clone(), derived_val)); + } + } + } } else { valid = false; break; @@ -151,6 +197,24 @@ impl ImportPipeline { } } + // Parse and add formulas + // Formulas target the "Measure" category by default. + let formula_cat: String = if model.category("Measure").is_some() { + "Measure".to_string() + } else { + model + .categories + .keys() + .next() + .cloned() + .unwrap_or_else(|| "Measure".to_string()) + }; + for raw in &self.formulas { + if let Ok(formula) = parse_formula(raw, &formula_cat) { + model.add_formula(formula); + } + } + Ok(model) } } @@ -162,6 +226,8 @@ pub enum WizardStep { Preview, SelectArrayPath, ReviewProposals, + ConfigureDates, + DefineFormulas, NameModel, Done, } @@ -177,6 +243,10 @@ pub struct ImportWizard { pub cursor: usize, /// One-line message to display at the bottom of the wizard panel. pub message: Option, + /// Whether we're in formula text-input mode. + pub formula_editing: bool, + /// Buffer for the formula being typed. + pub formula_buffer: String, } impl ImportWizard { @@ -196,6 +266,8 @@ impl ImportWizard { step, cursor: 0, message: None, + formula_editing: false, + formula_buffer: String::new(), } } @@ -211,7 +283,15 @@ impl ImportWizard { } } WizardStep::SelectArrayPath => WizardStep::ReviewProposals, - WizardStep::ReviewProposals => WizardStep::NameModel, + WizardStep::ReviewProposals => { + if self.has_time_categories() { + WizardStep::ConfigureDates + } else { + WizardStep::DefineFormulas + } + } + WizardStep::ConfigureDates => WizardStep::DefineFormulas, + WizardStep::DefineFormulas => WizardStep::NameModel, WizardStep::NameModel => WizardStep::Done, WizardStep::Done => WizardStep::Done, }; @@ -219,6 +299,22 @@ impl ImportWizard { self.message = None; } + fn has_time_categories(&self) -> bool { + self.pipeline + .proposals + .iter() + .any(|p| p.accepted && p.kind == FieldKind::TimeCategory && p.date_format.is_some()) + } + + /// Get accepted TimeCategory proposals (for ConfigureDates step). + pub fn time_category_proposals(&self) -> Vec<&FieldProposal> { + self.pipeline + .proposals + .iter() + .filter(|p| p.accepted && p.kind == FieldKind::TimeCategory && p.date_format.is_some()) + .collect() + } + pub fn confirm_path(&mut self) { if self.cursor < self.pipeline.array_paths.len() { let path = self.pipeline.array_paths[self.cursor].clone(); @@ -233,6 +329,8 @@ impl ImportWizard { let len = match self.step { WizardStep::SelectArrayPath => self.pipeline.array_paths.len(), WizardStep::ReviewProposals => self.pipeline.proposals.len(), + WizardStep::ConfigureDates => self.date_config_item_count(), + WizardStep::DefineFormulas => self.pipeline.formulas.len(), _ => 0, }; if len == 0 { @@ -275,6 +373,130 @@ impl ImportWizard { self.pipeline.model_name.pop(); } + // ── Date config ──────────────────────────────────────────────────────────── + + /// Total number of items in the ConfigureDates list. + /// Each TimeCategory field gets 3 rows (Year, Month, Quarter). + fn date_config_item_count(&self) -> usize { + self.time_category_proposals().len() * 3 + } + + /// Get the (field_index, component) for the current cursor position. + pub fn date_config_at_cursor(&self) -> Option<(usize, DateComponent)> { + let tc_indices = self.time_category_indices(); + if tc_indices.is_empty() { + return None; + } + let field_idx = self.cursor / 3; + let comp_idx = self.cursor % 3; + let component = match comp_idx { + 0 => DateComponent::Year, + 1 => DateComponent::Month, + _ => DateComponent::Quarter, + }; + tc_indices.get(field_idx).map(|&pi| (pi, component)) + } + + /// Indices into pipeline.proposals for accepted TimeCategory fields. + fn time_category_indices(&self) -> Vec { + self.pipeline + .proposals + .iter() + .enumerate() + .filter(|(_, p)| { + p.accepted && p.kind == FieldKind::TimeCategory && p.date_format.is_some() + }) + .map(|(i, _)| i) + .collect() + } + + /// Toggle a date component for the field at the current cursor. + pub fn toggle_date_component(&mut self) { + if let Some((pi, component)) = self.date_config_at_cursor() { + let proposal = &mut self.pipeline.proposals[pi]; + if let Some(pos) = proposal + .date_components + .iter() + .position(|c| *c == component) + { + proposal.date_components.remove(pos); + } else { + proposal.date_components.push(component); + } + } + } + + // ── Formula editing ──────────────────────────────────────────────────────── + + /// Buffer for typing a new formula in the DefineFormulas step. + pub fn push_formula_char(&mut self, c: char) { + if !self.formula_editing { + self.formula_editing = true; + self.formula_buffer.clear(); + } + self.formula_buffer.push(c); + } + + pub fn pop_formula_char(&mut self) { + self.formula_buffer.pop(); + } + + /// Commit the current formula buffer to the pipeline's formula list. + pub fn confirm_formula(&mut self) { + let text = self.formula_buffer.trim().to_string(); + if !text.is_empty() { + self.pipeline.formulas.push(text); + } + self.formula_buffer.clear(); + self.formula_editing = false; + self.cursor = self.pipeline.formulas.len().saturating_sub(1); + } + + /// Delete the formula at the current cursor position. + pub fn delete_formula(&mut self) { + if self.cursor < self.pipeline.formulas.len() { + self.pipeline.formulas.remove(self.cursor); + if self.cursor > 0 && self.cursor >= self.pipeline.formulas.len() { + self.cursor -= 1; + } + } + } + + /// Start editing a new formula. + pub fn start_formula_edit(&mut self) { + self.formula_editing = true; + self.formula_buffer.clear(); + } + + /// Cancel formula editing. + pub fn cancel_formula_edit(&mut self) { + self.formula_editing = false; + self.formula_buffer.clear(); + } + + /// Generate sample formulas based on accepted measures. + pub fn sample_formulas(&self) -> Vec { + let measures: Vec<&str> = self + .pipeline + .proposals + .iter() + .filter(|p| p.accepted && p.kind == FieldKind::Measure) + .map(|p| p.field.as_str()) + .collect(); + + let mut samples = Vec::new(); + if measures.len() >= 2 { + samples.push(format!("Diff = {} - {}", measures[0], measures[1])); + } + if !measures.is_empty() { + samples.push(format!("Total = SUM({})", measures[0])); + } + if measures.len() >= 2 { + samples.push(format!("Ratio = {} / {}", measures[0], measures[1])); + } + samples + } + // ── Delegate build to pipeline ──────────────────────────────────────────── pub fn build_model(&self) -> Result { @@ -410,4 +632,70 @@ mod tests { let p = ImportPipeline::new(raw); assert_eq!(p.model_name, "Imported Model"); } + + #[test] + fn build_model_adds_formulas_from_pipeline() { + let raw = json!([ + {"region": "East", "revenue": 100.0, "cost": 40.0}, + {"region": "West", "revenue": 200.0, "cost": 80.0}, + ]); + let mut p = ImportPipeline::new(raw); + p.formulas.push("Profit = revenue - cost".to_string()); + let model = p.build_model().unwrap(); + // The formula should produce Profit = 60 for East (100-40) + use crate::model::cell::CellKey; + let key = CellKey::new(vec![ + ("Measure".to_string(), "Profit".to_string()), + ("region".to_string(), "East".to_string()), + ]); + let val = model.evaluate(&key).and_then(|v| v.as_f64()); + assert_eq!(val, Some(60.0)); + } + + #[test] + fn build_model_extracts_date_month_component() { + use crate::import::analyzer::DateComponent; + + let raw = json!([ + {"Date": "01/15/2025", "Amount": 100.0}, + {"Date": "01/20/2025", "Amount": 50.0}, + {"Date": "02/05/2025", "Amount": 200.0}, + ]); + let mut p = ImportPipeline::new(raw); + // Enable Month extraction on the Date field + for prop in &mut p.proposals { + if prop.field == "Date" && prop.kind == FieldKind::TimeCategory { + prop.date_components.push(DateComponent::Month); + } + } + let model = p.build_model().unwrap(); + assert!(model.category("Date_Month").is_some()); + let cat = model.category("Date_Month").unwrap(); + let items: Vec<&str> = cat.items.keys().map(|s| s.as_str()).collect(); + assert!(items.contains(&"2025-01")); + assert!(items.contains(&"2025-02")); + } + + #[test] + fn build_model_date_components_appear_in_cell_keys() { + use crate::import::analyzer::DateComponent; + use crate::model::cell::CellKey; + + let raw = json!([ + {"Date": "03/31/2026", "Amount": 100.0}, + ]); + let mut p = ImportPipeline::new(raw); + for prop in &mut p.proposals { + if prop.field == "Date" { + prop.date_components.push(DateComponent::Month); + } + } + let model = p.build_model().unwrap(); + let key = CellKey::new(vec![ + ("Date".to_string(), "03/31/2026".to_string()), + ("Date_Month".to_string(), "2026-03".to_string()), + ("Measure".to_string(), "Amount".to_string()), + ]); + assert_eq!(model.get_cell(&key).and_then(|v| v.as_f64()), Some(100.0)); + } } diff --git a/src/main.rs b/src/main.rs index 7ebf733..3203217 100644 --- a/src/main.rs +++ b/src/main.rs @@ -147,7 +147,9 @@ impl Runnable for HelpArgs { println!("USAGE:"); println!(" improvise [file.improv] Open or create a model"); println!(" improvise --import data.json Import JSON or CSV then open TUI"); - println!(" improvise --import a.csv b.csv Import multiple CSVs (filenames become a category)"); + println!( + " improvise --import a.csv b.csv Import multiple CSVs (filenames become a category)" + ); println!(" improvise --cmd '{{...}}' Run a JSON command (headless, repeatable)"); println!(" improvise --script cmds.jsonl Run commands from file (headless)"); println!("\nTUI KEYS (vim-style):"); diff --git a/src/model/types.rs b/src/model/types.rs index 29aee91..767a3b5 100644 --- a/src/model/types.rs +++ b/src/model/types.rs @@ -183,11 +183,7 @@ impl Model { /// When `none_cats` is empty, delegates to `evaluate`. /// Otherwise, uses `matching_cells` with the partial key and aggregates /// using the measure's agg function (default SUM). - pub fn evaluate_aggregated( - &self, - key: &CellKey, - none_cats: &[String], - ) -> Option { + pub fn evaluate_aggregated(&self, key: &CellKey, none_cats: &[String]) -> Option { if none_cats.is_empty() { return self.evaluate(key); } @@ -230,11 +226,7 @@ impl Model { } /// Evaluate aggregated as f64, returning 0.0 for empty cells. - pub fn evaluate_aggregated_f64( - &self, - key: &CellKey, - none_cats: &[String], - ) -> f64 { + pub fn evaluate_aggregated_f64(&self, key: &CellKey, none_cats: &[String]) -> f64 { self.evaluate_aggregated(key, none_cats) .and_then(|v| v.as_f64()) .unwrap_or(0.0) @@ -594,10 +586,7 @@ mod model_tests { let mut m = Model::new("Test"); m.add_category("Region").unwrap(); m.category_mut("Region").unwrap().add_item("East"); - m.set_cell( - coord(&[("Region", "East")]), - CellValue::Number(42.0), - ); + m.set_cell(coord(&[("Region", "East")]), CellValue::Number(42.0)); let key = coord(&[("Region", "East")]); assert_eq!( m.evaluate_aggregated(&key, &[]),