mirror of
https://github.com/K-Dense-AI/claude-scientific-skills.git
synced 2026-03-27 07:09:27 +08:00
Anomaly detection fixes: - Fix critical quantile index bug: index 0 is mean not q10; correct indices are q10=1, q20=2, q80=8, q90=9 - Redesign test: use all 36 months as context, inject 3 synthetic anomalies into future - Result: 3 CRITICAL detected (was 11/12 — caused by test-set leakage + wrong indices) - Update severity labels: CRITICAL = outside 80% PI, WARNING = outside 60% PI Covariates fixes: - Fix variable-shadowing bug: inner dict comprehension overwrote outer loop store_id causing all stores to get identical covariate arrays (store_A's price for everyone) - Give each store a distinct price baseline (premium $12, standard $10, discount $7.50) - Trim CONTEXT_LEN from 48 → 24 weeks; CSV now 108 rows (was 180) - Add NOTE ON REAL DATA comment: temp file pattern for large external datasets Both scripts regenerated with clean outputs.
50 lines
1.5 KiB
JSON
50 lines
1.5 KiB
JSON
{
|
|
"description": "Synthetic retail sales data with covariates for TimesFM XReg demo",
|
|
"note_on_real_data": "If using a real dataset (e.g., Kaggle Rossmann Store Sales), download it to a temp directory (tempfile.mkdtemp) and do NOT commit it here. This skills directory only ships tiny reference files.",
|
|
"stores": {
|
|
"store_A": {
|
|
"type": "premium",
|
|
"region": "urban",
|
|
"base_sales": 1000
|
|
},
|
|
"store_B": {
|
|
"type": "standard",
|
|
"region": "suburban",
|
|
"base_sales": 750
|
|
},
|
|
"store_C": {
|
|
"type": "discount",
|
|
"region": "rural",
|
|
"base_sales": 500
|
|
}
|
|
},
|
|
"dimensions": {
|
|
"context_length": 24,
|
|
"horizon_length": 12,
|
|
"total_length": 36,
|
|
"num_stores": 3,
|
|
"csv_rows": 108
|
|
},
|
|
"covariates": {
|
|
"dynamic_numerical": [
|
|
"price",
|
|
"promotion"
|
|
],
|
|
"dynamic_categorical": [
|
|
"holiday",
|
|
"day_of_week"
|
|
],
|
|
"static_categorical": [
|
|
"store_type",
|
|
"region"
|
|
]
|
|
},
|
|
"xreg_modes": {
|
|
"xreg + timesfm": "Fit regression on residuals after TimesFM forecast",
|
|
"timesfm + xreg": "TimesFM forecasts residuals after regression fit"
|
|
},
|
|
"bug_fixes": [
|
|
"v2: Fixed variable-shadowing in generate_sales_data() \u2014 inner dict comprehension `{store_id: ... for store_id in stores}` was overwriting the outer loop variable, causing all stores to get identical covariate arrays. Fixed by using separate per-store dicts during the loop.",
|
|
"v2: Reduced CONTEXT_LEN from 48 \u2192 24 weeks; CSV now 108 rows (was 180)."
|
|
]
|
|
} |