diff --git a/scientific-skills/timesfm-forecasting/examples/anomaly-detection/detect_anomalies.py b/scientific-skills/timesfm-forecasting/examples/anomaly-detection/detect_anomalies.py index e4ee2ba..d6e7953 100644 --- a/scientific-skills/timesfm-forecasting/examples/anomaly-detection/detect_anomalies.py +++ b/scientific-skills/timesfm-forecasting/examples/anomaly-detection/detect_anomalies.py @@ -2,14 +2,19 @@ """ TimesFM Anomaly Detection Example -This example demonstrates how to use TimesFM's quantile forecasts for -anomaly detection. The approach: -1. Forecast with quantile intervals (10th-90th percentiles) -2. Compare actual values against prediction intervals -3. Flag values outside intervals as anomalies +Demonstrates using TimesFM quantile forecasts as prediction intervals +for anomaly detection. Approach: + 1. Use 36 months of real data as context + 2. Create synthetic 12-month future (natural continuation of trend) + 3. Inject 3 clear anomalies into that future + 4. Forecast with quantile intervals → flag anomalies by severity -TimesFM does NOT have built-in anomaly detection, but the quantile -forecasts provide natural anomaly detection via prediction intervals. +TimesFM has NO built-in anomaly detection. Quantile forecasts provide +natural prediction intervals — values outside them are statistically unusual. 
+ +Quantile index reference (index 0 = mean, 1-9 = q10-q90): + 80% PI = q10 (idx 1) to q90 (idx 9) + 60% PI = q20 (idx 2) to q80 (idx 8) """ from __future__ import annotations @@ -18,36 +23,51 @@ import json from pathlib import Path import matplotlib.pyplot as plt +import matplotlib.dates as mdates import numpy as np import pandas as pd import timesfm # Configuration -HORIZON = 12 # Forecast horizon -ANOMALY_THRESHOLD_WARNING = 0.80 # Outside 80% CI = warning -ANOMALY_THRESHOLD_CRITICAL = 0.90 # Outside 90% CI = critical - -EXAMPLE_DIR = Path(__file__).parent +HORIZON = 12 # Forecast horizon (months) DATA_FILE = ( Path(__file__).parent.parent / "global-temperature" / "temperature_anomaly.csv" ) -OUTPUT_DIR = EXAMPLE_DIR / "output" +OUTPUT_DIR = Path(__file__).parent / "output" + +# Anomaly thresholds using available quantile outputs +# 80% PI = q10-q90 → "critical" if outside +# 60% PI = q20-q80 → "warning" if outside +IDX_Q10, IDX_Q20, IDX_Q80, IDX_Q90 = 1, 2, 8, 9 -def inject_anomalies( - values: np.ndarray, n_anomalies: int = 3, seed: int = 42 +def build_synthetic_future( + context: np.ndarray, n: int, seed: int = 42 ) -> tuple[np.ndarray, list[int]]: - """Inject synthetic anomalies into the data for demonstration.""" + """Build synthetic future that looks like a natural continuation. + + Takes the mean/std of the last 6 context months as the baseline, + then injects 3 clear anomalies (2 high, 1 low) at fixed positions. 
+ """ rng = np.random.default_rng(seed) - anomaly_indices = rng.choice(len(values), size=n_anomalies, replace=False).tolist() + recent_mean = float(context[-6:].mean()) + recent_std = float(context[-6:].std()) - anomalous_values = values.copy() - for idx in anomaly_indices: - # Inject spike or dip (±40-60% of value) - multiplier = rng.choice([0.4, 0.6]) * rng.choice([1, -1]) - anomalous_values[idx] = values[idx] * (1 + multiplier) + # Natural-looking continuation: small gaussian noise around recent mean + future = recent_mean + rng.normal(0, recent_std * 0.4, n).astype(np.float32) - return anomalous_values, sorted(anomaly_indices) + # Inject 3 unmistakable anomalies + anomaly_cfg = [ + (2, +0.55), # month 3 — large spike up + (7, -0.50), # month 8 — large dip down + (10, +0.48), # month 11 — spike up + ] + anomaly_indices = [] + for idx, delta in anomaly_cfg: + future[idx] = recent_mean + delta + anomaly_indices.append(idx) + + return future, sorted(anomaly_indices) def main() -> None: @@ -57,27 +77,30 @@ def main() -> None: OUTPUT_DIR.mkdir(exist_ok=True) - # Load temperature data - print("\n📊 Loading temperature anomaly data...") + # ── Load all 36 months as context ───────────────────────────── + print("\n📊 Loading temperature data (all 36 months as context)...") df = pd.read_csv(DATA_FILE, parse_dates=["date"]) df = df.sort_values("date").reset_index(drop=True) + context_values = df["anomaly_c"].values.astype(np.float32) # all 36 months + context_dates = df["date"].tolist() - # Split into context (first 24 months) and test (last 12 months) - context_values = df["anomaly_c"].values[:24].astype(np.float32) - actual_future = df["anomaly_c"].values[24:36].astype(np.float32) - dates_future = df["date"].values[24:36] - - print(f" Context: 24 months (2022-01 to 2023-12)") - print(f" Test: 12 months (2024-01 to 2024-12)") - - # Inject anomalies into test data for demonstration - print("\n🔬 Injecting synthetic anomalies for demonstration...") - 
test_values_with_anomalies, anomaly_indices = inject_anomalies( - actual_future, n_anomalies=3 + print( + f" Context: {len(context_values)} months ({context_dates[0].strftime('%Y-%m')} → {context_dates[-1].strftime('%Y-%m')})" ) - print(f" Injected anomalies at months: {anomaly_indices}") - # Load TimesFM + # ── Build synthetic future with known anomalies ──────────────── + print("\n🔬 Building synthetic 12-month future with injected anomalies...") + future_values, injected_at = build_synthetic_future(context_values, HORIZON) + future_dates = pd.date_range( + start=context_dates[-1] + pd.DateOffset(months=1), + periods=HORIZON, + freq="MS", + ) + print( + f" Anomalies injected at months: {[future_dates[i].strftime('%Y-%m') for i in injected_at]}" + ) + + # ── Load TimesFM and forecast ────────────────────────────────── print("\n🤖 Loading TimesFM 1.0 (200M) PyTorch...") hparams = timesfm.TimesFmHparams(horizon_len=HORIZON) checkpoint = timesfm.TimesFmCheckpoint( @@ -85,254 +108,186 @@ def main() -> None: ) model = timesfm.TimesFm(hparams=hparams, checkpoint=checkpoint) - # Forecast with quantiles - print("\n📈 Forecasting with quantile intervals...") - point_forecast, quantile_forecast = model.forecast( - [context_values], - freq=[0], - ) + print("\n📈 Forecasting...") + point_fc, quant_fc = model.forecast([context_values], freq=[0]) - # Extract quantiles - # quantile_forecast shape: (1, 12, 10) - [mean, q10, q20, ..., q90] - point = point_forecast[0] - q10 = quantile_forecast[0, :, 0] # 10th percentile - q20 = quantile_forecast[0, :, 1] # 20th percentile - q50 = quantile_forecast[0, :, 4] # 50th percentile (median) - q80 = quantile_forecast[0, :, 7] # 80th percentile - q90 = quantile_forecast[0, :, 8] # 90th percentile + # quantile_forecast shape: (1, horizon, 10) + # index 0 = mean, index 1 = q10, ..., index 9 = q90 + point = point_fc[0] # shape (12,) + q10 = quant_fc[0, :, IDX_Q10] # 10th pct + q20 = quant_fc[0, :, IDX_Q20] # 20th pct + q80 = quant_fc[0, :, IDX_Q80] 
# 80th pct + q90 = quant_fc[0, :, IDX_Q90] # 90th pct print(f" Forecast mean: {point.mean():.3f}°C") - print(f" 90% CI width: {(q90 - q10).mean():.3f}°C (avg)") + print(f" 80% PI width: {(q90 - q10).mean():.3f}°C (avg)") - # Detect anomalies + # ── Detect anomalies ─────────────────────────────────────────── print("\n🔍 Detecting anomalies...") - anomalies = [] - for i, (actual, lower_80, upper_80, lower_90, upper_90) in enumerate( - zip(test_values_with_anomalies, q20, q80, q10, q90) + records = [] + for i, (actual, fcast, lo60, hi60, lo80, hi80) in enumerate( + zip(future_values, point, q20, q80, q10, q90) ): - month = dates_future[i] - month_str = pd.to_datetime(month).strftime("%Y-%m") + month = future_dates[i].strftime("%Y-%m") - if actual < lower_90 or actual > upper_90: - severity = "CRITICAL" - threshold = "90% CI" - color = "red" - elif actual < lower_80 or actual > upper_80: - severity = "WARNING" - threshold = "80% CI" - color = "orange" + if actual < lo80 or actual > hi80: + severity = "CRITICAL" # outside 80% PI + elif actual < lo60 or actual > hi60: + severity = "WARNING" # outside 60% PI else: severity = "NORMAL" - threshold = "within bounds" - color = "green" - anomalies.append( + records.append( { - "month": month_str, - "actual": float(actual), - "forecast": float(point[i]), - "lower_80": float(lower_80), - "upper_80": float(upper_80), - "lower_90": float(lower_90), - "upper_90": float(upper_90), + "month": month, + "actual": round(float(actual), 4), + "forecast": round(float(fcast), 4), + "lower_60pi": round(float(lo60), 4), + "upper_60pi": round(float(hi60), 4), + "lower_80pi": round(float(lo80), 4), + "upper_80pi": round(float(hi80), 4), "severity": severity, - "threshold": threshold, - "color": color, + "injected": (i in injected_at), } ) if severity != "NORMAL": - deviation = abs(actual - point[i]) + dev = actual - fcast print( - f" [{severity}] {month_str}: {actual:.2f}°C (forecast: {point[i]:.2f}°C, deviation: {deviation:.2f}°C)" + f" 
[{severity}] {month}: actual={actual:.2f} forecast={fcast:.2f} Δ={dev:+.2f}°C" ) - # Create visualization - print("\n📊 Creating anomaly visualization...") + # ── Visualise ───────────────────────────────────────────────── + print("\n📊 Creating visualization...") - fig, axes = plt.subplots(2, 1, figsize=(14, 10)) + fig, axes = plt.subplots(2, 1, figsize=(13, 9)) - # Plot 1: Full time series with forecast and anomalies - ax1 = axes[0] + clr = {"CRITICAL": "red", "WARNING": "orange", "NORMAL": "steelblue"} - # Historical data - historical_dates = df["date"].values[:24] - ax1.plot( - historical_dates, + # — Panel 1: full series ——————————————————————————————————————— + ax = axes[0] + ax.plot( + context_dates, context_values, "b-", - linewidth=2, - label="Historical Data", + lw=2, marker="o", - markersize=4, + ms=4, + label="Context (36 months)", ) - - # Actual future (with anomalies) - ax1.plot( - dates_future, - actual_future, - "g--", - linewidth=1.5, - label="Actual (clean)", + ax.fill_between( + future_dates, q10, q90, alpha=0.18, color="tomato", label="80% PI (q10–q90)" + ) + ax.fill_between( + future_dates, q20, q80, alpha=0.28, color="tomato", label="60% PI (q20–q80)" + ) + ax.plot(future_dates, point, "r-", lw=2, marker="s", ms=5, label="Forecast") + ax.plot( + future_dates, + future_values, + "k--", + lw=1.3, alpha=0.5, - ) - ax1.plot( - dates_future, - test_values_with_anomalies, - "ko", - markersize=8, - label="Actual (with anomalies)", - alpha=0.7, + label="Synthetic future (clean)", ) - # Forecast - ax1.plot( - dates_future, - point, - "r-", - linewidth=2, - label="Forecast (median)", - marker="s", - markersize=6, - ) - - # 90% CI - ax1.fill_between(dates_future, q10, q90, alpha=0.2, color="red", label="90% CI") - - # 80% CI - ax1.fill_between(dates_future, q20, q80, alpha=0.3, color="red", label="80% CI") - - # Highlight anomalies - for anomaly in anomalies: - if anomaly["severity"] != "NORMAL": - idx = [pd.to_datetime(d).strftime("%Y-%m") for d in 
dates_future].index( - anomaly["month"] - ) - ax1.scatter( - [dates_future[idx]], - [test_values_with_anomalies[idx]], - c=anomaly["color"], - s=200, - marker="x" if anomaly["severity"] == "CRITICAL" else "^", - linewidths=3, - zorder=5, + # mark anomalies + for rec in records: + if rec["severity"] != "NORMAL": + dt = pd.to_datetime(rec["month"]) + c = "red" if rec["severity"] == "CRITICAL" else "orange" + mk = "X" if rec["severity"] == "CRITICAL" else "^" + ax.scatter( + [dt], [rec["actual"]], c=c, s=220, marker=mk, zorder=6, linewidths=2 ) - ax1.set_xlabel("Date", fontsize=12) - ax1.set_ylabel("Temperature Anomaly (°C)", fontsize=12) - ax1.set_title( - "TimesFM Anomaly Detection: Forecast Intervals Method", - fontsize=14, + ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m")) + ax.xaxis.set_major_locator(mdates.MonthLocator(interval=3)) + plt.setp(ax.xaxis.get_majorticklabels(), rotation=45, ha="right") + ax.set_ylabel("Temperature Anomaly (°C)", fontsize=11) + ax.set_title( + "TimesFM Anomaly Detection — Prediction Interval Method", + fontsize=13, fontweight="bold", ) - ax1.legend(loc="upper left", fontsize=10) - ax1.grid(True, alpha=0.3) - - # Add annotation for anomalies - ax1.annotate( - "× = Critical (outside 90% CI)\n▲ = Warning (outside 80% CI)", - xy=(0.98, 0.02), + ax.legend(loc="upper left", fontsize=9, ncol=2) + ax.grid(True, alpha=0.25) + ax.annotate( + "X = Critical (outside 80% PI)\n▲ = Warning (outside 60% PI)", + xy=(0.98, 0.04), xycoords="axes fraction", ha="right", - va="bottom", - fontsize=10, + fontsize=9, bbox=dict(boxstyle="round", facecolor="wheat", alpha=0.8), ) - # Plot 2: Deviation from forecast with thresholds + # — Panel 2: deviation bars ——————————————————————————————————— ax2 = axes[1] + deviations = future_values - point + lo80_dev = q10 - point + hi80_dev = q90 - point + lo60_dev = q20 - point + hi60_dev = q80 - point + x = np.arange(HORIZON) - deviation = test_values_with_anomalies - point - lower_90_dev = q10 - point - 
upper_90_dev = q90 - point - lower_80_dev = q20 - point - upper_80_dev = q80 - point + ax2.fill_between(x, lo80_dev, hi80_dev, alpha=0.15, color="tomato", label="80% PI") + ax2.fill_between(x, lo60_dev, hi60_dev, alpha=0.25, color="tomato", label="60% PI") + bar_colors = [clr[r["severity"]] for r in records] + ax2.bar(x, deviations, color=bar_colors, alpha=0.75, edgecolor="black", lw=0.5) + ax2.axhline(0, color="black", lw=1) - months = [pd.to_datetime(d).strftime("%Y-%m") for d in dates_future] - x = np.arange(len(months)) - - # Threshold bands - ax2.fill_between( - x, lower_90_dev, upper_90_dev, alpha=0.2, color="red", label="90% CI bounds" + ax2.set_xticks(x) + ax2.set_xticklabels( + [r["month"] for r in records], rotation=45, ha="right", fontsize=9 ) - ax2.fill_between( - x, lower_80_dev, upper_80_dev, alpha=0.3, color="red", label="80% CI bounds" - ) - - # Deviation bars - colors = [ - "red" - if d < lower_90_dev[i] or d > upper_90_dev[i] - else "orange" - if d < lower_80_dev[i] or d > upper_80_dev[i] - else "green" - for i, d in enumerate(deviation) - ] - ax2.bar(x, deviation, color=colors, alpha=0.7, edgecolor="black", linewidth=0.5) - - # Zero line - ax2.axhline(y=0, color="black", linestyle="-", linewidth=1) - - ax2.set_xlabel("Month", fontsize=12) - ax2.set_ylabel("Deviation from Forecast (°C)", fontsize=12) + ax2.set_ylabel("Δ from Forecast (°C)", fontsize=11) ax2.set_title( "Deviation from Forecast with Anomaly Thresholds", - fontsize=14, + fontsize=13, fontweight="bold", ) - ax2.set_xticks(x) - ax2.set_xticklabels(months, rotation=45, ha="right") - ax2.legend(loc="upper right", fontsize=10) - ax2.grid(True, alpha=0.3, axis="y") + ax2.legend(loc="upper right", fontsize=9) + ax2.grid(True, alpha=0.25, axis="y") plt.tight_layout() - - output_path = OUTPUT_DIR / "anomaly_detection.png" - plt.savefig(output_path, dpi=150, bbox_inches="tight") - print(f" Saved: {output_path}") + png_path = OUTPUT_DIR / "anomaly_detection.png" + plt.savefig(png_path, dpi=150, 
bbox_inches="tight") plt.close() + print(f" Saved: {png_path}") - # Save results - results = { - "method": "quantile_intervals", - "description": "Anomaly detection using TimesFM quantile forecasts as prediction intervals", - "thresholds": { - "warning": f"Outside {ANOMALY_THRESHOLD_WARNING * 100:.0f}% CI (q20-q80)", - "critical": f"Outside {ANOMALY_THRESHOLD_CRITICAL * 100:.0f}% CI (q10-q90)", - }, - "anomalies": anomalies, - "summary": { - "total_points": len(anomalies), - "critical": sum(1 for a in anomalies if a["severity"] == "CRITICAL"), - "warning": sum(1 for a in anomalies if a["severity"] == "WARNING"), - "normal": sum(1 for a in anomalies if a["severity"] == "NORMAL"), - }, + # ── Save JSON results ────────────────────────────────────────── + summary = { + "total": len(records), + "critical": sum(1 for r in records if r["severity"] == "CRITICAL"), + "warning": sum(1 for r in records if r["severity"] == "WARNING"), + "normal": sum(1 for r in records if r["severity"] == "NORMAL"), } + out = { + "method": "quantile_prediction_intervals", + "description": ( + "Anomaly detection via TimesFM quantile forecasts. " + "80% PI = q10–q90 (CRITICAL if violated). " + "60% PI = q20–q80 (WARNING if violated)." 
+ ), + "context": "36 months of real NOAA temperature anomaly data (2022-2024)", + "future": "12 synthetic months with 3 injected anomalies", + "quantile_indices": {"q10": 1, "q20": 2, "q80": 8, "q90": 9}, + "summary": summary, + "detections": records, + } + json_path = OUTPUT_DIR / "anomaly_detection.json" + with open(json_path, "w") as f: + json.dump(out, f, indent=2) + print(f" Saved: {json_path}") - results_path = OUTPUT_DIR / "anomaly_detection.json" - with open(results_path, "w") as f: - json.dump(results, f, indent=2) - print(f" Saved: {results_path}") - - # Print summary + # ── Summary ──────────────────────────────────────────────────── print("\n" + "=" * 60) print(" ✅ ANOMALY DETECTION COMPLETE") print("=" * 60) - print(f"\n📊 Summary:") - print(f" Total test points: {results['summary']['total_points']}") - print(f" Critical anomalies: {results['summary']['critical']} (outside 90% CI)") - print(f" Warnings: {results['summary']['warning']} (outside 80% CI)") - print(f" Normal: {results['summary']['normal']}") - - print("\n💡 How It Works:") - print(" 1. TimesFM forecasts with quantile intervals (q10, q20, ..., q90)") - print(" 2. If actual value falls outside 90% CI → CRITICAL anomaly") - print(" 3. If actual value falls outside 80% CI → WARNING") - print(" 4. 
Otherwise → NORMAL") - - print("\n📁 Output Files:") - print(f" {output_path}") - print(f" {results_path}") + print(f"\n Total future points : {summary['total']}") + print(f" Critical (80% PI) : {summary['critical']}") + print(f" Warning (60% PI) : {summary['warning']}") + print(f" Normal : {summary['normal']}") if __name__ == "__main__": diff --git a/scientific-skills/timesfm-forecasting/examples/anomaly-detection/output/anomaly_detection.json b/scientific-skills/timesfm-forecasting/examples/anomaly-detection/output/anomaly_detection.json index 63f19c2..5bc2122 100644 --- a/scientific-skills/timesfm-forecasting/examples/anomaly-detection/output/anomaly_detection.json +++ b/scientific-skills/timesfm-forecasting/examples/anomaly-detection/output/anomaly_detection.json @@ -1,160 +1,152 @@ { - "method": "quantile_intervals", - "description": "Anomaly detection using TimesFM quantile forecasts as prediction intervals", - "thresholds": { - "warning": "Outside 80% CI (q20-q80)", - "critical": "Outside 90% CI (q10-q90)" + "method": "quantile_prediction_intervals", + "description": "Anomaly detection via TimesFM quantile forecasts. 80% PI = q10\u2013q90 (CRITICAL if violated). 
60% PI = q20\u2013q80 (WARNING if violated).", + "context": "36 months of real NOAA temperature anomaly data (2022-2024)", + "future": "12 synthetic months with 3 injected anomalies", + "quantile_indices": { + "q10": 1, + "q20": 2, + "q80": 8, + "q90": 9 }, - "anomalies": [ - { - "month": "2024-01", - "actual": 1.9520000219345093, - "forecast": 1.1204800605773926, - "lower_80": 0.9561834335327148, - "upper_80": 1.19773530960083, - "lower_90": 1.1319338083267212, - "upper_90": 1.2482070922851562, - "severity": "CRITICAL", - "threshold": "90% CI", - "color": "red" - }, - { - "month": "2024-02", - "actual": 1.350000023841858, - "forecast": 1.0831129550933838, - "lower_80": 0.9061079621315002, - "upper_80": 1.1693586111068726, - "lower_90": 1.1058242321014404, - "upper_90": 1.229236364364624, - "severity": "CRITICAL", - "threshold": "90% CI", - "color": "red" - }, - { - "month": "2024-03", - "actual": 1.340000033378601, - "forecast": 1.0525826215744019, - "lower_80": 0.8687788844108582, - "upper_80": 1.14640212059021, - "lower_90": 1.0804548263549805, - "upper_90": 1.210077166557312, - "severity": "CRITICAL", - "threshold": "90% CI", - "color": "red" - }, - { - "month": "2024-04", - "actual": 1.2599999904632568, - "forecast": 1.0186809301376343, - "lower_80": 0.8394415378570557, - "upper_80": 1.11386239528656, - "lower_90": 1.0469233989715576, - "upper_90": 1.18027925491333, - "severity": "CRITICAL", - "threshold": "90% CI", - "color": "red" - }, - { - "month": "2024-05", - "actual": 1.149999976158142, - "forecast": 0.996323823928833, - "lower_80": 0.8218992948532104, - "upper_80": 1.082446813583374, - "lower_90": 1.0246795415878296, - "upper_90": 1.1515717506408691, - "severity": "WARNING", - "threshold": "80% CI", - "color": "orange" - }, - { - "month": "2024-06", - "actual": 1.2000000476837158, - "forecast": 0.9761021733283997, - "lower_80": 0.8107370138168335, - "upper_80": 1.0650819540023804, - "lower_90": 1.0055618286132812, - "upper_90": 1.1297614574432373, - 
"severity": "CRITICAL", - "threshold": "90% CI", - "color": "red" - }, - { - "month": "2024-07", - "actual": 1.2400000095367432, - "forecast": 0.966797411441803, - "lower_80": 0.8105956315994263, - "upper_80": 1.05680513381958, - "lower_90": 0.999349057674408, - "upper_90": 1.1205626726150513, - "severity": "CRITICAL", - "threshold": "90% CI", - "color": "red" - }, - { - "month": "2024-08", - "actual": 2.0799999237060547, - "forecast": 0.9621630311012268, - "lower_80": 0.8031740784645081, - "upper_80": 1.0481219291687012, - "lower_90": 0.9949856996536255, - "upper_90": 1.1177691221237183, - "severity": "CRITICAL", - "threshold": "90% CI", - "color": "red" - }, - { - "month": "2024-09", - "actual": 0.7680000066757202, - "forecast": 0.950423002243042, - "lower_80": 0.8004634380340576, - "upper_80": 1.0429224967956543, - "lower_90": 0.9896860718727112, - "upper_90": 1.112573504447937, - "severity": "CRITICAL", - "threshold": "90% CI", - "color": "red" - }, - { - "month": "2024-10", - "actual": 1.2699999809265137, - "forecast": 0.9326475262641907, - "lower_80": 0.7854968309402466, - "upper_80": 1.024938702583313, - "lower_90": 0.9742559194564819, - "upper_90": 1.0930581092834473, - "severity": "CRITICAL", - "threshold": "90% CI", - "color": "red" - }, - { - "month": "2024-11", - "actual": 1.2200000286102295, - "forecast": 0.9303779602050781, - "lower_80": 0.7851479053497314, - "upper_80": 1.0191327333450317, - "lower_90": 0.9675081968307495, - "upper_90": 1.084266185760498, - "severity": "CRITICAL", - "threshold": "90% CI", - "color": "red" - }, - { - "month": "2024-12", - "actual": 1.2000000476837158, - "forecast": 0.9362010955810547, - "lower_80": 0.7882705330848694, - "upper_80": 1.028489589691162, - "lower_90": 0.9734180569648743, - "upper_90": 1.0912758111953735, - "severity": "CRITICAL", - "threshold": "90% CI", - "color": "red" - } - ], "summary": { - "total_points": 12, - "critical": 11, + "total": 12, + "critical": 3, "warning": 1, - "normal": 0 - } + 
"normal": 8 + }, + "detections": [ + { + "month": "2025-01", + "actual": 1.2559, + "forecast": 1.2593, + "lower_60pi": 1.1881, + "upper_60pi": 1.324, + "lower_80pi": 1.1407, + "upper_80pi": 1.3679, + "severity": "NORMAL", + "injected": false + }, + { + "month": "2025-02", + "actual": 1.2372, + "forecast": 1.2857, + "lower_60pi": 1.1961, + "upper_60pi": 1.3751, + "lower_80pi": 1.1406, + "upper_80pi": 1.4254, + "severity": "NORMAL", + "injected": false + }, + { + "month": "2025-03", + "actual": 1.8017, + "forecast": 1.295, + "lower_60pi": 1.1876, + "upper_60pi": 1.4035, + "lower_80pi": 1.1269, + "upper_80pi": 1.4643, + "severity": "CRITICAL", + "injected": true + }, + { + "month": "2025-04", + "actual": 1.2648, + "forecast": 1.2208, + "lower_60pi": 1.1042, + "upper_60pi": 1.331, + "lower_80pi": 1.0353, + "upper_80pi": 1.4017, + "severity": "NORMAL", + "injected": false + }, + { + "month": "2025-05", + "actual": 1.2245, + "forecast": 1.1703, + "lower_60pi": 1.0431, + "upper_60pi": 1.2892, + "lower_80pi": 0.9691, + "upper_80pi": 1.3632, + "severity": "NORMAL", + "injected": false + }, + { + "month": "2025-06", + "actual": 1.2335, + "forecast": 1.1456, + "lower_60pi": 1.0111, + "upper_60pi": 1.2703, + "lower_80pi": 0.942, + "upper_80pi": 1.3454, + "severity": "NORMAL", + "injected": false + }, + { + "month": "2025-07", + "actual": 1.2534, + "forecast": 1.1702, + "lower_60pi": 1.0348, + "upper_60pi": 1.2998, + "lower_80pi": 0.9504, + "upper_80pi": 1.3807, + "severity": "NORMAL", + "injected": false + }, + { + "month": "2025-08", + "actual": 0.7517, + "forecast": 1.2027, + "lower_60pi": 1.0594, + "upper_60pi": 1.3408, + "lower_80pi": 0.9709, + "upper_80pi": 1.4195, + "severity": "CRITICAL", + "injected": true + }, + { + "month": "2025-09", + "actual": 1.2514, + "forecast": 1.191, + "lower_60pi": 1.0404, + "upper_60pi": 1.3355, + "lower_80pi": 0.9594, + "upper_80pi": 1.417, + "severity": "NORMAL", + "injected": false + }, + { + "month": "2025-10", + "actual": 1.2398, + 
"forecast": 1.1491, + "lower_60pi": 0.9953, + "upper_60pi": 1.2869, + "lower_80pi": 0.9079, + "upper_80pi": 1.3775, + "severity": "NORMAL", + "injected": false + }, + { + "month": "2025-11", + "actual": 1.7317, + "forecast": 1.0805, + "lower_60pi": 0.926, + "upper_60pi": 1.2284, + "lower_80pi": 0.8361, + "upper_80pi": 1.3122, + "severity": "CRITICAL", + "injected": true + }, + { + "month": "2025-12", + "actual": 1.2625, + "forecast": 1.0613, + "lower_60pi": 0.8952, + "upper_60pi": 1.2169, + "lower_80pi": 0.8022, + "upper_80pi": 1.296, + "severity": "WARNING", + "injected": false + } + ] } \ No newline at end of file diff --git a/scientific-skills/timesfm-forecasting/examples/anomaly-detection/output/anomaly_detection.png b/scientific-skills/timesfm-forecasting/examples/anomaly-detection/output/anomaly_detection.png index 908006a..8d0c322 100644 Binary files a/scientific-skills/timesfm-forecasting/examples/anomaly-detection/output/anomaly_detection.png and b/scientific-skills/timesfm-forecasting/examples/anomaly-detection/output/anomaly_detection.png differ diff --git a/scientific-skills/timesfm-forecasting/examples/covariates-forecasting/demo_covariates.py b/scientific-skills/timesfm-forecasting/examples/covariates-forecasting/demo_covariates.py index 8c4f182..58ef864 100644 --- a/scientific-skills/timesfm-forecasting/examples/covariates-forecasting/demo_covariates.py +++ b/scientific-skills/timesfm-forecasting/examples/covariates-forecasting/demo_covariates.py @@ -2,18 +2,26 @@ """ TimesFM Covariates (XReg) Example -This example demonstrates TimesFM's exogenous variable support through the -forecast_with_covariates() API. This requires `timesfm[xreg]` installation. +Demonstrates the TimesFM covariate API structure using synthetic retail +sales data. TimesFM 1.0 does NOT support forecast_with_covariates(). +That feature requires TimesFM 2.5 + `timesfm[xreg]`. 
-Covariate Types Supported: -- Dynamic Numerical: Time-varying numeric features (e.g., price, temperature) -- Dynamic Categorical: Time-varying categorical features (e.g., holiday, day_of_week) -- Static Numerical: Per-series numeric features (e.g., store_size) -- Static Categorical: Per-series categorical features (e.g., store_type, region) +This script: + 1. Generates synthetic 3-store retail data (24-week context, 12-week horizon) + 2. Visualises each covariate type (dynamic numerical, dynamic categorical, static) + 3. Prints the forecast_with_covariates() call signature for reference + 4. Exports a compact CSV (90 rows) and metadata JSON -Note: TimesFM 1.0 (used here) does NOT support forecast_with_covariates(). -This example uses TimesFM 2.5 which requires a different API. We'll demonstrate -the concept with synthetic data and show the API signature. +NOTE ON REAL DATA: + If you want to use a real retail dataset (e.g., Kaggle Rossmann Store Sales), + download it to a TEMP location — do NOT commit large CSVs to this repo. + Example: + import tempfile, urllib.request + tmp = tempfile.mkdtemp(prefix="timesfm_retail_") + # urllib.request.urlretrieve("https://...store_sales.csv", f"{tmp}/store_sales.csv") + # df = pd.read_csv(f"{tmp}/store_sales.csv") + Users should persist the data wherever makes sense for their workflow; + this skills directory intentionally keeps only tiny reference datasets. 
""" from __future__ import annotations @@ -32,15 +40,22 @@ import pandas as pd EXAMPLE_DIR = Path(__file__).parent OUTPUT_DIR = EXAMPLE_DIR / "output" -# Synthetic data configuration +# Synthetic data configuration — kept SMALL (24 weeks context, 90 CSV rows) N_STORES = 3 -CONTEXT_LEN = 48 # 48 weeks of history -HORIZON_LEN = 12 # 12 weeks forecast -TOTAL_LEN = CONTEXT_LEN + HORIZON_LEN +CONTEXT_LEN = 24 # weeks of history (was 48 — halved for token efficiency) +HORIZON_LEN = 12 # weeks to forecast +TOTAL_LEN = CONTEXT_LEN + HORIZON_LEN # 36 weeks total per store def generate_sales_data() -> dict: - """Generate synthetic retail sales data with covariates.""" + """Generate synthetic retail sales data with covariates. + + BUG FIX (v2): Previous version had a variable-shadowing issue where the + inner dict comprehension `{store_id: ... for store_id in stores}` overwrote + the outer loop variable, giving all stores identical covariate data (store_A's). + Fixed by collecting per-store arrays into separate dicts during the outer loop + and building the covariates dict afterwards. 
+ """ rng = np.random.default_rng(42) # Store configurations @@ -50,72 +65,66 @@ def generate_sales_data() -> dict: "store_C": {"type": "discount", "region": "rural", "base_sales": 500}, } - data = {"stores": {}, "covariates": {}} + data: dict = {"stores": {}, "covariates": {}} + + # Collect per-store covariate arrays *before* building the covariates dict + prices_by_store: dict[str, np.ndarray] = {} + promos_by_store: dict[str, np.ndarray] = {} + holidays_by_store: dict[str, np.ndarray] = {} + day_of_week_by_store: dict[str, np.ndarray] = {} for store_id, config in stores.items(): - # Base sales with trend weeks = np.arange(TOTAL_LEN) trend = config["base_sales"] * (1 + 0.005 * weeks) - - # Seasonality (yearly pattern) seasonality = 100 * np.sin(2 * np.pi * weeks / 52) - - # Noise noise = rng.normal(0, 50, TOTAL_LEN) - # Price (affects sales negatively) - price = 10 + rng.uniform(-1, 1, TOTAL_LEN) - price_effect = -20 * (price - 10) + # Price — slightly different range per store to reflect market positioning + base_price = {"store_A": 12.0, "store_B": 10.0, "store_C": 7.5}[store_id] + price = base_price + rng.uniform(-0.5, 0.5, TOTAL_LEN) + price_effect = -20 * (price - base_price) - # Holidays (boost sales) + # Holidays (major retail weeks) holidays = np.zeros(TOTAL_LEN) - holiday_weeks = [0, 11, 23, 35, 47, 51] # Major holidays - for hw in holiday_weeks: + for hw in [0, 11, 23, 35]: if hw < TOTAL_LEN: - holidays[hw] = 1 - + holidays[hw] = 1.0 holiday_effect = 200 * holidays - # Promotion (boost sales) - promotion = rng.choice([0, 1], TOTAL_LEN, p=[0.8, 0.2]) + # Promotion — random 20% of weeks + promotion = rng.choice([0.0, 1.0], TOTAL_LEN, p=[0.8, 0.2]) promo_effect = 150 * promotion - # Final sales + # Day-of-week proxy (weekly granularity → repeat 0-6 pattern) + day_of_week = np.tile(np.arange(7), TOTAL_LEN // 7 + 1)[:TOTAL_LEN] + sales = ( trend + seasonality + noise + price_effect + holiday_effect + promo_effect ) - sales = np.maximum(sales, 50) # Ensure 
positive + sales = np.maximum(sales, 50.0).astype(np.float32) - # Day of week effect (0=Mon, 6=Sun) - simplified to weekly - day_of_week = np.tile(np.arange(7), TOTAL_LEN // 7 + 1)[:TOTAL_LEN] + data["stores"][store_id] = {"sales": sales, "config": config} - data["stores"][store_id] = { - "sales": sales.astype(np.float32), - "config": config, - } + prices_by_store[store_id] = price.astype(np.float32) + promos_by_store[store_id] = promotion.astype(np.float32) + holidays_by_store[store_id] = holidays.astype(np.float32) + day_of_week_by_store[store_id] = day_of_week.astype(np.int32) - # Covariates (same structure for all stores, different values) - if store_id == "store_A": - data["covariates"] = { - "price": {store_id: price.astype(np.float32) for store_id in stores}, - "promotion": { - store_id: promotion.astype(np.float32) for store_id in stores - }, - "holiday": { - store_id: holidays.astype(np.float32) for store_id in stores - }, - "day_of_week": { - store_id: day_of_week.astype(np.int32) for store_id in stores - }, - "store_type": {store_id: config["type"] for store_id in stores}, - "region": {store_id: config["region"] for store_id in stores}, - } + # Build covariates dict AFTER the loop (avoids shadowing bug) + data["covariates"] = { + "price": prices_by_store, + "promotion": promos_by_store, + "holiday": holidays_by_store, + "day_of_week": day_of_week_by_store, + "store_type": {sid: stores[sid]["type"] for sid in stores}, + "region": {sid: stores[sid]["region"] for sid in stores}, + } return data def demonstrate_api() -> None: - """Show the forecast_with_covariates API structure.""" + """Print the forecast_with_covariates API structure (TimesFM 2.5).""" print("\n" + "=" * 70) print(" TIMESFM COVARIATES API (TimesFM 2.5)") @@ -154,14 +163,14 @@ dynamic_numerical_covariates = { # Dynamic categorical covariates dynamic_categorical_covariates = { - "holiday": [holiday_a, holiday_b, holiday_c], # 0 or 1 flags - "day_of_week": [dow_a, dow_b, dow_c], # 0-6 integer 
values + "holiday": [holiday_a, holiday_b, holiday_c], # 0 or 1 flags + "day_of_week": [dow_a, dow_b, dow_c], # 0-6 integer values } # Static categorical covariates (one value per series) static_categorical_covariates = { "store_type": ["premium", "standard", "discount"], - "region": ["urban", "suburban", "rural"], + "region": ["urban", "suburban", "rural"], } # Forecast with covariates @@ -170,13 +179,13 @@ point_forecast, quantile_forecast = model.forecast_with_covariates( dynamic_numerical_covariates=dynamic_numerical_covariates, dynamic_categorical_covariates=dynamic_categorical_covariates, static_categorical_covariates=static_categorical_covariates, - xreg_mode="xreg + timesfm", # or "timesfm + xreg" - ridge=0.0, # Ridge regularization + xreg_mode="xreg + timesfm", # or "timesfm + xreg" + ridge=0.0, # Ridge regularization normalize_xreg_target_per_input=True, ) # Output shapes -# point_forecast: (num_series, horizon_len) +# point_forecast: (num_series, horizon_len) # quantile_forecast: (num_series, horizon_len, 10) """ print(api_code) @@ -225,9 +234,8 @@ def create_visualization(data: dict) -> None: weeks = np.arange(TOTAL_LEN) context_weeks = weeks[:CONTEXT_LEN] - horizon_weeks = weeks[CONTEXT_LEN:] - # Plot 1: Sales by store + # Panel 1 — Sales by store (context only) ax = axes[0, 0] for store_id, store_data in data["stores"].items(): ax.plot( @@ -236,89 +244,99 @@ def create_visualization(data: dict) -> None: label=f"{store_id} ({store_data['config']['type']})", linewidth=2, ) - ax.axvline(x=CONTEXT_LEN, color="red", linestyle="--", label="Forecast Start") + ax.axvline( + x=CONTEXT_LEN - 0.5, color="red", linestyle="--", label="Forecast Start →" + ) ax.set_xlabel("Week") ax.set_ylabel("Sales") - ax.set_title("Historical Sales by Store") - ax.legend() + ax.set_title("Historical Sales by Store (24-week context)") + ax.legend(fontsize=9) ax.grid(True, alpha=0.3) - # Plot 2: Price covariate + # Panel 2 — Price covariate (all weeks including horizon) ax = 
axes[0, 1] for store_id in data["stores"]: - ax.plot(weeks, data["covariates"]["price"][store_id], label=store_id, alpha=0.7) - ax.axvline(x=CONTEXT_LEN, color="red", linestyle="--") + ax.plot(weeks, data["covariates"]["price"][store_id], label=store_id, alpha=0.8) + ax.axvline(x=CONTEXT_LEN - 0.5, color="red", linestyle="--") ax.set_xlabel("Week") ax.set_ylabel("Price ($)") - ax.set_title("Dynamic Numerical Covariate: Price") - ax.legend() + ax.set_title("Dynamic Numerical Covariate: Price\n(different baseline per store)") + ax.legend(fontsize=9) ax.grid(True, alpha=0.3) - # Plot 3: Holiday covariate + # Panel 3 — Holiday flag ax = axes[1, 0] - holidays = data["covariates"]["holiday"]["store_A"] - ax.bar(weeks, holidays, alpha=0.7, color="orange") - ax.axvline(x=CONTEXT_LEN, color="red", linestyle="--") + # Only store_A is plotted: the holiday flags are identical for every store in this demo + ax.bar(weeks, data["covariates"]["holiday"]["store_A"], alpha=0.7, color="orange") + ax.axvline(x=CONTEXT_LEN - 0.5, color="red", linestyle="--") ax.set_xlabel("Week") ax.set_ylabel("Holiday Flag") ax.set_title("Dynamic Categorical Covariate: Holiday") ax.grid(True, alpha=0.3) - # Plot 4: Promotion covariate + # Panel 4 — Promotion (grouped bars, one per store — each store differs) ax = axes[1, 1] - promotions = data["covariates"]["promotion"]["store_A"] - ax.bar(weeks, promotions, alpha=0.7, color="green") - ax.axvline(x=CONTEXT_LEN, color="red", linestyle="--") + for store_id in data["stores"]: + ax.bar( + weeks + {"store_A": -0.3, "store_B": 0.0, "store_C": 0.3}[store_id], + data["covariates"]["promotion"][store_id], + width=0.3, + alpha=0.7, + label=store_id, + ) + ax.axvline(x=CONTEXT_LEN - 0.5, color="red", linestyle="--") ax.set_xlabel("Week") ax.set_ylabel("Promotion Flag") - ax.set_title("Dynamic Categorical Covariate: Promotion") + ax.set_title("Dynamic Categorical Covariate: Promotion\n(independent per store)") + ax.legend(fontsize=9) ax.grid(True, alpha=0.3) - # Plot 5: Store 
type (static) + # Panel 5 — Store type (static) ax = axes[2, 0] store_types = [data["covariates"]["store_type"][s] for s in data["stores"]] store_ids = list(data["stores"].keys()) - colors = {"premium": "gold", "standard": "silver", "discount": "brown"} + colors = {"premium": "gold", "standard": "silver", "discount": "#cd7f32"} ax.bar(store_ids, [1, 1, 1], color=[colors[t] for t in store_types]) ax.set_ylabel("Store Type") ax.set_title("Static Categorical Covariate: Store Type") ax.set_yticks([]) for i, (sid, t) in enumerate(zip(store_ids, store_types)): - ax.text(i, 0.5, t, ha="center", va="center", fontweight="bold") + ax.text(i, 0.5, t, ha="center", va="center", fontweight="bold", fontsize=11) - # Plot 6: Data structure summary + # Panel 6 — Data structure summary ax = axes[2, 1] ax.axis("off") - - summary_text = """ - COVARIATE DATA STRUCTURE - ───────────────────────── - - Dynamic Numerical Covariates: - • price: np.ndarray[context_len + horizon_len] per series - • promotion: np.ndarray[context_len + horizon_len] per series - - Dynamic Categorical Covariates: - • holiday: np.ndarray[context_len + horizon_len] per series - • day_of_week: np.ndarray[context_len + horizon_len] per series - - Static Categorical Covariates: - • store_type: ["premium", "standard", "discount"] - • region: ["urban", "suburban", "rural"] - - Note: Future covariate values must be known! 
- (Price, promotion schedule, holidays are planned in advance) - """ + summary_text = ( + " COVARIATE DATA STRUCTURE\n" + " ─────────────────────────\n\n" + " Dynamic Numerical Covariates:\n" + " • price: array[context_len + horizon_len] per series\n" + " • promotion: array[context_len + horizon_len] per series\n\n" + " Dynamic Categorical Covariates:\n" + " • holiday: array[context_len + horizon_len] per series\n" + " • day_of_week: array[context_len + horizon_len] per series\n\n" + " Static Categorical Covariates:\n" + " • store_type: ['premium', 'standard', 'discount']\n" + " • region: ['urban', 'suburban', 'rural']\n\n" + " ⚠ Future covariate values must be KNOWN at forecast time!\n" + " (Prices, promotion schedules, and holidays are planned.)" + ) ax.text( - 0.1, + 0.05, 0.5, summary_text, transform=ax.transAxes, fontfamily="monospace", - fontsize=10, + fontsize=9, verticalalignment="center", ) + plt.suptitle( + "TimesFM Covariates (XReg) — Synthetic Retail Sales Demo", + fontsize=14, + fontweight="bold", + y=1.01, + ) plt.tight_layout() output_path = OUTPUT_DIR / "covariates_data.png" @@ -336,10 +354,10 @@ def main() -> None: print("\n📊 Generating synthetic retail sales data...") data = generate_sales_data() - print(f" Stores: {list(data['stores'].keys())}") - print(f" Context length: {CONTEXT_LEN} weeks") - print(f" Horizon length: {HORIZON_LEN} weeks") - print(f" Covariates: {list(data['covariates'].keys())}") + print(f" Stores: {list(data['stores'].keys())}") + print(f" Context length: {CONTEXT_LEN} weeks") + print(f" Horizon length: {HORIZON_LEN} weeks") + print(f" Covariates: {list(data['covariates'].keys())}") # Show API demonstrate_api() @@ -354,17 +372,17 @@ def main() -> None: # Save data print("\n💾 Saving synthetic data...") - # Convert to DataFrame for CSV export records = [] for store_id, store_data in data["stores"].items(): - for i, week in enumerate(range(TOTAL_LEN)): + for i in range(TOTAL_LEN): records.append( { "store_id": store_id, - 
"week": week, - "sales": store_data["sales"][i], - "price": data["covariates"]["price"][store_id][i], - "promotion": data["covariates"]["promotion"][store_id][i], + "week": i, + "split": "context" if i < CONTEXT_LEN else "horizon", + "sales": round(float(store_data["sales"][i]), 2), + "price": round(float(data["covariates"]["price"][store_id][i]), 4), + "promotion": int(data["covariates"]["promotion"][store_id][i]), "holiday": int(data["covariates"]["holiday"][store_id][i]), "day_of_week": int(data["covariates"]["day_of_week"][store_id][i]), "store_type": data["covariates"]["store_type"][store_id], @@ -375,16 +393,23 @@ def main() -> None: df = pd.DataFrame(records) csv_path = OUTPUT_DIR / "sales_with_covariates.csv" df.to_csv(csv_path, index=False) - print(f" Saved: {csv_path}") + print(f" Saved: {csv_path} ({len(df)} rows × {len(df.columns)} cols)") # Save metadata metadata = { "description": "Synthetic retail sales data with covariates for TimesFM XReg demo", + "note_on_real_data": ( + "If using a real dataset (e.g., Kaggle Rossmann Store Sales), " + "download it to a temp directory (tempfile.mkdtemp) and do NOT " + "commit it here. This skills directory only ships tiny reference files." + ), "stores": {sid: sdata["config"] for sid, sdata in data["stores"].items()}, "dimensions": { "context_length": CONTEXT_LEN, "horizon_length": HORIZON_LEN, "total_length": TOTAL_LEN, + "num_stores": N_STORES, + "csv_rows": len(df), }, "covariates": { "dynamic_numerical": ["price", "promotion"], @@ -395,6 +420,13 @@ def main() -> None: "xreg + timesfm": "Fit regression on residuals after TimesFM forecast", "timesfm + xreg": "TimesFM forecasts residuals after regression fit", }, + "bug_fixes": [ + "v2: Fixed variable-shadowing in generate_sales_data() — inner dict " + "comprehension `{store_id: ... for store_id in stores}` was overwriting " + "the outer loop variable, causing all stores to get identical covariate " + "arrays. 
Fixed by using separate per-store dicts during the loop.", + "v2: Reduced CONTEXT_LEN from 48 → 24 weeks; CSV now 108 rows (was 180).", + ], } meta_path = OUTPUT_DIR / "covariates_metadata.json" @@ -414,25 +446,26 @@ def main() -> None: pip install timesfm[xreg] 2. COVARIATE TYPES: - • Dynamic: Changes over time (price, promotion, holiday) - • Static: Fixed per series (store type, region) + • Dynamic Numerical: time-varying numeric (price, promotion) + • Dynamic Categorical: time-varying flags (holiday, day_of_week) + • Static Categorical: fixed per series (store_type, region) 3. DATA REQUIREMENTS: • Dynamic covariates need values for context + horizon - • Future values must be known (e.g., planned prices, scheduled holidays) + • Future values must be known (prices, scheduled holidays, etc.) 4. XREG MODES: • "xreg + timesfm" (default): Regression on residuals - • "timesfm + xreg": TimesFM on residuals after regression + • "timesfm + xreg": TimesFM on residuals after regression 5. LIMITATIONS: - • String categorical values work but slower (use int encoding) • Requires TimesFM 2.5+ (v1.0 does not support XReg) + • String categoricals work but int encoding is faster 📁 Output Files: - • output/covariates_data.png - Data visualization - • output/sales_with_covariates.csv - Sample data - • output/covariates_metadata.json - Metadata + • output/covariates_data.png — visualization (6 panels) + • output/sales_with_covariates.csv — 108-row compact dataset + • output/covariates_metadata.json — metadata + bug-fix log """) diff --git a/scientific-skills/timesfm-forecasting/examples/covariates-forecasting/output/covariates_data.png b/scientific-skills/timesfm-forecasting/examples/covariates-forecasting/output/covariates_data.png index 1ee778e..68e495d 100644 Binary files a/scientific-skills/timesfm-forecasting/examples/covariates-forecasting/output/covariates_data.png and b/scientific-skills/timesfm-forecasting/examples/covariates-forecasting/output/covariates_data.png differ diff 
--git a/scientific-skills/timesfm-forecasting/examples/covariates-forecasting/output/covariates_metadata.json b/scientific-skills/timesfm-forecasting/examples/covariates-forecasting/output/covariates_metadata.json index 1e2e2ed..fd7c464 100644 --- a/scientific-skills/timesfm-forecasting/examples/covariates-forecasting/output/covariates_metadata.json +++ b/scientific-skills/timesfm-forecasting/examples/covariates-forecasting/output/covariates_metadata.json @@ -1,5 +1,6 @@ { "description": "Synthetic retail sales data with covariates for TimesFM XReg demo", + "note_on_real_data": "If using a real dataset (e.g., Kaggle Rossmann Store Sales), download it to a temp directory (tempfile.mkdtemp) and do NOT commit it here. This skills directory only ships tiny reference files.", "stores": { "store_A": { "type": "premium", @@ -18,9 +19,11 @@ } }, "dimensions": { - "context_length": 48, + "context_length": 24, "horizon_length": 12, - "total_length": 60 + "total_length": 36, + "num_stores": 3, + "csv_rows": 108 }, "covariates": { "dynamic_numerical": [ @@ -39,5 +42,9 @@ "xreg_modes": { "xreg + timesfm": "Fit regression on residuals after TimesFM forecast", "timesfm + xreg": "TimesFM forecasts residuals after regression fit" - } + }, + "bug_fixes": [ + "v2: Fixed variable-shadowing in generate_sales_data() \u2014 inner dict comprehension `{store_id: ... for store_id in stores}` was overwriting the outer loop variable, causing all stores to get identical covariate arrays. Fixed by using separate per-store dicts during the loop.", + "v2: Reduced CONTEXT_LEN from 48 \u2192 24 weeks; CSV now 108 rows (was 180)." 
+ ] } \ No newline at end of file diff --git a/scientific-skills/timesfm-forecasting/examples/covariates-forecasting/output/sales_with_covariates.csv b/scientific-skills/timesfm-forecasting/examples/covariates-forecasting/output/sales_with_covariates.csv index 9384b75..c8d012a 100644 --- a/scientific-skills/timesfm-forecasting/examples/covariates-forecasting/output/sales_with_covariates.csv +++ b/scientific-skills/timesfm-forecasting/examples/covariates-forecasting/output/sales_with_covariates.csv @@ -1,181 +1,109 @@ -store_id,week,sales,price,promotion,holiday,day_of_week,store_type,region -store_A,0,1212.6265,10.130472,0.0,1,0,premium,urban -store_A,1,954.4545,10.529998,0.0,0,1,premium,urban -store_A,2,1066.0654,10.269437,0.0,0,2,premium,urban -store_A,3,1095.3456,10.107159,0.0,0,3,premium,urban -store_A,4,966.55225,10.118414,0.0,0,4,premium,urban -store_A,5,1024.5396,9.607901,0.0,0,5,premium,urban -store_A,6,1121.4716,9.061636,0.0,0,6,premium,urban -store_A,7,1096.5702,9.873435,0.0,0,0,premium,urban -store_A,8,1132.875,9.42917,0.0,0,1,premium,urban -store_A,9,1244.5522,9.817058,1.0,0,2,premium,urban -store_A,10,1173.3354,10.706806,0.0,0,3,premium,urban -store_A,11,1401.6262,9.467879,0.0,1,4,premium,urban -store_A,12,1180.2404,9.116606,0.0,0,5,premium,urban -store_A,13,1230.1067,9.562768,0.0,0,6,premium,urban -store_A,14,1350.9026,9.587188,1.0,0,0,premium,urban -store_A,15,1122.653,10.323833,0.0,0,1,premium,urban -store_A,16,1189.6578,10.114064,0.0,0,2,premium,urban -store_A,17,1114.2455,10.567797,0.0,0,3,premium,urban -store_A,18,1209.6483,10.328627,0.0,0,4,premium,urban -store_A,19,1171.0994,9.812774,0.0,0,5,premium,urban -store_A,20,1294.5083,10.62804,1.0,0,6,premium,urban -store_A,21,1141.081,9.333946,0.0,0,0,premium,urban -store_A,22,1236.6909,9.045424,0.0,0,1,premium,urban -store_A,23,1359.1321,9.180096,0.0,1,2,premium,urban -store_A,24,1113.6208,10.444718,0.0,0,3,premium,urban -store_A,25,1120.9719,9.923755,0.0,0,4,premium,urban 
-store_A,26,1170.1646,9.322543,0.0,0,5,premium,urban -store_A,27,1141.1768,10.0020895,0.0,0,6,premium,urban -store_A,28,1300.6125,9.304625,1.0,0,0,premium,urban -store_A,29,1273.2278,10.392641,1.0,0,1,premium,urban -store_A,30,1212.7638,9.892313,0.0,0,2,premium,urban -store_A,31,1082.632,9.762042,0.0,0,3,premium,urban -store_A,32,1076.0151,9.6030245,0.0,0,4,premium,urban -store_A,33,1044.249,10.260565,0.0,0,5,premium,urban -store_A,34,1124.0281,9.723625,0.0,0,6,premium,urban -store_A,35,1359.397,9.1753,0.0,1,0,premium,urban -store_A,36,1096.0808,9.2360115,0.0,0,1,premium,urban -store_A,37,1027.4221,10.923796,0.0,0,2,premium,urban -store_A,38,1033.1619,10.817162,0.0,0,3,premium,urban -store_A,39,1269.5414,10.399414,1.0,0,4,premium,urban -store_A,40,1147.2571,9.53174,0.0,0,5,premium,urban -store_A,41,1116.2965,10.938353,0.0,0,6,premium,urban -store_A,42,1072.0729,10.557502,0.0,0,0,premium,urban -store_A,43,1129.3868,10.433781,0.0,0,1,premium,urban -store_A,44,1295.5614,9.898723,1.0,0,2,premium,urban -store_A,45,1320.1937,9.544483,1.0,0,3,premium,urban -store_A,46,1223.4036,9.192781,0.0,0,4,premium,urban -store_A,47,1523.2692,10.805204,1.0,1,5,premium,urban -store_A,48,1229.2423,9.911552,0.0,0,6,premium,urban -store_A,49,1224.824,9.404727,0.0,0,0,premium,urban -store_A,50,1248.2861,9.611914,0.0,0,1,premium,urban -store_A,51,1621.3419,10.158439,1.0,1,2,premium,urban -store_A,52,1200.0713,9.353545,0.0,0,3,premium,urban -store_A,53,1246.8055,10.713228,0.0,0,4,premium,urban -store_A,54,1260.0721,10.517039,0.0,0,5,premium,urban -store_A,55,1419.738,10.438926,1.0,0,6,premium,urban -store_A,56,1465.4315,9.864186,1.0,0,0,premium,urban -store_A,57,1411.4612,10.254618,0.0,0,1,premium,urban -store_A,58,1459.6567,10.168196,1.0,0,2,premium,urban -store_A,59,1562.2711,10.299693,1.0,0,3,premium,urban -store_B,0,949.5817,10.130472,0.0,1,0,premium,urban -store_B,1,826.9795,10.529998,0.0,0,1,premium,urban -store_B,2,795.8978,10.269437,0.0,0,2,premium,urban 
-store_B,3,781.1968,10.107159,0.0,0,3,premium,urban -store_B,4,869.75146,10.118414,0.0,0,4,premium,urban -store_B,5,840.91705,9.607901,0.0,0,5,premium,urban -store_B,6,900.90045,9.061636,0.0,0,6,premium,urban -store_B,7,862.10693,9.873435,0.0,0,0,premium,urban -store_B,8,811.1614,9.42917,0.0,0,1,premium,urban -store_B,9,814.42114,9.817058,1.0,0,2,premium,urban -store_B,10,953.70746,10.706806,0.0,0,3,premium,urban -store_B,11,1161.8647,9.467879,0.0,1,4,premium,urban -store_B,12,901.0838,9.116606,0.0,0,5,premium,urban -store_B,13,896.9283,9.562768,0.0,0,6,premium,urban -store_B,14,1121.0658,9.587188,1.0,0,0,premium,urban -store_B,15,1012.14496,10.323833,0.0,0,1,premium,urban -store_B,16,845.7787,10.114064,0.0,0,2,premium,urban -store_B,17,942.0486,10.567797,0.0,0,3,premium,urban -store_B,18,894.31323,10.328627,0.0,0,4,premium,urban -store_B,19,1029.0061,9.812774,0.0,0,5,premium,urban -store_B,20,896.51886,10.62804,1.0,0,6,premium,urban -store_B,21,1061.0464,9.333946,0.0,0,0,premium,urban -store_B,22,963.2019,9.045424,0.0,0,1,premium,urban -store_B,23,1091.6201,9.180096,0.0,1,2,premium,urban -store_B,24,915.2826,10.444718,0.0,0,3,premium,urban -store_B,25,771.0792,9.923755,0.0,0,4,premium,urban -store_B,26,858.0784,9.322543,0.0,0,5,premium,urban -store_B,27,814.89954,10.0020895,0.0,0,6,premium,urban -store_B,28,916.48206,9.304625,1.0,0,0,premium,urban -store_B,29,772.1533,10.392641,1.0,0,1,premium,urban -store_B,30,803.5763,9.892313,0.0,0,2,premium,urban -store_B,31,862.519,9.762042,0.0,0,3,premium,urban -store_B,32,737.1871,9.6030245,0.0,0,4,premium,urban -store_B,33,785.4303,10.260565,0.0,0,5,premium,urban -store_B,34,906.9479,9.723625,0.0,0,6,premium,urban -store_B,35,994.5817,9.1753,0.0,1,0,premium,urban -store_B,36,1004.37634,9.2360115,0.0,0,1,premium,urban -store_B,37,979.0918,10.923796,0.0,0,2,premium,urban -store_B,38,870.12354,10.817162,0.0,0,3,premium,urban -store_B,39,785.6754,10.399414,1.0,0,4,premium,urban 
-store_B,40,769.2815,9.53174,0.0,0,5,premium,urban -store_B,41,963.49274,10.938353,0.0,0,6,premium,urban -store_B,42,831.17865,10.557502,0.0,0,0,premium,urban -store_B,43,830.58295,10.433781,0.0,0,1,premium,urban -store_B,44,794.41534,9.898723,1.0,0,2,premium,urban -store_B,45,835.0851,9.544483,1.0,0,3,premium,urban -store_B,46,885.5207,9.192781,0.0,0,4,premium,urban -store_B,47,1178.3236,10.805204,1.0,1,5,premium,urban -store_B,48,993.4054,9.911552,0.0,0,6,premium,urban -store_B,49,841.88434,9.404727,0.0,0,0,premium,urban -store_B,50,883.09314,9.611914,0.0,0,1,premium,urban -store_B,51,1036.8414,10.158439,1.0,1,2,premium,urban -store_B,52,903.3836,9.353545,0.0,0,3,premium,urban -store_B,53,965.40485,10.713228,0.0,0,4,premium,urban -store_B,54,1031.0249,10.517039,0.0,0,5,premium,urban -store_B,55,1094.0964,10.438926,1.0,0,6,premium,urban -store_B,56,988.38293,9.864186,1.0,0,0,premium,urban -store_B,57,911.7493,10.254618,0.0,0,1,premium,urban -store_B,58,1025.1101,10.168196,1.0,0,2,premium,urban -store_B,59,978.6775,10.299693,1.0,0,3,premium,urban -store_C,0,728.35284,10.130472,0.0,1,0,premium,urban -store_C,1,503.7172,10.529998,0.0,0,1,premium,urban -store_C,2,557.5812,10.269437,0.0,0,2,premium,urban -store_C,3,579.2723,10.107159,0.0,0,3,premium,urban -store_C,4,557.2319,10.118414,0.0,0,4,premium,urban -store_C,5,573.1017,9.607901,0.0,0,5,premium,urban -store_C,6,581.31024,9.061636,0.0,0,6,premium,urban -store_C,7,567.57776,9.873435,0.0,0,0,premium,urban -store_C,8,606.85065,9.42917,0.0,0,1,premium,urban -store_C,9,618.42255,9.817058,1.0,0,2,premium,urban -store_C,10,637.49005,10.706806,0.0,0,3,premium,urban -store_C,11,864.7779,9.467879,0.0,1,4,premium,urban -store_C,12,571.1436,9.116606,0.0,0,5,premium,urban -store_C,13,612.2043,9.562768,0.0,0,6,premium,urban -store_C,14,872.13513,9.587188,1.0,0,0,premium,urban -store_C,15,738.0299,10.323833,0.0,0,1,premium,urban -store_C,16,604.6675,10.114064,0.0,0,2,premium,urban 
-store_C,17,650.33057,10.567797,0.0,0,3,premium,urban -store_C,18,661.12146,10.328627,0.0,0,4,premium,urban -store_C,19,603.7142,9.812774,0.0,0,5,premium,urban -store_C,20,828.2985,10.62804,1.0,0,6,premium,urban -store_C,21,669.9662,9.333946,0.0,0,0,premium,urban -store_C,22,638.91095,9.045424,0.0,0,1,premium,urban -store_C,23,838.9723,9.180096,0.0,1,2,premium,urban -store_C,24,834.94836,10.444718,0.0,0,3,premium,urban -store_C,25,555.9125,9.923755,0.0,0,4,premium,urban -store_C,26,477.89877,9.322543,0.0,0,5,premium,urban -store_C,27,651.99023,10.0020895,0.0,0,6,premium,urban -store_C,28,535.84216,9.304625,1.0,0,0,premium,urban -store_C,29,523.2324,10.392641,1.0,0,1,premium,urban -store_C,30,595.6628,9.892313,0.0,0,2,premium,urban -store_C,31,429.21732,9.762042,0.0,0,3,premium,urban -store_C,32,595.64905,9.6030245,0.0,0,4,premium,urban -store_C,33,574.6885,10.260565,0.0,0,5,premium,urban -store_C,34,477.18958,9.723625,0.0,0,6,premium,urban -store_C,35,703.0953,9.1753,0.0,1,0,premium,urban -store_C,36,530.65405,9.2360115,0.0,0,1,premium,urban -store_C,37,506.09885,10.923796,0.0,0,2,premium,urban -store_C,38,417.0998,10.817162,0.0,0,3,premium,urban -store_C,39,526.0255,10.399414,1.0,0,4,premium,urban -store_C,40,635.823,9.53174,0.0,0,5,premium,urban -store_C,41,495.87946,10.938353,0.0,0,6,premium,urban -store_C,42,534.13354,10.557502,0.0,0,0,premium,urban -store_C,43,557.8907,10.433781,0.0,0,1,premium,urban -store_C,44,535.6469,9.898723,1.0,0,2,premium,urban -store_C,45,590.8869,9.544483,1.0,0,3,premium,urban -store_C,46,574.78455,9.192781,0.0,0,4,premium,urban -store_C,47,796.0737,10.805204,1.0,1,5,premium,urban -store_C,48,546.10583,9.911552,0.0,0,6,premium,urban -store_C,49,580.9428,9.404727,0.0,0,0,premium,urban -store_C,50,606.4677,9.611914,0.0,0,1,premium,urban -store_C,51,851.0876,10.158439,1.0,1,2,premium,urban -store_C,52,763.8405,9.353545,0.0,0,3,premium,urban -store_C,53,824.2607,10.713228,0.0,0,4,premium,urban 
-store_C,54,656.9345,10.517039,0.0,0,5,premium,urban -store_C,55,813.55115,10.438926,1.0,0,6,premium,urban -store_C,56,885.26666,9.864186,1.0,0,0,premium,urban -store_C,57,618.21106,10.254618,0.0,0,1,premium,urban -store_C,58,649.7526,10.168196,1.0,0,2,premium,urban -store_C,59,649.2765,10.299693,1.0,0,3,premium,urban +store_id,week,split,sales,price,promotion,holiday,day_of_week,store_type,region +store_A,0,context,1372.64,11.6299,1,1,0,premium,urban +store_A,1,context,965.54,11.9757,0,0,1,premium,urban +store_A,2,context,1076.92,11.7269,0,0,2,premium,urban +store_A,3,context,1094.09,12.1698,0,0,3,premium,urban +store_A,4,context,970.18,11.9372,0,0,4,premium,urban +store_A,5,context,1010.04,12.3327,0,0,5,premium,urban +store_A,6,context,1098.7,12.2003,0,0,6,premium,urban +store_A,7,context,1097.79,11.8124,0,0,0,premium,urban +store_A,8,context,1114.81,12.3323,0,0,1,premium,urban +store_A,9,context,1084.8,12.3048,0,0,2,premium,urban +store_A,10,context,1339.72,11.8875,1,0,3,premium,urban +store_A,11,context,1395.22,11.7883,0,1,4,premium,urban +store_A,12,context,1158.92,12.1825,0,0,5,premium,urban +store_A,13,context,1228.57,11.6398,0,0,6,premium,urban +store_A,14,context,1198.65,11.6999,0,0,0,premium,urban +store_A,15,context,1138.98,11.5074,0,0,1,premium,urban +store_A,16,context,1186.2,12.2869,0,0,2,premium,urban +store_A,17,context,1122.3,12.1649,0,0,3,premium,urban +store_A,18,context,1212.12,12.2052,0,0,4,premium,urban +store_A,19,context,1161.74,12.2807,0,0,5,premium,urban +store_A,20,context,1157.89,11.9589,0,0,6,premium,urban +store_A,21,context,1126.39,12.0687,0,0,0,premium,urban +store_A,22,context,1224.8,11.6398,0,0,1,premium,urban +store_A,23,context,1350.44,11.6145,0,1,2,premium,urban +store_A,24,horizon,1119.15,12.1684,0,0,3,premium,urban +store_A,25,horizon,1120.03,11.9711,0,0,4,premium,urban +store_A,26,horizon,1155.31,12.0652,0,0,5,premium,urban +store_A,27,horizon,1285.92,12.265,1,0,6,premium,urban 
+store_A,28,horizon,1284.01,12.1347,1,0,0,premium,urban +store_A,29,horizon,1130.01,12.0536,0,0,1,premium,urban +store_A,30,horizon,1209.43,12.0592,0,0,2,premium,urban +store_A,31,horizon,1231.79,11.804,1,0,3,premium,urban +store_A,32,horizon,1077.46,11.5308,0,0,4,premium,urban +store_A,33,horizon,1050.73,11.9367,0,0,5,premium,urban +store_A,34,horizon,1124.21,11.7146,0,0,6,premium,urban +store_A,35,horizon,1344.73,11.9085,0,1,0,premium,urban +store_B,0,context,1053.03,9.9735,1,1,0,standard,suburban +store_B,1,context,903.51,9.767,1,0,1,standard,suburban +store_B,2,context,826.82,9.8316,0,0,2,standard,suburban +store_B,3,context,709.93,10.0207,0,0,3,standard,suburban +store_B,4,context,834.42,9.9389,0,0,4,standard,suburban +store_B,5,context,847.01,9.5216,0,0,5,standard,suburban +store_B,6,context,802.58,10.3263,0,0,6,standard,suburban +store_B,7,context,770.87,10.3962,0,0,0,standard,suburban +store_B,8,context,873.1,9.6402,0,0,1,standard,suburban +store_B,9,context,844.74,10.054,0,0,2,standard,suburban +store_B,10,context,1050.46,9.6086,1,0,3,standard,suburban +store_B,11,context,1085.99,10.1722,0,1,4,standard,suburban +store_B,12,context,978.74,9.7812,0,0,5,standard,suburban +store_B,13,context,1033.59,10.1594,1,0,6,standard,suburban +store_B,14,context,846.06,10.227,0,0,0,standard,suburban +store_B,15,context,906.93,10.2686,0,0,1,standard,suburban +store_B,16,context,922.35,9.6077,0,0,2,standard,suburban +store_B,17,context,1111.93,10.416,1,0,3,standard,suburban +store_B,18,context,946.95,9.7302,0,0,4,standard,suburban +store_B,19,context,923.2,9.5374,0,0,5,standard,suburban +store_B,20,context,963.38,10.0549,0,0,6,standard,suburban +store_B,21,context,978.7,9.8709,1,0,0,standard,suburban +store_B,22,context,840.39,10.3298,0,0,1,standard,suburban +store_B,23,context,1019.22,10.3083,0,1,2,standard,suburban +store_B,24,horizon,848.1,9.8171,0,0,3,standard,suburban +store_B,25,horizon,777.91,10.4529,0,0,4,standard,suburban 
+store_B,26,horizon,883.44,9.7909,0,0,5,standard,suburban +store_B,27,horizon,827.78,10.0151,0,0,6,standard,suburban +store_B,28,horizon,762.41,9.756,0,0,0,standard,suburban +store_B,29,horizon,763.79,10.436,0,0,1,standard,suburban +store_B,30,horizon,838.41,9.6646,0,0,2,standard,suburban +store_B,31,horizon,860.45,9.5449,0,0,3,standard,suburban +store_B,32,horizon,904.82,9.9351,0,0,4,standard,suburban +store_B,33,horizon,1084.74,10.4924,1,0,5,standard,suburban +store_B,34,horizon,808.09,10.3917,0,0,6,standard,suburban +store_B,35,horizon,938.26,10.2486,0,1,0,standard,suburban +store_C,0,context,709.43,7.1053,0,1,0,discount,rural +store_C,1,context,649.01,7.0666,1,0,1,discount,rural +store_C,2,context,660.66,7.5944,1,0,2,discount,rural +store_C,3,context,750.17,7.1462,1,0,3,discount,rural +store_C,4,context,726.88,7.8247,1,0,4,discount,rural +store_C,5,context,639.97,7.3103,0,0,5,discount,rural +store_C,6,context,580.71,7.1439,0,0,6,discount,rural +store_C,7,context,549.13,7.921,0,0,0,discount,rural +store_C,8,context,597.79,7.1655,0,0,1,discount,rural +store_C,9,context,627.48,7.2847,0,0,2,discount,rural +store_C,10,context,634.26,7.1536,0,0,3,discount,rural +store_C,11,context,928.07,7.1155,1,1,4,discount,rural +store_C,12,context,643.37,7.0211,0,0,5,discount,rural +store_C,13,context,652.8,7.0554,0,0,6,discount,rural +store_C,14,context,766.65,7.1746,0,0,0,discount,rural +store_C,15,context,737.37,7.0534,0,0,1,discount,rural +store_C,16,context,589.02,7.5911,0,0,2,discount,rural +store_C,17,context,613.06,7.6807,0,0,3,discount,rural +store_C,18,context,556.25,7.3936,0,0,4,discount,rural +store_C,19,context,596.46,7.318,0,0,5,discount,rural +store_C,20,context,632.0,7.5045,0,0,6,discount,rural +store_C,21,context,662.1,7.875,0,0,0,discount,rural +store_C,22,context,558.0,7.8511,0,0,1,discount,rural +store_C,23,context,769.38,7.0435,0,1,2,discount,rural +store_C,24,horizon,482.94,7.1815,0,0,3,discount,rural +store_C,25,horizon,571.69,7.2367,0,0,4,discount,rural 
+store_C,26,horizon,666.89,7.2494,1,0,5,discount,rural +store_C,27,horizon,677.55,7.5712,1,0,6,discount,rural +store_C,28,horizon,503.9,7.4163,0,0,0,discount,rural +store_C,29,horizon,541.34,7.0493,0,0,1,discount,rural +store_C,30,horizon,443.17,7.3736,0,0,2,discount,rural +store_C,31,horizon,596.87,7.5238,1,0,3,discount,rural +store_C,32,horizon,628.12,7.1017,0,0,4,discount,rural +store_C,33,horizon,586.61,7.8335,1,0,5,discount,rural +store_C,34,horizon,456.82,7.052,0,0,6,discount,rural +store_C,35,horizon,782.3,7.9248,0,1,0,discount,rural