multi model testing

This commit is contained in:
Pedro Rodrigues
2026-02-18 13:28:42 +00:00
parent 27d7af255d
commit 082eac2a01
8 changed files with 315 additions and 107 deletions

View File

@@ -46,11 +46,15 @@ sources = ["test/**", "skills/**"]
# ── Eval tasks ────────────────────────────────────────────────────────
# Runs the `eval` npm script of packages/evals. Per the description, this
# covers every configured model and does not upload results.
[tasks.eval]
description = "Run code-fix evals for all configured models (local, no upload)"
run = "npm --prefix packages/evals run eval"
sources = ["packages/evals/src/**", "skills/**/references/**"]
# Runs the same `eval` npm script as the `eval` task, but with EVAL_MODEL set
# in the command's environment from the task's `model` argument.
# NOTE(review): unlike the sibling eval tasks this one declares no `sources` —
# presumably so that a run for a different model is never skipped as
# up to date; confirm before adding one.
[tasks."eval:model"]
description = "Run code-fix eval for a single model (local, no upload)"
run = "EVAL_MODEL={{arg(name='model')}} npm --prefix packages/evals run eval"
# Runs the `eval:upload` npm script of packages/evals. Per the description,
# this evaluates all models and uploads the results to Braintrust.
[tasks."eval:upload"]
description = "Run code-fix evals for all models and upload to Braintrust"
run = "npm --prefix packages/evals run eval:upload"
sources = ["packages/evals/src/**", "skills/**/references/**"]