initial skills evals

This commit is contained in:
Pedro Rodrigues
2026-02-18 12:02:28 +00:00
parent 69575f4c87
commit 27d7af255d
17 changed files with 3177 additions and 10 deletions

View File

@@ -46,16 +46,11 @@ sources = ["test/**", "skills/**"]
# ── Eval tasks ────────────────────────────────────────────────────────
# Task `eval`: the default eval entry point.
# NOTE(review): `description` and `run` each appear twice in this table, which
# TOML rejects as a key redefinition. The hunk header (-46,16 +46,11) suggests
# this text is a diff view whose +/- markers were lost, so the first pair is
# presumably the removed version and the second pair the added one — confirm
# against the real file before treating this as configuration.
[tasks.eval]
description = "Run all evals"
# Older-looking form: calls the evals CLI entry point directly through tsx.
run = "tsx packages/evals/src/cli.ts"
description = "Run code-fix evals (local, no upload)"
# Newer-looking form: delegates to the `eval` npm script of packages/evals.
run = "npm --prefix packages/evals run eval"
# Glob patterns covering the evals package sources and the skills' reference
# files. Presumably the inputs the task runner tracks for this task — confirm.
sources = ["packages/evals/src/**", "skills/**/references/**"]
# Task `eval:code-fix`: calls the evals CLI with `--type code-fix`.
# The table name is quoted because `:` is not allowed in a bare TOML key.
# NOTE(review): this may be a removed side of the diff, since the `eval` task's
# newer description already covers code-fix evals — confirm.
[tasks."eval:code-fix"]
description = "Run code-fix evals"
run = "tsx packages/evals/src/cli.ts --type code-fix"
# Same glob patterns as the `eval` task.
sources = ["packages/evals/src/**", "skills/**/references/**"]
# Task `eval:workflow`: calls the evals CLI with `--type workflow`.
# NOTE(review): unlike the neighbouring tasks, no `sources` key is visible for
# this table. It may have been absorbed into the following table by the diff
# rendering, or this whole table may be a removed side of the diff — confirm.
[tasks."eval:workflow"]
description = "Run workflow evals"
run = "tsx packages/evals/src/cli.ts --type workflow"
# Task `eval:upload`: delegates to the `eval:upload` npm script of
# packages/evals. Per its description, this runs the code-fix evals and uploads
# the results to Braintrust.
# The table name is quoted because `:` is not allowed in a bare TOML key.
[tasks."eval:upload"]
description = "Run code-fix evals and upload to Braintrust"
run = "npm --prefix packages/evals run eval:upload"
# Glob patterns covering the evals package sources and the skills' reference files.
sources = ["packages/evals/src/**", "skills/**/references/**"]