# mise configuration: toolchain, shared environment, and project tasks.

[settings]
experimental = true
# Record resolved tool versions in a lockfile so a floating version such as
# "lts" below resolves reproducibly.
lockfile = true

[tools]
node = "lts"

[env]
# Put locally installed npm binaries on PATH so tasks can invoke them by
# bare name (e.g. the `biome` and `vitest` commands used below).
_.path = ["{{config_root}}/node_modules/.bin"]
# Dotenv files loaded into the environment of every task.
_.file = [".env", "packages/evals/.env"]

# ── Root tasks ────────────────────────────────────────────────────────

# Installs the root package and both sub-packages. The task is considered
# up to date while the listed package.json files are older than the output.
[tasks.install]
description = "Install all dependencies"
run = "npm install && npm --prefix packages/skills-build install && npm --prefix packages/evals install"
sources = [
    "package.json",
    "packages/skills-build/package.json",
    "packages/evals/package.json",
]
outputs = ["node_modules/.package-lock.json"]

# The validator lives in packages/skills-build, so its own code is tracked
# as a source too (as tasks.build already does); otherwise a change to the
# validator would be skipped as "up to date".
[tasks.validate]
description = "Validate all skills"
run = "npm --prefix packages/skills-build run validate"
sources = [
    "skills/**/SKILL.md",
    "skills/**/references/**",
    "packages/skills-build/src/**",
]

[tasks.build]
description = "Build all skills"
run = "npm --prefix packages/skills-build run build"
sources = [
    "skills/**/SKILL.md",
    "skills/**/references/**",
    "packages/skills-build/src/**",
]
outputs = ["skills/**/AGENTS.md"]

# Rewrites files in place; use "ci:check" for a read-only check.
[tasks.check]
description = "Format and lint (auto-fix)"
run = "biome check --write ."
sources = ["**/*.ts", "**/*.js", "**/*.json", "biome.json"]

[tasks."ci:check"]
description = "CI format and lint check"
run = "biome ci ."
sources = ["**/*.ts", "**/*.js", "**/*.json", "biome.json"]

[tasks.test]
description = "Run tests"
run = "vitest run"
sources = ["test/**", "skills/**"]

# ── Eval tasks ────────────────────────────────────────────────────────

[tasks.eval]
description = "Run workflow evals"
run = "npm --prefix packages/evals run eval"
sources = ["packages/evals/src/**", "packages/evals/evals/**"]

[tasks."eval:upload"]
description = "Run workflow evals and upload to Braintrust"
run = "npm --prefix packages/evals run eval:upload"
sources = ["packages/evals/src/**", "packages/evals/evals/**"]

# ── Docker eval tasks ────────────────────────────────────────────────

# Builds from the repository root as context so the Dockerfile can copy
# files from outside packages/evals.
[tasks."eval:docker:build"]
description = "Build the eval Docker image"
run = "docker build -t supabase-evals:local -f packages/evals/Dockerfile ."
# Runs the eval suite inside the locally built image.
#   * `-e NAME` with no value forwards that variable from the invoking
#     environment when it is set there.
#   * Results are written to /app/results in the container, which is
#     bind-mounted to packages/evals/results on the host.
#   * The host Docker socket is mounted into the container; `--group-add 0`
#     presumably grants the container user access to it — confirm against
#     the Dockerfile.
#   * `--network host` shares the host's network stack with the container.
[tasks."eval:docker"]
description = "Run evals in Docker"
depends = ["eval:docker:build"]
run = """
docker run --rm \
    -e ANTHROPIC_API_KEY \
    -e EVAL_MODEL \
    -e EVAL_SCENARIO \
    -e EVAL_BASELINE \
    -e EVAL_SKILL \
    -e BRAINTRUST_UPLOAD \
    -e BRAINTRUST_API_KEY \
    -e BRAINTRUST_PROJECT_ID \
    -e EVAL_RESULTS_DIR=/app/results \
    -v "$(pwd)/packages/evals/results:/app/results" \
    -v "$(pwd)/packages/evals/project:/app/packages/evals/project" \
    -v /var/run/docker.sock:/var/run/docker.sock \
    --group-add 0 \
    --network host \
    supabase-evals:local
"""

# Interactive bash session in the same image for debugging. Only the API key
# is forwarded and only the results directory is mounted; IN_DOCKER=true is
# set explicitly inside the container.
[tasks."eval:docker:shell"]
description = "Open a debug shell in the eval container"
depends = ["eval:docker:build"]
run = """
docker run --rm -it \
    -e ANTHROPIC_API_KEY \
    -e IN_DOCKER=true \
    -v "$(pwd)/packages/evals/results:/app/results" \
    supabase-evals:local /bin/bash
"""