initial skills evals

This commit is contained in:
Pedro Rodrigues
2026-02-18 12:02:28 +00:00
parent 69575f4c87
commit 27d7af255d
17 changed files with 3177 additions and 10 deletions

49
.github/workflows/evals.yml vendored Normal file
View File

@@ -0,0 +1,49 @@
# Skill Evals: installs dependencies and runs the evals located in
# packages/evals through the Braintrust eval action.
name: Skill Evals

# Triggers: manual dispatch, or pull requests that touch the skills or the
# evals package. `labeled` is listed so that adding a label starts a run.
on:
  workflow_dispatch:
  pull_request:
    types: [opened, synchronize, labeled]
    paths:
      - "skills/**"
      - "packages/evals/**"

# One active run per pull request (or per ref when there is no pull request);
# a newer run cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

permissions:
  # NOTE(review): presumably needed so the eval action can comment on the
  # pull request — confirm against the action's documentation.
  pull-requests: write
  contents: read

jobs:
  eval:
    name: Run evals
    # Manual runs always execute; pull request runs execute only when the
    # pull request carries the `run-evals` label.
    if: >
      github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'pull_request' &&
      contains(github.event.pull_request.labels.*.name, 'run-evals'))
    runs-on: ubuntu-latest
    environment: evals
    timeout-minutes: 30
    env:
      BRAINTRUST_PROJECT_ID: ${{ secrets.BRAINTRUST_PROJECT_ID }}
      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
    steps:
      - uses: actions/checkout@v4
      - uses: jdx/mise-action@v3
        with:
          install: true
      # Installs both the repository root and the evals package dependencies.
      - name: Install dependencies
        run: npm install && npm --prefix packages/evals install
      - name: Run Evals
        uses: braintrustdata/eval-action@v1
        with:
          api_key: ${{ secrets.BRAINTRUST_API_KEY }}
          runtime: node
          root: packages/evals