mirror of
https://github.com/supabase/agent-skills.git
synced 2026-03-27 10:09:26 +08:00
Host now only needs Docker + ANTHROPIC_API_KEY to run evals. Adds multi-stage Dockerfile, mock supabase/docker/psql scripts, entrypoint, docker-compose for local use, and switches CI to Docker-based execution. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
68 lines · 1.7 KiB · YAML
name: Skill Evals

# Triggers: manual dispatch, or pull requests touching skills / the eval
# package. PR runs are further gated on the `run-evals` label by the job-level
# `if`, which is why `labeled` is listed among the event types.
on:
  workflow_dispatch:
  pull_request:
    types: [opened, synchronize, labeled]
    paths:
      - "skills/**"
      - "packages/evals/**"

# One run per PR (or per ref when there is no PR number); a newer run cancels
# the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

permissions:
  pull-requests: write
  contents: read

jobs:
  eval:
    name: Run evals
    # Always run on manual dispatch; on pull requests only when the PR
    # carries the `run-evals` label.
    if: >
      github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'pull_request' &&
      contains(github.event.pull_request.labels.*.name, 'run-evals'))
    runs-on: ubuntu-latest
    environment: evals
    timeout-minutes: 30

    # Job-level secrets; forwarded into the container by name in "Run Evals".
    env:
      BRAINTRUST_PROJECT_ID: ${{ secrets.BRAINTRUST_PROJECT_ID }}
      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}

    steps:
      - uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build eval image
        uses: docker/build-push-action@v6
        with:
          context: .
          file: packages/evals/Dockerfile
          tags: supabase-evals:ci
          # Load the built image into the local Docker daemon so the
          # `docker run` in the next step can find it by tag.
          load: true
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Run Evals
        # Bind the secret to an environment variable instead of interpolating
        # it into the script text: `${{ }}` is expanded before the shell runs,
        # so an inline secret would be subject to word splitting and shell
        # metacharacter interpretation, and would appear on the command line.
        env:
          BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
        run: |
          # `-e NAME` without a value forwards NAME from this step's
          # environment into the container.
          docker run --rm \
            -e ANTHROPIC_API_KEY \
            -e BRAINTRUST_PROJECT_ID \
            -e BRAINTRUST_API_KEY \
            -e BRAINTRUST_UPLOAD=true \
            -e EVAL_RESULTS_DIR=/app/results \
            -v "${{ github.workspace }}/results:/app/results" \
            supabase-evals:ci

      - name: Upload results
        # Upload whatever was written to results/ even if the eval step failed.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: eval-results
          path: results/
          if-no-files-found: ignore