# Builds the eval Docker image and runs the skill evals inside it.
# Runs on manual dispatch, or on pull requests that touch the listed paths
# and carry the `run-evals` label.
name: Skill Evals

on:
  workflow_dispatch:
  pull_request:
    # `labeled` is included so that adding a label to an open PR triggers a run.
    types: [opened, synchronize, labeled]
    paths:
      - "skills/**"
      - "packages/evals/**"

# One active run per pull request (or per ref when there is no PR number);
# a newer run cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

permissions:
  pull-requests: write
  contents: read

jobs:
  eval:
    name: Run evals
    # Skip pull_request events unless the PR has the `run-evals` label.
    if: >
      github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'pull_request' &&
      contains(github.event.pull_request.labels.*.name, 'run-evals'))
    runs-on: ubuntu-latest
    environment: evals
    timeout-minutes: 30
    env:
      BRAINTRUST_PROJECT_ID: ${{ secrets.BRAINTRUST_PROJECT_ID }}
      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
    steps:
      - uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build eval image
        uses: docker/build-push-action@v6
        with:
          context: .
          file: packages/evals/Dockerfile
          tags: supabase-evals:ci
          # Load the built image into the local Docker daemon so the
          # `docker run` in the next step can find it by tag.
          load: true
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Run Evals
        # The secret is bound to an environment variable here and forwarded
        # to the container by name (`-e NAME`), so its value is never
        # expanded into the script text that the shell parses.
        env:
          BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
        run: |
          docker run --rm \
            -e ANTHROPIC_API_KEY \
            -e BRAINTRUST_PROJECT_ID \
            -e BRAINTRUST_API_KEY \
            -e BRAINTRUST_UPLOAD=true \
            -e EVAL_RESULTS_DIR=/app/results \
            -v "${{ github.workspace }}/results:/app/results" \
            supabase-evals:ci

      - name: Upload results
        # Upload whatever landed in results/ even when the eval step failed;
        # an empty or missing directory is not treated as an error.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: eval-results
          path: results/
          if-no-files-found: ignore