mirror of
https://github.com/supabase/agent-skills.git
synced 2026-03-27 10:09:26 +08:00
containerize eval environment with Docker and mock CLIs
Host now only needs Docker + ANTHROPIC_API_KEY to run evals. Adds multi-stage Dockerfile, mock supabase/docker/psql scripts, entrypoint, docker-compose for local use, and switches CI to Docker-based execution. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
9
.dockerignore
Normal file
9
.dockerignore
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
.git
|
||||||
|
.env
|
||||||
|
.env.*
|
||||||
|
node_modules
|
||||||
|
packages/evals/results
|
||||||
|
packages/evals/node_modules
|
||||||
|
packages/skills-build/node_modules
|
||||||
|
reports
|
||||||
|
*.log
|
||||||
36
.github/workflows/evals.yml
vendored
36
.github/workflows/evals.yml
vendored
@@ -34,16 +34,34 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
- uses: jdx/mise-action@v3
|
- name: Set up Docker Buildx
|
||||||
with:
|
uses: docker/setup-buildx-action@v3
|
||||||
install: true
|
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Build eval image
|
||||||
run: npm install && npm --prefix packages/evals install
|
uses: docker/build-push-action@v6
|
||||||
|
with:
|
||||||
|
context: .
|
||||||
|
file: packages/evals/Dockerfile
|
||||||
|
tags: supabase-evals:ci
|
||||||
|
load: true
|
||||||
|
cache-from: type=gha
|
||||||
|
cache-to: type=gha,mode=max
|
||||||
|
|
||||||
- name: Run Evals
|
- name: Run Evals
|
||||||
uses: braintrustdata/eval-action@v1
|
run: |
|
||||||
|
docker run --rm \
|
||||||
|
-e ANTHROPIC_API_KEY \
|
||||||
|
-e BRAINTRUST_PROJECT_ID \
|
||||||
|
-e BRAINTRUST_API_KEY=${{ secrets.BRAINTRUST_API_KEY }} \
|
||||||
|
-e BRAINTRUST_UPLOAD=true \
|
||||||
|
-e EVAL_RESULTS_DIR=/app/results \
|
||||||
|
-v "${{ github.workspace }}/results:/app/results" \
|
||||||
|
supabase-evals:ci
|
||||||
|
|
||||||
|
- name: Upload results
|
||||||
|
if: always()
|
||||||
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
api_key: ${{ secrets.BRAINTRUST_API_KEY }}
|
name: eval-results
|
||||||
runtime: node
|
path: results/
|
||||||
root: packages/evals
|
if-no-files-found: ignore
|
||||||
|
|||||||
35
mise.toml
35
mise.toml
@@ -54,3 +54,38 @@ sources = ["packages/evals/src/**", "packages/evals/evals/**"]
|
|||||||
description = "Run workflow evals and upload to Braintrust"
|
description = "Run workflow evals and upload to Braintrust"
|
||||||
run = "npm --prefix packages/evals run eval:upload"
|
run = "npm --prefix packages/evals run eval:upload"
|
||||||
sources = ["packages/evals/src/**", "packages/evals/evals/**"]
|
sources = ["packages/evals/src/**", "packages/evals/evals/**"]
|
||||||
|
|
||||||
|
# ── Docker eval tasks ────────────────────────────────────────────────
|
||||||
|
|
||||||
|
[tasks."eval:docker:build"]
|
||||||
|
description = "Build the eval Docker image"
|
||||||
|
run = "docker build -t supabase-evals:local -f packages/evals/Dockerfile ."
|
||||||
|
|
||||||
|
[tasks."eval:docker"]
|
||||||
|
description = "Run evals in Docker"
|
||||||
|
depends = ["eval:docker:build"]
|
||||||
|
run = """
|
||||||
|
docker run --rm \
|
||||||
|
-e ANTHROPIC_API_KEY \
|
||||||
|
-e EVAL_MODEL \
|
||||||
|
-e EVAL_SCENARIO \
|
||||||
|
-e EVAL_BASELINE \
|
||||||
|
-e EVAL_SKILL \
|
||||||
|
-e BRAINTRUST_UPLOAD \
|
||||||
|
-e BRAINTRUST_API_KEY \
|
||||||
|
-e BRAINTRUST_PROJECT_ID \
|
||||||
|
-e EVAL_RESULTS_DIR=/app/results \
|
||||||
|
-v "$(pwd)/packages/evals/results:/app/results" \
|
||||||
|
supabase-evals:local
|
||||||
|
"""
|
||||||
|
|
||||||
|
[tasks."eval:docker:shell"]
|
||||||
|
description = "Open a debug shell in the eval container"
|
||||||
|
depends = ["eval:docker:build"]
|
||||||
|
run = """
|
||||||
|
docker run --rm -it \
|
||||||
|
-e ANTHROPIC_API_KEY \
|
||||||
|
-e IN_DOCKER=true \
|
||||||
|
-v "$(pwd)/packages/evals/results:/app/results" \
|
||||||
|
supabase-evals:local /bin/bash
|
||||||
|
"""
|
||||||
|
|||||||
69
packages/evals/Dockerfile
Normal file
69
packages/evals/Dockerfile
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
# ---------- Stage 1: builder ----------
# Installs npm dependencies and runs the skills build; the runtime stage
# below copies the results out of this stage.
FROM node:22-slim AS builder

RUN apt-get update && apt-get install -y --no-install-recommends git && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy root package files first (layer caching)
COPY package.json package-lock.json ./

# Copy workspace package files
COPY packages/skills-build/package.json packages/skills-build/
COPY packages/evals/package.json packages/evals/

# Install all dependencies
RUN npm install && \
    npm --prefix packages/skills-build install && \
    npm --prefix packages/evals install

# Copy source code
COPY skills/ skills/
COPY packages/skills-build/ packages/skills-build/
COPY packages/evals/ packages/evals/

# Build skills (generates AGENTS.md / CLAUDE.md files)
RUN npm --prefix packages/skills-build run build

# ---------- Stage 2: runtime ----------
FROM node:22-slim

RUN apt-get update && apt-get install -y --no-install-recommends git && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Use the existing node user (UID 1000) — Claude Code refuses --dangerously-skip-permissions as root
# node:22-slim already ships with user "node" (uid=1000, gid=1000)

# Copy built artifacts from builder
COPY --from=builder /app/package.json /app/package-lock.json ./
COPY --from=builder /app/node_modules/ node_modules/
COPY --from=builder /app/skills/ skills/
COPY --from=builder /app/packages/skills-build/ packages/skills-build/
COPY --from=builder /app/packages/evals/ packages/evals/

# Install mock scripts
COPY packages/evals/mocks/supabase /usr/local/bin/supabase
COPY packages/evals/mocks/docker /usr/local/bin/docker
COPY packages/evals/mocks/psql /usr/local/bin/psql
RUN chmod +x /usr/local/bin/supabase /usr/local/bin/docker /usr/local/bin/psql

# Install entrypoint
COPY packages/evals/docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh
RUN chmod +x /usr/local/bin/docker-entrypoint.sh

# Create both results locations and hand them to the node user:
#   /app/packages/evals/results  - the default output directory
#   /app/results                 - the EVAL_RESULTS_DIR value used when running in Docker
# /app itself is root-owned, so without this the non-root user cannot create
# /app/results when the container is started without a volume mounted there.
RUN mkdir -p /app/packages/evals/results /app/results && \
    chown -R node:node /app/packages/evals/results /app/results

# Make /tmp world-writable (sticky bit set) and give the node user its home
# directory, both of which Claude Code needs to write to
RUN mkdir -p /tmp && chmod 1777 /tmp && chown -R node:node /home/node

USER node

ENV IN_DOCKER=true
ENV NODE_ENV=production

ENTRYPOINT ["docker-entrypoint.sh"]
CMD ["npm", "--prefix", "packages/evals", "run", "eval"]
|
||||||
17
packages/evals/docker-compose.yml
Normal file
17
packages/evals/docker-compose.yml
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
services:
|
||||||
|
evals:
|
||||||
|
build:
|
||||||
|
context: ../..
|
||||||
|
dockerfile: packages/evals/Dockerfile
|
||||||
|
environment:
|
||||||
|
- ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY}
|
||||||
|
- EVAL_MODEL=${EVAL_MODEL:-}
|
||||||
|
- EVAL_SCENARIO=${EVAL_SCENARIO:-}
|
||||||
|
- EVAL_BASELINE=${EVAL_BASELINE:-}
|
||||||
|
- EVAL_SKILL=${EVAL_SKILL:-}
|
||||||
|
- BRAINTRUST_UPLOAD=${BRAINTRUST_UPLOAD:-}
|
||||||
|
- BRAINTRUST_API_KEY=${BRAINTRUST_API_KEY:-}
|
||||||
|
- BRAINTRUST_PROJECT_ID=${BRAINTRUST_PROJECT_ID:-}
|
||||||
|
- EVAL_RESULTS_DIR=/app/results
|
||||||
|
volumes:
|
||||||
|
- ./results:/app/results
|
||||||
26
packages/evals/docker-entrypoint.sh
Executable file
26
packages/evals/docker-entrypoint.sh
Executable file
@@ -0,0 +1,26 @@
|
|||||||
|
#!/usr/bin/env bash
# Container entrypoint for the eval image.
# Steps: mark the process as running in Docker, refuse to start without an
# Anthropic API key, put the mock CLIs first on PATH, print a short summary
# of the environment, then replace this shell with the requested command.
set -euo pipefail

export IN_DOCKER=true

# Fail fast (exit 1, message on stderr) when the API key is absent or empty.
[[ -n "${ANTHROPIC_API_KEY:-}" ]] || {
  printf '%s\n' \
    "ERROR: ANTHROPIC_API_KEY is not set." \
    "Pass it via: docker run -e ANTHROPIC_API_KEY=sk-ant-... ..." >&2
  exit 1
}

# The mocks directory goes first so the mock supabase/docker/psql are the
# ones that get resolved.
export PATH="/app/packages/evals/mocks:${PATH}"

# Environment summary. The version lookups stay inline so that a failing
# lookup does not abort the script under `set -e`.
printf '%s\n' "=== Eval Environment ==="
printf ' Node: %s\n' "$(node --version)"
printf ' Claude: %s\n' "$(claude --version 2>/dev/null || echo 'n/a')"
printf '%s\n' " Docker: mock"
printf ' Model: %s\n' "${EVAL_MODEL:-default}"
printf ' Scenario: %s\n' "${EVAL_SCENARIO:-all}"
printf '%s\n' "========================"

# Hand control to whatever command was passed to the container.
exec "$@"
|
||||||
27
packages/evals/mocks/docker
Executable file
27
packages/evals/mocks/docker
Executable file
@@ -0,0 +1,27 @@
|
|||||||
|
#!/usr/bin/env bash
# Stand-in for the Docker CLI inside eval environments.
# Every invocation exits 0; `ps`, `info` and `compose` also print a short
# canned line, everything else is silent.
set -euo pipefail

# First argument is the subcommand; drop it when one was given.
subcommand="${1:-}"
if (($# > 0)); then
  shift
fi

if [[ "$subcommand" == "ps" ]]; then
  # Header row only: no containers are ever listed.
  printf '%s\n' "CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES"
elif [[ "$subcommand" == "exec" ]]; then
  # Skip leading option flags; whatever follows (container + command) is
  # ignored and the call succeeds without output.
  while (($# > 0)) && [[ "$1" == -* ]]; do
    shift
  done
elif [[ "$subcommand" == "info" ]]; then
  printf '%s\n' "Server Version: 24.0.0 (mock)"
elif [[ "$subcommand" == "compose" ]]; then
  printf '%s\n' "docker compose: ok"
fi
# Any other subcommand (or none at all): succeed silently.
|
||||||
15
packages/evals/mocks/psql
Executable file
15
packages/evals/mocks/psql
Executable file
@@ -0,0 +1,15 @@
|
|||||||
|
#!/usr/bin/env bash
# Mock psql for eval environments.
# Accepts any arguments, never contacts a database, and always exits 0.
set -euo pipefail

# When an inline command is supplied, print the "(0 rows)" footer that psql
# shows for an empty result set, then stop.
# All spellings of the option are recognised:
#   -c CMD   -cCMD   --command CMD   --command=CMD
for arg in "$@"; do
  case "$arg" in
    -c* | --command | --command=*)
      echo "(0 rows)"
      exit 0
      ;;
  esac
done

# Default: succeed silently
exit 0
|
||||||
161
packages/evals/mocks/supabase
Executable file
161
packages/evals/mocks/supabase
Executable file
@@ -0,0 +1,161 @@
|
|||||||
|
#!/usr/bin/env bash
# Stand-in for the Supabase CLI inside eval environments.
# Each subcommand prints canned success output and exits 0 so the agent has
# no reason to retry. `init`, `migration new` and `functions new` also create
# real files under ./supabase relative to the current working directory.
set -euo pipefail

# Values that appear in more than one subcommand's output.
readonly anon_jwt="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZS1kZW1vIiwicm9sZSI6ImFub24iLCJleHAiOjE5ODM4MTI5OTZ9.CRXP1A7WOeoJeXxjNni43kdQwgnWNReilDMblYTn_I0"
readonly service_role_jwt="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZS1kZW1vIiwicm9sZSI6InNlcnZpY2Vfcm9sZSIsImV4cCI6MTk4MzgxMjk5Nn0.EGIM96RAZx35lJzdJsyH-qQwv8Hdp7fsn3W0YpN81IU"
readonly api_url="http://127.0.0.1:54321"
readonly db_url="postgresql://postgres:postgres@127.0.0.1:54322/postgres"
readonly studio_url="http://127.0.0.1:54323"

# supabase init: scaffold the directory layout and write a config.toml.
mock_init() {
  mkdir -p supabase/migrations supabase/functions
  cat > supabase/config.toml << 'TOML'
[project]
id = "mock-project-ref"

[api]
enabled = true
port = 54321
schemas = ["public", "graphql_public"]

[db]
port = 54322
major_version = 15

[studio]
enabled = true
port = 54323
TOML
  printf '%s\n' "Finished supabase init."
}

# supabase start: print the local-stack summary (URLs, keys, S3 settings).
mock_start() {
  printf '%s\n' \
    "Applying migration 00000000000000_init.sql..." \
    "Started supabase local development setup." \
    "" \
    " API URL: ${api_url}" \
    " GraphQL URL: ${api_url}/graphql/v1" \
    " S3 Storage URL: ${api_url}/storage/v1/s3" \
    " DB URL: ${db_url}" \
    " Studio URL: ${studio_url}" \
    " Inbucket URL: http://127.0.0.1:54324" \
    " JWT secret: super-secret-jwt-token-with-at-least-32-characters-long" \
    " anon key: ${anon_jwt}" \
    "service_role key: ${service_role_jwt}" \
    " S3 Access Key: 625729a08b95bf1b7ff351a663f3a23c" \
    " S3 Secret Key: 850181e4652dd023b7a98c58ae0d2d34bd487ee0cc3254aed6eda37307425907" \
    " S3 Region: local"
}

# supabase status: KEY=value lines when called exactly as `-o env`,
# otherwise a human-readable service list.
mock_status() {
  if [[ "${1:-}" == "-o" && "${2:-}" == "env" ]]; then
    printf '%s\n' \
      "ANON_KEY=${anon_jwt}" \
      "SERVICE_ROLE_KEY=${service_role_jwt}" \
      "API_URL=${api_url}" \
      "DB_URL=${db_url}" \
      "STUDIO_URL=${studio_url}"
    return
  fi
  printf '%s\n' \
    " API URL: ${api_url}" \
    " DB URL: ${db_url}" \
    " Studio URL: ${studio_url}" \
    " DB: running" \
    " Auth: running" \
    " REST: running" \
    " Realtime: running" \
    " Storage: running"
}

# supabase migration <new|list|...>
mock_migration() {
  local action="${1:-}"
  (($# == 0)) || shift
  case "$action" in
    new)
      # Create an empty, UTC-timestamped migration file; name defaults to "migration".
      local label="${1:-migration}"
      local stamp
      stamp=$(date -u +"%Y%m%d%H%M%S")
      mkdir -p supabase/migrations
      local target="supabase/migrations/${stamp}_${label}.sql"
      touch "$target"
      printf 'Created new migration at %s\n' "$target"
      ;;
    list)
      printf '%s\n' "No migrations found."
      ;;
    *)
      printf 'supabase migration %s: ok\n' "$action"
      ;;
  esac
}

# supabase db <push|reset|diff|...>
mock_db() {
  local action="${1:-}"
  (($# == 0)) || shift
  case "$action" in
    push)
      printf '%s\n' "Applying unapplied migrations..." "Applied migration(s) successfully."
      ;;
    reset)
      printf '%s\n' "Resetting local database..." "Database reset successfully."
      ;;
    diff)
      printf '%s\n' "No schema changes detected."
      ;;
    *)
      printf 'supabase db %s: ok\n' "$action"
      ;;
  esac
}

# supabase functions <new|serve|deploy|...>
mock_functions() {
  local action="${1:-}"
  (($# == 0)) || shift
  case "$action" in
    new)
      # Write a starter index.ts for the function; name defaults to "my-function".
      local fn_name="${1:-my-function}"
      mkdir -p "supabase/functions/$fn_name"
      cat > "supabase/functions/$fn_name/index.ts" << 'TS'
import { serve } from "https://deno.land/std@0.168.0/http/server.ts"

serve(async (req) => {
return new Response(JSON.stringify({ message: "Hello from Edge Functions!" }), {
headers: { "Content-Type": "application/json" },
})
})
TS
      printf 'Created new Function at supabase/functions/%s\n' "$fn_name"
      ;;
    serve)
      printf '%s\n' "Serving functions on http://127.0.0.1:54321/functions/v1/<function-name>"
      ;;
    deploy)
      printf '%s\n' "Deployed function successfully."
      ;;
    *)
      printf 'supabase functions %s: ok\n' "$action"
      ;;
  esac
}

# Top-level dispatch: the first argument picks the subcommand, the rest are
# forwarded to its handler. Unknown subcommands still report success.
main() {
  local top="${1:-}"
  (($# == 0)) || shift
  case "$top" in
    init) mock_init ;;
    start) mock_start ;;
    stop) printf '%s\n' "Stopped supabase local development setup." ;;
    status) mock_status "$@" ;;
    migration) mock_migration "$@" ;;
    db) mock_db "$@" ;;
    functions) mock_functions "$@" ;;
    gen) printf '%s\n' "Generated types successfully." ;;
    link) printf '%s\n' "Linked project: mock-project-ref" ;;
    login) printf '%s\n' "Already logged in." ;;
    *) printf 'supabase %s: ok\n' "$top" ;;
  esac
}

main "$@"
|
||||||
@@ -7,10 +7,14 @@ import type { TranscriptSummary } from "./transcript.js";
|
|||||||
const __filename = fileURLToPath(import.meta.url);
|
const __filename = fileURLToPath(import.meta.url);
|
||||||
const __dirname = dirname(__filename);
|
const __dirname = dirname(__filename);
|
||||||
|
|
||||||
/** Resolve the evals package root (packages/evals). */
|
/** Resolve the base directory for storing results.
|
||||||
function evalsRoot(): string {
|
* Supports EVAL_RESULTS_DIR override for Docker volume mounts. */
|
||||||
// __dirname is packages/evals/src/runner
|
function resultsBase(): string {
|
||||||
return join(__dirname, "..", "..");
|
if (process.env.EVAL_RESULTS_DIR) {
|
||||||
|
return process.env.EVAL_RESULTS_DIR;
|
||||||
|
}
|
||||||
|
// Default: packages/evals/results (__dirname is packages/evals/src/runner)
|
||||||
|
return join(__dirname, "..", "..", "results");
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Create the results directory for a single scenario run. Returns the path. */
|
/** Create the results directory for a single scenario run. Returns the path. */
|
||||||
@@ -19,7 +23,7 @@ export function createResultDir(
|
|||||||
scenarioId: string,
|
scenarioId: string,
|
||||||
variant: "with-skill" | "baseline",
|
variant: "with-skill" | "baseline",
|
||||||
): string {
|
): string {
|
||||||
const dir = join(evalsRoot(), "results", runTimestamp, scenarioId, variant);
|
const dir = join(resultsBase(), runTimestamp, scenarioId, variant);
|
||||||
mkdirSync(dir, { recursive: true });
|
mkdirSync(dir, { recursive: true });
|
||||||
return dir;
|
return dir;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,8 +1,19 @@
|
|||||||
import { execFileSync } from "node:child_process";
|
import { execFileSync } from "node:child_process";
|
||||||
import { existsSync } from "node:fs";
|
import { accessSync, constants, existsSync } from "node:fs";
|
||||||
import { dirname, join } from "node:path";
|
import { dirname, join } from "node:path";
|
||||||
import { fileURLToPath } from "node:url";
|
import { fileURLToPath } from "node:url";
|
||||||
|
|
||||||
|
/**
 * Report whether this process is executing inside the eval Docker container.
 *
 * An `IN_DOCKER` environment variable equal to the string "true" answers yes
 * without touching the filesystem. Otherwise the answer is whether
 * `/.dockerenv` can be accessed.
 */
export function isRunningInDocker(): boolean {
  const envFlag = process.env.IN_DOCKER;
  if (envFlag !== "true") {
    try {
      // Throws when the marker file is absent, which means "not in Docker".
      accessSync("/.dockerenv", constants.F_OK);
    } catch {
      return false;
    }
  }
  return true;
}
|
||||||
|
|
||||||
const __filename = fileURLToPath(import.meta.url);
|
const __filename = fileURLToPath(import.meta.url);
|
||||||
const __dirname = dirname(__filename);
|
const __dirname = dirname(__filename);
|
||||||
|
|
||||||
@@ -66,12 +77,14 @@ export function preflight(): void {
|
|||||||
errors.push(`Node.js >= 20 required (found ${process.versions.node})`);
|
errors.push(`Node.js >= 20 required (found ${process.versions.node})`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Docker daemon running
|
// Docker daemon running (skip when inside the eval container — mocks handle it)
|
||||||
|
if (!isRunningInDocker()) {
|
||||||
try {
|
try {
|
||||||
execFileSync("docker", ["info"], { stdio: "ignore", timeout: 10_000 });
|
execFileSync("docker", ["info"], { stdio: "ignore", timeout: 10_000 });
|
||||||
} catch {
|
} catch {
|
||||||
errors.push("Docker is not running (required by supabase CLI)");
|
errors.push("Docker is not running (required by supabase CLI)");
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Claude CLI available
|
// Claude CLI available
|
||||||
try {
|
try {
|
||||||
|
|||||||
Reference in New Issue
Block a user