Files
supabase-postgres-best-prac…/evals/main.ts
2026-03-03 16:02:05 -05:00

132 lines
3.0 KiB
TypeScript

import { spawnSync } from "node:child_process";
import { mkdtempSync, readdirSync, readFileSync } from "node:fs";
import os from "node:os";
import path from "node:path";
import { z } from "zod";
/**
 * Transcript schemas
 *
 * Zod validators for the lines of a session transcript. Only assistant
 * entries are modelled in detail; everything else is accepted through a
 * catch-all so that unfamiliar entries do not fail validation.
 */

// Catch-all: any object that carries a string `type` discriminator.
const AnyTypedObject = z.looseObject({ type: z.string() });

// Plain text emitted by the assistant.
const TextBlock = z.object({
  type: z.literal("text"),
  text: z.string(),
});

// Reasoning block.
const ThinkingBlock = z.object({
  type: z.literal("thinking"),
  thinking: z.string(),
});

// Tool invocation: `input` is an arbitrary string-keyed record.
const ToolUseBlock = z.object({
  type: z.literal("tool_use"),
  id: z.string(),
  name: z.string(),
  input: z.record(z.string(), z.unknown()),
});

// Tool outcome, linked to its invocation through `tool_use_id`.
const ToolResultBlock = z.object({
  type: z.literal("tool_result"),
  tool_use_id: z.string(),
  content: z.unknown(),
  is_error: z.optional(z.boolean()),
});

// Members are tried in order, so the catch-all has to stay last.
const ContentBlock = z.union([
  ToolUseBlock,
  ThinkingBlock,
  TextBlock,
  ToolResultBlock,
  AnyTypedObject, // unknown block types
]);

// One assistant turn as recorded in the transcript.
const AssistantEntry = z.object({
  type: z.literal("assistant"),
  sessionId: z.string(),
  timestamp: z.string(),
  uuid: z.string(),
  message: z.object({
    role: z.literal("assistant"),
    content: ContentBlock.array(),
    stop_reason: z.string().nullish(),
  }),
});

// A transcript line is either an assistant entry or anything else with a
// string `type` (user messages, queue-operations, etc.).
const TranscriptLine = z.union([AssistantEntry, AnyTypedObject]);
/**
 * Config
 */

// The API key is mandatory: abort immediately when it is unset or empty.
const apiKey = process.env.ANTHROPIC_API_KEY;
if (!apiKey) {
  throw new Error("ANTHROPIC_API_KEY required");
}

// repoRoot is the parent of the directory containing this file.
const repoRoot = path.resolve(__dirname, "..");
const skillPath = path.join(repoRoot, "skills", "supabase-postgres-best-practices");

// Prompt passed to the CLI, assembled from its three lines.
const prompt = [
  "Review this SQL query for a Supabase project and suggest optimizations:",
  "SELECT * FROM orders WHERE user_id = 123 AND status = 'pending';",
  "What indexes should I add and why?",
].join("\n");
/**
 * Run the eval
 *
 * Runs `claude --print` once inside the `evals-claude` Docker image with the
 * skill directory mounted read-only and a fresh host temp directory mounted
 * at ~/.claude/projects.
 *
 * Throws an Error when docker cannot be run at all, when the container is
 * terminated by a signal, or when it exits with a non-zero status.
 */
// Mount ~/.claude/projects to capture the built-in session transcript
const projectsDir = mkdtempSync(path.join(os.tmpdir(), "eval-projects-"));
const result = spawnSync(
  "docker",
  [
    "run",
    "--rm",
    // Name only: docker copies the value from its own environment, so the
    // secret never shows up in the process argument list.
    "-e",
    "ANTHROPIC_API_KEY",
    "-v",
    `${skillPath}:/home/claude/.claude/skills/supabase-postgres-best-practices:ro`, // :ro = read-only bind mount
    "-v",
    `${projectsDir}:/home/claude/.claude/projects`,
    "evals-claude",
    "claude",
    "--print",
    "--dangerously-skip-permissions",
    prompt,
  ],
  {
    encoding: "utf-8",
    // Hand the validated key to the docker CLI through its environment.
    env: { ...process.env, ANTHROPIC_API_KEY: apiKey },
  },
);
// `error` is set when the child could not be spawned or was aborted by Node
// (missing docker binary, output buffer overflow, ...); `status` is null then.
if (result.error) {
  throw new Error(`Failed to run docker: ${result.error.message}`);
}
if (result.status !== 0) {
  const fallback =
    result.signal != null
      ? `Terminated by signal ${result.signal}`
      : `Exit code ${result.status}`;
  throw new Error(result.stderr || fallback);
}
/**
 * Parse the transcript
 *
 * Finds the session transcript written into the mounted projects directory,
 * validates each JSONL line against TranscriptLine and prints the resulting
 * array as pretty-printed JSON on stdout.
 *
 * Throws an Error when no transcript file exists or a line is not valid JSON.
 */
// Container's working dir is /, which becomes `-` in the projects path
const transcriptDir = path.join(projectsDir, "-");
// If several .jsonl files are present, the first one listed is used.
const transcriptFile = readdirSync(transcriptDir).find((f) =>
  f.endsWith(".jsonl"),
);
if (transcriptFile === undefined) {
  throw new Error(`No .jsonl transcript found in ${transcriptDir}`);
}
// Entries that are valid JSON but fail schema validation are skipped; they
// are counted so the omission can be reported.
let skippedEntries = 0;
// Single typed array of every entry that passed validation
const transcript = readFileSync(
  path.join(transcriptDir, transcriptFile),
  "utf-8",
)
  .split("\n")
  .flatMap((rawLine, index) => {
    // Ignore blank lines, including ones that only hold whitespace or "\r".
    const line = rawLine.trim();
    if (line === "") {
      return [];
    }
    let json: unknown;
    try {
      json = JSON.parse(line);
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      throw new Error(
        `Invalid JSON on line ${index + 1} of ${transcriptFile}: ${reason}`,
      );
    }
    const parsed = TranscriptLine.safeParse(json);
    if (!parsed.success) {
      skippedEntries += 1;
      return [];
    }
    return [parsed.data];
  });
if (skippedEntries > 0) {
  // Reported on stderr so that stdout remains pure JSON.
  console.warn(
    `Skipped ${skippedEntries} transcript entries that failed validation`,
  );
}
console.log(JSON.stringify(transcript, null, 2));