mirror of
https://github.com/supabase/agent-skills.git
synced 2026-03-27 10:09:26 +08:00
feat: Zod-typed transcript parsing, drop hooks
This commit is contained in:
108
evals/main.ts
108
evals/main.ts
@@ -1,18 +1,67 @@
|
||||
import { execSync, spawnSync } from "node:child_process";
|
||||
import { spawnSync } from "node:child_process";
|
||||
import { mkdtempSync, readdirSync, readFileSync } from "node:fs";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { z } from "zod";
|
||||
|
||||
/**
|
||||
* Transcript schemas
|
||||
*/
|
||||
|
||||
// Schema for a plain-text content block: `type` must be the literal "text" and `text` a string.
const TextBlock = z.object({ type: z.literal("text"), text: z.string() });
|
||||
|
||||
// Schema for a "thinking" content block: `type` must be the literal "thinking"
// and `thinking` carries the block's text as a string.
const ThinkingBlock = z.object({
|
||||
type: z.literal("thinking"),
|
||||
thinking: z.string(),
|
||||
});
|
||||
|
||||
// Schema for a "tool_use" content block: string `id` and `name`, plus `input`
// as a record with string keys whose values are left unvalidated (`unknown`).
const ToolUseBlock = z.object({
|
||||
type: z.literal("tool_use"),
|
||||
id: z.string(),
|
||||
name: z.string(),
|
||||
input: z.record(z.string(), z.unknown()),
|
||||
});
|
||||
|
||||
// Schema for a "tool_result" content block: a string `tool_use_id`, an
// unvalidated `content` payload, and an optional boolean `is_error`.
// NOTE(review): `tool_use_id` presumably matches the `id` of a ToolUseBlock — confirm.
const ToolResultBlock = z.object({
|
||||
type: z.literal("tool_result"),
|
||||
tool_use_id: z.string(),
|
||||
content: z.unknown(),
|
||||
is_error: z.boolean().optional(),
|
||||
});
|
||||
|
||||
// Union of every content block shape accepted inside an assistant message.
// The four specific block schemas are listed first; the last member accepts
// any object that has a string `type`, so unrecognized block kinds still parse.
const ContentBlock = z.union([
|
||||
ToolUseBlock,
|
||||
ThinkingBlock,
|
||||
TextBlock,
|
||||
ToolResultBlock,
|
||||
z.looseObject({ type: z.string() }), // catch-all for unknown block types
|
||||
]);
|
||||
|
||||
// Schema for a transcript line whose `type` is "assistant": string `sessionId`,
// `timestamp` and `uuid`, plus a `message` with role "assistant", an array of
// ContentBlock values, and a `stop_reason` string that may be null or absent.
const AssistantEntry = z.object({
|
||||
type: z.literal("assistant"),
|
||||
sessionId: z.string(),
|
||||
timestamp: z.string(),
|
||||
uuid: z.string(),
|
||||
message: z.object({
|
||||
role: z.literal("assistant"),
|
||||
content: z.array(ContentBlock),
|
||||
stop_reason: z.string().nullable().optional(),
|
||||
}),
|
||||
});
|
||||
|
||||
// Schema for a single transcript line: either a fully validated AssistantEntry
// or any other object that carries a string `type`.
// Catch-all — user messages, queue-operations, etc.
|
||||
const TranscriptLine = z.union([
|
||||
AssistantEntry,
|
||||
z.looseObject({ type: z.string() }),
|
||||
]);
|
||||
|
||||
/**
|
||||
* Config
|
||||
*/
|
||||
|
||||
// API key read from the environment; it is forwarded to the container through
// the `-e ANTHROPIC_API_KEY=...` docker argument. Abort immediately when it is
// unset or empty so the eval never starts without credentials.
const apiKey = process.env.ANTHROPIC_API_KEY;
|
||||
if (!apiKey) throw new Error("ANTHROPIC_API_KEY required");
|
||||
|
||||
// Pre-flight check that the `evals-claude` Docker image exists locally.
// The inspect command's output is discarded (stdio: "ignore"); only success or
// failure matters. Any failure of the command is reported as a missing image
// and terminates the process with exit status 1.
try {
|
||||
execSync("docker image inspect evals-claude", { stdio: "ignore" });
|
||||
} catch {
|
||||
console.error(
|
||||
"Docker image 'evals-claude' not found. Build it first with:\n npm run evals:build",
|
||||
);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Absolute path of the directory one level above this file's directory.
const repoRoot = path.resolve(__dirname, "..");
|
||||
const skillPath = path.join(
|
||||
repoRoot,
|
||||
@@ -26,6 +75,13 @@ SELECT * FROM orders WHERE user_id = 123 AND status = 'pending';
|
||||
|
||||
What indexes should I add and why?`;
|
||||
|
||||
/**
|
||||
* Run the eval
|
||||
*/
|
||||
|
||||
// Mount ~/.claude/projects to capture the built-in session transcript.
// A fresh temp directory is created on the host here; the docker invocation
// bind-mounts it at /home/claude/.claude/projects via a `-v` argument.
|
||||
const projectsDir = mkdtempSync(path.join(os.tmpdir(), "eval-projects-"));
|
||||
|
||||
const result = spawnSync(
|
||||
"docker",
|
||||
[
|
||||
@@ -34,10 +90,13 @@ const result = spawnSync(
|
||||
"-e",
|
||||
`ANTHROPIC_API_KEY=${apiKey}`,
|
||||
"-v",
|
||||
`${skillPath}:/root/.claude/skills/supabase-postgres-best-practices:ro`, // :ro = read-only snapshot
|
||||
`${skillPath}:/home/claude/.claude/skills/supabase-postgres-best-practices:ro`, // :ro = read-only snapshot
|
||||
"-v",
|
||||
`${projectsDir}:/home/claude/.claude/projects`,
|
||||
"evals-claude",
|
||||
"claude",
|
||||
"-p",
|
||||
"--print",
|
||||
"--dangerously-skip-permissions",
|
||||
prompt,
|
||||
],
|
||||
{ encoding: "utf-8" },
|
||||
@@ -46,4 +105,27 @@ const result = spawnSync(
|
||||
// spawnSync reports launch failures (e.g. the docker binary cannot be started)
// through `result.error`, leaving `status` null and `stderr` empty. Rethrow the
// original error so the real cause is not hidden behind "Exit code null".
if (result.error) {
  throw result.error;
}

// A non-zero status means the container command failed; a null status with a
// signal means the child was killed. Prefer stderr when it has content,
// otherwise describe how the process ended.
if (result.status !== 0) {
  const ending = result.signal
    ? `Killed by signal ${result.signal}`
    : `Exit code ${result.status}`;
  throw new Error(result.stderr || ending);
}

// Echo the model's printed answer.
console.log(result.stdout);
|
||||
|
||||
/**
|
||||
* Parse the transcript
|
||||
*/
|
||||
|
||||
// Container's working dir is /, which becomes `-` in the projects path
const transcriptDir = path.join(projectsDir, "-");
const [transcriptFile] = readdirSync(transcriptDir).filter((f) =>
  f.endsWith(".jsonl"),
);

// Without this guard a missing transcript surfaces as an opaque TypeError from
// path.join(undefined); fail with a message that names the directory instead.
if (transcriptFile === undefined) {
  throw new Error(`No .jsonl transcript found in ${transcriptDir}`);
}

// Single typed array — all transcript entries parsed and validated.
// The file is JSON Lines: one JSON document per non-empty line. Lines that are
// not valid JSON (e.g. a truncated final line) are skipped with a warning on
// stderr, and lines that fail schema validation are dropped, so one bad line
// cannot abort the whole run.
const transcript = readFileSync(
  path.join(transcriptDir, transcriptFile),
  "utf-8",
)
  .split("\n")
  .filter(Boolean)
  .flatMap((line, index) => {
    let raw;
    try {
      raw = JSON.parse(line);
    } catch {
      // safeParse only guards schema validation; JSON.parse itself throws.
      console.warn(`Skipping malformed transcript entry ${index + 1}`);
      return [];
    }
    const parsed = TranscriptLine.safeParse(raw);
    return parsed.success ? [parsed.data] : [];
  });

// Dump the validated transcript as pretty-printed JSON.
console.log(JSON.stringify(transcript, null, 2));
|
||||
|
||||
Reference in New Issue
Block a user