feat: Zod-typed transcript parsing, drop hooks

This commit is contained in:
Matt Rossman
2026-03-03 16:02:05 -05:00
parent 383156e529
commit 18f58ffc7c
2 changed files with 98 additions and 13 deletions

View File

@@ -1,2 +1,5 @@
FROM node:24-slim FROM node:24-slim
RUN npm install -g @anthropic-ai/claude-code@2.1.63 RUN npm install -g @anthropic-ai/claude-code@2.1.63
RUN useradd -m claude
USER claude
RUN mkdir -p /home/claude/.claude

View File

@@ -1,18 +1,67 @@
import { execSync, spawnSync } from "node:child_process"; import { spawnSync } from "node:child_process";
import { mkdtempSync, readdirSync, readFileSync } from "node:fs";
import os from "node:os";
import path from "node:path"; import path from "node:path";
import { z } from "zod";
/**
 * Transcript schemas
 *
 * Zod validators for the JSON objects found on each line of a session
 * transcript. Only assistant entries are modelled in detail; every other
 * entry just needs a string `type`.
 */

// Shape shared by both catch-alls: any object carrying a string `type`.
// `looseObject` keeps whatever other keys the object has.
const UnknownTyped = z.looseObject({ type: z.string() });

// Plain text emitted by the assistant.
const TextBlock = z.object({
  type: z.literal("text"),
  text: z.string(),
});

// Thinking content emitted by the assistant.
const ThinkingBlock = z.object({
  type: z.literal("thinking"),
  thinking: z.string(),
});

// A tool invocation; `input` is a string-keyed bag of unvalidated values.
const ToolUseBlock = z.object({
  type: z.literal("tool_use"),
  id: z.string(),
  name: z.string(),
  input: z.record(z.string(), z.unknown()),
});

// The outcome of a tool invocation, linked back through `tool_use_id`.
const ToolResultBlock = z.object({
  type: z.literal("tool_result"),
  tool_use_id: z.string(),
  content: z.unknown(),
  is_error: z.boolean().optional(),
});

// Known block types first; the catch-all accepts unknown block types.
const ContentBlock = z.union([
  ToolUseBlock,
  ThinkingBlock,
  TextBlock,
  ToolResultBlock,
  UnknownTyped,
]);

// The `message` payload carried by an assistant entry.
const AssistantMessage = z.object({
  role: z.literal("assistant"),
  content: z.array(ContentBlock),
  stop_reason: z.string().nullable().optional(),
});

// One assistant turn as recorded in the transcript.
const AssistantEntry = z.object({
  type: z.literal("assistant"),
  sessionId: z.string(),
  timestamp: z.string(),
  uuid: z.string(),
  message: AssistantMessage,
});

// Catch-all — user messages, queue-operations, etc.
const TranscriptLine = z.union([AssistantEntry, UnknownTyped]);
/**
* Config
*/
const apiKey = process.env.ANTHROPIC_API_KEY; const apiKey = process.env.ANTHROPIC_API_KEY;
if (!apiKey) throw new Error("ANTHROPIC_API_KEY required"); if (!apiKey) throw new Error("ANTHROPIC_API_KEY required");
try {
execSync("docker image inspect evals-claude", { stdio: "ignore" });
} catch {
console.error(
"Docker image 'evals-claude' not found. Build it first with:\n npm run evals:build",
);
process.exit(1);
}
const repoRoot = path.resolve(__dirname, ".."); const repoRoot = path.resolve(__dirname, "..");
const skillPath = path.join( const skillPath = path.join(
repoRoot, repoRoot,
@@ -26,6 +75,13 @@ SELECT * FROM orders WHERE user_id = 123 AND status = 'pending';
What indexes should I add and why?`; What indexes should I add and why?`;
/**
* Run the eval
*/
// Mount ~/.claude/projects to capture the built-in session transcript
const projectsDir = mkdtempSync(path.join(os.tmpdir(), "eval-projects-"));
const result = spawnSync( const result = spawnSync(
"docker", "docker",
[ [
@@ -34,10 +90,13 @@ const result = spawnSync(
"-e", "-e",
`ANTHROPIC_API_KEY=${apiKey}`, `ANTHROPIC_API_KEY=${apiKey}`,
"-v", "-v",
`${skillPath}:/root/.claude/skills/supabase-postgres-best-practices:ro`, // :ro = read-only snapshot `${skillPath}:/home/claude/.claude/skills/supabase-postgres-best-practices:ro`, // :ro = read-only snapshot
"-v",
`${projectsDir}:/home/claude/.claude/projects`,
"evals-claude", "evals-claude",
"claude", "claude",
"-p", "--print",
"--dangerously-skip-permissions",
prompt, prompt,
], ],
{ encoding: "utf-8" }, { encoding: "utf-8" },
@@ -46,4 +105,27 @@ const result = spawnSync(
if (result.status !== 0) { if (result.status !== 0) {
throw new Error(result.stderr || `Exit code ${result.status}`); throw new Error(result.stderr || `Exit code ${result.status}`);
} }
console.log(result.stdout);
/**
 * Parse the transcript
 *
 * Locates the session transcript written into the mounted projects
 * directory, validates every line against `TranscriptLine`, and prints the
 * resulting array as pretty-printed JSON.
 */

// Container's working dir is /, which becomes `-` in the projects path
const transcriptDir = path.join(projectsDir, "-");

// Take the first .jsonl file; fail with a clear message when there is none
// instead of letting path.join() choke on `undefined`.
const transcriptFile = readdirSync(transcriptDir).find((f) =>
  f.endsWith(".jsonl"),
);
if (transcriptFile === undefined) {
  throw new Error(`No .jsonl transcript found in ${transcriptDir}`);
}

// Single typed array — all transcript entries parsed and validated
const transcript = readFileSync(
  path.join(transcriptDir, transcriptFile),
  "utf-8",
)
  .split(/\r?\n/)
  // Drop empty and whitespace-only lines; JSON.parse would reject them.
  .filter((line) => line.trim() !== "")
  .flatMap((line) => {
    // JSON.parse throws on malformed input (e.g. a truncated final line).
    // Skip such lines the same way schema-invalid lines are skipped, but
    // report them on stderr so the JSON on stdout stays clean.
    let json;
    try {
      json = JSON.parse(line);
    } catch {
      console.warn("Skipping transcript line that is not valid JSON");
      return [];
    }
    const parsed = TranscriptLine.safeParse(json);
    return parsed.success ? [parsed.data] : [];
  });

console.log(JSON.stringify(transcript, null, 2));