mirror of
https://github.com/supabase/agent-skills.git
synced 2026-03-27 10:09:26 +08:00
feat: Zod-typed transcript parsing, drop hooks
This commit is contained in:
@@ -1,2 +1,5 @@
|
|||||||
FROM node:24-slim
|
FROM node:24-slim
|
||||||
RUN npm install -g @anthropic-ai/claude-code@2.1.63
|
RUN npm install -g @anthropic-ai/claude-code@2.1.63
|
||||||
|
RUN useradd -m claude
|
||||||
|
USER claude
|
||||||
|
RUN mkdir -p /home/claude/.claude
|
||||||
|
|||||||
108
evals/main.ts
108
evals/main.ts
@@ -1,18 +1,67 @@
|
|||||||
import { execSync, spawnSync } from "node:child_process";
|
import { spawnSync } from "node:child_process";
|
||||||
|
import { mkdtempSync, readdirSync, readFileSync } from "node:fs";
|
||||||
|
import os from "node:os";
|
||||||
import path from "node:path";
|
import path from "node:path";
|
||||||
|
import { z } from "zod";
|
||||||
|
|
||||||
|
/**
 * Transcript schemas
 *
 * Zod validators for the entries of the JSONL session transcript. Only
 * assistant entries are described field by field; any other entry or
 * content block merely has to carry a string `type`.
 */

// Shared fallback: anything with a string `type`, extra keys allowed.
const UntypedEntry = z.looseObject({ type: z.string() });

// Plain text emitted by the model.
const TextBlock = z.object({
  type: z.literal("text"),
  text: z.string(),
});

// Reasoning text emitted by the model.
const ThinkingBlock = z.object({
  type: z.literal("thinking"),
  thinking: z.string(),
});

// A tool invocation: call id, tool name and its string-keyed arguments.
const ToolUseBlock = z.object({
  type: z.literal("tool_use"),
  id: z.string(),
  name: z.string(),
  input: z.record(z.string(), z.unknown()),
});

// The result for a tool call, linked back through `tool_use_id`.
const ToolResultBlock = z.object({
  type: z.literal("tool_result"),
  tool_use_id: z.string(),
  content: z.unknown(),
  is_error: z.boolean().optional(),
});

// Members are tried in this order; the fallback comes last so it only
// catches unknown block types.
const ContentBlock = z.union([
  ToolUseBlock,
  ThinkingBlock,
  TextBlock,
  ToolResultBlock,
  UntypedEntry,
]);

// Payload of an assistant entry.
const AssistantMessage = z.object({
  role: z.literal("assistant"),
  content: z.array(ContentBlock),
  stop_reason: z.string().nullable().optional(),
});

const AssistantEntry = z.object({
  type: z.literal("assistant"),
  sessionId: z.string(),
  timestamp: z.string(),
  uuid: z.string(),
  message: AssistantMessage,
});

// One transcript line: an assistant entry, or the catch-all for user
// messages, queue-operations, etc.
const TranscriptLine = z.union([AssistantEntry, UntypedEntry]);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Config
|
||||||
|
*/
|
||||||
|
|
||||||
const apiKey = process.env.ANTHROPIC_API_KEY;
|
const apiKey = process.env.ANTHROPIC_API_KEY;
|
||||||
if (!apiKey) throw new Error("ANTHROPIC_API_KEY required");
|
if (!apiKey) throw new Error("ANTHROPIC_API_KEY required");
|
||||||
|
|
||||||
try {
|
|
||||||
execSync("docker image inspect evals-claude", { stdio: "ignore" });
|
|
||||||
} catch {
|
|
||||||
console.error(
|
|
||||||
"Docker image 'evals-claude' not found. Build it first with:\n npm run evals:build",
|
|
||||||
);
|
|
||||||
process.exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
const repoRoot = path.resolve(__dirname, "..");
|
const repoRoot = path.resolve(__dirname, "..");
|
||||||
const skillPath = path.join(
|
const skillPath = path.join(
|
||||||
repoRoot,
|
repoRoot,
|
||||||
@@ -26,6 +75,13 @@ SELECT * FROM orders WHERE user_id = 123 AND status = 'pending';
|
|||||||
|
|
||||||
What indexes should I add and why?`;
|
What indexes should I add and why?`;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Run the eval
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Mount ~/.claude/projects to capture the built-in session transcript
|
||||||
|
// Fresh directory under the OS temp dir; mkdtempSync appends a unique suffix.
const projectsDirPrefix = path.join(os.tmpdir(), "eval-projects-");
const projectsDir = mkdtempSync(projectsDirPrefix);
|
||||||
|
|
||||||
const result = spawnSync(
|
const result = spawnSync(
|
||||||
"docker",
|
"docker",
|
||||||
[
|
[
|
||||||
@@ -34,10 +90,13 @@ const result = spawnSync(
|
|||||||
"-e",
|
"-e",
|
||||||
`ANTHROPIC_API_KEY=${apiKey}`,
|
`ANTHROPIC_API_KEY=${apiKey}`,
|
||||||
"-v",
|
"-v",
|
||||||
`${skillPath}:/root/.claude/skills/supabase-postgres-best-practices:ro`, // :ro = read-only snapshot
|
`${skillPath}:/home/claude/.claude/skills/supabase-postgres-best-practices:ro`, // :ro = read-only snapshot
|
||||||
|
"-v",
|
||||||
|
`${projectsDir}:/home/claude/.claude/projects`,
|
||||||
"evals-claude",
|
"evals-claude",
|
||||||
"claude",
|
"claude",
|
||||||
"-p",
|
"--print",
|
||||||
|
"--dangerously-skip-permissions",
|
||||||
prompt,
|
prompt,
|
||||||
],
|
],
|
||||||
{ encoding: "utf-8" },
|
{ encoding: "utf-8" },
|
||||||
@@ -46,4 +105,27 @@ const result = spawnSync(
|
|||||||
if (result.status !== 0) {
|
if (result.status !== 0) {
|
||||||
throw new Error(result.stderr || `Exit code ${result.status}`);
|
throw new Error(result.stderr || `Exit code ${result.status}`);
|
||||||
}
|
}
|
||||||
console.log(result.stdout);
|
|
||||||
|
/**
|
||||||
|
* Parse the transcript
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Container's working dir is /, which becomes `-` in the projects path
const transcriptDir = path.join(projectsDir, "-");
const [transcriptFile] = readdirSync(transcriptDir).filter((f) =>
  f.endsWith(".jsonl"),
);

// Without this guard a run that wrote no transcript surfaces as an opaque
// TypeError from path.join(…, undefined).
if (transcriptFile === undefined) {
  throw new Error(`No .jsonl transcript found in ${transcriptDir}`);
}

// Single typed array — every entry that is valid JSON and matches
// TranscriptLine. Entries that fail either check are dropped so one bad
// line cannot abort the whole parse.
const transcript = readFileSync(
  path.join(transcriptDir, transcriptFile),
  "utf-8",
)
  .split("\n")
  // Also drops whitespace-only lines (e.g. a stray "\r").
  .filter((l) => l.trim() !== "")
  .flatMap((l, i) => {
    try {
      const parsed = TranscriptLine.safeParse(JSON.parse(l));
      return parsed.success ? [parsed.data] : [];
    } catch (err) {
      // Only malformed JSON is expected here; anything else is a real bug.
      if (!(err instanceof SyntaxError)) throw err;
      console.warn(`Skipping malformed transcript entry ${i + 1}`);
      return [];
    }
  });

console.log(JSON.stringify(transcript, null, 2));
|
||||||
|
|||||||
Reference in New Issue
Block a user