diff --git a/packages/skills-build/src/build.ts b/packages/skills-build/src/build.ts index 25d102c..95cfe87 100644 --- a/packages/skills-build/src/build.ts +++ b/packages/skills-build/src/build.ts @@ -7,7 +7,11 @@ import { validateSkillExists, } from "./config.js"; import { parseRuleFile } from "./parser.js"; -import { filterRulesForProfile, listProfiles, loadProfile } from "./profiles.js"; +import { + filterRulesForProfile, + listProfiles, + loadProfile, +} from "./profiles.js"; import type { Metadata, Profile, Rule, Section } from "./types.js"; import { validateRuleFile } from "./validate.js"; @@ -118,10 +122,7 @@ function buildSkill(paths: SkillPaths, profile?: Profile): void { // Check if rules directory exists if (!existsSync(paths.rulesDir)) { console.log(` No rules directory found. Generating empty AGENTS.md.`); - writeFileSync( - outputFile, - `# ${skillTitle}\n\nNo rules defined yet.\n`, - ); + writeFileSync(outputFile, `# ${skillTitle}\n\nNo rules defined yet.\n`); return; } @@ -157,7 +158,9 @@ function buildSkill(paths: SkillPaths, profile?: Profile): void { let filteredRules = rules; if (profile) { filteredRules = filterRulesForProfile(rules, profile); - console.log(` Filtered to ${filteredRules.length} rules for profile "${profile.name}"`); + console.log( + ` Filtered to ${filteredRules.length} rules for profile "${profile.name}"`, + ); } // Group rules by section and assign IDs @@ -244,7 +247,9 @@ function buildSkill(paths: SkillPaths, profile?: Profile): void { prerequisites.push(`PostgreSQL ${rule.minVersion}+`); } if (rule.extensions && rule.extensions.length > 0) { - prerequisites.push(`Extension${rule.extensions.length > 1 ? "s" : ""}: ${rule.extensions.join(", ")}`); + prerequisites.push( + `Extension${rule.extensions.length > 1 ? "s" : ""}: ${rule.extensions.join(", ")}`, + ); } if (prerequisites.length > 0) { output.push(`**Prerequisites:** ${prerequisites.join(" | ")}\n`); @@ -302,7 +307,11 @@ function buildSkill(paths: SkillPaths, profile?: Profile): void { /** * Parse CLI arguments */ -function parseArgs(): { skill?: string; profile?: string; allProfiles: boolean } { +function parseArgs(): { + skill?: string; + profile?: string; + allProfiles: boolean; +} { const args = process.argv.slice(2); let skill: string | undefined; let profile: string | undefined; diff --git a/packages/skills-build/src/parser.ts b/packages/skills-build/src/parser.ts index 06a7841..a0fa0f2 100644 --- a/packages/skills-build/src/parser.ts +++ b/packages/skills-build/src/parser.ts @@ -251,7 +251,8 @@ export function parseRuleFile( const examples = extractExamples(body); const tags = frontmatter.tags?.split(",").map((t) => t.trim()) || []; - const extensions = frontmatter.extensions?.split(",").map((e) => e.trim()) || []; + const extensions = + frontmatter.extensions?.split(",").map((e) => e.trim()) || []; // Validation warnings if (!explanation || explanation.length < 20) { diff --git a/packages/skills-build/src/profiles.ts b/packages/skills-build/src/profiles.ts index 4083381..f5b5cf9 100644 --- a/packages/skills-build/src/profiles.ts +++ b/packages/skills-build/src/profiles.ts @@ -5,7 +5,10 @@ import type { Profile, Rule } from "./types.js"; /** * Load a profile from the profiles directory */ -export function loadProfile(profilesDir: string, profileName: string): Profile | null { +export function loadProfile( + profilesDir: string, + profileName: string, +): Profile | null { const profileFile = join(profilesDir, `${profileName}.json`); if (!existsSync(profileFile)) { return null; @@ -54,14 +57,20 @@ function compareVersions(a: string, b: string): number { /** * Check if a rule is compatible with a profile */ -export function isRuleCompatibleWithProfile(rule: Rule, profile: Profile): boolean { +export function isRuleCompatibleWithProfile( + rule: Rule, + profile: Profile, +): boolean { // Check version requirement if (rule.minVersion) { if (compareVersions(rule.minVersion, profile.minVersion) > 0) { // Rule requires a higher version than profile supports return false; } - if (profile.maxVersion && compareVersions(rule.minVersion, profile.maxVersion) > 0) { + if ( + profile.maxVersion && + compareVersions(rule.minVersion, profile.maxVersion) > 0 + ) { // Rule requires a version higher than profile's max return false; } diff --git a/packages/skills-build/src/types.ts b/packages/skills-build/src/types.ts index 520e1e4..5b41374 100644 --- a/packages/skills-build/src/types.ts +++ b/packages/skills-build/src/types.ts @@ -26,8 +26,8 @@ export interface Rule { references?: string[]; tags?: string[]; supabaseNotes?: string; - minVersion?: string; // Minimum PostgreSQL version required (e.g., "11", "14") - extensions?: string[]; // Required PostgreSQL extensions (e.g., ["pg_stat_statements"]) + minVersion?: string; // Minimum PostgreSQL version required (e.g., "11", "14") + extensions?: string[]; // Required PostgreSQL extensions (e.g., ["pg_stat_statements"]) } export interface Section { diff --git a/skills/postgres-best-practices/evals/package.json b/skills/postgres-best-practices/evals/package.json index da22c87..3409793 100644 --- a/skills/postgres-best-practices/evals/package.json +++ b/skills/postgres-best-practices/evals/package.json @@ -1,18 +1,18 @@ { - "name": "postgres-best-practices-evals", - "version": "1.0.0", - "description": "Evaluation scenarios for Postgres Best Practices skill", - "type": "module", - "scripts": { - "eval": "vitest run", - "eval:watch": "vitest", - "eval:ui": "vitest --ui" - }, - "devDependencies": { - "@ai-sdk/anthropic": "^0.0.30", - "@types/node": "^20.0.0", - "ai": "^3.0.0", - "typescript": "^5.0.0", - "vitest": "^1.0.0" - } + "name": "postgres-best-practices-evals", + "version": "1.0.0", + "description": "Evaluation scenarios for Postgres Best Practices skill", + "type": "module", + "scripts": { + "eval": "vitest run", + "eval:watch": "vitest", + "eval:ui": "vitest --ui" + }, + "devDependencies": { + "@ai-sdk/anthropic": "^0.0.30", + "@types/node": "^20.0.0", + "ai": "^3.0.0", + "typescript": "^5.0.0", + "vitest": "^1.0.0" + } } diff --git a/skills/postgres-best-practices/evals/runner.ts b/skills/postgres-best-practices/evals/runner.ts index 3bf51df..465e73c 100644 --- a/skills/postgres-best-practices/evals/runner.ts +++ b/skills/postgres-best-practices/evals/runner.ts @@ -1,129 +1,143 @@ -import { generateText } from "ai"; -import { anthropic } from "@ai-sdk/anthropic"; import { readFileSync } from "node:fs"; import { join } from "node:path"; -import type { CriterionResult, EvalConfig, EvalResult, EvalScenario } from "./types.js"; +import { anthropic } from "@ai-sdk/anthropic"; +import { generateText } from "ai"; +import type { + CriterionResult, + EvalConfig, + EvalResult, + EvalScenario, +} from "./types.js"; const DEFAULT_CONFIG: EvalConfig = { - agentsPath: join(import.meta.dirname, "..", "AGENTS.md"), - model: "claude-sonnet-4-20250514", - maxTokens: 2048, - temperature: 0, + agentsPath: join(import.meta.dirname, "..", "AGENTS.md"), + model: "claude-sonnet-4-20250514", + maxTokens: 2048, + temperature: 0, }; /** * Build the user prompt from a scenario */ function buildUserPrompt(scenario: EvalScenario): string { - const parts: string[] = []; + const parts: string[] = []; - // Add version context if specified - if (scenario.input.postgresVersion) { - parts.push(`PostgreSQL Version: ${scenario.input.postgresVersion}`); - } + // Add version context if specified + if (scenario.input.postgresVersion) { + parts.push(`PostgreSQL Version: ${scenario.input.postgresVersion}`); + } - // Add extensions context if specified - if (scenario.input.availableExtensions) { - if (scenario.input.availableExtensions.length === 0) { - parts.push("Available Extensions: None installed"); - } else { - parts.push(`Available Extensions: ${scenario.input.availableExtensions.join(", ")}`); - } - } + // Add extensions context if specified + if (scenario.input.availableExtensions) { + if (scenario.input.availableExtensions.length === 0) { + parts.push("Available Extensions: None installed"); + } else { + parts.push( + `Available Extensions: ${scenario.input.availableExtensions.join(", ")}`, + ); + } + } - // Add additional context if provided - if (scenario.input.context) { - parts.push(`Context: ${scenario.input.context}`); - } + // Add additional context if provided + if (scenario.input.context) { + parts.push(`Context: ${scenario.input.context}`); + } - // Add schema - parts.push(`\nSchema:\n\`\`\`sql\n${scenario.input.schema}\n\`\`\``); + // Add schema + parts.push(`\nSchema:\n\`\`\`sql\n${scenario.input.schema}\n\`\`\``); - // Add user query - parts.push(`\nQuestion: ${scenario.input.userQuery}`); + // Add user query + parts.push(`\nQuestion: ${scenario.input.userQuery}`); - return parts.join("\n"); + return parts.join("\n"); } /** * Extract rule IDs mentioned in a response */ function extractRuleIds(response: string): string[] { - // Match patterns like "1.1", "2.3", etc. - const rulePattern = /\b(\d+\.\d+)\b/g; - const matches = response.match(rulePattern) || []; - return [...new Set(matches)]; + // Match patterns like "1.1", "2.3", etc. + const rulePattern = /\b(\d+\.\d+)\b/g; + const matches = response.match(rulePattern) || []; + return [...new Set(matches)]; } /** * Evaluate the response against expected criteria */ -function evaluateCriteria(scenario: EvalScenario, response: string): CriterionResult[] { - const results: CriterionResult[] = []; - const responseLower = response.toLowerCase(); +function evaluateCriteria( + scenario: EvalScenario, + response: string, +): CriterionResult[] { + const results: CriterionResult[] = []; + const responseLower = response.toLowerCase(); - // Check mustContain criteria - for (const term of scenario.expectedOutput.mustContain) { - const found = responseLower.includes(term.toLowerCase()); - results.push({ - criterion: `Response should contain "${term}"`, - passed: found, - evidence: found ? "Found in response" : "Not found in response", - }); - } + // Check mustContain criteria + for (const term of scenario.expectedOutput.mustContain) { + const found = responseLower.includes(term.toLowerCase()); + results.push({ + criterion: `Response should contain "${term}"`, + passed: found, + evidence: found ? "Found in response" : "Not found in response", + }); + } - // Check mustNotContain criteria - if (scenario.expectedOutput.mustNotContain) { - for (const term of scenario.expectedOutput.mustNotContain) { - const found = responseLower.includes(term.toLowerCase()); - results.push({ - criterion: `Response should NOT contain "${term}"`, - passed: !found, - evidence: found ? "Found in response (should not be present)" : "Not found (correct)", - }); - } - } + // Check mustNotContain criteria + if (scenario.expectedOutput.mustNotContain) { + for (const term of scenario.expectedOutput.mustNotContain) { + const found = responseLower.includes(term.toLowerCase()); + results.push({ + criterion: `Response should NOT contain "${term}"`, + passed: !found, + evidence: found + ? "Found in response (should not be present)" + : "Not found (correct)", + }); + } + } - // Check shouldRecommendRules - const referencedRules = extractRuleIds(response); - for (const ruleId of scenario.expectedOutput.shouldRecommendRules) { - const found = referencedRules.includes(ruleId); - results.push({ - criterion: `Should recommend rule ${ruleId}`, - passed: found, - evidence: found ? "Rule referenced" : "Rule not referenced", - }); - } + // Check shouldRecommendRules + const referencedRules = extractRuleIds(response); + for (const ruleId of scenario.expectedOutput.shouldRecommendRules) { + const found = referencedRules.includes(ruleId); + results.push({ + criterion: `Should recommend rule ${ruleId}`, + passed: found, + evidence: found ? "Rule referenced" : "Rule not referenced", + }); + } - // Check shouldNotRecommendRules - if (scenario.expectedOutput.shouldNotRecommendRules) { - for (const ruleId of scenario.expectedOutput.shouldNotRecommendRules) { - const found = referencedRules.includes(ruleId); - results.push({ - criterion: `Should NOT recommend rule ${ruleId}`, - passed: !found, - evidence: found ? "Rule referenced (should not be)" : "Rule not referenced (correct)", - }); - } - } + // Check shouldNotRecommendRules + if (scenario.expectedOutput.shouldNotRecommendRules) { + for (const ruleId of scenario.expectedOutput.shouldNotRecommendRules) { + const found = referencedRules.includes(ruleId); + results.push({ + criterion: `Should NOT recommend rule ${ruleId}`, + passed: !found, + evidence: found + ? "Rule referenced (should not be)" + : "Rule not referenced (correct)", + }); + } + } - return results; + return results; } /** * Run a single evaluation scenario */ export async function runEval( - scenario: EvalScenario, - config: Partial = {} + scenario: EvalScenario, + config: Partial = {}, ): Promise { - const finalConfig = { ...DEFAULT_CONFIG, ...config }; + const finalConfig = { ...DEFAULT_CONFIG, ...config }; - try { - // Load AGENTS.md - const agentsMd = readFileSync(finalConfig.agentsPath, "utf-8"); + try { + // Load AGENTS.md + const agentsMd = readFileSync(finalConfig.agentsPath, "utf-8"); - const systemPrompt = `You are a PostgreSQL expert assistant. Use the following knowledge base to provide accurate recommendations: + const systemPrompt = `You are a PostgreSQL expert assistant. Use the following knowledge base to provide accurate recommendations: ${agentsMd} @@ -134,59 +148,59 @@ IMPORTANT: When the user specifies a PostgreSQL version or available extensions, When making recommendations, reference specific rule IDs (e.g., "1.1", "2.3") from the knowledge base.`; - const userPrompt = buildUserPrompt(scenario); + const userPrompt = buildUserPrompt(scenario); - const start = Date.now(); - const { text } = await generateText({ - model: anthropic(finalConfig.model!), - system: systemPrompt, - prompt: userPrompt, - maxTokens: finalConfig.maxTokens, - temperature: finalConfig.temperature, - }); - const latencyMs = Date.now() - start; + const start = Date.now(); + const { text } = await generateText({ + model: anthropic(finalConfig.model ?? DEFAULT_CONFIG.model), + system: systemPrompt, + prompt: userPrompt, + maxTokens: finalConfig.maxTokens, + temperature: finalConfig.temperature, + }); + const latencyMs = Date.now() - start; - // Evaluate the response - const criteriaResults = evaluateCriteria(scenario, text); - const rulesReferenced = extractRuleIds(text); - const passed = criteriaResults.every((r) => r.passed); + // Evaluate the response + const criteriaResults = evaluateCriteria(scenario, text); + const rulesReferenced = extractRuleIds(text); + const passed = criteriaResults.every((r) => r.passed); - return { - scenarioId: scenario.id, - passed, - rulesReferenced, - criteriaResults, - response: text, - latencyMs, - }; - } catch (error) { - return { - scenarioId: scenario.id, - passed: false, - rulesReferenced: [], - criteriaResults: [], - response: "", - latencyMs: 0, - error: error instanceof Error ? error.message : String(error), - }; - } + return { + scenarioId: scenario.id, + passed, + rulesReferenced, + criteriaResults, + response: text, + latencyMs, + }; + } catch (error) { + return { + scenarioId: scenario.id, + passed: false, + rulesReferenced: [], + criteriaResults: [], + response: "", + latencyMs: 0, + error: error instanceof Error ? error.message : String(error), + }; + } } /** * Run multiple evaluation scenarios */ export async function runEvals( - scenarios: EvalScenario[], - config: Partial = {} + scenarios: EvalScenario[], + config: Partial = {}, ): Promise { - const results: EvalResult[] = []; + const results: EvalResult[] = []; - for (const scenario of scenarios) { - console.log(`Running eval: ${scenario.name}...`); - const result = await runEval(scenario, config); - results.push(result); - console.log(` ${result.passed ? "PASS" : "FAIL"} (${result.latencyMs}ms)`); - } + for (const scenario of scenarios) { + console.log(`Running eval: ${scenario.name}...`); + const result = await runEval(scenario, config); + results.push(result); + console.log(` ${result.passed ? "PASS" : "FAIL"} (${result.latencyMs}ms)`); + } - return results; + return results; } diff --git a/skills/postgres-best-practices/evals/scenarios/covering-index.eval.ts b/skills/postgres-best-practices/evals/scenarios/covering-index.eval.ts index e0d4e24..169ea47 100644 --- a/skills/postgres-best-practices/evals/scenarios/covering-index.eval.ts +++ b/skills/postgres-best-practices/evals/scenarios/covering-index.eval.ts @@ -1,16 +1,16 @@ -import { describe, it, expect } from "vitest"; +import { describe, expect, it } from "vitest"; import { runEval } from "../runner.js"; import type { EvalScenario } from "../types.js"; const scenario: EvalScenario = { - id: "covering-index-suggestion", - name: "Covering Index Suggestion", - description: - "Agent should suggest using INCLUDE clause for columns in SELECT that aren't in WHERE clause", - category: "query-performance", - difficulty: "intermediate", - input: { - schema: ` + id: "covering-index-suggestion", + name: "Covering Index Suggestion", + description: + "Agent should suggest using INCLUDE clause for columns in SELECT that aren't in WHERE clause", + category: "query-performance", + difficulty: "intermediate", + input: { + schema: ` CREATE TABLE users ( id SERIAL PRIMARY KEY, email VARCHAR(255) NOT NULL, @@ -22,41 +22,40 @@ CREATE TABLE users ( CREATE INDEX users_email_idx ON users (email); -- Table has 2 million rows `, - userQuery: `This query still does heap fetches even though we have an index on email: + userQuery: `This query still does heap fetches even though we have an index on email: SELECT email, name, department FROM users WHERE email = 'user@example.com' EXPLAIN shows "Index Scan" but not "Index Only Scan". How can I avoid the table lookup?`, - postgresVersion: "15.4", - }, - expectedOutput: { - shouldRecommendRules: ["1.2"], // query-covering-indexes - mustContain: ["include", "covering"], - }, - expectedReasoning: [ - "Identify that the query selects columns (name, department) not in the index", - "Recognize this causes additional heap fetches after the index scan", - "Recommend using INCLUDE clause to create a covering index", - "Explain that this enables index-only scans", - ], + postgresVersion: "15.4", + }, + expectedOutput: { + shouldRecommendRules: ["1.2"], // query-covering-indexes + mustContain: ["include", "covering"], + }, + expectedReasoning: [ + "Identify that the query selects columns (name, department) not in the index", + "Recognize this causes additional heap fetches after the index scan", + "Recommend using INCLUDE clause to create a covering index", + "Explain that this enables index-only scans", + ], }; describe("Covering Index Suggestion", () => { - it("should recommend INCLUDE clause for covering index", async () => { - const result = await runEval(scenario); + it("should recommend INCLUDE clause for covering index", async () => { + const result = await runEval(scenario); - console.log("Response:", result.response); - console.log("Criteria results:", result.criteriaResults); + console.log("Response:", result.response); + console.log("Criteria results:", result.criteriaResults); - // Response should mention INCLUDE keyword - expect(result.response.toLowerCase()).toContain("include"); + // Response should mention INCLUDE keyword + expect(result.response.toLowerCase()).toContain("include"); - // Response should mention covering index concept - const responseLower = result.response.toLowerCase(); - expect( - responseLower.includes("covering") || responseLower.includes("index-only") - ).toBe(true); - }); + // Response should mention covering index concept + const responseLower = result.response.toLowerCase(); + expect( + responseLower.includes("covering") || + responseLower.includes("index-only"), + ).toBe(true); + }); }); - -export { scenario }; diff --git a/skills/postgres-best-practices/evals/scenarios/extension-available.eval.ts b/skills/postgres-best-practices/evals/scenarios/extension-available.eval.ts index 0b87c31..479a26f 100644 --- a/skills/postgres-best-practices/evals/scenarios/extension-available.eval.ts +++ b/skills/postgres-best-practices/evals/scenarios/extension-available.eval.ts @@ -1,56 +1,54 @@ -import { describe, it, expect } from "vitest"; +import { describe, expect, it } from "vitest"; import { runEval } from "../runner.js"; import type { EvalScenario } from "../types.js"; const scenario: EvalScenario = { - id: "extension-available-pg-stat-statements", - name: "Extension Available - pg_stat_statements", - description: - "Agent should recommend pg_stat_statements for query monitoring when the extension is available", - category: "extension-requirements", - difficulty: "basic", - input: { - schema: ` + id: "extension-available-pg-stat-statements", + name: "Extension Available - pg_stat_statements", + description: + "Agent should recommend pg_stat_statements for query monitoring when the extension is available", + category: "extension-requirements", + difficulty: "basic", + input: { + schema: ` -- Production database with various tables CREATE TABLE users (id SERIAL PRIMARY KEY, email VARCHAR(255)); CREATE TABLE orders (id SERIAL PRIMARY KEY, user_id INT, total DECIMAL); CREATE TABLE products (id SERIAL PRIMARY KEY, name VARCHAR(200), price DECIMAL); `, - userQuery: - "Our database is slow but we don't know which queries are causing the problem. How can we identify the slowest queries?", - postgresVersion: "15.4", - availableExtensions: ["pg_stat_statements", "pgcrypto", "uuid-ossp"], - }, - expectedOutput: { - shouldRecommendRules: ["7.1"], // monitor-pg-stat-statements - mustContain: ["pg_stat_statements"], - }, - expectedReasoning: [ - "Recognize this is a query monitoring/performance diagnosis problem", - "Check that pg_stat_statements is available in the extensions list", - "Recommend enabling pg_stat_statements for query analysis", - "Explain how to use it to find slow queries", - ], + userQuery: + "Our database is slow but we don't know which queries are causing the problem. How can we identify the slowest queries?", + postgresVersion: "15.4", + availableExtensions: ["pg_stat_statements", "pgcrypto", "uuid-ossp"], + }, + expectedOutput: { + shouldRecommendRules: ["7.1"], // monitor-pg-stat-statements + mustContain: ["pg_stat_statements"], + }, + expectedReasoning: [ + "Recognize this is a query monitoring/performance diagnosis problem", + "Check that pg_stat_statements is available in the extensions list", + "Recommend enabling pg_stat_statements for query analysis", + "Explain how to use it to find slow queries", + ], }; describe("Extension Available - pg_stat_statements", () => { - it("should recommend pg_stat_statements when available", async () => { - const result = await runEval(scenario); + it("should recommend pg_stat_statements when available", async () => { + const result = await runEval(scenario); - console.log("Response:", result.response); - console.log("Criteria results:", result.criteriaResults); + console.log("Response:", result.response); + console.log("Criteria results:", result.criteriaResults); - // Response should mention pg_stat_statements - expect(result.response.toLowerCase()).toContain("pg_stat_statements"); + // Response should mention pg_stat_statements + expect(result.response.toLowerCase()).toContain("pg_stat_statements"); - // Should suggest enabling/using the extension - const responseLower = result.response.toLowerCase(); - expect( - responseLower.includes("create extension") || - responseLower.includes("enable") || - responseLower.includes("query") - ).toBe(true); - }); + // Should suggest enabling/using the extension + const responseLower = result.response.toLowerCase(); + expect( + responseLower.includes("create extension") || + responseLower.includes("enable") || + responseLower.includes("query"), + ).toBe(true); + }); }); - -export { scenario }; diff --git a/skills/postgres-best-practices/evals/scenarios/extension-unavailable.eval.ts b/skills/postgres-best-practices/evals/scenarios/extension-unavailable.eval.ts index 14d4b00..b323bb9 100644 --- a/skills/postgres-best-practices/evals/scenarios/extension-unavailable.eval.ts +++ b/skills/postgres-best-practices/evals/scenarios/extension-unavailable.eval.ts @@ -1,56 +1,56 @@ -import { describe, it, expect } from "vitest"; +import { describe, expect, it } from "vitest"; import { runEval } from "../runner.js"; import type { EvalScenario } from "../types.js"; const scenario: EvalScenario = { - id: "extension-unavailable-no-pg-stat-statements", - name: "Extension Unavailable - No pg_stat_statements", - description: - "Agent should provide alternatives when pg_stat_statements is not available for query monitoring", - category: "extension-requirements", - difficulty: "intermediate", - input: { - schema: ` + id: "extension-unavailable-no-pg-stat-statements", + name: "Extension Unavailable - No pg_stat_statements", + description: + "Agent should provide alternatives when pg_stat_statements is not available for query monitoring", + category: "extension-requirements", + difficulty: "intermediate", + input: { + schema: ` -- Production database with various tables CREATE TABLE users (id SERIAL PRIMARY KEY, email VARCHAR(255)); CREATE TABLE orders (id SERIAL PRIMARY KEY, user_id INT, total DECIMAL); CREATE TABLE products (id SERIAL PRIMARY KEY, name VARCHAR(200), price DECIMAL); `, - userQuery: - "Our database is slow but we don't know which queries are causing the problem. How can we identify the slowest queries?", - postgresVersion: "15.4", - availableExtensions: [], // No extensions available - context: - "This is a managed database environment where we cannot install additional extensions.", - }, - expectedOutput: { - shouldRecommendRules: [], // Should not recommend pg_stat_statements rule - shouldNotRecommendRules: ["7.1"], // monitor-pg-stat-statements - mustContain: ["explain", "analyze"], - mustNotContain: ["pg_stat_statements"], - }, - expectedReasoning: [ - "Recognize that no extensions are available", - "Check that pg_stat_statements cannot be used", - "Avoid recommending pg_stat_statements", - "Suggest alternative approaches like EXPLAIN ANALYZE, log_min_duration_statement, or pg_stat_activity", - ], + userQuery: + "Our database is slow but we don't know which queries are causing the problem. How can we identify the slowest queries?", + postgresVersion: "15.4", + availableExtensions: [], // No extensions available + context: + "This is a managed database environment where we cannot install additional extensions.", + }, + expectedOutput: { + shouldRecommendRules: [], // Should not recommend pg_stat_statements rule + shouldNotRecommendRules: ["7.1"], // monitor-pg-stat-statements + mustContain: ["explain", "analyze"], + mustNotContain: ["pg_stat_statements"], + }, + expectedReasoning: [ + "Recognize that no extensions are available", + "Check that pg_stat_statements cannot be used", + "Avoid recommending pg_stat_statements", + "Suggest alternative approaches like EXPLAIN ANALYZE, log_min_duration_statement, or pg_stat_activity", + ], }; describe("Extension Unavailable - No pg_stat_statements", () => { - it("should suggest alternatives when pg_stat_statements is unavailable", async () => { - const result = await runEval(scenario); + it("should suggest alternatives when pg_stat_statements is unavailable", async () => { + const result = await runEval(scenario); - console.log("Response:", result.response); - console.log("Criteria results:", result.criteriaResults); + console.log("Response:", result.response); + console.log("Criteria results:", result.criteriaResults); - // Response should NOT primarily recommend pg_stat_statements - // (it might mention it as unavailable, but shouldn't suggest installing it) - const responseLower = result.response.toLowerCase(); + // Response should NOT primarily recommend pg_stat_statements + // (it might mention it as unavailable, but shouldn't suggest installing it) + const responseLower = result.response.toLowerCase(); - // Should suggest EXPLAIN ANALYZE as an alternative - expect(responseLower.includes("explain") && responseLower.includes("analyze")).toBe(true); - }); + // Should suggest EXPLAIN ANALYZE as an alternative + expect( + responseLower.includes("explain") && responseLower.includes("analyze"), + ).toBe(true); + }); }); - -export { scenario }; diff --git a/skills/postgres-best-practices/evals/scenarios/missing-index.eval.ts b/skills/postgres-best-practices/evals/scenarios/missing-index.eval.ts index 8db466a..55fd009 100644 --- a/skills/postgres-best-practices/evals/scenarios/missing-index.eval.ts +++ b/skills/postgres-best-practices/evals/scenarios/missing-index.eval.ts @@ -1,16 +1,16 @@ -import { describe, it, expect } from "vitest"; +import { describe, expect, it } from "vitest"; import { runEval } from "../runner.js"; import type { EvalScenario } from "../types.js"; const scenario: EvalScenario = { - id: "missing-index-detection", - name: "Missing Index Detection", - description: - "Agent should identify missing index on WHERE clause columns and recommend creating an appropriate index", - category: "query-performance", - difficulty: "basic", - input: { - schema: ` + id: "missing-index-detection", + name: "Missing Index Detection", + description: + "Agent should identify missing index on WHERE clause columns and recommend creating an appropriate index", + category: "query-performance", + difficulty: "basic", + input: { + schema: ` CREATE TABLE orders ( id SERIAL PRIMARY KEY, customer_id INT NOT NULL, @@ -21,36 +21,36 @@ CREATE TABLE orders ( -- No indexes besides primary key -- Table has 5 million rows `, - userQuery: - "This query is slow and takes 3 seconds: SELECT * FROM orders WHERE customer_id = 12345 AND status = 'pending'", - }, - expectedOutput: { - shouldRecommendRules: ["1.1"], // query-missing-indexes - mustContain: ["index", "customer_id"], - }, - expectedReasoning: [ - "Identify that the query filters on customer_id and status", - "Recognize that without an index, this causes a sequential scan", - "Recommend creating an index on the filtered columns", - ], + userQuery: + "This query is slow and takes 3 seconds: SELECT * FROM orders WHERE customer_id = 12345 AND status = 'pending'", + }, + expectedOutput: { + shouldRecommendRules: ["1.1"], // query-missing-indexes + mustContain: ["index", "customer_id"], + }, + expectedReasoning: [ + "Identify that the query filters on customer_id and status", + "Recognize that without an index, this causes a sequential scan", + "Recommend creating an index on the filtered columns", + ], }; describe("Missing Index Detection", () => { - it("should recommend creating an index on filtered columns", async () => { - const result = await runEval(scenario); + it("should recommend creating an index on filtered columns", async () => { + const result = await runEval(scenario); - console.log("Response:", result.response); - console.log("Criteria results:", result.criteriaResults); + console.log("Response:", result.response); + console.log("Criteria results:", result.criteriaResults); - // Check that key criteria passed - expect(result.criteriaResults.some((c) => c.criterion.includes("index") && c.passed)).toBe( - true - ); + // Check that key criteria passed + expect( + result.criteriaResults.some( + (c) => c.criterion.includes("index") && c.passed, + ), + ).toBe(true); - // Response should mention creating an index - expect(result.response.toLowerCase()).toContain("index"); - expect(result.response.toLowerCase()).toContain("customer_id"); - }); + // Response should mention creating an index + expect(result.response.toLowerCase()).toContain("index"); + expect(result.response.toLowerCase()).toContain("customer_id"); + }); }); - -export { scenario }; diff --git a/skills/postgres-best-practices/evals/scenarios/n-plus-one.eval.ts b/skills/postgres-best-practices/evals/scenarios/n-plus-one.eval.ts index 6268e86..3ea87c7 100644 --- a/skills/postgres-best-practices/evals/scenarios/n-plus-one.eval.ts +++ b/skills/postgres-best-practices/evals/scenarios/n-plus-one.eval.ts @@ -1,16 +1,16 @@ -import { describe, it, expect } from "vitest"; +import { describe, expect, it } from "vitest"; import { runEval } from "../runner.js"; import type { EvalScenario } from "../types.js"; const scenario: EvalScenario = { - id: "n-plus-one-detection", - name: "N+1 Query Detection", - description: - "Agent should identify N+1 query pattern in application code and recommend using JOINs or batch queries", - category: "query-performance", - difficulty: "intermediate", - input: { - schema: ` + id: "n-plus-one-detection", + name: "N+1 Query Detection", + description: + "Agent should identify N+1 query pattern in application code and recommend using JOINs or batch queries", + category: "query-performance", + difficulty: "intermediate", + input: { + schema: ` CREATE TABLE users ( id SERIAL PRIMARY KEY, name VARCHAR(100), @@ -25,7 +25,7 @@ CREATE TABLE posts ( created_at TIMESTAMPTZ DEFAULT NOW() ); `, - userQuery: `My API endpoint is slow. Here's the code: + userQuery: `My API endpoint is slow. Here's the code: \`\`\`typescript // Get all posts @@ -39,33 +39,33 @@ for (const post of posts) { \`\`\` This makes 101 database queries. How can I optimize it?`, - }, - expectedOutput: { - shouldRecommendRules: ["6.1"], // data-n-plus-one - mustContain: ["join", "n+1"], - }, - expectedReasoning: [ - "Identify the N+1 query pattern (1 query for posts + N queries for users)", - "Recognize this as a common performance anti-pattern", - "Recommend using a JOIN to fetch all data in a single query", - "Optionally suggest using IN clause for batch fetching", - ], + }, + expectedOutput: { + shouldRecommendRules: ["6.1"], // data-n-plus-one + mustContain: ["join", "n+1"], + }, + expectedReasoning: [ + "Identify the N+1 query pattern (1 query for posts + N queries for users)", + "Recognize this as a common performance anti-pattern", + "Recommend using a JOIN to fetch all data in a single query", + "Optionally suggest using IN clause for batch fetching", + ], }; describe("N+1 Query Detection", () => { - it("should identify N+1 pattern and recommend JOIN", async () => { - const result = await runEval(scenario); + it("should identify N+1 pattern and recommend JOIN", async () => { + const result = await runEval(scenario); - console.log("Response:", result.response); - console.log("Criteria results:", result.criteriaResults); + console.log("Response:", result.response); + console.log("Criteria results:", result.criteriaResults); - // Response should mention JOIN - expect(result.response.toLowerCase()).toContain("join"); + // Response should mention JOIN + expect(result.response.toLowerCase()).toContain("join"); - // Response should explain the N+1 problem - const responseLower = result.response.toLowerCase(); - expect(responseLower.includes("n+1") || responseLower.includes("n + 1")).toBe(true); - }); + // Response should explain the N+1 problem + const responseLower = result.response.toLowerCase(); + expect( + responseLower.includes("n+1") || responseLower.includes("n + 1"), + ).toBe(true); + }); }); - -export { scenario }; diff --git a/skills/postgres-best-practices/evals/scenarios/version-constraint.eval.ts b/skills/postgres-best-practices/evals/scenarios/version-constraint.eval.ts index 86adbd3..72de967 100644 --- a/skills/postgres-best-practices/evals/scenarios/version-constraint.eval.ts +++ b/skills/postgres-best-practices/evals/scenarios/version-constraint.eval.ts @@ -1,4 +1,4 @@ -import { describe, it, expect } from "vitest"; +import { describe, expect, it } from "vitest"; import { runEval } from "../runner.js"; import type { EvalScenario } from "../types.js"; @@ -6,14 +6,14 @@ import type { EvalScenario } from "../types.js"; * Scenario 1: PG10 - Should NOT recommend covering indexes (requires PG11+) */ const scenarioPg10NoCoveringIndex: EvalScenario = { - id: "version-constraint-pg10-no-covering", - name: "Version Constraint - PG10 No Covering Index", - description: - "Agent should NOT recommend INCLUDE clause on PostgreSQL 10 since it requires PG11+", - category: "version-constraints", - difficulty: "intermediate", - input: { - schema: ` + id: "version-constraint-pg10-no-covering", + name: "Version Constraint - PG10 No Covering Index", + description: + "Agent should NOT recommend INCLUDE clause on PostgreSQL 10 since it requires PG11+", + category: "version-constraints", + difficulty: "intermediate", + input: { + schema: ` CREATE TABLE users ( id SERIAL PRIMARY KEY, email VARCHAR(255) NOT NULL, @@ -23,36 +23,36 @@ CREATE TABLE users ( CREATE INDEX users_email_idx ON users (email); `, - userQuery: - "How can I optimize this query to avoid heap fetches? SELECT email, name FROM users WHERE email = 'test@example.com'", - postgresVersion: "10.0", - }, - expectedOutput: { - shouldRecommendRules: [], - shouldNotRecommendRules: ["1.2"], // query-covering-indexes requires PG11 - mustContain: ["index"], - mustNotContain: ["include"], - }, - expectedReasoning: [ - "Recognize that PostgreSQL 10 is specified", - "Check that covering indexes (INCLUDE clause) require PG11+", - "Avoid recommending INCLUDE clause", - "Suggest alternative optimization strategies appropriate for PG10", - ], + userQuery: + "How can I optimize this query to avoid heap fetches? SELECT email, name FROM users WHERE email = 'test@example.com'", + postgresVersion: "10.0", + }, + expectedOutput: { + shouldRecommendRules: [], + shouldNotRecommendRules: ["1.2"], // query-covering-indexes requires PG11 + mustContain: ["index"], + mustNotContain: ["include"], + }, + expectedReasoning: [ + "Recognize that PostgreSQL 10 is specified", + "Check that covering indexes (INCLUDE clause) require PG11+", + "Avoid recommending INCLUDE clause", + "Suggest alternative optimization strategies appropriate for PG10", + ], }; /** * Scenario 2: PG9.3 - Should NOT recommend UPSERT (requires PG9.5+) */ const scenarioPg93NoUpsert: EvalScenario = { - id: "version-constraint-pg93-no-upsert", - name: "Version Constraint - PG9.3 No UPSERT", - description: - "Agent should NOT recommend ON CONFLICT on PostgreSQL 9.3 since it requires PG9.5+", - category: "version-constraints", - difficulty: "intermediate", - input: { - schema: ` + id: "version-constraint-pg93-no-upsert", + name: "Version Constraint - PG9.3 No UPSERT", + description: + "Agent should NOT recommend ON CONFLICT on PostgreSQL 9.3 since it requires PG9.5+", + category: "version-constraints", + difficulty: "intermediate", + input: { + schema: ` CREATE TABLE settings ( user_id INT NOT NULL, key VARCHAR(50) NOT NULL, @@ -60,49 +60,47 @@ CREATE TABLE settings ( PRIMARY KEY (user_id, key) ); `, - userQuery: - "I need to insert a setting if it doesn't exist, or update it if it does. How should I do this?", - postgresVersion: "9.3", - }, - expectedOutput: { - shouldRecommendRules: [], - shouldNotRecommendRules: ["6.3"], // data-upsert requires PG9.5 - mustContain: ["insert", "update"], - mustNotContain: ["on conflict"], - }, - expectedReasoning: [ - "Recognize that PostgreSQL 9.3 is specified", - "Check that ON CONFLICT (UPSERT) requires PG9.5+", - "Avoid recommending ON CONFLICT syntax", - "Suggest alternative pattern (e.g., CTE with INSERT/UPDATE, or try/catch approach)", - ], + userQuery: + "I need to insert a setting if it doesn't exist, or update it if it does. How should I do this?", + postgresVersion: "9.3", + }, + expectedOutput: { + shouldRecommendRules: [], + shouldNotRecommendRules: ["6.3"], // data-upsert requires PG9.5 + mustContain: ["insert", "update"], + mustNotContain: ["on conflict"], + }, + expectedReasoning: [ + "Recognize that PostgreSQL 9.3 is specified", + "Check that ON CONFLICT (UPSERT) requires PG9.5+", + "Avoid recommending ON CONFLICT syntax", + "Suggest alternative pattern (e.g., CTE with INSERT/UPDATE, or try/catch approach)", + ], }; describe("Version Constraint Tests", () => { - describe("PG10 - No Covering Index", () => { - it("should NOT recommend INCLUDE clause for PG10", async () => { - const result = await runEval(scenarioPg10NoCoveringIndex); + describe("PG10 - No Covering Index", () => { + it("should NOT recommend INCLUDE clause for PG10", async () => { + const result = await runEval(scenarioPg10NoCoveringIndex); - console.log("Response:", result.response); - console.log("Criteria results:", result.criteriaResults); + console.log("Response:", result.response); + console.log("Criteria results:", result.criteriaResults); - // Response should NOT contain INCLUDE recommendation - expect(result.response.toLowerCase()).not.toContain("include ("); - expect(result.response.toLowerCase()).not.toContain("include("); - }); - }); + // Response should NOT contain INCLUDE recommendation + expect(result.response.toLowerCase()).not.toContain("include ("); + expect(result.response.toLowerCase()).not.toContain("include("); + }); + }); - describe("PG9.3 - No UPSERT", () => { - it("should NOT recommend ON CONFLICT for PG9.3", async () => { - const result = await runEval(scenarioPg93NoUpsert); + describe("PG9.3 - No UPSERT", () => { + it("should NOT recommend ON CONFLICT for PG9.3", async () => { + const result = await runEval(scenarioPg93NoUpsert); - console.log("Response:", result.response); - console.log("Criteria results:", result.criteriaResults); + console.log("Response:", result.response); + console.log("Criteria results:", result.criteriaResults); - // Response should NOT recommend ON CONFLICT - expect(result.response.toLowerCase()).not.toContain("on conflict"); - }); - }); + // Response should NOT recommend ON CONFLICT + expect(result.response.toLowerCase()).not.toContain("on conflict"); + }); + }); }); - -export { scenarioPg10NoCoveringIndex, scenarioPg93NoUpsert }; diff --git a/skills/postgres-best-practices/evals/tsconfig.json b/skills/postgres-best-practices/evals/tsconfig.json index 2451b91..39fa291 100644 --- a/skills/postgres-best-practices/evals/tsconfig.json +++ b/skills/postgres-best-practices/evals/tsconfig.json @@ -1,13 +1,13 @@ { - "compilerOptions": { - "target": "ES2022", - "module": "ESNext", - "moduleResolution": "bundler", - "esModuleInterop": true, - "strict": true, - "skipLibCheck": true, - "outDir": "dist", - "declaration": true - }, - "include": ["*.ts", "scenarios/**/*.ts"] + "compilerOptions": { + "target": "ES2022", + "module": "ESNext", + "moduleResolution": "bundler", + "esModuleInterop": true, + "strict": true, + "skipLibCheck": true, + "outDir": "dist", + "declaration": true + }, + "include": ["*.ts", "scenarios/**/*.ts"] } diff --git a/skills/postgres-best-practices/evals/types.ts b/skills/postgres-best-practices/evals/types.ts index ed013c6..bdf9115 100644 --- a/skills/postgres-best-practices/evals/types.ts +++ b/skills/postgres-best-practices/evals/types.ts @@ -2,111 +2,114 @@ * Evaluation scenario definition */ export interface EvalScenario { - /** Unique identifier for the scenario */ - id: string; + /** Unique identifier for the scenario */ + id: string; - /** Human-readable name */ - name: string; + /** Human-readable name */ + name: string; - /** Description of what this scenario tests */ - description: string; + /** Description of what this scenario tests */ + description: string; - /** Category of the scenario */ - category: "query-performance" | "version-constraints" | "extension-requirements"; + /** Category of the scenario */ + category: + | "query-performance" + | "version-constraints" + | "extension-requirements"; - /** Difficulty level */ - difficulty: "basic" | "intermediate" | "advanced"; + /** Difficulty level */ + difficulty: "basic" | "intermediate" | "advanced"; - /** Input for the scenario */ - input: { - /** SQL schema context */ - schema: string; + /** Input for the scenario */ + input: { + /** SQL schema context */ + schema: string; - /** User's question or request */ - userQuery: string; + /** User's question or request */ + userQuery: string; - /** Optional PostgreSQL version (e.g., "10.0", "15.4") */ - postgresVersion?: string; + /** Optional PostgreSQL version (e.g., "10.0", "15.4") */ + postgresVersion?: string; - /** Optional list of available extensions */ - availableExtensions?: string[]; + /** Optional list of available extensions */ + availableExtensions?: string[]; - /** Additional context */ - context?: string; - }; + /** Additional context */ + context?: string; + }; - /** Expected output criteria */ - expectedOutput: { - /** Rule IDs that should be recommended */ - shouldRecommendRules: string[]; + /** Expected output criteria */ + expectedOutput: { + /** Rule IDs that should be recommended */ + shouldRecommendRules: string[]; - /** Rule IDs that should NOT be recommended (version/extension constraints) */ - shouldNotRecommendRules?: string[]; + /** Rule IDs that should NOT be recommended (version/extension constraints) */ + shouldNotRecommendRules?: string[]; - /** Strings that must appear in the response */ - mustContain: string[]; + /** Strings that must appear in the response */ + mustContain: string[]; - /** Strings that must NOT appear in the response */ - mustNotContain?: string[]; - }; + /** Strings that must NOT appear in the response */ + mustNotContain?: string[]; + }; - /** Expected reasoning steps the agent should follow */ - expectedReasoning: string[]; + /** Expected reasoning steps the agent should follow */ + expectedReasoning: string[]; } /** * Result of evaluating a single criterion */ export interface CriterionResult { - /** Description of the criterion */ - criterion: string; + /** Description of the criterion */ + criterion: string; - /** Whether the criterion passed */ - passed: boolean; + /** Whether the criterion passed */ + passed: boolean; - /** Evidence or explanation */ - evidence?: string; + /** Evidence or explanation */ + evidence?: string; } /** * Result of running an evaluation scenario */ export interface EvalResult { - /** Scenario ID */ - scenarioId: string; + /** Scenario ID */ + scenarioId: string; - /** Whether all criteria passed */ - passed: boolean; + /** Whether all criteria passed */ + passed: boolean; - /** Rule IDs that were referenced in the response */ - rulesReferenced: string[]; + /** Rule IDs that were referenced in the response */ + rulesReferenced: string[]; - /** Results for each evaluation criterion */ - criteriaResults: CriterionResult[]; + /** Results for each evaluation criterion */ + criteriaResults: CriterionResult[]; - /** The agent's full response */ - response: string; + /** The agent's full response */ + response: string; - /** Time taken in milliseconds */ - latencyMs: number; + /** Time taken in milliseconds */ + latencyMs: number; - /** Error message if evaluation failed */ - error?: string; + /** Error message if evaluation failed */ + error?: string; } /** * Configuration for the eval runner */ export interface EvalConfig { - /** Path to AGENTS.md file */ - agentsPath: string; + /** Path to AGENTS.md file */ + agentsPath: string; - /** Model to use for evaluation */ - model?: string; + /** Model to use for evaluation */ + model?: string; - /** Maximum tokens for response */ - maxTokens?: number; + /** Maximum tokens for response */ + maxTokens?: number; - /** Temperature for generation */ - temperature?: number; + /** Temperature for generation */ + temperature?: number; } diff --git a/skills/postgres-best-practices/evals/utils.ts b/skills/postgres-best-practices/evals/utils.ts index aa2c46e..ba24395 100644 --- a/skills/postgres-best-practices/evals/utils.ts +++ b/skills/postgres-best-practices/evals/utils.ts @@ -4,69 +4,71 @@ import type { EvalResult, EvalScenario } from "./types.js"; * Format eval results as a summary table */ export function formatResultsSummary(results: EvalResult[]): string { - const lines: string[] = []; + const lines: string[] = []; - lines.push("## Eval Results Summary\n"); + lines.push("## Eval Results Summary\n"); - const passed = results.filter((r) => r.passed).length; - const total = results.length; - const passRate = ((passed / total) * 100).toFixed(1); + const passed = results.filter((r) => r.passed).length; + const total = results.length; + const passRate = ((passed / total) * 100).toFixed(1); - lines.push(`**Pass Rate:** ${passed}/${total} (${passRate}%)\n`); + lines.push(`**Pass Rate:** ${passed}/${total} (${passRate}%)\n`); - lines.push("| Scenario | Status | Latency | Rules Referenced |"); - lines.push("|----------|--------|---------|------------------|"); + lines.push("| Scenario | Status | Latency | Rules Referenced |"); + lines.push("|----------|--------|---------|------------------|"); - for (const result of results) { - const status = result.passed ? "PASS" : "FAIL"; - const latency = `${result.latencyMs}ms`; - const rules = result.rulesReferenced.join(", ") || "none"; - lines.push(`| ${result.scenarioId} | ${status} | ${latency} | ${rules} |`); - } + for (const result of results) { + const status = result.passed ? "PASS" : "FAIL"; + const latency = `${result.latencyMs}ms`; + const rules = result.rulesReferenced.join(", ") || "none"; + lines.push(`| ${result.scenarioId} | ${status} | ${latency} | ${rules} |`); + } - return lines.join("\n"); + return lines.join("\n"); } /** * Format detailed results for a single scenario */ export function formatDetailedResult(result: EvalResult): string { - const lines: string[] = []; + const lines: string[] = []; - lines.push(`## ${result.scenarioId}\n`); - lines.push(`**Status:** ${result.passed ? "PASS" : "FAIL"}`); - lines.push(`**Latency:** ${result.latencyMs}ms`); - lines.push(`**Rules Referenced:** ${result.rulesReferenced.join(", ") || "none"}\n`); + lines.push(`## ${result.scenarioId}\n`); + lines.push(`**Status:** ${result.passed ? "PASS" : "FAIL"}`); + lines.push(`**Latency:** ${result.latencyMs}ms`); + lines.push( + `**Rules Referenced:** ${result.rulesReferenced.join(", ") || "none"}\n`, + ); - if (result.error) { - lines.push(`**Error:** ${result.error}\n`); - } + if (result.error) { + lines.push(`**Error:** ${result.error}\n`); + } - lines.push("### Criteria Results\n"); - for (const criterion of result.criteriaResults) { - const icon = criterion.passed ? "+" : "-"; - lines.push(`${icon} ${criterion.criterion}`); - if (criterion.evidence) { - lines.push(` Evidence: ${criterion.evidence}`); - } - } + lines.push("### Criteria Results\n"); + for (const criterion of result.criteriaResults) { + const icon = criterion.passed ? "+" : "-"; + lines.push(`${icon} ${criterion.criterion}`); + if (criterion.evidence) { + lines.push(` Evidence: ${criterion.evidence}`); + } + } - lines.push("\n### Response\n"); - lines.push("```"); - lines.push(result.response); - lines.push("```"); + lines.push("\n### Response\n"); + lines.push("```"); + lines.push(result.response); + lines.push("```"); - return lines.join("\n"); + return lines.join("\n"); } /** * Create a scenario builder for cleaner test definitions */ export function createScenario( - partial: Omit & { id?: string } + partial: Omit & { id?: string }, ): EvalScenario { - return { - id: partial.id || partial.name.toLowerCase().replace(/\s+/g, "-"), - ...partial, - } as EvalScenario; + return { + id: partial.id || partial.name.toLowerCase().replace(/\s+/g, "-"), + ...partial, + } as EvalScenario; } diff --git a/skills/postgres-best-practices/evals/vitest.config.ts b/skills/postgres-best-practices/evals/vitest.config.ts index c56d462..9a50b6c 100644 --- a/skills/postgres-best-practices/evals/vitest.config.ts +++ b/skills/postgres-best-practices/evals/vitest.config.ts @@ -1,9 +1,9 @@ import { defineConfig } from "vitest/config"; export default defineConfig({ - test: { - include: ["scenarios/**/*.eval.ts"], - testTimeout: 60000, // 60 seconds for LLM calls - reporters: ["verbose"], - }, + test: { + include: ["scenarios/**/*.eval.ts"], + testTimeout: 60000, // 60 seconds for LLM calls + reporters: ["verbose"], + }, }); diff --git a/skills/postgres-best-practices/profiles/aurora.json b/skills/postgres-best-practices/profiles/aurora.json index 6b79e75..9e7e480 100644 --- a/skills/postgres-best-practices/profiles/aurora.json +++ b/skills/postgres-best-practices/profiles/aurora.json @@ -1,23 +1,11 @@ { - "name": "aurora", - "minVersion": "13", - "maxVersion": "16", - "extensions": { - "available": [ - "pg_stat_statements", - "pgcrypto", - "uuid-ossp" - ], - "installable": [ - "postgis", - "pg_hint_plan", - "pg_similarity" - ], - "unavailable": [ - "pg_cron", - "pg_partman", - "timescaledb" - ] - }, - "notes": "AWS Aurora PostgreSQL. Some extensions are not available due to managed service restrictions. Aurora has its own connection pooling (RDS Proxy) and automatic failover." + "name": "aurora", + "minVersion": "13", + "maxVersion": "16", + "extensions": { + "available": ["pg_stat_statements", "pgcrypto", "uuid-ossp"], + "installable": ["postgis", "pg_hint_plan", "pg_similarity"], + "unavailable": ["pg_cron", "pg_partman", "timescaledb"] + }, + "notes": "AWS Aurora PostgreSQL. Some extensions are not available due to managed service restrictions. Aurora has its own connection pooling (RDS Proxy) and automatic failover." } diff --git a/skills/postgres-best-practices/profiles/self-hosted.json b/skills/postgres-best-practices/profiles/self-hosted.json index 3d02199..86d9386 100644 --- a/skills/postgres-best-practices/profiles/self-hosted.json +++ b/skills/postgres-best-practices/profiles/self-hosted.json @@ -1,18 +1,18 @@ { - "name": "self-hosted", - "minVersion": "12", - "extensions": { - "available": [], - "installable": [ - "pg_stat_statements", - "pgcrypto", - "uuid-ossp", - "postgis", - "pg_trgm", - "btree_gin", - "btree_gist" - ], - "unavailable": [] - }, - "notes": "Generic self-hosted PostgreSQL. Extension availability depends on server configuration. Check pg_available_extensions for what can be installed." + "name": "self-hosted", + "minVersion": "12", + "extensions": { + "available": [], + "installable": [ + "pg_stat_statements", + "pgcrypto", + "uuid-ossp", + "postgis", + "pg_trgm", + "btree_gin", + "btree_gist" + ], + "unavailable": [] + }, + "notes": "Generic self-hosted PostgreSQL. Extension availability depends on server configuration. Check pg_available_extensions for what can be installed." } diff --git a/skills/postgres-best-practices/profiles/supabase.json b/skills/postgres-best-practices/profiles/supabase.json index be0a2f6..6210d62 100644 --- a/skills/postgres-best-practices/profiles/supabase.json +++ b/skills/postgres-best-practices/profiles/supabase.json @@ -1,27 +1,27 @@ { - "name": "supabase", - "minVersion": "15", - "extensions": { - "available": [ - "pg_stat_statements", - "pgcrypto", - "uuid-ossp", - "pgjwt", - "pg_graphql", - "pg_net", - "pgsodium", - "supabase_vault", - "pg_jsonschema" - ], - "installable": [ - "postgis", - "pg_cron", - "pgtap", - "plv8", - "http", - "pg_hashids" - ], - "unavailable": [] - }, - "notes": "Supabase manages connection pooling via Supavisor. Direct connection limits differ from pooled connections. All standard Postgres extensions are available." + "name": "supabase", + "minVersion": "15", + "extensions": { + "available": [ + "pg_stat_statements", + "pgcrypto", + "uuid-ossp", + "pgjwt", + "pg_graphql", + "pg_net", + "pgsodium", + "supabase_vault", + "pg_jsonschema" + ], + "installable": [ + "postgis", + "pg_cron", + "pgtap", + "plv8", + "http", + "pg_hashids" + ], + "unavailable": [] + }, + "notes": "Supabase manages connection pooling via Supavisor. Direct connection limits differ from pooled connections. All standard Postgres extensions are available." }