improve postgres best practices and add evals

2026-01-26 19:09:51 +08:00 · 2026-01-23 17:26:45 +00:00
parent bbde7ff5f8
commit 1d9f4ea441
33 changed files with 6024 additions and 11 deletions
--- a/.mcp.json
+++ b/.mcp.json
@@ -3,6 +3,10 @@
 		"supabase": {
 			"type": "http",
 			"url": "https://mcp.supabase.com/mcp?features=docs"
 		},
 		"linear": {
 			"type": "http",
 			"url": "https://mcp.linear.app/mcp"
 		}
 	}
 }
--- a/packages/skills-build/src/build.ts
+++ b/packages/skills-build/src/build.ts
@@ -7,7 +7,8 @@ import {
 	validateSkillExists,
 } from "./config.js";
 import { parseRuleFile } from "./parser.js";
-import type { Metadata, Rule, Section } from "./types.js";
+import { filterRulesForProfile, listProfiles, loadProfile } from "./profiles.js";
 import type { Metadata, Profile, Rule, Section } from "./types.js";
 import { validateRuleFile } from "./validate.js";
 /**
@@ -100,8 +101,13 @@ export function generateSectionMap(
 /**
 * Build AGENTS.md for a specific skill
 */
-function buildSkill(paths: SkillPaths): void {
+function buildSkill(paths: SkillPaths, profile?: Profile): void {
-	console.log(`[${paths.name}] Building AGENTS.md...`);
+	const profileSuffix = profile ? `.${profile.name}` : "";
 	const outputFile = profile
 		? paths.agentsOutput.replace(".md", `${profileSuffix}.md`)
 		: paths.agentsOutput;
 	console.log(`[${paths.name}] Building AGENTS${profileSuffix}.md...`);
 	// Load metadata and sections
 	const metadata = loadMetadata(paths.metadataFile, paths.name);
@@ -113,7 +119,7 @@ function buildSkill(paths: SkillPaths): void {
 	if (!existsSync(paths.rulesDir)) {
 		console.log(`  No rules directory found. Generating empty AGENTS.md.`);
 		writeFileSync(
-			paths.agentsOutput,
+			outputFile,
 			`# ${skillTitle}\n\nNo rules defined yet.\n`,
 		);
 		return;
@@ -147,10 +153,17 @@ function buildSkill(paths: SkillPaths): void {
 		}
 	}
 	// Filter rules by profile if specified
 	let filteredRules = rules;
 	if (profile) {
 		filteredRules = filterRulesForProfile(rules, profile);
 		console.log(`  Filtered to ${filteredRules.length} rules for profile "${profile.name}"`);
 	}
 	// Group rules by section and assign IDs
 	const rulesBySection = new Map<number, Rule[]>();
-	for (const rule of rules) {
+	for (const rule of filteredRules) {
 		const sectionRules = rulesBySection.get(rule.section) || [];
 		sectionRules.push(rule);
 		rulesBySection.set(rule.section, sectionRules);
@@ -225,6 +238,18 @@ function buildSkill(paths: SkillPaths): void {
 				output.push(`**Impact: ${rule.impact}**\n`);
 			}
 			// Add prerequisites if minVersion or extensions are specified
 			const prerequisites: string[] = [];
 			if (rule.minVersion) {
 				prerequisites.push(`PostgreSQL ${rule.minVersion}+`);
 			}
 			if (rule.extensions && rule.extensions.length > 0) {
 				prerequisites.push(`Extension${rule.extensions.length > 1 ? "s" : ""}: ${rule.extensions.join(", ")}`);
 			}
 			if (prerequisites.length > 0) {
 				output.push(`**Prerequisites:** ${prerequisites.join(" | ")}\n`);
 			}
 			output.push(`${rule.explanation}\n`);
 			for (const example of rule.examples) {
@@ -269,9 +294,52 @@ function buildSkill(paths: SkillPaths): void {
 	}
 	// Write output
-	writeFileSync(paths.agentsOutput, output.join("\n"));
+	writeFileSync(outputFile, output.join("\n"));
-	console.log(`  Generated: ${paths.agentsOutput}`);
+	console.log(`  Generated: ${outputFile}`);
-	console.log(`  Total rules: ${rules.length}`);
+	console.log(`  Total rules: ${filteredRules.length}`);
 }
 /**
 * Parse CLI arguments
 *
 * Recognised tokens (everything after `node <script>`):
 *   <skill>            positional skill name; if several are given the last one wins
 *   --profile <name>   profile to build
 *   --profile=<name>   same, as a single token
 *   --all-profiles     build the default output plus every profile variant
 * Any other `--flag` is ignored.
 *
 * @returns The parsed options; `skill` and `profile` stay undefined when not supplied.
 */
 function parseArgs(): { skill?: string; profile?: string; allProfiles: boolean } {
 	const args = process.argv.slice(2);
 	const profileAssignment = "--profile=";
 	let skill: string | undefined;
 	let profile: string | undefined;
 	let allProfiles = false;
 	for (let i = 0; i < args.length; i++) {
 		const arg = args[i];
 		if (arg === "--profile") {
 			const value = args[i + 1];
 			// Only consume the next token when it is a value, so that
 			// `--profile --all-profiles` does not swallow the following flag.
 			if (value && !value.startsWith("--")) {
 				profile = value;
 				i++;
 			} else {
 				console.warn("Warning: --profile expects a profile name; ignoring it");
 			}
 		} else if (arg.startsWith(profileAssignment)) {
 			const value = arg.slice(profileAssignment.length);
 			if (value) {
 				profile = value;
 			} else {
 				console.warn("Warning: --profile expects a profile name; ignoring it");
 			}
 		} else if (arg === "--all-profiles") {
 			allProfiles = true;
 		} else if (!arg.startsWith("--")) {
 			skill = arg;
 		}
 	}
 	return { skill, profile, allProfiles };
 }
 /**
 * Build the default AGENTS.md for a skill, then one variant per profile
 * found in the skill's `profiles` directory.
 *
 * Profile names are listed before the default build runs. A profile that
 * fails to load is skipped rather than aborting the remaining builds.
 */
 function buildSkillWithAllProfiles(paths: SkillPaths): void {
 	const dir = join(paths.skillDir, "profiles");
 	const names = listProfiles(dir);
 	// Default output (no profile filtering) always comes first
 	buildSkill(paths);
 	// Then every profile-specific variant
 	names.forEach((name) => {
 		const loaded = loadProfile(dir, name);
 		if (!loaded) {
 			return;
 		}
 		buildSkill(paths, loaded);
 	});
 }
 // Run build when executed directly
@@ -280,7 +348,7 @@ const isMainModule =
 	process.argv[1]?.endsWith("build.js");
 if (isMainModule) {
-	const targetSkill = process.argv[2];
+	const { skill: targetSkill, profile: profileName, allProfiles } = parseArgs();
 	if (targetSkill) {
 		// Build specific skill
@@ -292,7 +360,29 @@ if (isMainModule) {
 			}
 			process.exit(1);
 		}
-		buildSkill(getSkillPaths(targetSkill));
+
 		const paths = getSkillPaths(targetSkill);
 		if (allProfiles) {
 			// Build all profile variants
 			buildSkillWithAllProfiles(paths);
 		} else if (profileName) {
 			// Build with specific profile
 			const profilesDir = join(paths.skillDir, "profiles");
 			const profile = loadProfile(profilesDir, profileName);
 			if (!profile) {
 				console.error(`Error: Profile "${profileName}" not found`);
 				const available = listProfiles(profilesDir);
 				if (available.length > 0) {
 					console.error(`Available profiles: ${available.join(", ")}`);
 				}
 				process.exit(1);
 			}
 			buildSkill(paths, profile);
 		} else {
 			// Build default
 			buildSkill(paths);
 		}
 	} else {
 		// Build all skills
 		const skills = discoverSkills();
@@ -303,7 +393,12 @@ if (isMainModule) {
 		console.log(`Found ${skills.length} skill(s): ${skills.join(", ")}\n`);
 		for (const skill of skills) {
-			buildSkill(getSkillPaths(skill));
+			const paths = getSkillPaths(skill);
 			if (allProfiles) {
 				buildSkillWithAllProfiles(paths);
 			} else {
 				buildSkill(paths);
 			}
 			console.log("");
 		}
 	}
--- a/packages/skills-build/src/parser.ts
+++ b/packages/skills-build/src/parser.ts
@@ -251,6 +251,7 @@ export function parseRuleFile(
 		const examples = extractExamples(body);
 		const tags = frontmatter.tags?.split(",").map((t) => t.trim()) || [];
 		const extensions = frontmatter.extensions?.split(",").map((e) => e.trim()) || [];
 		// Validation warnings
 		if (!explanation || explanation.length < 20) {
@@ -271,6 +272,8 @@ export function parseRuleFile(
 			examples,
 			references: extractReferences(body),
 			tags: tags.length > 0 ? tags : undefined,
 			minVersion: frontmatter.minVersion || undefined,
 			extensions: extensions.length > 0 ? extensions : undefined,
 		};
 		return { success: true, rule, errors, warnings };
--- a/packages/skills-build/src/profiles.ts
+++ b/packages/skills-build/src/profiles.ts
@@ -0,0 +1,102 @@
 import { existsSync, readdirSync, readFileSync } from "node:fs";
 import { join } from "node:path";
 import type { Profile, Rule } from "./types.js";
 /**
 * Load a profile from the profiles directory
 *
 * Reads `<profilesDir>/<profileName>.json` and checks that it has the shape
 * the build relies on before handing it back.
 *
 * @param profilesDir Directory containing the profile JSON files.
 * @param profileName Profile file name without the `.json` extension.
 * @returns The parsed profile, or `null` when the file does not exist, is not
 *   valid JSON, or is missing a required field. Every case except a missing
 *   file is also reported on stderr.
 */
 export function loadProfile(profilesDir: string, profileName: string): Profile | null {
 	const profileFile = join(profilesDir, `${profileName}.json`);
 	if (!existsSync(profileFile)) {
 		return null;
 	}
 	let parsed: unknown;
 	try {
 		parsed = JSON.parse(readFileSync(profileFile, "utf-8"));
 	} catch (error) {
 		console.error(`Error loading profile ${profileName}:`, error);
 		return null;
 	}
 	// JSON.parse happily returns arrays, scalars or null; none of those is a profile.
 	if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
 		console.error(`Error loading profile ${profileName}: expected a JSON object`);
 		return null;
 	}
 	// `name` feeds the output file name; `minVersion` and `extensions` are
 	// dereferenced unconditionally when rules are filtered against the profile.
 	const fields = parsed as Record<string, unknown>;
 	const hasRequiredFields =
 		typeof fields.name === "string" &&
 		typeof fields.minVersion === "string" &&
 		typeof fields.extensions === "object" &&
 		fields.extensions !== null;
 	if (!hasRequiredFields) {
 		console.error(
 			`Error loading profile ${profileName}: "name" and "minVersion" must be strings and "extensions" must be an object`,
 		);
 		return null;
 	}
 	return parsed as Profile;
 }
 /**
 * List all available profiles in the profiles directory
 *
 * @param profilesDir Directory to scan (not recursive).
 * @returns Profile names, i.e. the names of the `*.json` entries with that
 *   extension removed, in the order the file system reports them. Returns an
 *   empty array when the directory does not exist.
 */
 export function listProfiles(profilesDir: string): string[] {
 	if (!existsSync(profilesDir)) {
 		return [];
 	}
 	const extension = ".json";
 	return readdirSync(profilesDir)
 		.filter((f) => f.endsWith(extension))
 		// Strip only the trailing extension: String.replace would remove the
 		// first ".json" it finds, mangling names such as "a.json.bak.json".
 		.map((f) => f.slice(0, -extension.length));
 }
 /**
 * Order two dotted version strings numerically (e.g., "9.5", "11", "14.2").
 *
 * Components are compared left to right as numbers. A missing component, or
 * one that does not parse as a number, counts as 0, so "11" equals "11.0".
 *
 * @returns A negative number if a < b, 0 if equal, a positive number if a > b.
 */
 function compareVersions(a: string, b: string): number {
 	const toComponents = (version: string): number[] =>
 		version.split(".").map((piece) => Number(piece));
 	const left = toComponents(a);
 	const right = toComponents(b);
 	let position = 0;
 	while (position < left.length || position < right.length) {
 		const l = left[position] || 0;
 		const r = right[position] || 0;
 		if (l !== r) {
 			// The first differing component decides the ordering
 			return l - r;
 		}
 		position += 1;
 	}
 	return 0;
 }
 /**
 * Decide whether a rule may appear in the output built for a profile.
 *
 * A rule is rejected when any of the following holds:
 *  - its `minVersion` is greater than the profile's `minVersion`, or greater
 *    than the profile's `maxVersion` when one is set;
 *  - one of its required extensions is listed as unavailable, or appears in
 *    neither the profile's `available` nor `installable` list;
 *  - its id is listed in the profile's `excludeRules`.
 *
 * @returns true when none of the rejection conditions apply.
 */
 export function isRuleCompatibleWithProfile(rule: Rule, profile: Profile): boolean {
 	const required = rule.minVersion;
 	const needed = rule.extensions;
 	// Version gate
 	if (required) {
 		const aboveProfileMin = compareVersions(required, profile.minVersion) > 0;
 		if (aboveProfileMin) {
 			return false;
 		}
 		const aboveProfileMax =
 			profile.maxVersion && compareVersions(required, profile.maxVersion) > 0;
 		if (aboveProfileMax) {
 			return false;
 		}
 	}
 	// Extension gate
 	if (needed && needed.length > 0) {
 		const usable = new Set<string>([
 			...(profile.extensions.available || []),
 			...(profile.extensions.installable || []),
 		]);
 		const blocked = needed.some(
 			(ext) => profile.extensions.unavailable?.includes(ext) || !usable.has(ext),
 		);
 		if (blocked) {
 			return false;
 		}
 	}
 	// Explicit per-profile exclusions
 	return !profile.excludeRules?.includes(rule.id);
 }
 /**
 * Keep only the rules that are compatible with the given profile.
 *
 * @returns A new array in the original order; the input is not modified.
 */
 export function filterRulesForProfile(rules: Rule[], profile: Profile): Rule[] {
 	return rules.reduce<Rule[]>((kept, candidate) => {
 		if (isRuleCompatibleWithProfile(candidate, profile)) {
 			kept.push(candidate);
 		}
 		return kept;
 	}, []);
 }
--- a/packages/skills-build/src/types.ts
+++ b/packages/skills-build/src/types.ts
@@ -26,6 +26,8 @@ export interface Rule {
 	references?: string[];
 	tags?: string[];
 	supabaseNotes?: string;
 	minVersion?: string;      // Minimum PostgreSQL version required (e.g., "11", "14")
 	extensions?: string[];    // Required PostgreSQL extensions (e.g., ["pg_stat_statements"])
 }
 export interface Section {
@@ -57,3 +59,16 @@ export interface ValidationResult {
 	errors: string[];
 	warnings: string[];
 }
 /**
 * A deployment profile, loaded from `<skill>/profiles/<name>.json`, used to
 * filter rules when building a profile-specific AGENTS file.
 */
 export interface Profile {
 	// Profile name; the build appends it to the output file name (AGENTS.<name>.md)
 	name: string;
 	// PostgreSQL version the profile targets; rules whose minVersion is higher are filtered out
 	minVersion: string;
 	// Optional upper bound; rules whose minVersion is higher than this are filtered out
 	maxVersion?: string;
 	extensions: {
 		// A rule's required extensions must each appear in `available` or `installable`
 		available: string[];
 		installable?: string[];
 		// Rules requiring any extension listed here are filtered out
 		unavailable: string[];
 	};
 	// Rule ids to drop for this profile regardless of version/extension checks
 	excludeRules?: string[];
 	// Free-form text; not read by the profile filtering code
 	notes?: string;
 }
--- a/skills/postgres-best-practices/AGENTS.aurora.md
+++ b/skills/postgres-best-practices/AGENTS.aurora.md
--- a/skills/postgres-best-practices/AGENTS.md
+++ b/skills/postgres-best-practices/AGENTS.md
@@ -191,6 +191,8 @@ Reference: https://www.postgresql.org/docs/current/indexes-multicolumn.html
 **Impact: MEDIUM-HIGH (2-5x faster queries by eliminating heap fetches)**
 **Prerequisites:** PostgreSQL 11+
 Covering indexes include all columns needed by a query, enabling index-only scans that skip the table entirely.
 **Incorrect (index scan + heap fetch):**
@@ -680,6 +682,8 @@ Reference: https://www.postgresql.org/docs/current/ddl-constraints.html#DDL-CONS
 **Impact: MEDIUM-HIGH (5-20x faster queries and maintenance on large tables)**
 **Prerequisites:** PostgreSQL 10+
 Partitioning splits a large table into smaller pieces, improving query performance and maintenance operations.
 **Incorrect (single large table):**
@@ -997,6 +1001,8 @@ Reference: https://www.postgresql.org/docs/current/explicit-locking.html#ADVISOR
 **Impact: MEDIUM-HIGH (10x throughput for worker queues)**
 **Prerequisites:** PostgreSQL 9.5+
 When multiple workers process a queue, SKIP LOCKED allows workers to process different rows without waiting.
 **Incorrect (workers block each other):**
@@ -1194,6 +1200,8 @@ Reference: https://supabase.com/docs/guides/database/pagination
 **Impact: MEDIUM (Atomic operation, eliminates race conditions)**
 **Prerequisites:** PostgreSQL 9.5+
 Using separate SELECT-then-INSERT/UPDATE creates race conditions. Use INSERT ... ON CONFLICT for atomic upserts.
 **Incorrect (check-then-insert race condition):**
@@ -1245,6 +1253,8 @@ Using pg_stat_statements, EXPLAIN ANALYZE, metrics collection, and performance d
 **Impact: LOW-MEDIUM (Identify top resource-consuming queries)**
 **Prerequisites:** Extension: pg_stat_statements
 pg_stat_statements tracks execution statistics for all queries, helping identify slow and frequent queries.
 **Incorrect (no visibility into query patterns):**
@@ -1391,6 +1401,8 @@ Full-text search, JSONB optimization, PostGIS, extensions, and advanced Postgres
 **Impact: MEDIUM (10-100x faster JSONB queries with proper indexing)**
 **Prerequisites:** PostgreSQL 9.4+
 JSONB queries without indexes scan the entire table. Use GIN indexes for containment queries.
 **Incorrect (no index on JSONB):**
--- a/skills/postgres-best-practices/AGENTS.self-hosted.md
+++ b/skills/postgres-best-practices/AGENTS.self-hosted.md
--- a/skills/postgres-best-practices/AGENTS.supabase.md
+++ b/skills/postgres-best-practices/AGENTS.supabase.md
--- a/skills/postgres-best-practices/README.md
+++ b/skills/postgres-best-practices/README.md
@@ -3,6 +3,26 @@
 This repository contains Postgres performance optimization rules optimized for
 AI agents and LLMs.
 ## Requirements
 - **Minimum PostgreSQL Version:** 9.5 (some rules require newer versions)
 - **Recommended Version:** 15+
 - **Optional Extensions:** pg_stat_statements (for monitoring rules)
 ### Version Compatibility
 Some rules require specific PostgreSQL versions due to feature availability:
 | Feature | Min Version | Affected Rules |
 |---------|-------------|----------------|
 | ON CONFLICT (UPSERT) | 9.5 | data-upsert |
 | SKIP LOCKED | 9.5 | lock-skip-locked |
 | JSONB type | 9.4 | advanced-jsonb-indexing |
 | Declarative Partitioning | 10 | schema-partitioning |
 | Covering Indexes (INCLUDE) | 11 | query-covering-indexes |
 Rules include `minVersion` and `extensions` frontmatter fields. Agents should check these against the target environment before recommending specific rules.
 ## Quick Start
 ```bash
--- a/skills/postgres-best-practices/SKILL.md
+++ b/skills/postgres-best-practices/SKILL.md
@@ -55,3 +55,52 @@ Each rule file contains:
 ## Full Compiled Document
 For the complete guide with all rules expanded: `AGENTS.md`
 ## Runtime Detection
 Before applying rules, agents should detect the PostgreSQL environment to ensure compatibility:
 ### Version Detection
 ```sql
 SELECT version();
 -- Example output: PostgreSQL 15.4 on x86_64-pc-linux-gnu
 ```
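 Extract the major version number (e.g., "15" from "PostgreSQL 15.4") to check against rule `minVersion` requirements. For releases before PostgreSQL 10 the major version has two components (e.g., "9.5" from "PostgreSQL 9.5.3"), so keep both when comparing against values such as `9.4` or `9.5`.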
 ### Extension Availability
 ```sql
 SELECT name, installed_version, default_version
 FROM pg_available_extensions
 WHERE name IN ('pg_stat_statements', 'pgcrypto', 'uuid-ossp', 'postgis')
 ORDER BY name;
 ```
 Check if required extensions are available before recommending rules that depend on them.
 ### Configuration Check
 ```sql
 SELECT name, setting
 FROM pg_settings
 WHERE name IN ('shared_preload_libraries', 'max_connections', 'work_mem');
 ```
 ## Rule Filtering
 Only recommend rules where:
 - `minVersion` <= detected PostgreSQL version (or minVersion is unset)
 - All required `extensions` are available or installable
 - The rule is appropriate for the user's deployment context
 ### Version Compatibility
 | Feature | Min Version | Affected Rules |
 |---------|-------------|----------------|
 | ON CONFLICT (UPSERT) | 9.5 | data-upsert |
 | SKIP LOCKED | 9.5 | lock-skip-locked |
 | JSONB type | 9.4 | advanced-jsonb-indexing |
 | Declarative Partitioning | 10 | schema-partitioning |
 | Covering Indexes (INCLUDE) | 11 | query-covering-indexes |
--- a/skills/postgres-best-practices/evals/README.md
+++ b/skills/postgres-best-practices/evals/README.md
@@ -0,0 +1,308 @@
 # PostgreSQL Best Practices - Evaluation Scenarios
 This directory contains evaluation scenarios to test that AI agents correctly apply the PostgreSQL best practices rules, including proper handling of version constraints and extension requirements.
 ## Overview
 The evals use [Vitest](https://vitest.dev/) as the test framework and the [Vercel AI SDK](https://ai-sdk.dev/) to interact with Claude for generating responses. Each scenario tests a specific aspect of rule application.
 ## Running Evals
 ```bash
 # Install dependencies
 npm install
 # Run all evals
 npm run eval
 # Run in watch mode
 npm run eval:watch
 # Run specific scenario
 npm run eval -- -t "Missing Index"
 ```
 ## Environment Setup
 Set your Anthropic API key:
 ```bash
 export ANTHROPIC_API_KEY=your_api_key
 ```
 ## Evaluation Scenarios
 ### Category 1: Core Query Patterns
 #### 1.1 Missing Index Detection
 | Field | Value |
 |-------|-------|
 | **ID** | `missing-index-detection` |
 | **File** | `scenarios/missing-index.eval.ts` |
 | **Difficulty** | Basic |
 | **Tests** | Agent identifies missing indexes on filtered columns |
 **Description:**
 Tests that the agent correctly identifies when a query would benefit from an index and recommends creating one.
 **Input:**
 - Schema: `orders` table with no indexes beyond PK
 - Query: `SELECT * FROM orders WHERE customer_id = 12345 AND status = 'pending'`
 **Expected Output:**
 - Should recommend rule 1.1 (query-missing-indexes)
 - Must mention "index" and "customer_id"
 **Expected Reasoning:**
 1. Identify that the query filters on customer_id and status
 2. Recognize that without an index, this causes a sequential scan
 3. Recommend creating an index on the filtered columns
 ---
 #### 1.2 N+1 Query Detection
 | Field | Value |
 |-------|-------|
 | **ID** | `n-plus-one-detection` |
 | **File** | `scenarios/n-plus-one.eval.ts` |
 | **Difficulty** | Intermediate |
 | **Tests** | Agent identifies N+1 query pattern in code |
 **Description:**
 Tests that the agent recognizes N+1 query patterns in application code and recommends using JOINs.
 **Input:**
 - Schema: `users` and `posts` tables with relationship
 - Code snippet showing loop that queries for each post's author
 **Expected Output:**
 - Should recommend rule 6.1 (data-n-plus-one)
 - Must mention "JOIN" and "N+1"
 **Expected Reasoning:**
 1. Identify the N+1 query pattern (1 + N queries)
 2. Recognize this as a common performance anti-pattern
 3. Recommend using a JOIN to fetch all data in a single query
 ---
 #### 1.3 Covering Index Suggestion
 | Field | Value |
 |-------|-------|
 | **ID** | `covering-index-suggestion` |
 | **File** | `scenarios/covering-index.eval.ts` |
 | **Difficulty** | Intermediate |
 | **Tests** | Agent suggests INCLUDE clause for covering index |
 **Description:**
 Tests that the agent recommends covering indexes when SELECT columns aren't in the index.
 **Input:**
 - Schema: `users` table with index on `email` only
 - Query: `SELECT email, name, department FROM users WHERE email = ?`
 - PostgreSQL version: 15.4
 **Expected Output:**
 - Should recommend rule 1.2 (query-covering-indexes)
 - Must mention "INCLUDE" and "covering"
 **Expected Reasoning:**
 1. Identify that query selects columns not in the index
 2. Recognize this causes heap fetches
 3. Recommend using INCLUDE clause for index-only scans
 ---
 ### Category 2: Version Constraints
 #### 2.1 PG10 - No Covering Index
 | Field | Value |
 |-------|-------|
 | **ID** | `version-constraint-pg10-no-covering` |
 | **File** | `scenarios/version-constraint.eval.ts` |
 | **Difficulty** | Intermediate |
 | **Tests** | Agent respects PG11+ requirement for INCLUDE |
 **Description:**
 Tests that the agent does NOT recommend INCLUDE clause when PostgreSQL version is 10 (INCLUDE requires PG11+).
 **Input:**
 - Same setup as covering index scenario
 - PostgreSQL version: 10.0
 **Expected Output:**
 - Should NOT recommend rule 1.2
 - Must NOT contain "INCLUDE"
 - Should suggest alternative optimizations
 **Expected Reasoning:**
 1. Recognize PostgreSQL 10 is specified
 2. Check that INCLUDE requires PG11+
 3. Avoid recommending incompatible features
 4. Suggest PG10-compatible alternatives
 ---
 #### 2.2 PG9.3 - No UPSERT
 | Field | Value |
 |-------|-------|
 | **ID** | `version-constraint-pg93-no-upsert` |
 | **File** | `scenarios/version-constraint.eval.ts` |
 | **Difficulty** | Intermediate |
 | **Tests** | Agent respects PG9.5+ requirement for ON CONFLICT |
 **Description:**
 Tests that the agent does NOT recommend ON CONFLICT when PostgreSQL version is 9.3 (requires PG9.5+).
 **Input:**
 - Schema: `settings` table with composite primary key
 - Query: Need insert-or-update functionality
 - PostgreSQL version: 9.3
 **Expected Output:**
 - Should NOT recommend rule 6.3 (data-upsert)
 - Must NOT contain "ON CONFLICT"
 - Should suggest CTE-based or try/catch pattern
 **Expected Reasoning:**
 1. Recognize PostgreSQL 9.3 is specified
 2. Check that ON CONFLICT requires PG9.5+
 3. Avoid recommending UPSERT syntax
 4. Suggest compatible alternatives
 ---
 ### Category 3: Extension Requirements
 #### 3.1 Extension Available
 | Field | Value |
 |-------|-------|
 | **ID** | `extension-available-pg-stat-statements` |
 | **File** | `scenarios/extension-available.eval.ts` |
 | **Difficulty** | Basic |
 | **Tests** | Agent recommends extension when available |
 **Description:**
 Tests that the agent recommends pg_stat_statements when it's listed as available.
 **Input:**
 - General schema
 - Query: How to identify slow queries
 - Available extensions: pg_stat_statements, pgcrypto, uuid-ossp
 **Expected Output:**
 - Should recommend rule 7.1 (monitor-pg-stat-statements)
 - Must mention "pg_stat_statements"
 **Expected Reasoning:**
 1. Recognize query monitoring problem
 2. Check that pg_stat_statements is available
 3. Recommend enabling and using the extension
 ---
 #### 3.2 Extension Unavailable
 | Field | Value |
 |-------|-------|
 | **ID** | `extension-unavailable-no-pg-stat-statements` |
 | **File** | `scenarios/extension-unavailable.eval.ts` |
 | **Difficulty** | Intermediate |
 | **Tests** | Agent provides alternatives when extension unavailable |
 **Description:**
 Tests that the agent suggests alternatives when pg_stat_statements is not available.
 **Input:**
 - General schema
 - Query: How to identify slow queries
 - Available extensions: [] (none)
 - Context: Managed database, can't install extensions
 **Expected Output:**
 - Should NOT recommend pg_stat_statements
 - Must mention "EXPLAIN" and "ANALYZE"
 - Should suggest built-in alternatives
 **Expected Reasoning:**
 1. Recognize no extensions are available
 2. Avoid recommending pg_stat_statements
 3. Suggest EXPLAIN ANALYZE, log_min_duration_statement, or pg_stat_activity
 ---
 ## Adding New Scenarios
 1. Create a new file in `scenarios/` following the naming convention `{name}.eval.ts`
 2. Define the scenario using the `EvalScenario` interface:
 ```typescript
 import { describe, it, expect } from "vitest";
 import { runEval } from "../runner.js";
 import type { EvalScenario } from "../types.js";
 const scenario: EvalScenario = {
  id: "unique-scenario-id",
  name: "Human Readable Name",
  description: "What this scenario tests",
  category: "query-performance", // or "version-constraints" | "extension-requirements"
  difficulty: "basic", // or "intermediate" | "advanced"
  input: {
    schema: "SQL schema definition",
    userQuery: "User's question or problem",
    postgresVersion: "15.4", // Optional
    availableExtensions: ["list"], // Optional
  },
  expectedOutput: {
    shouldRecommendRules: ["1.1"],
    shouldNotRecommendRules: ["2.3"], // Optional
    mustContain: ["keyword"],
    mustNotContain: ["avoid"], // Optional
  },
  expectedReasoning: [
    "Step 1 of expected reasoning",
    "Step 2 of expected reasoning",
  ],
 };
 describe("Scenario Name", () => {
  it("should do something specific", async () => {
    const result = await runEval(scenario);
    // Add assertions
  });
 });
 export { scenario };
 ```
 3. Run the new scenario: `npm run eval -- -t "Scenario Name"`
 ## Evaluation Criteria
 Each scenario is evaluated against:
 1. **Rule References**: Does the response reference the expected rules?
 2. **Must Contain**: Does the response include required keywords?
 3. **Must Not Contain**: Does the response avoid prohibited content?
 4. **Version Constraints**: Are version requirements respected?
 5. **Extension Requirements**: Are extension dependencies checked?
 ## Troubleshooting
 **Evals timing out:**
 - Increase timeout in `vitest.config.ts` (default: 60s)
 - Check API key is valid
 **Flaky results:**
 - Set `temperature: 0` in runner config
 - Make assertions more flexible (check for concept presence, not exact wording)
 **Missing AGENTS.md:**
 - Run `npm run build` from repository root first
--- a/skills/postgres-best-practices/evals/package.json
+++ b/skills/postgres-best-practices/evals/package.json
@@ -0,0 +1,18 @@
 {
  "name": "postgres-best-practices-evals",
  "version": "1.0.0",
  "description": "Evaluation scenarios for Postgres Best Practices skill",
  "type": "module",
  "scripts": {
    "eval": "vitest run",
    "eval:watch": "vitest",
    "eval:ui": "vitest --ui"
  },
  "devDependencies": {
    "@ai-sdk/anthropic": "^0.0.30",
    "@types/node": "^20.0.0",
    "ai": "^3.0.0",
    "typescript": "^5.0.0",
    "vitest": "^1.0.0"
  }
 }
--- a/skills/postgres-best-practices/evals/runner.ts
+++ b/skills/postgres-best-practices/evals/runner.ts
@@ -0,0 +1,192 @@
 import { generateText } from "ai";
 import { anthropic } from "@ai-sdk/anthropic";
 import { readFileSync } from "node:fs";
 import { join } from "node:path";
 import type { CriterionResult, EvalConfig, EvalResult, EvalScenario } from "./types.js";
 // Baseline settings for runEval; any field can be overridden per call
 // through its `config` argument.
 const DEFAULT_CONFIG: EvalConfig = {
  // AGENTS.md in the directory above this module
  agentsPath: join(import.meta.dirname, "..", "AGENTS.md"),
  model: "claude-sonnet-4-20250514",
  maxTokens: 2048,
  // 0 keeps generations as repeatable as possible so evals are less flaky
  temperature: 0,
 };
 /**
 * Render a scenario's input as the user message sent to the model.
 *
 * Optional context lines (PostgreSQL version, available extensions, free-form
 * context) come first, in that order, and are omitted when the scenario does
 * not set them. The schema (in a fenced sql block) and the question always follow.
 */
 function buildUserPrompt(scenario: EvalScenario): string {
  const { postgresVersion, availableExtensions, context, schema, userQuery } = scenario.input;
  const lines: string[] = [];
  if (postgresVersion) {
    lines.push(`PostgreSQL Version: ${postgresVersion}`);
  }
  if (availableExtensions) {
    // An explicitly empty list is stated outright rather than left blank
    lines.push(
      availableExtensions.length === 0
        ? "Available Extensions: None installed"
        : `Available Extensions: ${availableExtensions.join(", ")}`
    );
  }
  if (context) {
    lines.push(`Context: ${context}`);
  }
  lines.push(`\nSchema:\n\`\`\`sql\n${schema}\n\`\`\``, `\nQuestion: ${userQuery}`);
  return lines.join("\n");
 }
 /**
 * Collect the distinct "N.M" tokens found in a response, in order of first
 * appearance.
 *
 * The match is purely textual: any standalone <digits>.<digits> token counts,
 * so version numbers such as "15.4" are returned alongside rule IDs like "1.1".
 */
 function extractRuleIds(response: string): string[] {
  const matcher = /\b(\d+\.\d+)\b/g;
  const seen = new Set<string>();
  let hit = matcher.exec(response);
  while (hit !== null) {
    seen.add(hit[0]);
    hit = matcher.exec(response);
  }
  return Array.from(seen);
 }
 /**
 * Score a model response against a scenario's expected output.
 *
 * Produces one result per expectation, in this order: required terms,
 * forbidden terms, rules that should be referenced, rules that should not.
 * Term checks are case-insensitive substring matches; rule checks look for
 * the rule ID among the "N.M" tokens extracted from the response.
 */
 function evaluateCriteria(scenario: EvalScenario, response: string): CriterionResult[] {
  const expected = scenario.expectedOutput;
  const haystack = response.toLowerCase();
  const mentions = (term: string): boolean => haystack.includes(term.toLowerCase());
  const outcomes: CriterionResult[] = [];
  // Required terms
  outcomes.push(
    ...expected.mustContain.map((term) => {
      const present = mentions(term);
      return {
        criterion: `Response should contain "${term}"`,
        passed: present,
        evidence: present ? "Found in response" : "Not found in response",
      };
    })
  );
  // Forbidden terms
  if (expected.mustNotContain) {
    outcomes.push(
      ...expected.mustNotContain.map((term) => {
        const present = mentions(term);
        return {
          criterion: `Response should NOT contain "${term}"`,
          passed: !present,
          evidence: present ? "Found in response (should not be present)" : "Not found (correct)",
        };
      })
    );
  }
  const cited = extractRuleIds(response);
  // Rules the response is expected to reference
  outcomes.push(
    ...expected.shouldRecommendRules.map((ruleId) => {
      const referenced = cited.includes(ruleId);
      return {
        criterion: `Should recommend rule ${ruleId}`,
        passed: referenced,
        evidence: referenced ? "Rule referenced" : "Rule not referenced",
      };
    })
  );
  // Rules the response must leave out
  if (expected.shouldNotRecommendRules) {
    outcomes.push(
      ...expected.shouldNotRecommendRules.map((ruleId) => {
        const referenced = cited.includes(ruleId);
        return {
          criterion: `Should NOT recommend rule ${ruleId}`,
          passed: !referenced,
          evidence: referenced ? "Rule referenced (should not be)" : "Rule not referenced (correct)",
        };
      })
    );
  }
  return outcomes;
 }
 /**
 * Run a single evaluation scenario
 *
 * Loads the compiled knowledge base, sends it (as the system prompt) together
 * with the scenario's user prompt to the configured model, and scores the reply.
 *
 * @param scenario Scenario to run.
 * @param config Overrides merged on top of DEFAULT_CONFIG.
 * @returns The scored result. This function does not throw: if reading the
 *   knowledge base or calling the model fails, the result has `passed: false`,
 *   empty criteria/response, `latencyMs: 0` and the message in `error`.
 */
 export async function runEval(
  scenario: EvalScenario,
  config: Partial<EvalConfig> = {}
 ): Promise<EvalResult> {
  const finalConfig = { ...DEFAULT_CONFIG, ...config };
  try {
    // Load the knowledge base (AGENTS.md by default) that the model must draw on
    const agentsMd = readFileSync(finalConfig.agentsPath, "utf-8");
    const systemPrompt = `You are a PostgreSQL expert assistant. Use the following knowledge base to provide accurate recommendations:
 ${agentsMd}
 IMPORTANT: When the user specifies a PostgreSQL version or available extensions, you MUST respect those constraints:
 - Do not recommend features that require a higher PostgreSQL version than specified
 - Do not recommend extensions that are not available
 - If a recommended optimization requires a specific version or extension, mention the prerequisite
 When making recommendations, reference specific rule IDs (e.g., "1.1", "2.3") from the knowledge base.`;
    const userPrompt = buildUserPrompt(scenario);
    // Latency covers only the model call, not file loading or scoring
    const start = Date.now();
    const { text } = await generateText({
      model: anthropic(finalConfig.model!),
      system: systemPrompt,
      prompt: userPrompt,
      maxTokens: finalConfig.maxTokens,
      temperature: finalConfig.temperature,
    });
    const latencyMs = Date.now() - start;
    // Evaluate the response
    const criteriaResults = evaluateCriteria(scenario, text);
    const rulesReferenced = extractRuleIds(text);
    // Passes only if every criterion passed (a scenario with no criteria passes)
    const passed = criteriaResults.every((r) => r.passed);
    return {
      scenarioId: scenario.id,
      passed,
      rulesReferenced,
      criteriaResults,
      response: text,
      latencyMs,
    };
  } catch (error) {
    // Report the failure as a failed result instead of rethrowing
    return {
      scenarioId: scenario.id,
      passed: false,
      rulesReferenced: [],
      criteriaResults: [],
      response: "",
      latencyMs: 0,
      error: error instanceof Error ? error.message : String(error),
    };
  }
 }
 /**
 * Run several evaluation scenarios one after another.
 *
 * Scenarios are awaited sequentially, in the order given, and a progress line
 * plus a PASS/FAIL line with the latency is logged for each.
 *
 * @returns One result per scenario, in input order.
 */
 export async function runEvals(
  scenarios: EvalScenario[],
  config: Partial<EvalConfig> = {}
 ): Promise<EvalResult[]> {
  const collected: EvalResult[] = [];
  for (let index = 0; index < scenarios.length; index += 1) {
    const current = scenarios[index];
    console.log(`Running eval: ${current.name}...`);
    const outcome = await runEval(current, config);
    collected.push(outcome);
    console.log(`  ${outcome.passed ? "PASS" : "FAIL"} (${outcome.latencyMs}ms)`);
  }
  return collected;
 }
--- a/skills/postgres-best-practices/evals/scenarios/covering-index.eval.ts
+++ b/skills/postgres-best-practices/evals/scenarios/covering-index.eval.ts
@@ -0,0 +1,62 @@
 import { describe, it, expect } from "vitest";
 import { runEval } from "../runner.js";
 import type { EvalScenario } from "../types.js";
 /**
 * Eval scenario: an equality lookup on `email` that also selects `name` and
 * `department`, against a table whose only secondary index covers `email`
 * alone. PostgreSQL 15.4 is specified, and the expected answer is a covering
 * index built with the INCLUDE clause.
 */
 const scenario: EvalScenario = {
  id: "covering-index-suggestion",
  name: "Covering Index Suggestion",
  description:
    "Agent should suggest using INCLUDE clause for columns in SELECT that aren't in WHERE clause",
  category: "query-performance",
  difficulty: "intermediate",
  input: {
    schema: `
 CREATE TABLE users (
  id SERIAL PRIMARY KEY,
  email VARCHAR(255) NOT NULL,
  name VARCHAR(100),
  department VARCHAR(50),
  created_at TIMESTAMPTZ DEFAULT NOW()
 );
 CREATE INDEX users_email_idx ON users (email);
 -- Table has 2 million rows
 `,
    userQuery: `This query still does heap fetches even though we have an index on email:
 SELECT email, name, department FROM users WHERE email = 'user@example.com'
 EXPLAIN shows "Index Scan" but not "Index Only Scan". How can I avoid the table lookup?`,
    postgresVersion: "15.4",
  },
  expectedOutput: {
    shouldRecommendRules: ["1.2"], // query-covering-indexes
    // NOTE(review): the describe block in this file also accepts "index-only"
    // in place of "covering"; this list requires "covering" - confirm intent.
    mustContain: ["include", "covering"],
  },
  // Reasoning steps the answer is expected to walk through.
  expectedReasoning: [
    "Identify that the query selects columns (name, department) not in the index",
    "Recognize this causes additional heap fetches after the index scan",
    "Recommend using INCLUDE clause to create a covering index",
    "Explain that this enables index-only scans",
  ],
 };
 describe("Covering Index Suggestion", () => {
  it("should recommend INCLUDE clause for covering index", async () => {
    const { response, criteriaResults } = await runEval(scenario);
    console.log("Response:", response);
    console.log("Criteria results:", criteriaResults);
    const normalized = response.toLowerCase();
    // The INCLUDE keyword has to appear somewhere in the answer.
    expect(normalized).toContain("include");
    // Either name for the covering-index concept is acceptable.
    const conceptTerms = ["covering", "index-only"];
    expect(conceptTerms.some((term) => normalized.includes(term))).toBe(true);
  });
 });
 export { scenario };
--- a/skills/postgres-best-practices/evals/scenarios/extension-available.eval.ts
+++ b/skills/postgres-best-practices/evals/scenarios/extension-available.eval.ts
@@ -0,0 +1,56 @@
 import { describe, it, expect } from "vitest";
 import { runEval } from "../runner.js";
 import type { EvalScenario } from "../types.js";
 /**
 * Eval scenario: a "which queries are slow?" question on PostgreSQL 15.4 where
 * `pg_stat_statements` is listed among the available extensions. The answer is
 * expected to reference rule 7.1 and mention the extension by name.
 */
 const scenario: EvalScenario = {
  id: "extension-available-pg-stat-statements",
  name: "Extension Available - pg_stat_statements",
  description:
    "Agent should recommend pg_stat_statements for query monitoring when the extension is available",
  category: "extension-requirements",
  difficulty: "basic",
  input: {
    schema: `
 -- Production database with various tables
 CREATE TABLE users (id SERIAL PRIMARY KEY, email VARCHAR(255));
 CREATE TABLE orders (id SERIAL PRIMARY KEY, user_id INT, total DECIMAL);
 CREATE TABLE products (id SERIAL PRIMARY KEY, name VARCHAR(200), price DECIMAL);
 `,
    userQuery:
      "Our database is slow but we don't know which queries are causing the problem. How can we identify the slowest queries?",
    postgresVersion: "15.4",
    // The extension under test is explicitly available here.
    availableExtensions: ["pg_stat_statements", "pgcrypto", "uuid-ossp"],
  },
  expectedOutput: {
    shouldRecommendRules: ["7.1"], // monitor-pg-stat-statements
    mustContain: ["pg_stat_statements"],
  },
  // Reasoning steps the answer is expected to walk through.
  expectedReasoning: [
    "Recognize this is a query monitoring/performance diagnosis problem",
    "Check that pg_stat_statements is available in the extensions list",
    "Recommend enabling pg_stat_statements for query analysis",
    "Explain how to use it to find slow queries",
  ],
 };
 describe("Extension Available - pg_stat_statements", () => {
  it("should recommend pg_stat_statements when available", async () => {
    const { response, criteriaResults } = await runEval(scenario);
    console.log("Response:", response);
    console.log("Criteria results:", criteriaResults);
    const normalized = response.toLowerCase();
    // The extension has to be named in the answer.
    expect(normalized).toContain("pg_stat_statements");
    // Any one of these phrases counts as suggesting to enable/use it.
    const usageHints = ["create extension", "enable", "query"];
    expect(usageHints.some((hint) => normalized.includes(hint))).toBe(true);
  });
 });
 export { scenario };
--- a/skills/postgres-best-practices/evals/scenarios/extension-unavailable.eval.ts
+++ b/skills/postgres-best-practices/evals/scenarios/extension-unavailable.eval.ts
@@ -0,0 +1,56 @@
 import { describe, it, expect } from "vitest";
 import { runEval } from "../runner.js";
 import type { EvalScenario } from "../types.js";
 /**
 * Eval scenario: a "which queries are slow?" question on PostgreSQL 15.4 with
 * an empty extension list and context stating that extensions cannot be
 * installed. The answer is expected to offer alternatives instead of
 * `pg_stat_statements` (rule 7.1 must not be recommended).
 */
 const scenario: EvalScenario = {
  id: "extension-unavailable-no-pg-stat-statements",
  name: "Extension Unavailable - No pg_stat_statements",
  description:
    "Agent should provide alternatives when pg_stat_statements is not available for query monitoring",
  category: "extension-requirements",
  difficulty: "intermediate",
  input: {
    schema: `
 -- Production database with various tables
 CREATE TABLE users (id SERIAL PRIMARY KEY, email VARCHAR(255));
 CREATE TABLE orders (id SERIAL PRIMARY KEY, user_id INT, total DECIMAL);
 CREATE TABLE products (id SERIAL PRIMARY KEY, name VARCHAR(200), price DECIMAL);
 `,
    userQuery:
      "Our database is slow but we don't know which queries are causing the problem. How can we identify the slowest queries?",
    postgresVersion: "15.4",
    availableExtensions: [], // No extensions available
    context:
      "This is a managed database environment where we cannot install additional extensions.",
  },
  expectedOutput: {
    shouldRecommendRules: [], // Should not recommend pg_stat_statements rule
    shouldNotRecommendRules: ["7.1"], // monitor-pg-stat-statements
    mustContain: ["explain", "analyze"],
    // NOTE(review): this forbids the extension name outright, while the test
    // in this file says the response "might mention it as unavailable" -
    // confirm which strictness is intended.
    mustNotContain: ["pg_stat_statements"],
  },
  // Reasoning steps the answer is expected to walk through.
  expectedReasoning: [
    "Recognize that no extensions are available",
    "Check that pg_stat_statements cannot be used",
    "Avoid recommending pg_stat_statements",
    "Suggest alternative approaches like EXPLAIN ANALYZE, log_min_duration_statement, or pg_stat_activity",
  ],
 };
 describe("Extension Unavailable - No pg_stat_statements", () => {
  it("should suggest alternatives when pg_stat_statements is unavailable", async () => {
    const { response, criteriaResults } = await runEval(scenario);
    console.log("Response:", response);
    console.log("Criteria results:", criteriaResults);
    // The response is allowed to mention pg_stat_statements as unavailable;
    // only the presence of the EXPLAIN ANALYZE alternative is asserted here.
    const normalized = response.toLowerCase();
    const requiredWords = ["explain", "analyze"];
    expect(requiredWords.every((word) => normalized.includes(word))).toBe(true);
  });
 });
 export { scenario };
--- a/skills/postgres-best-practices/evals/scenarios/missing-index.eval.ts
+++ b/skills/postgres-best-practices/evals/scenarios/missing-index.eval.ts
@@ -0,0 +1,56 @@
 import { describe, it, expect } from "vitest";
 import { runEval } from "../runner.js";
 import type { EvalScenario } from "../types.js";
 /**
 * Eval scenario: a slow query filtering on `customer_id` and `status` against
 * a table that has no index other than its primary key. No PostgreSQL version
 * or extension list is supplied. The answer is expected to reference rule 1.1
 * and propose an index on the filtered columns.
 */
 const scenario: EvalScenario = {
  id: "missing-index-detection",
  name: "Missing Index Detection",
  description:
    "Agent should identify missing index on WHERE clause columns and recommend creating an appropriate index",
  category: "query-performance",
  difficulty: "basic",
  input: {
    schema: `
 CREATE TABLE orders (
  id SERIAL PRIMARY KEY,
  customer_id INT NOT NULL,
  status VARCHAR(50),
  total DECIMAL(10,2),
  created_at TIMESTAMPTZ DEFAULT NOW()
 );
 -- No indexes besides primary key
 -- Table has 5 million rows
 `,
    userQuery:
      "This query is slow and takes 3 seconds: SELECT * FROM orders WHERE customer_id = 12345 AND status = 'pending'",
  },
  expectedOutput: {
    shouldRecommendRules: ["1.1"], // query-missing-indexes
    mustContain: ["index", "customer_id"],
  },
  // Reasoning steps the answer is expected to walk through.
  expectedReasoning: [
    "Identify that the query filters on customer_id and status",
    "Recognize that without an index, this causes a sequential scan",
    "Recommend creating an index on the filtered columns",
  ],
 };
 describe("Missing Index Detection", () => {
  it("should recommend creating an index on filtered columns", async () => {
    const { response, criteriaResults } = await runEval(scenario);
    console.log("Response:", response);
    console.log("Criteria results:", criteriaResults);
    // At least one index-related criterion has to be satisfied.
    const satisfiedIndexCriterion = criteriaResults.find(
      (entry) => entry.criterion.includes("index") && entry.passed
    );
    expect(satisfiedIndexCriterion !== undefined).toBe(true);
    // The answer itself must talk about an index and the filtered column.
    const normalized = response.toLowerCase();
    for (const keyword of ["index", "customer_id"]) {
      expect(normalized).toContain(keyword);
    }
  });
 });
 export { scenario };
--- a/skills/postgres-best-practices/evals/scenarios/n-plus-one.eval.ts
+++ b/skills/postgres-best-practices/evals/scenarios/n-plus-one.eval.ts
@@ -0,0 +1,71 @@
 import { describe, it, expect } from "vitest";
 import { runEval } from "../runner.js";
 import type { EvalScenario } from "../types.js";
 /**
 * Eval scenario: application code that loads 100 posts and then issues one
 * `users` query per post inside a loop (101 queries in total). The answer is
 * expected to reference rule 6.1, name the N+1 pattern, and recommend a JOIN.
 */
 const scenario: EvalScenario = {
  id: "n-plus-one-detection",
  name: "N+1 Query Detection",
  description:
    "Agent should identify N+1 query pattern in application code and recommend using JOINs or batch queries",
  category: "query-performance",
  difficulty: "intermediate",
  input: {
    schema: `
 CREATE TABLE users (
  id SERIAL PRIMARY KEY,
  name VARCHAR(100),
  email VARCHAR(255)
 );
 CREATE TABLE posts (
  id SERIAL PRIMARY KEY,
  user_id INT REFERENCES users(id),
  title VARCHAR(200),
  content TEXT,
  created_at TIMESTAMPTZ DEFAULT NOW()
 );
 `,
    userQuery: `My API endpoint is slow. Here's the code:
 \`\`\`typescript
 // Get all posts
 const posts = await db.query('SELECT * FROM posts LIMIT 100');
 // For each post, get the author
 for (const post of posts) {
  const author = await db.query('SELECT * FROM users WHERE id = $1', [post.user_id]);
  post.author = author;
 }
 \`\`\`
 This makes 101 database queries. How can I optimize it?`,
  },
  expectedOutput: {
    shouldRecommendRules: ["6.1"], // data-n-plus-one
    // NOTE(review): the describe block in this file also accepts "n + 1";
    // this list only lists "n+1" - confirm which spelling(s) should count.
    mustContain: ["join", "n+1"],
  },
  // Reasoning steps the answer is expected to walk through.
  expectedReasoning: [
    "Identify the N+1 query pattern (1 query for posts + N queries for users)",
    "Recognize this as a common performance anti-pattern",
    "Recommend using a JOIN to fetch all data in a single query",
    "Optionally suggest using IN clause for batch fetching",
  ],
 };
 describe("N+1 Query Detection", () => {
  it("should identify N+1 pattern and recommend JOIN", async () => {
    const { response, criteriaResults } = await runEval(scenario);
    console.log("Response:", response);
    console.log("Criteria results:", criteriaResults);
    const normalized = response.toLowerCase();
    // A JOIN has to be part of the recommendation.
    expect(normalized).toContain("join");
    // The N+1 problem must be named; both spellings are accepted.
    const spellings = ["n+1", "n + 1"];
    expect(spellings.some((spelling) => normalized.includes(spelling))).toBe(true);
  });
 });
 export { scenario };
--- a/skills/postgres-best-practices/evals/scenarios/version-constraint.eval.ts
+++ b/skills/postgres-best-practices/evals/scenarios/version-constraint.eval.ts
@@ -0,0 +1,108 @@
 import { describe, it, expect } from "vitest";
 import { runEval } from "../runner.js";
 import type { EvalScenario } from "../types.js";
 /**
 * Scenario 1: PG10 - Should NOT recommend covering indexes (requires PG11+).
 *
 * The query selects `name` in addition to the indexed `email` column, which
 * would normally invite an INCLUDE-based covering index. PostgreSQL 10.0 is
 * specified, so the answer must offer something else.
 */
 const scenarioPg10NoCoveringIndex: EvalScenario = {
  id: "version-constraint-pg10-no-covering",
  name: "Version Constraint - PG10 No Covering Index",
  description:
    "Agent should NOT recommend INCLUDE clause on PostgreSQL 10 since it requires PG11+",
  category: "version-constraints",
  difficulty: "intermediate",
  input: {
    schema: `
 CREATE TABLE users (
  id SERIAL PRIMARY KEY,
  email VARCHAR(255) NOT NULL,
  name VARCHAR(100),
  created_at TIMESTAMPTZ DEFAULT NOW()
 );
 CREATE INDEX users_email_idx ON users (email);
 `,
    userQuery:
      "How can I optimize this query to avoid heap fetches? SELECT email, name FROM users WHERE email = 'test@example.com'",
    postgresVersion: "10.0",
  },
  expectedOutput: {
    shouldRecommendRules: [],
    shouldNotRecommendRules: ["1.2"], // query-covering-indexes requires PG11
    mustContain: ["index"],
    // Match the INCLUDE (...) clause syntax rather than the bare word: plain
    // "include" also occurs in ordinary English ("options include ...").
    // These are the same patterns the test in this file asserts on.
    mustNotContain: ["include (", "include("],
  },
  // Reasoning steps the answer is expected to walk through.
  expectedReasoning: [
    "Recognize that PostgreSQL 10 is specified",
    "Check that covering indexes (INCLUDE clause) require PG11+",
    "Avoid recommending INCLUDE clause",
    "Suggest alternative optimization strategies appropriate for PG10",
  ],
 };
 /**
 * Scenario 2: PG9.3 - Should NOT recommend UPSERT (requires PG9.5+).
 *
 * An insert-or-update request against a table with a composite primary key.
 * PostgreSQL 9.3 is specified, so the answer must avoid ON CONFLICT and
 * propose a different insert/update pattern.
 */
 const scenarioPg93NoUpsert: EvalScenario = {
  id: "version-constraint-pg93-no-upsert",
  name: "Version Constraint - PG9.3 No UPSERT",
  description:
    "Agent should NOT recommend ON CONFLICT on PostgreSQL 9.3 since it requires PG9.5+",
  category: "version-constraints",
  difficulty: "intermediate",
  input: {
    schema: `
 CREATE TABLE settings (
  user_id INT NOT NULL,
  key VARCHAR(50) NOT NULL,
  value TEXT,
  PRIMARY KEY (user_id, key)
 );
 `,
    userQuery:
      "I need to insert a setting if it doesn't exist, or update it if it does. How should I do this?",
    postgresVersion: "9.3",
  },
  expectedOutput: {
    shouldRecommendRules: [],
    shouldNotRecommendRules: ["6.3"], // data-upsert requires PG9.5
    mustContain: ["insert", "update"],
    mustNotContain: ["on conflict"],
  },
  // Reasoning steps the answer is expected to walk through.
  expectedReasoning: [
    "Recognize that PostgreSQL 9.3 is specified",
    "Check that ON CONFLICT (UPSERT) requires PG9.5+",
    "Avoid recommending ON CONFLICT syntax",
    "Suggest alternative pattern (e.g., CTE with INSERT/UPDATE, or try/catch approach)",
  ],
 };
 // Version-constraint evals: each test checks that a feature newer than the
 // specified PostgreSQL version is NOT recommended.
 describe("Version Constraint Tests", () => {
  describe("PG10 - No Covering Index", () => {
    it("should NOT recommend INCLUDE clause for PG10", async () => {
      const result = await runEval(scenarioPg10NoCoveringIndex);
      console.log("Response:", result.response);
      console.log("Criteria results:", result.criteriaResults);
      // runEval reports failures through `error` together with an empty
      // response; without these guards the negative assertions below would
      // pass vacuously whenever the model call itself failed.
      expect(result.error).toBeUndefined();
      expect(result.response.trim()).not.toBe("");
      // Response should NOT contain INCLUDE recommendation
      const responseLower = result.response.toLowerCase();
      expect(responseLower).not.toContain("include (");
      expect(responseLower).not.toContain("include(");
    });
  });
  describe("PG9.3 - No UPSERT", () => {
    it("should NOT recommend ON CONFLICT for PG9.3", async () => {
      const result = await runEval(scenarioPg93NoUpsert);
      console.log("Response:", result.response);
      console.log("Criteria results:", result.criteriaResults);
      // Same guard as above: an errored run yields an empty response, which
      // trivially does not contain "on conflict".
      expect(result.error).toBeUndefined();
      expect(result.response.trim()).not.toBe("");
      // Response should NOT recommend ON CONFLICT
      expect(result.response.toLowerCase()).not.toContain("on conflict");
    });
  });
 });
 export { scenarioPg10NoCoveringIndex, scenarioPg93NoUpsert };
--- a/skills/postgres-best-practices/evals/tsconfig.json
+++ b/skills/postgres-best-practices/evals/tsconfig.json
@@ -0,0 +1,13 @@
 {
  "compilerOptions": {
    "target": "ES2022",
    "module": "ESNext",
    "moduleResolution": "bundler",
    "esModuleInterop": true,
    "strict": true,
    "skipLibCheck": true,
    "outDir": "dist",
    "declaration": true
  },
  "include": ["*.ts", "scenarios/**/*.ts"]
 }
--- a/skills/postgres-best-practices/evals/types.ts
+++ b/skills/postgres-best-practices/evals/types.ts
@@ -0,0 +1,112 @@
 /**
 * Describes one evaluation case: the material presented to the agent and the
 * checks applied to its answer.
 */
 export interface EvalScenario {
  /** Stable, unique key for this scenario. */
  id: string;
  /** Display name shown to humans. */
  name: string;
  /** Summary of the behavior under test. */
  description: string;
  /** Group the scenario belongs to. */
  category:
    | "query-performance"
    | "version-constraints"
    | "extension-requirements";
  /** How demanding the scenario is. */
  difficulty:
    | "basic"
    | "intermediate"
    | "advanced";
  /** Everything handed to the agent. */
  input: {
    /** SQL schema the question refers to. */
    schema: string;
    /** The question or request as the user phrased it. */
    userQuery: string;
    /** PostgreSQL version in use, when given (e.g., "10.0", "15.4"). */
    postgresVersion?: string;
    /** Extensions that may be used, when given. */
    availableExtensions?: string[];
    /** Free-form extra context. */
    context?: string;
  };
  /** Checks applied to the agent's answer. */
  expectedOutput: {
    /** Rule IDs the answer should recommend. */
    shouldRecommendRules: string[];
    /** Rule IDs the answer must avoid (version/extension constraints). */
    shouldNotRecommendRules?: string[];
    /** Strings required to be present in the answer. */
    mustContain: string[];
    /** Strings required to be absent from the answer. */
    mustNotContain?: string[];
  };
  /** Reasoning steps the agent is expected to follow. */
  expectedReasoning: string[];
 }
 /**
 * Outcome of checking a response against one criterion.
 */
 export interface CriterionResult {
  /** Text describing what was checked. */
  criterion: string;
  /** True when the response satisfied the criterion. */
  passed: boolean;
  /** Optional supporting evidence or explanation. */
  evidence?: string;
 }
 /**
 * Everything recorded for one executed scenario.
 */
 export interface EvalResult {
  /** ID of the scenario that produced this result. */
  scenarioId: string;
  /** True only when every criterion passed. */
  passed: boolean;
  /** Rule IDs found in the response. */
  rulesReferenced: string[];
  /** Per-criterion outcomes. */
  criteriaResults: CriterionResult[];
  /** Complete response text from the agent. */
  response: string;
  /** Elapsed time, in milliseconds. */
  latencyMs: number;
  /** Error message, set when the evaluation failed. */
  error?: string;
 }
 /**
 * Settings that control how the eval runner operates.
 */
 export interface EvalConfig {
  /** Location of the AGENTS.md file. */
  agentsPath: string;
  /** Name of the model used for evaluation. */
  model?: string;
  /** Upper bound on tokens in the response. */
  maxTokens?: number;
  /** Sampling temperature used for generation. */
  temperature?: number;
 }
--- a/skills/postgres-best-practices/evals/utils.ts
+++ b/skills/postgres-best-practices/evals/utils.ts
@@ -0,0 +1,72 @@
 import type { EvalResult, EvalScenario } from "./types.js";
 /**
 * Render a Markdown summary of a batch of eval results.
 *
 * The output consists of a heading, an overall pass-rate line, and a table
 * with one row per result (scenario id, PASS/FAIL, latency, referenced rules).
 *
 * @param results - Results to summarize; may be empty.
 * @returns The Markdown text as a single newline-joined string.
 */
 export function formatResultsSummary(results: EvalResult[]): string {
  const lines: string[] = [];
  lines.push("## Eval Results Summary\n");
  const passed = results.filter((r) => r.passed).length;
  const total = results.length;
  // Guard the division: with no results 0/0 is NaN and would render "(NaN%)".
  const passRate = total === 0 ? "0.0" : ((passed / total) * 100).toFixed(1);
  lines.push(`**Pass Rate:** ${passed}/${total} (${passRate}%)\n`);
  lines.push("| Scenario | Status | Latency | Rules Referenced |");
  lines.push("|----------|--------|---------|------------------|");
  for (const result of results) {
    const status = result.passed ? "PASS" : "FAIL";
    const latency = `${result.latencyMs}ms`;
    // An empty rule list joins to "", which falls back to "none".
    const rules = result.rulesReferenced.join(", ") || "none";
    lines.push(`| ${result.scenarioId} | ${status} | ${latency} | ${rules} |`);
  }
  return lines.join("\n");
 }
 /**
 * Render a detailed Markdown report for a single eval result.
 *
 * The report lists status, latency and referenced rules, the error (if any),
 * every criterion outcome with its evidence, and finally the raw response
 * inside a fenced code block.
 *
 * @param result - The result to render.
 * @returns The Markdown text as a single newline-joined string.
 */
 export function formatDetailedResult(result: EvalResult): string {
  const lines: string[] = [];
  lines.push(`## ${result.scenarioId}\n`);
  lines.push(`**Status:** ${result.passed ? "PASS" : "FAIL"}`);
  lines.push(`**Latency:** ${result.latencyMs}ms`);
  lines.push(`**Rules Referenced:** ${result.rulesReferenced.join(", ") || "none"}\n`);
  if (result.error) {
    lines.push(`**Error:** ${result.error}\n`);
  }
  lines.push("### Criteria Results\n");
  for (const criterion of result.criteriaResults) {
    const icon = criterion.passed ? "+" : "-";
    lines.push(`${icon} ${criterion.criterion}`);
    if (criterion.evidence) {
      lines.push(`  Evidence: ${criterion.evidence}`);
    }
  }
  // Responses frequently contain their own ``` blocks, which would close a
  // fixed three-backtick fence early. Use a fence one backtick longer than the
  // longest backtick run in the response (never shorter than three).
  const backtickRuns = result.response.match(/`+/g) ?? [];
  const longestRun = backtickRuns.reduce((max, run) => Math.max(max, run.length), 0);
  const fence = "`".repeat(Math.max(3, longestRun + 1));
  lines.push("\n### Response\n");
  lines.push(fence);
  lines.push(result.response);
  lines.push(fence);
  return lines.join("\n");
 }
 /**
 * Build an `EvalScenario`, deriving the id from the name when none is given.
 *
 * The derived id is the lowercased name with each run of whitespace replaced
 * by a single "-".
 *
 * @param partial - Scenario fields; `id` may be omitted, undefined or empty.
 * @returns A complete scenario whose `id` is always a non-empty-derived string.
 */
 export function createScenario(
  partial: Omit<EvalScenario, "id"> & { id?: string }
 ): EvalScenario {
  const id = partial.id || partial.name.toLowerCase().replace(/\s+/g, "-");
  // Spread first, then set `id`: the other order lets an explicit
  // `id: undefined` on `partial` overwrite the generated value.
  return { ...partial, id };
 }
--- a/skills/postgres-best-practices/evals/vitest.config.ts
+++ b/skills/postgres-best-practices/evals/vitest.config.ts
@@ -0,0 +1,9 @@
 import { defineConfig } from "vitest/config";
 // Vitest configuration for the eval suite in this directory.
 export default defineConfig({
  test: {
    // Only files under scenarios/ ending in .eval.ts are collected as tests.
    include: ["scenarios/**/*.eval.ts"],
    testTimeout: 60000, // 60 seconds for LLM calls
    reporters: ["verbose"],
  },
 });
--- a/skills/postgres-best-practices/profiles/aurora.json
+++ b/skills/postgres-best-practices/profiles/aurora.json
@@ -0,0 +1,23 @@
 {
  "name": "aurora",
  "minVersion": "13",
  "maxVersion": "16",
  "extensions": {
    "available": [
      "pg_stat_statements",
      "pgcrypto",
      "uuid-ossp"
    ],
    "installable": [
      "postgis",
      "pg_hint_plan",
      "pg_similarity"
    ],
    "unavailable": [
      "pg_cron",
      "pg_partman",
      "timescaledb"
    ]
  },
  "notes": "AWS Aurora PostgreSQL. Some extensions are not available due to managed service restrictions. Aurora has its own connection pooling (RDS Proxy) and automatic failover."
 }
--- a/skills/postgres-best-practices/profiles/self-hosted.json
+++ b/skills/postgres-best-practices/profiles/self-hosted.json
@@ -0,0 +1,18 @@
 {
  "name": "self-hosted",
  "minVersion": "12",
  "extensions": {
    "available": [],
    "installable": [
      "pg_stat_statements",
      "pgcrypto",
      "uuid-ossp",
      "postgis",
      "pg_trgm",
      "btree_gin",
      "btree_gist"
    ],
    "unavailable": []
  },
  "notes": "Generic self-hosted PostgreSQL. Extension availability depends on server configuration. Check pg_available_extensions for what can be installed."
 }
--- a/skills/postgres-best-practices/profiles/supabase.json
+++ b/skills/postgres-best-practices/profiles/supabase.json
@@ -0,0 +1,27 @@
 {
  "name": "supabase",
  "minVersion": "15",
  "extensions": {
    "available": [
      "pg_stat_statements",
      "pgcrypto",
      "uuid-ossp",
      "pgjwt",
      "pg_graphql",
      "pg_net",
      "pgsodium",
      "supabase_vault",
      "pg_jsonschema"
    ],
    "installable": [
      "postgis",
      "pg_cron",
      "pgtap",
      "plv8",
      "http",
      "pg_hashids"
    ],
    "unavailable": []
  },
  "notes": "Supabase manages connection pooling via Supavisor. Direct connection limits differ from pooled connections. All standard Postgres extensions are available."
 }
--- a/skills/postgres-best-practices/rules/advanced-jsonb-indexing.md
+++ b/skills/postgres-best-practices/rules/advanced-jsonb-indexing.md
@@ -3,6 +3,7 @@ title: Index JSONB Columns for Efficient Querying
 impact: MEDIUM
 impactDescription: 10-100x faster JSONB queries with proper indexing
 tags: jsonb, gin, indexes, json
 minVersion: "9.4"
 ---
 ## Index JSONB Columns for Efficient Querying
--- a/skills/postgres-best-practices/rules/data-upsert.md
+++ b/skills/postgres-best-practices/rules/data-upsert.md
@@ -3,6 +3,7 @@ title: Use UPSERT for Insert-or-Update Operations
 impact: MEDIUM
 impactDescription: Atomic operation, eliminates race conditions
 tags: upsert, on-conflict, insert, update
 minVersion: "9.5"
 ---
 ## Use UPSERT for Insert-or-Update Operations
--- a/skills/postgres-best-practices/rules/lock-skip-locked.md
+++ b/skills/postgres-best-practices/rules/lock-skip-locked.md
@@ -3,6 +3,7 @@ title: Use SKIP LOCKED for Non-Blocking Queue Processing
 impact: MEDIUM-HIGH
 impactDescription: 10x throughput for worker queues
 tags: skip-locked, queue, workers, concurrency
 minVersion: "9.5"
 ---
 ## Use SKIP LOCKED for Non-Blocking Queue Processing
--- a/skills/postgres-best-practices/rules/monitor-pg-stat-statements.md
+++ b/skills/postgres-best-practices/rules/monitor-pg-stat-statements.md
@@ -3,6 +3,7 @@ title: Enable pg_stat_statements for Query Analysis
 impact: LOW-MEDIUM
 impactDescription: Identify top resource-consuming queries
 tags: pg-stat-statements, monitoring, statistics, performance
 extensions: pg_stat_statements
 ---
 ## Enable pg_stat_statements for Query Analysis
--- a/skills/postgres-best-practices/rules/query-covering-indexes.md
+++ b/skills/postgres-best-practices/rules/query-covering-indexes.md
@@ -3,6 +3,7 @@ title: Use Covering Indexes to Avoid Table Lookups
 impact: MEDIUM-HIGH
 impactDescription: 2-5x faster queries by eliminating heap fetches
 tags: indexes, covering-index, include, index-only-scan
 minVersion: "11"
 ---
 ## Use Covering Indexes to Avoid Table Lookups
--- a/skills/postgres-best-practices/rules/schema-partitioning.md
+++ b/skills/postgres-best-practices/rules/schema-partitioning.md
@@ -3,6 +3,7 @@ title: Partition Large Tables for Better Performance
 impact: MEDIUM-HIGH
 impactDescription: 5-20x faster queries and maintenance on large tables
 tags: partitioning, large-tables, time-series, performance
 minVersion: "10"
 ---
 ## Partition Large Tables for Better Performance