mirror of
https://github.com/supabase/agent-skills.git
synced 2026-01-26 19:09:51 +08:00
improve postgres best practices and add evals
This commit is contained in:
@@ -3,6 +3,10 @@
|
|||||||
"supabase": {
|
"supabase": {
|
||||||
"type": "http",
|
"type": "http",
|
||||||
"url": "https://mcp.supabase.com/mcp?features=docs"
|
"url": "https://mcp.supabase.com/mcp?features=docs"
|
||||||
|
},
|
||||||
|
"linear": {
|
||||||
|
"type": "http",
|
||||||
|
"url": "https://mcp.linear.app/mcp"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,7 +7,8 @@ import {
|
|||||||
validateSkillExists,
|
validateSkillExists,
|
||||||
} from "./config.js";
|
} from "./config.js";
|
||||||
import { parseRuleFile } from "./parser.js";
|
import { parseRuleFile } from "./parser.js";
|
||||||
import type { Metadata, Rule, Section } from "./types.js";
|
import { filterRulesForProfile, listProfiles, loadProfile } from "./profiles.js";
|
||||||
|
import type { Metadata, Profile, Rule, Section } from "./types.js";
|
||||||
import { validateRuleFile } from "./validate.js";
|
import { validateRuleFile } from "./validate.js";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -100,8 +101,13 @@ export function generateSectionMap(
|
|||||||
/**
|
/**
|
||||||
* Build AGENTS.md for a specific skill
|
* Build AGENTS.md for a specific skill
|
||||||
*/
|
*/
|
||||||
function buildSkill(paths: SkillPaths): void {
|
function buildSkill(paths: SkillPaths, profile?: Profile): void {
|
||||||
console.log(`[${paths.name}] Building AGENTS.md...`);
|
const profileSuffix = profile ? `.${profile.name}` : "";
|
||||||
|
const outputFile = profile
|
||||||
|
? paths.agentsOutput.replace(".md", `${profileSuffix}.md`)
|
||||||
|
: paths.agentsOutput;
|
||||||
|
|
||||||
|
console.log(`[${paths.name}] Building AGENTS${profileSuffix}.md...`);
|
||||||
|
|
||||||
// Load metadata and sections
|
// Load metadata and sections
|
||||||
const metadata = loadMetadata(paths.metadataFile, paths.name);
|
const metadata = loadMetadata(paths.metadataFile, paths.name);
|
||||||
@@ -113,7 +119,7 @@ function buildSkill(paths: SkillPaths): void {
|
|||||||
if (!existsSync(paths.rulesDir)) {
|
if (!existsSync(paths.rulesDir)) {
|
||||||
console.log(` No rules directory found. Generating empty AGENTS.md.`);
|
console.log(` No rules directory found. Generating empty AGENTS.md.`);
|
||||||
writeFileSync(
|
writeFileSync(
|
||||||
paths.agentsOutput,
|
outputFile,
|
||||||
`# ${skillTitle}\n\nNo rules defined yet.\n`,
|
`# ${skillTitle}\n\nNo rules defined yet.\n`,
|
||||||
);
|
);
|
||||||
return;
|
return;
|
||||||
@@ -147,10 +153,17 @@ function buildSkill(paths: SkillPaths): void {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Filter rules by profile if specified
|
||||||
|
let filteredRules = rules;
|
||||||
|
if (profile) {
|
||||||
|
filteredRules = filterRulesForProfile(rules, profile);
|
||||||
|
console.log(` Filtered to ${filteredRules.length} rules for profile "${profile.name}"`);
|
||||||
|
}
|
||||||
|
|
||||||
// Group rules by section and assign IDs
|
// Group rules by section and assign IDs
|
||||||
const rulesBySection = new Map<number, Rule[]>();
|
const rulesBySection = new Map<number, Rule[]>();
|
||||||
|
|
||||||
for (const rule of rules) {
|
for (const rule of filteredRules) {
|
||||||
const sectionRules = rulesBySection.get(rule.section) || [];
|
const sectionRules = rulesBySection.get(rule.section) || [];
|
||||||
sectionRules.push(rule);
|
sectionRules.push(rule);
|
||||||
rulesBySection.set(rule.section, sectionRules);
|
rulesBySection.set(rule.section, sectionRules);
|
||||||
@@ -225,6 +238,18 @@ function buildSkill(paths: SkillPaths): void {
|
|||||||
output.push(`**Impact: ${rule.impact}**\n`);
|
output.push(`**Impact: ${rule.impact}**\n`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Add prerequisites if minVersion or extensions are specified
|
||||||
|
const prerequisites: string[] = [];
|
||||||
|
if (rule.minVersion) {
|
||||||
|
prerequisites.push(`PostgreSQL ${rule.minVersion}+`);
|
||||||
|
}
|
||||||
|
if (rule.extensions && rule.extensions.length > 0) {
|
||||||
|
prerequisites.push(`Extension${rule.extensions.length > 1 ? "s" : ""}: ${rule.extensions.join(", ")}`);
|
||||||
|
}
|
||||||
|
if (prerequisites.length > 0) {
|
||||||
|
output.push(`**Prerequisites:** ${prerequisites.join(" | ")}\n`);
|
||||||
|
}
|
||||||
|
|
||||||
output.push(`${rule.explanation}\n`);
|
output.push(`${rule.explanation}\n`);
|
||||||
|
|
||||||
for (const example of rule.examples) {
|
for (const example of rule.examples) {
|
||||||
@@ -269,9 +294,52 @@ function buildSkill(paths: SkillPaths): void {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Write output
|
// Write output
|
||||||
writeFileSync(paths.agentsOutput, output.join("\n"));
|
writeFileSync(outputFile, output.join("\n"));
|
||||||
console.log(` Generated: ${paths.agentsOutput}`);
|
console.log(` Generated: ${outputFile}`);
|
||||||
console.log(` Total rules: ${rules.length}`);
|
console.log(` Total rules: ${filteredRules.length}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse CLI arguments
|
||||||
|
*/
|
||||||
|
function parseArgs(): { skill?: string; profile?: string; allProfiles: boolean } {
|
||||||
|
const args = process.argv.slice(2);
|
||||||
|
let skill: string | undefined;
|
||||||
|
let profile: string | undefined;
|
||||||
|
let allProfiles = false;
|
||||||
|
|
||||||
|
for (let i = 0; i < args.length; i++) {
|
||||||
|
const arg = args[i];
|
||||||
|
if (arg === "--profile" && args[i + 1]) {
|
||||||
|
profile = args[i + 1];
|
||||||
|
i++;
|
||||||
|
} else if (arg === "--all-profiles") {
|
||||||
|
allProfiles = true;
|
||||||
|
} else if (!arg.startsWith("--")) {
|
||||||
|
skill = arg;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return { skill, profile, allProfiles };
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Build a skill with all available profiles
|
||||||
|
*/
|
||||||
|
function buildSkillWithAllProfiles(paths: SkillPaths): void {
|
||||||
|
const profilesDir = join(paths.skillDir, "profiles");
|
||||||
|
const profiles = listProfiles(profilesDir);
|
||||||
|
|
||||||
|
// Build default (no profile)
|
||||||
|
buildSkill(paths);
|
||||||
|
|
||||||
|
// Build each profile variant
|
||||||
|
for (const profileName of profiles) {
|
||||||
|
const profile = loadProfile(profilesDir, profileName);
|
||||||
|
if (profile) {
|
||||||
|
buildSkill(paths, profile);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run build when executed directly
|
// Run build when executed directly
|
||||||
@@ -280,7 +348,7 @@ const isMainModule =
|
|||||||
process.argv[1]?.endsWith("build.js");
|
process.argv[1]?.endsWith("build.js");
|
||||||
|
|
||||||
if (isMainModule) {
|
if (isMainModule) {
|
||||||
const targetSkill = process.argv[2];
|
const { skill: targetSkill, profile: profileName, allProfiles } = parseArgs();
|
||||||
|
|
||||||
if (targetSkill) {
|
if (targetSkill) {
|
||||||
// Build specific skill
|
// Build specific skill
|
||||||
@@ -292,7 +360,29 @@ if (isMainModule) {
|
|||||||
}
|
}
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
}
|
}
|
||||||
buildSkill(getSkillPaths(targetSkill));
|
|
||||||
|
const paths = getSkillPaths(targetSkill);
|
||||||
|
|
||||||
|
if (allProfiles) {
|
||||||
|
// Build all profile variants
|
||||||
|
buildSkillWithAllProfiles(paths);
|
||||||
|
} else if (profileName) {
|
||||||
|
// Build with specific profile
|
||||||
|
const profilesDir = join(paths.skillDir, "profiles");
|
||||||
|
const profile = loadProfile(profilesDir, profileName);
|
||||||
|
if (!profile) {
|
||||||
|
console.error(`Error: Profile "${profileName}" not found`);
|
||||||
|
const available = listProfiles(profilesDir);
|
||||||
|
if (available.length > 0) {
|
||||||
|
console.error(`Available profiles: ${available.join(", ")}`);
|
||||||
|
}
|
||||||
|
process.exit(1);
|
||||||
|
}
|
||||||
|
buildSkill(paths, profile);
|
||||||
|
} else {
|
||||||
|
// Build default
|
||||||
|
buildSkill(paths);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// Build all skills
|
// Build all skills
|
||||||
const skills = discoverSkills();
|
const skills = discoverSkills();
|
||||||
@@ -303,7 +393,12 @@ if (isMainModule) {
|
|||||||
|
|
||||||
console.log(`Found ${skills.length} skill(s): ${skills.join(", ")}\n`);
|
console.log(`Found ${skills.length} skill(s): ${skills.join(", ")}\n`);
|
||||||
for (const skill of skills) {
|
for (const skill of skills) {
|
||||||
buildSkill(getSkillPaths(skill));
|
const paths = getSkillPaths(skill);
|
||||||
|
if (allProfiles) {
|
||||||
|
buildSkillWithAllProfiles(paths);
|
||||||
|
} else {
|
||||||
|
buildSkill(paths);
|
||||||
|
}
|
||||||
console.log("");
|
console.log("");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -251,6 +251,7 @@ export function parseRuleFile(
|
|||||||
const examples = extractExamples(body);
|
const examples = extractExamples(body);
|
||||||
|
|
||||||
const tags = frontmatter.tags?.split(",").map((t) => t.trim()) || [];
|
const tags = frontmatter.tags?.split(",").map((t) => t.trim()) || [];
|
||||||
|
const extensions = frontmatter.extensions?.split(",").map((e) => e.trim()) || [];
|
||||||
|
|
||||||
// Validation warnings
|
// Validation warnings
|
||||||
if (!explanation || explanation.length < 20) {
|
if (!explanation || explanation.length < 20) {
|
||||||
@@ -271,6 +272,8 @@ export function parseRuleFile(
|
|||||||
examples,
|
examples,
|
||||||
references: extractReferences(body),
|
references: extractReferences(body),
|
||||||
tags: tags.length > 0 ? tags : undefined,
|
tags: tags.length > 0 ? tags : undefined,
|
||||||
|
minVersion: frontmatter.minVersion || undefined,
|
||||||
|
extensions: extensions.length > 0 ? extensions : undefined,
|
||||||
};
|
};
|
||||||
|
|
||||||
return { success: true, rule, errors, warnings };
|
return { success: true, rule, errors, warnings };
|
||||||
|
|||||||
102
packages/skills-build/src/profiles.ts
Normal file
102
packages/skills-build/src/profiles.ts
Normal file
@@ -0,0 +1,102 @@
|
|||||||
|
import { existsSync, readdirSync, readFileSync } from "node:fs";
|
||||||
|
import { join } from "node:path";
|
||||||
|
import type { Profile, Rule } from "./types.js";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Load a profile from the profiles directory
|
||||||
|
*/
|
||||||
|
export function loadProfile(profilesDir: string, profileName: string): Profile | null {
|
||||||
|
const profileFile = join(profilesDir, `${profileName}.json`);
|
||||||
|
if (!existsSync(profileFile)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
return JSON.parse(readFileSync(profileFile, "utf-8"));
|
||||||
|
} catch (error) {
|
||||||
|
console.error(`Error loading profile ${profileName}:`, error);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* List all available profiles in the profiles directory
|
||||||
|
*/
|
||||||
|
export function listProfiles(profilesDir: string): string[] {
|
||||||
|
if (!existsSync(profilesDir)) {
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
|
||||||
|
return readdirSync(profilesDir)
|
||||||
|
.filter((f) => f.endsWith(".json"))
|
||||||
|
.map((f) => f.replace(".json", ""));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compare version strings (e.g., "9.5", "11", "14.2")
|
||||||
|
* Returns: negative if a < b, 0 if equal, positive if a > b
|
||||||
|
*/
|
||||||
|
function compareVersions(a: string, b: string): number {
|
||||||
|
const partsA = a.split(".").map(Number);
|
||||||
|
const partsB = b.split(".").map(Number);
|
||||||
|
|
||||||
|
const maxLen = Math.max(partsA.length, partsB.length);
|
||||||
|
for (let i = 0; i < maxLen; i++) {
|
||||||
|
const numA = partsA[i] || 0;
|
||||||
|
const numB = partsB[i] || 0;
|
||||||
|
if (numA !== numB) {
|
||||||
|
return numA - numB;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if a rule is compatible with a profile
|
||||||
|
*/
|
||||||
|
export function isRuleCompatibleWithProfile(rule: Rule, profile: Profile): boolean {
|
||||||
|
// Check version requirement
|
||||||
|
if (rule.minVersion) {
|
||||||
|
if (compareVersions(rule.minVersion, profile.minVersion) > 0) {
|
||||||
|
// Rule requires a higher version than profile supports
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (profile.maxVersion && compareVersions(rule.minVersion, profile.maxVersion) > 0) {
|
||||||
|
// Rule requires a version higher than profile's max
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check extension requirements
|
||||||
|
if (rule.extensions && rule.extensions.length > 0) {
|
||||||
|
const allExtensions = [
|
||||||
|
...(profile.extensions.available || []),
|
||||||
|
...(profile.extensions.installable || []),
|
||||||
|
];
|
||||||
|
|
||||||
|
for (const ext of rule.extensions) {
|
||||||
|
if (profile.extensions.unavailable?.includes(ext)) {
|
||||||
|
// Extension is explicitly unavailable in this profile
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!allExtensions.includes(ext)) {
|
||||||
|
// Extension is not available or installable
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if rule is explicitly excluded
|
||||||
|
if (profile.excludeRules?.includes(rule.id)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Filter rules based on profile constraints
|
||||||
|
*/
|
||||||
|
export function filterRulesForProfile(rules: Rule[], profile: Profile): Rule[] {
|
||||||
|
return rules.filter((rule) => isRuleCompatibleWithProfile(rule, profile));
|
||||||
|
}
|
||||||
@@ -26,6 +26,8 @@ export interface Rule {
|
|||||||
references?: string[];
|
references?: string[];
|
||||||
tags?: string[];
|
tags?: string[];
|
||||||
supabaseNotes?: string;
|
supabaseNotes?: string;
|
||||||
|
minVersion?: string; // Minimum PostgreSQL version required (e.g., "11", "14")
|
||||||
|
extensions?: string[]; // Required PostgreSQL extensions (e.g., ["pg_stat_statements"])
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface Section {
|
export interface Section {
|
||||||
@@ -57,3 +59,16 @@ export interface ValidationResult {
|
|||||||
errors: string[];
|
errors: string[];
|
||||||
warnings: string[];
|
warnings: string[];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export interface Profile {
|
||||||
|
name: string;
|
||||||
|
minVersion: string;
|
||||||
|
maxVersion?: string;
|
||||||
|
extensions: {
|
||||||
|
available: string[];
|
||||||
|
installable?: string[];
|
||||||
|
unavailable: string[];
|
||||||
|
};
|
||||||
|
excludeRules?: string[];
|
||||||
|
notes?: string;
|
||||||
|
}
|
||||||
|
|||||||
1502
skills/postgres-best-practices/AGENTS.aurora.md
Normal file
1502
skills/postgres-best-practices/AGENTS.aurora.md
Normal file
File diff suppressed because it is too large
Load Diff
@@ -191,6 +191,8 @@ Reference: https://www.postgresql.org/docs/current/indexes-multicolumn.html
|
|||||||
|
|
||||||
**Impact: MEDIUM-HIGH (2-5x faster queries by eliminating heap fetches)**
|
**Impact: MEDIUM-HIGH (2-5x faster queries by eliminating heap fetches)**
|
||||||
|
|
||||||
|
**Prerequisites:** PostgreSQL 11+
|
||||||
|
|
||||||
Covering indexes include all columns needed by a query, enabling index-only scans that skip the table entirely.
|
Covering indexes include all columns needed by a query, enabling index-only scans that skip the table entirely.
|
||||||
|
|
||||||
**Incorrect (index scan + heap fetch):**
|
**Incorrect (index scan + heap fetch):**
|
||||||
@@ -680,6 +682,8 @@ Reference: https://www.postgresql.org/docs/current/ddl-constraints.html#DDL-CONS
|
|||||||
|
|
||||||
**Impact: MEDIUM-HIGH (5-20x faster queries and maintenance on large tables)**
|
**Impact: MEDIUM-HIGH (5-20x faster queries and maintenance on large tables)**
|
||||||
|
|
||||||
|
**Prerequisites:** PostgreSQL 10+
|
||||||
|
|
||||||
Partitioning splits a large table into smaller pieces, improving query performance and maintenance operations.
|
Partitioning splits a large table into smaller pieces, improving query performance and maintenance operations.
|
||||||
|
|
||||||
**Incorrect (single large table):**
|
**Incorrect (single large table):**
|
||||||
@@ -997,6 +1001,8 @@ Reference: https://www.postgresql.org/docs/current/explicit-locking.html#ADVISOR
|
|||||||
|
|
||||||
**Impact: MEDIUM-HIGH (10x throughput for worker queues)**
|
**Impact: MEDIUM-HIGH (10x throughput for worker queues)**
|
||||||
|
|
||||||
|
**Prerequisites:** PostgreSQL 9.5+
|
||||||
|
|
||||||
When multiple workers process a queue, SKIP LOCKED allows workers to process different rows without waiting.
|
When multiple workers process a queue, SKIP LOCKED allows workers to process different rows without waiting.
|
||||||
|
|
||||||
**Incorrect (workers block each other):**
|
**Incorrect (workers block each other):**
|
||||||
@@ -1194,6 +1200,8 @@ Reference: https://supabase.com/docs/guides/database/pagination
|
|||||||
|
|
||||||
**Impact: MEDIUM (Atomic operation, eliminates race conditions)**
|
**Impact: MEDIUM (Atomic operation, eliminates race conditions)**
|
||||||
|
|
||||||
|
**Prerequisites:** PostgreSQL 9.5+
|
||||||
|
|
||||||
Using separate SELECT-then-INSERT/UPDATE creates race conditions. Use INSERT ... ON CONFLICT for atomic upserts.
|
Using separate SELECT-then-INSERT/UPDATE creates race conditions. Use INSERT ... ON CONFLICT for atomic upserts.
|
||||||
|
|
||||||
**Incorrect (check-then-insert race condition):**
|
**Incorrect (check-then-insert race condition):**
|
||||||
@@ -1245,6 +1253,8 @@ Using pg_stat_statements, EXPLAIN ANALYZE, metrics collection, and performance d
|
|||||||
|
|
||||||
**Impact: LOW-MEDIUM (Identify top resource-consuming queries)**
|
**Impact: LOW-MEDIUM (Identify top resource-consuming queries)**
|
||||||
|
|
||||||
|
**Prerequisites:** Extension: pg_stat_statements
|
||||||
|
|
||||||
pg_stat_statements tracks execution statistics for all queries, helping identify slow and frequent queries.
|
pg_stat_statements tracks execution statistics for all queries, helping identify slow and frequent queries.
|
||||||
|
|
||||||
**Incorrect (no visibility into query patterns):**
|
**Incorrect (no visibility into query patterns):**
|
||||||
@@ -1391,6 +1401,8 @@ Full-text search, JSONB optimization, PostGIS, extensions, and advanced Postgres
|
|||||||
|
|
||||||
**Impact: MEDIUM (10-100x faster JSONB queries with proper indexing)**
|
**Impact: MEDIUM (10-100x faster JSONB queries with proper indexing)**
|
||||||
|
|
||||||
|
**Prerequisites:** PostgreSQL 9.4+
|
||||||
|
|
||||||
JSONB queries without indexes scan the entire table. Use GIN indexes for containment queries.
|
JSONB queries without indexes scan the entire table. Use GIN indexes for containment queries.
|
||||||
|
|
||||||
**Incorrect (no index on JSONB):**
|
**Incorrect (no index on JSONB):**
|
||||||
|
|||||||
1502
skills/postgres-best-practices/AGENTS.self-hosted.md
Normal file
1502
skills/postgres-best-practices/AGENTS.self-hosted.md
Normal file
File diff suppressed because it is too large
Load Diff
1502
skills/postgres-best-practices/AGENTS.supabase.md
Normal file
1502
skills/postgres-best-practices/AGENTS.supabase.md
Normal file
File diff suppressed because it is too large
Load Diff
@@ -3,6 +3,26 @@
|
|||||||
This repository contains Postgres performance optimization rules optimized for
|
This repository contains Postgres performance optimization rules optimized for
|
||||||
AI agents and LLMs.
|
AI agents and LLMs.
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
- **Minimum PostgreSQL Version:** 9.5 (some rules require newer versions)
|
||||||
|
- **Recommended Version:** 15+
|
||||||
|
- **Optional Extensions:** pg_stat_statements (for monitoring rules)
|
||||||
|
|
||||||
|
### Version Compatibility
|
||||||
|
|
||||||
|
Some rules require specific PostgreSQL versions due to feature availability:
|
||||||
|
|
||||||
|
| Feature | Min Version | Affected Rules |
|
||||||
|
|---------|-------------|----------------|
|
||||||
|
| ON CONFLICT (UPSERT) | 9.5 | data-upsert |
|
||||||
|
| SKIP LOCKED | 9.5 | lock-skip-locked |
|
||||||
|
| JSONB type | 9.4 | advanced-jsonb-indexing |
|
||||||
|
| Declarative Partitioning | 10 | schema-partitioning |
|
||||||
|
| Covering Indexes (INCLUDE) | 11 | query-covering-indexes |
|
||||||
|
|
||||||
|
Rules include `minVersion` and `extensions` frontmatter fields. Agents should check these against the target environment before recommending specific rules.
|
||||||
|
|
||||||
## Quick Start
|
## Quick Start
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|||||||
@@ -55,3 +55,52 @@ Each rule file contains:
|
|||||||
## Full Compiled Document
|
## Full Compiled Document
|
||||||
|
|
||||||
For the complete guide with all rules expanded: `AGENTS.md`
|
For the complete guide with all rules expanded: `AGENTS.md`
|
||||||
|
|
||||||
|
## Runtime Detection
|
||||||
|
|
||||||
|
Before applying rules, agents should detect the PostgreSQL environment to ensure compatibility:
|
||||||
|
|
||||||
|
### Version Detection
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SELECT version();
|
||||||
|
-- Example output: PostgreSQL 15.4 on x86_64-pc-linux-gnu
|
||||||
|
```
|
||||||
|
|
||||||
|
Extract the major version number (e.g., "15" from "PostgreSQL 15.4") to check against rule `minVersion` requirements.
|
||||||
|
|
||||||
|
### Extension Availability
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SELECT name, installed_version, default_version
|
||||||
|
FROM pg_available_extensions
|
||||||
|
WHERE name IN ('pg_stat_statements', 'pgcrypto', 'uuid-ossp', 'postgis')
|
||||||
|
ORDER BY name;
|
||||||
|
```
|
||||||
|
|
||||||
|
Check if required extensions are available before recommending rules that depend on them.
|
||||||
|
|
||||||
|
### Configuration Check
|
||||||
|
|
||||||
|
```sql
|
||||||
|
SELECT name, setting
|
||||||
|
FROM pg_settings
|
||||||
|
WHERE name IN ('shared_preload_libraries', 'max_connections', 'work_mem');
|
||||||
|
```
|
||||||
|
|
||||||
|
## Rule Filtering
|
||||||
|
|
||||||
|
Only recommend rules where:
|
||||||
|
- `minVersion` <= detected PostgreSQL version (or minVersion is unset)
|
||||||
|
- All required `extensions` are available or installable
|
||||||
|
- The rule is appropriate for the user's deployment context
|
||||||
|
|
||||||
|
### Version Compatibility
|
||||||
|
|
||||||
|
| Feature | Min Version | Affected Rules |
|
||||||
|
|---------|-------------|----------------|
|
||||||
|
| ON CONFLICT (UPSERT) | 9.5 | data-upsert |
|
||||||
|
| SKIP LOCKED | 9.5 | lock-skip-locked |
|
||||||
|
| JSONB type | 9.4 | advanced-jsonb-indexing |
|
||||||
|
| Declarative Partitioning | 10 | schema-partitioning |
|
||||||
|
| Covering Indexes (INCLUDE) | 11 | query-covering-indexes |
|
||||||
|
|||||||
308
skills/postgres-best-practices/evals/README.md
Normal file
308
skills/postgres-best-practices/evals/README.md
Normal file
@@ -0,0 +1,308 @@
|
|||||||
|
# PostgreSQL Best Practices - Evaluation Scenarios
|
||||||
|
|
||||||
|
This directory contains evaluation scenarios to test that AI agents correctly apply the PostgreSQL best practices rules, including proper handling of version constraints and extension requirements.
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
The evals use [Vitest](https://vitest.dev/) as the test framework and the [Vercel AI SDK](https://ai-sdk.dev/) to interact with Claude for generating responses. Each scenario tests a specific aspect of rule application.
|
||||||
|
|
||||||
|
## Running Evals
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Install dependencies
|
||||||
|
npm install
|
||||||
|
|
||||||
|
# Run all evals
|
||||||
|
npm run eval
|
||||||
|
|
||||||
|
# Run in watch mode
|
||||||
|
npm run eval:watch
|
||||||
|
|
||||||
|
# Run specific scenario
|
||||||
|
npm run eval -- -t "Missing Index"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Environment Setup
|
||||||
|
|
||||||
|
Set your Anthropic API key:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export ANTHROPIC_API_KEY=your_api_key
|
||||||
|
```
|
||||||
|
|
||||||
|
## Evaluation Scenarios
|
||||||
|
|
||||||
|
### Category 1: Core Query Patterns
|
||||||
|
|
||||||
|
#### 1.1 Missing Index Detection
|
||||||
|
|
||||||
|
| Field | Value |
|
||||||
|
|-------|-------|
|
||||||
|
| **ID** | `missing-index-detection` |
|
||||||
|
| **File** | `scenarios/missing-index.eval.ts` |
|
||||||
|
| **Difficulty** | Basic |
|
||||||
|
| **Tests** | Agent identifies missing indexes on filtered columns |
|
||||||
|
|
||||||
|
**Description:**
|
||||||
|
Tests that the agent correctly identifies when a query would benefit from an index and recommends creating one.
|
||||||
|
|
||||||
|
**Input:**
|
||||||
|
- Schema: `orders` table with no indexes beyond PK
|
||||||
|
- Query: `SELECT * FROM orders WHERE customer_id = 12345 AND status = 'pending'`
|
||||||
|
|
||||||
|
**Expected Output:**
|
||||||
|
- Should recommend rule 1.1 (query-missing-indexes)
|
||||||
|
- Must mention "index" and "customer_id"
|
||||||
|
|
||||||
|
**Expected Reasoning:**
|
||||||
|
1. Identify that the query filters on customer_id and status
|
||||||
|
2. Recognize that without an index, this causes a sequential scan
|
||||||
|
3. Recommend creating an index on the filtered columns
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
#### 1.2 N+1 Query Detection
|
||||||
|
|
||||||
|
| Field | Value |
|
||||||
|
|-------|-------|
|
||||||
|
| **ID** | `n-plus-one-detection` |
|
||||||
|
| **File** | `scenarios/n-plus-one.eval.ts` |
|
||||||
|
| **Difficulty** | Intermediate |
|
||||||
|
| **Tests** | Agent identifies N+1 query pattern in code |
|
||||||
|
|
||||||
|
**Description:**
|
||||||
|
Tests that the agent recognizes N+1 query patterns in application code and recommends using JOINs.
|
||||||
|
|
||||||
|
**Input:**
|
||||||
|
- Schema: `users` and `posts` tables with relationship
|
||||||
|
- Code snippet showing loop that queries for each post's author
|
||||||
|
|
||||||
|
**Expected Output:**
|
||||||
|
- Should recommend rule 6.1 (data-n-plus-one)
|
||||||
|
- Must mention "JOIN" and "N+1"
|
||||||
|
|
||||||
|
**Expected Reasoning:**
|
||||||
|
1. Identify the N+1 query pattern (1 + N queries)
|
||||||
|
2. Recognize this as a common performance anti-pattern
|
||||||
|
3. Recommend using a JOIN to fetch all data in a single query
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
#### 1.3 Covering Index Suggestion
|
||||||
|
|
||||||
|
| Field | Value |
|
||||||
|
|-------|-------|
|
||||||
|
| **ID** | `covering-index-suggestion` |
|
||||||
|
| **File** | `scenarios/covering-index.eval.ts` |
|
||||||
|
| **Difficulty** | Intermediate |
|
||||||
|
| **Tests** | Agent suggests INCLUDE clause for covering index |
|
||||||
|
|
||||||
|
**Description:**
|
||||||
|
Tests that the agent recommends covering indexes when SELECT columns aren't in the index.
|
||||||
|
|
||||||
|
**Input:**
|
||||||
|
- Schema: `users` table with index on `email` only
|
||||||
|
- Query: `SELECT email, name, department FROM users WHERE email = ?`
|
||||||
|
- PostgreSQL version: 15.4
|
||||||
|
|
||||||
|
**Expected Output:**
|
||||||
|
- Should recommend rule 1.2 (query-covering-indexes)
|
||||||
|
- Must mention "INCLUDE" and "covering"
|
||||||
|
|
||||||
|
**Expected Reasoning:**
|
||||||
|
1. Identify that query selects columns not in the index
|
||||||
|
2. Recognize this causes heap fetches
|
||||||
|
3. Recommend using INCLUDE clause for index-only scans
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Category 2: Version Constraints
|
||||||
|
|
||||||
|
#### 2.1 PG10 - No Covering Index
|
||||||
|
|
||||||
|
| Field | Value |
|
||||||
|
|-------|-------|
|
||||||
|
| **ID** | `version-constraint-pg10-no-covering` |
|
||||||
|
| **File** | `scenarios/version-constraint.eval.ts` |
|
||||||
|
| **Difficulty** | Intermediate |
|
||||||
|
| **Tests** | Agent respects PG11+ requirement for INCLUDE |
|
||||||
|
|
||||||
|
**Description:**
|
||||||
|
Tests that the agent does NOT recommend INCLUDE clause when PostgreSQL version is 10 (INCLUDE requires PG11+).
|
||||||
|
|
||||||
|
**Input:**
|
||||||
|
- Same setup as covering index scenario
|
||||||
|
- PostgreSQL version: 10.0
|
||||||
|
|
||||||
|
**Expected Output:**
|
||||||
|
- Should NOT recommend rule 1.2
|
||||||
|
- Must NOT contain "INCLUDE"
|
||||||
|
- Should suggest alternative optimizations
|
||||||
|
|
||||||
|
**Expected Reasoning:**
|
||||||
|
1. Recognize PostgreSQL 10 is specified
|
||||||
|
2. Check that INCLUDE requires PG11+
|
||||||
|
3. Avoid recommending incompatible features
|
||||||
|
4. Suggest PG10-compatible alternatives
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
#### 2.2 PG9.3 - No UPSERT
|
||||||
|
|
||||||
|
| Field | Value |
|
||||||
|
|-------|-------|
|
||||||
|
| **ID** | `version-constraint-pg93-no-upsert` |
|
||||||
|
| **File** | `scenarios/version-constraint.eval.ts` |
|
||||||
|
| **Difficulty** | Intermediate |
|
||||||
|
| **Tests** | Agent respects PG9.5+ requirement for ON CONFLICT |
|
||||||
|
|
||||||
|
**Description:**
|
||||||
|
Tests that the agent does NOT recommend ON CONFLICT when PostgreSQL version is 9.3 (requires PG9.5+).
|
||||||
|
|
||||||
|
**Input:**
|
||||||
|
- Schema: `settings` table with composite primary key
|
||||||
|
- Query: Need insert-or-update functionality
|
||||||
|
- PostgreSQL version: 9.3
|
||||||
|
|
||||||
|
**Expected Output:**
|
||||||
|
- Should NOT recommend rule 6.3 (data-upsert)
|
||||||
|
- Must NOT contain "ON CONFLICT"
|
||||||
|
- Should suggest CTE-based or try/catch pattern
|
||||||
|
|
||||||
|
**Expected Reasoning:**
|
||||||
|
1. Recognize PostgreSQL 9.3 is specified
|
||||||
|
2. Check that ON CONFLICT requires PG9.5+
|
||||||
|
3. Avoid recommending UPSERT syntax
|
||||||
|
4. Suggest compatible alternatives
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Category 3: Extension Requirements
|
||||||
|
|
||||||
|
#### 3.1 Extension Available
|
||||||
|
|
||||||
|
| Field | Value |
|
||||||
|
|-------|-------|
|
||||||
|
| **ID** | `extension-available-pg-stat-statements` |
|
||||||
|
| **File** | `scenarios/extension-available.eval.ts` |
|
||||||
|
| **Difficulty** | Basic |
|
||||||
|
| **Tests** | Agent recommends extension when available |
|
||||||
|
|
||||||
|
**Description:**
|
||||||
|
Tests that the agent recommends pg_stat_statements when it's listed as available.
|
||||||
|
|
||||||
|
**Input:**
|
||||||
|
- General schema
|
||||||
|
- Query: How to identify slow queries
|
||||||
|
- Available extensions: pg_stat_statements, pgcrypto, uuid-ossp
|
||||||
|
|
||||||
|
**Expected Output:**
|
||||||
|
- Should recommend rule 7.1 (monitor-pg-stat-statements)
|
||||||
|
- Must mention "pg_stat_statements"
|
||||||
|
|
||||||
|
**Expected Reasoning:**
|
||||||
|
1. Recognize query monitoring problem
|
||||||
|
2. Check that pg_stat_statements is available
|
||||||
|
3. Recommend enabling and using the extension
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
#### 3.2 Extension Unavailable
|
||||||
|
|
||||||
|
| Field | Value |
|
||||||
|
|-------|-------|
|
||||||
|
| **ID** | `extension-unavailable-no-pg-stat-statements` |
|
||||||
|
| **File** | `scenarios/extension-unavailable.eval.ts` |
|
||||||
|
| **Difficulty** | Intermediate |
|
||||||
|
| **Tests** | Agent provides alternatives when extension unavailable |
|
||||||
|
|
||||||
|
**Description:**
|
||||||
|
Tests that the agent suggests alternatives when pg_stat_statements is not available.
|
||||||
|
|
||||||
|
**Input:**
|
||||||
|
- General schema
|
||||||
|
- Query: How to identify slow queries
|
||||||
|
- Available extensions: [] (none)
|
||||||
|
- Context: Managed database, can't install extensions
|
||||||
|
|
||||||
|
**Expected Output:**
|
||||||
|
- Should NOT recommend pg_stat_statements
|
||||||
|
- Must mention "EXPLAIN" and "ANALYZE"
|
||||||
|
- Should suggest built-in alternatives
|
||||||
|
|
||||||
|
**Expected Reasoning:**
|
||||||
|
1. Recognize no extensions are available
|
||||||
|
2. Avoid recommending pg_stat_statements
|
||||||
|
3. Suggest EXPLAIN ANALYZE, log_min_duration_statement, or pg_stat_activity
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Adding New Scenarios
|
||||||
|
|
||||||
|
1. Create a new file in `scenarios/` following the naming convention `{name}.eval.ts`
|
||||||
|
|
||||||
|
2. Define the scenario using the `EvalScenario` interface:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
import { describe, it, expect } from "vitest";
|
||||||
|
import { runEval } from "../runner.js";
|
||||||
|
import type { EvalScenario } from "../types.js";
|
||||||
|
|
||||||
|
const scenario: EvalScenario = {
|
||||||
|
id: "unique-scenario-id",
|
||||||
|
name: "Human Readable Name",
|
||||||
|
description: "What this scenario tests",
|
||||||
|
category: "query-performance" | "version-constraints" | "extension-requirements",
|
||||||
|
difficulty: "basic" | "intermediate" | "advanced",
|
||||||
|
input: {
|
||||||
|
schema: "SQL schema definition",
|
||||||
|
userQuery: "User's question or problem",
|
||||||
|
postgresVersion: "15.4", // Optional
|
||||||
|
availableExtensions: ["list"], // Optional
|
||||||
|
},
|
||||||
|
expectedOutput: {
|
||||||
|
shouldRecommendRules: ["1.1"],
|
||||||
|
shouldNotRecommendRules: ["2.3"], // Optional
|
||||||
|
mustContain: ["keyword"],
|
||||||
|
mustNotContain: ["avoid"], // Optional
|
||||||
|
},
|
||||||
|
expectedReasoning: [
|
||||||
|
"Step 1 of expected reasoning",
|
||||||
|
"Step 2 of expected reasoning",
|
||||||
|
],
|
||||||
|
};
|
||||||
|
|
||||||
|
describe("Scenario Name", () => {
|
||||||
|
it("should do something specific", async () => {
|
||||||
|
const result = await runEval(scenario);
|
||||||
|
// Add assertions
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
export { scenario };
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Run the new scenario: `npm run eval -- -t "Scenario Name"`
|
||||||
|
|
||||||
|
## Evaluation Criteria
|
||||||
|
|
||||||
|
Each scenario is evaluated against:
|
||||||
|
|
||||||
|
1. **Rule References**: Does the response reference the expected rules?
|
||||||
|
2. **Must Contain**: Does the response include required keywords?
|
||||||
|
3. **Must Not Contain**: Does the response avoid prohibited content?
|
||||||
|
4. **Version Constraints**: Are version requirements respected?
|
||||||
|
5. **Extension Requirements**: Are extension dependencies checked?
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
**Evals timing out:**
|
||||||
|
- Increase timeout in `vitest.config.ts` (default: 60s)
|
||||||
|
- Check API key is valid
|
||||||
|
|
||||||
|
**Flaky results:**
|
||||||
|
- Set `temperature: 0` in runner config
|
||||||
|
- Make assertions more flexible (check for concept presence, not exact wording)
|
||||||
|
|
||||||
|
**Missing AGENTS.md:**
|
||||||
|
- Run `npm run build` from repository root first
|
||||||
18
skills/postgres-best-practices/evals/package.json
Normal file
18
skills/postgres-best-practices/evals/package.json
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
{
|
||||||
|
"name": "postgres-best-practices-evals",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"description": "Evaluation scenarios for Postgres Best Practices skill",
|
||||||
|
"type": "module",
|
||||||
|
"scripts": {
|
||||||
|
"eval": "vitest run",
|
||||||
|
"eval:watch": "vitest",
|
||||||
|
"eval:ui": "vitest --ui"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"@ai-sdk/anthropic": "^0.0.30",
|
||||||
|
"@types/node": "^20.0.0",
|
||||||
|
"ai": "^3.0.0",
|
||||||
|
"typescript": "^5.0.0",
|
||||||
|
"vitest": "^1.0.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
192
skills/postgres-best-practices/evals/runner.ts
Normal file
192
skills/postgres-best-practices/evals/runner.ts
Normal file
@@ -0,0 +1,192 @@
|
|||||||
|
import { generateText } from "ai";
|
||||||
|
import { anthropic } from "@ai-sdk/anthropic";
|
||||||
|
import { readFileSync } from "node:fs";
|
||||||
|
import { join } from "node:path";
|
||||||
|
import type { CriterionResult, EvalConfig, EvalResult, EvalScenario } from "./types.js";
|
||||||
|
|
||||||
|
const DEFAULT_CONFIG: EvalConfig = {
|
||||||
|
agentsPath: join(import.meta.dirname, "..", "AGENTS.md"),
|
||||||
|
model: "claude-sonnet-4-20250514",
|
||||||
|
maxTokens: 2048,
|
||||||
|
temperature: 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Build the user prompt from a scenario
|
||||||
|
*/
|
||||||
|
function buildUserPrompt(scenario: EvalScenario): string {
|
||||||
|
const parts: string[] = [];
|
||||||
|
|
||||||
|
// Add version context if specified
|
||||||
|
if (scenario.input.postgresVersion) {
|
||||||
|
parts.push(`PostgreSQL Version: ${scenario.input.postgresVersion}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add extensions context if specified
|
||||||
|
if (scenario.input.availableExtensions) {
|
||||||
|
if (scenario.input.availableExtensions.length === 0) {
|
||||||
|
parts.push("Available Extensions: None installed");
|
||||||
|
} else {
|
||||||
|
parts.push(`Available Extensions: ${scenario.input.availableExtensions.join(", ")}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add additional context if provided
|
||||||
|
if (scenario.input.context) {
|
||||||
|
parts.push(`Context: ${scenario.input.context}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add schema
|
||||||
|
parts.push(`\nSchema:\n\`\`\`sql\n${scenario.input.schema}\n\`\`\``);
|
||||||
|
|
||||||
|
// Add user query
|
||||||
|
parts.push(`\nQuestion: ${scenario.input.userQuery}`);
|
||||||
|
|
||||||
|
return parts.join("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extract rule IDs mentioned in a response
|
||||||
|
*/
|
||||||
|
function extractRuleIds(response: string): string[] {
|
||||||
|
// Match patterns like "1.1", "2.3", etc.
|
||||||
|
const rulePattern = /\b(\d+\.\d+)\b/g;
|
||||||
|
const matches = response.match(rulePattern) || [];
|
||||||
|
return [...new Set(matches)];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Evaluate the response against expected criteria
|
||||||
|
*/
|
||||||
|
function evaluateCriteria(scenario: EvalScenario, response: string): CriterionResult[] {
|
||||||
|
const results: CriterionResult[] = [];
|
||||||
|
const responseLower = response.toLowerCase();
|
||||||
|
|
||||||
|
// Check mustContain criteria
|
||||||
|
for (const term of scenario.expectedOutput.mustContain) {
|
||||||
|
const found = responseLower.includes(term.toLowerCase());
|
||||||
|
results.push({
|
||||||
|
criterion: `Response should contain "${term}"`,
|
||||||
|
passed: found,
|
||||||
|
evidence: found ? "Found in response" : "Not found in response",
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check mustNotContain criteria
|
||||||
|
if (scenario.expectedOutput.mustNotContain) {
|
||||||
|
for (const term of scenario.expectedOutput.mustNotContain) {
|
||||||
|
const found = responseLower.includes(term.toLowerCase());
|
||||||
|
results.push({
|
||||||
|
criterion: `Response should NOT contain "${term}"`,
|
||||||
|
passed: !found,
|
||||||
|
evidence: found ? "Found in response (should not be present)" : "Not found (correct)",
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check shouldRecommendRules
|
||||||
|
const referencedRules = extractRuleIds(response);
|
||||||
|
for (const ruleId of scenario.expectedOutput.shouldRecommendRules) {
|
||||||
|
const found = referencedRules.includes(ruleId);
|
||||||
|
results.push({
|
||||||
|
criterion: `Should recommend rule ${ruleId}`,
|
||||||
|
passed: found,
|
||||||
|
evidence: found ? "Rule referenced" : "Rule not referenced",
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check shouldNotRecommendRules
|
||||||
|
if (scenario.expectedOutput.shouldNotRecommendRules) {
|
||||||
|
for (const ruleId of scenario.expectedOutput.shouldNotRecommendRules) {
|
||||||
|
const found = referencedRules.includes(ruleId);
|
||||||
|
results.push({
|
||||||
|
criterion: `Should NOT recommend rule ${ruleId}`,
|
||||||
|
passed: !found,
|
||||||
|
evidence: found ? "Rule referenced (should not be)" : "Rule not referenced (correct)",
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Run a single evaluation scenario
|
||||||
|
*/
|
||||||
|
export async function runEval(
|
||||||
|
scenario: EvalScenario,
|
||||||
|
config: Partial<EvalConfig> = {}
|
||||||
|
): Promise<EvalResult> {
|
||||||
|
const finalConfig = { ...DEFAULT_CONFIG, ...config };
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Load AGENTS.md
|
||||||
|
const agentsMd = readFileSync(finalConfig.agentsPath, "utf-8");
|
||||||
|
|
||||||
|
const systemPrompt = `You are a PostgreSQL expert assistant. Use the following knowledge base to provide accurate recommendations:
|
||||||
|
|
||||||
|
${agentsMd}
|
||||||
|
|
||||||
|
IMPORTANT: When the user specifies a PostgreSQL version or available extensions, you MUST respect those constraints:
|
||||||
|
- Do not recommend features that require a higher PostgreSQL version than specified
|
||||||
|
- Do not recommend extensions that are not available
|
||||||
|
- If a recommended optimization requires a specific version or extension, mention the prerequisite
|
||||||
|
|
||||||
|
When making recommendations, reference specific rule IDs (e.g., "1.1", "2.3") from the knowledge base.`;
|
||||||
|
|
||||||
|
const userPrompt = buildUserPrompt(scenario);
|
||||||
|
|
||||||
|
const start = Date.now();
|
||||||
|
const { text } = await generateText({
|
||||||
|
model: anthropic(finalConfig.model!),
|
||||||
|
system: systemPrompt,
|
||||||
|
prompt: userPrompt,
|
||||||
|
maxTokens: finalConfig.maxTokens,
|
||||||
|
temperature: finalConfig.temperature,
|
||||||
|
});
|
||||||
|
const latencyMs = Date.now() - start;
|
||||||
|
|
||||||
|
// Evaluate the response
|
||||||
|
const criteriaResults = evaluateCriteria(scenario, text);
|
||||||
|
const rulesReferenced = extractRuleIds(text);
|
||||||
|
const passed = criteriaResults.every((r) => r.passed);
|
||||||
|
|
||||||
|
return {
|
||||||
|
scenarioId: scenario.id,
|
||||||
|
passed,
|
||||||
|
rulesReferenced,
|
||||||
|
criteriaResults,
|
||||||
|
response: text,
|
||||||
|
latencyMs,
|
||||||
|
};
|
||||||
|
} catch (error) {
|
||||||
|
return {
|
||||||
|
scenarioId: scenario.id,
|
||||||
|
passed: false,
|
||||||
|
rulesReferenced: [],
|
||||||
|
criteriaResults: [],
|
||||||
|
response: "",
|
||||||
|
latencyMs: 0,
|
||||||
|
error: error instanceof Error ? error.message : String(error),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Run multiple evaluation scenarios
|
||||||
|
*/
|
||||||
|
export async function runEvals(
|
||||||
|
scenarios: EvalScenario[],
|
||||||
|
config: Partial<EvalConfig> = {}
|
||||||
|
): Promise<EvalResult[]> {
|
||||||
|
const results: EvalResult[] = [];
|
||||||
|
|
||||||
|
for (const scenario of scenarios) {
|
||||||
|
console.log(`Running eval: ${scenario.name}...`);
|
||||||
|
const result = await runEval(scenario, config);
|
||||||
|
results.push(result);
|
||||||
|
console.log(` ${result.passed ? "PASS" : "FAIL"} (${result.latencyMs}ms)`);
|
||||||
|
}
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
@@ -0,0 +1,62 @@
|
|||||||
|
import { describe, it, expect } from "vitest";
|
||||||
|
import { runEval } from "../runner.js";
|
||||||
|
import type { EvalScenario } from "../types.js";
|
||||||
|
|
||||||
|
const scenario: EvalScenario = {
|
||||||
|
id: "covering-index-suggestion",
|
||||||
|
name: "Covering Index Suggestion",
|
||||||
|
description:
|
||||||
|
"Agent should suggest using INCLUDE clause for columns in SELECT that aren't in WHERE clause",
|
||||||
|
category: "query-performance",
|
||||||
|
difficulty: "intermediate",
|
||||||
|
input: {
|
||||||
|
schema: `
|
||||||
|
CREATE TABLE users (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
email VARCHAR(255) NOT NULL,
|
||||||
|
name VARCHAR(100),
|
||||||
|
department VARCHAR(50),
|
||||||
|
created_at TIMESTAMPTZ DEFAULT NOW()
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX users_email_idx ON users (email);
|
||||||
|
-- Table has 2 million rows
|
||||||
|
`,
|
||||||
|
userQuery: `This query still does heap fetches even though we have an index on email:
|
||||||
|
|
||||||
|
SELECT email, name, department FROM users WHERE email = 'user@example.com'
|
||||||
|
|
||||||
|
EXPLAIN shows "Index Scan" but not "Index Only Scan". How can I avoid the table lookup?`,
|
||||||
|
postgresVersion: "15.4",
|
||||||
|
},
|
||||||
|
expectedOutput: {
|
||||||
|
shouldRecommendRules: ["1.2"], // query-covering-indexes
|
||||||
|
mustContain: ["include", "covering"],
|
||||||
|
},
|
||||||
|
expectedReasoning: [
|
||||||
|
"Identify that the query selects columns (name, department) not in the index",
|
||||||
|
"Recognize this causes additional heap fetches after the index scan",
|
||||||
|
"Recommend using INCLUDE clause to create a covering index",
|
||||||
|
"Explain that this enables index-only scans",
|
||||||
|
],
|
||||||
|
};
|
||||||
|
|
||||||
|
describe("Covering Index Suggestion", () => {
|
||||||
|
it("should recommend INCLUDE clause for covering index", async () => {
|
||||||
|
const result = await runEval(scenario);
|
||||||
|
|
||||||
|
console.log("Response:", result.response);
|
||||||
|
console.log("Criteria results:", result.criteriaResults);
|
||||||
|
|
||||||
|
// Response should mention INCLUDE keyword
|
||||||
|
expect(result.response.toLowerCase()).toContain("include");
|
||||||
|
|
||||||
|
// Response should mention covering index concept
|
||||||
|
const responseLower = result.response.toLowerCase();
|
||||||
|
expect(
|
||||||
|
responseLower.includes("covering") || responseLower.includes("index-only")
|
||||||
|
).toBe(true);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
export { scenario };
|
||||||
@@ -0,0 +1,56 @@
|
|||||||
|
import { describe, it, expect } from "vitest";
|
||||||
|
import { runEval } from "../runner.js";
|
||||||
|
import type { EvalScenario } from "../types.js";
|
||||||
|
|
||||||
|
const scenario: EvalScenario = {
|
||||||
|
id: "extension-available-pg-stat-statements",
|
||||||
|
name: "Extension Available - pg_stat_statements",
|
||||||
|
description:
|
||||||
|
"Agent should recommend pg_stat_statements for query monitoring when the extension is available",
|
||||||
|
category: "extension-requirements",
|
||||||
|
difficulty: "basic",
|
||||||
|
input: {
|
||||||
|
schema: `
|
||||||
|
-- Production database with various tables
|
||||||
|
CREATE TABLE users (id SERIAL PRIMARY KEY, email VARCHAR(255));
|
||||||
|
CREATE TABLE orders (id SERIAL PRIMARY KEY, user_id INT, total DECIMAL);
|
||||||
|
CREATE TABLE products (id SERIAL PRIMARY KEY, name VARCHAR(200), price DECIMAL);
|
||||||
|
`,
|
||||||
|
userQuery:
|
||||||
|
"Our database is slow but we don't know which queries are causing the problem. How can we identify the slowest queries?",
|
||||||
|
postgresVersion: "15.4",
|
||||||
|
availableExtensions: ["pg_stat_statements", "pgcrypto", "uuid-ossp"],
|
||||||
|
},
|
||||||
|
expectedOutput: {
|
||||||
|
shouldRecommendRules: ["7.1"], // monitor-pg-stat-statements
|
||||||
|
mustContain: ["pg_stat_statements"],
|
||||||
|
},
|
||||||
|
expectedReasoning: [
|
||||||
|
"Recognize this is a query monitoring/performance diagnosis problem",
|
||||||
|
"Check that pg_stat_statements is available in the extensions list",
|
||||||
|
"Recommend enabling pg_stat_statements for query analysis",
|
||||||
|
"Explain how to use it to find slow queries",
|
||||||
|
],
|
||||||
|
};
|
||||||
|
|
||||||
|
describe("Extension Available - pg_stat_statements", () => {
|
||||||
|
it("should recommend pg_stat_statements when available", async () => {
|
||||||
|
const result = await runEval(scenario);
|
||||||
|
|
||||||
|
console.log("Response:", result.response);
|
||||||
|
console.log("Criteria results:", result.criteriaResults);
|
||||||
|
|
||||||
|
// Response should mention pg_stat_statements
|
||||||
|
expect(result.response.toLowerCase()).toContain("pg_stat_statements");
|
||||||
|
|
||||||
|
// Should suggest enabling/using the extension
|
||||||
|
const responseLower = result.response.toLowerCase();
|
||||||
|
expect(
|
||||||
|
responseLower.includes("create extension") ||
|
||||||
|
responseLower.includes("enable") ||
|
||||||
|
responseLower.includes("query")
|
||||||
|
).toBe(true);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
export { scenario };
|
||||||
@@ -0,0 +1,56 @@
|
|||||||
|
import { describe, it, expect } from "vitest";
|
||||||
|
import { runEval } from "../runner.js";
|
||||||
|
import type { EvalScenario } from "../types.js";
|
||||||
|
|
||||||
|
const scenario: EvalScenario = {
|
||||||
|
id: "extension-unavailable-no-pg-stat-statements",
|
||||||
|
name: "Extension Unavailable - No pg_stat_statements",
|
||||||
|
description:
|
||||||
|
"Agent should provide alternatives when pg_stat_statements is not available for query monitoring",
|
||||||
|
category: "extension-requirements",
|
||||||
|
difficulty: "intermediate",
|
||||||
|
input: {
|
||||||
|
schema: `
|
||||||
|
-- Production database with various tables
|
||||||
|
CREATE TABLE users (id SERIAL PRIMARY KEY, email VARCHAR(255));
|
||||||
|
CREATE TABLE orders (id SERIAL PRIMARY KEY, user_id INT, total DECIMAL);
|
||||||
|
CREATE TABLE products (id SERIAL PRIMARY KEY, name VARCHAR(200), price DECIMAL);
|
||||||
|
`,
|
||||||
|
userQuery:
|
||||||
|
"Our database is slow but we don't know which queries are causing the problem. How can we identify the slowest queries?",
|
||||||
|
postgresVersion: "15.4",
|
||||||
|
availableExtensions: [], // No extensions available
|
||||||
|
context:
|
||||||
|
"This is a managed database environment where we cannot install additional extensions.",
|
||||||
|
},
|
||||||
|
expectedOutput: {
|
||||||
|
shouldRecommendRules: [], // Should not recommend pg_stat_statements rule
|
||||||
|
shouldNotRecommendRules: ["7.1"], // monitor-pg-stat-statements
|
||||||
|
mustContain: ["explain", "analyze"],
|
||||||
|
mustNotContain: ["pg_stat_statements"],
|
||||||
|
},
|
||||||
|
expectedReasoning: [
|
||||||
|
"Recognize that no extensions are available",
|
||||||
|
"Check that pg_stat_statements cannot be used",
|
||||||
|
"Avoid recommending pg_stat_statements",
|
||||||
|
"Suggest alternative approaches like EXPLAIN ANALYZE, log_min_duration_statement, or pg_stat_activity",
|
||||||
|
],
|
||||||
|
};
|
||||||
|
|
||||||
|
describe("Extension Unavailable - No pg_stat_statements", () => {
|
||||||
|
it("should suggest alternatives when pg_stat_statements is unavailable", async () => {
|
||||||
|
const result = await runEval(scenario);
|
||||||
|
|
||||||
|
console.log("Response:", result.response);
|
||||||
|
console.log("Criteria results:", result.criteriaResults);
|
||||||
|
|
||||||
|
// Response should NOT primarily recommend pg_stat_statements
|
||||||
|
// (it might mention it as unavailable, but shouldn't suggest installing it)
|
||||||
|
const responseLower = result.response.toLowerCase();
|
||||||
|
|
||||||
|
// Should suggest EXPLAIN ANALYZE as an alternative
|
||||||
|
expect(responseLower.includes("explain") && responseLower.includes("analyze")).toBe(true);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
export { scenario };
|
||||||
@@ -0,0 +1,56 @@
|
|||||||
|
import { describe, it, expect } from "vitest";
|
||||||
|
import { runEval } from "../runner.js";
|
||||||
|
import type { EvalScenario } from "../types.js";
|
||||||
|
|
||||||
|
const scenario: EvalScenario = {
|
||||||
|
id: "missing-index-detection",
|
||||||
|
name: "Missing Index Detection",
|
||||||
|
description:
|
||||||
|
"Agent should identify missing index on WHERE clause columns and recommend creating an appropriate index",
|
||||||
|
category: "query-performance",
|
||||||
|
difficulty: "basic",
|
||||||
|
input: {
|
||||||
|
schema: `
|
||||||
|
CREATE TABLE orders (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
customer_id INT NOT NULL,
|
||||||
|
status VARCHAR(50),
|
||||||
|
total DECIMAL(10,2),
|
||||||
|
created_at TIMESTAMPTZ DEFAULT NOW()
|
||||||
|
);
|
||||||
|
-- No indexes besides primary key
|
||||||
|
-- Table has 5 million rows
|
||||||
|
`,
|
||||||
|
userQuery:
|
||||||
|
"This query is slow and takes 3 seconds: SELECT * FROM orders WHERE customer_id = 12345 AND status = 'pending'",
|
||||||
|
},
|
||||||
|
expectedOutput: {
|
||||||
|
shouldRecommendRules: ["1.1"], // query-missing-indexes
|
||||||
|
mustContain: ["index", "customer_id"],
|
||||||
|
},
|
||||||
|
expectedReasoning: [
|
||||||
|
"Identify that the query filters on customer_id and status",
|
||||||
|
"Recognize that without an index, this causes a sequential scan",
|
||||||
|
"Recommend creating an index on the filtered columns",
|
||||||
|
],
|
||||||
|
};
|
||||||
|
|
||||||
|
describe("Missing Index Detection", () => {
|
||||||
|
it("should recommend creating an index on filtered columns", async () => {
|
||||||
|
const result = await runEval(scenario);
|
||||||
|
|
||||||
|
console.log("Response:", result.response);
|
||||||
|
console.log("Criteria results:", result.criteriaResults);
|
||||||
|
|
||||||
|
// Check that key criteria passed
|
||||||
|
expect(result.criteriaResults.some((c) => c.criterion.includes("index") && c.passed)).toBe(
|
||||||
|
true
|
||||||
|
);
|
||||||
|
|
||||||
|
// Response should mention creating an index
|
||||||
|
expect(result.response.toLowerCase()).toContain("index");
|
||||||
|
expect(result.response.toLowerCase()).toContain("customer_id");
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
export { scenario };
|
||||||
@@ -0,0 +1,71 @@
|
|||||||
|
import { describe, it, expect } from "vitest";
|
||||||
|
import { runEval } from "../runner.js";
|
||||||
|
import type { EvalScenario } from "../types.js";
|
||||||
|
|
||||||
|
const scenario: EvalScenario = {
|
||||||
|
id: "n-plus-one-detection",
|
||||||
|
name: "N+1 Query Detection",
|
||||||
|
description:
|
||||||
|
"Agent should identify N+1 query pattern in application code and recommend using JOINs or batch queries",
|
||||||
|
category: "query-performance",
|
||||||
|
difficulty: "intermediate",
|
||||||
|
input: {
|
||||||
|
schema: `
|
||||||
|
CREATE TABLE users (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
name VARCHAR(100),
|
||||||
|
email VARCHAR(255)
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE posts (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
user_id INT REFERENCES users(id),
|
||||||
|
title VARCHAR(200),
|
||||||
|
content TEXT,
|
||||||
|
created_at TIMESTAMPTZ DEFAULT NOW()
|
||||||
|
);
|
||||||
|
`,
|
||||||
|
userQuery: `My API endpoint is slow. Here's the code:
|
||||||
|
|
||||||
|
\`\`\`typescript
|
||||||
|
// Get all posts
|
||||||
|
const posts = await db.query('SELECT * FROM posts LIMIT 100');
|
||||||
|
|
||||||
|
// For each post, get the author
|
||||||
|
for (const post of posts) {
|
||||||
|
const author = await db.query('SELECT * FROM users WHERE id = $1', [post.user_id]);
|
||||||
|
post.author = author;
|
||||||
|
}
|
||||||
|
\`\`\`
|
||||||
|
|
||||||
|
This makes 101 database queries. How can I optimize it?`,
|
||||||
|
},
|
||||||
|
expectedOutput: {
|
||||||
|
shouldRecommendRules: ["6.1"], // data-n-plus-one
|
||||||
|
mustContain: ["join", "n+1"],
|
||||||
|
},
|
||||||
|
expectedReasoning: [
|
||||||
|
"Identify the N+1 query pattern (1 query for posts + N queries for users)",
|
||||||
|
"Recognize this as a common performance anti-pattern",
|
||||||
|
"Recommend using a JOIN to fetch all data in a single query",
|
||||||
|
"Optionally suggest using IN clause for batch fetching",
|
||||||
|
],
|
||||||
|
};
|
||||||
|
|
||||||
|
describe("N+1 Query Detection", () => {
|
||||||
|
it("should identify N+1 pattern and recommend JOIN", async () => {
|
||||||
|
const result = await runEval(scenario);
|
||||||
|
|
||||||
|
console.log("Response:", result.response);
|
||||||
|
console.log("Criteria results:", result.criteriaResults);
|
||||||
|
|
||||||
|
// Response should mention JOIN
|
||||||
|
expect(result.response.toLowerCase()).toContain("join");
|
||||||
|
|
||||||
|
// Response should explain the N+1 problem
|
||||||
|
const responseLower = result.response.toLowerCase();
|
||||||
|
expect(responseLower.includes("n+1") || responseLower.includes("n + 1")).toBe(true);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
export { scenario };
|
||||||
@@ -0,0 +1,108 @@
|
|||||||
|
import { describe, it, expect } from "vitest";
|
||||||
|
import { runEval } from "../runner.js";
|
||||||
|
import type { EvalScenario } from "../types.js";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Scenario 1: PG10 - Should NOT recommend covering indexes (requires PG11+)
|
||||||
|
*/
|
||||||
|
const scenarioPg10NoCoveringIndex: EvalScenario = {
|
||||||
|
id: "version-constraint-pg10-no-covering",
|
||||||
|
name: "Version Constraint - PG10 No Covering Index",
|
||||||
|
description:
|
||||||
|
"Agent should NOT recommend INCLUDE clause on PostgreSQL 10 since it requires PG11+",
|
||||||
|
category: "version-constraints",
|
||||||
|
difficulty: "intermediate",
|
||||||
|
input: {
|
||||||
|
schema: `
|
||||||
|
CREATE TABLE users (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
email VARCHAR(255) NOT NULL,
|
||||||
|
name VARCHAR(100),
|
||||||
|
created_at TIMESTAMPTZ DEFAULT NOW()
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX users_email_idx ON users (email);
|
||||||
|
`,
|
||||||
|
userQuery:
|
||||||
|
"How can I optimize this query to avoid heap fetches? SELECT email, name FROM users WHERE email = 'test@example.com'",
|
||||||
|
postgresVersion: "10.0",
|
||||||
|
},
|
||||||
|
expectedOutput: {
|
||||||
|
shouldRecommendRules: [],
|
||||||
|
shouldNotRecommendRules: ["1.2"], // query-covering-indexes requires PG11
|
||||||
|
mustContain: ["index"],
|
||||||
|
mustNotContain: ["include"],
|
||||||
|
},
|
||||||
|
expectedReasoning: [
|
||||||
|
"Recognize that PostgreSQL 10 is specified",
|
||||||
|
"Check that covering indexes (INCLUDE clause) require PG11+",
|
||||||
|
"Avoid recommending INCLUDE clause",
|
||||||
|
"Suggest alternative optimization strategies appropriate for PG10",
|
||||||
|
],
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Scenario 2: PG9.3 - Should NOT recommend UPSERT (requires PG9.5+)
|
||||||
|
*/
|
||||||
|
const scenarioPg93NoUpsert: EvalScenario = {
|
||||||
|
id: "version-constraint-pg93-no-upsert",
|
||||||
|
name: "Version Constraint - PG9.3 No UPSERT",
|
||||||
|
description:
|
||||||
|
"Agent should NOT recommend ON CONFLICT on PostgreSQL 9.3 since it requires PG9.5+",
|
||||||
|
category: "version-constraints",
|
||||||
|
difficulty: "intermediate",
|
||||||
|
input: {
|
||||||
|
schema: `
|
||||||
|
CREATE TABLE settings (
|
||||||
|
user_id INT NOT NULL,
|
||||||
|
key VARCHAR(50) NOT NULL,
|
||||||
|
value TEXT,
|
||||||
|
PRIMARY KEY (user_id, key)
|
||||||
|
);
|
||||||
|
`,
|
||||||
|
userQuery:
|
||||||
|
"I need to insert a setting if it doesn't exist, or update it if it does. How should I do this?",
|
||||||
|
postgresVersion: "9.3",
|
||||||
|
},
|
||||||
|
expectedOutput: {
|
||||||
|
shouldRecommendRules: [],
|
||||||
|
shouldNotRecommendRules: ["6.3"], // data-upsert requires PG9.5
|
||||||
|
mustContain: ["insert", "update"],
|
||||||
|
mustNotContain: ["on conflict"],
|
||||||
|
},
|
||||||
|
expectedReasoning: [
|
||||||
|
"Recognize that PostgreSQL 9.3 is specified",
|
||||||
|
"Check that ON CONFLICT (UPSERT) requires PG9.5+",
|
||||||
|
"Avoid recommending ON CONFLICT syntax",
|
||||||
|
"Suggest alternative pattern (e.g., CTE with INSERT/UPDATE, or try/catch approach)",
|
||||||
|
],
|
||||||
|
};
|
||||||
|
|
||||||
|
describe("Version Constraint Tests", () => {
|
||||||
|
describe("PG10 - No Covering Index", () => {
|
||||||
|
it("should NOT recommend INCLUDE clause for PG10", async () => {
|
||||||
|
const result = await runEval(scenarioPg10NoCoveringIndex);
|
||||||
|
|
||||||
|
console.log("Response:", result.response);
|
||||||
|
console.log("Criteria results:", result.criteriaResults);
|
||||||
|
|
||||||
|
// Response should NOT contain INCLUDE recommendation
|
||||||
|
expect(result.response.toLowerCase()).not.toContain("include (");
|
||||||
|
expect(result.response.toLowerCase()).not.toContain("include(");
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe("PG9.3 - No UPSERT", () => {
|
||||||
|
it("should NOT recommend ON CONFLICT for PG9.3", async () => {
|
||||||
|
const result = await runEval(scenarioPg93NoUpsert);
|
||||||
|
|
||||||
|
console.log("Response:", result.response);
|
||||||
|
console.log("Criteria results:", result.criteriaResults);
|
||||||
|
|
||||||
|
// Response should NOT recommend ON CONFLICT
|
||||||
|
expect(result.response.toLowerCase()).not.toContain("on conflict");
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
export { scenarioPg10NoCoveringIndex, scenarioPg93NoUpsert };
|
||||||
13
skills/postgres-best-practices/evals/tsconfig.json
Normal file
13
skills/postgres-best-practices/evals/tsconfig.json
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
{
|
||||||
|
"compilerOptions": {
|
||||||
|
"target": "ES2022",
|
||||||
|
"module": "ESNext",
|
||||||
|
"moduleResolution": "bundler",
|
||||||
|
"esModuleInterop": true,
|
||||||
|
"strict": true,
|
||||||
|
"skipLibCheck": true,
|
||||||
|
"outDir": "dist",
|
||||||
|
"declaration": true
|
||||||
|
},
|
||||||
|
"include": ["*.ts", "scenarios/**/*.ts"]
|
||||||
|
}
|
||||||
112
skills/postgres-best-practices/evals/types.ts
Normal file
112
skills/postgres-best-practices/evals/types.ts
Normal file
@@ -0,0 +1,112 @@
|
|||||||
|
/**
|
||||||
|
* Evaluation scenario definition
|
||||||
|
*/
|
||||||
|
export interface EvalScenario {
|
||||||
|
/** Unique identifier for the scenario */
|
||||||
|
id: string;
|
||||||
|
|
||||||
|
/** Human-readable name */
|
||||||
|
name: string;
|
||||||
|
|
||||||
|
/** Description of what this scenario tests */
|
||||||
|
description: string;
|
||||||
|
|
||||||
|
/** Category of the scenario */
|
||||||
|
category: "query-performance" | "version-constraints" | "extension-requirements";
|
||||||
|
|
||||||
|
/** Difficulty level */
|
||||||
|
difficulty: "basic" | "intermediate" | "advanced";
|
||||||
|
|
||||||
|
/** Input for the scenario */
|
||||||
|
input: {
|
||||||
|
/** SQL schema context */
|
||||||
|
schema: string;
|
||||||
|
|
||||||
|
/** User's question or request */
|
||||||
|
userQuery: string;
|
||||||
|
|
||||||
|
/** Optional PostgreSQL version (e.g., "10.0", "15.4") */
|
||||||
|
postgresVersion?: string;
|
||||||
|
|
||||||
|
/** Optional list of available extensions */
|
||||||
|
availableExtensions?: string[];
|
||||||
|
|
||||||
|
/** Additional context */
|
||||||
|
context?: string;
|
||||||
|
};
|
||||||
|
|
||||||
|
/** Expected output criteria */
|
||||||
|
expectedOutput: {
|
||||||
|
/** Rule IDs that should be recommended */
|
||||||
|
shouldRecommendRules: string[];
|
||||||
|
|
||||||
|
/** Rule IDs that should NOT be recommended (version/extension constraints) */
|
||||||
|
shouldNotRecommendRules?: string[];
|
||||||
|
|
||||||
|
/** Strings that must appear in the response */
|
||||||
|
mustContain: string[];
|
||||||
|
|
||||||
|
/** Strings that must NOT appear in the response */
|
||||||
|
mustNotContain?: string[];
|
||||||
|
};
|
||||||
|
|
||||||
|
/** Expected reasoning steps the agent should follow */
|
||||||
|
expectedReasoning: string[];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Result of evaluating a single criterion
|
||||||
|
*/
|
||||||
|
export interface CriterionResult {
|
||||||
|
/** Description of the criterion */
|
||||||
|
criterion: string;
|
||||||
|
|
||||||
|
/** Whether the criterion passed */
|
||||||
|
passed: boolean;
|
||||||
|
|
||||||
|
/** Evidence or explanation */
|
||||||
|
evidence?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Result of running a single evaluation scenario.
 */
export interface EvalResult {
  /** ID of the scenario that was run */
  scenarioId: string;

  /** Whether all criteria passed */
  passed: boolean;

  /** Rule IDs that were referenced in the agent's response */
  rulesReferenced: string[];

  /** Per-criterion pass/fail results for this scenario */
  criteriaResults: CriterionResult[];

  /** The agent's full response text */
  response: string;

  /** Wall-clock time taken in milliseconds */
  latencyMs: number;

  /** Error message if the evaluation itself failed to complete */
  error?: string;
}
|
||||||
|
|
||||||
|
/**
 * Configuration for the eval runner.
 */
export interface EvalConfig {
  /** Path to the AGENTS.md file whose guidance is under test */
  agentsPath: string;

  /** Model to use for evaluation */
  model?: string;

  /** Maximum tokens for the generated response */
  maxTokens?: number;

  /** Sampling temperature for generation */
  temperature?: number;
}
|
||||||
72
skills/postgres-best-practices/evals/utils.ts
Normal file
72
skills/postgres-best-practices/evals/utils.ts
Normal file
@@ -0,0 +1,72 @@
|
|||||||
|
import type { EvalResult, EvalScenario } from "./types.js";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Format eval results as a summary table
|
||||||
|
*/
|
||||||
|
export function formatResultsSummary(results: EvalResult[]): string {
|
||||||
|
const lines: string[] = [];
|
||||||
|
|
||||||
|
lines.push("## Eval Results Summary\n");
|
||||||
|
|
||||||
|
const passed = results.filter((r) => r.passed).length;
|
||||||
|
const total = results.length;
|
||||||
|
const passRate = ((passed / total) * 100).toFixed(1);
|
||||||
|
|
||||||
|
lines.push(`**Pass Rate:** ${passed}/${total} (${passRate}%)\n`);
|
||||||
|
|
||||||
|
lines.push("| Scenario | Status | Latency | Rules Referenced |");
|
||||||
|
lines.push("|----------|--------|---------|------------------|");
|
||||||
|
|
||||||
|
for (const result of results) {
|
||||||
|
const status = result.passed ? "PASS" : "FAIL";
|
||||||
|
const latency = `${result.latencyMs}ms`;
|
||||||
|
const rules = result.rulesReferenced.join(", ") || "none";
|
||||||
|
lines.push(`| ${result.scenarioId} | ${status} | ${latency} | ${rules} |`);
|
||||||
|
}
|
||||||
|
|
||||||
|
return lines.join("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Format detailed results for a single scenario
|
||||||
|
*/
|
||||||
|
export function formatDetailedResult(result: EvalResult): string {
|
||||||
|
const lines: string[] = [];
|
||||||
|
|
||||||
|
lines.push(`## ${result.scenarioId}\n`);
|
||||||
|
lines.push(`**Status:** ${result.passed ? "PASS" : "FAIL"}`);
|
||||||
|
lines.push(`**Latency:** ${result.latencyMs}ms`);
|
||||||
|
lines.push(`**Rules Referenced:** ${result.rulesReferenced.join(", ") || "none"}\n`);
|
||||||
|
|
||||||
|
if (result.error) {
|
||||||
|
lines.push(`**Error:** ${result.error}\n`);
|
||||||
|
}
|
||||||
|
|
||||||
|
lines.push("### Criteria Results\n");
|
||||||
|
for (const criterion of result.criteriaResults) {
|
||||||
|
const icon = criterion.passed ? "+" : "-";
|
||||||
|
lines.push(`${icon} ${criterion.criterion}`);
|
||||||
|
if (criterion.evidence) {
|
||||||
|
lines.push(` Evidence: ${criterion.evidence}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
lines.push("\n### Response\n");
|
||||||
|
lines.push("```");
|
||||||
|
lines.push(result.response);
|
||||||
|
lines.push("```");
|
||||||
|
|
||||||
|
return lines.join("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a scenario builder for cleaner test definitions
|
||||||
|
*/
|
||||||
|
export function createScenario(
|
||||||
|
partial: Omit<EvalScenario, "id"> & { id?: string }
|
||||||
|
): EvalScenario {
|
||||||
|
return {
|
||||||
|
id: partial.id || partial.name.toLowerCase().replace(/\s+/g, "-"),
|
||||||
|
...partial,
|
||||||
|
} as EvalScenario;
|
||||||
|
}
|
||||||
9
skills/postgres-best-practices/evals/vitest.config.ts
Normal file
9
skills/postgres-best-practices/evals/vitest.config.ts
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
import { defineConfig } from "vitest/config";
|
||||||
|
|
||||||
|
// Vitest configuration for the postgres-best-practices eval suite.
export default defineConfig({
  test: {
    // Only files named *.eval.ts under scenarios/ are picked up as evals.
    include: ["scenarios/**/*.eval.ts"],
    testTimeout: 60000, // 60 seconds per test to accommodate LLM calls
    reporters: ["verbose"],
  },
});
|
||||||
23
skills/postgres-best-practices/profiles/aurora.json
Normal file
23
skills/postgres-best-practices/profiles/aurora.json
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
{
|
||||||
|
"name": "aurora",
|
||||||
|
"minVersion": "13",
|
||||||
|
"maxVersion": "16",
|
||||||
|
"extensions": {
|
||||||
|
"available": [
|
||||||
|
"pg_stat_statements",
|
||||||
|
"pgcrypto",
|
||||||
|
"uuid-ossp"
|
||||||
|
],
|
||||||
|
"installable": [
|
||||||
|
"postgis",
|
||||||
|
"pg_hint_plan",
|
||||||
|
"pg_similarity"
|
||||||
|
],
|
||||||
|
"unavailable": [
|
||||||
|
"pg_cron",
|
||||||
|
"pg_partman",
|
||||||
|
"timescaledb"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"notes": "AWS Aurora PostgreSQL. Some extensions are not available due to managed service restrictions. Aurora has its own connection pooling (RDS Proxy) and automatic failover."
|
||||||
|
}
|
||||||
18
skills/postgres-best-practices/profiles/self-hosted.json
Normal file
18
skills/postgres-best-practices/profiles/self-hosted.json
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
{
|
||||||
|
"name": "self-hosted",
|
||||||
|
"minVersion": "12",
|
||||||
|
"extensions": {
|
||||||
|
"available": [],
|
||||||
|
"installable": [
|
||||||
|
"pg_stat_statements",
|
||||||
|
"pgcrypto",
|
||||||
|
"uuid-ossp",
|
||||||
|
"postgis",
|
||||||
|
"pg_trgm",
|
||||||
|
"btree_gin",
|
||||||
|
"btree_gist"
|
||||||
|
],
|
||||||
|
"unavailable": []
|
||||||
|
},
|
||||||
|
"notes": "Generic self-hosted PostgreSQL. Extension availability depends on server configuration. Check pg_available_extensions for what can be installed."
|
||||||
|
}
|
||||||
27
skills/postgres-best-practices/profiles/supabase.json
Normal file
27
skills/postgres-best-practices/profiles/supabase.json
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
{
|
||||||
|
"name": "supabase",
|
||||||
|
"minVersion": "15",
|
||||||
|
"extensions": {
|
||||||
|
"available": [
|
||||||
|
"pg_stat_statements",
|
||||||
|
"pgcrypto",
|
||||||
|
"uuid-ossp",
|
||||||
|
"pgjwt",
|
||||||
|
"pg_graphql",
|
||||||
|
"pg_net",
|
||||||
|
"pgsodium",
|
||||||
|
"supabase_vault",
|
||||||
|
"pg_jsonschema"
|
||||||
|
],
|
||||||
|
"installable": [
|
||||||
|
"postgis",
|
||||||
|
"pg_cron",
|
||||||
|
"pgtap",
|
||||||
|
"plv8",
|
||||||
|
"http",
|
||||||
|
"pg_hashids"
|
||||||
|
],
|
||||||
|
"unavailable": []
|
||||||
|
},
|
||||||
|
"notes": "Supabase manages connection pooling via Supavisor. Direct connection limits differ from pooled connections. All standard Postgres extensions are available."
|
||||||
|
}
|
||||||
@@ -3,6 +3,7 @@ title: Index JSONB Columns for Efficient Querying
|
|||||||
impact: MEDIUM
|
impact: MEDIUM
|
||||||
impactDescription: 10-100x faster JSONB queries with proper indexing
|
impactDescription: 10-100x faster JSONB queries with proper indexing
|
||||||
tags: jsonb, gin, indexes, json
|
tags: jsonb, gin, indexes, json
|
||||||
|
minVersion: "9.4"
|
||||||
---
|
---
|
||||||
|
|
||||||
## Index JSONB Columns for Efficient Querying
|
## Index JSONB Columns for Efficient Querying
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ title: Use UPSERT for Insert-or-Update Operations
|
|||||||
impact: MEDIUM
|
impact: MEDIUM
|
||||||
impactDescription: Atomic operation, eliminates race conditions
|
impactDescription: Atomic operation, eliminates race conditions
|
||||||
tags: upsert, on-conflict, insert, update
|
tags: upsert, on-conflict, insert, update
|
||||||
|
minVersion: "9.5"
|
||||||
---
|
---
|
||||||
|
|
||||||
## Use UPSERT for Insert-or-Update Operations
|
## Use UPSERT for Insert-or-Update Operations
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ title: Use SKIP LOCKED for Non-Blocking Queue Processing
|
|||||||
impact: MEDIUM-HIGH
|
impact: MEDIUM-HIGH
|
||||||
impactDescription: 10x throughput for worker queues
|
impactDescription: 10x throughput for worker queues
|
||||||
tags: skip-locked, queue, workers, concurrency
|
tags: skip-locked, queue, workers, concurrency
|
||||||
|
minVersion: "9.5"
|
||||||
---
|
---
|
||||||
|
|
||||||
## Use SKIP LOCKED for Non-Blocking Queue Processing
|
## Use SKIP LOCKED for Non-Blocking Queue Processing
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ title: Enable pg_stat_statements for Query Analysis
|
|||||||
impact: LOW-MEDIUM
|
impact: LOW-MEDIUM
|
||||||
impactDescription: Identify top resource-consuming queries
|
impactDescription: Identify top resource-consuming queries
|
||||||
tags: pg-stat-statements, monitoring, statistics, performance
|
tags: pg-stat-statements, monitoring, statistics, performance
|
||||||
|
extensions: pg_stat_statements
|
||||||
---
|
---
|
||||||
|
|
||||||
## Enable pg_stat_statements for Query Analysis
|
## Enable pg_stat_statements for Query Analysis
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ title: Use Covering Indexes to Avoid Table Lookups
|
|||||||
impact: MEDIUM-HIGH
|
impact: MEDIUM-HIGH
|
||||||
impactDescription: 2-5x faster queries by eliminating heap fetches
|
impactDescription: 2-5x faster queries by eliminating heap fetches
|
||||||
tags: indexes, covering-index, include, index-only-scan
|
tags: indexes, covering-index, include, index-only-scan
|
||||||
|
minVersion: "11"
|
||||||
---
|
---
|
||||||
|
|
||||||
## Use Covering Indexes to Avoid Table Lookups
|
## Use Covering Indexes to Avoid Table Lookups
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ title: Partition Large Tables for Better Performance
|
|||||||
impact: MEDIUM-HIGH
|
impact: MEDIUM-HIGH
|
||||||
impactDescription: 5-20x faster queries and maintenance on large tables
|
impactDescription: 5-20x faster queries and maintenance on large tables
|
||||||
tags: partitioning, large-tables, time-series, performance
|
tags: partitioning, large-tables, time-series, performance
|
||||||
|
minVersion: "10"
|
||||||
---
|
---
|
||||||
|
|
||||||
## Partition Large Tables for Better Performance
|
## Partition Large Tables for Better Performance
|
||||||
|
|||||||
Reference in New Issue
Block a user