From 1d9f4ea441d12ad9c6ca4ad501ba1c7730fdb3d1 Mon Sep 17 00:00:00 2001 From: Pedro Rodrigues Date: Fri, 23 Jan 2026 17:26:45 +0000 Subject: [PATCH] improve postgres best practices and add evals --- .mcp.json | 4 + packages/skills-build/src/build.ts | 117 +- packages/skills-build/src/parser.ts | 3 + packages/skills-build/src/profiles.ts | 102 ++ packages/skills-build/src/types.ts | 15 + .../postgres-best-practices/AGENTS.aurora.md | 1502 +++++++++++++++++ skills/postgres-best-practices/AGENTS.md | 12 + .../AGENTS.self-hosted.md | 1502 +++++++++++++++++ .../AGENTS.supabase.md | 1502 +++++++++++++++++ skills/postgres-best-practices/README.md | 20 + skills/postgres-best-practices/SKILL.md | 49 + .../postgres-best-practices/evals/README.md | 308 ++++ .../evals/package.json | 18 + .../postgres-best-practices/evals/runner.ts | 192 +++ .../evals/scenarios/covering-index.eval.ts | 62 + .../scenarios/extension-available.eval.ts | 56 + .../scenarios/extension-unavailable.eval.ts | 56 + .../evals/scenarios/missing-index.eval.ts | 56 + .../evals/scenarios/n-plus-one.eval.ts | 71 + .../scenarios/version-constraint.eval.ts | 108 ++ .../evals/tsconfig.json | 13 + skills/postgres-best-practices/evals/types.ts | 112 ++ skills/postgres-best-practices/evals/utils.ts | 72 + .../evals/vitest.config.ts | 9 + .../profiles/aurora.json | 23 + .../profiles/self-hosted.json | 18 + .../profiles/supabase.json | 27 + .../rules/advanced-jsonb-indexing.md | 1 + .../rules/data-upsert.md | 1 + .../rules/lock-skip-locked.md | 1 + .../rules/monitor-pg-stat-statements.md | 1 + .../rules/query-covering-indexes.md | 1 + .../rules/schema-partitioning.md | 1 + 33 files changed, 6024 insertions(+), 11 deletions(-) create mode 100644 packages/skills-build/src/profiles.ts create mode 100644 skills/postgres-best-practices/AGENTS.aurora.md create mode 100644 skills/postgres-best-practices/AGENTS.self-hosted.md create mode 100644 skills/postgres-best-practices/AGENTS.supabase.md create mode 100644 skills/postgres-best-practices/evals/README.md create mode 100644 skills/postgres-best-practices/evals/package.json create mode 100644 skills/postgres-best-practices/evals/runner.ts create mode 100644 skills/postgres-best-practices/evals/scenarios/covering-index.eval.ts create mode 100644 skills/postgres-best-practices/evals/scenarios/extension-available.eval.ts create mode 100644 skills/postgres-best-practices/evals/scenarios/extension-unavailable.eval.ts create mode 100644 skills/postgres-best-practices/evals/scenarios/missing-index.eval.ts create mode 100644 skills/postgres-best-practices/evals/scenarios/n-plus-one.eval.ts create mode 100644 skills/postgres-best-practices/evals/scenarios/version-constraint.eval.ts create mode 100644 skills/postgres-best-practices/evals/tsconfig.json create mode 100644 skills/postgres-best-practices/evals/types.ts create mode 100644 skills/postgres-best-practices/evals/utils.ts create mode 100644 skills/postgres-best-practices/evals/vitest.config.ts create mode 100644 skills/postgres-best-practices/profiles/aurora.json create mode 100644 skills/postgres-best-practices/profiles/self-hosted.json create mode 100644 skills/postgres-best-practices/profiles/supabase.json diff --git a/.mcp.json b/.mcp.json index 2e80c18..a4d94c6 100644 --- a/.mcp.json +++ b/.mcp.json @@ -3,6 +3,10 @@ "supabase": { "type": "http", "url": "https://mcp.supabase.com/mcp?features=docs" + }, + "linear": { + "type": "http", + "url": "https://mcp.linear.app/mcp" } } } diff --git a/packages/skills-build/src/build.ts 
b/packages/skills-build/src/build.ts index bcf5f3f..25d102c 100644 --- a/packages/skills-build/src/build.ts +++ b/packages/skills-build/src/build.ts @@ -7,7 +7,8 @@ import { validateSkillExists, } from "./config.js"; import { parseRuleFile } from "./parser.js"; -import type { Metadata, Rule, Section } from "./types.js"; +import { filterRulesForProfile, listProfiles, loadProfile } from "./profiles.js"; +import type { Metadata, Profile, Rule, Section } from "./types.js"; import { validateRuleFile } from "./validate.js"; /** @@ -100,8 +101,13 @@ export function generateSectionMap( /** * Build AGENTS.md for a specific skill */ -function buildSkill(paths: SkillPaths): void { - console.log(`[${paths.name}] Building AGENTS.md...`); +function buildSkill(paths: SkillPaths, profile?: Profile): void { + const profileSuffix = profile ? `.${profile.name}` : ""; + const outputFile = profile + ? paths.agentsOutput.replace(".md", `${profileSuffix}.md`) + : paths.agentsOutput; + + console.log(`[${paths.name}] Building AGENTS${profileSuffix}.md...`); // Load metadata and sections const metadata = loadMetadata(paths.metadataFile, paths.name); @@ -113,7 +119,7 @@ function buildSkill(paths: SkillPaths): void { if (!existsSync(paths.rulesDir)) { console.log(` No rules directory found. Generating empty AGENTS.md.`); writeFileSync( - paths.agentsOutput, + outputFile, `# ${skillTitle}\n\nNo rules defined yet.\n`, ); return; @@ -147,10 +153,17 @@ function buildSkill(paths: SkillPaths): void { } } + // Filter rules by profile if specified + let filteredRules = rules; + if (profile) { + filteredRules = filterRulesForProfile(rules, profile); + console.log(` Filtered to ${filteredRules.length} rules for profile "${profile.name}"`); + } + // Group rules by section and assign IDs const rulesBySection = new Map(); - for (const rule of rules) { + for (const rule of filteredRules) { const sectionRules = rulesBySection.get(rule.section) || []; sectionRules.push(rule); rulesBySection.set(rule.section, sectionRules); @@ -225,6 +238,18 @@ function buildSkill(paths: SkillPaths): void { output.push(`**Impact: ${rule.impact}**\n`); } + // Add prerequisites if minVersion or extensions are specified + const prerequisites: string[] = []; + if (rule.minVersion) { + prerequisites.push(`PostgreSQL ${rule.minVersion}+`); + } + if (rule.extensions && rule.extensions.length > 0) { + prerequisites.push(`Extension${rule.extensions.length > 1 ? 
"s" : ""}: ${rule.extensions.join(", ")}`); + } + if (prerequisites.length > 0) { + output.push(`**Prerequisites:** ${prerequisites.join(" | ")}\n`); + } + output.push(`${rule.explanation}\n`); for (const example of rule.examples) { @@ -269,9 +294,52 @@ function buildSkill(paths: SkillPaths): void { } // Write output - writeFileSync(paths.agentsOutput, output.join("\n")); - console.log(` Generated: ${paths.agentsOutput}`); - console.log(` Total rules: ${rules.length}`); + writeFileSync(outputFile, output.join("\n")); + console.log(` Generated: ${outputFile}`); + console.log(` Total rules: ${filteredRules.length}`); +} + +/** + * Parse CLI arguments + */ +function parseArgs(): { skill?: string; profile?: string; allProfiles: boolean } { + const args = process.argv.slice(2); + let skill: string | undefined; + let profile: string | undefined; + let allProfiles = false; + + for (let i = 0; i < args.length; i++) { + const arg = args[i]; + if (arg === "--profile" && args[i + 1]) { + profile = args[i + 1]; + i++; + } else if (arg === "--all-profiles") { + allProfiles = true; + } else if (!arg.startsWith("--")) { + skill = arg; + } + } + + return { skill, profile, allProfiles }; +} + +/** + * Build a skill with all available profiles + */ +function buildSkillWithAllProfiles(paths: SkillPaths): void { + const profilesDir = join(paths.skillDir, "profiles"); + const profiles = listProfiles(profilesDir); + + // Build default (no profile) + buildSkill(paths); + + // Build each profile variant + for (const profileName of profiles) { + const profile = loadProfile(profilesDir, profileName); + if (profile) { + buildSkill(paths, profile); + } + } } // Run build when executed directly @@ -280,7 +348,7 @@ const isMainModule = process.argv[1]?.endsWith("build.js"); if (isMainModule) { - const targetSkill = process.argv[2]; + const { skill: targetSkill, profile: profileName, allProfiles } = parseArgs(); if (targetSkill) { // Build specific skill @@ -292,7 +360,29 @@ if (isMainModule) { } process.exit(1); } - buildSkill(getSkillPaths(targetSkill)); + + const paths = getSkillPaths(targetSkill); + + if (allProfiles) { + // Build all profile variants + buildSkillWithAllProfiles(paths); + } else if (profileName) { + // Build with specific profile + const profilesDir = join(paths.skillDir, "profiles"); + const profile = loadProfile(profilesDir, profileName); + if (!profile) { + console.error(`Error: Profile "${profileName}" not found`); + const available = listProfiles(profilesDir); + if (available.length > 0) { + console.error(`Available profiles: ${available.join(", ")}`); + } + process.exit(1); + } + buildSkill(paths, profile); + } else { + // Build default + buildSkill(paths); + } } else { // Build all skills const skills = discoverSkills(); @@ -303,7 +393,12 @@ if (isMainModule) { console.log(`Found ${skills.length} skill(s): ${skills.join(", ")}\n`); for (const skill of skills) { - buildSkill(getSkillPaths(skill)); + const paths = getSkillPaths(skill); + if (allProfiles) { + buildSkillWithAllProfiles(paths); + } else { + buildSkill(paths); + } console.log(""); } } diff --git a/packages/skills-build/src/parser.ts b/packages/skills-build/src/parser.ts index 6efee67..06a7841 100644 --- a/packages/skills-build/src/parser.ts +++ b/packages/skills-build/src/parser.ts @@ -251,6 +251,7 @@ export function parseRuleFile( const examples = extractExamples(body); const tags = frontmatter.tags?.split(",").map((t) => t.trim()) || []; + const extensions = frontmatter.extensions?.split(",").map((e) => e.trim()) || []; // 
Validation warnings if (!explanation || explanation.length < 20) { @@ -271,6 +272,8 @@ export function parseRuleFile( examples, references: extractReferences(body), tags: tags.length > 0 ? tags : undefined, + minVersion: frontmatter.minVersion || undefined, + extensions: extensions.length > 0 ? extensions : undefined, }; return { success: true, rule, errors, warnings }; diff --git a/packages/skills-build/src/profiles.ts b/packages/skills-build/src/profiles.ts new file mode 100644 index 0000000..4083381 --- /dev/null +++ b/packages/skills-build/src/profiles.ts @@ -0,0 +1,102 @@ +import { existsSync, readdirSync, readFileSync } from "node:fs"; +import { join } from "node:path"; +import type { Profile, Rule } from "./types.js"; + +/** + * Load a profile from the profiles directory + */ +export function loadProfile(profilesDir: string, profileName: string): Profile | null { + const profileFile = join(profilesDir, `${profileName}.json`); + if (!existsSync(profileFile)) { + return null; + } + + try { + return JSON.parse(readFileSync(profileFile, "utf-8")); + } catch (error) { + console.error(`Error loading profile ${profileName}:`, error); + return null; + } +} + +/** + * List all available profiles in the profiles directory + */ +export function listProfiles(profilesDir: string): string[] { + if (!existsSync(profilesDir)) { + return []; + } + + return readdirSync(profilesDir) + .filter((f) => f.endsWith(".json")) + .map((f) => f.replace(".json", "")); +} + +/** + * Compare version strings (e.g., "9.5", "11", "14.2") + * Returns: negative if a < b, 0 if equal, positive if a > b + */ +function compareVersions(a: string, b: string): number { + const partsA = a.split(".").map(Number); + const partsB = b.split(".").map(Number); + + const maxLen = Math.max(partsA.length, partsB.length); + for (let i = 0; i < maxLen; i++) { + const numA = partsA[i] || 0; + const numB = partsB[i] || 0; + if (numA !== numB) { + return numA - numB; + } + } + return 0; +} + +/** + * Check if a rule is compatible with a profile + */ +export function isRuleCompatibleWithProfile(rule: Rule, profile: Profile): boolean { + // Check version requirement + if (rule.minVersion) { + if (compareVersions(rule.minVersion, profile.minVersion) > 0) { + // Rule requires a higher version than profile supports + return false; + } + if (profile.maxVersion && compareVersions(rule.minVersion, profile.maxVersion) > 0) { + // Rule requires a version higher than profile's max + return false; + } + } + + // Check extension requirements + if (rule.extensions && rule.extensions.length > 0) { + const allExtensions = [ + ...(profile.extensions.available || []), + ...(profile.extensions.installable || []), + ]; + + for (const ext of rule.extensions) { + if (profile.extensions.unavailable?.includes(ext)) { + // Extension is explicitly unavailable in this profile + return false; + } + if (!allExtensions.includes(ext)) { + // Extension is not available or installable + return false; + } + } + } + + // Check if rule is explicitly excluded + if (profile.excludeRules?.includes(rule.id)) { + return false; + } + + return true; +} + +/** + * Filter rules based on profile constraints + */ +export function filterRulesForProfile(rules: Rule[], profile: Profile): Rule[] { + return rules.filter((rule) => isRuleCompatibleWithProfile(rule, profile)); +} diff --git a/packages/skills-build/src/types.ts b/packages/skills-build/src/types.ts index 440359b..520e1e4 100644 --- a/packages/skills-build/src/types.ts +++ b/packages/skills-build/src/types.ts @@ -26,6 +26,8 @@ 
export interface Rule { references?: string[]; tags?: string[]; supabaseNotes?: string; + minVersion?: string; // Minimum PostgreSQL version required (e.g., "11", "14") + extensions?: string[]; // Required PostgreSQL extensions (e.g., ["pg_stat_statements"]) } export interface Section { @@ -57,3 +59,16 @@ export interface ValidationResult { errors: string[]; warnings: string[]; } + +export interface Profile { + name: string; + minVersion: string; + maxVersion?: string; + extensions: { + available: string[]; + installable?: string[]; + unavailable: string[]; + }; + excludeRules?: string[]; + notes?: string; +} diff --git a/skills/postgres-best-practices/AGENTS.aurora.md b/skills/postgres-best-practices/AGENTS.aurora.md new file mode 100644 index 0000000..786a9da --- /dev/null +++ b/skills/postgres-best-practices/AGENTS.aurora.md @@ -0,0 +1,1502 @@ +# Postgres Best Practices + +**Version 1.0.0** +Supabase +January 2026 + +> This document is optimized for AI agents and LLMs. Rules are prioritized by performance impact. + +--- + +## Abstract + +Comprehensive Postgres performance optimization guide for developers using Supabase and Postgres. Contains performance rules across 8 categories, prioritized by impact from critical (query performance, connection management) to incremental (advanced features). Each rule includes detailed explanations, incorrect vs. correct SQL examples, query plan analysis, and specific performance metrics to guide automated optimization and code generation. + +--- + +## Table of Contents + +1. [Query Performance](#query-performance) - **CRITICAL** + - 1.1 [Add Indexes on WHERE and JOIN Columns](#11-add-indexes-on-where-and-join-columns) + - 1.2 [Choose the Right Index Type for Your Data](#12-choose-the-right-index-type-for-your-data) + - 1.3 [Create Composite Indexes for Multi-Column Queries](#13-create-composite-indexes-for-multi-column-queries) + - 1.4 [Use Covering Indexes to Avoid Table Lookups](#14-use-covering-indexes-to-avoid-table-lookups) + - 1.5 [Use Partial Indexes for Filtered Queries](#15-use-partial-indexes-for-filtered-queries) + +2. [Connection Management](#connection-management) - **CRITICAL** + - 2.1 [Configure Idle Connection Timeouts](#21-configure-idle-connection-timeouts) + - 2.2 [Set Appropriate Connection Limits](#22-set-appropriate-connection-limits) + - 2.3 [Use Connection Pooling for All Applications](#23-use-connection-pooling-for-all-applications) + - 2.4 [Use Prepared Statements Correctly with Pooling](#24-use-prepared-statements-correctly-with-pooling) + +3. [Security & RLS](#security-rls) - **CRITICAL** + - 3.1 [Apply Principle of Least Privilege](#31-apply-principle-of-least-privilege) + - 3.2 [Enable Row Level Security for Multi-Tenant Data](#32-enable-row-level-security-for-multi-tenant-data) + - 3.3 [Optimize RLS Policies for Performance](#33-optimize-rls-policies-for-performance) + +4. [Schema Design](#schema-design) - **HIGH** + - 4.1 [Choose Appropriate Data Types](#41-choose-appropriate-data-types) + - 4.2 [Index Foreign Key Columns](#42-index-foreign-key-columns) + - 4.3 [Partition Large Tables for Better Performance](#43-partition-large-tables-for-better-performance) + - 4.4 [Select Optimal Primary Key Strategy](#44-select-optimal-primary-key-strategy) + - 4.5 [Use Lowercase Identifiers for Compatibility](#45-use-lowercase-identifiers-for-compatibility) + +5. 
[Concurrency & Locking](#concurrency-locking) - **MEDIUM-HIGH** + - 5.1 [Keep Transactions Short to Reduce Lock Contention](#51-keep-transactions-short-to-reduce-lock-contention) + - 5.2 [Prevent Deadlocks with Consistent Lock Ordering](#52-prevent-deadlocks-with-consistent-lock-ordering) + - 5.3 [Use Advisory Locks for Application-Level Locking](#53-use-advisory-locks-for-application-level-locking) + - 5.4 [Use SKIP LOCKED for Non-Blocking Queue Processing](#54-use-skip-locked-for-non-blocking-queue-processing) + +6. [Data Access Patterns](#data-access-patterns) - **MEDIUM** + - 6.1 [Batch INSERT Statements for Bulk Data](#61-batch-insert-statements-for-bulk-data) + - 6.2 [Eliminate N+1 Queries with Batch Loading](#62-eliminate-n1-queries-with-batch-loading) + - 6.3 [Use Cursor-Based Pagination Instead of OFFSET](#63-use-cursor-based-pagination-instead-of-offset) + - 6.4 [Use UPSERT for Insert-or-Update Operations](#64-use-upsert-for-insert-or-update-operations) + +7. [Monitoring & Diagnostics](#monitoring-diagnostics) - **LOW-MEDIUM** + - 7.1 [Enable pg_stat_statements for Query Analysis](#71-enable-pgstatstatements-for-query-analysis) + - 7.2 [Maintain Table Statistics with VACUUM and ANALYZE](#72-maintain-table-statistics-with-vacuum-and-analyze) + - 7.3 [Use EXPLAIN ANALYZE to Diagnose Slow Queries](#73-use-explain-analyze-to-diagnose-slow-queries) + +8. [Advanced Features](#advanced-features) - **LOW** + - 8.1 [Index JSONB Columns for Efficient Querying](#81-index-jsonb-columns-for-efficient-querying) + - 8.2 [Use tsvector for Full-Text Search](#82-use-tsvector-for-full-text-search) + +--- + +## 1. Query Performance + +**Impact: CRITICAL** + +Slow queries, missing indexes, inefficient query plans. The most common source of Postgres performance issues. + +### 1.1 Add Indexes on WHERE and JOIN Columns + +**Impact: CRITICAL (100-1000x faster queries on large tables)** + +Queries filtering or joining on unindexed columns cause full table scans, which become exponentially slower as tables grow. + +**Incorrect (sequential scan on large table):** + +```sql +-- No index on customer_id causes full table scan +select * from orders where customer_id = 123; + +-- EXPLAIN shows: Seq Scan on orders (cost=0.00..25000.00 rows=100 width=85) +``` + +**Correct (index scan):** + +```sql +-- Create index on frequently filtered column +create index orders_customer_id_idx on orders (customer_id); + +select * from orders where customer_id = 123; + +-- EXPLAIN shows: Index Scan using orders_customer_id_idx (cost=0.42..8.44 rows=100 width=85) +-- Index the referencing column +create index orders_customer_id_idx on orders (customer_id); + +select c.name, o.total +from customers c +join orders o on o.customer_id = c.id; +``` + +For JOIN columns, always index the foreign key side: + +Reference: https://supabase.com/docs/guides/database/query-optimization + +--- + +### 1.2 Choose the Right Index Type for Your Data + +**Impact: HIGH (10-100x improvement with correct index type)** + +Different index types excel at different query patterns. The default B-tree isn't always optimal. 
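+
+To see which index types are already in place before adding new ones, query the catalogs directly. A minimal audit sketch (the `public` schema filter is an assumption):
+
+```sql
+-- List every index in the schema with its access method (btree, gin, brin, hash, ...)
+select t.relname as table_name,
+       c.relname as index_name,
+       am.amname as index_type
+from pg_class c
+join pg_am am on am.oid = c.relam
+join pg_index i on i.indexrelid = c.oid
+join pg_class t on t.oid = i.indrelid
+join pg_namespace n on n.oid = c.relnamespace
+where n.nspname = 'public'
+  and c.relkind = 'i'
+order by t.relname, c.relname;
+```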
+ +**Incorrect (B-tree for JSONB containment):** + +```sql +-- B-tree cannot optimize containment operators +create index products_attrs_idx on products (attributes); +select * from products where attributes @> '{"color": "red"}'; +-- Full table scan - B-tree doesn't support @> operator +``` + +**Correct (GIN for JSONB):** + +```sql +-- GIN supports @>, ?, ?&, ?| operators +create index products_attrs_idx on products using gin (attributes); +select * from products where attributes @> '{"color": "red"}'; +-- B-tree (default): =, <, >, BETWEEN, IN, IS NULL +create index users_created_idx on users (created_at); + +-- GIN: arrays, JSONB, full-text search +create index posts_tags_idx on posts using gin (tags); + +-- BRIN: large time-series tables (10-100x smaller) +create index events_time_idx on events using brin (created_at); + +-- Hash: equality-only (slightly faster than B-tree for =) +create index sessions_token_idx on sessions using hash (token); +``` + +Index type guide: + +Reference: https://www.postgresql.org/docs/current/indexes-types.html + +--- + +### 1.3 Create Composite Indexes for Multi-Column Queries + +**Impact: HIGH (5-10x faster multi-column queries)** + +When queries filter on multiple columns, a composite index is more efficient than separate single-column indexes. + +**Incorrect (separate indexes require bitmap scan):** + +```sql +-- Two separate indexes +create index orders_status_idx on orders (status); +create index orders_created_idx on orders (created_at); + +-- Query must combine both indexes (slower) +select * from orders where status = 'pending' and created_at > '2024-01-01'; +``` + +**Correct (composite index):** + +```sql +-- Single composite index (leftmost column first for equality checks) +create index orders_status_created_idx on orders (status, created_at); + +-- Query uses one efficient index scan +select * from orders where status = 'pending' and created_at > '2024-01-01'; +-- Good: status (=) before created_at (>) +create index idx on orders (status, created_at); + +-- Works for: WHERE status = 'pending' +-- Works for: WHERE status = 'pending' AND created_at > '2024-01-01' +-- Does NOT work for: WHERE created_at > '2024-01-01' (leftmost prefix rule) +``` + +**Column order matters** - place equality columns first, range columns last: + +Reference: https://www.postgresql.org/docs/current/indexes-multicolumn.html + +--- + +### 1.4 Use Covering Indexes to Avoid Table Lookups + +**Impact: MEDIUM-HIGH (2-5x faster queries by eliminating heap fetches)** + +**Prerequisites:** PostgreSQL 11+ + +Covering indexes include all columns needed by a query, enabling index-only scans that skip the table entirely. 
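+
+Whether a query is actually served from the index alone shows up in EXPLAIN. A quick verification sketch (table, index, and column names follow the examples below and are placeholders):
+
+```sql
+-- Look for "Index Only Scan" and a low "Heap Fetches" count in the output
+explain (analyze, buffers)
+select email, name, created_at from users where email = 'user@example.com';
+
+-- If Heap Fetches stays high, vacuum so the visibility map is up to date
+vacuum (analyze) users;
+```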
+ +**Incorrect (index scan + heap fetch):** + +```sql +create index users_email_idx on users (email); + +-- Must fetch name and created_at from table heap +select email, name, created_at from users where email = 'user@example.com'; +``` + +**Correct (index-only scan with INCLUDE):** + +```sql +-- Include non-searchable columns in the index +create index users_email_idx on users (email) include (name, created_at); + +-- All columns served from index, no table access needed +select email, name, created_at from users where email = 'user@example.com'; +-- Searching by status, but also need customer_id and total +create index orders_status_idx on orders (status) include (customer_id, total); + +select status, customer_id, total from orders where status = 'shipped'; +``` + +Use INCLUDE for columns you SELECT but don't filter on: + +Reference: https://www.postgresql.org/docs/current/indexes-index-only-scans.html + +--- + +### 1.5 Use Partial Indexes for Filtered Queries + +**Impact: HIGH (5-20x smaller indexes, faster writes and queries)** + +Partial indexes only include rows matching a WHERE condition, making them smaller and faster when queries consistently filter on the same condition. + +**Incorrect (full index includes irrelevant rows):** + +```sql +-- Index includes all rows, even soft-deleted ones +create index users_email_idx on users (email); + +-- Query always filters active users +select * from users where email = 'user@example.com' and deleted_at is null; +``` + +**Correct (partial index matches query filter):** + +```sql +-- Index only includes active users +create index users_active_email_idx on users (email) +where deleted_at is null; + +-- Query uses the smaller, faster index +select * from users where email = 'user@example.com' and deleted_at is null; +-- Only pending orders (status rarely changes once completed) +create index orders_pending_idx on orders (created_at) +where status = 'pending'; + +-- Only non-null values +create index products_sku_idx on products (sku) +where sku is not null; +``` + +Common use cases for partial indexes: + +Reference: https://www.postgresql.org/docs/current/indexes-partial.html + +--- + +## 2. Connection Management + +**Impact: CRITICAL** + +Connection pooling, limits, and serverless strategies. Critical for applications with high concurrency or serverless deployments. + +### 2.1 Configure Idle Connection Timeouts + +**Impact: HIGH (Reclaim 30-50% of connection slots from idle clients)** + +Idle connections waste resources. Configure timeouts to automatically reclaim them. 
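+
+Before picking timeout values, check how long sessions actually sit idle. A diagnostic sketch (the 5-minute threshold and the pid are illustrative):
+
+```sql
+-- Sessions idle in transaction, oldest first
+select pid, usename, application_name,
+       now() - state_change as idle_for,
+       query
+from pg_stat_activity
+where state = 'idle in transaction'
+  and now() - state_change > interval '5 minutes'
+order by idle_for desc;
+
+-- Terminate a specific offender once identified
+select pg_terminate_backend(12345);
+```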
+ +**Incorrect (connections held indefinitely):** + +```sql +-- No timeout configured +show idle_in_transaction_session_timeout; -- 0 (disabled) + +-- Connections stay open forever, even when idle +select pid, state, state_change, query +from pg_stat_activity +where state = 'idle in transaction'; +-- Shows transactions idle for hours, holding locks +``` + +**Correct (automatic cleanup of idle connections):** + +```ini +-- Terminate connections idle in transaction after 30 seconds +alter system set idle_in_transaction_session_timeout = '30s'; + +-- Terminate completely idle connections after 10 minutes +alter system set idle_session_timeout = '10min'; + +-- Reload configuration +select pg_reload_conf(); +# pgbouncer.ini +server_idle_timeout = 60 +client_idle_timeout = 300 +``` + +For pooled connections, configure at the pooler level: + +Reference: https://www.postgresql.org/docs/current/runtime-config-client.html#GUC-IDLE-IN-TRANSACTION-SESSION-TIMEOUT + +--- + +### 2.2 Set Appropriate Connection Limits + +**Impact: CRITICAL (Prevent database crashes and memory exhaustion)** + +Too many connections exhaust memory and degrade performance. Set limits based on available resources. + +**Incorrect (unlimited or excessive connections):** + +```sql +-- Default max_connections = 100, but often increased blindly +show max_connections; -- 500 (way too high for 4GB RAM) + +-- Each connection uses 1-3MB RAM +-- 500 connections * 2MB = 1GB just for connections! +-- Out of memory errors under load +``` + +**Correct (calculate based on resources):** + +```sql +-- Formula: max_connections = (RAM in MB / 5MB per connection) - reserved +-- For 4GB RAM: (4096 / 5) - 10 = ~800 theoretical max +-- But practically, 100-200 is better for query performance + +-- Recommended settings for 4GB RAM +alter system set max_connections = 100; + +-- Also set work_mem appropriately +-- work_mem * max_connections should not exceed 25% of RAM +alter system set work_mem = '8MB'; -- 8MB * 100 = 800MB max +select count(*), state from pg_stat_activity group by state; +``` + +Monitor connection usage: + +Reference: https://supabase.com/docs/guides/platform/performance#connection-management + +--- + +### 2.3 Use Connection Pooling for All Applications + +**Impact: CRITICAL (Handle 10-100x more concurrent users)** + +Postgres connections are expensive (1-3MB RAM each). Without pooling, applications exhaust connections under load. + +**Incorrect (new connection per request):** + +```sql +-- Each request creates a new connection +-- Application code: db.connect() per request +-- Result: 500 concurrent users = 500 connections = crashed database + +-- Check current connections +select count(*) from pg_stat_activity; -- 487 connections! 
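+
+-- (sketch) Before adding a pooler, see where the connections come from
+select application_name, usename, count(*)
+from pg_stat_activity
+group by application_name, usename
+order by count(*) desc;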
+``` + +**Correct (connection pooling):** + +```sql +-- Use a pooler like PgBouncer between app and database +-- Application connects to pooler, pooler reuses a small pool to Postgres + +-- Configure pool_size based on: (CPU cores * 2) + spindle_count +-- Example for 4 cores: pool_size = 10 + +-- Result: 500 concurrent users share 10 actual connections +select count(*) from pg_stat_activity; -- 10 connections +``` + +Pool modes: +- **Transaction mode**: connection returned after each transaction (best for most apps) +- **Session mode**: connection held for entire session (needed for prepared statements, temp tables) + +Reference: https://supabase.com/docs/guides/database/connecting-to-postgres#connection-pooler + +--- + +### 2.4 Use Prepared Statements Correctly with Pooling + +**Impact: HIGH (Avoid prepared statement conflicts in pooled environments)** + +Prepared statements are tied to individual database connections. In transaction-mode pooling, connections are shared, causing conflicts. + +**Incorrect (named prepared statements with transaction pooling):** + +```sql +-- Named prepared statement +prepare get_user as select * from users where id = $1; + +-- In transaction mode pooling, next request may get different connection +execute get_user(123); +-- ERROR: prepared statement "get_user" does not exist +``` + +**Correct (use unnamed statements or session mode):** + +```sql +-- Option 1: Use unnamed prepared statements (most ORMs do this automatically) +-- The query is prepared and executed in a single protocol message + +-- Option 2: Deallocate after use in transaction mode +prepare get_user as select * from users where id = $1; +execute get_user(123); +deallocate get_user; + +-- Option 3: Use session mode pooling (port 5432 vs 6543) +-- Connection is held for entire session, prepared statements persist +-- Many drivers use prepared statements by default +-- Node.js pg: { prepare: false } to disable +-- JDBC: prepareThreshold=0 to disable +``` + +Check your driver settings: + +Reference: https://supabase.com/docs/guides/database/connecting-to-postgres#connection-pool-modes + +--- + +## 3. Security & RLS + +**Impact: CRITICAL** + +Row-Level Security policies, privilege management, and authentication patterns. + +### 3.1 Apply Principle of Least Privilege + +**Impact: MEDIUM (Reduced attack surface, better audit trail)** + +Grant only the minimum permissions required. Never use superuser for application queries. 
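+
+Grants should also apply to tables created later, and existing grants should be easy to audit. A sketch (role names such as `migration_owner` and `app_readonly` are assumptions):
+
+```sql
+-- Make future tables created by the migration role readable by the app role
+alter default privileges for role migration_owner in schema public
+  grant select on tables to app_readonly;
+
+-- Audit what a role can currently do
+select table_name, privilege_type
+from information_schema.role_table_grants
+where grantee = 'app_readonly'
+order by table_name;
+```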
+ +**Incorrect (overly broad permissions):** + +```sql +-- Application uses superuser connection +-- Or grants ALL to application role +grant all privileges on all tables in schema public to app_user; +grant all privileges on all sequences in schema public to app_user; + +-- Any SQL injection becomes catastrophic +-- drop table users; cascades to everything +``` + +**Correct (minimal, specific grants):** + +```sql +-- Create role with no default privileges +create role app_readonly nologin; + +-- Grant only SELECT on specific tables +grant usage on schema public to app_readonly; +grant select on public.products, public.categories to app_readonly; + +-- Create role for writes with limited scope +create role app_writer nologin; +grant usage on schema public to app_writer; +grant select, insert, update on public.orders to app_writer; +grant usage on sequence orders_id_seq to app_writer; +-- No DELETE permission + +-- Login role inherits from these +create role app_user login password 'xxx'; +grant app_writer to app_user; +-- Revoke default public access +revoke all on schema public from public; +revoke all on all tables in schema public from public; +``` + +Revoke public defaults: + +Reference: https://supabase.com/blog/postgres-roles-and-privileges + +--- + +### 3.2 Enable Row Level Security for Multi-Tenant Data + +**Impact: CRITICAL (Database-enforced tenant isolation, prevent data leaks)** + +Row Level Security (RLS) enforces data access at the database level, ensuring users only see their own data. + +**Incorrect (application-level filtering only):** + +```sql +-- Relying only on application to filter +select * from orders where user_id = $current_user_id; + +-- Bug or bypass means all data is exposed! +select * from orders; -- Returns ALL orders +``` + +**Correct (database-enforced RLS):** + +```sql +-- Enable RLS on the table +alter table orders enable row level security; + +-- Create policy for users to see only their orders +create policy orders_user_policy on orders + for all + using (user_id = current_setting('app.current_user_id')::bigint); + +-- Force RLS even for table owners +alter table orders force row level security; + +-- Set user context and query +set app.current_user_id = '123'; +select * from orders; -- Only returns orders for user 123 +create policy orders_user_policy on orders + for all + to authenticated + using (user_id = auth.uid()); +``` + +Policy for authenticated role: + +Reference: https://supabase.com/docs/guides/database/postgres/row-level-security + +--- + +### 3.3 Optimize RLS Policies for Performance + +**Impact: HIGH (5-10x faster RLS queries with proper patterns)** + +Poorly written RLS policies can cause severe performance issues. Use subqueries and indexes strategically. + +**Incorrect (function called for every row):** + +```sql +create policy orders_policy on orders + using (auth.uid() = user_id); -- auth.uid() called per row! 
+ +-- With 1M rows, auth.uid() is called 1M times +``` + +**Correct (wrap functions in SELECT):** + +```sql +create policy orders_policy on orders + using ((select auth.uid()) = user_id); -- Called once, cached + +-- 100x+ faster on large tables +-- Create helper function (runs as definer, bypasses RLS) +create or replace function is_team_member(team_id bigint) +returns boolean +language sql +security definer +set search_path = '' +as $$ + select exists ( + select 1 from public.team_members + where team_id = $1 and user_id = (select auth.uid()) + ); +$$; + +-- Use in policy (indexed lookup, not per-row check) +create policy team_orders_policy on orders + using ((select is_team_member(team_id))); +create index orders_user_id_idx on orders (user_id); +``` + +Use security definer functions for complex checks: +Always add indexes on columns used in RLS policies: + +Reference: https://supabase.com/docs/guides/database/postgres/row-level-security#rls-performance-recommendations + +--- + +## 4. Schema Design + +**Impact: HIGH** + +Table design, index strategies, partitioning, and data type selection. Foundation for long-term performance. + +### 4.1 Choose Appropriate Data Types + +**Impact: HIGH (50% storage reduction, faster comparisons)** + +Using the right data types reduces storage, improves query performance, and prevents bugs. + +**Incorrect (wrong data types):** + +```sql +create table users ( + id int, -- Will overflow at 2.1 billion + email varchar(255), -- Unnecessary length limit + created_at timestamp, -- Missing timezone info + is_active varchar(5), -- String for boolean + price varchar(20) -- String for numeric +); +``` + +**Correct (appropriate data types):** + +```sql +create table users ( + id bigint generated always as identity primary key, -- 9 quintillion max + email text, -- No artificial limit, same performance as varchar + created_at timestamptz, -- Always store timezone-aware timestamps + is_active boolean default true, -- 1 byte vs variable string length + price numeric(10,2) -- Exact decimal arithmetic +); +-- IDs: use bigint, not int (future-proofing) +-- Strings: use text, not varchar(n) unless constraint needed +-- Time: use timestamptz, not timestamp +-- Money: use numeric, not float (precision matters) +-- Enums: use text with check constraint or create enum type +``` + +Key guidelines: + +Reference: https://www.postgresql.org/docs/current/datatype.html + +--- + +### 4.2 Index Foreign Key Columns + +**Impact: HIGH (10-100x faster JOINs and CASCADE operations)** + +Postgres does not automatically index foreign key columns. Missing indexes cause slow JOINs and CASCADE operations. + +**Incorrect (unindexed foreign key):** + +```sql +create table orders ( + id bigint generated always as identity primary key, + customer_id bigint references customers(id) on delete cascade, + total numeric(10,2) +); + +-- No index on customer_id! 
+-- JOINs and ON DELETE CASCADE both require full table scan +select * from orders where customer_id = 123; -- Seq Scan +delete from customers where id = 123; -- Locks table, scans all orders +``` + +**Correct (indexed foreign key):** + +```sql +create table orders ( + id bigint generated always as identity primary key, + customer_id bigint references customers(id) on delete cascade, + total numeric(10,2) +); + +-- Always index the FK column +create index orders_customer_id_idx on orders (customer_id); + +-- Now JOINs and cascades are fast +select * from orders where customer_id = 123; -- Index Scan +delete from customers where id = 123; -- Uses index, fast cascade +select + conrelid::regclass as table_name, + a.attname as fk_column +from pg_constraint c +join pg_attribute a on a.attrelid = c.conrelid and a.attnum = any(c.conkey) +where c.contype = 'f' + and not exists ( + select 1 from pg_index i + where i.indrelid = c.conrelid and a.attnum = any(i.indkey) + ); +``` + +Find missing FK indexes: + +Reference: https://www.postgresql.org/docs/current/ddl-constraints.html#DDL-CONSTRAINTS-FK + +--- + +### 4.3 Partition Large Tables for Better Performance + +**Impact: MEDIUM-HIGH (5-20x faster queries and maintenance on large tables)** + +**Prerequisites:** PostgreSQL 10+ + +Partitioning splits a large table into smaller pieces, improving query performance and maintenance operations. + +**Incorrect (single large table):** + +```sql +create table events ( + id bigint generated always as identity, + created_at timestamptz, + data jsonb +); + +-- 500M rows, queries scan everything +select * from events where created_at > '2024-01-01'; -- Slow +vacuum events; -- Takes hours, locks table +``` + +**Correct (partitioned by time range):** + +```sql +create table events ( + id bigint generated always as identity, + created_at timestamptz not null, + data jsonb +) partition by range (created_at); + +-- Create partitions for each month +create table events_2024_01 partition of events + for values from ('2024-01-01') to ('2024-02-01'); + +create table events_2024_02 partition of events + for values from ('2024-02-01') to ('2024-03-01'); + +-- Queries only scan relevant partitions +select * from events where created_at > '2024-01-15'; -- Only scans events_2024_01+ + +-- Drop old data instantly +drop table events_2023_01; -- Instant vs DELETE taking hours +``` + +When to partition: +- Tables > 100M rows +- Time-series data with date-based queries +- Need to efficiently drop old data + +Reference: https://www.postgresql.org/docs/current/ddl-partitioning.html + +--- + +### 4.4 Select Optimal Primary Key Strategy + +**Impact: HIGH (Better index locality, reduced fragmentation)** + +Primary key choice affects insert performance, index size, and replication +efficiency. 
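+
+Comparing index sizes on representative data is a quick way to evaluate key strategies. A minimal sketch (index names are placeholders):
+
+```sql
+-- Compare primary key index sizes after loading representative data
+select relname as index_name,
+       pg_size_pretty(pg_relation_size(oid)) as index_size
+from pg_class
+where relname in ('users_pkey', 'orders_pkey');
+```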
+ +**Incorrect (problematic PK choices):** + +```sql +-- identity is the SQL-standard approach +create table users ( + id serial primary key -- Works, but IDENTITY is recommended +); + +-- Random UUIDs (v4) cause index fragmentation +create table orders ( + id uuid default gen_random_uuid() primary key -- UUIDv4 = random = scattered inserts +); +``` + +**Correct (optimal PK strategies):** + +```sql +-- Use IDENTITY for sequential IDs (SQL-standard, best for most cases) +create table users ( + id bigint generated always as identity primary key +); + +-- For distributed systems needing UUIDs, use UUIDv7 (time-ordered) +-- Requires pg_uuidv7 extension: create extension pg_uuidv7; +create table orders ( + id uuid default uuid_generate_v7() primary key -- Time-ordered, no fragmentation +); + +-- Alternative: time-prefixed IDs for sortable, distributed IDs (no extension needed) +create table events ( + id text default concat( + to_char(now() at time zone 'utc', 'YYYYMMDDHH24MISSMS'), + gen_random_uuid()::text + ) primary key +); +``` + +Guidelines: +- Single database: `bigint identity` (sequential, 8 bytes, SQL-standard) +- Distributed/exposed IDs: UUIDv7 (requires pg_uuidv7) or ULID (time-ordered, no + fragmentation) +- `serial` works but `identity` is SQL-standard and preferred for new + applications +- Avoid random UUIDs (v4) as primary keys on large tables (causes index + fragmentation) +[Identity Columns](https://www.postgresql.org/docs/current/sql-createtable.html#SQL-CREATETABLE-PARMS-GENERATED-IDENTITY) + +--- + +### 4.5 Use Lowercase Identifiers for Compatibility + +**Impact: MEDIUM (Avoid case-sensitivity bugs with tools, ORMs, and AI assistants)** + +PostgreSQL folds unquoted identifiers to lowercase. Quoted mixed-case identifiers require quotes forever and cause issues with tools, ORMs, and AI assistants that may not recognize them. + +**Incorrect (mixed-case identifiers):** + +```sql +-- Quoted identifiers preserve case but require quotes everywhere +CREATE TABLE "Users" ( + "userId" bigint PRIMARY KEY, + "firstName" text, + "lastName" text +); + +-- Must always quote or queries fail +SELECT "firstName" FROM "Users" WHERE "userId" = 1; + +-- This fails - Users becomes users without quotes +SELECT firstName FROM Users; +-- ERROR: relation "users" does not exist +``` + +**Correct (lowercase snake_case):** + +```sql +-- Unquoted lowercase identifiers are portable and tool-friendly +CREATE TABLE users ( + user_id bigint PRIMARY KEY, + first_name text, + last_name text +); + +-- Works without quotes, recognized by all tools +SELECT first_name FROM users WHERE user_id = 1; +-- ORMs often generate quoted camelCase - configure them to use snake_case +-- Migrations from other databases may preserve original casing +-- Some GUI tools quote identifiers by default - disable this + +-- If stuck with mixed-case, create views as a compatibility layer +CREATE VIEW users AS SELECT "userId" AS user_id, "firstName" AS first_name FROM "Users"; +``` + +Common sources of mixed-case identifiers: + +Reference: https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS + +--- + +## 5. Concurrency & Locking + +**Impact: MEDIUM-HIGH** + +Transaction management, isolation levels, deadlock prevention, and lock contention patterns. + +### 5.1 Keep Transactions Short to Reduce Lock Contention + +**Impact: MEDIUM-HIGH (3-5x throughput improvement, fewer deadlocks)** + +Long-running transactions hold locks that block other queries. Keep transactions as short as possible. 
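+
+Long-open transactions are visible in `pg_stat_activity`. A monitoring sketch (the 1-minute threshold is illustrative):
+
+```sql
+-- Transactions that have been open the longest
+select pid, usename,
+       now() - xact_start as xact_age,
+       state, query
+from pg_stat_activity
+where xact_start is not null
+  and now() - xact_start > interval '1 minute'
+order by xact_age desc;
+```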
+ +**Incorrect (long transaction with external calls):** + +```sql +begin; +select * from orders where id = 1 for update; -- Lock acquired + +-- Application makes HTTP call to payment API (2-5 seconds) +-- Other queries on this row are blocked! + +update orders set status = 'paid' where id = 1; +commit; -- Lock held for entire duration +``` + +**Correct (minimal transaction scope):** + +```sql +-- Validate data and call APIs outside transaction +-- Application: response = await paymentAPI.charge(...) + +-- Only hold lock for the actual update +begin; +update orders +set status = 'paid', payment_id = $1 +where id = $2 and status = 'pending' +returning *; +commit; -- Lock held for milliseconds +-- Abort queries running longer than 30 seconds +set statement_timeout = '30s'; + +-- Or per-session +set local statement_timeout = '5s'; +``` + +Use `statement_timeout` to prevent runaway transactions: + +Reference: https://www.postgresql.org/docs/current/tutorial-transactions.html + +--- + +### 5.2 Prevent Deadlocks with Consistent Lock Ordering + +**Impact: MEDIUM-HIGH (Eliminate deadlock errors, improve reliability)** + +Deadlocks occur when transactions lock resources in different orders. Always +acquire locks in a consistent order. + +**Incorrect (inconsistent lock ordering):** + +```sql +-- Transaction A -- Transaction B +begin; begin; +update accounts update accounts +set balance = balance - 100 set balance = balance - 50 +where id = 1; where id = 2; -- B locks row 2 + +update accounts update accounts +set balance = balance + 100 set balance = balance + 50 +where id = 2; -- A waits for B where id = 1; -- B waits for A + +-- DEADLOCK! Both waiting for each other +``` + +**Correct (lock rows in consistent order first):** + +```sql +-- Explicitly acquire locks in ID order before updating +begin; +select * from accounts where id in (1, 2) order by id for update; + +-- Now perform updates in any order - locks already held +update accounts set balance = balance - 100 where id = 1; +update accounts set balance = balance + 100 where id = 2; +commit; +-- Single statement acquires all locks atomically +begin; +update accounts +set balance = balance + case id + when 1 then -100 + when 2 then 100 +end +where id in (1, 2); +commit; +-- Check for recent deadlocks +select * from pg_stat_database where deadlocks > 0; + +-- Enable deadlock logging +set log_lock_waits = on; +set deadlock_timeout = '1s'; +``` + +Alternative: use a single statement to update atomically: +Detect deadlocks in logs: +[Deadlocks](https://www.postgresql.org/docs/current/explicit-locking.html#LOCKING-DEADLOCKS) + +--- + +### 5.3 Use Advisory Locks for Application-Level Locking + +**Impact: MEDIUM (Efficient coordination without row-level lock overhead)** + +Advisory locks provide application-level coordination without requiring database rows to lock. + +**Incorrect (creating rows just for locking):** + +```sql +-- Creating dummy rows to lock on +create table resource_locks ( + resource_name text primary key +); + +insert into resource_locks values ('report_generator'); + +-- Lock by selecting the row +select * from resource_locks where resource_name = 'report_generator' for update; +``` + +**Correct (advisory locks):** + +```sql +-- Session-level advisory lock (released on disconnect or unlock) +select pg_advisory_lock(hashtext('report_generator')); +-- ... do exclusive work ... 
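+-- Note: pg_advisory_lock blocks until the lock is free; hold it only for the
+-- duration of the exclusive work, then release it explicitly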
+select pg_advisory_unlock(hashtext('report_generator')); + +-- Transaction-level lock (released on commit/rollback) +begin; +select pg_advisory_xact_lock(hashtext('daily_report')); +-- ... do work ... +commit; -- Lock automatically released +-- Returns immediately with true/false instead of waiting +select pg_try_advisory_lock(hashtext('resource_name')); + +-- Use in application +if (acquired) { + -- Do work + select pg_advisory_unlock(hashtext('resource_name')); +} else { + -- Skip or retry later +} +``` + +Try-lock for non-blocking operations: + +Reference: https://www.postgresql.org/docs/current/explicit-locking.html#ADVISORY-LOCKS + +--- + +### 5.4 Use SKIP LOCKED for Non-Blocking Queue Processing + +**Impact: MEDIUM-HIGH (10x throughput for worker queues)** + +**Prerequisites:** PostgreSQL 9.5+ + +When multiple workers process a queue, SKIP LOCKED allows workers to process different rows without waiting. + +**Incorrect (workers block each other):** + +```sql +-- Worker 1 and Worker 2 both try to get next job +begin; +select * from jobs where status = 'pending' order by created_at limit 1 for update; +-- Worker 2 waits for Worker 1's lock to release! +``` + +**Correct (SKIP LOCKED for parallel processing):** + +```sql +-- Each worker skips locked rows and gets the next available +begin; +select * from jobs +where status = 'pending' +order by created_at +limit 1 +for update skip locked; + +-- Worker 1 gets job 1, Worker 2 gets job 2 (no waiting) + +update jobs set status = 'processing' where id = $1; +commit; +-- Atomic claim-and-update in one statement +update jobs +set status = 'processing', worker_id = $1, started_at = now() +where id = ( + select id from jobs + where status = 'pending' + order by created_at + limit 1 + for update skip locked +) +returning *; +``` + +Complete queue pattern: + +Reference: https://www.postgresql.org/docs/current/sql-select.html#SQL-FOR-UPDATE-SHARE + +--- + +## 6. Data Access Patterns + +**Impact: MEDIUM** + +N+1 query elimination, batch operations, cursor-based pagination, and efficient data fetching. + +### 6.1 Batch INSERT Statements for Bulk Data + +**Impact: MEDIUM (10-50x faster bulk inserts)** + +Individual INSERT statements have high overhead. Batch multiple rows in single statements or use COPY. + +**Incorrect (individual inserts):** + +```sql +-- Each insert is a separate transaction and round trip +insert into events (user_id, action) values (1, 'click'); +insert into events (user_id, action) values (1, 'view'); +insert into events (user_id, action) values (2, 'click'); +-- ... 1000 more individual inserts + +-- 1000 inserts = 1000 round trips = slow +``` + +**Correct (batch insert):** + +```sql +-- Multiple rows in single statement +insert into events (user_id, action) values + (1, 'click'), + (1, 'view'), + (2, 'click'), + -- ... up to ~1000 rows per batch + (999, 'view'); + +-- One round trip for 1000 rows +-- COPY is fastest for bulk loading +copy events (user_id, action, created_at) +from '/path/to/data.csv' +with (format csv, header true); + +-- Or from stdin in application +copy events (user_id, action) from stdin with (format csv); +1,click +1,view +2,click +\. +``` + +For large imports, use COPY: + +Reference: https://www.postgresql.org/docs/current/sql-copy.html + +--- + +### 6.2 Eliminate N+1 Queries with Batch Loading + +**Impact: MEDIUM-HIGH (10-100x fewer database round trips)** + +N+1 queries execute one query per item in a loop. Batch them into a single query using arrays or JOINs. 
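+
+When each parent row is needed together with its children, aggregation keeps the work in one round trip. One possible shape, sketched against the tables used below (column names are illustrative):
+
+```sql
+-- One query returning each active user with their orders as a JSON array
+select u.id, u.name,
+       coalesce(
+         jsonb_agg(to_jsonb(o) order by o.created_at desc)
+           filter (where o.id is not null),
+         '[]'::jsonb
+       ) as orders
+from users u
+left join orders o on o.user_id = u.id
+where u.active = true
+group by u.id, u.name;
+```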
+ +**Incorrect (N+1 queries):** + +```sql +-- First query: get all users +select id from users where active = true; -- Returns 100 IDs + +-- Then N queries, one per user +select * from orders where user_id = 1; +select * from orders where user_id = 2; +select * from orders where user_id = 3; +-- ... 97 more queries! + +-- Total: 101 round trips to database +``` + +**Correct (single batch query):** + +```sql +-- Collect IDs and query once with ANY +select * from orders where user_id = any(array[1, 2, 3, ...]); + +-- Or use JOIN instead of loop +select u.id, u.name, o.* +from users u +left join orders o on o.user_id = u.id +where u.active = true; + +-- Total: 1 round trip +-- Instead of looping in application code: +-- for user in users: db.query("SELECT * FROM orders WHERE user_id = $1", user.id) + +-- Pass array parameter: +select * from orders where user_id = any($1::bigint[]); +-- Application passes: [1, 2, 3, 4, 5, ...] +``` + +Application pattern: + +Reference: https://supabase.com/docs/guides/database/query-optimization + +--- + +### 6.3 Use Cursor-Based Pagination Instead of OFFSET + +**Impact: MEDIUM-HIGH (Consistent O(1) performance regardless of page depth)** + +OFFSET-based pagination scans all skipped rows, getting slower on deeper pages. Cursor pagination is O(1). + +**Incorrect (OFFSET pagination):** + +```sql +-- Page 1: scans 20 rows +select * from products order by id limit 20 offset 0; + +-- Page 100: scans 2000 rows to skip 1980 +select * from products order by id limit 20 offset 1980; + +-- Page 10000: scans 200,000 rows! +select * from products order by id limit 20 offset 199980; +``` + +**Correct (cursor/keyset pagination):** + +```sql +-- Page 1: get first 20 +select * from products order by id limit 20; +-- Application stores last_id = 20 + +-- Page 2: start after last ID +select * from products where id > 20 order by id limit 20; +-- Uses index, always fast regardless of page depth + +-- Page 10000: same speed as page 1 +select * from products where id > 199980 order by id limit 20; +-- Cursor must include all sort columns +select * from products +where (created_at, id) > ('2024-01-15 10:00:00', 12345) +order by created_at, id +limit 20; +``` + +For multi-column sorting: + +Reference: https://supabase.com/docs/guides/database/pagination + +--- + +### 6.4 Use UPSERT for Insert-or-Update Operations + +**Impact: MEDIUM (Atomic operation, eliminates race conditions)** + +**Prerequisites:** PostgreSQL 9.5+ + +Using separate SELECT-then-INSERT/UPDATE creates race conditions. Use INSERT ... ON CONFLICT for atomic upserts. + +**Incorrect (check-then-insert race condition):** + +```sql +-- Race condition: two requests check simultaneously +select * from settings where user_id = 123 and key = 'theme'; +-- Both find nothing + +-- Both try to insert +insert into settings (user_id, key, value) values (123, 'theme', 'dark'); +-- One succeeds, one fails with duplicate key error! 
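+
+-- Note: wrapping the check in a transaction does not help at the default
+-- READ COMMITTED level; both transactions still see "no row" and both insert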
+``` + +**Correct (atomic UPSERT):** + +```sql +-- Single atomic operation +insert into settings (user_id, key, value) +values (123, 'theme', 'dark') +on conflict (user_id, key) +do update set value = excluded.value, updated_at = now(); + +-- Returns the inserted/updated row +insert into settings (user_id, key, value) +values (123, 'theme', 'dark') +on conflict (user_id, key) +do update set value = excluded.value +returning *; +-- Insert only if not exists (no update) +insert into page_views (page_id, user_id) +values (1, 123) +on conflict (page_id, user_id) do nothing; +``` + +Insert-or-ignore pattern: + +Reference: https://www.postgresql.org/docs/current/sql-insert.html#SQL-ON-CONFLICT + +--- + +## 7. Monitoring & Diagnostics + +**Impact: LOW-MEDIUM** + +Using pg_stat_statements, EXPLAIN ANALYZE, metrics collection, and performance diagnostics. + +### 7.1 Enable pg_stat_statements for Query Analysis + +**Impact: LOW-MEDIUM (Identify top resource-consuming queries)** + +**Prerequisites:** Extension: pg_stat_statements + +pg_stat_statements tracks execution statistics for all queries, helping identify slow and frequent queries. + +**Incorrect (no visibility into query patterns):** + +```sql +-- Database is slow, but which queries are the problem? +-- No way to know without pg_stat_statements +``` + +**Correct (enable and query pg_stat_statements):** + +```sql +-- Enable the extension +create extension if not exists pg_stat_statements; + +-- Find slowest queries by total time +select + calls, + round(total_exec_time::numeric, 2) as total_time_ms, + round(mean_exec_time::numeric, 2) as mean_time_ms, + query +from pg_stat_statements +order by total_exec_time desc +limit 10; + +-- Find most frequent queries +select calls, query +from pg_stat_statements +order by calls desc +limit 10; + +-- Reset statistics after optimization +select pg_stat_statements_reset(); +-- Queries with high mean time (candidates for optimization) +select query, mean_exec_time, calls +from pg_stat_statements +where mean_exec_time > 100 -- > 100ms average +order by mean_exec_time desc; +``` + +Key metrics to monitor: + +Reference: https://supabase.com/docs/guides/database/extensions/pg_stat_statements + +--- + +### 7.2 Maintain Table Statistics with VACUUM and ANALYZE + +**Impact: MEDIUM (2-10x better query plans with accurate statistics)** + +Outdated statistics cause the query planner to make poor decisions. VACUUM reclaims space, ANALYZE updates statistics. 
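+
+Dead-tuple counts show which tables most need vacuuming. A sketch (the 10,000-row floor is an arbitrary threshold):
+
+```sql
+-- Tables with the highest share of dead tuples
+select relname,
+       n_live_tup,
+       n_dead_tup,
+       round(100.0 * n_dead_tup / nullif(n_live_tup + n_dead_tup, 0), 1) as dead_pct
+from pg_stat_user_tables
+where n_dead_tup > 10000
+order by dead_pct desc nulls last;
+```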
+ +**Incorrect (stale statistics):** + +```sql +-- Table has 1M rows but stats say 1000 +-- Query planner chooses wrong strategy +explain select * from orders where status = 'pending'; +-- Shows: Seq Scan (because stats show small table) +-- Actually: Index Scan would be much faster +``` + +**Correct (maintain fresh statistics):** + +```sql +-- Manually analyze after large data changes +analyze orders; + +-- Analyze specific columns used in WHERE clauses +analyze orders (status, created_at); + +-- Check when tables were last analyzed +select + relname, + last_vacuum, + last_autovacuum, + last_analyze, + last_autoanalyze +from pg_stat_user_tables +order by last_analyze nulls first; +-- Increase frequency for high-churn tables +alter table orders set ( + autovacuum_vacuum_scale_factor = 0.05, -- Vacuum at 5% dead tuples (default 20%) + autovacuum_analyze_scale_factor = 0.02 -- Analyze at 2% changes (default 10%) +); + +-- Check autovacuum status +select * from pg_stat_progress_vacuum; +``` + +Autovacuum tuning for busy tables: + +Reference: https://supabase.com/docs/guides/database/database-size#vacuum-operations + +--- + +### 7.3 Use EXPLAIN ANALYZE to Diagnose Slow Queries + +**Impact: LOW-MEDIUM (Identify exact bottlenecks in query execution)** + +EXPLAIN ANALYZE executes the query and shows actual timings, revealing the true performance bottlenecks. + +**Incorrect (guessing at performance issues):** + +```sql +-- Query is slow, but why? +select * from orders where customer_id = 123 and status = 'pending'; +-- "It must be missing an index" - but which one? +``` + +**Correct (use EXPLAIN ANALYZE):** + +```sql +explain (analyze, buffers, format text) +select * from orders where customer_id = 123 and status = 'pending'; + +-- Output reveals the issue: +-- Seq Scan on orders (cost=0.00..25000.00 rows=50 width=100) (actual time=0.015..450.123 rows=50 loops=1) +-- Filter: ((customer_id = 123) AND (status = 'pending'::text)) +-- Rows Removed by Filter: 999950 +-- Buffers: shared hit=5000 read=15000 +-- Planning Time: 0.150 ms +-- Execution Time: 450.500 ms +-- Seq Scan on large tables = missing index +-- Rows Removed by Filter = poor selectivity or missing index +-- Buffers: read >> hit = data not cached, needs more memory +-- Nested Loop with high loops = consider different join strategy +-- Sort Method: external merge = work_mem too low +``` + +Key things to look for: + +Reference: https://supabase.com/docs/guides/database/inspect + +--- + +## 8. Advanced Features + +**Impact: LOW** + +Full-text search, JSONB optimization, PostGIS, extensions, and advanced Postgres features. + +### 8.1 Index JSONB Columns for Efficient Querying + +**Impact: MEDIUM (10-100x faster JSONB queries with proper indexing)** + +**Prerequisites:** PostgreSQL 9.4+ + +JSONB queries without indexes scan the entire table. Use GIN indexes for containment queries. 
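+
+After creating a GIN index, confirm it is actually used. A sketch against the example table (name is illustrative):
+
+```sql
+-- Per-index usage counters; idx_scan should grow as containment queries run
+select indexrelname, idx_scan, idx_tup_read, idx_tup_fetch
+from pg_stat_user_indexes
+where relname = 'products';
+```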
+ +**Incorrect (no index on JSONB):** + +```sql +create table products ( + id bigint primary key, + attributes jsonb +); + +-- Full table scan for every query +select * from products where attributes @> '{"color": "red"}'; +select * from products where attributes->>'brand' = 'Nike'; +``` + +**Correct (GIN index for JSONB):** + +```sql +-- GIN index for containment operators (@>, ?, ?&, ?|) +create index products_attrs_gin on products using gin (attributes); + +-- Now containment queries use the index +select * from products where attributes @> '{"color": "red"}'; + +-- For specific key lookups, use expression index +create index products_brand_idx on products ((attributes->>'brand')); +select * from products where attributes->>'brand' = 'Nike'; +-- jsonb_ops (default): supports all operators, larger index +create index idx1 on products using gin (attributes); + +-- jsonb_path_ops: only @> operator, but 2-3x smaller index +create index idx2 on products using gin (attributes jsonb_path_ops); +``` + +Choose the right operator class: + +Reference: https://www.postgresql.org/docs/current/datatype-json.html#JSON-INDEXING + +--- + +### 8.2 Use tsvector for Full-Text Search + +**Impact: MEDIUM (100x faster than LIKE, with ranking support)** + +LIKE with wildcards can't use indexes. Full-text search with tsvector is orders of magnitude faster. + +**Incorrect (LIKE pattern matching):** + +```sql +-- Cannot use index, scans all rows +select * from articles where content like '%postgresql%'; + +-- Case-insensitive makes it worse +select * from articles where lower(content) like '%postgresql%'; +``` + +**Correct (full-text search with tsvector):** + +```sql +-- Add tsvector column and index +alter table articles add column search_vector tsvector + generated always as (to_tsvector('english', coalesce(title,'') || ' ' || coalesce(content,''))) stored; + +create index articles_search_idx on articles using gin (search_vector); + +-- Fast full-text search +select * from articles +where search_vector @@ to_tsquery('english', 'postgresql & performance'); + +-- With ranking +select *, ts_rank(search_vector, query) as rank +from articles, to_tsquery('english', 'postgresql') query +where search_vector @@ query +order by rank desc; +-- AND: both terms required +to_tsquery('postgresql & performance') + +-- OR: either term +to_tsquery('postgresql | mysql') + +-- Prefix matching +to_tsquery('post:*') +``` + +Search multiple terms: + +Reference: https://supabase.com/docs/guides/database/full-text-search + +--- + +## References + +- https://www.postgresql.org/docs/current/ +- https://supabase.com/docs +- https://wiki.postgresql.org/wiki/Performance_Optimization +- https://supabase.com/docs/guides/database/overview +- https://supabase.com/docs/guides/auth/row-level-security diff --git a/skills/postgres-best-practices/AGENTS.md b/skills/postgres-best-practices/AGENTS.md index 08d9e56..786a9da 100644 --- a/skills/postgres-best-practices/AGENTS.md +++ b/skills/postgres-best-practices/AGENTS.md @@ -191,6 +191,8 @@ Reference: https://www.postgresql.org/docs/current/indexes-multicolumn.html **Impact: MEDIUM-HIGH (2-5x faster queries by eliminating heap fetches)** +**Prerequisites:** PostgreSQL 11+ + Covering indexes include all columns needed by a query, enabling index-only scans that skip the table entirely. 
**Incorrect (index scan + heap fetch):** @@ -680,6 +682,8 @@ Reference: https://www.postgresql.org/docs/current/ddl-constraints.html#DDL-CONS **Impact: MEDIUM-HIGH (5-20x faster queries and maintenance on large tables)** +**Prerequisites:** PostgreSQL 10+ + Partitioning splits a large table into smaller pieces, improving query performance and maintenance operations. **Incorrect (single large table):** @@ -997,6 +1001,8 @@ Reference: https://www.postgresql.org/docs/current/explicit-locking.html#ADVISOR **Impact: MEDIUM-HIGH (10x throughput for worker queues)** +**Prerequisites:** PostgreSQL 9.5+ + When multiple workers process a queue, SKIP LOCKED allows workers to process different rows without waiting. **Incorrect (workers block each other):** @@ -1194,6 +1200,8 @@ Reference: https://supabase.com/docs/guides/database/pagination **Impact: MEDIUM (Atomic operation, eliminates race conditions)** +**Prerequisites:** PostgreSQL 9.5+ + Using separate SELECT-then-INSERT/UPDATE creates race conditions. Use INSERT ... ON CONFLICT for atomic upserts. **Incorrect (check-then-insert race condition):** @@ -1245,6 +1253,8 @@ Using pg_stat_statements, EXPLAIN ANALYZE, metrics collection, and performance d **Impact: LOW-MEDIUM (Identify top resource-consuming queries)** +**Prerequisites:** Extension: pg_stat_statements + pg_stat_statements tracks execution statistics for all queries, helping identify slow and frequent queries. **Incorrect (no visibility into query patterns):** @@ -1391,6 +1401,8 @@ Full-text search, JSONB optimization, PostGIS, extensions, and advanced Postgres **Impact: MEDIUM (10-100x faster JSONB queries with proper indexing)** +**Prerequisites:** PostgreSQL 9.4+ + JSONB queries without indexes scan the entire table. Use GIN indexes for containment queries. **Incorrect (no index on JSONB):** diff --git a/skills/postgres-best-practices/AGENTS.self-hosted.md b/skills/postgres-best-practices/AGENTS.self-hosted.md new file mode 100644 index 0000000..786a9da --- /dev/null +++ b/skills/postgres-best-practices/AGENTS.self-hosted.md @@ -0,0 +1,1502 @@ +# Postgres Best Practices + +**Version 1.0.0** +Supabase +January 2026 + +> This document is optimized for AI agents and LLMs. Rules are prioritized by performance impact. + +--- + +## Abstract + +Comprehensive Postgres performance optimization guide for developers using Supabase and Postgres. Contains performance rules across 8 categories, prioritized by impact from critical (query performance, connection management) to incremental (advanced features). Each rule includes detailed explanations, incorrect vs. correct SQL examples, query plan analysis, and specific performance metrics to guide automated optimization and code generation. + +--- + +## Table of Contents + +1. [Query Performance](#query-performance) - **CRITICAL** + - 1.1 [Add Indexes on WHERE and JOIN Columns](#11-add-indexes-on-where-and-join-columns) + - 1.2 [Choose the Right Index Type for Your Data](#12-choose-the-right-index-type-for-your-data) + - 1.3 [Create Composite Indexes for Multi-Column Queries](#13-create-composite-indexes-for-multi-column-queries) + - 1.4 [Use Covering Indexes to Avoid Table Lookups](#14-use-covering-indexes-to-avoid-table-lookups) + - 1.5 [Use Partial Indexes for Filtered Queries](#15-use-partial-indexes-for-filtered-queries) + +2. 
[Connection Management](#connection-management) - **CRITICAL** + - 2.1 [Configure Idle Connection Timeouts](#21-configure-idle-connection-timeouts) + - 2.2 [Set Appropriate Connection Limits](#22-set-appropriate-connection-limits) + - 2.3 [Use Connection Pooling for All Applications](#23-use-connection-pooling-for-all-applications) + - 2.4 [Use Prepared Statements Correctly with Pooling](#24-use-prepared-statements-correctly-with-pooling) + +3. [Security & RLS](#security-rls) - **CRITICAL** + - 3.1 [Apply Principle of Least Privilege](#31-apply-principle-of-least-privilege) + - 3.2 [Enable Row Level Security for Multi-Tenant Data](#32-enable-row-level-security-for-multi-tenant-data) + - 3.3 [Optimize RLS Policies for Performance](#33-optimize-rls-policies-for-performance) + +4. [Schema Design](#schema-design) - **HIGH** + - 4.1 [Choose Appropriate Data Types](#41-choose-appropriate-data-types) + - 4.2 [Index Foreign Key Columns](#42-index-foreign-key-columns) + - 4.3 [Partition Large Tables for Better Performance](#43-partition-large-tables-for-better-performance) + - 4.4 [Select Optimal Primary Key Strategy](#44-select-optimal-primary-key-strategy) + - 4.5 [Use Lowercase Identifiers for Compatibility](#45-use-lowercase-identifiers-for-compatibility) + +5. [Concurrency & Locking](#concurrency-locking) - **MEDIUM-HIGH** + - 5.1 [Keep Transactions Short to Reduce Lock Contention](#51-keep-transactions-short-to-reduce-lock-contention) + - 5.2 [Prevent Deadlocks with Consistent Lock Ordering](#52-prevent-deadlocks-with-consistent-lock-ordering) + - 5.3 [Use Advisory Locks for Application-Level Locking](#53-use-advisory-locks-for-application-level-locking) + - 5.4 [Use SKIP LOCKED for Non-Blocking Queue Processing](#54-use-skip-locked-for-non-blocking-queue-processing) + +6. [Data Access Patterns](#data-access-patterns) - **MEDIUM** + - 6.1 [Batch INSERT Statements for Bulk Data](#61-batch-insert-statements-for-bulk-data) + - 6.2 [Eliminate N+1 Queries with Batch Loading](#62-eliminate-n1-queries-with-batch-loading) + - 6.3 [Use Cursor-Based Pagination Instead of OFFSET](#63-use-cursor-based-pagination-instead-of-offset) + - 6.4 [Use UPSERT for Insert-or-Update Operations](#64-use-upsert-for-insert-or-update-operations) + +7. [Monitoring & Diagnostics](#monitoring-diagnostics) - **LOW-MEDIUM** + - 7.1 [Enable pg_stat_statements for Query Analysis](#71-enable-pgstatstatements-for-query-analysis) + - 7.2 [Maintain Table Statistics with VACUUM and ANALYZE](#72-maintain-table-statistics-with-vacuum-and-analyze) + - 7.3 [Use EXPLAIN ANALYZE to Diagnose Slow Queries](#73-use-explain-analyze-to-diagnose-slow-queries) + +8. [Advanced Features](#advanced-features) - **LOW** + - 8.1 [Index JSONB Columns for Efficient Querying](#81-index-jsonb-columns-for-efficient-querying) + - 8.2 [Use tsvector for Full-Text Search](#82-use-tsvector-for-full-text-search) + +--- + +## 1. Query Performance + +**Impact: CRITICAL** + +Slow queries, missing indexes, inefficient query plans. The most common source of Postgres performance issues. + +### 1.1 Add Indexes on WHERE and JOIN Columns + +**Impact: CRITICAL (100-1000x faster queries on large tables)** + +Queries filtering or joining on unindexed columns cause full table scans, which become exponentially slower as tables grow. 
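+
+A cheap way to catch this class of problem before it reaches production is to inspect the plan from test or CI code and fail when a sequential scan shows up on a large table. The sketch below is only an illustration: it assumes node-postgres, and the helper name is invented here.
+
+```ts
+// Sketch only: fail fast (e.g. in an integration test) when a query plans a
+// sequential scan. Only meaningful against production-sized data, since the
+// planner legitimately prefers Seq Scan on tiny tables.
+// Assumes node-postgres.
+import { Pool } from "pg";
+
+const pool = new Pool({ connectionString: process.env.DATABASE_URL });
+
+export async function assertNoSeqScan(sql: string) {
+  const { rows } = await pool.query(`explain (format json) ${sql}`);
+  if (JSON.stringify(rows).includes("Seq Scan")) {
+    throw new Error(`Query plans a sequential scan: ${sql}`);
+  }
+}
+
+// Example: fails until orders(customer_id) is indexed on a large table.
+// await assertNoSeqScan("select * from orders where customer_id = 123");
+```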
+ +**Incorrect (sequential scan on large table):** + +```sql +-- No index on customer_id causes full table scan +select * from orders where customer_id = 123; + +-- EXPLAIN shows: Seq Scan on orders (cost=0.00..25000.00 rows=100 width=85) +``` + +**Correct (index scan):** + +```sql +-- Create index on frequently filtered column +create index orders_customer_id_idx on orders (customer_id); + +select * from orders where customer_id = 123; + +-- EXPLAIN shows: Index Scan using orders_customer_id_idx (cost=0.42..8.44 rows=100 width=85) +-- Index the referencing column +create index orders_customer_id_idx on orders (customer_id); + +select c.name, o.total +from customers c +join orders o on o.customer_id = c.id; +``` + +For JOIN columns, always index the foreign key side: + +Reference: https://supabase.com/docs/guides/database/query-optimization + +--- + +### 1.2 Choose the Right Index Type for Your Data + +**Impact: HIGH (10-100x improvement with correct index type)** + +Different index types excel at different query patterns. The default B-tree isn't always optimal. + +**Incorrect (B-tree for JSONB containment):** + +```sql +-- B-tree cannot optimize containment operators +create index products_attrs_idx on products (attributes); +select * from products where attributes @> '{"color": "red"}'; +-- Full table scan - B-tree doesn't support @> operator +``` + +**Correct (GIN for JSONB):** + +```sql +-- GIN supports @>, ?, ?&, ?| operators +create index products_attrs_idx on products using gin (attributes); +select * from products where attributes @> '{"color": "red"}'; +-- B-tree (default): =, <, >, BETWEEN, IN, IS NULL +create index users_created_idx on users (created_at); + +-- GIN: arrays, JSONB, full-text search +create index posts_tags_idx on posts using gin (tags); + +-- BRIN: large time-series tables (10-100x smaller) +create index events_time_idx on events using brin (created_at); + +-- Hash: equality-only (slightly faster than B-tree for =) +create index sessions_token_idx on sessions using hash (token); +``` + +Index type guide: + +Reference: https://www.postgresql.org/docs/current/indexes-types.html + +--- + +### 1.3 Create Composite Indexes for Multi-Column Queries + +**Impact: HIGH (5-10x faster multi-column queries)** + +When queries filter on multiple columns, a composite index is more efficient than separate single-column indexes. 
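+
+On the application side this means always supplying the leading (equality) column when querying, so the composite index defined below can be used. A minimal sketch, assuming node-postgres and the `orders` table from the examples:
+
+```ts
+// Sketch only: the query shape the composite (status, created_at) index is
+// built for. `status` is the leading column, so it must be present in the
+// WHERE clause for the index to apply (leftmost prefix rule).
+// Assumes node-postgres and an orders(id, status, created_at) table.
+import { Pool } from "pg";
+
+const pool = new Pool({ connectionString: process.env.DATABASE_URL });
+
+export async function recentOrdersByStatus(status: string, since: Date) {
+  const { rows } = await pool.query(
+    `select id, status, created_at
+       from orders
+      where status = $1       -- equality on the leading column
+        and created_at > $2   -- range on the trailing column
+      order by created_at`,
+    [status, since],
+  );
+  return rows;
+}
+```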
+ +**Incorrect (separate indexes require bitmap scan):** + +```sql +-- Two separate indexes +create index orders_status_idx on orders (status); +create index orders_created_idx on orders (created_at); + +-- Query must combine both indexes (slower) +select * from orders where status = 'pending' and created_at > '2024-01-01'; +``` + +**Correct (composite index):** + +```sql +-- Single composite index (leftmost column first for equality checks) +create index orders_status_created_idx on orders (status, created_at); + +-- Query uses one efficient index scan +select * from orders where status = 'pending' and created_at > '2024-01-01'; +-- Good: status (=) before created_at (>) +create index idx on orders (status, created_at); + +-- Works for: WHERE status = 'pending' +-- Works for: WHERE status = 'pending' AND created_at > '2024-01-01' +-- Does NOT work for: WHERE created_at > '2024-01-01' (leftmost prefix rule) +``` + +**Column order matters** - place equality columns first, range columns last: + +Reference: https://www.postgresql.org/docs/current/indexes-multicolumn.html + +--- + +### 1.4 Use Covering Indexes to Avoid Table Lookups + +**Impact: MEDIUM-HIGH (2-5x faster queries by eliminating heap fetches)** + +**Prerequisites:** PostgreSQL 11+ + +Covering indexes include all columns needed by a query, enabling index-only scans that skip the table entirely. + +**Incorrect (index scan + heap fetch):** + +```sql +create index users_email_idx on users (email); + +-- Must fetch name and created_at from table heap +select email, name, created_at from users where email = 'user@example.com'; +``` + +**Correct (index-only scan with INCLUDE):** + +```sql +-- Include non-searchable columns in the index +create index users_email_idx on users (email) include (name, created_at); + +-- All columns served from index, no table access needed +select email, name, created_at from users where email = 'user@example.com'; +-- Searching by status, but also need customer_id and total +create index orders_status_idx on orders (status) include (customer_id, total); + +select status, customer_id, total from orders where status = 'shipped'; +``` + +Use INCLUDE for columns you SELECT but don't filter on: + +Reference: https://www.postgresql.org/docs/current/indexes-index-only-scans.html + +--- + +### 1.5 Use Partial Indexes for Filtered Queries + +**Impact: HIGH (5-20x smaller indexes, faster writes and queries)** + +Partial indexes only include rows matching a WHERE condition, making them smaller and faster when queries consistently filter on the same condition. + +**Incorrect (full index includes irrelevant rows):** + +```sql +-- Index includes all rows, even soft-deleted ones +create index users_email_idx on users (email); + +-- Query always filters active users +select * from users where email = 'user@example.com' and deleted_at is null; +``` + +**Correct (partial index matches query filter):** + +```sql +-- Index only includes active users +create index users_active_email_idx on users (email) +where deleted_at is null; + +-- Query uses the smaller, faster index +select * from users where email = 'user@example.com' and deleted_at is null; +-- Only pending orders (status rarely changes once completed) +create index orders_pending_idx on orders (created_at) +where status = 'pending'; + +-- Only non-null values +create index products_sku_idx on products (sku) +where sku is not null; +``` + +Common use cases for partial indexes: + +Reference: https://www.postgresql.org/docs/current/indexes-partial.html + +--- + +## 2. 
Connection Management + +**Impact: CRITICAL** + +Connection pooling, limits, and serverless strategies. Critical for applications with high concurrency or serverless deployments. + +### 2.1 Configure Idle Connection Timeouts + +**Impact: HIGH (Reclaim 30-50% of connection slots from idle clients)** + +Idle connections waste resources. Configure timeouts to automatically reclaim them. + +**Incorrect (connections held indefinitely):** + +```sql +-- No timeout configured +show idle_in_transaction_session_timeout; -- 0 (disabled) + +-- Connections stay open forever, even when idle +select pid, state, state_change, query +from pg_stat_activity +where state = 'idle in transaction'; +-- Shows transactions idle for hours, holding locks +``` + +**Correct (automatic cleanup of idle connections):** + +```ini +-- Terminate connections idle in transaction after 30 seconds +alter system set idle_in_transaction_session_timeout = '30s'; + +-- Terminate completely idle connections after 10 minutes +alter system set idle_session_timeout = '10min'; + +-- Reload configuration +select pg_reload_conf(); +# pgbouncer.ini +server_idle_timeout = 60 +client_idle_timeout = 300 +``` + +For pooled connections, configure at the pooler level: + +Reference: https://www.postgresql.org/docs/current/runtime-config-client.html#GUC-IDLE-IN-TRANSACTION-SESSION-TIMEOUT + +--- + +### 2.2 Set Appropriate Connection Limits + +**Impact: CRITICAL (Prevent database crashes and memory exhaustion)** + +Too many connections exhaust memory and degrade performance. Set limits based on available resources. + +**Incorrect (unlimited or excessive connections):** + +```sql +-- Default max_connections = 100, but often increased blindly +show max_connections; -- 500 (way too high for 4GB RAM) + +-- Each connection uses 1-3MB RAM +-- 500 connections * 2MB = 1GB just for connections! +-- Out of memory errors under load +``` + +**Correct (calculate based on resources):** + +```sql +-- Formula: max_connections = (RAM in MB / 5MB per connection) - reserved +-- For 4GB RAM: (4096 / 5) - 10 = ~800 theoretical max +-- But practically, 100-200 is better for query performance + +-- Recommended settings for 4GB RAM +alter system set max_connections = 100; + +-- Also set work_mem appropriately +-- work_mem * max_connections should not exceed 25% of RAM +alter system set work_mem = '8MB'; -- 8MB * 100 = 800MB max +select count(*), state from pg_stat_activity group by state; +``` + +Monitor connection usage: + +Reference: https://supabase.com/docs/guides/platform/performance#connection-management + +--- + +### 2.3 Use Connection Pooling for All Applications + +**Impact: CRITICAL (Handle 10-100x more concurrent users)** + +Postgres connections are expensive (1-3MB RAM each). Without pooling, applications exhaust connections under load. + +**Incorrect (new connection per request):** + +```sql +-- Each request creates a new connection +-- Application code: db.connect() per request +-- Result: 500 concurrent users = 500 connections = crashed database + +-- Check current connections +select count(*) from pg_stat_activity; -- 487 connections! 
+``` + +**Correct (connection pooling):** + +```sql +-- Use a pooler like PgBouncer between app and database +-- Application connects to pooler, pooler reuses a small pool to Postgres + +-- Configure pool_size based on: (CPU cores * 2) + spindle_count +-- Example for 4 cores: pool_size = 10 + +-- Result: 500 concurrent users share 10 actual connections +select count(*) from pg_stat_activity; -- 10 connections +``` + +Pool modes: +- **Transaction mode**: connection returned after each transaction (best for most apps) +- **Session mode**: connection held for entire session (needed for prepared statements, temp tables) + +Reference: https://supabase.com/docs/guides/database/connecting-to-postgres#connection-pooler + +--- + +### 2.4 Use Prepared Statements Correctly with Pooling + +**Impact: HIGH (Avoid prepared statement conflicts in pooled environments)** + +Prepared statements are tied to individual database connections. In transaction-mode pooling, connections are shared, causing conflicts. + +**Incorrect (named prepared statements with transaction pooling):** + +```sql +-- Named prepared statement +prepare get_user as select * from users where id = $1; + +-- In transaction mode pooling, next request may get different connection +execute get_user(123); +-- ERROR: prepared statement "get_user" does not exist +``` + +**Correct (use unnamed statements or session mode):** + +```sql +-- Option 1: Use unnamed prepared statements (most ORMs do this automatically) +-- The query is prepared and executed in a single protocol message + +-- Option 2: Deallocate after use in transaction mode +prepare get_user as select * from users where id = $1; +execute get_user(123); +deallocate get_user; + +-- Option 3: Use session mode pooling (port 5432 vs 6543) +-- Connection is held for entire session, prepared statements persist +-- Many drivers use prepared statements by default +-- Node.js pg: { prepare: false } to disable +-- JDBC: prepareThreshold=0 to disable +``` + +Check your driver settings: + +Reference: https://supabase.com/docs/guides/database/connecting-to-postgres#connection-pool-modes + +--- + +## 3. Security & RLS + +**Impact: CRITICAL** + +Row-Level Security policies, privilege management, and authentication patterns. + +### 3.1 Apply Principle of Least Privilege + +**Impact: MEDIUM (Reduced attack surface, better audit trail)** + +Grant only the minimum permissions required. Never use superuser for application queries. 
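+
+One way to make least privilege stick in application code is to keep a separate connection pool per database role, so read paths physically cannot write. The sketch below assumes node-postgres; the role names mirror the grants in the example that follows, while the environment variable names, column names, and helper functions are invented for illustration.
+
+```ts
+// Sketch only: one pool per role. The read pool connects as app_readonly
+// (SELECT-only grants); the write pool connects as app_user, which inherits
+// app_writer (no DELETE). Connection strings and column names are assumptions.
+import { Pool } from "pg";
+
+const readPool = new Pool({ connectionString: process.env.DATABASE_URL_READONLY });
+const writePool = new Pool({ connectionString: process.env.DATABASE_URL_WRITER });
+
+export async function listProducts() {
+  const { rows } = await readPool.query("select id, name from products");
+  return rows;
+}
+
+export async function createOrder(customerId: number, total: number) {
+  const { rows } = await writePool.query(
+    "insert into orders (customer_id, total) values ($1, $2) returning id",
+    [customerId, total],
+  );
+  return rows[0].id;
+}
+```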
+ +**Incorrect (overly broad permissions):** + +```sql +-- Application uses superuser connection +-- Or grants ALL to application role +grant all privileges on all tables in schema public to app_user; +grant all privileges on all sequences in schema public to app_user; + +-- Any SQL injection becomes catastrophic +-- drop table users; cascades to everything +``` + +**Correct (minimal, specific grants):** + +```sql +-- Create role with no default privileges +create role app_readonly nologin; + +-- Grant only SELECT on specific tables +grant usage on schema public to app_readonly; +grant select on public.products, public.categories to app_readonly; + +-- Create role for writes with limited scope +create role app_writer nologin; +grant usage on schema public to app_writer; +grant select, insert, update on public.orders to app_writer; +grant usage on sequence orders_id_seq to app_writer; +-- No DELETE permission + +-- Login role inherits from these +create role app_user login password 'xxx'; +grant app_writer to app_user; +-- Revoke default public access +revoke all on schema public from public; +revoke all on all tables in schema public from public; +``` + +Revoke public defaults: + +Reference: https://supabase.com/blog/postgres-roles-and-privileges + +--- + +### 3.2 Enable Row Level Security for Multi-Tenant Data + +**Impact: CRITICAL (Database-enforced tenant isolation, prevent data leaks)** + +Row Level Security (RLS) enforces data access at the database level, ensuring users only see their own data. + +**Incorrect (application-level filtering only):** + +```sql +-- Relying only on application to filter +select * from orders where user_id = $current_user_id; + +-- Bug or bypass means all data is exposed! +select * from orders; -- Returns ALL orders +``` + +**Correct (database-enforced RLS):** + +```sql +-- Enable RLS on the table +alter table orders enable row level security; + +-- Create policy for users to see only their orders +create policy orders_user_policy on orders + for all + using (user_id = current_setting('app.current_user_id')::bigint); + +-- Force RLS even for table owners +alter table orders force row level security; + +-- Set user context and query +set app.current_user_id = '123'; +select * from orders; -- Only returns orders for user 123 +create policy orders_user_policy on orders + for all + to authenticated + using (user_id = auth.uid()); +``` + +Policy for authenticated role: + +Reference: https://supabase.com/docs/guides/database/postgres/row-level-security + +--- + +### 3.3 Optimize RLS Policies for Performance + +**Impact: HIGH (5-10x faster RLS queries with proper patterns)** + +Poorly written RLS policies can cause severe performance issues. Use subqueries and indexes strategically. + +**Incorrect (function called for every row):** + +```sql +create policy orders_policy on orders + using (auth.uid() = user_id); -- auth.uid() called per row! 
+ +-- With 1M rows, auth.uid() is called 1M times +``` + +**Correct (wrap functions in SELECT):** + +```sql +create policy orders_policy on orders + using ((select auth.uid()) = user_id); -- Called once, cached + +-- 100x+ faster on large tables +-- Create helper function (runs as definer, bypasses RLS) +create or replace function is_team_member(team_id bigint) +returns boolean +language sql +security definer +set search_path = '' +as $$ + select exists ( + select 1 from public.team_members + where team_id = $1 and user_id = (select auth.uid()) + ); +$$; + +-- Use in policy (indexed lookup, not per-row check) +create policy team_orders_policy on orders + using ((select is_team_member(team_id))); +create index orders_user_id_idx on orders (user_id); +``` + +Use security definer functions for complex checks: +Always add indexes on columns used in RLS policies: + +Reference: https://supabase.com/docs/guides/database/postgres/row-level-security#rls-performance-recommendations + +--- + +## 4. Schema Design + +**Impact: HIGH** + +Table design, index strategies, partitioning, and data type selection. Foundation for long-term performance. + +### 4.1 Choose Appropriate Data Types + +**Impact: HIGH (50% storage reduction, faster comparisons)** + +Using the right data types reduces storage, improves query performance, and prevents bugs. + +**Incorrect (wrong data types):** + +```sql +create table users ( + id int, -- Will overflow at 2.1 billion + email varchar(255), -- Unnecessary length limit + created_at timestamp, -- Missing timezone info + is_active varchar(5), -- String for boolean + price varchar(20) -- String for numeric +); +``` + +**Correct (appropriate data types):** + +```sql +create table users ( + id bigint generated always as identity primary key, -- 9 quintillion max + email text, -- No artificial limit, same performance as varchar + created_at timestamptz, -- Always store timezone-aware timestamps + is_active boolean default true, -- 1 byte vs variable string length + price numeric(10,2) -- Exact decimal arithmetic +); +-- IDs: use bigint, not int (future-proofing) +-- Strings: use text, not varchar(n) unless constraint needed +-- Time: use timestamptz, not timestamp +-- Money: use numeric, not float (precision matters) +-- Enums: use text with check constraint or create enum type +``` + +Key guidelines: + +Reference: https://www.postgresql.org/docs/current/datatype.html + +--- + +### 4.2 Index Foreign Key Columns + +**Impact: HIGH (10-100x faster JOINs and CASCADE operations)** + +Postgres does not automatically index foreign key columns. Missing indexes cause slow JOINs and CASCADE operations. + +**Incorrect (unindexed foreign key):** + +```sql +create table orders ( + id bigint generated always as identity primary key, + customer_id bigint references customers(id) on delete cascade, + total numeric(10,2) +); + +-- No index on customer_id! 
+-- JOINs and ON DELETE CASCADE both require full table scan +select * from orders where customer_id = 123; -- Seq Scan +delete from customers where id = 123; -- Locks table, scans all orders +``` + +**Correct (indexed foreign key):** + +```sql +create table orders ( + id bigint generated always as identity primary key, + customer_id bigint references customers(id) on delete cascade, + total numeric(10,2) +); + +-- Always index the FK column +create index orders_customer_id_idx on orders (customer_id); + +-- Now JOINs and cascades are fast +select * from orders where customer_id = 123; -- Index Scan +delete from customers where id = 123; -- Uses index, fast cascade +select + conrelid::regclass as table_name, + a.attname as fk_column +from pg_constraint c +join pg_attribute a on a.attrelid = c.conrelid and a.attnum = any(c.conkey) +where c.contype = 'f' + and not exists ( + select 1 from pg_index i + where i.indrelid = c.conrelid and a.attnum = any(i.indkey) + ); +``` + +Find missing FK indexes: + +Reference: https://www.postgresql.org/docs/current/ddl-constraints.html#DDL-CONSTRAINTS-FK + +--- + +### 4.3 Partition Large Tables for Better Performance + +**Impact: MEDIUM-HIGH (5-20x faster queries and maintenance on large tables)** + +**Prerequisites:** PostgreSQL 10+ + +Partitioning splits a large table into smaller pieces, improving query performance and maintenance operations. + +**Incorrect (single large table):** + +```sql +create table events ( + id bigint generated always as identity, + created_at timestamptz, + data jsonb +); + +-- 500M rows, queries scan everything +select * from events where created_at > '2024-01-01'; -- Slow +vacuum events; -- Takes hours, locks table +``` + +**Correct (partitioned by time range):** + +```sql +create table events ( + id bigint generated always as identity, + created_at timestamptz not null, + data jsonb +) partition by range (created_at); + +-- Create partitions for each month +create table events_2024_01 partition of events + for values from ('2024-01-01') to ('2024-02-01'); + +create table events_2024_02 partition of events + for values from ('2024-02-01') to ('2024-03-01'); + +-- Queries only scan relevant partitions +select * from events where created_at > '2024-01-15'; -- Only scans events_2024_01+ + +-- Drop old data instantly +drop table events_2023_01; -- Instant vs DELETE taking hours +``` + +When to partition: +- Tables > 100M rows +- Time-series data with date-based queries +- Need to efficiently drop old data + +Reference: https://www.postgresql.org/docs/current/ddl-partitioning.html + +--- + +### 4.4 Select Optimal Primary Key Strategy + +**Impact: HIGH (Better index locality, reduced fragmentation)** + +Primary key choice affects insert performance, index size, and replication +efficiency. 
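+
+A practical consequence for application code: with identity primary keys, let the database assign the key and read it back with RETURNING rather than generating random UUIDs client side. A minimal sketch, assuming node-postgres and a `users` table like the one below (the `email` column is an assumption added for illustration):
+
+```ts
+// Sketch only: insert and read back a database-assigned identity key.
+// Assumes node-postgres and users(id bigint generated always as identity,
+// email text); the email column is an assumption for illustration.
+import { Pool } from "pg";
+
+const pool = new Pool({ connectionString: process.env.DATABASE_URL });
+
+export async function createUser(email: string): Promise<bigint> {
+  const { rows } = await pool.query<{ id: string }>(
+    "insert into users (email) values ($1) returning id",
+    [email],
+  );
+  // node-postgres returns bigint columns as strings to avoid precision loss.
+  return BigInt(rows[0].id);
+}
+```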
+ +**Incorrect (problematic PK choices):** + +```sql +-- identity is the SQL-standard approach +create table users ( + id serial primary key -- Works, but IDENTITY is recommended +); + +-- Random UUIDs (v4) cause index fragmentation +create table orders ( + id uuid default gen_random_uuid() primary key -- UUIDv4 = random = scattered inserts +); +``` + +**Correct (optimal PK strategies):** + +```sql +-- Use IDENTITY for sequential IDs (SQL-standard, best for most cases) +create table users ( + id bigint generated always as identity primary key +); + +-- For distributed systems needing UUIDs, use UUIDv7 (time-ordered) +-- Requires pg_uuidv7 extension: create extension pg_uuidv7; +create table orders ( + id uuid default uuid_generate_v7() primary key -- Time-ordered, no fragmentation +); + +-- Alternative: time-prefixed IDs for sortable, distributed IDs (no extension needed) +create table events ( + id text default concat( + to_char(now() at time zone 'utc', 'YYYYMMDDHH24MISSMS'), + gen_random_uuid()::text + ) primary key +); +``` + +Guidelines: +- Single database: `bigint identity` (sequential, 8 bytes, SQL-standard) +- Distributed/exposed IDs: UUIDv7 (requires pg_uuidv7) or ULID (time-ordered, no + fragmentation) +- `serial` works but `identity` is SQL-standard and preferred for new + applications +- Avoid random UUIDs (v4) as primary keys on large tables (causes index + fragmentation) +[Identity Columns](https://www.postgresql.org/docs/current/sql-createtable.html#SQL-CREATETABLE-PARMS-GENERATED-IDENTITY) + +--- + +### 4.5 Use Lowercase Identifiers for Compatibility + +**Impact: MEDIUM (Avoid case-sensitivity bugs with tools, ORMs, and AI assistants)** + +PostgreSQL folds unquoted identifiers to lowercase. Quoted mixed-case identifiers require quotes forever and cause issues with tools, ORMs, and AI assistants that may not recognize them. + +**Incorrect (mixed-case identifiers):** + +```sql +-- Quoted identifiers preserve case but require quotes everywhere +CREATE TABLE "Users" ( + "userId" bigint PRIMARY KEY, + "firstName" text, + "lastName" text +); + +-- Must always quote or queries fail +SELECT "firstName" FROM "Users" WHERE "userId" = 1; + +-- This fails - Users becomes users without quotes +SELECT firstName FROM Users; +-- ERROR: relation "users" does not exist +``` + +**Correct (lowercase snake_case):** + +```sql +-- Unquoted lowercase identifiers are portable and tool-friendly +CREATE TABLE users ( + user_id bigint PRIMARY KEY, + first_name text, + last_name text +); + +-- Works without quotes, recognized by all tools +SELECT first_name FROM users WHERE user_id = 1; +-- ORMs often generate quoted camelCase - configure them to use snake_case +-- Migrations from other databases may preserve original casing +-- Some GUI tools quote identifiers by default - disable this + +-- If stuck with mixed-case, create views as a compatibility layer +CREATE VIEW users AS SELECT "userId" AS user_id, "firstName" AS first_name FROM "Users"; +``` + +Common sources of mixed-case identifiers: + +Reference: https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS + +--- + +## 5. Concurrency & Locking + +**Impact: MEDIUM-HIGH** + +Transaction management, isolation levels, deadlock prevention, and lock contention patterns. + +### 5.1 Keep Transactions Short to Reduce Lock Contention + +**Impact: MEDIUM-HIGH (3-5x throughput improvement, fewer deadlocks)** + +Long-running transactions hold locks that block other queries. Keep transactions as short as possible. 
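+
+In application code this usually means doing the slow work (HTTP calls, file I/O, validation) before opening the transaction, then holding the lock only for the UPDATE. A minimal sketch with node-postgres; the payment callback and status values are illustrative:
+
+```ts
+// Sketch only: call the payment provider with no locks held, then run a
+// transaction that lasts milliseconds. Assumes node-postgres and the orders
+// table from the examples; `charge` stands in for any external call.
+import { Pool } from "pg";
+
+const pool = new Pool({ connectionString: process.env.DATABASE_URL });
+
+export async function settleOrder(orderId: number, charge: () => Promise<string>) {
+  const paymentId = await charge(); // slow external call, no transaction open
+
+  const client = await pool.connect();
+  try {
+    await client.query("begin");
+    await client.query(
+      `update orders
+          set status = 'paid', payment_id = $1
+        where id = $2 and status = 'pending'`,
+      [paymentId, orderId],
+    );
+    await client.query("commit");
+  } catch (err) {
+    await client.query("rollback");
+    throw err;
+  } finally {
+    client.release();
+  }
+}
+```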
+ +**Incorrect (long transaction with external calls):** + +```sql +begin; +select * from orders where id = 1 for update; -- Lock acquired + +-- Application makes HTTP call to payment API (2-5 seconds) +-- Other queries on this row are blocked! + +update orders set status = 'paid' where id = 1; +commit; -- Lock held for entire duration +``` + +**Correct (minimal transaction scope):** + +```sql +-- Validate data and call APIs outside transaction +-- Application: response = await paymentAPI.charge(...) + +-- Only hold lock for the actual update +begin; +update orders +set status = 'paid', payment_id = $1 +where id = $2 and status = 'pending' +returning *; +commit; -- Lock held for milliseconds +-- Abort queries running longer than 30 seconds +set statement_timeout = '30s'; + +-- Or per-session +set local statement_timeout = '5s'; +``` + +Use `statement_timeout` to prevent runaway transactions: + +Reference: https://www.postgresql.org/docs/current/tutorial-transactions.html + +--- + +### 5.2 Prevent Deadlocks with Consistent Lock Ordering + +**Impact: MEDIUM-HIGH (Eliminate deadlock errors, improve reliability)** + +Deadlocks occur when transactions lock resources in different orders. Always +acquire locks in a consistent order. + +**Incorrect (inconsistent lock ordering):** + +```sql +-- Transaction A -- Transaction B +begin; begin; +update accounts update accounts +set balance = balance - 100 set balance = balance - 50 +where id = 1; where id = 2; -- B locks row 2 + +update accounts update accounts +set balance = balance + 100 set balance = balance + 50 +where id = 2; -- A waits for B where id = 1; -- B waits for A + +-- DEADLOCK! Both waiting for each other +``` + +**Correct (lock rows in consistent order first):** + +```sql +-- Explicitly acquire locks in ID order before updating +begin; +select * from accounts where id in (1, 2) order by id for update; + +-- Now perform updates in any order - locks already held +update accounts set balance = balance - 100 where id = 1; +update accounts set balance = balance + 100 where id = 2; +commit; +-- Single statement acquires all locks atomically +begin; +update accounts +set balance = balance + case id + when 1 then -100 + when 2 then 100 +end +where id in (1, 2); +commit; +-- Check for recent deadlocks +select * from pg_stat_database where deadlocks > 0; + +-- Enable deadlock logging +set log_lock_waits = on; +set deadlock_timeout = '1s'; +``` + +Alternative: use a single statement to update atomically: +Detect deadlocks in logs: +[Deadlocks](https://www.postgresql.org/docs/current/explicit-locking.html#LOCKING-DEADLOCKS) + +--- + +### 5.3 Use Advisory Locks for Application-Level Locking + +**Impact: MEDIUM (Efficient coordination without row-level lock overhead)** + +Advisory locks provide application-level coordination without requiring database rows to lock. + +**Incorrect (creating rows just for locking):** + +```sql +-- Creating dummy rows to lock on +create table resource_locks ( + resource_name text primary key +); + +insert into resource_locks values ('report_generator'); + +-- Lock by selecting the row +select * from resource_locks where resource_name = 'report_generator' for update; +``` + +**Correct (advisory locks):** + +```sql +-- Session-level advisory lock (released on disconnect or unlock) +select pg_advisory_lock(hashtext('report_generator')); +-- ... do exclusive work ... 
+select pg_advisory_unlock(hashtext('report_generator')); + +-- Transaction-level lock (released on commit/rollback) +begin; +select pg_advisory_xact_lock(hashtext('daily_report')); +-- ... do work ... +commit; -- Lock automatically released +-- Returns immediately with true/false instead of waiting +select pg_try_advisory_lock(hashtext('resource_name')); + +-- Use in application +if (acquired) { + -- Do work + select pg_advisory_unlock(hashtext('resource_name')); +} else { + -- Skip or retry later +} +``` + +Try-lock for non-blocking operations: + +Reference: https://www.postgresql.org/docs/current/explicit-locking.html#ADVISORY-LOCKS + +--- + +### 5.4 Use SKIP LOCKED for Non-Blocking Queue Processing + +**Impact: MEDIUM-HIGH (10x throughput for worker queues)** + +**Prerequisites:** PostgreSQL 9.5+ + +When multiple workers process a queue, SKIP LOCKED allows workers to process different rows without waiting. + +**Incorrect (workers block each other):** + +```sql +-- Worker 1 and Worker 2 both try to get next job +begin; +select * from jobs where status = 'pending' order by created_at limit 1 for update; +-- Worker 2 waits for Worker 1's lock to release! +``` + +**Correct (SKIP LOCKED for parallel processing):** + +```sql +-- Each worker skips locked rows and gets the next available +begin; +select * from jobs +where status = 'pending' +order by created_at +limit 1 +for update skip locked; + +-- Worker 1 gets job 1, Worker 2 gets job 2 (no waiting) + +update jobs set status = 'processing' where id = $1; +commit; +-- Atomic claim-and-update in one statement +update jobs +set status = 'processing', worker_id = $1, started_at = now() +where id = ( + select id from jobs + where status = 'pending' + order by created_at + limit 1 + for update skip locked +) +returning *; +``` + +Complete queue pattern: + +Reference: https://www.postgresql.org/docs/current/sql-select.html#SQL-FOR-UPDATE-SHARE + +--- + +## 6. Data Access Patterns + +**Impact: MEDIUM** + +N+1 query elimination, batch operations, cursor-based pagination, and efficient data fetching. + +### 6.1 Batch INSERT Statements for Bulk Data + +**Impact: MEDIUM (10-50x faster bulk inserts)** + +Individual INSERT statements have high overhead. Batch multiple rows in single statements or use COPY. + +**Incorrect (individual inserts):** + +```sql +-- Each insert is a separate transaction and round trip +insert into events (user_id, action) values (1, 'click'); +insert into events (user_id, action) values (1, 'view'); +insert into events (user_id, action) values (2, 'click'); +-- ... 1000 more individual inserts + +-- 1000 inserts = 1000 round trips = slow +``` + +**Correct (batch insert):** + +```sql +-- Multiple rows in single statement +insert into events (user_id, action) values + (1, 'click'), + (1, 'view'), + (2, 'click'), + -- ... up to ~1000 rows per batch + (999, 'view'); + +-- One round trip for 1000 rows +-- COPY is fastest for bulk loading +copy events (user_id, action, created_at) +from '/path/to/data.csv' +with (format csv, header true); + +-- Or from stdin in application +copy events (user_id, action) from stdin with (format csv); +1,click +1,view +2,click +\. +``` + +For large imports, use COPY: + +Reference: https://www.postgresql.org/docs/current/sql-copy.html + +--- + +### 6.2 Eliminate N+1 Queries with Batch Loading + +**Impact: MEDIUM-HIGH (10-100x fewer database round trips)** + +N+1 queries execute one query per item in a loop. Batch them into a single query using arrays or JOINs. 
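+
+The application-side fix is to collect the IDs first and issue a single query with an array parameter. A minimal sketch, assuming node-postgres and the users/orders tables from the examples (the grouping helper is illustrative):
+
+```ts
+// Sketch only: one round trip for all users via = any($1::bigint[]),
+// then group rows in memory for callers that want a per-user view.
+// Assumes node-postgres and an orders(user_id, ...) table.
+import { Pool } from "pg";
+
+const pool = new Pool({ connectionString: process.env.DATABASE_URL });
+
+export async function ordersForUsers(userIds: number[]) {
+  const { rows } = await pool.query(
+    "select * from orders where user_id = any($1::bigint[])",
+    [userIds],
+  );
+
+  const byUser = new Map<number, any[]>();
+  for (const row of rows) {
+    const key = Number(row.user_id);
+    const list = byUser.get(key) ?? [];
+    list.push(row);
+    byUser.set(key, list);
+  }
+  return byUser;
+}
+```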
+ +**Incorrect (N+1 queries):** + +```sql +-- First query: get all users +select id from users where active = true; -- Returns 100 IDs + +-- Then N queries, one per user +select * from orders where user_id = 1; +select * from orders where user_id = 2; +select * from orders where user_id = 3; +-- ... 97 more queries! + +-- Total: 101 round trips to database +``` + +**Correct (single batch query):** + +```sql +-- Collect IDs and query once with ANY +select * from orders where user_id = any(array[1, 2, 3, ...]); + +-- Or use JOIN instead of loop +select u.id, u.name, o.* +from users u +left join orders o on o.user_id = u.id +where u.active = true; + +-- Total: 1 round trip +-- Instead of looping in application code: +-- for user in users: db.query("SELECT * FROM orders WHERE user_id = $1", user.id) + +-- Pass array parameter: +select * from orders where user_id = any($1::bigint[]); +-- Application passes: [1, 2, 3, 4, 5, ...] +``` + +Application pattern: + +Reference: https://supabase.com/docs/guides/database/query-optimization + +--- + +### 6.3 Use Cursor-Based Pagination Instead of OFFSET + +**Impact: MEDIUM-HIGH (Consistent O(1) performance regardless of page depth)** + +OFFSET-based pagination scans all skipped rows, getting slower on deeper pages. Cursor pagination is O(1). + +**Incorrect (OFFSET pagination):** + +```sql +-- Page 1: scans 20 rows +select * from products order by id limit 20 offset 0; + +-- Page 100: scans 2000 rows to skip 1980 +select * from products order by id limit 20 offset 1980; + +-- Page 10000: scans 200,000 rows! +select * from products order by id limit 20 offset 199980; +``` + +**Correct (cursor/keyset pagination):** + +```sql +-- Page 1: get first 20 +select * from products order by id limit 20; +-- Application stores last_id = 20 + +-- Page 2: start after last ID +select * from products where id > 20 order by id limit 20; +-- Uses index, always fast regardless of page depth + +-- Page 10000: same speed as page 1 +select * from products where id > 199980 order by id limit 20; +-- Cursor must include all sort columns +select * from products +where (created_at, id) > ('2024-01-15 10:00:00', 12345) +order by created_at, id +limit 20; +``` + +For multi-column sorting: + +Reference: https://supabase.com/docs/guides/database/pagination + +--- + +### 6.4 Use UPSERT for Insert-or-Update Operations + +**Impact: MEDIUM (Atomic operation, eliminates race conditions)** + +**Prerequisites:** PostgreSQL 9.5+ + +Using separate SELECT-then-INSERT/UPDATE creates race conditions. Use INSERT ... ON CONFLICT for atomic upserts. + +**Incorrect (check-then-insert race condition):** + +```sql +-- Race condition: two requests check simultaneously +select * from settings where user_id = 123 and key = 'theme'; +-- Both find nothing + +-- Both try to insert +insert into settings (user_id, key, value) values (123, 'theme', 'dark'); +-- One succeeds, one fails with duplicate key error! 
+``` + +**Correct (atomic UPSERT):** + +```sql +-- Single atomic operation +insert into settings (user_id, key, value) +values (123, 'theme', 'dark') +on conflict (user_id, key) +do update set value = excluded.value, updated_at = now(); + +-- Returns the inserted/updated row +insert into settings (user_id, key, value) +values (123, 'theme', 'dark') +on conflict (user_id, key) +do update set value = excluded.value +returning *; +-- Insert only if not exists (no update) +insert into page_views (page_id, user_id) +values (1, 123) +on conflict (page_id, user_id) do nothing; +``` + +Insert-or-ignore pattern: + +Reference: https://www.postgresql.org/docs/current/sql-insert.html#SQL-ON-CONFLICT + +--- + +## 7. Monitoring & Diagnostics + +**Impact: LOW-MEDIUM** + +Using pg_stat_statements, EXPLAIN ANALYZE, metrics collection, and performance diagnostics. + +### 7.1 Enable pg_stat_statements for Query Analysis + +**Impact: LOW-MEDIUM (Identify top resource-consuming queries)** + +**Prerequisites:** Extension: pg_stat_statements + +pg_stat_statements tracks execution statistics for all queries, helping identify slow and frequent queries. + +**Incorrect (no visibility into query patterns):** + +```sql +-- Database is slow, but which queries are the problem? +-- No way to know without pg_stat_statements +``` + +**Correct (enable and query pg_stat_statements):** + +```sql +-- Enable the extension +create extension if not exists pg_stat_statements; + +-- Find slowest queries by total time +select + calls, + round(total_exec_time::numeric, 2) as total_time_ms, + round(mean_exec_time::numeric, 2) as mean_time_ms, + query +from pg_stat_statements +order by total_exec_time desc +limit 10; + +-- Find most frequent queries +select calls, query +from pg_stat_statements +order by calls desc +limit 10; + +-- Reset statistics after optimization +select pg_stat_statements_reset(); +-- Queries with high mean time (candidates for optimization) +select query, mean_exec_time, calls +from pg_stat_statements +where mean_exec_time > 100 -- > 100ms average +order by mean_exec_time desc; +``` + +Key metrics to monitor: + +Reference: https://supabase.com/docs/guides/database/extensions/pg_stat_statements + +--- + +### 7.2 Maintain Table Statistics with VACUUM and ANALYZE + +**Impact: MEDIUM (2-10x better query plans with accurate statistics)** + +Outdated statistics cause the query planner to make poor decisions. VACUUM reclaims space, ANALYZE updates statistics. 
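+
+A lightweight way to apply this rule operationally is a periodic check for tables whose dead-tuple ratio suggests autovacuum is falling behind, so they can be analyzed or given the tuned settings shown below. The sketch assumes node-postgres; the 10% threshold is an illustrative choice, not a Postgres default.
+
+```ts
+// Sketch only: flag tables with a high dead-tuple ratio using the standard
+// pg_stat_user_tables statistics view. Assumes node-postgres.
+import { Pool } from "pg";
+
+const pool = new Pool({ connectionString: process.env.DATABASE_URL });
+
+export async function tablesNeedingAttention(threshold = 0.1) {
+  const { rows } = await pool.query(
+    `select relname, n_dead_tup, n_live_tup, last_autovacuum, last_autoanalyze
+       from pg_stat_user_tables
+      where n_live_tup > 0
+        and n_dead_tup::float8 / n_live_tup > $1::float8
+      order by n_dead_tup desc`,
+    [threshold],
+  );
+  return rows;
+}
+```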
+ +**Incorrect (stale statistics):** + +```sql +-- Table has 1M rows but stats say 1000 +-- Query planner chooses wrong strategy +explain select * from orders where status = 'pending'; +-- Shows: Seq Scan (because stats show small table) +-- Actually: Index Scan would be much faster +``` + +**Correct (maintain fresh statistics):** + +```sql +-- Manually analyze after large data changes +analyze orders; + +-- Analyze specific columns used in WHERE clauses +analyze orders (status, created_at); + +-- Check when tables were last analyzed +select + relname, + last_vacuum, + last_autovacuum, + last_analyze, + last_autoanalyze +from pg_stat_user_tables +order by last_analyze nulls first; +-- Increase frequency for high-churn tables +alter table orders set ( + autovacuum_vacuum_scale_factor = 0.05, -- Vacuum at 5% dead tuples (default 20%) + autovacuum_analyze_scale_factor = 0.02 -- Analyze at 2% changes (default 10%) +); + +-- Check autovacuum status +select * from pg_stat_progress_vacuum; +``` + +Autovacuum tuning for busy tables: + +Reference: https://supabase.com/docs/guides/database/database-size#vacuum-operations + +--- + +### 7.3 Use EXPLAIN ANALYZE to Diagnose Slow Queries + +**Impact: LOW-MEDIUM (Identify exact bottlenecks in query execution)** + +EXPLAIN ANALYZE executes the query and shows actual timings, revealing the true performance bottlenecks. + +**Incorrect (guessing at performance issues):** + +```sql +-- Query is slow, but why? +select * from orders where customer_id = 123 and status = 'pending'; +-- "It must be missing an index" - but which one? +``` + +**Correct (use EXPLAIN ANALYZE):** + +```sql +explain (analyze, buffers, format text) +select * from orders where customer_id = 123 and status = 'pending'; + +-- Output reveals the issue: +-- Seq Scan on orders (cost=0.00..25000.00 rows=50 width=100) (actual time=0.015..450.123 rows=50 loops=1) +-- Filter: ((customer_id = 123) AND (status = 'pending'::text)) +-- Rows Removed by Filter: 999950 +-- Buffers: shared hit=5000 read=15000 +-- Planning Time: 0.150 ms +-- Execution Time: 450.500 ms +-- Seq Scan on large tables = missing index +-- Rows Removed by Filter = poor selectivity or missing index +-- Buffers: read >> hit = data not cached, needs more memory +-- Nested Loop with high loops = consider different join strategy +-- Sort Method: external merge = work_mem too low +``` + +Key things to look for: + +Reference: https://supabase.com/docs/guides/database/inspect + +--- + +## 8. Advanced Features + +**Impact: LOW** + +Full-text search, JSONB optimization, PostGIS, extensions, and advanced Postgres features. + +### 8.1 Index JSONB Columns for Efficient Querying + +**Impact: MEDIUM (10-100x faster JSONB queries with proper indexing)** + +**Prerequisites:** PostgreSQL 9.4+ + +JSONB queries without indexes scan the entire table. Use GIN indexes for containment queries. 
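+
+For single-key lookups driven from application code, the value is bound as plain text and the expression index shown below is what makes it fast. A minimal sketch, assuming node-postgres and the `products` table from the examples:
+
+```ts
+// Sketch only: a single-key JSONB lookup; the ->> extraction matches the
+// expression index on (attributes->>'brand') created below.
+// Assumes node-postgres and a products(id, attributes jsonb) table.
+import { Pool } from "pg";
+
+const pool = new Pool({ connectionString: process.env.DATABASE_URL });
+
+export async function productsByBrand(brand: string) {
+  const { rows } = await pool.query(
+    "select id, attributes from products where attributes->>'brand' = $1",
+    [brand],
+  );
+  return rows;
+}
+```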
+ +**Incorrect (no index on JSONB):** + +```sql +create table products ( + id bigint primary key, + attributes jsonb +); + +-- Full table scan for every query +select * from products where attributes @> '{"color": "red"}'; +select * from products where attributes->>'brand' = 'Nike'; +``` + +**Correct (GIN index for JSONB):** + +```sql +-- GIN index for containment operators (@>, ?, ?&, ?|) +create index products_attrs_gin on products using gin (attributes); + +-- Now containment queries use the index +select * from products where attributes @> '{"color": "red"}'; + +-- For specific key lookups, use expression index +create index products_brand_idx on products ((attributes->>'brand')); +select * from products where attributes->>'brand' = 'Nike'; +-- jsonb_ops (default): supports all operators, larger index +create index idx1 on products using gin (attributes); + +-- jsonb_path_ops: only @> operator, but 2-3x smaller index +create index idx2 on products using gin (attributes jsonb_path_ops); +``` + +Choose the right operator class: + +Reference: https://www.postgresql.org/docs/current/datatype-json.html#JSON-INDEXING + +--- + +### 8.2 Use tsvector for Full-Text Search + +**Impact: MEDIUM (100x faster than LIKE, with ranking support)** + +LIKE with wildcards can't use indexes. Full-text search with tsvector is orders of magnitude faster. + +**Incorrect (LIKE pattern matching):** + +```sql +-- Cannot use index, scans all rows +select * from articles where content like '%postgresql%'; + +-- Case-insensitive makes it worse +select * from articles where lower(content) like '%postgresql%'; +``` + +**Correct (full-text search with tsvector):** + +```sql +-- Add tsvector column and index +alter table articles add column search_vector tsvector + generated always as (to_tsvector('english', coalesce(title,'') || ' ' || coalesce(content,''))) stored; + +create index articles_search_idx on articles using gin (search_vector); + +-- Fast full-text search +select * from articles +where search_vector @@ to_tsquery('english', 'postgresql & performance'); + +-- With ranking +select *, ts_rank(search_vector, query) as rank +from articles, to_tsquery('english', 'postgresql') query +where search_vector @@ query +order by rank desc; +-- AND: both terms required +to_tsquery('postgresql & performance') + +-- OR: either term +to_tsquery('postgresql | mysql') + +-- Prefix matching +to_tsquery('post:*') +``` + +Search multiple terms: + +Reference: https://supabase.com/docs/guides/database/full-text-search + +--- + +## References + +- https://www.postgresql.org/docs/current/ +- https://supabase.com/docs +- https://wiki.postgresql.org/wiki/Performance_Optimization +- https://supabase.com/docs/guides/database/overview +- https://supabase.com/docs/guides/auth/row-level-security diff --git a/skills/postgres-best-practices/AGENTS.supabase.md b/skills/postgres-best-practices/AGENTS.supabase.md new file mode 100644 index 0000000..786a9da --- /dev/null +++ b/skills/postgres-best-practices/AGENTS.supabase.md @@ -0,0 +1,1502 @@ +# Postgres Best Practices + +**Version 1.0.0** +Supabase +January 2026 + +> This document is optimized for AI agents and LLMs. Rules are prioritized by performance impact. + +--- + +## Abstract + +Comprehensive Postgres performance optimization guide for developers using Supabase and Postgres. Contains performance rules across 8 categories, prioritized by impact from critical (query performance, connection management) to incremental (advanced features). 
Each rule includes detailed explanations, incorrect vs. correct SQL examples, query plan analysis, and specific performance metrics to guide automated optimization and code generation. + +--- + +## Table of Contents + +1. [Query Performance](#query-performance) - **CRITICAL** + - 1.1 [Add Indexes on WHERE and JOIN Columns](#11-add-indexes-on-where-and-join-columns) + - 1.2 [Choose the Right Index Type for Your Data](#12-choose-the-right-index-type-for-your-data) + - 1.3 [Create Composite Indexes for Multi-Column Queries](#13-create-composite-indexes-for-multi-column-queries) + - 1.4 [Use Covering Indexes to Avoid Table Lookups](#14-use-covering-indexes-to-avoid-table-lookups) + - 1.5 [Use Partial Indexes for Filtered Queries](#15-use-partial-indexes-for-filtered-queries) + +2. [Connection Management](#connection-management) - **CRITICAL** + - 2.1 [Configure Idle Connection Timeouts](#21-configure-idle-connection-timeouts) + - 2.2 [Set Appropriate Connection Limits](#22-set-appropriate-connection-limits) + - 2.3 [Use Connection Pooling for All Applications](#23-use-connection-pooling-for-all-applications) + - 2.4 [Use Prepared Statements Correctly with Pooling](#24-use-prepared-statements-correctly-with-pooling) + +3. [Security & RLS](#security-rls) - **CRITICAL** + - 3.1 [Apply Principle of Least Privilege](#31-apply-principle-of-least-privilege) + - 3.2 [Enable Row Level Security for Multi-Tenant Data](#32-enable-row-level-security-for-multi-tenant-data) + - 3.3 [Optimize RLS Policies for Performance](#33-optimize-rls-policies-for-performance) + +4. [Schema Design](#schema-design) - **HIGH** + - 4.1 [Choose Appropriate Data Types](#41-choose-appropriate-data-types) + - 4.2 [Index Foreign Key Columns](#42-index-foreign-key-columns) + - 4.3 [Partition Large Tables for Better Performance](#43-partition-large-tables-for-better-performance) + - 4.4 [Select Optimal Primary Key Strategy](#44-select-optimal-primary-key-strategy) + - 4.5 [Use Lowercase Identifiers for Compatibility](#45-use-lowercase-identifiers-for-compatibility) + +5. [Concurrency & Locking](#concurrency-locking) - **MEDIUM-HIGH** + - 5.1 [Keep Transactions Short to Reduce Lock Contention](#51-keep-transactions-short-to-reduce-lock-contention) + - 5.2 [Prevent Deadlocks with Consistent Lock Ordering](#52-prevent-deadlocks-with-consistent-lock-ordering) + - 5.3 [Use Advisory Locks for Application-Level Locking](#53-use-advisory-locks-for-application-level-locking) + - 5.4 [Use SKIP LOCKED for Non-Blocking Queue Processing](#54-use-skip-locked-for-non-blocking-queue-processing) + +6. [Data Access Patterns](#data-access-patterns) - **MEDIUM** + - 6.1 [Batch INSERT Statements for Bulk Data](#61-batch-insert-statements-for-bulk-data) + - 6.2 [Eliminate N+1 Queries with Batch Loading](#62-eliminate-n1-queries-with-batch-loading) + - 6.3 [Use Cursor-Based Pagination Instead of OFFSET](#63-use-cursor-based-pagination-instead-of-offset) + - 6.4 [Use UPSERT for Insert-or-Update Operations](#64-use-upsert-for-insert-or-update-operations) + +7. [Monitoring & Diagnostics](#monitoring-diagnostics) - **LOW-MEDIUM** + - 7.1 [Enable pg_stat_statements for Query Analysis](#71-enable-pgstatstatements-for-query-analysis) + - 7.2 [Maintain Table Statistics with VACUUM and ANALYZE](#72-maintain-table-statistics-with-vacuum-and-analyze) + - 7.3 [Use EXPLAIN ANALYZE to Diagnose Slow Queries](#73-use-explain-analyze-to-diagnose-slow-queries) + +8. 
[Advanced Features](#advanced-features) - **LOW** + - 8.1 [Index JSONB Columns for Efficient Querying](#81-index-jsonb-columns-for-efficient-querying) + - 8.2 [Use tsvector for Full-Text Search](#82-use-tsvector-for-full-text-search) + +--- + +## 1. Query Performance + +**Impact: CRITICAL** + +Slow queries, missing indexes, inefficient query plans. The most common source of Postgres performance issues. + +### 1.1 Add Indexes on WHERE and JOIN Columns + +**Impact: CRITICAL (100-1000x faster queries on large tables)** + +Queries filtering or joining on unindexed columns cause full table scans, which become exponentially slower as tables grow. + +**Incorrect (sequential scan on large table):** + +```sql +-- No index on customer_id causes full table scan +select * from orders where customer_id = 123; + +-- EXPLAIN shows: Seq Scan on orders (cost=0.00..25000.00 rows=100 width=85) +``` + +**Correct (index scan):** + +```sql +-- Create index on frequently filtered column +create index orders_customer_id_idx on orders (customer_id); + +select * from orders where customer_id = 123; + +-- EXPLAIN shows: Index Scan using orders_customer_id_idx (cost=0.42..8.44 rows=100 width=85) +-- Index the referencing column +create index orders_customer_id_idx on orders (customer_id); + +select c.name, o.total +from customers c +join orders o on o.customer_id = c.id; +``` + +For JOIN columns, always index the foreign key side: + +Reference: https://supabase.com/docs/guides/database/query-optimization + +--- + +### 1.2 Choose the Right Index Type for Your Data + +**Impact: HIGH (10-100x improvement with correct index type)** + +Different index types excel at different query patterns. The default B-tree isn't always optimal. + +**Incorrect (B-tree for JSONB containment):** + +```sql +-- B-tree cannot optimize containment operators +create index products_attrs_idx on products (attributes); +select * from products where attributes @> '{"color": "red"}'; +-- Full table scan - B-tree doesn't support @> operator +``` + +**Correct (GIN for JSONB):** + +```sql +-- GIN supports @>, ?, ?&, ?| operators +create index products_attrs_idx on products using gin (attributes); +select * from products where attributes @> '{"color": "red"}'; +-- B-tree (default): =, <, >, BETWEEN, IN, IS NULL +create index users_created_idx on users (created_at); + +-- GIN: arrays, JSONB, full-text search +create index posts_tags_idx on posts using gin (tags); + +-- BRIN: large time-series tables (10-100x smaller) +create index events_time_idx on events using brin (created_at); + +-- Hash: equality-only (slightly faster than B-tree for =) +create index sessions_token_idx on sessions using hash (token); +``` + +Index type guide: + +Reference: https://www.postgresql.org/docs/current/indexes-types.html + +--- + +### 1.3 Create Composite Indexes for Multi-Column Queries + +**Impact: HIGH (5-10x faster multi-column queries)** + +When queries filter on multiple columns, a composite index is more efficient than separate single-column indexes. 
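+
+To verify that a composite index is actually being chosen over the older single-column indexes, the standard statistics views can be checked from monitoring code, as in the sketch below (node-postgres assumed; the helper is illustrative):
+
+```ts
+// Sketch only: report per-index scan counts for a table via the standard
+// pg_stat_user_indexes view. After the composite index below is created,
+// its idx_scan count should grow while the single-column indexes stay flat.
+// Assumes node-postgres.
+import { Pool } from "pg";
+
+const pool = new Pool({ connectionString: process.env.DATABASE_URL });
+
+export async function indexScanCounts(table: string) {
+  const { rows } = await pool.query(
+    `select indexrelname, idx_scan
+       from pg_stat_user_indexes
+      where relname = $1
+      order by idx_scan desc`,
+    [table],
+  );
+  return rows;
+}
+
+// Usage: indexScanCounts("orders")
+```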
+ +**Incorrect (separate indexes require bitmap scan):** + +```sql +-- Two separate indexes +create index orders_status_idx on orders (status); +create index orders_created_idx on orders (created_at); + +-- Query must combine both indexes (slower) +select * from orders where status = 'pending' and created_at > '2024-01-01'; +``` + +**Correct (composite index):** + +```sql +-- Single composite index (leftmost column first for equality checks) +create index orders_status_created_idx on orders (status, created_at); + +-- Query uses one efficient index scan +select * from orders where status = 'pending' and created_at > '2024-01-01'; +-- Good: status (=) before created_at (>) +create index idx on orders (status, created_at); + +-- Works for: WHERE status = 'pending' +-- Works for: WHERE status = 'pending' AND created_at > '2024-01-01' +-- Does NOT work for: WHERE created_at > '2024-01-01' (leftmost prefix rule) +``` + +**Column order matters** - place equality columns first, range columns last: + +Reference: https://www.postgresql.org/docs/current/indexes-multicolumn.html + +--- + +### 1.4 Use Covering Indexes to Avoid Table Lookups + +**Impact: MEDIUM-HIGH (2-5x faster queries by eliminating heap fetches)** + +**Prerequisites:** PostgreSQL 11+ + +Covering indexes include all columns needed by a query, enabling index-only scans that skip the table entirely. + +**Incorrect (index scan + heap fetch):** + +```sql +create index users_email_idx on users (email); + +-- Must fetch name and created_at from table heap +select email, name, created_at from users where email = 'user@example.com'; +``` + +**Correct (index-only scan with INCLUDE):** + +```sql +-- Include non-searchable columns in the index +create index users_email_idx on users (email) include (name, created_at); + +-- All columns served from index, no table access needed +select email, name, created_at from users where email = 'user@example.com'; +-- Searching by status, but also need customer_id and total +create index orders_status_idx on orders (status) include (customer_id, total); + +select status, customer_id, total from orders where status = 'shipped'; +``` + +Use INCLUDE for columns you SELECT but don't filter on: + +Reference: https://www.postgresql.org/docs/current/indexes-index-only-scans.html + +--- + +### 1.5 Use Partial Indexes for Filtered Queries + +**Impact: HIGH (5-20x smaller indexes, faster writes and queries)** + +Partial indexes only include rows matching a WHERE condition, making them smaller and faster when queries consistently filter on the same condition. + +**Incorrect (full index includes irrelevant rows):** + +```sql +-- Index includes all rows, even soft-deleted ones +create index users_email_idx on users (email); + +-- Query always filters active users +select * from users where email = 'user@example.com' and deleted_at is null; +``` + +**Correct (partial index matches query filter):** + +```sql +-- Index only includes active users +create index users_active_email_idx on users (email) +where deleted_at is null; + +-- Query uses the smaller, faster index +select * from users where email = 'user@example.com' and deleted_at is null; +-- Only pending orders (status rarely changes once completed) +create index orders_pending_idx on orders (created_at) +where status = 'pending'; + +-- Only non-null values +create index products_sku_idx on products (sku) +where sku is not null; +``` + +Common use cases for partial indexes: + +Reference: https://www.postgresql.org/docs/current/indexes-partial.html + +--- + +## 2. 
Connection Management + +**Impact: CRITICAL** + +Connection pooling, limits, and serverless strategies. Critical for applications with high concurrency or serverless deployments. + +### 2.1 Configure Idle Connection Timeouts + +**Impact: HIGH (Reclaim 30-50% of connection slots from idle clients)** + +Idle connections waste resources. Configure timeouts to automatically reclaim them. + +**Incorrect (connections held indefinitely):** + +```sql +-- No timeout configured +show idle_in_transaction_session_timeout; -- 0 (disabled) + +-- Connections stay open forever, even when idle +select pid, state, state_change, query +from pg_stat_activity +where state = 'idle in transaction'; +-- Shows transactions idle for hours, holding locks +``` + +**Correct (automatic cleanup of idle connections):** + +```ini +-- Terminate connections idle in transaction after 30 seconds +alter system set idle_in_transaction_session_timeout = '30s'; + +-- Terminate completely idle connections after 10 minutes +alter system set idle_session_timeout = '10min'; + +-- Reload configuration +select pg_reload_conf(); +# pgbouncer.ini +server_idle_timeout = 60 +client_idle_timeout = 300 +``` + +For pooled connections, configure at the pooler level: + +Reference: https://www.postgresql.org/docs/current/runtime-config-client.html#GUC-IDLE-IN-TRANSACTION-SESSION-TIMEOUT + +--- + +### 2.2 Set Appropriate Connection Limits + +**Impact: CRITICAL (Prevent database crashes and memory exhaustion)** + +Too many connections exhaust memory and degrade performance. Set limits based on available resources. + +**Incorrect (unlimited or excessive connections):** + +```sql +-- Default max_connections = 100, but often increased blindly +show max_connections; -- 500 (way too high for 4GB RAM) + +-- Each connection uses 1-3MB RAM +-- 500 connections * 2MB = 1GB just for connections! +-- Out of memory errors under load +``` + +**Correct (calculate based on resources):** + +```sql +-- Formula: max_connections = (RAM in MB / 5MB per connection) - reserved +-- For 4GB RAM: (4096 / 5) - 10 = ~800 theoretical max +-- But practically, 100-200 is better for query performance + +-- Recommended settings for 4GB RAM +alter system set max_connections = 100; + +-- Also set work_mem appropriately +-- work_mem * max_connections should not exceed 25% of RAM +alter system set work_mem = '8MB'; -- 8MB * 100 = 800MB max +select count(*), state from pg_stat_activity group by state; +``` + +Monitor connection usage: + +Reference: https://supabase.com/docs/guides/platform/performance#connection-management + +--- + +### 2.3 Use Connection Pooling for All Applications + +**Impact: CRITICAL (Handle 10-100x more concurrent users)** + +Postgres connections are expensive (1-3MB RAM each). Without pooling, applications exhaust connections under load. + +**Incorrect (new connection per request):** + +```sql +-- Each request creates a new connection +-- Application code: db.connect() per request +-- Result: 500 concurrent users = 500 connections = crashed database + +-- Check current connections +select count(*) from pg_stat_activity; -- 487 connections! 
+``` + +**Correct (connection pooling):** + +```sql +-- Use a pooler like PgBouncer between app and database +-- Application connects to pooler, pooler reuses a small pool to Postgres + +-- Configure pool_size based on: (CPU cores * 2) + spindle_count +-- Example for 4 cores: pool_size = 10 + +-- Result: 500 concurrent users share 10 actual connections +select count(*) from pg_stat_activity; -- 10 connections +``` + +Pool modes: +- **Transaction mode**: connection returned after each transaction (best for most apps) +- **Session mode**: connection held for entire session (needed for prepared statements, temp tables) + +Reference: https://supabase.com/docs/guides/database/connecting-to-postgres#connection-pooler + +--- + +### 2.4 Use Prepared Statements Correctly with Pooling + +**Impact: HIGH (Avoid prepared statement conflicts in pooled environments)** + +Prepared statements are tied to individual database connections. In transaction-mode pooling, connections are shared, causing conflicts. + +**Incorrect (named prepared statements with transaction pooling):** + +```sql +-- Named prepared statement +prepare get_user as select * from users where id = $1; + +-- In transaction mode pooling, next request may get different connection +execute get_user(123); +-- ERROR: prepared statement "get_user" does not exist +``` + +**Correct (use unnamed statements or session mode):** + +```sql +-- Option 1: Use unnamed prepared statements (most ORMs do this automatically) +-- The query is prepared and executed in a single protocol message + +-- Option 2: Deallocate after use in transaction mode +prepare get_user as select * from users where id = $1; +execute get_user(123); +deallocate get_user; + +-- Option 3: Use session mode pooling (port 5432 vs 6543) +-- Connection is held for entire session, prepared statements persist +-- Many drivers use prepared statements by default +-- Node.js pg: { prepare: false } to disable +-- JDBC: prepareThreshold=0 to disable +``` + +Check your driver settings: + +Reference: https://supabase.com/docs/guides/database/connecting-to-postgres#connection-pool-modes + +--- + +## 3. Security & RLS + +**Impact: CRITICAL** + +Row-Level Security policies, privilege management, and authentication patterns. + +### 3.1 Apply Principle of Least Privilege + +**Impact: MEDIUM (Reduced attack surface, better audit trail)** + +Grant only the minimum permissions required. Never use superuser for application queries. 
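+
+In application code this usually means one connection pool per role, each logging in as a role that holds only the privileges that code path needs - never the superuser. A minimal sketch assuming the node-postgres `pg` client (role and database names are illustrative):
+
+```typescript
+import { Pool } from "pg";
+
+// Read-only pool for reporting and public read endpoints
+const readPool = new Pool({ user: "app_readonly_login", database: "shop" });
+
+// Write pool for order processing - can insert/update orders, nothing else
+const writePool = new Pool({ user: "app_writer_login", database: "shop" });
+
+// Migrations run separately under a privileged role, never through these pools.
+export const listProducts = () =>
+  readPool.query("select id, name, price from products");
+
+export const createOrder = (customerId: number, total: number) =>
+  writePool.query(
+    "insert into orders (customer_id, total) values ($1, $2) returning id",
+    [customerId, total],
+  );
+```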
+ +**Incorrect (overly broad permissions):** + +```sql +-- Application uses superuser connection +-- Or grants ALL to application role +grant all privileges on all tables in schema public to app_user; +grant all privileges on all sequences in schema public to app_user; + +-- Any SQL injection becomes catastrophic +-- drop table users; cascades to everything +``` + +**Correct (minimal, specific grants):** + +```sql +-- Create role with no default privileges +create role app_readonly nologin; + +-- Grant only SELECT on specific tables +grant usage on schema public to app_readonly; +grant select on public.products, public.categories to app_readonly; + +-- Create role for writes with limited scope +create role app_writer nologin; +grant usage on schema public to app_writer; +grant select, insert, update on public.orders to app_writer; +grant usage on sequence orders_id_seq to app_writer; +-- No DELETE permission + +-- Login role inherits from these +create role app_user login password 'xxx'; +grant app_writer to app_user; +-- Revoke default public access +revoke all on schema public from public; +revoke all on all tables in schema public from public; +``` + +Revoke public defaults: + +Reference: https://supabase.com/blog/postgres-roles-and-privileges + +--- + +### 3.2 Enable Row Level Security for Multi-Tenant Data + +**Impact: CRITICAL (Database-enforced tenant isolation, prevent data leaks)** + +Row Level Security (RLS) enforces data access at the database level, ensuring users only see their own data. + +**Incorrect (application-level filtering only):** + +```sql +-- Relying only on application to filter +select * from orders where user_id = $current_user_id; + +-- Bug or bypass means all data is exposed! +select * from orders; -- Returns ALL orders +``` + +**Correct (database-enforced RLS):** + +```sql +-- Enable RLS on the table +alter table orders enable row level security; + +-- Create policy for users to see only their orders +create policy orders_user_policy on orders + for all + using (user_id = current_setting('app.current_user_id')::bigint); + +-- Force RLS even for table owners +alter table orders force row level security; + +-- Set user context and query +set app.current_user_id = '123'; +select * from orders; -- Only returns orders for user 123 +create policy orders_user_policy on orders + for all + to authenticated + using (user_id = auth.uid()); +``` + +Policy for authenticated role: + +Reference: https://supabase.com/docs/guides/database/postgres/row-level-security + +--- + +### 3.3 Optimize RLS Policies for Performance + +**Impact: HIGH (5-10x faster RLS queries with proper patterns)** + +Poorly written RLS policies can cause severe performance issues. Use subqueries and indexes strategically. + +**Incorrect (function called for every row):** + +```sql +create policy orders_policy on orders + using (auth.uid() = user_id); -- auth.uid() called per row! 
+ +-- With 1M rows, auth.uid() is called 1M times +``` + +**Correct (wrap functions in SELECT):** + +```sql +create policy orders_policy on orders + using ((select auth.uid()) = user_id); -- Called once, cached + +-- 100x+ faster on large tables +-- Create helper function (runs as definer, bypasses RLS) +create or replace function is_team_member(team_id bigint) +returns boolean +language sql +security definer +set search_path = '' +as $$ + select exists ( + select 1 from public.team_members + where team_id = $1 and user_id = (select auth.uid()) + ); +$$; + +-- Use in policy (indexed lookup, not per-row check) +create policy team_orders_policy on orders + using ((select is_team_member(team_id))); +create index orders_user_id_idx on orders (user_id); +``` + +Use security definer functions for complex checks: +Always add indexes on columns used in RLS policies: + +Reference: https://supabase.com/docs/guides/database/postgres/row-level-security#rls-performance-recommendations + +--- + +## 4. Schema Design + +**Impact: HIGH** + +Table design, index strategies, partitioning, and data type selection. Foundation for long-term performance. + +### 4.1 Choose Appropriate Data Types + +**Impact: HIGH (50% storage reduction, faster comparisons)** + +Using the right data types reduces storage, improves query performance, and prevents bugs. + +**Incorrect (wrong data types):** + +```sql +create table users ( + id int, -- Will overflow at 2.1 billion + email varchar(255), -- Unnecessary length limit + created_at timestamp, -- Missing timezone info + is_active varchar(5), -- String for boolean + price varchar(20) -- String for numeric +); +``` + +**Correct (appropriate data types):** + +```sql +create table users ( + id bigint generated always as identity primary key, -- 9 quintillion max + email text, -- No artificial limit, same performance as varchar + created_at timestamptz, -- Always store timezone-aware timestamps + is_active boolean default true, -- 1 byte vs variable string length + price numeric(10,2) -- Exact decimal arithmetic +); +-- IDs: use bigint, not int (future-proofing) +-- Strings: use text, not varchar(n) unless constraint needed +-- Time: use timestamptz, not timestamp +-- Money: use numeric, not float (precision matters) +-- Enums: use text with check constraint or create enum type +``` + +Key guidelines: + +Reference: https://www.postgresql.org/docs/current/datatype.html + +--- + +### 4.2 Index Foreign Key Columns + +**Impact: HIGH (10-100x faster JOINs and CASCADE operations)** + +Postgres does not automatically index foreign key columns. Missing indexes cause slow JOINs and CASCADE operations. + +**Incorrect (unindexed foreign key):** + +```sql +create table orders ( + id bigint generated always as identity primary key, + customer_id bigint references customers(id) on delete cascade, + total numeric(10,2) +); + +-- No index on customer_id! 
+-- JOINs and ON DELETE CASCADE both require full table scan +select * from orders where customer_id = 123; -- Seq Scan +delete from customers where id = 123; -- Locks table, scans all orders +``` + +**Correct (indexed foreign key):** + +```sql +create table orders ( + id bigint generated always as identity primary key, + customer_id bigint references customers(id) on delete cascade, + total numeric(10,2) +); + +-- Always index the FK column +create index orders_customer_id_idx on orders (customer_id); + +-- Now JOINs and cascades are fast +select * from orders where customer_id = 123; -- Index Scan +delete from customers where id = 123; -- Uses index, fast cascade +select + conrelid::regclass as table_name, + a.attname as fk_column +from pg_constraint c +join pg_attribute a on a.attrelid = c.conrelid and a.attnum = any(c.conkey) +where c.contype = 'f' + and not exists ( + select 1 from pg_index i + where i.indrelid = c.conrelid and a.attnum = any(i.indkey) + ); +``` + +Find missing FK indexes: + +Reference: https://www.postgresql.org/docs/current/ddl-constraints.html#DDL-CONSTRAINTS-FK + +--- + +### 4.3 Partition Large Tables for Better Performance + +**Impact: MEDIUM-HIGH (5-20x faster queries and maintenance on large tables)** + +**Prerequisites:** PostgreSQL 10+ + +Partitioning splits a large table into smaller pieces, improving query performance and maintenance operations. + +**Incorrect (single large table):** + +```sql +create table events ( + id bigint generated always as identity, + created_at timestamptz, + data jsonb +); + +-- 500M rows, queries scan everything +select * from events where created_at > '2024-01-01'; -- Slow +vacuum events; -- Takes hours, locks table +``` + +**Correct (partitioned by time range):** + +```sql +create table events ( + id bigint generated always as identity, + created_at timestamptz not null, + data jsonb +) partition by range (created_at); + +-- Create partitions for each month +create table events_2024_01 partition of events + for values from ('2024-01-01') to ('2024-02-01'); + +create table events_2024_02 partition of events + for values from ('2024-02-01') to ('2024-03-01'); + +-- Queries only scan relevant partitions +select * from events where created_at > '2024-01-15'; -- Only scans events_2024_01+ + +-- Drop old data instantly +drop table events_2023_01; -- Instant vs DELETE taking hours +``` + +When to partition: +- Tables > 100M rows +- Time-series data with date-based queries +- Need to efficiently drop old data + +Reference: https://www.postgresql.org/docs/current/ddl-partitioning.html + +--- + +### 4.4 Select Optimal Primary Key Strategy + +**Impact: HIGH (Better index locality, reduced fragmentation)** + +Primary key choice affects insert performance, index size, and replication +efficiency. 
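+
+In application code, follow this by letting the database assign the key and reading it back with RETURNING instead of generating a random UUIDv4 client-side. A minimal sketch assuming the node-postgres `pg` client (column names are illustrative):
+
+```typescript
+import { Pool } from "pg";
+
+const pool = new Pool();
+
+// The bigint identity value is generated by Postgres; RETURNING hands it back,
+// so the application never fabricates a random, index-unfriendly key.
+export async function createUser(email: string): Promise<string> {
+  const { rows } = await pool.query<{ id: string }>(
+    "insert into users (email) values ($1) returning id",
+    [email],
+  );
+  return rows[0].id; // node-postgres returns bigint as a string by default
+}
+```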
+ +**Incorrect (problematic PK choices):** + +```sql +-- identity is the SQL-standard approach +create table users ( + id serial primary key -- Works, but IDENTITY is recommended +); + +-- Random UUIDs (v4) cause index fragmentation +create table orders ( + id uuid default gen_random_uuid() primary key -- UUIDv4 = random = scattered inserts +); +``` + +**Correct (optimal PK strategies):** + +```sql +-- Use IDENTITY for sequential IDs (SQL-standard, best for most cases) +create table users ( + id bigint generated always as identity primary key +); + +-- For distributed systems needing UUIDs, use UUIDv7 (time-ordered) +-- Requires pg_uuidv7 extension: create extension pg_uuidv7; +create table orders ( + id uuid default uuid_generate_v7() primary key -- Time-ordered, no fragmentation +); + +-- Alternative: time-prefixed IDs for sortable, distributed IDs (no extension needed) +create table events ( + id text default concat( + to_char(now() at time zone 'utc', 'YYYYMMDDHH24MISSMS'), + gen_random_uuid()::text + ) primary key +); +``` + +Guidelines: +- Single database: `bigint identity` (sequential, 8 bytes, SQL-standard) +- Distributed/exposed IDs: UUIDv7 (requires pg_uuidv7) or ULID (time-ordered, no + fragmentation) +- `serial` works but `identity` is SQL-standard and preferred for new + applications +- Avoid random UUIDs (v4) as primary keys on large tables (causes index + fragmentation) +[Identity Columns](https://www.postgresql.org/docs/current/sql-createtable.html#SQL-CREATETABLE-PARMS-GENERATED-IDENTITY) + +--- + +### 4.5 Use Lowercase Identifiers for Compatibility + +**Impact: MEDIUM (Avoid case-sensitivity bugs with tools, ORMs, and AI assistants)** + +PostgreSQL folds unquoted identifiers to lowercase. Quoted mixed-case identifiers require quotes forever and cause issues with tools, ORMs, and AI assistants that may not recognize them. + +**Incorrect (mixed-case identifiers):** + +```sql +-- Quoted identifiers preserve case but require quotes everywhere +CREATE TABLE "Users" ( + "userId" bigint PRIMARY KEY, + "firstName" text, + "lastName" text +); + +-- Must always quote or queries fail +SELECT "firstName" FROM "Users" WHERE "userId" = 1; + +-- This fails - Users becomes users without quotes +SELECT firstName FROM Users; +-- ERROR: relation "users" does not exist +``` + +**Correct (lowercase snake_case):** + +```sql +-- Unquoted lowercase identifiers are portable and tool-friendly +CREATE TABLE users ( + user_id bigint PRIMARY KEY, + first_name text, + last_name text +); + +-- Works without quotes, recognized by all tools +SELECT first_name FROM users WHERE user_id = 1; +-- ORMs often generate quoted camelCase - configure them to use snake_case +-- Migrations from other databases may preserve original casing +-- Some GUI tools quote identifiers by default - disable this + +-- If stuck with mixed-case, create views as a compatibility layer +CREATE VIEW users AS SELECT "userId" AS user_id, "firstName" AS first_name FROM "Users"; +``` + +Common sources of mixed-case identifiers: + +Reference: https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS + +--- + +## 5. Concurrency & Locking + +**Impact: MEDIUM-HIGH** + +Transaction management, isolation levels, deadlock prevention, and lock contention patterns. + +### 5.1 Keep Transactions Short to Reduce Lock Contention + +**Impact: MEDIUM-HIGH (3-5x throughput improvement, fewer deadlocks)** + +Long-running transactions hold locks that block other queries. Keep transactions as short as possible. 
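+
+The same discipline applies in application code: do the slow external work before opening the transaction, then keep BEGIN...COMMIT down to the single UPDATE. A minimal sketch assuming the node-postgres `pg` client (the payment client is hypothetical):
+
+```typescript
+import { Pool } from "pg";
+
+const pool = new Pool();
+
+// Hypothetical external client - stands in for any slow third-party call.
+declare const paymentApi: { charge(orderId: number): Promise<{ id: string }> };
+
+export async function settleOrder(orderId: number) {
+  // Slow network call happens outside any transaction - no locks held here.
+  const payment = await paymentApi.charge(orderId);
+
+  const client = await pool.connect();
+  try {
+    await client.query("begin");
+    // Row lock is held only for this one statement.
+    const { rows } = await client.query(
+      "update orders set status = 'paid', payment_id = $1 where id = $2 and status = 'pending' returning *",
+      [payment.id, orderId],
+    );
+    await client.query("commit");
+    return rows[0];
+  } catch (err) {
+    await client.query("rollback");
+    throw err;
+  } finally {
+    client.release();
+  }
+}
+```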
+ +**Incorrect (long transaction with external calls):** + +```sql +begin; +select * from orders where id = 1 for update; -- Lock acquired + +-- Application makes HTTP call to payment API (2-5 seconds) +-- Other queries on this row are blocked! + +update orders set status = 'paid' where id = 1; +commit; -- Lock held for entire duration +``` + +**Correct (minimal transaction scope):** + +```sql +-- Validate data and call APIs outside transaction +-- Application: response = await paymentAPI.charge(...) + +-- Only hold lock for the actual update +begin; +update orders +set status = 'paid', payment_id = $1 +where id = $2 and status = 'pending' +returning *; +commit; -- Lock held for milliseconds +-- Abort queries running longer than 30 seconds +set statement_timeout = '30s'; + +-- Or per-session +set local statement_timeout = '5s'; +``` + +Use `statement_timeout` to prevent runaway transactions: + +Reference: https://www.postgresql.org/docs/current/tutorial-transactions.html + +--- + +### 5.2 Prevent Deadlocks with Consistent Lock Ordering + +**Impact: MEDIUM-HIGH (Eliminate deadlock errors, improve reliability)** + +Deadlocks occur when transactions lock resources in different orders. Always +acquire locks in a consistent order. + +**Incorrect (inconsistent lock ordering):** + +```sql +-- Transaction A -- Transaction B +begin; begin; +update accounts update accounts +set balance = balance - 100 set balance = balance - 50 +where id = 1; where id = 2; -- B locks row 2 + +update accounts update accounts +set balance = balance + 100 set balance = balance + 50 +where id = 2; -- A waits for B where id = 1; -- B waits for A + +-- DEADLOCK! Both waiting for each other +``` + +**Correct (lock rows in consistent order first):** + +```sql +-- Explicitly acquire locks in ID order before updating +begin; +select * from accounts where id in (1, 2) order by id for update; + +-- Now perform updates in any order - locks already held +update accounts set balance = balance - 100 where id = 1; +update accounts set balance = balance + 100 where id = 2; +commit; +-- Single statement acquires all locks atomically +begin; +update accounts +set balance = balance + case id + when 1 then -100 + when 2 then 100 +end +where id in (1, 2); +commit; +-- Check for recent deadlocks +select * from pg_stat_database where deadlocks > 0; + +-- Enable deadlock logging +set log_lock_waits = on; +set deadlock_timeout = '1s'; +``` + +Alternative: use a single statement to update atomically: +Detect deadlocks in logs: +[Deadlocks](https://www.postgresql.org/docs/current/explicit-locking.html#LOCKING-DEADLOCKS) + +--- + +### 5.3 Use Advisory Locks for Application-Level Locking + +**Impact: MEDIUM (Efficient coordination without row-level lock overhead)** + +Advisory locks provide application-level coordination without requiring database rows to lock. + +**Incorrect (creating rows just for locking):** + +```sql +-- Creating dummy rows to lock on +create table resource_locks ( + resource_name text primary key +); + +insert into resource_locks values ('report_generator'); + +-- Lock by selecting the row +select * from resource_locks where resource_name = 'report_generator' for update; +``` + +**Correct (advisory locks):** + +```sql +-- Session-level advisory lock (released on disconnect or unlock) +select pg_advisory_lock(hashtext('report_generator')); +-- ... do exclusive work ... 
+select pg_advisory_unlock(hashtext('report_generator')); + +-- Transaction-level lock (released on commit/rollback) +begin; +select pg_advisory_xact_lock(hashtext('daily_report')); +-- ... do work ... +commit; -- Lock automatically released +-- Returns immediately with true/false instead of waiting +select pg_try_advisory_lock(hashtext('resource_name')); + +-- Use in application +if (acquired) { + -- Do work + select pg_advisory_unlock(hashtext('resource_name')); +} else { + -- Skip or retry later +} +``` + +Try-lock for non-blocking operations: + +Reference: https://www.postgresql.org/docs/current/explicit-locking.html#ADVISORY-LOCKS + +--- + +### 5.4 Use SKIP LOCKED for Non-Blocking Queue Processing + +**Impact: MEDIUM-HIGH (10x throughput for worker queues)** + +**Prerequisites:** PostgreSQL 9.5+ + +When multiple workers process a queue, SKIP LOCKED allows workers to process different rows without waiting. + +**Incorrect (workers block each other):** + +```sql +-- Worker 1 and Worker 2 both try to get next job +begin; +select * from jobs where status = 'pending' order by created_at limit 1 for update; +-- Worker 2 waits for Worker 1's lock to release! +``` + +**Correct (SKIP LOCKED for parallel processing):** + +```sql +-- Each worker skips locked rows and gets the next available +begin; +select * from jobs +where status = 'pending' +order by created_at +limit 1 +for update skip locked; + +-- Worker 1 gets job 1, Worker 2 gets job 2 (no waiting) + +update jobs set status = 'processing' where id = $1; +commit; +-- Atomic claim-and-update in one statement +update jobs +set status = 'processing', worker_id = $1, started_at = now() +where id = ( + select id from jobs + where status = 'pending' + order by created_at + limit 1 + for update skip locked +) +returning *; +``` + +Complete queue pattern: + +Reference: https://www.postgresql.org/docs/current/sql-select.html#SQL-FOR-UPDATE-SHARE + +--- + +## 6. Data Access Patterns + +**Impact: MEDIUM** + +N+1 query elimination, batch operations, cursor-based pagination, and efficient data fetching. + +### 6.1 Batch INSERT Statements for Bulk Data + +**Impact: MEDIUM (10-50x faster bulk inserts)** + +Individual INSERT statements have high overhead. Batch multiple rows in single statements or use COPY. + +**Incorrect (individual inserts):** + +```sql +-- Each insert is a separate transaction and round trip +insert into events (user_id, action) values (1, 'click'); +insert into events (user_id, action) values (1, 'view'); +insert into events (user_id, action) values (2, 'click'); +-- ... 1000 more individual inserts + +-- 1000 inserts = 1000 round trips = slow +``` + +**Correct (batch insert):** + +```sql +-- Multiple rows in single statement +insert into events (user_id, action) values + (1, 'click'), + (1, 'view'), + (2, 'click'), + -- ... up to ~1000 rows per batch + (999, 'view'); + +-- One round trip for 1000 rows +-- COPY is fastest for bulk loading +copy events (user_id, action, created_at) +from '/path/to/data.csv' +with (format csv, header true); + +-- Or from stdin in application +copy events (user_id, action) from stdin with (format csv); +1,click +1,view +2,click +\. +``` + +For large imports, use COPY: + +Reference: https://www.postgresql.org/docs/current/sql-copy.html + +--- + +### 6.2 Eliminate N+1 Queries with Batch Loading + +**Impact: MEDIUM-HIGH (10-100x fewer database round trips)** + +N+1 queries execute one query per item in a loop. Batch them into a single query using arrays or JOINs. 
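+
+In application code the fix is to collect the keys, issue one query with `= any($1)`, and regroup the rows in memory. A minimal sketch assuming the node-postgres `pg` client (types are illustrative; bigint and numeric columns arrive as strings):
+
+```typescript
+import { Pool } from "pg";
+
+const pool = new Pool();
+
+type Order = {
+  id: string;
+  user_id: string;
+  total: string;
+};
+
+// One round trip for every user instead of one query per user.
+export async function ordersByUser(userIds: number[]): Promise<Map<string, Order[]>> {
+  const { rows } = await pool.query<Order>(
+    "select * from orders where user_id = any($1::bigint[])",
+    [userIds],
+  );
+  const grouped = new Map<string, Order[]>();
+  for (const row of rows) {
+    const bucket = grouped.get(row.user_id) ?? [];
+    bucket.push(row);
+    grouped.set(row.user_id, bucket);
+  }
+  return grouped;
+}
+```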
+ +**Incorrect (N+1 queries):** + +```sql +-- First query: get all users +select id from users where active = true; -- Returns 100 IDs + +-- Then N queries, one per user +select * from orders where user_id = 1; +select * from orders where user_id = 2; +select * from orders where user_id = 3; +-- ... 97 more queries! + +-- Total: 101 round trips to database +``` + +**Correct (single batch query):** + +```sql +-- Collect IDs and query once with ANY +select * from orders where user_id = any(array[1, 2, 3, ...]); + +-- Or use JOIN instead of loop +select u.id, u.name, o.* +from users u +left join orders o on o.user_id = u.id +where u.active = true; + +-- Total: 1 round trip +-- Instead of looping in application code: +-- for user in users: db.query("SELECT * FROM orders WHERE user_id = $1", user.id) + +-- Pass array parameter: +select * from orders where user_id = any($1::bigint[]); +-- Application passes: [1, 2, 3, 4, 5, ...] +``` + +Application pattern: + +Reference: https://supabase.com/docs/guides/database/query-optimization + +--- + +### 6.3 Use Cursor-Based Pagination Instead of OFFSET + +**Impact: MEDIUM-HIGH (Consistent O(1) performance regardless of page depth)** + +OFFSET-based pagination scans all skipped rows, getting slower on deeper pages. Cursor pagination is O(1). + +**Incorrect (OFFSET pagination):** + +```sql +-- Page 1: scans 20 rows +select * from products order by id limit 20 offset 0; + +-- Page 100: scans 2000 rows to skip 1980 +select * from products order by id limit 20 offset 1980; + +-- Page 10000: scans 200,000 rows! +select * from products order by id limit 20 offset 199980; +``` + +**Correct (cursor/keyset pagination):** + +```sql +-- Page 1: get first 20 +select * from products order by id limit 20; +-- Application stores last_id = 20 + +-- Page 2: start after last ID +select * from products where id > 20 order by id limit 20; +-- Uses index, always fast regardless of page depth + +-- Page 10000: same speed as page 1 +select * from products where id > 199980 order by id limit 20; +-- Cursor must include all sort columns +select * from products +where (created_at, id) > ('2024-01-15 10:00:00', 12345) +order by created_at, id +limit 20; +``` + +For multi-column sorting: + +Reference: https://supabase.com/docs/guides/database/pagination + +--- + +### 6.4 Use UPSERT for Insert-or-Update Operations + +**Impact: MEDIUM (Atomic operation, eliminates race conditions)** + +**Prerequisites:** PostgreSQL 9.5+ + +Using separate SELECT-then-INSERT/UPDATE creates race conditions. Use INSERT ... ON CONFLICT for atomic upserts. + +**Incorrect (check-then-insert race condition):** + +```sql +-- Race condition: two requests check simultaneously +select * from settings where user_id = 123 and key = 'theme'; +-- Both find nothing + +-- Both try to insert +insert into settings (user_id, key, value) values (123, 'theme', 'dark'); +-- One succeeds, one fails with duplicate key error! 
+``` + +**Correct (atomic UPSERT):** + +```sql +-- Single atomic operation +insert into settings (user_id, key, value) +values (123, 'theme', 'dark') +on conflict (user_id, key) +do update set value = excluded.value, updated_at = now(); + +-- Returns the inserted/updated row +insert into settings (user_id, key, value) +values (123, 'theme', 'dark') +on conflict (user_id, key) +do update set value = excluded.value +returning *; +-- Insert only if not exists (no update) +insert into page_views (page_id, user_id) +values (1, 123) +on conflict (page_id, user_id) do nothing; +``` + +Insert-or-ignore pattern: + +Reference: https://www.postgresql.org/docs/current/sql-insert.html#SQL-ON-CONFLICT + +--- + +## 7. Monitoring & Diagnostics + +**Impact: LOW-MEDIUM** + +Using pg_stat_statements, EXPLAIN ANALYZE, metrics collection, and performance diagnostics. + +### 7.1 Enable pg_stat_statements for Query Analysis + +**Impact: LOW-MEDIUM (Identify top resource-consuming queries)** + +**Prerequisites:** Extension: pg_stat_statements + +pg_stat_statements tracks execution statistics for all queries, helping identify slow and frequent queries. + +**Incorrect (no visibility into query patterns):** + +```sql +-- Database is slow, but which queries are the problem? +-- No way to know without pg_stat_statements +``` + +**Correct (enable and query pg_stat_statements):** + +```sql +-- Enable the extension +create extension if not exists pg_stat_statements; + +-- Find slowest queries by total time +select + calls, + round(total_exec_time::numeric, 2) as total_time_ms, + round(mean_exec_time::numeric, 2) as mean_time_ms, + query +from pg_stat_statements +order by total_exec_time desc +limit 10; + +-- Find most frequent queries +select calls, query +from pg_stat_statements +order by calls desc +limit 10; + +-- Reset statistics after optimization +select pg_stat_statements_reset(); +-- Queries with high mean time (candidates for optimization) +select query, mean_exec_time, calls +from pg_stat_statements +where mean_exec_time > 100 -- > 100ms average +order by mean_exec_time desc; +``` + +Key metrics to monitor: + +Reference: https://supabase.com/docs/guides/database/extensions/pg_stat_statements + +--- + +### 7.2 Maintain Table Statistics with VACUUM and ANALYZE + +**Impact: MEDIUM (2-10x better query plans with accurate statistics)** + +Outdated statistics cause the query planner to make poor decisions. VACUUM reclaims space, ANALYZE updates statistics. 
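+
+Application code can help here in one small way: run ANALYZE at the end of large batch loads so the planner sees the new distribution immediately instead of waiting for autovacuum. A minimal sketch assuming the node-postgres `pg` client:
+
+```typescript
+import { Pool } from "pg";
+
+const pool = new Pool();
+
+// Call once at the end of a bulk import job (see rule 6.1 for the load itself)
+// so queries issued right after the import get sensible plans.
+export async function refreshStatsAfterImport() {
+  await pool.query("analyze events");
+}
+```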
+ +**Incorrect (stale statistics):** + +```sql +-- Table has 1M rows but stats say 1000 +-- Query planner chooses wrong strategy +explain select * from orders where status = 'pending'; +-- Shows: Seq Scan (because stats show small table) +-- Actually: Index Scan would be much faster +``` + +**Correct (maintain fresh statistics):** + +```sql +-- Manually analyze after large data changes +analyze orders; + +-- Analyze specific columns used in WHERE clauses +analyze orders (status, created_at); + +-- Check when tables were last analyzed +select + relname, + last_vacuum, + last_autovacuum, + last_analyze, + last_autoanalyze +from pg_stat_user_tables +order by last_analyze nulls first; +-- Increase frequency for high-churn tables +alter table orders set ( + autovacuum_vacuum_scale_factor = 0.05, -- Vacuum at 5% dead tuples (default 20%) + autovacuum_analyze_scale_factor = 0.02 -- Analyze at 2% changes (default 10%) +); + +-- Check autovacuum status +select * from pg_stat_progress_vacuum; +``` + +Autovacuum tuning for busy tables: + +Reference: https://supabase.com/docs/guides/database/database-size#vacuum-operations + +--- + +### 7.3 Use EXPLAIN ANALYZE to Diagnose Slow Queries + +**Impact: LOW-MEDIUM (Identify exact bottlenecks in query execution)** + +EXPLAIN ANALYZE executes the query and shows actual timings, revealing the true performance bottlenecks. + +**Incorrect (guessing at performance issues):** + +```sql +-- Query is slow, but why? +select * from orders where customer_id = 123 and status = 'pending'; +-- "It must be missing an index" - but which one? +``` + +**Correct (use EXPLAIN ANALYZE):** + +```sql +explain (analyze, buffers, format text) +select * from orders where customer_id = 123 and status = 'pending'; + +-- Output reveals the issue: +-- Seq Scan on orders (cost=0.00..25000.00 rows=50 width=100) (actual time=0.015..450.123 rows=50 loops=1) +-- Filter: ((customer_id = 123) AND (status = 'pending'::text)) +-- Rows Removed by Filter: 999950 +-- Buffers: shared hit=5000 read=15000 +-- Planning Time: 0.150 ms +-- Execution Time: 450.500 ms +-- Seq Scan on large tables = missing index +-- Rows Removed by Filter = poor selectivity or missing index +-- Buffers: read >> hit = data not cached, needs more memory +-- Nested Loop with high loops = consider different join strategy +-- Sort Method: external merge = work_mem too low +``` + +Key things to look for: + +Reference: https://supabase.com/docs/guides/database/inspect + +--- + +## 8. Advanced Features + +**Impact: LOW** + +Full-text search, JSONB optimization, PostGIS, extensions, and advanced Postgres features. + +### 8.1 Index JSONB Columns for Efficient Querying + +**Impact: MEDIUM (10-100x faster JSONB queries with proper indexing)** + +**Prerequisites:** PostgreSQL 9.4+ + +JSONB queries without indexes scan the entire table. Use GIN indexes for containment queries. 
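+
+When issuing containment queries from application code, pass the whole JSON document as a single parameter so the GIN index below can be used. A minimal sketch assuming the node-postgres `pg` client:
+
+```typescript
+import { Pool } from "pg";
+
+const pool = new Pool();
+
+// Containment filter (@>) with a parameterized JSONB value.
+export async function productsWithAttributes(filter: Record<string, unknown>) {
+  const { rows } = await pool.query(
+    "select * from products where attributes @> $1::jsonb",
+    [JSON.stringify(filter)],
+  );
+  return rows;
+}
+
+// e.g. productsWithAttributes({ color: "red" })
+```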
+ +**Incorrect (no index on JSONB):** + +```sql +create table products ( + id bigint primary key, + attributes jsonb +); + +-- Full table scan for every query +select * from products where attributes @> '{"color": "red"}'; +select * from products where attributes->>'brand' = 'Nike'; +``` + +**Correct (GIN index for JSONB):** + +```sql +-- GIN index for containment operators (@>, ?, ?&, ?|) +create index products_attrs_gin on products using gin (attributes); + +-- Now containment queries use the index +select * from products where attributes @> '{"color": "red"}'; + +-- For specific key lookups, use expression index +create index products_brand_idx on products ((attributes->>'brand')); +select * from products where attributes->>'brand' = 'Nike'; +-- jsonb_ops (default): supports all operators, larger index +create index idx1 on products using gin (attributes); + +-- jsonb_path_ops: only @> operator, but 2-3x smaller index +create index idx2 on products using gin (attributes jsonb_path_ops); +``` + +Choose the right operator class: + +Reference: https://www.postgresql.org/docs/current/datatype-json.html#JSON-INDEXING + +--- + +### 8.2 Use tsvector for Full-Text Search + +**Impact: MEDIUM (100x faster than LIKE, with ranking support)** + +LIKE with wildcards can't use indexes. Full-text search with tsvector is orders of magnitude faster. + +**Incorrect (LIKE pattern matching):** + +```sql +-- Cannot use index, scans all rows +select * from articles where content like '%postgresql%'; + +-- Case-insensitive makes it worse +select * from articles where lower(content) like '%postgresql%'; +``` + +**Correct (full-text search with tsvector):** + +```sql +-- Add tsvector column and index +alter table articles add column search_vector tsvector + generated always as (to_tsvector('english', coalesce(title,'') || ' ' || coalesce(content,''))) stored; + +create index articles_search_idx on articles using gin (search_vector); + +-- Fast full-text search +select * from articles +where search_vector @@ to_tsquery('english', 'postgresql & performance'); + +-- With ranking +select *, ts_rank(search_vector, query) as rank +from articles, to_tsquery('english', 'postgresql') query +where search_vector @@ query +order by rank desc; +-- AND: both terms required +to_tsquery('postgresql & performance') + +-- OR: either term +to_tsquery('postgresql | mysql') + +-- Prefix matching +to_tsquery('post:*') +``` + +Search multiple terms: + +Reference: https://supabase.com/docs/guides/database/full-text-search + +--- + +## References + +- https://www.postgresql.org/docs/current/ +- https://supabase.com/docs +- https://wiki.postgresql.org/wiki/Performance_Optimization +- https://supabase.com/docs/guides/database/overview +- https://supabase.com/docs/guides/auth/row-level-security diff --git a/skills/postgres-best-practices/README.md b/skills/postgres-best-practices/README.md index 8b71afd..32758f0 100644 --- a/skills/postgres-best-practices/README.md +++ b/skills/postgres-best-practices/README.md @@ -3,6 +3,26 @@ This repository contains Postgres performance optimization rules optimized for AI agents and LLMs. 
+## Requirements + +- **Minimum PostgreSQL Version:** 9.5 (some rules require newer versions) +- **Recommended Version:** 15+ +- **Optional Extensions:** pg_stat_statements (for monitoring rules) + +### Version Compatibility + +Some rules require specific PostgreSQL versions due to feature availability: + +| Feature | Min Version | Affected Rules | +|---------|-------------|----------------| +| ON CONFLICT (UPSERT) | 9.5 | data-upsert | +| SKIP LOCKED | 9.5 | lock-skip-locked | +| JSONB type | 9.4 | advanced-jsonb-indexing | +| Declarative Partitioning | 10 | schema-partitioning | +| Covering Indexes (INCLUDE) | 11 | query-covering-indexes | + +Rules include `minVersion` and `extensions` frontmatter fields. Agents should check these against the target environment before recommending specific rules. + ## Quick Start ```bash diff --git a/skills/postgres-best-practices/SKILL.md b/skills/postgres-best-practices/SKILL.md index c1ad772..02bf7be 100644 --- a/skills/postgres-best-practices/SKILL.md +++ b/skills/postgres-best-practices/SKILL.md @@ -55,3 +55,52 @@ Each rule file contains: ## Full Compiled Document For the complete guide with all rules expanded: `AGENTS.md` + +## Runtime Detection + +Before applying rules, agents should detect the PostgreSQL environment to ensure compatibility: + +### Version Detection + +```sql +SELECT version(); +-- Example output: PostgreSQL 15.4 on x86_64-pc-linux-gnu +``` + +Extract the major version number (e.g., "15" from "PostgreSQL 15.4") to check against rule `minVersion` requirements. + +### Extension Availability + +```sql +SELECT name, installed_version, default_version +FROM pg_available_extensions +WHERE name IN ('pg_stat_statements', 'pgcrypto', 'uuid-ossp', 'postgis') +ORDER BY name; +``` + +Check if required extensions are available before recommending rules that depend on them. + +### Configuration Check + +```sql +SELECT name, setting +FROM pg_settings +WHERE name IN ('shared_preload_libraries', 'max_connections', 'work_mem'); +``` + +## Rule Filtering + +Only recommend rules where: +- `minVersion` <= detected PostgreSQL version (or minVersion is unset) +- All required `extensions` are available or installable +- The rule is appropriate for the user's deployment context + +### Version Compatibility + +| Feature | Min Version | Affected Rules | +|---------|-------------|----------------| +| ON CONFLICT (UPSERT) | 9.5 | data-upsert | +| SKIP LOCKED | 9.5 | lock-skip-locked | +| JSONB type | 9.4 | advanced-jsonb-indexing | +| Declarative Partitioning | 10 | schema-partitioning | +| Covering Indexes (INCLUDE) | 11 | query-covering-indexes | diff --git a/skills/postgres-best-practices/evals/README.md b/skills/postgres-best-practices/evals/README.md new file mode 100644 index 0000000..7619c2a --- /dev/null +++ b/skills/postgres-best-practices/evals/README.md @@ -0,0 +1,308 @@ +# PostgreSQL Best Practices - Evaluation Scenarios + +This directory contains evaluation scenarios to test that AI agents correctly apply the PostgreSQL best practices rules, including proper handling of version constraints and extension requirements. + +## Overview + +The evals use [Vitest](https://vitest.dev/) as the test framework and the [Vercel AI SDK](https://ai-sdk.dev/) to interact with Claude for generating responses. Each scenario tests a specific aspect of rule application. 
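+
+The version-constraint scenarios below assume the agent performs a check roughly like the following before citing a rule: parse the version out of `SELECT version()` and compare it against the rule's `minVersion` frontmatter (keeping the minor number matters for the 9.x scenarios). A minimal sketch; these helpers are illustrative and not part of the runner:
+
+```typescript
+// "PostgreSQL 15.4 on x86_64-pc-linux-gnu" -> [15, 4]
+export function parseVersion(versionString: string): [number, number] | undefined {
+  const m = versionString.match(/PostgreSQL (\d+)(?:\.(\d+))?/);
+  return m ? [Number(m[1]), Number(m[2] ?? 0)] : undefined;
+}
+
+// minVersion values come from rule frontmatter, e.g. "9.5", "10", "11"
+export function meetsMinVersion(detected: [number, number], minVersion: string): boolean {
+  const [minMajor, minMinor = 0] = minVersion.split(".").map(Number);
+  const [major, minor] = detected;
+  return major > minMajor || (major === minMajor && minor >= minMinor);
+}
+```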
+ +## Running Evals + +```bash +# Install dependencies +npm install + +# Run all evals +npm run eval + +# Run in watch mode +npm run eval:watch + +# Run specific scenario +npm run eval -- -t "Missing Index" +``` + +## Environment Setup + +Set your Anthropic API key: + +```bash +export ANTHROPIC_API_KEY=your_api_key +``` + +## Evaluation Scenarios + +### Category 1: Core Query Patterns + +#### 1.1 Missing Index Detection + +| Field | Value | +|-------|-------| +| **ID** | `missing-index-detection` | +| **File** | `scenarios/missing-index.eval.ts` | +| **Difficulty** | Basic | +| **Tests** | Agent identifies missing indexes on filtered columns | + +**Description:** +Tests that the agent correctly identifies when a query would benefit from an index and recommends creating one. + +**Input:** +- Schema: `orders` table with no indexes beyond PK +- Query: `SELECT * FROM orders WHERE customer_id = 12345 AND status = 'pending'` + +**Expected Output:** +- Should recommend rule 1.1 (query-missing-indexes) +- Must mention "index" and "customer_id" + +**Expected Reasoning:** +1. Identify that the query filters on customer_id and status +2. Recognize that without an index, this causes a sequential scan +3. Recommend creating an index on the filtered columns + +--- + +#### 1.2 N+1 Query Detection + +| Field | Value | +|-------|-------| +| **ID** | `n-plus-one-detection` | +| **File** | `scenarios/n-plus-one.eval.ts` | +| **Difficulty** | Intermediate | +| **Tests** | Agent identifies N+1 query pattern in code | + +**Description:** +Tests that the agent recognizes N+1 query patterns in application code and recommends using JOINs. + +**Input:** +- Schema: `users` and `posts` tables with relationship +- Code snippet showing loop that queries for each post's author + +**Expected Output:** +- Should recommend rule 6.1 (data-n-plus-one) +- Must mention "JOIN" and "N+1" + +**Expected Reasoning:** +1. Identify the N+1 query pattern (1 + N queries) +2. Recognize this as a common performance anti-pattern +3. Recommend using a JOIN to fetch all data in a single query + +--- + +#### 1.3 Covering Index Suggestion + +| Field | Value | +|-------|-------| +| **ID** | `covering-index-suggestion` | +| **File** | `scenarios/covering-index.eval.ts` | +| **Difficulty** | Intermediate | +| **Tests** | Agent suggests INCLUDE clause for covering index | + +**Description:** +Tests that the agent recommends covering indexes when SELECT columns aren't in the index. + +**Input:** +- Schema: `users` table with index on `email` only +- Query: `SELECT email, name, department FROM users WHERE email = ?` +- PostgreSQL version: 15.4 + +**Expected Output:** +- Should recommend rule 1.2 (query-covering-indexes) +- Must mention "INCLUDE" and "covering" + +**Expected Reasoning:** +1. Identify that query selects columns not in the index +2. Recognize this causes heap fetches +3. Recommend using INCLUDE clause for index-only scans + +--- + +### Category 2: Version Constraints + +#### 2.1 PG10 - No Covering Index + +| Field | Value | +|-------|-------| +| **ID** | `version-constraint-pg10-no-covering` | +| **File** | `scenarios/version-constraint.eval.ts` | +| **Difficulty** | Intermediate | +| **Tests** | Agent respects PG11+ requirement for INCLUDE | + +**Description:** +Tests that the agent does NOT recommend INCLUDE clause when PostgreSQL version is 10 (INCLUDE requires PG11+). 
+ +**Input:** +- Same setup as covering index scenario +- PostgreSQL version: 10.0 + +**Expected Output:** +- Should NOT recommend rule 1.2 +- Must NOT contain "INCLUDE" +- Should suggest alternative optimizations + +**Expected Reasoning:** +1. Recognize PostgreSQL 10 is specified +2. Check that INCLUDE requires PG11+ +3. Avoid recommending incompatible features +4. Suggest PG10-compatible alternatives + +--- + +#### 2.2 PG9.3 - No UPSERT + +| Field | Value | +|-------|-------| +| **ID** | `version-constraint-pg93-no-upsert` | +| **File** | `scenarios/version-constraint.eval.ts` | +| **Difficulty** | Intermediate | +| **Tests** | Agent respects PG9.5+ requirement for ON CONFLICT | + +**Description:** +Tests that the agent does NOT recommend ON CONFLICT when PostgreSQL version is 9.3 (requires PG9.5+). + +**Input:** +- Schema: `settings` table with composite primary key +- Query: Need insert-or-update functionality +- PostgreSQL version: 9.3 + +**Expected Output:** +- Should NOT recommend rule 6.3 (data-upsert) +- Must NOT contain "ON CONFLICT" +- Should suggest CTE-based or try/catch pattern + +**Expected Reasoning:** +1. Recognize PostgreSQL 9.3 is specified +2. Check that ON CONFLICT requires PG9.5+ +3. Avoid recommending UPSERT syntax +4. Suggest compatible alternatives + +--- + +### Category 3: Extension Requirements + +#### 3.1 Extension Available + +| Field | Value | +|-------|-------| +| **ID** | `extension-available-pg-stat-statements` | +| **File** | `scenarios/extension-available.eval.ts` | +| **Difficulty** | Basic | +| **Tests** | Agent recommends extension when available | + +**Description:** +Tests that the agent recommends pg_stat_statements when it's listed as available. + +**Input:** +- General schema +- Query: How to identify slow queries +- Available extensions: pg_stat_statements, pgcrypto, uuid-ossp + +**Expected Output:** +- Should recommend rule 7.1 (monitor-pg-stat-statements) +- Must mention "pg_stat_statements" + +**Expected Reasoning:** +1. Recognize query monitoring problem +2. Check that pg_stat_statements is available +3. Recommend enabling and using the extension + +--- + +#### 3.2 Extension Unavailable + +| Field | Value | +|-------|-------| +| **ID** | `extension-unavailable-no-pg-stat-statements` | +| **File** | `scenarios/extension-unavailable.eval.ts` | +| **Difficulty** | Intermediate | +| **Tests** | Agent provides alternatives when extension unavailable | + +**Description:** +Tests that the agent suggests alternatives when pg_stat_statements is not available. + +**Input:** +- General schema +- Query: How to identify slow queries +- Available extensions: [] (none) +- Context: Managed database, can't install extensions + +**Expected Output:** +- Should NOT recommend pg_stat_statements +- Must mention "EXPLAIN" and "ANALYZE" +- Should suggest built-in alternatives + +**Expected Reasoning:** +1. Recognize no extensions are available +2. Avoid recommending pg_stat_statements +3. Suggest EXPLAIN ANALYZE, log_min_duration_statement, or pg_stat_activity + +--- + +## Adding New Scenarios + +1. Create a new file in `scenarios/` following the naming convention `{name}.eval.ts` + +2. 
Define the scenario using the `EvalScenario` interface: + +```typescript +import { describe, it, expect } from "vitest"; +import { runEval } from "../runner.js"; +import type { EvalScenario } from "../types.js"; + +const scenario: EvalScenario = { + id: "unique-scenario-id", + name: "Human Readable Name", + description: "What this scenario tests", + category: "query-performance" | "version-constraints" | "extension-requirements", + difficulty: "basic" | "intermediate" | "advanced", + input: { + schema: "SQL schema definition", + userQuery: "User's question or problem", + postgresVersion: "15.4", // Optional + availableExtensions: ["list"], // Optional + }, + expectedOutput: { + shouldRecommendRules: ["1.1"], + shouldNotRecommendRules: ["2.3"], // Optional + mustContain: ["keyword"], + mustNotContain: ["avoid"], // Optional + }, + expectedReasoning: [ + "Step 1 of expected reasoning", + "Step 2 of expected reasoning", + ], +}; + +describe("Scenario Name", () => { + it("should do something specific", async () => { + const result = await runEval(scenario); + // Add assertions + }); +}); + +export { scenario }; +``` + +3. Run the new scenario: `npm run eval -- -t "Scenario Name"` + +## Evaluation Criteria + +Each scenario is evaluated against: + +1. **Rule References**: Does the response reference the expected rules? +2. **Must Contain**: Does the response include required keywords? +3. **Must Not Contain**: Does the response avoid prohibited content? +4. **Version Constraints**: Are version requirements respected? +5. **Extension Requirements**: Are extension dependencies checked? + +## Troubleshooting + +**Evals timing out:** +- Increase timeout in `vitest.config.ts` (default: 60s) +- Check API key is valid + +**Flaky results:** +- Set `temperature: 0` in runner config +- Make assertions more flexible (check for concept presence, not exact wording) + +**Missing AGENTS.md:** +- Run `npm run build` from repository root first diff --git a/skills/postgres-best-practices/evals/package.json b/skills/postgres-best-practices/evals/package.json new file mode 100644 index 0000000..da22c87 --- /dev/null +++ b/skills/postgres-best-practices/evals/package.json @@ -0,0 +1,18 @@ +{ + "name": "postgres-best-practices-evals", + "version": "1.0.0", + "description": "Evaluation scenarios for Postgres Best Practices skill", + "type": "module", + "scripts": { + "eval": "vitest run", + "eval:watch": "vitest", + "eval:ui": "vitest --ui" + }, + "devDependencies": { + "@ai-sdk/anthropic": "^0.0.30", + "@types/node": "^20.0.0", + "ai": "^3.0.0", + "typescript": "^5.0.0", + "vitest": "^1.0.0" + } +} diff --git a/skills/postgres-best-practices/evals/runner.ts b/skills/postgres-best-practices/evals/runner.ts new file mode 100644 index 0000000..3bf51df --- /dev/null +++ b/skills/postgres-best-practices/evals/runner.ts @@ -0,0 +1,192 @@ +import { generateText } from "ai"; +import { anthropic } from "@ai-sdk/anthropic"; +import { readFileSync } from "node:fs"; +import { join } from "node:path"; +import type { CriterionResult, EvalConfig, EvalResult, EvalScenario } from "./types.js"; + +const DEFAULT_CONFIG: EvalConfig = { + agentsPath: join(import.meta.dirname, "..", "AGENTS.md"), + model: "claude-sonnet-4-20250514", + maxTokens: 2048, + temperature: 0, +}; + +/** + * Build the user prompt from a scenario + */ +function buildUserPrompt(scenario: EvalScenario): string { + const parts: string[] = []; + + // Add version context if specified + if (scenario.input.postgresVersion) { + parts.push(`PostgreSQL Version: 
${scenario.input.postgresVersion}`);
+  }
+
+  // Add extensions context if specified
+  if (scenario.input.availableExtensions) {
+    if (scenario.input.availableExtensions.length === 0) {
+      parts.push("Available Extensions: None installed");
+    } else {
+      parts.push(`Available Extensions: ${scenario.input.availableExtensions.join(", ")}`);
+    }
+  }
+
+  // Add additional context if provided
+  if (scenario.input.context) {
+    parts.push(`Context: ${scenario.input.context}`);
+  }
+
+  // Add schema
+  parts.push(`\nSchema:\n\`\`\`sql\n${scenario.input.schema}\n\`\`\``);
+
+  // Add user query
+  parts.push(`\nQuestion: ${scenario.input.userQuery}`);
+
+  return parts.join("\n");
+}
+
+/**
+ * Extract rule IDs mentioned in a response
+ */
+function extractRuleIds(response: string): string[] {
+  // Match patterns like "1.1", "2.3", etc.
+  const rulePattern = /\b(\d+\.\d+)\b/g;
+  const matches = response.match(rulePattern) || [];
+  return [...new Set(matches)];
+}
+
+/**
+ * Evaluate the response against expected criteria
+ */
+function evaluateCriteria(scenario: EvalScenario, response: string): CriterionResult[] {
+  const results: CriterionResult[] = [];
+  const responseLower = response.toLowerCase();
+
+  // Check mustContain criteria
+  for (const term of scenario.expectedOutput.mustContain) {
+    const found = responseLower.includes(term.toLowerCase());
+    results.push({
+      criterion: `Response should contain "${term}"`,
+      passed: found,
+      evidence: found ? "Found in response" : "Not found in response",
+    });
+  }
+
+  // Check mustNotContain criteria
+  if (scenario.expectedOutput.mustNotContain) {
+    for (const term of scenario.expectedOutput.mustNotContain) {
+      const found = responseLower.includes(term.toLowerCase());
+      results.push({
+        criterion: `Response should NOT contain "${term}"`,
+        passed: !found,
+        evidence: found ? "Found in response (should not be present)" : "Not found (correct)",
+      });
+    }
+  }
+
+  // Check shouldRecommendRules
+  const referencedRules = extractRuleIds(response);
+  for (const ruleId of scenario.expectedOutput.shouldRecommendRules) {
+    const found = referencedRules.includes(ruleId);
+    results.push({
+      criterion: `Should recommend rule ${ruleId}`,
+      passed: found,
+      evidence: found ? "Rule referenced" : "Rule not referenced",
+    });
+  }
+
+  // Check shouldNotRecommendRules
+  if (scenario.expectedOutput.shouldNotRecommendRules) {
+    for (const ruleId of scenario.expectedOutput.shouldNotRecommendRules) {
+      const found = referencedRules.includes(ruleId);
+      results.push({
+        criterion: `Should NOT recommend rule ${ruleId}`,
+        passed: !found,
+        evidence: found ? "Rule referenced (should not be)" : "Rule not referenced (correct)",
+      });
+    }
+  }
+
+  return results;
+}
+
+/**
+ * Run a single evaluation scenario
+ */
+export async function runEval(
+  scenario: EvalScenario,
+  config: Partial<EvalConfig> = {}
+): Promise<EvalResult> {
+  const finalConfig = { ...DEFAULT_CONFIG, ...config };
+
+  try {
+    // Load AGENTS.md
+    const agentsMd = readFileSync(finalConfig.agentsPath, "utf-8");
+
+    const systemPrompt = `You are a PostgreSQL expert assistant.
Use the following knowledge base to provide accurate recommendations: + +${agentsMd} + +IMPORTANT: When the user specifies a PostgreSQL version or available extensions, you MUST respect those constraints: +- Do not recommend features that require a higher PostgreSQL version than specified +- Do not recommend extensions that are not available +- If a recommended optimization requires a specific version or extension, mention the prerequisite + +When making recommendations, reference specific rule IDs (e.g., "1.1", "2.3") from the knowledge base.`; + + const userPrompt = buildUserPrompt(scenario); + + const start = Date.now(); + const { text } = await generateText({ + model: anthropic(finalConfig.model!), + system: systemPrompt, + prompt: userPrompt, + maxTokens: finalConfig.maxTokens, + temperature: finalConfig.temperature, + }); + const latencyMs = Date.now() - start; + + // Evaluate the response + const criteriaResults = evaluateCriteria(scenario, text); + const rulesReferenced = extractRuleIds(text); + const passed = criteriaResults.every((r) => r.passed); + + return { + scenarioId: scenario.id, + passed, + rulesReferenced, + criteriaResults, + response: text, + latencyMs, + }; + } catch (error) { + return { + scenarioId: scenario.id, + passed: false, + rulesReferenced: [], + criteriaResults: [], + response: "", + latencyMs: 0, + error: error instanceof Error ? error.message : String(error), + }; + } +} + +/** + * Run multiple evaluation scenarios + */ +export async function runEvals( + scenarios: EvalScenario[], + config: Partial = {} +): Promise { + const results: EvalResult[] = []; + + for (const scenario of scenarios) { + console.log(`Running eval: ${scenario.name}...`); + const result = await runEval(scenario, config); + results.push(result); + console.log(` ${result.passed ? "PASS" : "FAIL"} (${result.latencyMs}ms)`); + } + + return results; +} diff --git a/skills/postgres-best-practices/evals/scenarios/covering-index.eval.ts b/skills/postgres-best-practices/evals/scenarios/covering-index.eval.ts new file mode 100644 index 0000000..e0d4e24 --- /dev/null +++ b/skills/postgres-best-practices/evals/scenarios/covering-index.eval.ts @@ -0,0 +1,62 @@ +import { describe, it, expect } from "vitest"; +import { runEval } from "../runner.js"; +import type { EvalScenario } from "../types.js"; + +const scenario: EvalScenario = { + id: "covering-index-suggestion", + name: "Covering Index Suggestion", + description: + "Agent should suggest using INCLUDE clause for columns in SELECT that aren't in WHERE clause", + category: "query-performance", + difficulty: "intermediate", + input: { + schema: ` +CREATE TABLE users ( + id SERIAL PRIMARY KEY, + email VARCHAR(255) NOT NULL, + name VARCHAR(100), + department VARCHAR(50), + created_at TIMESTAMPTZ DEFAULT NOW() +); + +CREATE INDEX users_email_idx ON users (email); +-- Table has 2 million rows +`, + userQuery: `This query still does heap fetches even though we have an index on email: + +SELECT email, name, department FROM users WHERE email = 'user@example.com' + +EXPLAIN shows "Index Scan" but not "Index Only Scan". 
How can I avoid the table lookup?`, + postgresVersion: "15.4", + }, + expectedOutput: { + shouldRecommendRules: ["1.2"], // query-covering-indexes + mustContain: ["include", "covering"], + }, + expectedReasoning: [ + "Identify that the query selects columns (name, department) not in the index", + "Recognize this causes additional heap fetches after the index scan", + "Recommend using INCLUDE clause to create a covering index", + "Explain that this enables index-only scans", + ], +}; + +describe("Covering Index Suggestion", () => { + it("should recommend INCLUDE clause for covering index", async () => { + const result = await runEval(scenario); + + console.log("Response:", result.response); + console.log("Criteria results:", result.criteriaResults); + + // Response should mention INCLUDE keyword + expect(result.response.toLowerCase()).toContain("include"); + + // Response should mention covering index concept + const responseLower = result.response.toLowerCase(); + expect( + responseLower.includes("covering") || responseLower.includes("index-only") + ).toBe(true); + }); +}); + +export { scenario }; diff --git a/skills/postgres-best-practices/evals/scenarios/extension-available.eval.ts b/skills/postgres-best-practices/evals/scenarios/extension-available.eval.ts new file mode 100644 index 0000000..0b87c31 --- /dev/null +++ b/skills/postgres-best-practices/evals/scenarios/extension-available.eval.ts @@ -0,0 +1,56 @@ +import { describe, it, expect } from "vitest"; +import { runEval } from "../runner.js"; +import type { EvalScenario } from "../types.js"; + +const scenario: EvalScenario = { + id: "extension-available-pg-stat-statements", + name: "Extension Available - pg_stat_statements", + description: + "Agent should recommend pg_stat_statements for query monitoring when the extension is available", + category: "extension-requirements", + difficulty: "basic", + input: { + schema: ` +-- Production database with various tables +CREATE TABLE users (id SERIAL PRIMARY KEY, email VARCHAR(255)); +CREATE TABLE orders (id SERIAL PRIMARY KEY, user_id INT, total DECIMAL); +CREATE TABLE products (id SERIAL PRIMARY KEY, name VARCHAR(200), price DECIMAL); +`, + userQuery: + "Our database is slow but we don't know which queries are causing the problem. 
How can we identify the slowest queries?", + postgresVersion: "15.4", + availableExtensions: ["pg_stat_statements", "pgcrypto", "uuid-ossp"], + }, + expectedOutput: { + shouldRecommendRules: ["7.1"], // monitor-pg-stat-statements + mustContain: ["pg_stat_statements"], + }, + expectedReasoning: [ + "Recognize this is a query monitoring/performance diagnosis problem", + "Check that pg_stat_statements is available in the extensions list", + "Recommend enabling pg_stat_statements for query analysis", + "Explain how to use it to find slow queries", + ], +}; + +describe("Extension Available - pg_stat_statements", () => { + it("should recommend pg_stat_statements when available", async () => { + const result = await runEval(scenario); + + console.log("Response:", result.response); + console.log("Criteria results:", result.criteriaResults); + + // Response should mention pg_stat_statements + expect(result.response.toLowerCase()).toContain("pg_stat_statements"); + + // Should suggest enabling/using the extension + const responseLower = result.response.toLowerCase(); + expect( + responseLower.includes("create extension") || + responseLower.includes("enable") || + responseLower.includes("query") + ).toBe(true); + }); +}); + +export { scenario }; diff --git a/skills/postgres-best-practices/evals/scenarios/extension-unavailable.eval.ts b/skills/postgres-best-practices/evals/scenarios/extension-unavailable.eval.ts new file mode 100644 index 0000000..14d4b00 --- /dev/null +++ b/skills/postgres-best-practices/evals/scenarios/extension-unavailable.eval.ts @@ -0,0 +1,56 @@ +import { describe, it, expect } from "vitest"; +import { runEval } from "../runner.js"; +import type { EvalScenario } from "../types.js"; + +const scenario: EvalScenario = { + id: "extension-unavailable-no-pg-stat-statements", + name: "Extension Unavailable - No pg_stat_statements", + description: + "Agent should provide alternatives when pg_stat_statements is not available for query monitoring", + category: "extension-requirements", + difficulty: "intermediate", + input: { + schema: ` +-- Production database with various tables +CREATE TABLE users (id SERIAL PRIMARY KEY, email VARCHAR(255)); +CREATE TABLE orders (id SERIAL PRIMARY KEY, user_id INT, total DECIMAL); +CREATE TABLE products (id SERIAL PRIMARY KEY, name VARCHAR(200), price DECIMAL); +`, + userQuery: + "Our database is slow but we don't know which queries are causing the problem. 
How can we identify the slowest queries?", + postgresVersion: "15.4", + availableExtensions: [], // No extensions available + context: + "This is a managed database environment where we cannot install additional extensions.", + }, + expectedOutput: { + shouldRecommendRules: [], // Should not recommend pg_stat_statements rule + shouldNotRecommendRules: ["7.1"], // monitor-pg-stat-statements + mustContain: ["explain", "analyze"], + mustNotContain: ["pg_stat_statements"], + }, + expectedReasoning: [ + "Recognize that no extensions are available", + "Check that pg_stat_statements cannot be used", + "Avoid recommending pg_stat_statements", + "Suggest alternative approaches like EXPLAIN ANALYZE, log_min_duration_statement, or pg_stat_activity", + ], +}; + +describe("Extension Unavailable - No pg_stat_statements", () => { + it("should suggest alternatives when pg_stat_statements is unavailable", async () => { + const result = await runEval(scenario); + + console.log("Response:", result.response); + console.log("Criteria results:", result.criteriaResults); + + // Response should NOT primarily recommend pg_stat_statements + // (it might mention it as unavailable, but shouldn't suggest installing it) + const responseLower = result.response.toLowerCase(); + + // Should suggest EXPLAIN ANALYZE as an alternative + expect(responseLower.includes("explain") && responseLower.includes("analyze")).toBe(true); + }); +}); + +export { scenario }; diff --git a/skills/postgres-best-practices/evals/scenarios/missing-index.eval.ts b/skills/postgres-best-practices/evals/scenarios/missing-index.eval.ts new file mode 100644 index 0000000..8db466a --- /dev/null +++ b/skills/postgres-best-practices/evals/scenarios/missing-index.eval.ts @@ -0,0 +1,56 @@ +import { describe, it, expect } from "vitest"; +import { runEval } from "../runner.js"; +import type { EvalScenario } from "../types.js"; + +const scenario: EvalScenario = { + id: "missing-index-detection", + name: "Missing Index Detection", + description: + "Agent should identify missing index on WHERE clause columns and recommend creating an appropriate index", + category: "query-performance", + difficulty: "basic", + input: { + schema: ` +CREATE TABLE orders ( + id SERIAL PRIMARY KEY, + customer_id INT NOT NULL, + status VARCHAR(50), + total DECIMAL(10,2), + created_at TIMESTAMPTZ DEFAULT NOW() +); +-- No indexes besides primary key +-- Table has 5 million rows +`, + userQuery: + "This query is slow and takes 3 seconds: SELECT * FROM orders WHERE customer_id = 12345 AND status = 'pending'", + }, + expectedOutput: { + shouldRecommendRules: ["1.1"], // query-missing-indexes + mustContain: ["index", "customer_id"], + }, + expectedReasoning: [ + "Identify that the query filters on customer_id and status", + "Recognize that without an index, this causes a sequential scan", + "Recommend creating an index on the filtered columns", + ], +}; + +describe("Missing Index Detection", () => { + it("should recommend creating an index on filtered columns", async () => { + const result = await runEval(scenario); + + console.log("Response:", result.response); + console.log("Criteria results:", result.criteriaResults); + + // Check that key criteria passed + expect(result.criteriaResults.some((c) => c.criterion.includes("index") && c.passed)).toBe( + true + ); + + // Response should mention creating an index + expect(result.response.toLowerCase()).toContain("index"); + expect(result.response.toLowerCase()).toContain("customer_id"); + }); +}); + +export { scenario }; diff --git 
a/skills/postgres-best-practices/evals/scenarios/n-plus-one.eval.ts b/skills/postgres-best-practices/evals/scenarios/n-plus-one.eval.ts new file mode 100644 index 0000000..6268e86 --- /dev/null +++ b/skills/postgres-best-practices/evals/scenarios/n-plus-one.eval.ts @@ -0,0 +1,71 @@ +import { describe, it, expect } from "vitest"; +import { runEval } from "../runner.js"; +import type { EvalScenario } from "../types.js"; + +const scenario: EvalScenario = { + id: "n-plus-one-detection", + name: "N+1 Query Detection", + description: + "Agent should identify N+1 query pattern in application code and recommend using JOINs or batch queries", + category: "query-performance", + difficulty: "intermediate", + input: { + schema: ` +CREATE TABLE users ( + id SERIAL PRIMARY KEY, + name VARCHAR(100), + email VARCHAR(255) +); + +CREATE TABLE posts ( + id SERIAL PRIMARY KEY, + user_id INT REFERENCES users(id), + title VARCHAR(200), + content TEXT, + created_at TIMESTAMPTZ DEFAULT NOW() +); +`, + userQuery: `My API endpoint is slow. Here's the code: + +\`\`\`typescript +// Get all posts +const posts = await db.query('SELECT * FROM posts LIMIT 100'); + +// For each post, get the author +for (const post of posts) { + const author = await db.query('SELECT * FROM users WHERE id = $1', [post.user_id]); + post.author = author; +} +\`\`\` + +This makes 101 database queries. How can I optimize it?`, + }, + expectedOutput: { + shouldRecommendRules: ["6.1"], // data-n-plus-one + mustContain: ["join", "n+1"], + }, + expectedReasoning: [ + "Identify the N+1 query pattern (1 query for posts + N queries for users)", + "Recognize this as a common performance anti-pattern", + "Recommend using a JOIN to fetch all data in a single query", + "Optionally suggest using IN clause for batch fetching", + ], +}; + +describe("N+1 Query Detection", () => { + it("should identify N+1 pattern and recommend JOIN", async () => { + const result = await runEval(scenario); + + console.log("Response:", result.response); + console.log("Criteria results:", result.criteriaResults); + + // Response should mention JOIN + expect(result.response.toLowerCase()).toContain("join"); + + // Response should explain the N+1 problem + const responseLower = result.response.toLowerCase(); + expect(responseLower.includes("n+1") || responseLower.includes("n + 1")).toBe(true); + }); +}); + +export { scenario }; diff --git a/skills/postgres-best-practices/evals/scenarios/version-constraint.eval.ts b/skills/postgres-best-practices/evals/scenarios/version-constraint.eval.ts new file mode 100644 index 0000000..86adbd3 --- /dev/null +++ b/skills/postgres-best-practices/evals/scenarios/version-constraint.eval.ts @@ -0,0 +1,108 @@ +import { describe, it, expect } from "vitest"; +import { runEval } from "../runner.js"; +import type { EvalScenario } from "../types.js"; + +/** + * Scenario 1: PG10 - Should NOT recommend covering indexes (requires PG11+) + */ +const scenarioPg10NoCoveringIndex: EvalScenario = { + id: "version-constraint-pg10-no-covering", + name: "Version Constraint - PG10 No Covering Index", + description: + "Agent should NOT recommend INCLUDE clause on PostgreSQL 10 since it requires PG11+", + category: "version-constraints", + difficulty: "intermediate", + input: { + schema: ` +CREATE TABLE users ( + id SERIAL PRIMARY KEY, + email VARCHAR(255) NOT NULL, + name VARCHAR(100), + created_at TIMESTAMPTZ DEFAULT NOW() +); + +CREATE INDEX users_email_idx ON users (email); +`, + userQuery: + "How can I optimize this query to avoid heap fetches? 
SELECT email, name FROM users WHERE email = 'test@example.com'", + postgresVersion: "10.0", + }, + expectedOutput: { + shouldRecommendRules: [], + shouldNotRecommendRules: ["1.2"], // query-covering-indexes requires PG11 + mustContain: ["index"], + mustNotContain: ["include"], + }, + expectedReasoning: [ + "Recognize that PostgreSQL 10 is specified", + "Check that covering indexes (INCLUDE clause) require PG11+", + "Avoid recommending INCLUDE clause", + "Suggest alternative optimization strategies appropriate for PG10", + ], +}; + +/** + * Scenario 2: PG9.3 - Should NOT recommend UPSERT (requires PG9.5+) + */ +const scenarioPg93NoUpsert: EvalScenario = { + id: "version-constraint-pg93-no-upsert", + name: "Version Constraint - PG9.3 No UPSERT", + description: + "Agent should NOT recommend ON CONFLICT on PostgreSQL 9.3 since it requires PG9.5+", + category: "version-constraints", + difficulty: "intermediate", + input: { + schema: ` +CREATE TABLE settings ( + user_id INT NOT NULL, + key VARCHAR(50) NOT NULL, + value TEXT, + PRIMARY KEY (user_id, key) +); +`, + userQuery: + "I need to insert a setting if it doesn't exist, or update it if it does. How should I do this?", + postgresVersion: "9.3", + }, + expectedOutput: { + shouldRecommendRules: [], + shouldNotRecommendRules: ["6.3"], // data-upsert requires PG9.5 + mustContain: ["insert", "update"], + mustNotContain: ["on conflict"], + }, + expectedReasoning: [ + "Recognize that PostgreSQL 9.3 is specified", + "Check that ON CONFLICT (UPSERT) requires PG9.5+", + "Avoid recommending ON CONFLICT syntax", + "Suggest alternative pattern (e.g., CTE with INSERT/UPDATE, or try/catch approach)", + ], +}; + +describe("Version Constraint Tests", () => { + describe("PG10 - No Covering Index", () => { + it("should NOT recommend INCLUDE clause for PG10", async () => { + const result = await runEval(scenarioPg10NoCoveringIndex); + + console.log("Response:", result.response); + console.log("Criteria results:", result.criteriaResults); + + // Response should NOT contain INCLUDE recommendation + expect(result.response.toLowerCase()).not.toContain("include ("); + expect(result.response.toLowerCase()).not.toContain("include("); + }); + }); + + describe("PG9.3 - No UPSERT", () => { + it("should NOT recommend ON CONFLICT for PG9.3", async () => { + const result = await runEval(scenarioPg93NoUpsert); + + console.log("Response:", result.response); + console.log("Criteria results:", result.criteriaResults); + + // Response should NOT recommend ON CONFLICT + expect(result.response.toLowerCase()).not.toContain("on conflict"); + }); + }); +}); + +export { scenarioPg10NoCoveringIndex, scenarioPg93NoUpsert }; diff --git a/skills/postgres-best-practices/evals/tsconfig.json b/skills/postgres-best-practices/evals/tsconfig.json new file mode 100644 index 0000000..2451b91 --- /dev/null +++ b/skills/postgres-best-practices/evals/tsconfig.json @@ -0,0 +1,13 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "ESNext", + "moduleResolution": "bundler", + "esModuleInterop": true, + "strict": true, + "skipLibCheck": true, + "outDir": "dist", + "declaration": true + }, + "include": ["*.ts", "scenarios/**/*.ts"] +} diff --git a/skills/postgres-best-practices/evals/types.ts b/skills/postgres-best-practices/evals/types.ts new file mode 100644 index 0000000..ed013c6 --- /dev/null +++ b/skills/postgres-best-practices/evals/types.ts @@ -0,0 +1,112 @@ +/** + * Evaluation scenario definition + */ +export interface EvalScenario { + /** Unique identifier for the scenario */ + id: 
string; + + /** Human-readable name */ + name: string; + + /** Description of what this scenario tests */ + description: string; + + /** Category of the scenario */ + category: "query-performance" | "version-constraints" | "extension-requirements"; + + /** Difficulty level */ + difficulty: "basic" | "intermediate" | "advanced"; + + /** Input for the scenario */ + input: { + /** SQL schema context */ + schema: string; + + /** User's question or request */ + userQuery: string; + + /** Optional PostgreSQL version (e.g., "10.0", "15.4") */ + postgresVersion?: string; + + /** Optional list of available extensions */ + availableExtensions?: string[]; + + /** Additional context */ + context?: string; + }; + + /** Expected output criteria */ + expectedOutput: { + /** Rule IDs that should be recommended */ + shouldRecommendRules: string[]; + + /** Rule IDs that should NOT be recommended (version/extension constraints) */ + shouldNotRecommendRules?: string[]; + + /** Strings that must appear in the response */ + mustContain: string[]; + + /** Strings that must NOT appear in the response */ + mustNotContain?: string[]; + }; + + /** Expected reasoning steps the agent should follow */ + expectedReasoning: string[]; +} + +/** + * Result of evaluating a single criterion + */ +export interface CriterionResult { + /** Description of the criterion */ + criterion: string; + + /** Whether the criterion passed */ + passed: boolean; + + /** Evidence or explanation */ + evidence?: string; +} + +/** + * Result of running an evaluation scenario + */ +export interface EvalResult { + /** Scenario ID */ + scenarioId: string; + + /** Whether all criteria passed */ + passed: boolean; + + /** Rule IDs that were referenced in the response */ + rulesReferenced: string[]; + + /** Results for each evaluation criterion */ + criteriaResults: CriterionResult[]; + + /** The agent's full response */ + response: string; + + /** Time taken in milliseconds */ + latencyMs: number; + + /** Error message if evaluation failed */ + error?: string; +} + +/** + * Configuration for the eval runner + */ +export interface EvalConfig { + /** Path to AGENTS.md file */ + agentsPath: string; + + /** Model to use for evaluation */ + model?: string; + + /** Maximum tokens for response */ + maxTokens?: number; + + /** Temperature for generation */ + temperature?: number; +} diff --git a/skills/postgres-best-practices/evals/utils.ts b/skills/postgres-best-practices/evals/utils.ts new file mode 100644 index 0000000..aa2c46e --- /dev/null +++ b/skills/postgres-best-practices/evals/utils.ts @@ -0,0 +1,72 @@ +import type { EvalResult, EvalScenario } from "./types.js"; + +/** + * Format eval results as a summary table + */ +export function formatResultsSummary(results: EvalResult[]): string { + const lines: string[] = []; + + lines.push("## Eval Results Summary\n"); + + const passed = results.filter((r) => r.passed).length; + const total = results.length; + const passRate = ((passed / total) * 100).toFixed(1); + + lines.push(`**Pass Rate:** ${passed}/${total} (${passRate}%)\n`); + + lines.push("| Scenario | Status | Latency | Rules Referenced |"); + lines.push("|----------|--------|---------|------------------|"); + + for (const result of results) { + const status = result.passed ? "PASS" : "FAIL"; + const latency = `${result.latencyMs}ms`; + const rules = result.rulesReferenced.join(", ") || "none"; + lines.push(`| ${result.scenarioId} | ${status} | ${latency} | ${rules} |`); + } + + return lines.join("\n"); +} + +/** + * Format detailed results for a single scenario + */ +export function formatDetailedResult(result: EvalResult): string { + const lines: string[] = []; + + lines.push(`## ${result.scenarioId}\n`); + lines.push(`**Status:** ${result.passed ? "PASS" : "FAIL"}`); + lines.push(`**Latency:** ${result.latencyMs}ms`); + lines.push(`**Rules Referenced:** ${result.rulesReferenced.join(", ") || "none"}\n`); + + if (result.error) { + lines.push(`**Error:** ${result.error}\n`); + } + + lines.push("### Criteria Results\n"); + for (const criterion of result.criteriaResults) { + const icon = criterion.passed ? "+" : "-"; + lines.push(`${icon} ${criterion.criterion}`); + if (criterion.evidence) { + lines.push(` Evidence: ${criterion.evidence}`); + } + } + + lines.push("\n### Response\n"); + lines.push("```"); + lines.push(result.response); + lines.push("```"); + + return lines.join("\n"); +} + +/** + * Create a scenario builder for cleaner test definitions + */ +export function createScenario( + partial: Omit<EvalScenario, "id"> & { id?: string } +): EvalScenario { + return { + id: partial.id || partial.name.toLowerCase().replace(/\s+/g, "-"), + ...partial, + } as EvalScenario; +} diff --git a/skills/postgres-best-practices/evals/vitest.config.ts b/skills/postgres-best-practices/evals/vitest.config.ts new file mode 100644 index 0000000..c56d462 --- /dev/null +++ b/skills/postgres-best-practices/evals/vitest.config.ts @@ -0,0 +1,9 @@ +import { defineConfig } from "vitest/config"; + +export default defineConfig({ + test: { + include: ["scenarios/**/*.eval.ts"], + testTimeout: 60000, // 60 seconds for LLM calls + reporters: ["verbose"], + }, +}); diff --git a/skills/postgres-best-practices/profiles/aurora.json b/skills/postgres-best-practices/profiles/aurora.json new file mode 100644 index 0000000..6b79e75 --- /dev/null +++ b/skills/postgres-best-practices/profiles/aurora.json @@ -0,0 +1,23 @@ +{ + "name": "aurora", + "minVersion": "13", + "maxVersion": "16", + "extensions": { + "available": [ + "pg_stat_statements", + "pgcrypto", + "uuid-ossp" + ], + "installable": [ + "postgis", + "pg_hint_plan", + "pg_similarity" + ], + "unavailable": [ + "pg_cron", + "pg_partman", + "timescaledb" + ] + }, + "notes": "AWS Aurora PostgreSQL. Some extensions are not available due to managed service restrictions. Aurora has its own connection pooling (RDS Proxy) and automatic failover." +} diff --git a/skills/postgres-best-practices/profiles/self-hosted.json b/skills/postgres-best-practices/profiles/self-hosted.json new file mode 100644 index 0000000..3d02199 --- /dev/null +++ b/skills/postgres-best-practices/profiles/self-hosted.json @@ -0,0 +1,18 @@ +{ + "name": "self-hosted", + "minVersion": "12", + "extensions": { + "available": [], + "installable": [ + "pg_stat_statements", + "pgcrypto", + "uuid-ossp", + "postgis", + "pg_trgm", + "btree_gin", + "btree_gist" + ], + "unavailable": [] + }, + "notes": "Generic self-hosted PostgreSQL. Extension availability depends on server configuration. Check pg_available_extensions for what can be installed." 
+} diff --git a/skills/postgres-best-practices/profiles/supabase.json b/skills/postgres-best-practices/profiles/supabase.json new file mode 100644 index 0000000..be0a2f6 --- /dev/null +++ b/skills/postgres-best-practices/profiles/supabase.json @@ -0,0 +1,27 @@ +{ + "name": "supabase", + "minVersion": "15", + "extensions": { + "available": [ + "pg_stat_statements", + "pgcrypto", + "uuid-ossp", + "pgjwt", + "pg_graphql", + "pg_net", + "pgsodium", + "supabase_vault", + "pg_jsonschema" + ], + "installable": [ + "postgis", + "pg_cron", + "pgtap", + "plv8", + "http", + "pg_hashids" + ], + "unavailable": [] + }, + "notes": "Supabase manages connection pooling via Supavisor. Direct connection limits differ from pooled connections. All standard Postgres extensions are available." +} diff --git a/skills/postgres-best-practices/rules/advanced-jsonb-indexing.md b/skills/postgres-best-practices/rules/advanced-jsonb-indexing.md index e3d261e..2e0c147 100644 --- a/skills/postgres-best-practices/rules/advanced-jsonb-indexing.md +++ b/skills/postgres-best-practices/rules/advanced-jsonb-indexing.md @@ -3,6 +3,7 @@ title: Index JSONB Columns for Efficient Querying impact: MEDIUM impactDescription: 10-100x faster JSONB queries with proper indexing tags: jsonb, gin, indexes, json +minVersion: "9.4" --- ## Index JSONB Columns for Efficient Querying diff --git a/skills/postgres-best-practices/rules/data-upsert.md b/skills/postgres-best-practices/rules/data-upsert.md index bc95e23..8817b9e 100644 --- a/skills/postgres-best-practices/rules/data-upsert.md +++ b/skills/postgres-best-practices/rules/data-upsert.md @@ -3,6 +3,7 @@ title: Use UPSERT for Insert-or-Update Operations impact: MEDIUM impactDescription: Atomic operation, eliminates race conditions tags: upsert, on-conflict, insert, update +minVersion: "9.5" --- ## Use UPSERT for Insert-or-Update Operations diff --git a/skills/postgres-best-practices/rules/lock-skip-locked.md b/skills/postgres-best-practices/rules/lock-skip-locked.md index 77bdbb9..5091da4 100644 --- a/skills/postgres-best-practices/rules/lock-skip-locked.md +++ b/skills/postgres-best-practices/rules/lock-skip-locked.md @@ -3,6 +3,7 @@ title: Use SKIP LOCKED for Non-Blocking Queue Processing impact: MEDIUM-HIGH impactDescription: 10x throughput for worker queues tags: skip-locked, queue, workers, concurrency +minVersion: "9.5" --- ## Use SKIP LOCKED for Non-Blocking Queue Processing diff --git a/skills/postgres-best-practices/rules/monitor-pg-stat-statements.md b/skills/postgres-best-practices/rules/monitor-pg-stat-statements.md index d7e82f1..4917d57 100644 --- a/skills/postgres-best-practices/rules/monitor-pg-stat-statements.md +++ b/skills/postgres-best-practices/rules/monitor-pg-stat-statements.md @@ -3,6 +3,7 @@ title: Enable pg_stat_statements for Query Analysis impact: LOW-MEDIUM impactDescription: Identify top resource-consuming queries tags: pg-stat-statements, monitoring, statistics, performance +extensions: pg_stat_statements --- ## Enable pg_stat_statements for Query Analysis diff --git a/skills/postgres-best-practices/rules/query-covering-indexes.md b/skills/postgres-best-practices/rules/query-covering-indexes.md index 9d2a494..5a1113e 100644 --- a/skills/postgres-best-practices/rules/query-covering-indexes.md +++ b/skills/postgres-best-practices/rules/query-covering-indexes.md @@ -3,6 +3,7 @@ title: Use Covering Indexes to Avoid Table Lookups impact: MEDIUM-HIGH impactDescription: 2-5x faster queries by eliminating heap fetches tags: indexes, covering-index, include, 
index-only-scan +minVersion: "11" --- ## Use Covering Indexes to Avoid Table Lookups diff --git a/skills/postgres-best-practices/rules/schema-partitioning.md b/skills/postgres-best-practices/rules/schema-partitioning.md index 13137a0..9cb9ef4 100644 --- a/skills/postgres-best-practices/rules/schema-partitioning.md +++ b/skills/postgres-best-practices/rules/schema-partitioning.md @@ -3,6 +3,7 @@ title: Partition Large Tables for Better Performance impact: MEDIUM-HIGH impactDescription: 5-20x faster queries and maintenance on large tables tags: partitioning, large-tables, time-series, performance +minVersion: "10" --- ## Partition Large Tables for Better Performance
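
For context on how the new `minVersion`/`extensions` frontmatter is meant to interact with the profile JSON files: the actual filtering happens in `filterRulesForProfile` in `packages/skills-build/src/profiles.ts`, which is not shown in this hunk. The sketch below is an assumption about those semantics rather than the real implementation: a rule is kept only when the profile's minimum version satisfies the rule's `minVersion`, and every extension the rule requires is either available or installable in the profile.

```ts
// Hypothetical sketch of profile filtering -- the real logic lives in
// packages/skills-build/src/profiles.ts and may differ in detail.
interface RuleMeta {
  minVersion?: string; // from rule frontmatter, e.g. "9.5", "11"
  extensions?: string[]; // e.g. ["pg_stat_statements"]
}

interface ProfileMeta {
  minVersion: string; // lowest PostgreSQL version the profile targets, e.g. "13"
  extensions: {
    available: string[];
    installable: string[];
    unavailable: string[];
  };
}

// Compare on the numeric prefix only ("9.5" -> 9.5, "13" -> 13).
const versionNumber = (v: string): number => Number.parseFloat(v);

export function ruleAppliesToProfile(rule: RuleMeta, profile: ProfileMeta): boolean {
  // Version gate: the oldest version the profile covers must meet the rule's minimum.
  if (rule.minVersion && versionNumber(profile.minVersion) < versionNumber(rule.minVersion)) {
    return false;
  }
  // Extension gate: required extensions must be available or at least installable.
  if (rule.extensions && rule.extensions.length > 0) {
    const usable = new Set([
      ...profile.extensions.available,
      ...profile.extensions.installable,
    ]);
    return rule.extensions.every((ext) => usable.has(ext));
  }
  return true;
}
```

Under these assumed semantics, `query-covering-indexes.md` (minVersion "11") survives the aurora profile (minVersion "13") but would be dropped from any profile pinned below 11, and `monitor-pg-stat-statements.md` is kept for all three shipped profiles because `pg_stat_statements` is listed as available or installable in each.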