From bedaf7fdd387aa379f7bce7f178a6df9502a0be4 Mon Sep 17 00:00:00 2001
From: Pedro Rodrigues
Date: Mon, 9 Feb 2026 17:31:10 +0000
Subject: [PATCH] fix: correct P0/P1 fact-check issues in vectors references

- hybrid search: improve examples with proper table schema, HNSW index, parameter docs, and operator guidance
- perf tuning: fix compute sizing (Nano 0.5GB, add Micro tier, correct Large to ~225K)
- rag patterns: add missing match_document_chunks SQL function definition

Co-Authored-By: Claude Opus 4.6
---
 .../references/vectors/perf-tuning.md         | 11 +++-
 .../references/vectors/rag-patterns.md        | 25 ++++++++
 .../references/vectors/search-hybrid.md       | 59 ++++++++++++-------
 3 files changed, 72 insertions(+), 23 deletions(-)

diff --git a/skills/supabase/references/vectors/perf-tuning.md b/skills/supabase/references/vectors/perf-tuning.md
index 987bda1..94195d4 100644
--- a/skills/supabase/references/vectors/perf-tuning.md
+++ b/skills/supabase/references/vectors/perf-tuning.md
@@ -16,7 +16,7 @@ Vector indexes must fit in RAM for optimal performance.
 **Incorrect:**
 
 ```sql
--- Free tier (1GB RAM) with 100K 1536-dim vectors
+-- Free tier (0.5GB RAM) with 100K 1536-dim vectors
 -- Symptoms: high disk reads, slow queries
 select count(*) from documents; -- Returns 100000
 ```
@@ -50,12 +50,17 @@ create index concurrently on documents using hnsw (embedding vector_cosine_ops);
 
 ## Compute Sizing
 
+Approximate capacity for 1536-dimension vectors with HNSW index:
+
 | Plan | RAM | Vectors (1536d) |
 |------|-----|-----------------|
-| Free | 1GB | ~20K |
+| Nano (Free) | 0.5GB | Limited — index may swap |
+| Micro | 1GB | ~15K |
 | Small | 2GB | ~50K |
 | Medium | 4GB | ~100K |
-| Large | 8GB | ~250K |
+| Large | 8GB | ~225K |
+
+See the [compute sizing guide](https://supabase.com/docs/guides/ai/choosing-compute-addon) for detailed benchmarks.
 
 ## Index Pre-Warming
 
diff --git a/skills/supabase/references/vectors/rag-patterns.md b/skills/supabase/references/vectors/rag-patterns.md
index bec9245..9f60178 100644
--- a/skills/supabase/references/vectors/rag-patterns.md
+++ b/skills/supabase/references/vectors/rag-patterns.md
@@ -71,6 +71,31 @@ create table document_chunks (
 );
 
 create index on document_chunks using hnsw (embedding vector_cosine_ops);
+
+-- RAG retrieval: returns the match_count chunks nearest to query_embedding by cosine distance (<=>); similarity = 1 - distance
+create or replace function match_document_chunks(
+  query_embedding extensions.vector(1536),
+  match_count int default 5
+)
+returns table (
+  id bigint,
+  document_id bigint,
+  chunk_index int,
+  content text,
+  similarity float
+)
+language sql stable
+as $$
+  select
+    dc.id,
+    dc.document_id,
+    dc.chunk_index,
+    dc.content,
+    1 - (dc.embedding <=> query_embedding) as similarity
+  from document_chunks dc
+  order by dc.embedding <=> query_embedding
+  limit match_count;
+$$;
 ```
 
 ## RAG Query Pipeline
diff --git a/skills/supabase/references/vectors/search-hybrid.md b/skills/supabase/references/vectors/search-hybrid.md
index 0425fe8..be55afe 100644
--- a/skills/supabase/references/vectors/search-hybrid.md
+++ b/skills/supabase/references/vectors/search-hybrid.md
@@ -17,21 +17,26 @@ Full-text search without an index is extremely slow.
 
 ```sql
 -- No index on tsvector column
-create table docs (
-  fts tsvector generated always as (to_tsvector('english', content)) stored
+create table documents (
+  content text,
+  fts tsvector generated always as (to_tsvector('english', content)) stored,
+  embedding extensions.vector(512)
 );
-select * from docs where fts @@ to_tsquery('search'); -- Slow seq scan
+select * from documents where fts @@ to_tsquery('search'); -- Slow seq scan
 ```
 
 **Correct:**
 
 ```sql
 -- Add GIN index for full-text search
-create table docs (
-  fts tsvector generated always as (to_tsvector('english', content)) stored
+create table documents (
+  content text,
+  fts tsvector generated always as (to_tsvector('english', content)) stored,
+  embedding extensions.vector(512)
 );
-create index on docs using gin(fts);
-select * from docs where fts @@ to_tsquery('search'); -- Fast index scan
+create index on documents using gin(fts);
+create index on documents using hnsw (embedding vector_ip_ops);
+select * from documents where fts @@ to_tsquery('search'); -- Fast index scan
 ```
 
 ## 2. Not Over-Fetching Before Fusion
@@ -50,11 +55,11 @@ select * from semantic union full_text limit 5;
 **Correct:**
 
 ```sql
--- Over-fetch 2x with least() cap, then fuse and limit
-with semantic as (select id from docs order by embedding <#> query limit least(5, 30) * 2),
-     full_text as (select id from docs where fts @@ query limit least(5, 30) * 2)
+-- Over-fetch 2x from each, then fuse and limit
+with semantic as (select id from docs order by embedding <#> query limit 10),
+     full_text as (select id from docs where fts @@ query limit 10)
 -- Apply RRF scoring...
-limit least(5, 30);
+limit 5;
 ```
 
 ## Complete Hybrid Search Function
@@ -68,29 +73,43 @@ create or replace function hybrid_search(
   semantic_weight float = 1,
   rrf_k int = 50
 )
-returns setof documents language sql as $$
+returns setof documents
+language sql
+as $$
 with full_text as (
-  select id, row_number() over (order by ts_rank_cd(fts, websearch_to_tsquery(query_text)) desc) as rank_ix
-  from documents where fts @@ websearch_to_tsquery(query_text)
+  select
+    id,
+    row_number() over(order by ts_rank_cd(fts, websearch_to_tsquery(query_text)) desc) as rank_ix
+  from documents
+  where fts @@ websearch_to_tsquery(query_text)
+  order by rank_ix
   limit least(match_count, 30) * 2
 ),
 semantic as (
-  select id, row_number() over (order by embedding <#> query_embedding) as rank_ix
+  select
+    id,
+    row_number() over (order by embedding <#> query_embedding) as rank_ix
   from documents
-  order by embedding <#> query_embedding
+  order by rank_ix
   limit least(match_count, 30) * 2
 )
-select documents.* from full_text
+select
+  documents.*
+from full_text
   full outer join semantic on full_text.id = semantic.id
   join documents on coalesce(full_text.id, semantic.id) = documents.id
 order by
-  coalesce(1.0/(rrf_k + full_text.rank_ix), 0.0) * full_text_weight +
-  coalesce(1.0/(rrf_k + semantic.rank_ix), 0.0) * semantic_weight
+  coalesce(1.0 / (rrf_k + full_text.rank_ix), 0.0) * full_text_weight +
+  coalesce(1.0 / (rrf_k + semantic.rank_ix), 0.0) * semantic_weight
   desc
-limit least(match_count, 30);
+limit least(match_count, 30)
 $$;
 ```
 
+Parameters: `full_text_weight` and `semantic_weight` control how much each method contributes to the final rank (both default to 1). `rrf_k` is the Reciprocal Rank Fusion (RRF) smoothing constant (default 50). `least(match_count, 30)` caps the result at 30 rows, so each sub-query over-fetches at most 60 candidates (2x the cap) before fusion.
+
+Use `<#>` (negative inner product) with `vector_ip_ops` index, or `<=>` (cosine distance) with `vector_cosine_ops` — ensure the operator matches your index.
+
 ## Related
 
 - [search-semantic.md](search-semantic.md) - Vector-only search