mirror of
https://github.com/supabase/agent-skills.git
synced 2026-03-27 10:09:26 +08:00
fix: correct P0/P1 fact-check issues in vectors references
- hybrid search: improve examples with proper table schema, HNSW index, parameter docs, and operator guidance
- perf tuning: fix compute sizing (Nano 0.5GB, add Micro tier, correct Large to ~225K)
- rag patterns: add missing match_document_chunks SQL function definition

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -16,7 +16,7 @@ Vector indexes must fit in RAM for optimal performance.
|
|||||||
**Incorrect:**
|
**Incorrect:**
|
||||||
|
|
||||||
```sql
|
```sql
|
||||||
-- Free tier (1GB RAM) with 100K 1536-dim vectors
|
-- Free tier (0.5GB RAM) with 100K 1536-dim vectors
|
||||||
-- Symptoms: high disk reads, slow queries
|
-- Symptoms: high disk reads, slow queries
|
||||||
select count(*) from documents; -- Returns 100000
|
select count(*) from documents; -- Returns 100000
|
||||||
```
|
```
|
||||||
@@ -50,12 +50,17 @@ create index concurrently on documents using hnsw (embedding vector_cosine_ops);
|
|||||||
|
|
||||||
## Compute Sizing
|
## Compute Sizing
|
||||||
|
|
||||||
|
Approximate capacity for 1536-dimension vectors with an HNSW index:
|
||||||
|
|
||||||
| Plan | RAM | Vectors (1536d) |
|
| Plan | RAM | Vectors (1536d) |
|
||||||
|------|-----|-----------------|
|
|------|-----|-----------------|
|
||||||
| Free | 1GB | ~20K |
|
| Nano (Free) | 0.5GB | Limited — index may swap |
|
||||||
|
| Micro | 1GB | ~15K |
|
||||||
| Small | 2GB | ~50K |
|
| Small | 2GB | ~50K |
|
||||||
| Medium | 4GB | ~100K |
|
| Medium | 4GB | ~100K |
|
||||||
| Large | 8GB | ~250K |
|
| Large | 8GB | ~225K |
|
||||||
|
|
||||||
|
See the [compute sizing guide](https://supabase.com/docs/guides/ai/choosing-compute-addon) for detailed benchmarks.
|
||||||
|
|
||||||
## Index Pre-Warming
|
## Index Pre-Warming
|
||||||
|
|
||||||
|
|||||||
@@ -71,6 +71,31 @@ create table document_chunks (
|
|||||||
);
|
);
|
||||||
|
|
||||||
create index on document_chunks using hnsw (embedding vector_cosine_ops);
|
create index on document_chunks using hnsw (embedding vector_cosine_ops);
|
||||||
|
|
||||||
|
-- Search function for RAG retrieval
|
||||||
|
create or replace function match_document_chunks(
|
||||||
|
query_embedding extensions.vector(1536),
|
||||||
|
match_count int default 5
|
||||||
|
)
|
||||||
|
returns table (
|
||||||
|
id bigint,
|
||||||
|
document_id bigint,
|
||||||
|
chunk_index int,
|
||||||
|
content text,
|
||||||
|
similarity float
|
||||||
|
)
|
||||||
|
language sql stable
|
||||||
|
as $$
|
||||||
|
select
|
||||||
|
dc.id,
|
||||||
|
dc.document_id,
|
||||||
|
dc.chunk_index,
|
||||||
|
dc.content,
|
||||||
|
1 - (dc.embedding <=> query_embedding) as similarity
|
||||||
|
from document_chunks dc
|
||||||
|
order by dc.embedding <=> query_embedding
|
||||||
|
limit match_count;
|
||||||
|
$$;
|
||||||
```
|
```
|
||||||
|
|
||||||
## RAG Query Pipeline
|
## RAG Query Pipeline
|
||||||
|
|||||||
@@ -17,21 +17,26 @@ Full-text search without an index is extremely slow.
|
|||||||
|
|
||||||
```sql
|
```sql
|
||||||
-- No index on tsvector column
|
-- No index on tsvector column
|
||||||
create table docs (
|
create table documents (
|
||||||
fts tsvector generated always as (to_tsvector('english', content)) stored
|
content text,
|
||||||
|
fts tsvector generated always as (to_tsvector('english', content)) stored,
|
||||||
|
embedding extensions.vector(512)
|
||||||
);
|
);
|
||||||
select * from docs where fts @@ to_tsquery('search'); -- Slow seq scan
|
select * from documents where fts @@ to_tsquery('search'); -- Slow seq scan
|
||||||
```
|
```
|
||||||
|
|
||||||
**Correct:**
|
**Correct:**
|
||||||
|
|
||||||
```sql
|
```sql
|
||||||
-- Add GIN index for full-text search
|
-- Add GIN index for full-text search
|
||||||
create table docs (
|
create table documents (
|
||||||
fts tsvector generated always as (to_tsvector('english', content)) stored
|
content text,
|
||||||
|
fts tsvector generated always as (to_tsvector('english', content)) stored,
|
||||||
|
embedding extensions.vector(512)
|
||||||
);
|
);
|
||||||
create index on docs using gin(fts);
|
create index on documents using gin(fts);
|
||||||
select * from docs where fts @@ to_tsquery('search'); -- Fast index scan
|
create index on documents using hnsw (embedding vector_ip_ops);
|
||||||
|
select * from documents where fts @@ to_tsquery('search'); -- Fast index scan
|
||||||
```
|
```
|
||||||
|
|
||||||
## 2. Not Over-Fetching Before Fusion
|
## 2. Not Over-Fetching Before Fusion
|
||||||
@@ -50,11 +55,11 @@ select * from semantic union full_text limit 5;
|
|||||||
**Correct:**
|
**Correct:**
|
||||||
|
|
||||||
```sql
|
```sql
|
||||||
-- Over-fetch 2x with least() cap, then fuse and limit
|
-- Over-fetch 2x from each, then fuse and limit
|
||||||
with semantic as (select id from docs order by embedding <#> query limit least(5, 30) * 2),
|
with semantic as (select id from docs order by embedding <#> query limit 10),
|
||||||
full_text as (select id from docs where fts @@ query limit least(5, 30) * 2)
|
full_text as (select id from docs where fts @@ query limit 10)
|
||||||
-- Apply RRF scoring...
|
-- Apply RRF scoring...
|
||||||
limit least(5, 30);
|
limit 5;
|
||||||
```
|
```
|
||||||
|
|
||||||
## Complete Hybrid Search Function
|
## Complete Hybrid Search Function
|
||||||
@@ -68,29 +73,43 @@ create or replace function hybrid_search(
|
|||||||
semantic_weight float = 1,
|
semantic_weight float = 1,
|
||||||
rrf_k int = 50
|
rrf_k int = 50
|
||||||
)
|
)
|
||||||
returns setof documents language sql as $$
|
returns setof documents
|
||||||
|
language sql
|
||||||
|
as $$
|
||||||
with full_text as (
|
with full_text as (
|
||||||
select id, row_number() over (order by ts_rank_cd(fts, websearch_to_tsquery(query_text)) desc) as rank_ix
|
select
|
||||||
from documents where fts @@ websearch_to_tsquery(query_text)
|
id,
|
||||||
|
row_number() over(order by ts_rank_cd(fts, websearch_to_tsquery(query_text)) desc) as rank_ix
|
||||||
|
from documents
|
||||||
|
where fts @@ websearch_to_tsquery(query_text)
|
||||||
|
order by rank_ix
|
||||||
limit least(match_count, 30) * 2
|
limit least(match_count, 30) * 2
|
||||||
),
|
),
|
||||||
semantic as (
|
semantic as (
|
||||||
select id, row_number() over (order by embedding <#> query_embedding) as rank_ix
|
select
|
||||||
|
id,
|
||||||
|
row_number() over (order by embedding <#> query_embedding) as rank_ix
|
||||||
from documents
|
from documents
|
||||||
order by embedding <#> query_embedding
|
order by rank_ix
|
||||||
limit least(match_count, 30) * 2
|
limit least(match_count, 30) * 2
|
||||||
)
|
)
|
||||||
select documents.* from full_text
|
select
|
||||||
|
documents.*
|
||||||
|
from full_text
|
||||||
full outer join semantic on full_text.id = semantic.id
|
full outer join semantic on full_text.id = semantic.id
|
||||||
join documents on coalesce(full_text.id, semantic.id) = documents.id
|
join documents on coalesce(full_text.id, semantic.id) = documents.id
|
||||||
order by
|
order by
|
||||||
coalesce(1.0/(rrf_k + full_text.rank_ix), 0.0) * full_text_weight +
|
coalesce(1.0 / (rrf_k + full_text.rank_ix), 0.0) * full_text_weight +
|
||||||
coalesce(1.0/(rrf_k + semantic.rank_ix), 0.0) * semantic_weight
|
coalesce(1.0 / (rrf_k + semantic.rank_ix), 0.0) * semantic_weight
|
||||||
desc
|
desc
|
||||||
limit least(match_count, 30);
|
limit least(match_count, 30)
|
||||||
$$;
|
$$;
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Parameters: `full_text_weight` and `semantic_weight` control how much each method contributes to the final rank (both default to 1). `rrf_k` is the RRF smoothing constant (default 50). `least(match_count, 30)` caps the requested result count at 30: each CTE over-fetches up to twice that number (at most 60 rows), and the function returns at most 30 rows.
|
||||||
|
|
||||||
|
Use `<#>` (negative inner product) with a `vector_ip_ops` index, or `<=>` (cosine distance) with a `vector_cosine_ops` index — the operator must match the index's operator class, otherwise the query cannot use the index.
|
||||||
|
|
||||||
## Related
|
## Related
|
||||||
|
|
||||||
- [search-semantic.md](search-semantic.md) - Vector-only search
|
- [search-semantic.md](search-semantic.md) - Vector-only search
|
||||||
|
|||||||
Reference in New Issue
Block a user