All guidessql schema.sqlsql ingest.tsts embed-and-store.tsts search.sqlsql rag.tsts
GuideIntermediatepgvectorragaipostgres
Build Your First RAG App with Postgres + pgvector
End-to-end tutorial: enable pgvector, design the schema, ingest documents, embed them, run hybrid search, and integrate with your LLM. All on Postgres, all in one afternoon.
90 min to complete 14 min read
Ninety minutes from blank Supabase project to working RAG with hybrid search. We'll use OpenAI's embedding model by default; substitute any other provider with the same shape.
Step 1: Enable pgvector
In Supabase Studio, go to Database → Extensions, find pgvector, and enable it. Or run:
CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION IF NOT EXISTS pg_trgm; -- for lexical hybrid searchStep 2: Design the schema
CREATE TABLE documents (
id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
title text NOT NULL,
source_uri text NOT NULL,
tenant_id uuid NOT NULL,
created_at timestamptz NOT NULL DEFAULT now()
);
CREATE TABLE chunks (
id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
document_id uuid NOT NULL REFERENCES documents(id) ON DELETE CASCADE,
tenant_id uuid NOT NULL, -- denormalised for RLS + filter speed
ordinal int NOT NULL,
content text NOT NULL,
embedding vector(1536) NOT NULL,
model text NOT NULL, -- which embedding model produced this
token_count int NOT NULL,
created_at timestamptz NOT NULL DEFAULT now()
);
CREATE INDEX chunks_tenant_idx ON chunks (tenant_id);
CREATE INDEX chunks_doc_idx ON chunks (document_id);
-- HNSW index for vector similarity
CREATE INDEX chunks_embedding_hnsw
ON chunks USING hnsw (embedding vector_cosine_ops)
WITH (m = 16, ef_construction = 64);
-- Trigram GIN for lexical hybrid
CREATE INDEX chunks_content_trgm
ON chunks USING gin (content gin_trgm_ops);Step 3: Ingest + chunk a document
Sentence-aware chunking with header enrichment is the pattern that works in 2026. Quick TypeScript version:
import { readFile } from "node:fs/promises";
interface Chunk {
ordinal: number;
content: string;
tokenCount: number;
}
function chunkMarkdown(md: string, title: string): Chunk[] {
// Naive but effective: split on headings, then on paragraphs.
const sections = md.split(/^##\s+/m);
const out: Chunk[] = [];
let ordinal = 0;
for (const sec of sections) {
const lines = sec.split("\n").filter(Boolean);
const heading = lines.shift() ?? "";
const body = lines.join(" ").trim();
if (!body) continue;
// Approximate tokens by chars / 4
out.push({
ordinal: ordinal++,
content: `${title}\n${heading}\n\n${body}`,
tokenCount: Math.ceil(body.length / 4),
});
}
return out;
}Step 4: Embed and store
import OpenAI from "openai";
import { db } from "@/db/client";
import { chunks, documents } from "@/db/schema";
const openai = new OpenAI();
async function embedAndStore(
tenantId: string,
title: string,
sourceUri: string,
chunked: Chunk[],
) {
const [doc] = await db
.insert(documents)
.values({ tenantId, title, sourceUri })
.returning();
// Batch embed (5-10x faster than per-row)
const inputs = chunked.map((c) => c.content);
const resp = await openai.embeddings.create({
model: "text-embedding-3-small",
input: inputs,
});
await db.insert(chunks).values(
chunked.map((c, i) => ({
documentId: doc.id,
tenantId,
ordinal: c.ordinal,
content: c.content,
embedding: resp.data[i].embedding,
model: "text-embedding-3-small",
tokenCount: c.tokenCount,
})),
);
}Step 5: Query: hybrid search
Pure vector search misses queries that include proper nouns. Hybrid search (vector + lexical, fused by reciprocal rank) is what you want.
-- $1 = query embedding, $2 = tenant_id, $3 = model name, $4 = query text
WITH vector_hits AS (
SELECT id, row_number() OVER () AS rk
FROM chunks
WHERE tenant_id = $2 AND model = $3
ORDER BY embedding <=> $1
LIMIT 20
),
lex_hits AS (
SELECT id, row_number() OVER () AS rk
FROM chunks
WHERE tenant_id = $2 AND content % $4
ORDER BY similarity(content, $4) DESC
LIMIT 20
),
fused AS (
SELECT id, sum(1.0 / (60 + rk)) AS score
FROM (
SELECT id, rk FROM vector_hits
UNION ALL
SELECT id, rk FROM lex_hits
) all_hits
GROUP BY id
)
SELECT c.id, c.content, f.score
FROM fused f
JOIN chunks c ON c.id = f.id
ORDER BY f.score DESC
LIMIT 5;Step 6: Plug into your LLM
import OpenAI from "openai";
const openai = new OpenAI();
export async function answer(question: string, tenantId: string) {
const [embedding] = (await openai.embeddings.create({
model: "text-embedding-3-small",
input: [question],
})).data;
const hits = await db.execute<{ content: string }>(/* the SQL above */ );
const context = hits.map((h) => h.content).join("\n---\n");
const completion = await openai.chat.completions.create({
model: "gpt-4o-mini",
messages: [
{ role: "system", content: "Answer using only the context. Cite chunk numbers." },
{ role: "user", content: `Context:\n${context}\n\nQuestion: ${question}` },
],
});
return completion.choices[0].message.content;
}