PostgreSQL Essentials & pgvector
PostgreSQL advanced features including procedures, views, pgvector for vector similarity search, and Docker setup.
Docker Setup
Docker Run
1# Run PostgreSQL
2docker run -d \
3 --name postgres \
4 -e POSTGRES_PASSWORD=password \
5 -e POSTGRES_USER=myuser \
6 -e POSTGRES_DB=mydb \
7 -v postgres-data:/var/lib/postgresql/data \
8 -p 5432:5432 \
9 postgres:16-alpine
10
11# Connect to PostgreSQL
12docker exec -it postgres psql -U myuser -d mydb
13
14# With pgvector
15docker run -d \
16 --name postgres-vector \
17 -e POSTGRES_PASSWORD=password \
18 -v postgres-data:/var/lib/postgresql/data \
19 -p 5432:5432 \
20 pgvector/pgvector:pg16
Docker Compose
1version: '3.8'
2
3services:
4 postgres:
5 image: postgres:16-alpine
6 container_name: postgres
7 environment:
8 POSTGRES_USER: myuser
9 POSTGRES_PASSWORD: password
10 POSTGRES_DB: mydb
11 volumes:
12 - postgres-data:/var/lib/postgresql/data
13 - ./init.sql:/docker-entrypoint-initdb.d/init.sql
14 ports:
15 - "5432:5432"
16 healthcheck:
17 test: ["CMD-SHELL", "pg_isready -U myuser"]
18 interval: 10s
19 timeout: 5s
20 retries: 5
21 restart: unless-stopped
22
23 # PostgreSQL with pgvector
24 postgres-vector:
25 image: pgvector/pgvector:pg16
26 container_name: postgres-vector
27 environment:
28 POSTGRES_USER: vectoruser
29 POSTGRES_PASSWORD: password
30 POSTGRES_DB: vectordb
31 volumes:
32 - postgres-vector-data:/var/lib/postgresql/data
33 ports:
34 - "5433:5432"
35 restart: unless-stopped
36
37 # pgAdmin (optional)
38 pgadmin:
39 image: dpage/pgadmin4
40 container_name: pgadmin
41 environment:
42 PGADMIN_DEFAULT_EMAIL: admin@example.com
43 PGADMIN_DEFAULT_PASSWORD: admin
44 ports:
45 - "5050:80"
46 depends_on:
47 - postgres
48 restart: unless-stopped
49
50volumes:
51 postgres-data:
52 postgres-vector-data:
1# Start services
2docker-compose up -d
3
4# View logs
5docker-compose logs -f postgres
6
7# Connect to database
8docker-compose exec postgres psql -U myuser -d mydb
9
10# Stop services
11docker-compose down
12
13# Stop and remove volumes
14docker-compose down -v
Basic Commands
1# Connect to database
2psql -h localhost -U myuser -d mydb
3
4# List databases
5\l
6
7# Connect to database
8\c mydb
9
10# List tables
11\dt
12
13# Describe table
14\d table_name
15
16# List views
17\dv
18
19# List functions
20\df
21
22# List schemas
23\dn
24
25# Execute SQL file
26\i /path/to/file.sql
27
28# Export query results to CSV
29\copy (SELECT * FROM users) TO '/path/to/users.csv' CSV HEADER
30
31# Quit
32\q
Views
1-- Create view
2CREATE VIEW user_orders AS
3SELECT
4 u.id,
5 u.username,
6 u.email,
7 COUNT(o.id) AS order_count,
8 SUM(o.total) AS total_spent
9FROM users u
10LEFT JOIN orders o ON u.id = o.user_id
11GROUP BY u.id, u.username, u.email;
12
13-- Query view
14SELECT * FROM user_orders WHERE order_count > 5;
15
16-- Materialized view (cached results)
17CREATE MATERIALIZED VIEW user_stats AS
18SELECT
19 u.id,
20 u.username,
21 COUNT(o.id) AS order_count,
22 AVG(o.total) AS avg_order_value
23FROM users u
24LEFT JOIN orders o ON u.id = o.user_id
25GROUP BY u.id, u.username;
26
27-- Refresh materialized view
28REFRESH MATERIALIZED VIEW user_stats;
29
30-- Refresh concurrently (doesn't lock)
31REFRESH MATERIALIZED VIEW CONCURRENTLY user_stats;
32
33-- Drop view
34DROP VIEW user_orders;
35DROP MATERIALIZED VIEW user_stats;
36
37-- Replace view
38CREATE OR REPLACE VIEW user_orders AS
39SELECT u.id, u.username, COUNT(o.id) AS order_count
40FROM users u
41LEFT JOIN orders o ON u.id = o.user_id
42GROUP BY u.id, u.username;
Stored Procedures & Functions
Functions
1-- Simple function
2CREATE OR REPLACE FUNCTION get_user_order_count(user_id_param INTEGER)
3RETURNS INTEGER AS $$
4DECLARE
5 order_count INTEGER;
6BEGIN
7 SELECT COUNT(*) INTO order_count
8 FROM orders
9 WHERE user_id = user_id_param;
10
11 RETURN order_count;
12END;
13$$ LANGUAGE plpgsql;
14
15-- Usage
16SELECT get_user_order_count(1);
17
18-- Function returning table
19CREATE OR REPLACE FUNCTION get_high_value_customers(min_spent DECIMAL)
20RETURNS TABLE(
21 user_id INTEGER,
22 username VARCHAR,
23 total_spent DECIMAL
24) AS $$
25BEGIN
26 RETURN QUERY
27 SELECT
28 u.id,
29 u.username,
30 SUM(o.total) AS total
31 FROM users u
32 INNER JOIN orders o ON u.id = o.user_id
33 GROUP BY u.id, u.username
34 HAVING SUM(o.total) >= min_spent;
35END;
36$$ LANGUAGE plpgsql;
37
38-- Usage
39SELECT * FROM get_high_value_customers(1000.00);
40
41-- Function with exception handling
42CREATE OR REPLACE FUNCTION safe_divide(a DECIMAL, b DECIMAL)
43RETURNS DECIMAL AS $$
44BEGIN
45 IF b = 0 THEN
46 RAISE EXCEPTION 'Division by zero';
47 END IF;
48 RETURN a / b;
49EXCEPTION
50 WHEN division_by_zero THEN
51 RETURN NULL;
52END;
53$$ LANGUAGE plpgsql;
Procedures
1-- Stored procedure
2CREATE OR REPLACE PROCEDURE process_order(
3 p_user_id INTEGER,
4 p_total DECIMAL
5)
6LANGUAGE plpgsql
7AS $$
8DECLARE
9 v_order_id INTEGER;
10BEGIN
11 -- Insert order
12 INSERT INTO orders (user_id, total, status)
13 VALUES (p_user_id, p_total, 'pending')
14 RETURNING id INTO v_order_id;
15
16 -- Update user stats
17 UPDATE user_stats
18 SET order_count = order_count + 1,
19 total_spent = total_spent + p_total
20 WHERE user_id = p_user_id;
21
22 -- Log
23 INSERT INTO audit_log (action, details)
24 VALUES ('order_created', 'Order ' || v_order_id || ' created');
25
26 COMMIT;
27END;
28$$;
29
30-- Call procedure
31CALL process_order(1, 99.99);
32
33-- Procedure with transaction control
34CREATE OR REPLACE PROCEDURE transfer_funds(
35 from_account INTEGER,
36 to_account INTEGER,
37 amount DECIMAL
38)
39LANGUAGE plpgsql
40AS $$
41BEGIN
42 -- Start transaction
43 BEGIN
44 -- Deduct from source
45 UPDATE accounts
46 SET balance = balance - amount
47 WHERE id = from_account;
48
49 -- Check if sufficient funds
50 IF NOT FOUND OR (SELECT balance FROM accounts WHERE id = from_account) < 0 THEN
51 RAISE EXCEPTION 'Insufficient funds';
52 END IF;
53
54 -- Add to destination
55 UPDATE accounts
56 SET balance = balance + amount
57 WHERE id = to_account;
58
59 COMMIT;
60 EXCEPTION
61 WHEN OTHERS THEN
62 ROLLBACK;
63 RAISE;
64 END;
65END;
66$$;
Triggers
1-- Create trigger function
2CREATE OR REPLACE FUNCTION update_modified_timestamp()
3RETURNS TRIGGER AS $$
4BEGIN
5 NEW.updated_at = CURRENT_TIMESTAMP;
6 RETURN NEW;
7END;
8$$ LANGUAGE plpgsql;
9
10-- Create trigger
11CREATE TRIGGER users_updated_at
12BEFORE UPDATE ON users
13FOR EACH ROW
14EXECUTE FUNCTION update_modified_timestamp();
15
16-- Audit trigger
17CREATE OR REPLACE FUNCTION audit_user_changes()
18RETURNS TRIGGER AS $$
19BEGIN
20 IF TG_OP = 'INSERT' THEN
21 INSERT INTO audit_log (table_name, action, new_data)
22 VALUES ('users', 'INSERT', row_to_json(NEW));
23 ELSIF TG_OP = 'UPDATE' THEN
24 INSERT INTO audit_log (table_name, action, old_data, new_data)
25 VALUES ('users', 'UPDATE', row_to_json(OLD), row_to_json(NEW));
26 ELSIF TG_OP = 'DELETE' THEN
27 INSERT INTO audit_log (table_name, action, old_data)
28 VALUES ('users', 'DELETE', row_to_json(OLD));
29 END IF;
30 RETURN NULL;
31END;
32$$ LANGUAGE plpgsql;
33
34CREATE TRIGGER audit_users
35AFTER INSERT OR UPDATE OR DELETE ON users
36FOR EACH ROW
37EXECUTE FUNCTION audit_user_changes();
pgvector - Vector Similarity Search
Installation
1-- Enable extension
2CREATE EXTENSION IF NOT EXISTS vector;
Create Table with Vector Column
1-- Create table for embeddings
2CREATE TABLE documents (
3 id SERIAL PRIMARY KEY,
4 title TEXT NOT NULL,
5 content TEXT NOT NULL,
6 embedding vector(1536), -- OpenAI ada-002 dimension
7 created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
8);
9
10-- Create index for similarity search
11CREATE INDEX ON documents USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);
12
13-- Or use HNSW index (better for high-dimensional vectors)
14CREATE INDEX ON documents USING hnsw (embedding vector_cosine_ops);
Insert Vectors
1-- Insert document with embedding
2INSERT INTO documents (title, content, embedding)
3VALUES (
4 'Machine Learning Basics',
5 'Introduction to ML concepts...',
6 '[0.1, 0.2, 0.3, ...]'::vector -- 1536 dimensions
7);
8
9-- Insert from Python
10-- import psycopg2
11-- from openai import OpenAI
12--
13-- client = OpenAI()
14-- response = client.embeddings.create(
15-- model="text-embedding-ada-002",
16-- input="Your text here"
17-- )
18-- embedding = response.data[0].embedding
19--
20-- cursor.execute(
21-- "INSERT INTO documents (title, content, embedding) VALUES (%s, %s, %s)",
22-- (title, content, embedding)
23-- )
Similarity Search
1-- Cosine similarity (most common)
2SELECT
3 id,
4 title,
5 1 - (embedding <=> '[0.1, 0.2, ...]'::vector) AS similarity
6FROM documents
7ORDER BY embedding <=> '[0.1, 0.2, ...]'::vector
8LIMIT 10;
9
10-- L2 distance (Euclidean)
11SELECT
12 id,
13 title,
14 embedding <-> '[0.1, 0.2, ...]'::vector AS distance
15FROM documents
16ORDER BY embedding <-> '[0.1, 0.2, ...]'::vector
17LIMIT 10;
18
19-- Inner product
20SELECT
21 id,
22 title,
23 (embedding <#> '[0.1, 0.2, ...]'::vector) * -1 AS similarity
24FROM documents
25ORDER BY embedding <#> '[0.1, 0.2, ...]'::vector
26LIMIT 10;
27
28-- With filters
29SELECT
30 id,
31 title,
32 1 - (embedding <=> '[0.1, 0.2, ...]'::vector) AS similarity
33FROM documents
34WHERE created_at > '2024-01-01'
35 AND similarity > 0.8
36ORDER BY embedding <=> '[0.1, 0.2, ...]'::vector
37LIMIT 10;
Python Integration
1import psycopg2
2from openai import OpenAI
3import numpy as np
4
5# Connect to PostgreSQL
6conn = psycopg2.connect(
7 host="localhost",
8 database="vectordb",
9 user="vectoruser",
10 password="password"
11)
12cursor = conn.cursor()
13
14# Initialize OpenAI
15client = OpenAI(api_key="your-api-key")
16
17def get_embedding(text):
18 """Get embedding from OpenAI"""
19 response = client.embeddings.create(
20 model="text-embedding-ada-002",
21 input=text
22 )
23 return response.data[0].embedding
24
25def insert_document(title, content):
26 """Insert document with embedding"""
27 embedding = get_embedding(content)
28 cursor.execute(
29 """
30 INSERT INTO documents (title, content, embedding)
31 VALUES (%s, %s, %s)
32 RETURNING id
33 """,
34 (title, content, embedding)
35 )
36 doc_id = cursor.fetchone()[0]
37 conn.commit()
38 return doc_id
39
40def search_similar(query, limit=10):
41 """Search for similar documents"""
42 query_embedding = get_embedding(query)
43 cursor.execute(
44 """
45 SELECT
46 id,
47 title,
48 content,
49 1 - (embedding <=> %s::vector) AS similarity
50 FROM documents
51 ORDER BY embedding <=> %s::vector
52 LIMIT %s
53 """,
54 (query_embedding, query_embedding, limit)
55 )
56 return cursor.fetchall()
57
58# Usage
59doc_id = insert_document(
60 "Introduction to Neural Networks",
61 "Neural networks are computing systems inspired by biological neural networks..."
62)
63
64results = search_similar("What are neural networks?")
65for id, title, content, similarity in results:
66 print(f"{title} (similarity: {similarity:.4f})")
Advanced Features
JSON/JSONB
1-- Create table with JSONB
2CREATE TABLE products (
3 id SERIAL PRIMARY KEY,
4 name VARCHAR(200),
5 attributes JSONB
6);
7
8-- Insert JSON data
9INSERT INTO products (name, attributes) VALUES
10 ('Laptop', '{"brand": "Dell", "ram": "16GB", "storage": "512GB SSD"}'),
11 ('Phone', '{"brand": "Apple", "model": "iPhone 14", "color": "black"}');
12
13-- Query JSON
14SELECT * FROM products WHERE attributes->>'brand' = 'Dell';
15SELECT * FROM products WHERE attributes->'ram' = '"16GB"';
16
17-- JSON operators
18SELECT attributes->'brand' FROM products; -- Get JSON object
19SELECT attributes->>'brand' FROM products; -- Get text
20SELECT attributes#>'{specs,ram}' FROM products; -- Get nested
21
22-- JSON functions
23SELECT jsonb_array_elements(attributes->'tags') FROM products;
24SELECT jsonb_object_keys(attributes) FROM products;
25
26-- Index on JSON
27CREATE INDEX idx_brand ON products ((attributes->>'brand'));
Full-Text Search
1-- Create table
2CREATE TABLE articles (
3 id SERIAL PRIMARY KEY,
4 title TEXT,
5 content TEXT,
6 search_vector tsvector
7);
8
9-- Generate search vector
10UPDATE articles
11SET search_vector =
12 setweight(to_tsvector('english', COALESCE(title, '')), 'A') ||
13 setweight(to_tsvector('english', COALESCE(content, '')), 'B');
14
15-- Create GIN index
16CREATE INDEX idx_search ON articles USING GIN(search_vector);
17
18-- Search
19SELECT title, ts_rank(search_vector, query) AS rank
20FROM articles, to_tsquery('english', 'postgresql & database') query
21WHERE search_vector @@ query
22ORDER BY rank DESC;
23
24-- Auto-update trigger
25CREATE TRIGGER articles_search_update
26BEFORE INSERT OR UPDATE ON articles
27FOR EACH ROW EXECUTE FUNCTION
28tsvector_update_trigger(search_vector, 'pg_catalog.english', title, content);
Partitioning
1-- Create partitioned table
2CREATE TABLE orders (
3 id SERIAL,
4 user_id INTEGER,
5 total DECIMAL(10, 2),
6 created_at TIMESTAMP
7) PARTITION BY RANGE (created_at);
8
9-- Create partitions
10CREATE TABLE orders_2023 PARTITION OF orders
11FOR VALUES FROM ('2023-01-01') TO ('2024-01-01');
12
13CREATE TABLE orders_2024 PARTITION OF orders
14FOR VALUES FROM ('2024-01-01') TO ('2025-01-01');
15
16-- Insert automatically routes to correct partition
17INSERT INTO orders (user_id, total, created_at)
18VALUES (1, 99.99, '2024-06-15');
Performance Tips
1-- Analyze query performance
2EXPLAIN ANALYZE SELECT * FROM users WHERE email = 'test@example.com';
3
4-- Create indexes
5CREATE INDEX idx_email ON users(email);
6CREATE INDEX idx_created_at ON users(created_at);
7CREATE INDEX idx_composite ON users(email, created_at);
8
9-- Partial index
10CREATE INDEX idx_active_users ON users(email) WHERE active = true;
11
12-- Vacuum and analyze
13VACUUM ANALYZE users;
14
15-- Show slow queries
16SELECT query, calls, total_time, mean_time
17FROM pg_stat_statements
18ORDER BY mean_time DESC
19LIMIT 10;
Related Snippets
- Database Performance & SQL vs NoSQL
Database performance issues, solutions, and SQL vs NoSQL comparison. Common … - MongoDB Essentials
MongoDB queries, aggregation, and Docker setup for document-based NoSQL … - MySQL Essentials
MySQL commands, optimization, and Docker setup. Docker Setup Docker Run 1# Run … - Neo4j Graph Database
Neo4j graph database queries, algorithms, and vector search capabilities. Docker … - SQL Language Basics
SQL language fundamentals - essential commands and patterns for relational … - SQL vs NoSQL Comparison
Comprehensive comparison of SQL and NoSQL databases to help choose the right … - SQLite Essentials
SQLite commands for embedded database applications. Basic Commands 1# …