PostgreSQL入门与实践
SQL 概述
SQL(Structured Query Language)是用于管理关系数据库的标准语言。PostgreSQL 作为功能最强大的开源关系数据库之一,完全支持 SQL 标准,并提供了许多扩展功能。
常见 SQL 操作
数据定义语言 (DDL)
// DDL 操作类型
const ddlOperations = {
CREATE: "创建数据库对象(表、索引、视图等)",
ALTER: "修改现有数据库对象结构",
DROP: "删除数据库对象",
TRUNCATE: "清空表数据但保留结构"
};
-- 创建表
CREATE TABLE users (
id SERIAL PRIMARY KEY,
username VARCHAR(50) UNIQUE NOT NULL,
email VARCHAR(100) UNIQUE NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- 修改表结构
ALTER TABLE users ADD COLUMN last_login TIMESTAMP;
-- 删除表
DROP TABLE IF EXISTS temp_table;
数据操作语言 (DML)
// DML 操作类型
const dmlOperations = {
SELECT: "查询数据",
INSERT: "插入数据",
UPDATE: "更新数据",
DELETE: "删除数据"
};
-- 插入数据
INSERT INTO users (username, email)
VALUES ('admin', 'admin@example.com');
-- 查询数据
SELECT * FROM users WHERE created_at > '2024-01-01';
-- 更新数据
UPDATE users SET last_login = NOW() WHERE username = 'admin';
-- 删除数据
DELETE FROM users WHERE created_at < '2023-01-01';
SQL 执行流程
PostgreSQL 与其他数据库的比较
PostgreSQL 与 MySQL 的比较
功能对比
// PostgreSQL vs MySQL 特性对比
const dbComparison = {
PostgreSQL: {
ACID支持: "完全支持",
复杂查询: "优秀(支持CTE、窗口函数等)",
JSON支持: "原生JSONB类型",
全文检索: "内置支持",
扩展性: "插件架构,高度可扩展",
数据类型: "丰富(数组、范围类型等)",
并发控制: "MVCC多版本并发控制",
复制: "流复制、逻辑复制",
许可证: "PostgreSQL许可证(类BSD)"
},
MySQL: {
ACID支持: "InnoDB引擎支持",
复杂查询: "基本支持",
JSON支持: "5.7+版本支持",
全文检索: "MyISAM和InnoDB支持",
扩展性: "存储引擎架构",
数据类型: "标准SQL类型",
并发控制: "锁机制",
复制: "主从复制、组复制",
许可证: "GPL双许可证"
}
};
选择建议
graph TD
A[数据库选择] --> B{项目需求}
B --> C[复杂分析查询]
B --> D[高并发读写]
B --> E[JSON数据处理]
B --> F[简单CRUD操作]
C --> G[PostgreSQL]
D --> H{读多写少?}
E --> G
F --> I[MySQL/PostgreSQL]
H --> J[是] --> K[MySQL]
H --> L[否] --> G
G --> M[推荐理由:功能强大、扩展性好]
K --> N[推荐理由:性能优秀、生态成熟]
I --> O[根据团队经验选择]
PostgreSQL 与 MongoDB 的比较
架构对比
// PostgreSQL vs MongoDB 架构对比
const architectureComparison = {
PostgreSQL: {
数据模型: "关系型(表、行、列)",
查询语言: "SQL",
事务支持: "完整ACID事务",
一致性: "强一致性",
扩展方式: "垂直扩展为主,支持读写分离",
模式: "严格模式定义",
索引类型: "B-Tree、Hash、GiST、GIN等"
},
MongoDB: {
数据模型: "文档型(集合、文档、字段)",
查询语言: "MongoDB查询语言",
事务支持: "4.0+版本支持多文档事务",
一致性: "最终一致性(可配置)",
扩展方式: "水平扩展(分片)",
模式: "灵活模式",
索引类型: "单字段、复合、多键、文本等"
}
};
PostgreSQL 数据类型
基本数据类型
数值类型
// PostgreSQL 数值类型
const numericTypes = {
整数类型: {
SMALLINT: "2字节,-32768 到 32767",
INTEGER: "4字节,-2147483648 到 2147483647",
BIGINT: "8字节,-9223372036854775808 到 9223372036854775807",
SERIAL: "自增整数,等同于INTEGER + DEFAULT"
},
浮点类型: {
REAL: "4字节单精度",
DOUBLE_PRECISION: "8字节双精度",
NUMERIC: "可变精度,适用于货币等精确计算"
}
};
CREATE TABLE products (
id SERIAL PRIMARY KEY,
name VARCHAR(100) NOT NULL,
price NUMERIC(10,2), -- 10位数字,2位小数
stock INTEGER DEFAULT 0,
rating REAL,
views BIGINT DEFAULT 0
);
字符类型
// PostgreSQL 字符类型
const characterTypes = {
CHAR: "固定长度字符串",
VARCHAR: "变长字符串,有长度限制",
TEXT: "变长字符串,无长度限制",
UUID: "通用唯一标识符"
};
CREATE TABLE articles (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
title VARCHAR(200) NOT NULL,
slug VARCHAR(100) UNIQUE,
content TEXT,
status CHAR(1) DEFAULT 'D' -- D=Draft, P=Published
);
日期时间类型
CREATE TABLE events (
id SERIAL PRIMARY KEY,
event_name VARCHAR(100),
event_date DATE, -- 只存日期
event_time TIME, -- 只存时间
created_at TIMESTAMP, -- 日期时间(无时区)
updated_at TIMESTAMPTZ, -- 日期时间(带时区)
duration INTERVAL -- 时间间隔
);
-- 插入示例
INSERT INTO events (event_name, event_date, event_time, created_at, updated_at, duration)
VALUES (
'产品发布会',
'2024-12-25',
'14:30:00',
'2024-01-15 10:00:00',
'2024-01-15 10:00:00+08',
'2 hours 30 minutes'
);
高级数据类型
数组类型
// PostgreSQL 数组操作示例
const arrayOperations = {
创建: "column_name TYPE[]",
插入: "ARRAY[val1, val2] 或 '{val1,val2}'",
访问: "array_column[index]",
查询: "ANY(array_column) 或 array_column @> ARRAY[value]"
};
CREATE TABLE users_advanced (
id SERIAL PRIMARY KEY,
username VARCHAR(50),
tags TEXT[], -- 字符串数组
scores INTEGER[], -- 整数数组
preferences JSONB,
location POINT -- 几何点类型
);
-- 插入数组数据
INSERT INTO users_advanced (username, tags, scores, location) VALUES
('john_doe', ARRAY['developer', 'postgresql', 'javascript'], '{85, 92, 78}', POINT(40.7128, -74.0060)),
('jane_smith', '{"backend", "python", "data-science"}', '{95, 88, 91}', POINT(51.5074, -0.1278));
-- 查询数组数据
SELECT username FROM users_advanced WHERE 'postgresql' = ANY(tags);
SELECT username FROM users_advanced WHERE tags @> ARRAY['developer'];
SELECT username, tags[1] as first_tag FROM users_advanced;
复合类型
-- 创建复合类型
CREATE TYPE address_type AS (
street VARCHAR(100),
city VARCHAR(50),
state VARCHAR(50),
zip_code VARCHAR(10)
);
CREATE TABLE customers (
id SERIAL PRIMARY KEY,
name VARCHAR(100),
home_address address_type,
work_address address_type
);
-- 插入复合类型数据
INSERT INTO customers (name, home_address, work_address) VALUES (
'Alice Johnson',
ROW('123 Main St', 'New York', 'NY', '10001'),
ROW('456 Business Ave', 'New York', 'NY', '10002')
);
PostgreSQL 数据类型选择流程
PostgreSQL 存储过程与函数
PostgreSQL 支持多种编程语言编写存储过程和函数,包括 PL/pgSQL、SQL、C、Python、JavaScript 等。
创建存储过程和函数
函数 vs 存储过程
// 函数与存储过程的区别
const procedureVsFunction = {
函数: {
返回值: "必须返回值",
调用方式: "SELECT function_name()",
事务控制: "不能控制事务",
用途: "计算和数据转换"
},
存储过程: {
返回值: "可选返回值",
调用方式: "CALL procedure_name()",
事务控制: "可以控制事务",
用途: "复杂业务逻辑处理"
}
};
示例1:创建简单的加法函数
-- PL/pgSQL 加法函数
CREATE OR REPLACE FUNCTION add_numbers(a INTEGER, b INTEGER)
RETURNS INTEGER AS $$
BEGIN
RETURN a + b;
END;
$$ LANGUAGE plpgsql;
-- 使用函数
SELECT add_numbers(10, 20) as result; -- 返回 30
-- SQL 函数版本(更简洁)
CREATE OR REPLACE FUNCTION add_numbers_sql(a INTEGER, b INTEGER)
RETURNS INTEGER AS $$
SELECT $1 + $2;
$$ LANGUAGE sql;
示例 2:创建复杂的存储过程
-- 用户注册存储过程
CREATE OR REPLACE FUNCTION register_user(
p_username VARCHAR(50),
p_email VARCHAR(100),
p_password VARCHAR(255),
OUT user_id INTEGER,
OUT success BOOLEAN,
OUT message TEXT
) AS $$
DECLARE
existing_count INTEGER;
BEGIN
-- 初始化返回值
success := false;
message := '';
user_id := 0;
-- 检查用户名是否存在
SELECT COUNT(*) INTO existing_count
FROM users
WHERE username = p_username OR email = p_email;
IF existing_count > 0 THEN
message := '用户名或邮箱已存在';
RETURN;
END IF;
-- 验证邮箱格式
IF p_email !~ '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$' THEN
message := '邮箱格式无效';
RETURN;
END IF;
-- 插入新用户
BEGIN
INSERT INTO users (username, email, password_hash, created_at)
VALUES (p_username, p_email, crypt(p_password, gen_salt('bf')), NOW())
RETURNING id INTO user_id;
success := true;
message := '注册成功';
-- 记录用户行为
INSERT INTO user_behaviors (user_id, action, created_at)
VALUES (user_id, 'register', NOW());
EXCEPTION
WHEN OTHERS THEN
message := '注册失败: ' || SQLERRM;
ROLLBACK;
END;
END;
$$ LANGUAGE plpgsql;
-- 调用存储过程
SELECT * FROM register_user('newuser', 'newuser@example.com', 'password123');
存储过程开发流程
用户表与用户行为表实战
创建用户表
-- 用户主表
CREATE TABLE users (
id SERIAL PRIMARY KEY,
username VARCHAR(50) UNIQUE NOT NULL,
email VARCHAR(100) UNIQUE NOT NULL,
password_hash VARCHAR(255) NOT NULL,
first_name VARCHAR(50),
last_name VARCHAR(50),
avatar_url VARCHAR(255),
phone VARCHAR(20),
birth_date DATE,
gender CHAR(1) CHECK (gender IN ('M', 'F', 'O')), -- M=Male, F=Female, O=Other
status VARCHAR(20) DEFAULT 'active' CHECK (status IN ('active', 'inactive', 'banned')),
email_verified BOOLEAN DEFAULT FALSE,
phone_verified BOOLEAN DEFAULT FALSE,
last_login_at TIMESTAMPTZ,
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);
-- 创建更新时间触发器
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = NOW();
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER update_users_updated_at
BEFORE UPDATE ON users
FOR EACH ROW
EXECUTE FUNCTION update_updated_at_column();
-- 创建索引优化查询
CREATE INDEX idx_users_email ON users(email);
CREATE INDEX idx_users_username ON users(username);
CREATE INDEX idx_users_status ON users(status);
CREATE INDEX idx_users_created_at ON users(created_at);
创建用户行为表
-- 用户行为表
CREATE TABLE user_behaviors (
id BIGSERIAL PRIMARY KEY,
user_id INTEGER REFERENCES users(id) ON DELETE CASCADE,
action VARCHAR(50) NOT NULL, -- login, logout, view_product, purchase, etc.
target_type VARCHAR(50), -- product, article, user, etc.
target_id INTEGER,
metadata JSONB, -- 存储额外的行为数据
ip_address INET,
user_agent TEXT,
session_id VARCHAR(100),
created_at TIMESTAMPTZ DEFAULT NOW()
);
-- 用户行为表索引
CREATE INDEX idx_user_behaviors_user_id ON user_behaviors(user_id);
CREATE INDEX idx_user_behaviors_action ON user_behaviors(action);
CREATE INDEX idx_user_behaviors_created_at ON user_behaviors(created_at);
CREATE INDEX idx_user_behaviors_target ON user_behaviors(target_type, target_id);
-- 用户行为元数据的 GIN 索引,支持 JSONB 查询
CREATE INDEX idx_user_behaviors_metadata ON user_behaviors USING gin(metadata);
-- 创建分区表(按月分区)
CREATE TABLE user_behaviors_y2024m01 PARTITION OF user_behaviors
FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');
CREATE TABLE user_behaviors_y2024m02 PARTITION OF user_behaviors
FOR VALUES FROM ('2024-02-01') TO ('2024-03-01');
插入与查询示例
插入示例数据
-- 插入用户数据
INSERT INTO users (username, email, password_hash, first_name, last_name, gender) VALUES
('john_doe', 'john@example.com', crypt('password123', gen_salt('bf')), 'John', 'Doe', 'M'),
('jane_smith', 'jane@example.com', crypt('password456', gen_salt('bf')), 'Jane', 'Smith', 'F'),
('admin_user', 'admin@example.com', crypt('admin789', gen_salt('bf')), 'Admin', 'User', 'O');
-- 插入用户行为数据
INSERT INTO user_behaviors (user_id, action, target_type, target_id, metadata, ip_address, user_agent) VALUES
(1, 'login', NULL, NULL, '{"device": "desktop", "browser": "chrome"}', '192.168.1.100', 'Mozilla/5.0...'),
(1, 'view_product', 'product', 101, '{"category": "electronics", "price": 299.99}', '192.168.1.100', 'Mozilla/5.0...'),
(1, 'add_to_cart', 'product', 101, '{"quantity": 1, "variant": "black"}', '192.168.1.100', 'Mozilla/5.0...'),
(2, 'login', NULL, NULL, '{"device": "mobile", "browser": "safari"}', '192.168.1.101', 'Mozilla/5.0...'),
(2, 'search', NULL, NULL, '{"query": "laptop", "results_count": 25}', '192.168.1.101', 'Mozilla/5.0...');
复杂查询示例
// 用户行为分析查询示例
const analyticsQueries = {
用户活跃度: "统计用户每日活跃情况",
行为转化漏斗: "分析从浏览到购买的转化率",
用户画像: "基于行为数据构建用户特征",
实时监控: "监控异常行为和系统性能"
};
-- 1. 用户活跃度分析
WITH daily_active_users AS (
SELECT
DATE(created_at) as activity_date,
COUNT(DISTINCT user_id) as daily_active_users,
COUNT(*) as total_actions
FROM user_behaviors
WHERE created_at >= NOW() - INTERVAL '30 days'
GROUP BY DATE(created_at)
)
SELECT
activity_date,
daily_active_users,
total_actions,
ROUND(total_actions::numeric / daily_active_users, 2) as avg_actions_per_user
FROM daily_active_users
ORDER BY activity_date DESC;
-- 2. 用户行为转化漏斗分析
SELECT
action,
COUNT(DISTINCT user_id) as unique_users,
COUNT(*) as total_actions,
ROUND(COUNT(DISTINCT user_id) * 100.0 /
LAG(COUNT(DISTINCT user_id)) OVER (ORDER BY
CASE action
WHEN 'view_product' THEN 1
WHEN 'add_to_cart' THEN 2
WHEN 'checkout' THEN 3
WHEN 'purchase' THEN 4
END
), 2) as conversion_rate
FROM user_behaviors
WHERE action IN ('view_product', 'add_to_cart', 'checkout', 'purchase')
AND created_at >= NOW() - INTERVAL '7 days'
GROUP BY action
ORDER BY CASE action
WHEN 'view_product' THEN 1
WHEN 'add_to_cart' THEN 2
WHEN 'checkout' THEN 3
WHEN 'purchase' THEN 4
END;
-- 3. 用户画像分析
SELECT
u.username,
u.gender,
DATE_PART('year', AGE(u.birth_date)) as age,
COUNT(DISTINCT ub.action) as distinct_actions,
COUNT(*) as total_behaviors,
MAX(ub.created_at) as last_activity,
COALESCE(purchase_stats.total_purchases, 0) as total_purchases,
COALESCE(purchase_stats.total_amount, 0) as total_spent
FROM users u
LEFT JOIN user_behaviors ub ON u.id = ub.user_id
LEFT JOIN (
SELECT
user_id,
COUNT(*) as total_purchases,
SUM((metadata->>'amount')::numeric) as total_amount
FROM user_behaviors
WHERE action = 'purchase'
GROUP BY user_id
) purchase_stats ON u.id = purchase_stats.user_id
WHERE u.created_at >= NOW() - INTERVAL '90 days'
GROUP BY u.id, u.username, u.gender, u.birth_date,
purchase_stats.total_purchases, purchase_stats.total_amount
ORDER BY total_behaviors DESC;
PostgreSQL 高级应用
全文检索(Full-Text Search)
PostgreSQL 提供强大的全文检索功能,支持多种语言和自定义配置。
示例:创建支持全文检索的表
-- 创建文章表支持全文检索
CREATE TABLE articles (
id SERIAL PRIMARY KEY,
title VARCHAR(200) NOT NULL,
content TEXT NOT NULL,
author_id INTEGER REFERENCES users(id),
category VARCHAR(50),
tags TEXT[],
published_at TIMESTAMPTZ,
created_at TIMESTAMPTZ DEFAULT NOW(),
-- 全文检索向量字段
search_vector tsvector
);
创建索引并插入数据
-- 创建全文检索索引
CREATE INDEX idx_articles_search_vector ON articles USING gin(search_vector);
CREATE INDEX idx_articles_category ON articles(category);
CREATE INDEX idx_articles_published_at ON articles(published_at);
-- 创建更新全文检索向量的函数
CREATE OR REPLACE FUNCTION update_article_search_vector()
RETURNS TRIGGER AS $$
BEGIN
NEW.search_vector :=
setweight(to_tsvector('english', COALESCE(NEW.title, '')), 'A') ||
setweight(to_tsvector('english', COALESCE(NEW.content, '')), 'B') ||
setweight(to_tsvector('english', COALESCE(array_to_string(NEW.tags, ' '), '')), 'C');
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- 创建触发器
CREATE TRIGGER update_articles_search_vector
BEFORE INSERT OR UPDATE ON articles
FOR EACH ROW EXECUTE FUNCTION update_article_search_vector();
-- 插入测试数据
INSERT INTO articles (title, content, author_id, category, tags, published_at) VALUES
('PostgreSQL Advanced Features', 'PostgreSQL offers many advanced features like full-text search, JSON support, and window functions...', 1, 'database', ARRAY['postgresql', 'database', 'sql'], NOW()),
('JavaScript Performance Tips', 'Optimizing JavaScript performance is crucial for modern web applications. Here are some key strategies...', 2, 'programming', ARRAY['javascript', 'performance', 'optimization'], NOW()),
('Database Design Best Practices', 'When designing databases, consider normalization, indexing strategies, and query optimization...', 1, 'database', ARRAY['database', 'design', 'sql'], NOW());
使用全文检索查询
// 全文检索查询类型
const searchTypes = {
基本搜索: "使用 @@ 操作符进行匹配",
短语搜索: "使用双引号包围的精确短语",
布尔搜索: "使用 & | ! 进行逻辑组合",
排序搜索: "使用 ts_rank 进行相关性排序"
};
-- 基本全文检索
SELECT id, title, ts_rank(search_vector, query) AS rank
FROM articles, plainto_tsquery('english', 'PostgreSQL database') query
WHERE search_vector @@ query
ORDER BY rank DESC;
-- 高级搜索:布尔查询
SELECT id, title, content,
ts_rank(search_vector, query) AS rank,
ts_headline('english', content, query) AS highlighted_content
FROM articles, to_tsquery('english', 'PostgreSQL & (performance | optimization)') query
WHERE search_vector @@ query
ORDER BY rank DESC;
-- 多条件搜索
SELECT a.id, a.title, a.category,
ts_rank(a.search_vector, query) AS rank
FROM articles a, to_tsquery('english', 'database & design') query
WHERE a.search_vector @@ query
AND a.category = 'database'
AND a.published_at >= NOW() - INTERVAL '30 days'
ORDER BY rank DESC, a.published_at DESC;
-- 搜索建议(自动补全)
SELECT word, ndoc, nentry
FROM ts_stat('SELECT search_vector FROM articles')
WHERE word ILIKE 'postgres%'
ORDER BY nentry DESC, ndoc DESC
LIMIT 10;
数据分区(Table Partitioning)
数据分区可以提高大表的查询性能和维护效率。
示例:创建分区表
-- 创建按时间分区的订单表
CREATE TABLE orders (
id BIGSERIAL,
user_id INTEGER NOT NULL,
product_id INTEGER NOT NULL,
quantity INTEGER NOT NULL,
amount DECIMAL(10,2) NOT NULL,
status VARCHAR(20) DEFAULT 'pending',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
PRIMARY KEY (id, created_at)
) PARTITION BY RANGE (created_at);
创建分区
-- 创建月度分区
CREATE TABLE orders_y2024m01 PARTITION OF orders
FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');
CREATE TABLE orders_y2024m02 PARTITION OF orders
FOR VALUES FROM ('2024-02-01') TO ('2024-03-01');
CREATE TABLE orders_y2024m03 PARTITION OF orders
FOR VALUES FROM ('2024-03-01') TO ('2024-04-01');
-- 创建分区索引
CREATE INDEX idx_orders_y2024m01_user_id ON orders_y2024m01(user_id);
CREATE INDEX idx_orders_y2024m01_status ON orders_y2024m01(status);
CREATE INDEX idx_orders_y2024m02_user_id ON orders_y2024m02(user_id);
CREATE INDEX idx_orders_y2024m02_status ON orders_y2024m02(status);
-- 自动创建分区的函数
CREATE OR REPLACE FUNCTION create_monthly_partition(table_name text, start_date date)
RETURNS void AS $$
DECLARE
partition_name text;
end_date date;
BEGIN
partition_name := table_name || '_y' || EXTRACT(year FROM start_date) || 'm' || LPAD(EXTRACT(month FROM start_date)::text, 2, '0');
end_date := start_date + INTERVAL '1 month';
EXECUTE format('CREATE TABLE %I PARTITION OF %I FOR VALUES FROM (%L) TO (%L)',
partition_name, table_name, start_date, end_date);
-- 创建索引
EXECUTE format('CREATE INDEX idx_%s_user_id ON %I(user_id)', partition_name, partition_name);
EXECUTE format('CREATE INDEX idx_%s_status ON %I(status)', partition_name, partition_name);
END;
$$ LANGUAGE plpgsql;
-- 创建未来几个月的分区
SELECT create_monthly_partition('orders', date_trunc('month', NOW() + interval '1 month' * generate_series(0, 11)));
插入和查询数据
-- 插入测试数据
INSERT INTO orders (user_id, product_id, quantity, amount, status, created_at)
SELECT
(random() * 100 + 1)::integer,
(random() * 1000 + 1)::integer,
(random() * 5 + 1)::integer,
(random() * 1000 + 10)::numeric(10,2),
CASE (random() * 3)::integer
WHEN 0 THEN 'pending'
WHEN 1 THEN 'completed'
ELSE 'cancelled'
END,
NOW() - (random() * 90)::integer * INTERVAL '1 day'
FROM generate_series(1, 100000);
-- 分区裁剪查询示例
EXPLAIN (ANALYZE, BUFFERS)
SELECT COUNT(*), AVG(amount)
FROM orders
WHERE created_at >= '2024-01-01' AND created_at < '2024-02-01'
AND status = 'completed';
-- 跨分区查询
SELECT
DATE_TRUNC('month', created_at) as month,
status,
COUNT(*) as order_count,
SUM(amount) as total_amount
FROM orders
WHERE created_at >= '2024-01-01'
GROUP BY DATE_TRUNC('month', created_at), status
ORDER BY month, status;
分区策略选择流程
并行查询 (Parallel Query)
PostgreSQL 支持并行查询执行,可以显著提高大数据集查询的性能。
启用并行查询
-- 查看当前并行查询设置
SHOW max_parallel_workers_per_gather;
SHOW max_worker_processes;
SHOW max_parallel_workers;
-- 设置并行查询参数
SET max_parallel_workers_per_gather = 4;
SET parallel_tuple_cost = 0.1;
SET parallel_setup_cost = 1000.0;
-- 创建大表用于测试
CREATE TABLE large_dataset AS
SELECT
generate_series(1, 10000000) as id,
(random() * 1000000)::integer as value,
md5(random()::text) as text_data,
NOW() - (random() * 365 * 24 * 60 * 60)::integer * INTERVAL '1 second' as created_at;
-- 创建索引
CREATE INDEX idx_large_dataset_value ON large_dataset(value);
CREATE INDEX idx_large_dataset_created_at ON large_dataset(created_at);
执行并行查询
// 并行查询适用场景
const parallelQueryScenarios = {
全表扫描: "大表的聚合查询",
排序操作: "ORDER BY 大量数据",
连接操作: "大表之间的JOIN",
分组聚合: "GROUP BY 操作"
};
-- 并行聚合查询
EXPLAIN (ANALYZE, BUFFERS)
SELECT COUNT(*), AVG(value), MIN(created_at), MAX(created_at)
FROM large_dataset;
-- 并行分组查询
EXPLAIN (ANALYZE, BUFFERS)
SELECT
DATE_TRUNC('month', created_at) as month,
COUNT(*) as record_count,
AVG(value) as avg_value
FROM large_dataset
WHERE created_at >= '2023-01-01'
GROUP BY DATE_TRUNC('month', created_at)
ORDER BY month;
-- 并行连接查询
CREATE TABLE users_large AS
SELECT generate_series(1, 1000000) as id,
'user_' || generate_series(1, 1000000) as username;
EXPLAIN (ANALYZE, BUFFERS)
SELECT u.username, COUNT(d.id) as dataset_count
FROM users_large u
JOIN large_dataset d ON u.id = d.value % 1000000 + 1
GROUP BY u.username
HAVING COUNT(d.id) > 10
ORDER BY dataset_count DESC
LIMIT 100;
JSON 和 JSONB 数据类型
PostgreSQL 对 JSON 数据提供了强大的支持,JSONB 是二进制格式的 JSON,提供更好的性能。
示例:创建包含 JSONB 字段的表
-- 创建产品表包含 JSONB 配置
CREATE TABLE products (
id SERIAL PRIMARY KEY,
name VARCHAR(100) NOT NULL,
category VARCHAR(50),
price DECIMAL(10,2),
attributes JSONB, -- 产品属性
metadata JSONB, -- 元数据
created_at TIMESTAMPTZ DEFAULT NOW()
);
插入 JSON 数据
-- 插入包含 JSONB 数据的记录
INSERT INTO products (name, category, price, attributes, metadata) VALUES
('iPhone 15 Pro', 'electronics', 999.99,
'{"brand": "Apple", "color": "Space Gray", "storage": "256GB", "features": ["Face ID", "5G", "Wireless Charging"]}',
'{"supplier": "Apple Inc", "warranty": "1 year", "tags": ["premium", "smartphone"]}'
),
('MacBook Air M2', 'electronics', 1299.99,
'{"brand": "Apple", "processor": "M2", "ram": "8GB", "storage": "256GB", "screen": "13.6inch"}',
'{"supplier": "Apple Inc", "warranty": "1 year", "tags": ["laptop", "ultrabook"]}'
),
('Nike Air Max', 'shoes', 129.99,
'{"brand": "Nike", "size": [7, 8, 9, 10, 11], "color": ["white", "black", "red"], "type": "running"}',
'{"supplier": "Nike Inc", "warranty": "6 months", "tags": ["sport", "running"]}'
);
查询 JSON 数据
// JSONB 操作符
const jsonbOperators = {
"->": "获取JSON对象字段(返回JSON)",
"->>": "获取JSON对象字段(返回文本)",
"#>": "获取指定路径的JSON对象",
"#>>": "获取指定路径的文本",
"@>": "包含操作符",
"<@": "被包含操作符",
"?": "存在键操作符",
"?&": "存在所有键",
"?|": "存在任意键"
};
-- 基本 JSON 查询
SELECT name, attributes->>'brand' as brand, attributes->>'color' as color
FROM products
WHERE category = 'electronics';
-- 复杂 JSON 路径查询
SELECT name, attributes#>>'{features,0}' as first_feature
FROM products
WHERE attributes ? 'features';
-- JSON 包含查询
SELECT name, price
FROM products
WHERE attributes @> '{"brand": "Apple"}';
-- 数组元素查询
SELECT name, jsonb_array_elements_text(attributes->'features') as feature
FROM products
WHERE attributes ? 'features';
-- JSON 聚合查询
SELECT
attributes->>'brand' as brand,
COUNT(*) as product_count,
AVG(price) as avg_price,
jsonb_agg(name) as product_names
FROM products
WHERE attributes ? 'brand'
GROUP BY attributes->>'brand';
-- 复杂 JSON 查询:查找包含特定标签的产品
SELECT name, price, metadata->'tags' as tags
FROM products
WHERE metadata->'tags' @> '["premium"]'
OR metadata->'tags' @> '["sport"]';
-- JSON 更新操作
UPDATE products
SET attributes = attributes || '{"updated_at": "2024-01-15"}'::jsonb
WHERE id = 1;
-- JSON 删除键操作
UPDATE products
SET attributes = attributes - 'updated_at'
WHERE id = 1;
JSONB 索引
-- 创建 JSONB 索引提高查询性能
-- GIN 索引:支持包含操作符 @>, ?, ?&, ?|
CREATE INDEX idx_products_attributes ON products USING gin(attributes);
CREATE INDEX idx_products_metadata ON products USING gin(metadata);
-- 特定路径索引
CREATE INDEX idx_products_brand ON products USING btree((attributes->>'brand'));
CREATE INDEX idx_products_category ON products USING btree((attributes->>'category'));
-- 表达式索引
CREATE INDEX idx_products_price_range ON products(price)
WHERE attributes->>'brand' = 'Apple';
-- 查看索引使用情况
EXPLAIN (ANALYZE, BUFFERS)
SELECT name, price FROM products
WHERE attributes @> '{"brand": "Apple", "storage": "256GB"}';
JSON 数据处理流程
flowchart TD
A[JSON 数据输入] --> B{数据格式验证}
B --> C[有效] --> D[选择存储类型]
B --> E[无效] --> F[数据清洗]
F --> D
D --> G{数据特征}
G --> H[频繁查询] --> I[JSONB + 索引]
G --> J[存储优先] --> K[JSON]
I --> L[创建 GIN 索引]
L --> M[路径特定索引]
M --> N[查询优化]
subgraph "查询优化"
O[使用操作符] --> P[@>, ?, ?&, ?|]
Q[避免函数] --> R[减少计算开销]
S[索引覆盖] --> T[提高查询速度]
end
数据库逻辑复制(Logical Replication)
逻辑复制允许在不同 PostgreSQL 实例之间复制数据,支持表级别的选择性复制。
创建发布者和订阅者
// 逻辑复制应用场景
const logicalReplicationUses = {
数据分发: "将数据复制到多个只读副本",
实时分析: "将 OLTP 数据复制到分析系统",
迁移升级: "在不同版本间迁移数据",
多租户: "在租户间复制特定数据"
};
-- 发布者配置 (源数据库)
-- 1. 确保配置参数
-- wal_level = logical
-- max_replication_slots >= 1
-- max_wal_senders >= 1
-- 2. 创建复制用户
CREATE USER replication_user WITH REPLICATION LOGIN PASSWORD 'replication_password';
GRANT SELECT ON ALL TABLES IN SCHEMA public TO replication_user;
GRANT USAGE ON SCHEMA public TO replication_user;
-- 3. 创建发布
CREATE PUBLICATION users_publication FOR TABLE users, user_behaviors;
-- 查看发布信息
SELECT * FROM pg_publication;
SELECT * FROM pg_publication_tables WHERE pubname = 'users_publication';
-- 4. 创建复制槽
SELECT pg_create_logical_replication_slot('users_replication_slot', 'pgoutput');
-- 订阅者配置 (目标数据库)
-- 1. 创建相同结构的表
CREATE TABLE users (
id SERIAL PRIMARY KEY,
username VARCHAR(50) UNIQUE NOT NULL,
email VARCHAR(100) UNIQUE NOT NULL,
password_hash VARCHAR(255) NOT NULL,
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE TABLE user_behaviors (
id BIGSERIAL PRIMARY KEY,
user_id INTEGER REFERENCES users(id) ON DELETE CASCADE,
action VARCHAR(50) NOT NULL,
metadata JSONB,
created_at TIMESTAMPTZ DEFAULT NOW()
);
-- 2. 创建订阅
CREATE SUBSCRIPTION users_subscription
CONNECTION 'host=source_host port=5432 dbname=source_db user=replication_user password=replication_password'
PUBLICATION users_publication;
-- 查看订阅状态
SELECT * FROM pg_subscription;
SELECT * FROM pg_replication_slots;
-- 监控复制状态
SELECT
application_name,
state,
sent_lsn,
write_lsn,
flush_lsn,
replay_lsn,
write_lag,
flush_lag,
replay_lag
FROM pg_stat_replication;
逻辑复制架构图
graph TD
subgraph "发布者 (Publisher)"
A[源数据库]
B[WAL 日志]
C[逻辑复制槽]
D[发布 (Publication)]
A --> B
B --> C
A --> D
end
subgraph "网络传输"
E[复制连接]
end
subgraph "订阅者 (Subscriber)"
F[订阅 (Subscription)]
G[应用进程]
H[目标数据库]
F --> G
G --> H
end
C --> E
D --> E
E --> F
subgraph "监控和管理"
I[复制状态监控]
J[冲突处理]
K[性能调优]
end
JavaScript 编写 postgresql 存储过程与函数
PostgreSQL 支持使用 PLV8 扩展来编写 JavaScript 存储过程和函数,为熟悉 JavaScript 的开发者提供了便利。
安装 PLV8 扩展
容器内安装
// PLV8 扩展优势
const plv8Advantages = {
熟悉语法: "使用 JavaScript 语法编写",
JSON处理: "原生 JSON 支持",
性能优秀: "V8 引擎驱动",
库支持: "支持部分 JavaScript 库",
调试便利: "熟悉的调试方式"
};
# Docker 容器中安装 PLV8
# 使用支持 PLV8 的 PostgreSQL 镜像
docker pull postgres:15
docker run --name postgres-plv8 -e POSTGRES_PASSWORD=postgres -d -p 5432:5432 postgres:15
# 进入容器
docker exec -it postgres-plv8 bash
# 安装依赖
apt-get update
apt-get install -y build-essential postgresql-server-dev-15
# 下载并编译 PLV8
git clone https://github.com/plv8/plv8.git
cd plv8
make static
make install
启用 PLV8 扩展
-- 连接到数据库并启用扩展
CREATE EXTENSION IF NOT EXISTS plv8;
-- 验证扩展安装
SELECT * FROM pg_extension WHERE extname = 'plv8';
-- 查看 PLV8 版本
SELECT plv8_version();
使用 JavaScript 编写存储过程或函数
示例1:使用 JavaScript 编写加法函数
-- JavaScript 加法函数
CREATE OR REPLACE FUNCTION js_add(a INTEGER, b INTEGER)
RETURNS INTEGER AS $$
return a + b;
$$ LANGUAGE plv8;
-- 更复杂的数学函数
CREATE OR REPLACE FUNCTION js_calculate_stats(numbers INTEGER[])
RETURNS JSON AS $$
const nums = numbers || [];
if (nums.length === 0) return null;
const sum = nums.reduce((acc, num) => acc + num, 0);
const mean = sum / nums.length;
const sortedNums = [...nums].sort((a, b) => a - b);
const median = sortedNums.length % 2 === 0
? (sortedNums[sortedNums.length / 2 - 1] + sortedNums[sortedNums.length / 2]) / 2
: sortedNums[Math.floor(sortedNums.length / 2)];
const variance = nums.reduce((acc, num) => acc + Math.pow(num - mean, 2), 0) / nums.length;
const standardDeviation = Math.sqrt(variance);
return {
count: nums.length,
sum: sum,
mean: mean,
median: median,
min: Math.min(...nums),
max: Math.max(...nums),
variance: variance,
standardDeviation: standardDeviation
};
$$ LANGUAGE plv8;
-- 测试统计函数
SELECT js_calculate_stats(ARRAY[1, 2, 3, 4, 5, 10, 15, 20]);
示例 2:使用 JavaScript 插入数据
-- JavaScript 批量插入用户数据
CREATE OR REPLACE FUNCTION js_batch_create_users(user_data JSON)
RETURNS JSON AS $$
const users = user_data || [];
const results = {
success: [],
errors: []
};
for (let i = 0; i < users.length; i++) {
const user = users[i];
try {
// 验证必填字段
if (!user.username || !user.email) {
throw new Error('Username and email are required');
}
// 验证邮箱格式
const emailRegex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
if (!emailRegex.test(user.email)) {
throw new Error('Invalid email format');
}
// 插入用户
const insertResult = plv8.execute(
'INSERT INTO users (username, email, first_name, last_name) VALUES ($1, $2, $3, $4) RETURNING id',
[user.username, user.email, user.first_name || null, user.last_name || null]
);
results.success.push({
id: insertResult[0].id,
username: user.username,
email: user.email
});
} catch (error) {
results.errors.push({
user: user,
error: error.message
});
}
}
return results;
$$ LANGUAGE plv8;
-- 测试批量创建用户
SELECT js_batch_create_users('[
{"username": "js_user1", "email": "user1@example.com", "first_name": "John"},
{"username": "js_user2", "email": "user2@example.com", "first_name": "Jane"},
{"username": "js_user3", "email": "invalid-email"},
{"username": "", "email": "user4@example.com"}
]'::json);
示例3:使用 JavaScript 查询并返回 JSON数据
-- JavaScript 用户分析函数
CREATE OR REPLACE FUNCTION js_user_analytics(date_from TIMESTAMP, date_to TIMESTAMP)
RETURNS JSON AS $$
const analytics = {
summary: {},
userStats: [],
behaviorDistribution: {}
};
try {
// 获取用户总数和新用户数
const userSummary = plv8.execute(`
SELECT
COUNT(*) as total_users,
COUNT(*) FILTER (WHERE created_at BETWEEN $1 AND $2) as new_users
FROM users
`, [date_from, date_to]);
analytics.summary = userSummary[0];
// 获取用户行为统计
const behaviorStats = plv8.execute(`
SELECT
action,
COUNT(*) as count,
COUNT(DISTINCT user_id) as unique_users
FROM user_behaviors
WHERE created_at BETWEEN $1 AND $2
GROUP BY action
ORDER BY count DESC
`, [date_from, date_to]);
analytics.behaviorDistribution = behaviorStats.reduce((acc, row) => {
acc[row.action] = {
total_actions: parseInt(row.count),
unique_users: parseInt(row.unique_users)
};
return acc;
}, {});
// 获取活跃用户统计
const activeUsers = plv8.execute(`
SELECT
u.id,
u.username,
u.email,
COUNT(ub.id) as total_actions,
COUNT(DISTINCT ub.action) as unique_actions,
MAX(ub.created_at) as last_activity
FROM users u
JOIN user_behaviors ub ON u.id = ub.user_id
WHERE ub.created_at BETWEEN $1 AND $2
GROUP BY u.id, u.username, u.email
ORDER BY total_actions DESC
LIMIT 10
`, [date_from, date_to]);
analytics.userStats = activeUsers.map(user => ({
id: user.id,
username: user.username,
email: user.email,
totalActions: parseInt(user.total_actions),
uniqueActions: parseInt(user.unique_actions),
lastActivity: user.last_activity
}));
} catch (error) {
return { error: error.message };
}
return analytics;
$$ LANGUAGE plv8;
-- 调用分析函数
SELECT js_user_analytics('2024-01-01'::timestamp, '2024-01-31'::timestamp);
使用 JavaScript 进行更多高级操作
示例4:处理 JSON 数据
-- JavaScript JSON 数据处理和转换
CREATE OR REPLACE FUNCTION js_process_product_data(product_json JSON)
RETURNS JSON AS $$
const product = product_json || {};
// 数据验证和清理
const processedProduct = {
id: product.id || null,
name: (product.name || '').trim(),
category: (product.category || '').toLowerCase(),
price: parseFloat(product.price) || 0,
attributes: {},
computed: {}
};
// 处理属性数据
if (product.attributes && typeof product.attributes === 'object') {
// 标准化属性名称
Object.keys(product.attributes).forEach(key => {
const normalizedKey = key.toLowerCase().replace(/[^a-z0-9]/g, '_');
processedProduct.attributes[normalizedKey] = product.attributes[key];
});
}
// 计算字段
processedProduct.computed.priceCategory =
processedProduct.price < 100 ? 'budget' :
processedProduct.price < 500 ? 'mid-range' : 'premium';
processedProduct.computed.hasDiscount =
product.originalPrice && product.originalPrice > processedProduct.price;
if (processedProduct.computed.hasDiscount) {
processedProduct.computed.discountPercent = Math.round(
((product.originalPrice - processedProduct.price) / product.originalPrice) * 100
);
}
// 生成搜索关键词
const searchKeywords = [];
searchKeywords.push(processedProduct.name.toLowerCase());
searchKeywords.push(processedProduct.category);
if (processedProduct.attributes.brand) {
searchKeywords.push(processedProduct.attributes.brand.toLowerCase());
}
processedProduct.computed.searchKeywords = [...new Set(searchKeywords)];
// 数据验证
const validation = {
isValid: true,
errors: []
};
if (!processedProduct.name) {
validation.errors.push('Product name is required');
validation.isValid = false;
}
if (processedProduct.price <= 0) {
validation.errors.push('Price must be greater than 0');
validation.isValid = false;
}
processedProduct.validation = validation;
return processedProduct;
$$ LANGUAGE plv8;
-- 批量处理产品数据的函数
CREATE OR REPLACE FUNCTION js_batch_process_products(products_json JSON)
RETURNS JSON AS $$
const products = Array.isArray(products_json) ? products_json : [products_json];
const results = {
processed: [],
errors: [],
summary: {
total: products.length,
valid: 0,
invalid: 0
}
};
products.forEach((product, index) => {
try {
const processed = plv8.execute(
'SELECT js_process_product_data($1) as result',
[JSON.stringify(product)]
)[0].result;
if (processed.validation.isValid) {
results.processed.push(processed);
results.summary.valid++;
} else {
results.errors.push({
index: index,
product: product,
errors: processed.validation.errors
});
results.summary.invalid++;
}
} catch (error) {
results.errors.push({
index: index,
product: product,
errors: [error.message]
});
results.summary.invalid++;
}
});
return results;
$$ LANGUAGE plv8;
-- 测试批量处理
SELECT js_batch_process_products('[
{
"id": 1,
"name": "iPhone 15 Pro",
"category": "Electronics",
"price": 999.99,
"originalPrice": 1099.99,
"attributes": {
"Brand": "Apple",
"Storage Size": "256GB",
"Color": "Space Gray"
}
},
{
"id": 2,
"name": "Invalid Product",
"category": "Test",
"price": -10,
"attributes": {}
}
]'::json);
JavaScript 函数开发流程
PLV8 最佳实践
// PLV8 开发最佳实践
const plv8BestPractices = {
性能优化: {
预编译语句: "使用 plv8.prepare 预编译频繁执行的 SQL",
批处理: "批量处理数据减少数据库往返",
索引利用: "确保查询能够有效使用索引",
结果缓存: "缓存计算结果避免重复计算"
},
错误处理: {
异常捕获: "使用 try-catch 处理所有可能的错误",
参数验证: "验证输入参数的有效性",
事务处理: "在需要时正确处理事务",
日志记录: "记录错误信息用于调试"
},
代码质量: {
函数职责: "保持函数职责单一",
代码复用: "提取公共逻辑为独立函数",
文档注释: "添加清晰的函数文档",
测试覆盖: "编写充分的测试用例"
}
};