PostgreSQL入门与实践

PostgreSQL入门与实践

SQL 概述

SQL(Structured Query Language)是用于管理关系数据库的标准语言。PostgreSQL 作为功能最强大的开源关系数据库之一,完全支持 SQL 标准,并提供了许多扩展功能。

常见 SQL 操作

数据定义语言 (DDL)
// DDL 操作类型
const ddlOperations = {
  CREATE: "创建数据库对象(表、索引、视图等)",
  ALTER: "修改现有数据库对象结构",
  DROP: "删除数据库对象",
  TRUNCATE: "清空表数据但保留结构"
};
-- 创建表
CREATE TABLE users (
    id SERIAL PRIMARY KEY,
    username VARCHAR(50) UNIQUE NOT NULL,
    email VARCHAR(100) UNIQUE NOT NULL,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- 修改表结构
ALTER TABLE users ADD COLUMN last_login TIMESTAMP;

-- 删除表
DROP TABLE IF EXISTS temp_table;
数据操作语言 (DML)
// DML 操作类型
const dmlOperations = {
  SELECT: "查询数据",
  INSERT: "插入数据",
  UPDATE: "更新数据",
  DELETE: "删除数据"
};
-- 插入数据
INSERT INTO users (username, email) 
VALUES ('admin', 'admin@example.com');

-- 查询数据
SELECT * FROM users WHERE created_at > '2024-01-01';

-- 更新数据
UPDATE users SET last_login = NOW() WHERE username = 'admin';

-- 删除数据
DELETE FROM users WHERE created_at < '2023-01-01';

SQL 执行流程

优化过程
查询优化器
统计信息
索引信息
约束条件
SQL 语句
语法解析
语义分析
执行计划生成
执行引擎
存储引擎
返回结果

PostgreSQL 与其他数据库的比较

PostgreSQL 与 MySQL 的比较

功能对比
// PostgreSQL vs MySQL 特性对比
const dbComparison = {
  PostgreSQL: {
    ACID支持: "完全支持",
    复杂查询: "优秀(支持CTE、窗口函数等)",
    JSON支持: "原生JSONB类型",
    全文检索: "内置支持",
    扩展性: "插件架构,高度可扩展",
    数据类型: "丰富(数组、范围类型等)",
    并发控制: "MVCC多版本并发控制",
    复制: "流复制、逻辑复制",
    许可证: "PostgreSQL许可证(类BSD)"
  },
  MySQL: {
    ACID支持: "InnoDB引擎支持",
    复杂查询: "基本支持",
    JSON支持: "5.7+版本支持",
    全文检索: "MyISAM和InnoDB支持",
    扩展性: "存储引擎架构",
    数据类型: "标准SQL类型",
    并发控制: "锁机制",
    复制: "主从复制、组复制",
    许可证: "GPL双许可证"
  }
};
选择建议
graph TD
    A[数据库选择] --> B{项目需求}
    
    B --> C[复杂分析查询]
    B --> D[高并发读写]
    B --> E[JSON数据处理]
    B --> F[简单CRUD操作]
    
    C --> G[PostgreSQL]
    D --> H{读多写少?}
    E --> G
    F --> I[MySQL/PostgreSQL]
    
    H --> J[是] --> K[MySQL]
    H --> L[否] --> G
    
    G --> M[推荐理由:功能强大、扩展性好]
    K --> N[推荐理由:性能优秀、生态成熟]
    I --> O[根据团队经验选择]

PostgreSQL 与 MongoDB 的比较

架构对比
// PostgreSQL vs MongoDB 架构对比
const architectureComparison = {
  PostgreSQL: {
    数据模型: "关系型(表、行、列)",
    查询语言: "SQL",
    事务支持: "完整ACID事务",
    一致性: "强一致性",
    扩展方式: "垂直扩展为主,支持读写分离",
    模式: "严格模式定义",
    索引类型: "B-Tree、Hash、GiST、GIN等"
  },
  MongoDB: {
    数据模型: "文档型(集合、文档、字段)",
    查询语言: "MongoDB查询语言",
    事务支持: "4.0+版本支持多文档事务",
    一致性: "最终一致性(可配置)",
    扩展方式: "水平扩展(分片)",
    模式: "灵活模式",
    索引类型: "单字段、复合、多键、文本等"
  }
};

PostgreSQL 数据类型

基本数据类型

数值类型
// PostgreSQL 数值类型
const numericTypes = {
  整数类型: {
    SMALLINT: "2字节,-32768 到 32767",
    INTEGER: "4字节,-2147483648 到 2147483647",
    BIGINT: "8字节,-9223372036854775808 到 9223372036854775807",
    SERIAL: "自增整数,等同于INTEGER + DEFAULT"
  },
  浮点类型: {
    REAL: "4字节单精度",
    DOUBLE_PRECISION: "8字节双精度", 
    NUMERIC: "可变精度,适用于货币等精确计算"
  }
};
CREATE TABLE products (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100) NOT NULL,
    price NUMERIC(10,2),  -- 10位数字,2位小数
    stock INTEGER DEFAULT 0,
    rating REAL,
    views BIGINT DEFAULT 0
);
字符类型
// PostgreSQL 字符类型
const characterTypes = {
  CHAR: "固定长度字符串",
  VARCHAR: "变长字符串,有长度限制",
  TEXT: "变长字符串,无长度限制",
  UUID: "通用唯一标识符"
};
CREATE TABLE articles (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    title VARCHAR(200) NOT NULL,
    slug VARCHAR(100) UNIQUE,
    content TEXT,
    status CHAR(1) DEFAULT 'D' -- D=Draft, P=Published
);
日期时间类型
CREATE TABLE events (
    id SERIAL PRIMARY KEY,
    event_name VARCHAR(100),
    event_date DATE,                    -- 只存日期
    event_time TIME,                    -- 只存时间
    created_at TIMESTAMP,               -- 日期时间(无时区)
    updated_at TIMESTAMPTZ,             -- 日期时间(带时区)
    duration INTERVAL                   -- 时间间隔
);

-- 插入示例
INSERT INTO events (event_name, event_date, event_time, created_at, updated_at, duration) 
VALUES (
    '产品发布会',
    '2024-12-25',
    '14:30:00',
    '2024-01-15 10:00:00',
    '2024-01-15 10:00:00+08',
    '2 hours 30 minutes'
);

高级数据类型

数组类型
// PostgreSQL 数组操作示例
const arrayOperations = {
  创建: "column_name TYPE[]",
  插入: "ARRAY[val1, val2] 或 '{val1,val2}'",
  访问: "array_column[index]",
  查询: "ANY(array_column) 或 array_column @> ARRAY[value]"
};
CREATE TABLE users_advanced (
    id SERIAL PRIMARY KEY,
    username VARCHAR(50),
    tags TEXT[],                        -- 字符串数组
    scores INTEGER[],                   -- 整数数组
    preferences JSONB,
    location POINT                      -- 几何点类型
);

-- 插入数组数据
INSERT INTO users_advanced (username, tags, scores, location) VALUES 
('john_doe', ARRAY['developer', 'postgresql', 'javascript'], '{85, 92, 78}', POINT(40.7128, -74.0060)),
('jane_smith', '{"backend", "python", "data-science"}', '{95, 88, 91}', POINT(51.5074, -0.1278));

-- 查询数组数据
SELECT username FROM users_advanced WHERE 'postgresql' = ANY(tags);
SELECT username FROM users_advanced WHERE tags @> ARRAY['developer'];
SELECT username, tags[1] as first_tag FROM users_advanced;
复合类型
-- 创建复合类型
CREATE TYPE address_type AS (
    street VARCHAR(100),
    city VARCHAR(50),
    state VARCHAR(50),
    zip_code VARCHAR(10)
);

CREATE TABLE customers (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100),
    home_address address_type,
    work_address address_type
);

-- 插入复合类型数据
INSERT INTO customers (name, home_address, work_address) VALUES (
    'Alice Johnson',
    ROW('123 Main St', 'New York', 'NY', '10001'),
    ROW('456 Business Ave', 'New York', 'NY', '10002')
);

PostgreSQL 数据类型选择流程

选择数据类型
数据特征
数值数据
文本数据
日期时间
布尔值
二进制数据
结构化数据
精度要求
高精度
NUMERIC
一般精度
INTEGER/REAL
长度固定?
CHAR
有长度限制?
VARCHAR
TEXT
TIMESTAMP/DATE/TIME
BOOLEAN
BYTEA
JSON/JSONB/数组/复合类型

PostgreSQL 存储过程与函数

PostgreSQL 支持多种编程语言编写存储过程和函数,包括 PL/pgSQL、SQL、C、Python、JavaScript 等。

创建存储过程和函数

函数 vs 存储过程
// 函数与存储过程的区别
const procedureVsFunction = {
  函数: {
    返回值: "必须返回值",
    调用方式: "SELECT function_name()",
    事务控制: "不能控制事务",
    用途: "计算和数据转换"
  },
  存储过程: {
    返回值: "可选返回值",
    调用方式: "CALL procedure_name()",
    事务控制: "可以控制事务",
    用途: "复杂业务逻辑处理"
  }
};
示例1:创建简单的加法函数
-- PL/pgSQL 加法函数
CREATE OR REPLACE FUNCTION add_numbers(a INTEGER, b INTEGER)
RETURNS INTEGER AS $$
BEGIN
    RETURN a + b;
END;
$$ LANGUAGE plpgsql;

-- 使用函数
SELECT add_numbers(10, 20) as result; -- 返回 30

-- SQL 函数版本(更简洁)
CREATE OR REPLACE FUNCTION add_numbers_sql(a INTEGER, b INTEGER)
RETURNS INTEGER AS $$
    SELECT $1 + $2;
$$ LANGUAGE sql;
示例 2:创建复杂的存储过程
-- 用户注册存储过程
CREATE OR REPLACE FUNCTION register_user(
    p_username VARCHAR(50),
    p_email VARCHAR(100),
    p_password VARCHAR(255),
    OUT user_id INTEGER,
    OUT success BOOLEAN,
    OUT message TEXT
) AS $$
DECLARE
    existing_count INTEGER;
BEGIN
    -- 初始化返回值
    success := false;
    message := '';
    user_id := 0;
    
    -- 检查用户名是否存在
    SELECT COUNT(*) INTO existing_count 
    FROM users 
    WHERE username = p_username OR email = p_email;
    
    IF existing_count > 0 THEN
        message := '用户名或邮箱已存在';
        RETURN;
    END IF;
    
    -- 验证邮箱格式
    IF p_email !~ '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$' THEN
        message := '邮箱格式无效';
        RETURN;
    END IF;
    
    -- 插入新用户
    BEGIN
        INSERT INTO users (username, email, password_hash, created_at)
        VALUES (p_username, p_email, crypt(p_password, gen_salt('bf')), NOW())
        RETURNING id INTO user_id;
        
        success := true;
        message := '注册成功';
        
        -- 记录用户行为
        INSERT INTO user_behaviors (user_id, action, created_at)
        VALUES (user_id, 'register', NOW());
        
    EXCEPTION
        WHEN OTHERS THEN
            message := '注册失败: ' || SQLERRM;
            ROLLBACK;
    END;
END;
$$ LANGUAGE plpgsql;

-- 调用存储过程
SELECT * FROM register_user('newuser', 'newuser@example.com', 'password123');

存储过程开发流程

需求分析
设计函数签名
编写函数体
语法检查
语法正确?
修复错误
单元测试
测试通过?
调试修复
性能测试
部署上线

用户表与用户行为表实战

创建用户表

-- 用户主表
CREATE TABLE users (
    id SERIAL PRIMARY KEY,
    username VARCHAR(50) UNIQUE NOT NULL,
    email VARCHAR(100) UNIQUE NOT NULL,
    password_hash VARCHAR(255) NOT NULL,
    first_name VARCHAR(50),
    last_name VARCHAR(50),
    avatar_url VARCHAR(255),
    phone VARCHAR(20),
    birth_date DATE,
    gender CHAR(1) CHECK (gender IN ('M', 'F', 'O')), -- M=Male, F=Female, O=Other
    status VARCHAR(20) DEFAULT 'active' CHECK (status IN ('active', 'inactive', 'banned')),
    email_verified BOOLEAN DEFAULT FALSE,
    phone_verified BOOLEAN DEFAULT FALSE,
    last_login_at TIMESTAMPTZ,
    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW()
);

-- 创建更新时间触发器
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
    NEW.updated_at = NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

CREATE TRIGGER update_users_updated_at 
    BEFORE UPDATE ON users
    FOR EACH ROW 
    EXECUTE FUNCTION update_updated_at_column();

-- 创建索引优化查询
CREATE INDEX idx_users_email ON users(email);
CREATE INDEX idx_users_username ON users(username);
CREATE INDEX idx_users_status ON users(status);
CREATE INDEX idx_users_created_at ON users(created_at);

创建用户行为表

-- 用户行为表
CREATE TABLE user_behaviors (
    id BIGSERIAL PRIMARY KEY,
    user_id INTEGER REFERENCES users(id) ON DELETE CASCADE,
    action VARCHAR(50) NOT NULL, -- login, logout, view_product, purchase, etc.
    target_type VARCHAR(50),      -- product, article, user, etc.
    target_id INTEGER,
    metadata JSONB,               -- 存储额外的行为数据
    ip_address INET,
    user_agent TEXT,
    session_id VARCHAR(100),
    created_at TIMESTAMPTZ DEFAULT NOW()
);

-- 用户行为表索引
CREATE INDEX idx_user_behaviors_user_id ON user_behaviors(user_id);
CREATE INDEX idx_user_behaviors_action ON user_behaviors(action);
CREATE INDEX idx_user_behaviors_created_at ON user_behaviors(created_at);
CREATE INDEX idx_user_behaviors_target ON user_behaviors(target_type, target_id);

-- 用户行为元数据的 GIN 索引,支持 JSONB 查询
CREATE INDEX idx_user_behaviors_metadata ON user_behaviors USING gin(metadata);

-- 创建分区表(按月分区)
CREATE TABLE user_behaviors_y2024m01 PARTITION OF user_behaviors
    FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');

CREATE TABLE user_behaviors_y2024m02 PARTITION OF user_behaviors
    FOR VALUES FROM ('2024-02-01') TO ('2024-03-01');

插入与查询示例

插入示例数据
-- 插入用户数据
INSERT INTO users (username, email, password_hash, first_name, last_name, gender) VALUES
('john_doe', 'john@example.com', crypt('password123', gen_salt('bf')), 'John', 'Doe', 'M'),
('jane_smith', 'jane@example.com', crypt('password456', gen_salt('bf')), 'Jane', 'Smith', 'F'),
('admin_user', 'admin@example.com', crypt('admin789', gen_salt('bf')), 'Admin', 'User', 'O');

-- 插入用户行为数据
INSERT INTO user_behaviors (user_id, action, target_type, target_id, metadata, ip_address, user_agent) VALUES
(1, 'login', NULL, NULL, '{"device": "desktop", "browser": "chrome"}', '192.168.1.100', 'Mozilla/5.0...'),
(1, 'view_product', 'product', 101, '{"category": "electronics", "price": 299.99}', '192.168.1.100', 'Mozilla/5.0...'),
(1, 'add_to_cart', 'product', 101, '{"quantity": 1, "variant": "black"}', '192.168.1.100', 'Mozilla/5.0...'),
(2, 'login', NULL, NULL, '{"device": "mobile", "browser": "safari"}', '192.168.1.101', 'Mozilla/5.0...'),
(2, 'search', NULL, NULL, '{"query": "laptop", "results_count": 25}', '192.168.1.101', 'Mozilla/5.0...');
复杂查询示例
// 用户行为分析查询示例
const analyticsQueries = {
  用户活跃度: "统计用户每日活跃情况",
  行为转化漏斗: "分析从浏览到购买的转化率",
  用户画像: "基于行为数据构建用户特征",
  实时监控: "监控异常行为和系统性能"
};
-- 1. 用户活跃度分析
WITH daily_active_users AS (
    SELECT 
        DATE(created_at) as activity_date,
        COUNT(DISTINCT user_id) as daily_active_users,
        COUNT(*) as total_actions
    FROM user_behaviors 
    WHERE created_at >= NOW() - INTERVAL '30 days'
    GROUP BY DATE(created_at)
)
SELECT 
    activity_date,
    daily_active_users,
    total_actions,
    ROUND(total_actions::numeric / daily_active_users, 2) as avg_actions_per_user
FROM daily_active_users
ORDER BY activity_date DESC;

-- 2. 用户行为转化漏斗分析
SELECT 
    action,
    COUNT(DISTINCT user_id) as unique_users,
    COUNT(*) as total_actions,
    ROUND(COUNT(DISTINCT user_id) * 100.0 / 
          LAG(COUNT(DISTINCT user_id)) OVER (ORDER BY 
            CASE action 
                WHEN 'view_product' THEN 1
                WHEN 'add_to_cart' THEN 2  
                WHEN 'checkout' THEN 3
                WHEN 'purchase' THEN 4
            END
          ), 2) as conversion_rate
FROM user_behaviors 
WHERE action IN ('view_product', 'add_to_cart', 'checkout', 'purchase')
    AND created_at >= NOW() - INTERVAL '7 days'
GROUP BY action
ORDER BY CASE action 
    WHEN 'view_product' THEN 1
    WHEN 'add_to_cart' THEN 2
    WHEN 'checkout' THEN 3 
    WHEN 'purchase' THEN 4
END;

-- 3. 用户画像分析
SELECT 
    u.username,
    u.gender,
    DATE_PART('year', AGE(u.birth_date)) as age,
    COUNT(DISTINCT ub.action) as distinct_actions,
    COUNT(*) as total_behaviors,
    MAX(ub.created_at) as last_activity,
    COALESCE(purchase_stats.total_purchases, 0) as total_purchases,
    COALESCE(purchase_stats.total_amount, 0) as total_spent
FROM users u
LEFT JOIN user_behaviors ub ON u.id = ub.user_id
LEFT JOIN (
    SELECT 
        user_id,
        COUNT(*) as total_purchases,
        SUM((metadata->>'amount')::numeric) as total_amount
    FROM user_behaviors 
    WHERE action = 'purchase'
    GROUP BY user_id
) purchase_stats ON u.id = purchase_stats.user_id
WHERE u.created_at >= NOW() - INTERVAL '90 days'
GROUP BY u.id, u.username, u.gender, u.birth_date, 
         purchase_stats.total_purchases, purchase_stats.total_amount
ORDER BY total_behaviors DESC;

PostgreSQL 高级应用

全文检索(Full-Text Search)

PostgreSQL 提供强大的全文检索功能,支持多种语言和自定义配置。

示例:创建支持全文检索的表
-- 创建文章表支持全文检索
CREATE TABLE articles (
    id SERIAL PRIMARY KEY,
    title VARCHAR(200) NOT NULL,
    content TEXT NOT NULL,
    author_id INTEGER REFERENCES users(id),
    category VARCHAR(50),
    tags TEXT[],
    published_at TIMESTAMPTZ,
    created_at TIMESTAMPTZ DEFAULT NOW(),
    
    -- 全文检索向量字段
    search_vector tsvector
);
创建索引并插入数据
-- 创建全文检索索引
CREATE INDEX idx_articles_search_vector ON articles USING gin(search_vector);
CREATE INDEX idx_articles_category ON articles(category);
CREATE INDEX idx_articles_published_at ON articles(published_at);

-- 创建更新全文检索向量的函数
CREATE OR REPLACE FUNCTION update_article_search_vector() 
RETURNS TRIGGER AS $$
BEGIN
    NEW.search_vector := 
        setweight(to_tsvector('english', COALESCE(NEW.title, '')), 'A') ||
        setweight(to_tsvector('english', COALESCE(NEW.content, '')), 'B') ||
        setweight(to_tsvector('english', COALESCE(array_to_string(NEW.tags, ' '), '')), 'C');
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- 创建触发器
CREATE TRIGGER update_articles_search_vector
    BEFORE INSERT OR UPDATE ON articles
    FOR EACH ROW EXECUTE FUNCTION update_article_search_vector();

-- 插入测试数据
INSERT INTO articles (title, content, author_id, category, tags, published_at) VALUES
('PostgreSQL Advanced Features', 'PostgreSQL offers many advanced features like full-text search, JSON support, and window functions...', 1, 'database', ARRAY['postgresql', 'database', 'sql'], NOW()),
('JavaScript Performance Tips', 'Optimizing JavaScript performance is crucial for modern web applications. Here are some key strategies...', 2, 'programming', ARRAY['javascript', 'performance', 'optimization'], NOW()),
('Database Design Best Practices', 'When designing databases, consider normalization, indexing strategies, and query optimization...', 1, 'database', ARRAY['database', 'design', 'sql'], NOW());
使用全文检索查询
// 全文检索查询类型
const searchTypes = {
  基本搜索: "使用 @@ 操作符进行匹配",
  短语搜索: "使用双引号包围的精确短语",
  布尔搜索: "使用 & | ! 进行逻辑组合",
  排序搜索: "使用 ts_rank 进行相关性排序"
};
-- 基本全文检索
SELECT id, title, ts_rank(search_vector, query) AS rank
FROM articles, plainto_tsquery('english', 'PostgreSQL database') query
WHERE search_vector @@ query
ORDER BY rank DESC;

-- 高级搜索:布尔查询
SELECT id, title, content,
       ts_rank(search_vector, query) AS rank,
       ts_headline('english', content, query) AS highlighted_content
FROM articles, to_tsquery('english', 'PostgreSQL & (performance | optimization)') query
WHERE search_vector @@ query
ORDER BY rank DESC;

-- 多条件搜索
SELECT a.id, a.title, a.category, 
       ts_rank(a.search_vector, query) AS rank
FROM articles a, to_tsquery('english', 'database & design') query
WHERE a.search_vector @@ query
    AND a.category = 'database'
    AND a.published_at >= NOW() - INTERVAL '30 days'
ORDER BY rank DESC, a.published_at DESC;

-- 搜索建议(自动补全)
SELECT word, ndoc, nentry
FROM ts_stat('SELECT search_vector FROM articles')
WHERE word ILIKE 'postgres%'
ORDER BY nentry DESC, ndoc DESC
LIMIT 10;

数据分区(Table Partitioning)

数据分区可以提高大表的查询性能和维护效率。

示例:创建分区表
-- 创建按时间分区的订单表
CREATE TABLE orders (
    id BIGSERIAL,
    user_id INTEGER NOT NULL,
    product_id INTEGER NOT NULL,
    quantity INTEGER NOT NULL,
    amount DECIMAL(10,2) NOT NULL,
    status VARCHAR(20) DEFAULT 'pending',
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    PRIMARY KEY (id, created_at)
) PARTITION BY RANGE (created_at);
创建分区
-- 创建月度分区
CREATE TABLE orders_y2024m01 PARTITION OF orders
    FOR VALUES FROM ('2024-01-01') TO ('2024-02-01');

CREATE TABLE orders_y2024m02 PARTITION OF orders  
    FOR VALUES FROM ('2024-02-01') TO ('2024-03-01');

CREATE TABLE orders_y2024m03 PARTITION OF orders
    FOR VALUES FROM ('2024-03-01') TO ('2024-04-01');

-- 创建分区索引
CREATE INDEX idx_orders_y2024m01_user_id ON orders_y2024m01(user_id);
CREATE INDEX idx_orders_y2024m01_status ON orders_y2024m01(status);

CREATE INDEX idx_orders_y2024m02_user_id ON orders_y2024m02(user_id);
CREATE INDEX idx_orders_y2024m02_status ON orders_y2024m02(status);

-- 自动创建分区的函数
CREATE OR REPLACE FUNCTION create_monthly_partition(table_name text, start_date date)
RETURNS void AS $$
DECLARE
    partition_name text;
    end_date date;
BEGIN
    partition_name := table_name || '_y' || EXTRACT(year FROM start_date) || 'm' || LPAD(EXTRACT(month FROM start_date)::text, 2, '0');
    end_date := start_date + INTERVAL '1 month';
    
    EXECUTE format('CREATE TABLE %I PARTITION OF %I FOR VALUES FROM (%L) TO (%L)',
                   partition_name, table_name, start_date, end_date);
                   
    -- 创建索引
    EXECUTE format('CREATE INDEX idx_%s_user_id ON %I(user_id)', partition_name, partition_name);
    EXECUTE format('CREATE INDEX idx_%s_status ON %I(status)', partition_name, partition_name);
END;
$$ LANGUAGE plpgsql;

-- 创建未来几个月的分区
SELECT create_monthly_partition('orders', date_trunc('month', NOW() + interval '1 month' * generate_series(0, 11)));
插入和查询数据
-- 插入测试数据
INSERT INTO orders (user_id, product_id, quantity, amount, status, created_at) 
SELECT 
    (random() * 100 + 1)::integer,
    (random() * 1000 + 1)::integer,
    (random() * 5 + 1)::integer,
    (random() * 1000 + 10)::numeric(10,2),
    CASE (random() * 3)::integer 
        WHEN 0 THEN 'pending'
        WHEN 1 THEN 'completed' 
        ELSE 'cancelled'
    END,
    NOW() - (random() * 90)::integer * INTERVAL '1 day'
FROM generate_series(1, 100000);

-- 分区裁剪查询示例
EXPLAIN (ANALYZE, BUFFERS) 
SELECT COUNT(*), AVG(amount) 
FROM orders 
WHERE created_at >= '2024-01-01' AND created_at < '2024-02-01'
    AND status = 'completed';

-- 跨分区查询
SELECT 
    DATE_TRUNC('month', created_at) as month,
    status,
    COUNT(*) as order_count,
    SUM(amount) as total_amount
FROM orders 
WHERE created_at >= '2024-01-01'
GROUP BY DATE_TRUNC('month', created_at), status
ORDER BY month, status;

分区策略选择流程

分区优化
查询性能提升
分区裁剪
并行处理
维护简化
数据管理效率
选择分区策略
数据特征
时间序列数据
地理分布数据
分类数据
哈希分布
RANGE分区
LIST分区
LIST分区
HASH分区
按日期/时间范围分区
按地区/国家分区
按类别/状态分区
均匀分布数据

并行查询 (Parallel Query)

PostgreSQL 支持并行查询执行,可以显著提高大数据集查询的性能。

启用并行查询
-- 查看当前并行查询设置
SHOW max_parallel_workers_per_gather;
SHOW max_worker_processes;
SHOW max_parallel_workers;

-- 设置并行查询参数
SET max_parallel_workers_per_gather = 4;
SET parallel_tuple_cost = 0.1;
SET parallel_setup_cost = 1000.0;

-- 创建大表用于测试
CREATE TABLE large_dataset AS
SELECT 
    generate_series(1, 10000000) as id,
    (random() * 1000000)::integer as value,
    md5(random()::text) as text_data,
    NOW() - (random() * 365 * 24 * 60 * 60)::integer * INTERVAL '1 second' as created_at;

-- 创建索引
CREATE INDEX idx_large_dataset_value ON large_dataset(value);
CREATE INDEX idx_large_dataset_created_at ON large_dataset(created_at);
执行并行查询
// 并行查询适用场景
const parallelQueryScenarios = {
  全表扫描: "大表的聚合查询",
  排序操作: "ORDER BY 大量数据",
  连接操作: "大表之间的JOIN",
  分组聚合: "GROUP BY 操作"
};
-- 并行聚合查询
EXPLAIN (ANALYZE, BUFFERS)
SELECT COUNT(*), AVG(value), MIN(created_at), MAX(created_at)
FROM large_dataset;

-- 并行分组查询
EXPLAIN (ANALYZE, BUFFERS)
SELECT 
    DATE_TRUNC('month', created_at) as month,
    COUNT(*) as record_count,
    AVG(value) as avg_value
FROM large_dataset
WHERE created_at >= '2023-01-01'
GROUP BY DATE_TRUNC('month', created_at)
ORDER BY month;

-- 并行连接查询
CREATE TABLE users_large AS
SELECT generate_series(1, 1000000) as id,
       'user_' || generate_series(1, 1000000) as username;

EXPLAIN (ANALYZE, BUFFERS)
SELECT u.username, COUNT(d.id) as dataset_count
FROM users_large u
JOIN large_dataset d ON u.id = d.value % 1000000 + 1
GROUP BY u.username
HAVING COUNT(d.id) > 10
ORDER BY dataset_count DESC
LIMIT 100;

JSON 和 JSONB 数据类型

PostgreSQL 对 JSON 数据提供了强大的支持,JSONB 是二进制格式的 JSON,提供更好的性能。

示例:创建包含 JSONB 字段的表
-- 创建产品表包含 JSONB 配置
CREATE TABLE products (
    id SERIAL PRIMARY KEY,
    name VARCHAR(100) NOT NULL,
    category VARCHAR(50),
    price DECIMAL(10,2),
    attributes JSONB,              -- 产品属性
    metadata JSONB,                -- 元数据
    created_at TIMESTAMPTZ DEFAULT NOW()
);
插入 JSON 数据
-- 插入包含 JSONB 数据的记录
INSERT INTO products (name, category, price, attributes, metadata) VALUES
('iPhone 15 Pro', 'electronics', 999.99, 
 '{"brand": "Apple", "color": "Space Gray", "storage": "256GB", "features": ["Face ID", "5G", "Wireless Charging"]}',
 '{"supplier": "Apple Inc", "warranty": "1 year", "tags": ["premium", "smartphone"]}'
),
('MacBook Air M2', 'electronics', 1299.99,
 '{"brand": "Apple", "processor": "M2", "ram": "8GB", "storage": "256GB", "screen": "13.6inch"}',
 '{"supplier": "Apple Inc", "warranty": "1 year", "tags": ["laptop", "ultrabook"]}'
),
('Nike Air Max', 'shoes', 129.99,
 '{"brand": "Nike", "size": [7, 8, 9, 10, 11], "color": ["white", "black", "red"], "type": "running"}',
 '{"supplier": "Nike Inc", "warranty": "6 months", "tags": ["sport", "running"]}'
);
查询 JSON 数据
// JSONB 操作符
const jsonbOperators = {
  "->": "获取JSON对象字段(返回JSON)",
  "->>": "获取JSON对象字段(返回文本)", 
  "#>": "获取指定路径的JSON对象",
  "#>>": "获取指定路径的文本",
  "@>": "包含操作符",
  "<@": "被包含操作符",
  "?": "存在键操作符",
  "?&": "存在所有键",
  "?|": "存在任意键"
};
-- 基本 JSON 查询
SELECT name, attributes->>'brand' as brand, attributes->>'color' as color
FROM products
WHERE category = 'electronics';

-- 复杂 JSON 路径查询
SELECT name, attributes#>>'{features,0}' as first_feature
FROM products
WHERE attributes ? 'features';

-- JSON 包含查询
SELECT name, price
FROM products
WHERE attributes @> '{"brand": "Apple"}';

-- 数组元素查询
SELECT name, jsonb_array_elements_text(attributes->'features') as feature
FROM products
WHERE attributes ? 'features';

-- JSON 聚合查询
SELECT 
    attributes->>'brand' as brand,
    COUNT(*) as product_count,
    AVG(price) as avg_price,
    jsonb_agg(name) as product_names
FROM products
WHERE attributes ? 'brand'
GROUP BY attributes->>'brand';

-- 复杂 JSON 查询:查找包含特定标签的产品
SELECT name, price, metadata->'tags' as tags
FROM products
WHERE metadata->'tags' @> '["premium"]'
   OR metadata->'tags' @> '["sport"]';

-- JSON 更新操作
UPDATE products 
SET attributes = attributes || '{"updated_at": "2024-01-15"}'::jsonb
WHERE id = 1;

-- JSON 删除键操作
UPDATE products
SET attributes = attributes - 'updated_at'
WHERE id = 1;
JSONB 索引
-- 创建 JSONB 索引提高查询性能
-- GIN 索引:支持包含操作符 @>, ?, ?&, ?|
CREATE INDEX idx_products_attributes ON products USING gin(attributes);
CREATE INDEX idx_products_metadata ON products USING gin(metadata);

-- 特定路径索引
CREATE INDEX idx_products_brand ON products USING btree((attributes->>'brand'));
CREATE INDEX idx_products_category ON products USING btree((attributes->>'category'));

-- 表达式索引
CREATE INDEX idx_products_price_range ON products(price) 
WHERE attributes->>'brand' = 'Apple';

-- 查看索引使用情况
EXPLAIN (ANALYZE, BUFFERS)
SELECT name, price FROM products 
WHERE attributes @> '{"brand": "Apple", "storage": "256GB"}';

JSON 数据处理流程

flowchart TD
    A[JSON 数据输入] --> B{数据格式验证}
    B --> C[有效] --> D[选择存储类型]
    B --> E[无效] --> F[数据清洗]
    F --> D
    
    D --> G{数据特征}
    G --> H[频繁查询] --> I[JSONB + 索引]
    G --> J[存储优先] --> K[JSON]
    
    I --> L[创建 GIN 索引]
    L --> M[路径特定索引]
    M --> N[查询优化]
    
    subgraph "查询优化"
        O[使用操作符] --> P[@>, ?, ?&, ?|]
        Q[避免函数] --> R[减少计算开销]
        S[索引覆盖] --> T[提高查询速度]
    end

数据库逻辑复制(Logical Replication)

逻辑复制允许在不同 PostgreSQL 实例之间复制数据,支持表级别的选择性复制。

创建发布者和订阅者
// 逻辑复制应用场景
const logicalReplicationUses = {
  数据分发: "将数据复制到多个只读副本",
  实时分析: "将 OLTP 数据复制到分析系统",
  迁移升级: "在不同版本间迁移数据",
  多租户: "在租户间复制特定数据"
};
-- 发布者配置 (源数据库)
-- 1. 确保配置参数
-- wal_level = logical
-- max_replication_slots >= 1
-- max_wal_senders >= 1

-- 2. 创建复制用户
CREATE USER replication_user WITH REPLICATION LOGIN PASSWORD 'replication_password';
GRANT SELECT ON ALL TABLES IN SCHEMA public TO replication_user;
GRANT USAGE ON SCHEMA public TO replication_user;

-- 3. 创建发布
CREATE PUBLICATION users_publication FOR TABLE users, user_behaviors;

-- 查看发布信息
SELECT * FROM pg_publication;
SELECT * FROM pg_publication_tables WHERE pubname = 'users_publication';

-- 4. 创建复制槽
SELECT pg_create_logical_replication_slot('users_replication_slot', 'pgoutput');

-- 订阅者配置 (目标数据库)
-- 1. 创建相同结构的表
CREATE TABLE users (
    id SERIAL PRIMARY KEY,
    username VARCHAR(50) UNIQUE NOT NULL,
    email VARCHAR(100) UNIQUE NOT NULL,
    password_hash VARCHAR(255) NOT NULL,
    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW()
);

CREATE TABLE user_behaviors (
    id BIGSERIAL PRIMARY KEY,
    user_id INTEGER REFERENCES users(id) ON DELETE CASCADE,
    action VARCHAR(50) NOT NULL,
    metadata JSONB,
    created_at TIMESTAMPTZ DEFAULT NOW()
);

-- 2. 创建订阅
CREATE SUBSCRIPTION users_subscription
    CONNECTION 'host=source_host port=5432 dbname=source_db user=replication_user password=replication_password'
    PUBLICATION users_publication;

-- 查看订阅状态
SELECT * FROM pg_subscription;
SELECT * FROM pg_replication_slots;

-- 监控复制状态
SELECT 
    application_name,
    state,
    sent_lsn,
    write_lsn,
    flush_lsn,
    replay_lsn,
    write_lag,
    flush_lag,
    replay_lag
FROM pg_stat_replication;

逻辑复制架构图

graph TD
    subgraph "发布者 (Publisher)"
        A[源数据库]
        B[WAL 日志]
        C[逻辑复制槽]
        D[发布 (Publication)]
        
        A --> B
        B --> C
        A --> D
    end
    
    subgraph "网络传输"
        E[复制连接]
    end
    
    subgraph "订阅者 (Subscriber)"
        F[订阅 (Subscription)]
        G[应用进程]
        H[目标数据库]
        
        F --> G
        G --> H
    end
    
    C --> E
    D --> E
    E --> F
    
    subgraph "监控和管理"
        I[复制状态监控]
        J[冲突处理]
        K[性能调优]
    end

JavaScript 编写 postgresql 存储过程与函数

PostgreSQL 支持使用 PLV8 扩展来编写 JavaScript 存储过程和函数,为熟悉 JavaScript 的开发者提供了便利。

安装 PLV8 扩展

容器内安装
// PLV8 扩展优势
const plv8Advantages = {
  熟悉语法: "使用 JavaScript 语法编写",
  JSON处理: "原生 JSON 支持",
  性能优秀: "V8 引擎驱动",
  库支持: "支持部分 JavaScript 库",
  调试便利: "熟悉的调试方式"
};
# Docker 容器中安装 PLV8
# 使用支持 PLV8 的 PostgreSQL 镜像
docker pull postgres:15
docker run --name postgres-plv8 -e POSTGRES_PASSWORD=postgres -d -p 5432:5432 postgres:15

# 进入容器
docker exec -it postgres-plv8 bash

# 安装依赖
apt-get update
apt-get install -y build-essential postgresql-server-dev-15

# 下载并编译 PLV8
git clone https://github.com/plv8/plv8.git
cd plv8
make static
make install
启用 PLV8 扩展
-- 连接到数据库并启用扩展
CREATE EXTENSION IF NOT EXISTS plv8;

-- 验证扩展安装
SELECT * FROM pg_extension WHERE extname = 'plv8';

-- 查看 PLV8 版本
SELECT plv8_version();

使用 JavaScript 编写存储过程或函数

示例1:使用 JavaScript 编写加法函数
-- JavaScript 加法函数
CREATE OR REPLACE FUNCTION js_add(a INTEGER, b INTEGER)
RETURNS INTEGER AS $$
    return a + b;
$$ LANGUAGE plv8;

-- 更复杂的数学函数
CREATE OR REPLACE FUNCTION js_calculate_stats(numbers INTEGER[])
RETURNS JSON AS $$
    const nums = numbers || [];
    if (nums.length === 0) return null;
    
    const sum = nums.reduce((acc, num) => acc + num, 0);
    const mean = sum / nums.length;
    const sortedNums = [...nums].sort((a, b) => a - b);
    const median = sortedNums.length % 2 === 0
        ? (sortedNums[sortedNums.length / 2 - 1] + sortedNums[sortedNums.length / 2]) / 2
        : sortedNums[Math.floor(sortedNums.length / 2)];
    
    const variance = nums.reduce((acc, num) => acc + Math.pow(num - mean, 2), 0) / nums.length;
    const standardDeviation = Math.sqrt(variance);
    
    return {
        count: nums.length,
        sum: sum,
        mean: mean,
        median: median,
        min: Math.min(...nums),
        max: Math.max(...nums),
        variance: variance,
        standardDeviation: standardDeviation
    };
$$ LANGUAGE plv8;

-- 测试统计函数
SELECT js_calculate_stats(ARRAY[1, 2, 3, 4, 5, 10, 15, 20]);
示例 2:使用 JavaScript 插入数据
-- JavaScript 批量插入用户数据
CREATE OR REPLACE FUNCTION js_batch_create_users(user_data JSON)
RETURNS JSON AS $$
    const users = user_data || [];
    const results = {
        success: [],
        errors: []
    };
    
    for (let i = 0; i < users.length; i++) {
        const user = users[i];
        
        try {
            // 验证必填字段
            if (!user.username || !user.email) {
                throw new Error('Username and email are required');
            }
            
            // 验证邮箱格式
            const emailRegex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
            if (!emailRegex.test(user.email)) {
                throw new Error('Invalid email format');
            }
            
            // 插入用户
            const insertResult = plv8.execute(
                'INSERT INTO users (username, email, first_name, last_name) VALUES ($1, $2, $3, $4) RETURNING id',
                [user.username, user.email, user.first_name || null, user.last_name || null]
            );
            
            results.success.push({
                id: insertResult[0].id,
                username: user.username,
                email: user.email
            });
            
        } catch (error) {
            results.errors.push({
                user: user,
                error: error.message
            });
        }
    }
    
    return results;
$$ LANGUAGE plv8;

-- 测试批量创建用户
SELECT js_batch_create_users('[
    {"username": "js_user1", "email": "user1@example.com", "first_name": "John"},
    {"username": "js_user2", "email": "user2@example.com", "first_name": "Jane"},
    {"username": "js_user3", "email": "invalid-email"},
    {"username": "", "email": "user4@example.com"}
]'::json);
示例3:使用 JavaScript 查询并返回 JSON数据
-- JavaScript 用户分析函数
CREATE OR REPLACE FUNCTION js_user_analytics(date_from TIMESTAMP, date_to TIMESTAMP)
RETURNS JSON AS $$
    const analytics = {
        summary: {},
        userStats: [],
        behaviorDistribution: {}
    };
    
    try {
        // 获取用户总数和新用户数
        const userSummary = plv8.execute(`
            SELECT 
                COUNT(*) as total_users,
                COUNT(*) FILTER (WHERE created_at BETWEEN $1 AND $2) as new_users
            FROM users
        `, [date_from, date_to]);
        
        analytics.summary = userSummary[0];
        
        // 获取用户行为统计
        const behaviorStats = plv8.execute(`
            SELECT 
                action,
                COUNT(*) as count,
                COUNT(DISTINCT user_id) as unique_users
            FROM user_behaviors 
            WHERE created_at BETWEEN $1 AND $2
            GROUP BY action
            ORDER BY count DESC
        `, [date_from, date_to]);
        
        analytics.behaviorDistribution = behaviorStats.reduce((acc, row) => {
            acc[row.action] = {
                total_actions: parseInt(row.count),
                unique_users: parseInt(row.unique_users)
            };
            return acc;
        }, {});
        
        // 获取活跃用户统计
        const activeUsers = plv8.execute(`
            SELECT 
                u.id,
                u.username,
                u.email,
                COUNT(ub.id) as total_actions,
                COUNT(DISTINCT ub.action) as unique_actions,
                MAX(ub.created_at) as last_activity
            FROM users u
            JOIN user_behaviors ub ON u.id = ub.user_id
            WHERE ub.created_at BETWEEN $1 AND $2
            GROUP BY u.id, u.username, u.email
            ORDER BY total_actions DESC
            LIMIT 10
        `, [date_from, date_to]);
        
        analytics.userStats = activeUsers.map(user => ({
            id: user.id,
            username: user.username,
            email: user.email,
            totalActions: parseInt(user.total_actions),
            uniqueActions: parseInt(user.unique_actions),
            lastActivity: user.last_activity
        }));
        
    } catch (error) {
        return { error: error.message };
    }
    
    return analytics;
$$ LANGUAGE plv8;

-- 调用分析函数
SELECT js_user_analytics('2024-01-01'::timestamp, '2024-01-31'::timestamp);

使用 JavaScript 进行更多高级操作

示例4:处理 JSON 数据
-- JavaScript JSON 数据处理和转换
CREATE OR REPLACE FUNCTION js_process_product_data(product_json JSON)
RETURNS JSON AS $$
    const product = product_json || {};
    
    // 数据验证和清理
    const processedProduct = {
        id: product.id || null,
        name: (product.name || '').trim(),
        category: (product.category || '').toLowerCase(),
        price: parseFloat(product.price) || 0,
        attributes: {},
        computed: {}
    };
    
    // 处理属性数据
    if (product.attributes && typeof product.attributes === 'object') {
        // 标准化属性名称
        Object.keys(product.attributes).forEach(key => {
            const normalizedKey = key.toLowerCase().replace(/[^a-z0-9]/g, '_');
            processedProduct.attributes[normalizedKey] = product.attributes[key];
        });
    }
    
    // 计算字段
    processedProduct.computed.priceCategory = 
        processedProduct.price < 100 ? 'budget' :
        processedProduct.price < 500 ? 'mid-range' : 'premium';
    
    processedProduct.computed.hasDiscount = 
        product.originalPrice && product.originalPrice > processedProduct.price;
    
    if (processedProduct.computed.hasDiscount) {
        processedProduct.computed.discountPercent = Math.round(
            ((product.originalPrice - processedProduct.price) / product.originalPrice) * 100
        );
    }
    
    // 生成搜索关键词
    const searchKeywords = [];
    searchKeywords.push(processedProduct.name.toLowerCase());
    searchKeywords.push(processedProduct.category);
    
    if (processedProduct.attributes.brand) {
        searchKeywords.push(processedProduct.attributes.brand.toLowerCase());
    }
    
    processedProduct.computed.searchKeywords = [...new Set(searchKeywords)];
    
    // 数据验证
    const validation = {
        isValid: true,
        errors: []
    };
    
    if (!processedProduct.name) {
        validation.errors.push('Product name is required');
        validation.isValid = false;
    }
    
    if (processedProduct.price <= 0) {
        validation.errors.push('Price must be greater than 0');
        validation.isValid = false;
    }
    
    processedProduct.validation = validation;
    
    return processedProduct;
$$ LANGUAGE plv8;

-- 批量处理产品数据的函数
CREATE OR REPLACE FUNCTION js_batch_process_products(products_json JSON)
RETURNS JSON AS $$
    const products = Array.isArray(products_json) ? products_json : [products_json];
    const results = {
        processed: [],
        errors: [],
        summary: {
            total: products.length,
            valid: 0,
            invalid: 0
        }
    };
    
    products.forEach((product, index) => {
        try {
            const processed = plv8.execute(
                'SELECT js_process_product_data($1) as result', 
                [JSON.stringify(product)]
            )[0].result;
            
            if (processed.validation.isValid) {
                results.processed.push(processed);
                results.summary.valid++;
            } else {
                results.errors.push({
                    index: index,
                    product: product,
                    errors: processed.validation.errors
                });
                results.summary.invalid++;
            }
        } catch (error) {
            results.errors.push({
                index: index,
                product: product,
                errors: [error.message]
            });
            results.summary.invalid++;
        }
    });
    
    return results;
$$ LANGUAGE plv8;

-- 测试批量处理
SELECT js_batch_process_products('[
    {
        "id": 1,
        "name": "iPhone 15 Pro",
        "category": "Electronics",
        "price": 999.99,
        "originalPrice": 1099.99,
        "attributes": {
            "Brand": "Apple",
            "Storage Size": "256GB",
            "Color": "Space Gray"
        }
    },
    {
        "id": 2,
        "name": "Invalid Product",
        "category": "Test",
        "price": -10,
        "attributes": {}
    }
]'::json);

JavaScript 函数开发流程

PLV8 集成
SQL 查询
plv8.execute
plv8.prepare
预编译语句
错误捕获
try-catch 处理
JavaScript 特性
原生支持
JSON 处理
数组操作
丰富的方法
字符串处理
正则表达式
函数式编程
map, filter, reduce
需求分析
设计 JS 函数
编写函数逻辑
数据库交互
错误处理
性能优化
单元测试
部署上线

PLV8 最佳实践

// PLV8 开发最佳实践
const plv8BestPractices = {
  性能优化: {
    预编译语句: "使用 plv8.prepare 预编译频繁执行的 SQL",
    批处理: "批量处理数据减少数据库往返",
    索引利用: "确保查询能够有效使用索引",
    结果缓存: "缓存计算结果避免重复计算"
  },
  错误处理: {
    异常捕获: "使用 try-catch 处理所有可能的错误",
    参数验证: "验证输入参数的有效性",
    事务处理: "在需要时正确处理事务",
    日志记录: "记录错误信息用于调试"
  },
  代码质量: {
    函数职责: "保持函数职责单一",
    代码复用: "提取公共逻辑为独立函数",
    文档注释: "添加清晰的函数文档",
    测试覆盖: "编写充分的测试用例"
  }
};
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值