整体架构设计
限流策略设计
维度 | 适用场景 | 限流算法 | 规则示例 |
---|---|---|---|
IP地址 | 防CC攻击、爬虫 | 滑动窗口 | 单个IP: 100请求/分钟 |
UID | 用户级API保护 | 令牌桶 | 单个用户: 50请求/秒 |
API路径 | 关键接口保护 | 漏桶 | /payment: 200请求/秒 |
业务参数 | 特殊业务防护 | 并发控制 | 商品ID123: 10并发 |
组合策略 | 综合防护 | 混合策略 | IP+UID+API联合限流 |
OpenResty 完整配置
1. Nginx 主配置 (nginx.conf)
# Run the worker processes as the "nginx" user.
user nginx;
# "auto" lets nginx choose the number of worker processes.
worker_processes auto;
events {
# Upper bound on simultaneous connections handled by each worker.
worker_connections 10240;
# Use the epoll connection-processing method (Linux).
use epoll;
}
http {
# Basic settings
include mime.types;
default_type application/octet-stream;
sendfile on;
keepalive_timeout 65;
# Search path for the Lua modules required by the scripts below
# (limit_util, limit_auth, limit_metrics, ...).
lua_package_path "/etc/nginx/lua/?.lua;;";
# Shared-memory zones
lua_shared_dict limit_counters 100m; # rate-limit counters
lua_shared_dict limit_rules 10m; # cached rate-limit rules (JSON under the key "data")
lua_shared_dict limit_locks 1m; # zone used by resty.lock in limit_util.get_lock
# Initialise the Lua-side configuration
init_by_lua_block {
-- Redis settings. Deliberately assigned as a global: limit_util.lua and
-- limit_api.lua read `redis_config` without requiring it.
-- NOTE(review): the password is a hard-coded placeholder; presumably it
-- should come from a secret store or environment in production — confirm.
redis_config = {
host = "10.0.0.100",
port = 6379,
password = "your_redis_password",
db_index = 0,
timeout = 500, -- 0.5 second timeout (value is in milliseconds)
rules_key = "limit:rules" -- Redis set holding the rate-limit rules
}
}
# Start the periodic rule-refresh timer in each worker
init_worker_by_lua_block {
    -- Seconds between two refreshes of the rule cache.
    local REFRESH_INTERVAL = 30

    local function refresh_limit_rules()
        require("limit_util").refresh_rules()
    end

    local handler
    -- Timer callback. ngx.timer.at passes `premature = true` when the worker
    -- is shutting down; in that case we must not touch Redis or re-arm.
    handler = function(premature)
        if premature then
            return
        end

        -- A failing refresh must not stop the refresh loop, but it should
        -- be visible in the error log.
        local ok, err = pcall(refresh_limit_rules)
        if not ok then
            ngx.log(ngx.ERR, "刷新限流规则失败: ", err)
        end

        local timer_ok, timer_err = ngx.timer.at(REFRESH_INTERVAL, handler)
        if not timer_ok then
            ngx.log(ngx.ERR, "创建规则刷新定时器失败: ", timer_err)
        end
    end

    -- Fire once immediately so the cache is populated at startup.
    local ok, err = ngx.timer.at(0, handler)
    if not ok then
        ngx.log(ngx.ERR, "创建规则刷新定时器失败: ", err)
    end
}
server {
listen 80;
server_name api.example.com;
# Main route - dynamic rate limiting
location / {
    # Every request is checked by the limiter before being proxied.
    access_by_lua_file /etc/nginx/lua/dynamic_limiter.lua;
    # "backend" must resolve to an upstream block (or host name) defined
    # elsewhere in the configuration.
    proxy_pass http://backend;
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    # Upstream timeouts
    proxy_connect_timeout 3s;
    proxy_send_timeout 10s;
    proxy_read_timeout 10s;
}
# Rate-limit management API
location /limit-api {
# Authentication runs in the access phase, before the API handler.
# NOTE(review): limit_auth is not shown here; presumably authenticate()
# terminates the request itself on failure — confirm.
access_by_lua_block {
require("limit_auth").authenticate()
}
content_by_lua_file /etc/nginx/lua/limit_api.lua;
}
# Monitoring endpoint
location /metrics {
# The limit_metrics module must return a table exposing an export() function
# for this call to work.
content_by_lua_block {
require("limit_metrics").export()
}
}
# Health check: always answers 200 with the body "OK", without touching Lua or the upstream.
location /health {
return 200 "OK";
}
}
}
2. 限流工具脚本 (/etc/nginx/lua/limit_util.lua)
local redis = require "resty.redis"
local cjson = require "cjson"
local resty_lock = require "resty.lock"
local _M = {}
--- Open a Redis connection using the global `redis_config` table.
-- Connects, authenticates when a password is configured and selects the
-- configured database. On any failure the socket is closed so that a
-- half-initialised connection is never handed to the caller.
-- @treturn table|nil resty.redis object ready for commands, or nil on failure
-- @treturn string|nil error message when the first return value is nil
function _M.get_redis_conn()
    local red = redis:new()
    red:set_timeout(redis_config.timeout)

    local ok, err = red:connect(redis_config.host, redis_config.port)
    if not ok then
        ngx.log(ngx.ERR, "Redis连接失败: ", err)
        return nil, err
    end

    -- Authenticate
    if redis_config.password then
        local res, auth_err = red:auth(redis_config.password)
        if not res then
            ngx.log(ngx.ERR, "Redis认证失败: ", auth_err)
            red:close()
            return nil, auth_err
        end
    end

    -- Select the database; a failed SELECT would otherwise leave us silently
    -- reading and writing the wrong keyspace.
    if redis_config.db_index then
        local res, select_err = red:select(redis_config.db_index)
        if not res then
            ngx.log(ngx.ERR, "Redis选择数据库失败: ", select_err)
            red:close()
            return nil, select_err
        end
    end

    return red
end
--- Reload all rate-limit rules from Redis into the `limit_rules` shared dict.
-- Reads every member of the set `redis_config.rules_key`, decodes each one as
-- JSON and stores the resulting table (JSON-encoded) under the key "data".
-- Rules are indexed by `rule.key` when present, otherwise by `rule.id`
-- (the field written by the management API). Members that are not valid JSON
-- objects, or that carry neither field, are skipped with a warning instead of
-- aborting the whole refresh.
-- Returns nothing; failures are logged.
function _M.refresh_rules()
    local red = _M.get_redis_conn()
    if not red then
        return
    end

    -- Fetch every rule
    local rules, err = red:smembers(redis_config.rules_key)
    if not rules then
        ngx.log(ngx.ERR, "获取限流规则失败: ", err)
        red:close()
        return
    end

    -- Redis is no longer needed: hand the connection back to the pool before
    -- doing the (possibly failing) decoding work.
    local pooled, pool_err = red:set_keepalive(10000, 100)
    if not pooled then
        ngx.log(ngx.WARN, "Redis连接放回连接池失败: ", pool_err)
    end

    -- Build the rule table
    local rules_table = {}
    for _, rule_json in ipairs(rules) do
        -- cjson.decode raises on malformed input, hence the pcall.
        local decoded, rule = pcall(cjson.decode, rule_json)
        local index = decoded and type(rule) == "table" and (rule.key or rule.id)
        if index then
            rules_table[index] = rule
        else
            ngx.log(ngx.WARN, "忽略无效的限流规则: ", rule_json)
        end
    end

    -- Update the cache
    local stored, store_err = ngx.shared.limit_rules:set("data", cjson.encode(rules_table))
    if not stored then
        ngx.log(ngx.ERR, "更新限流规则缓存失败: ", store_err)
    end
end
--- Return the rule table currently cached in the `limit_rules` shared dict.
-- @treturn table decoded content of the "data" entry, or an empty table when
-- nothing has been cached yet
function _M.get_rules()
    local cached = ngx.shared.limit_rules:get("data")
    if cached then
        return cjson.decode(cached) or {}
    end
    return {}
end
--- Acquire a short-lived lock on `key` backed by the `limit_locks` shared dict.
-- The timeout/exptime options are passed to `resty_lock:new`, which is where
-- lua-resty-lock reads them; `lock:lock()` only takes the key, so options
-- given there are ignored and the library defaults would apply.
-- @tparam string key name of the lock
-- @treturn table|nil the held lock object (call `:unlock()` on it), or nil
-- when the lock could not be created or acquired within 0.1s
function _M.get_lock(key)
    local lock, new_err = resty_lock:new("limit_locks", { timeout = 0.1, exptime = 0.5 })
    if not lock then
        ngx.log(ngx.WARN, "获取锁失败: ", new_err)
        return nil
    end

    local elapsed, err = lock:lock(key)
    if not elapsed then
        ngx.log(ngx.WARN, "获取锁失败: ", err)
        return nil
    end
    return lock
end
--- Sliding-window limiter based on a log of request timestamps.
-- State lives in the `limit_counters` shared dict under `<key>_timestamps`
-- (comma-separated timestamps) and `<key>_counter` (their count). Both entries
-- expire after `window` seconds of inactivity so idle keys do not pile up.
--
-- A request that would exceed `limit` is rejected and NOT recorded; this keeps
-- the timestamp list bounded by `limit` even under sustained flooding.
-- @tparam string key identifier being limited
-- @tparam number window window length in seconds
-- @tparam[opt] number limit maximum requests per window; when omitted the
-- request is always recorded and the running count is returned
-- @treturn number 0 when the request is rejected, otherwise the number of
-- requests in the current window including this one
function _M.sliding_window_counter(key, window, limit)
    local now = ngx.now()
    local counter_key = key .. "_counter"
    local timestamp_key = key .. "_timestamps"
    local counters = ngx.shared.limit_counters
    -- Best effort: when the lock cannot be taken we still proceed unlocked.
    local lock = _M.get_lock(key)

    -- Keep only the timestamps that are still inside the window.
    local window_start = now - window
    local valid_timestamps = {}
    local stored = counters:get(timestamp_key) or ""
    for ts in string.gmatch(stored, "([^,]+)") do
        local stamp = tonumber(ts)
        -- tonumber may return nil on a corrupted entry; skip it.
        if stamp and stamp > window_start then
            valid_timestamps[#valid_timestamps + 1] = stamp
        end
    end

    local count = #valid_timestamps
    local max_requests = tonumber(limit)
    local result
    if max_requests and count >= max_requests then
        result = 0
    else
        valid_timestamps[count + 1] = now
        result = count + 1
    end

    -- Persist the pruned log; the third argument is the expiry in seconds.
    counters:set(counter_key, #valid_timestamps, window)
    counters:set(timestamp_key, table.concat(valid_timestamps, ","), window)

    if lock then lock:unlock() end
    return result
end
--- Token-bucket limiter.
-- State is kept in the `limit_counters` shared dict under `<key>_tokens` and
-- `<key>_last_time`. A bucket seen for the first time starts full (`burst`).
-- State is only written back when a token is actually consumed.
-- @tparam string key identifier being limited
-- @tparam number rate tokens added per second
-- @tparam number burst bucket capacity
-- @treturn number 1 when the request is allowed, 0 when it is rejected
-- @treturn number tokens left in the bucket after this call
function _M.token_bucket(key, rate, burst)
    local now = ngx.now()
    local dict = ngx.shared.limit_counters
    local tokens_key, last_time_key = key .. "_tokens", key .. "_last_time"
    local lock = _M.get_lock(key)

    -- Refill according to the time elapsed since the last consumption,
    -- capped at the bucket capacity.
    local stored_tokens = dict:get(tokens_key) or burst
    local refill_since = dict:get(last_time_key) or now
    local available = math.min(stored_tokens + (now - refill_since) * rate, burst)

    local allowed = 0
    if not (available < 1) then
        -- Consume one token and persist the new state.
        available = available - 1
        dict:set(tokens_key, available)
        dict:set(last_time_key, now)
        allowed = 1
    end

    if lock then lock:unlock() end
    return allowed, available
end
return _M
3. 动态限流核心 (/etc/nginx/lua/dynamic_limiter.lua)
local limit_util = require "limit_util"
local cjson = require "cjson"
local prometheus = require "prometheus"
-- Register metrics
-- NOTE(review): this file is loaded through access_by_lua_file, so these
-- registrations appear to execute on every request; presumably they belong in
-- an init_worker phase — confirm against the prometheus library in use.
-- Request counter labelled by response status.
local metric_requests = prometheus:counter(
"http_requests_total",
"Total HTTP requests",
{"status"}
)
-- Counter of requests rejected by a limit rule, labelled by the rule
-- dimension and the concrete key that was limited.
local metric_limited = prometheus:counter(
"limit_rejected_total",
"Total rejected requests by limit",
{"dimension", "key"}
)
--- Collect the identifiers of the current request, one per rule dimension.
-- The user id comes from the X-User-ID header, falling back to the `uid`
-- cookie and finally to the empty string.
-- @treturn table fields ip, uid, path plus the combined ip_path, uid_path and
-- ip_uid values (parts joined with ":")
local function get_client_identifiers()
    local ip = ngx.var.remote_addr
    local path = ngx.var.uri

    local uid = ngx.req.get_headers()["X-User-ID"]
    -- A header sent several times is returned as a table of values; take the
    -- first one so the concatenations below cannot raise.
    if type(uid) == "table" then
        uid = uid[1]
    end
    uid = uid or ngx.var.cookie_uid or ""

    return {
        ip = ip,
        uid = uid,
        path = path,
        ip_path = ip .. ":" .. path,
        uid_path = uid .. ":" .. path,
        ip_uid = ip .. ":" .. uid
    }
end
--- Evaluate every cached rule against the current request and answer 429 as
-- soon as one of them rejects it.
--
-- A rule applies when its `keys` table contains the request's identifier for
-- the rule dimension, or the wildcard entry "*". Rules whose `limit`/`window`
-- are not positive-window numbers are skipped.
-- Counter state is namespaced by rule id (or dimension) so two rules that
-- match the same identifier do not share counters.
-- @tparam table identifiers result of get_client_identifiers()
local function apply_limiting_rules(identifiers)
    local rules = limit_util.get_rules()
    for _, rule in pairs(rules) do
        -- Does this rule's dimension/keys match the request?
        local key = identifiers[rule.dimension]
        local rule_keys = rule.keys
        local limit, window = rule.limit, rule.window
        local applies = key
            and type(rule_keys) == "table"
            and (rule_keys[key] or rule_keys["*"])
            and type(limit) == "number"
            and type(window) == "number"
            and window > 0

        if applies then
            local algorithm = rule.algorithm
            local state_key = (rule.id or rule.dimension) .. ":" .. key

            -- result: 0 = rejected, 1 = allowed, nil = limiter error (fail open)
            local result, remaining
            if algorithm == "sliding_window" then
                local count = limit_util.sliding_window_counter(state_key, window, limit)
                -- Accept both conventions: 0 for "rejected" or a raw count.
                if count == 0 or count > limit then
                    result = 0
                else
                    result = 1
                end
            elseif algorithm == "token_bucket" then
                result, remaining = limit_util.token_bucket(state_key, limit / window, rule.burst or limit)
            else -- default: leaky bucket
                -- resty.limit.req.new takes (dict, rate per second, burst).
                local lim, err = require("resty.limit.req").new(
                    "limit_counters", limit / window, rule.burst or 0)
                if not lim then
                    ngx.log(ngx.ERR, "创建限流器失败: ", err)
                else
                    local delay, incoming_err = lim:incoming(state_key, true)
                    if delay then
                        result = 1
                    elseif incoming_err == "rejected" then
                        result = 0
                    else
                        ngx.log(ngx.ERR, "限流失败: ", incoming_err)
                    end
                end
            end

            -- Was the limit hit?
            if result == 0 then
                -- Record the metric
                metric_limited:inc(1, {rule.dimension, key})
                -- Response headers
                ngx.header["Content-Type"] = "application/json"
                ngx.header["X-RateLimit-Limit"] = limit
                ngx.header["X-RateLimit-Remaining"] = remaining or 0
                ngx.header["X-RateLimit-Reset"] = ngx.now() + window
                ngx.header["X-RateLimit-Dimension"] = rule.dimension
                -- Answer 429 and stop processing the request
                ngx.status = 429
                ngx.say(cjson.encode({
                    error = "too many requests",
                    dimension = rule.dimension,
                    key = key,
                    limit = limit,
                    window = window
                }))
                return ngx.exit(429)
            end
        end
    end
end
-- Entry point: gather the request identifiers and run them through the rules.
apply_limiting_rules(get_client_identifiers())
-- Count the request.
-- NOTE(review): this runs in the access phase, where the final response
-- status is presumably not known yet; a log-phase handler may be the
-- intended place — confirm.
metric_requests:inc(1, { ngx.var.status })
4. 限流管理API (/etc/nginx/lua/limit_api.lua)
local limit_util = require "limit_util"
local cjson = require "cjson"
local method = ngx.req.get_method()
local path = ngx.var.uri
-- API: list the currently cached rate-limit rules as JSON
if path == "/limit-api/rules" and method == "GET" then
    local body = cjson.encode(limit_util.get_rules())
    ngx.say(body)
    return
end
-- API: add a rate-limit rule
-- Expects a JSON object with `dimension`, `limit` and `window`, plus optional
-- `algorithm` (default "sliding_window"), `burst` (default `limit`) and
-- `keys` (default {}). Responds with {success = true, id = <rule id>}.
if method == "POST" and path == "/limit-api/rules" then
    -- Emit a JSON error body with the given HTTP status.
    local function fail(status, message)
        ngx.status = status
        ngx.say(cjson.encode({error = message}))
    end

    ngx.req.read_body()
    -- get_body_data() is nil for an empty body and cjson.decode raises on
    -- nil or malformed input, so decode under pcall.
    local decoded, data = pcall(cjson.decode, ngx.req.get_body_data())
    if not decoded or type(data) ~= "table"
        or not data.dimension or not data.limit or not data.window then
        fail(ngx.HTTP_BAD_REQUEST, "缺少必要参数")
        return
    end

    -- Basic validation
    local valid_dimensions = {ip = true, uid = true, path = true, ip_path = true, uid_path = true, ip_uid = true}
    if not valid_dimensions[data.dimension] then
        fail(ngx.HTTP_BAD_REQUEST, "无效维度: " .. tostring(data.dimension))
        return
    end

    local limit, window = tonumber(data.limit), tonumber(data.window)
    if not limit or not window or limit <= 0 or window <= 0 then
        fail(ngx.HTTP_BAD_REQUEST, "limit 和 window 必须为正数")
        return
    end
    if data.keys ~= nil and type(data.keys) ~= "table" then
        fail(ngx.HTTP_BAD_REQUEST, "keys 必须为对象")
        return
    end

    data.limit, data.window = limit, window
    data.algorithm = data.algorithm or "sliding_window"
    data.burst = data.burst or data.limit
    data.keys = data.keys or {}

    -- Build the rule id from every field that distinguishes a rule. Fields
    -- are joined with a separator and the key names are sorted so the id is
    -- unambiguous and deterministic.
    local key_names = {}
    for name in pairs(data.keys) do
        key_names[#key_names + 1] = tostring(name)
    end
    table.sort(key_names)
    local rule_id = ngx.md5(table.concat({
        data.dimension, limit, window, tostring(data.algorithm),
        table.concat(key_names, ",")
    }, "|"))
    data.id = rule_id

    -- Connect to Redis
    local red = limit_util.get_redis_conn()
    if not red then
        fail(ngx.HTTP_INTERNAL_SERVER_ERROR, "Redis连接失败")
        return
    end

    -- Fetch the existing rules
    local rules, err = red:smembers(redis_config.rules_key)
    if not rules then
        red:close()
        fail(ngx.HTTP_INTERNAL_SERVER_ERROR, "获取规则失败: " .. tostring(err))
        return
    end

    -- Drop any stored member carrying the same id, then add the new rule.
    -- Using SREM/SADD on individual members avoids the window in which a
    -- DEL-then-rebuild would leave the rule set empty.
    for _, member in ipairs(rules) do
        local ok, existing = pcall(cjson.decode, member)
        if ok and type(existing) == "table" and existing.id == rule_id then
            red:srem(redis_config.rules_key, member)
        end
    end

    local added, add_err = red:sadd(redis_config.rules_key, cjson.encode(data))
    if not added then
        red:close()
        fail(ngx.HTTP_INTERNAL_SERVER_ERROR, "保存规则失败: " .. tostring(add_err))
        return
    end
    red:set_keepalive(10000, 100)

    -- Refresh the local cache
    limit_util.refresh_rules()
    ngx.say(cjson.encode({success = true, id = rule_id}))
    return
end
-- API: delete a rate-limit rule
-- Expects a JSON object with the `id` of the rule to remove and responds with
-- {success = true}. Every stored member whose decoded `id` matches is removed.
if method == "POST" and path == "/limit-api/rules/delete" then
    -- Emit a JSON error body with the given HTTP status.
    local function fail(status, message)
        ngx.status = status
        ngx.say(cjson.encode({error = message}))
    end

    ngx.req.read_body()
    -- cjson.decode raises on a nil or malformed body, so decode under pcall.
    local decoded, data = pcall(cjson.decode, ngx.req.get_body_data())
    if not decoded or type(data) ~= "table" or not data.id then
        fail(ngx.HTTP_BAD_REQUEST, "缺少规则ID")
        return
    end

    local red = limit_util.get_redis_conn()
    if not red then
        fail(ngx.HTTP_INTERNAL_SERVER_ERROR, "Redis连接失败")
        return
    end

    -- Fetch the existing rules
    local rules, err = red:smembers(redis_config.rules_key)
    if not rules then
        red:close()
        fail(ngx.HTTP_INTERNAL_SERVER_ERROR, "获取规则失败: " .. tostring(err))
        return
    end

    -- Remove only the matching members. SREM leaves the other rules in place
    -- the whole time, unlike deleting and rebuilding the set. Members that
    -- are not valid JSON are left untouched.
    for _, rule_json in ipairs(rules) do
        local ok, rule = pcall(cjson.decode, rule_json)
        if ok and type(rule) == "table" and rule.id == data.id then
            local removed, rem_err = red:srem(redis_config.rules_key, rule_json)
            if not removed then
                red:close()
                fail(ngx.HTTP_INTERNAL_SERVER_ERROR, "删除规则失败: " .. tostring(rem_err))
                return
            end
        end
    end
    red:set_keepalive(10000, 100)

    -- Refresh the local cache
    limit_util.refresh_rules()
    ngx.say(cjson.encode({success = true}))
    return
end
-- Fallback: no route above matched the method/path, answer 404 with a JSON body
local not_found_body = cjson.encode({error = "API endpoint not found"})
ngx.status = ngx.HTTP_NOT_FOUND
ngx.say(not_found_body)
5. 监控指标脚本 (/etc/nginx/lua/limit_metrics.lua)
--- Metrics endpoint module.
-- nginx.conf calls `require("limit_metrics").export()`, so this file must
-- return a table exposing `export`; a flat script would make `require`
-- return `true` and the call would fail.
-- NOTE(review): the `prometheus.registry` / `counter` / `gauge` calls are kept
-- exactly as written; confirm they match the API of the prometheus library
-- actually deployed (dynamic_limiter.lua uses the `prometheus:counter(...)`
-- method form and also registers "limit_rejected_total").
local prometheus = require "prometheus"
local limit_util = require "limit_util"

local registry = prometheus.registry
local counter = prometheus.counter
local gauge = prometheus.gauge

local _M = {}

-- Register metrics once, when the module is first required.
local metrics = {
    req_total = counter("nginx_http_requests_total", "Total requests", {"status"}),
    limit_rejected = counter("limit_rejected_total", "Rejected requests", {"dimension", "key"}),
    limit_active = gauge("limit_active_rules", "Active limit rules")
}

--- Write all metrics to the response in the Prometheus text format.
-- Recomputes the number of cached rules on every call so the
-- `limit_active_rules` gauge reflects the current cache content.
function _M.export()
    -- Count the rules currently cached
    local rule_count = 0
    for _ in pairs(limit_util.get_rules()) do
        rule_count = rule_count + 1
    end
    metrics.limit_active:set(rule_count)

    -- Expose every metric
    registry:collect()
    ngx.header["Content-Type"] = "text/plain; version=0.0.4"
    ngx.say(registry:export())
end

return _M
限流规则配置示例
1. IP限流 (防CC攻击)
{
"dimension": "ip",
"limit": 100,
"window": 60,
"algorithm": "sliding_window",
"keys": {
"*": true
}
}
2. UID限流 (用户级保护)
{
"dimension": "uid",
"limit": 30,
"window": 10,
"algorithm": "token_bucket",
"burst": 50,
"keys": {
"user123": true,
"user456": true
}
}
3. API路径限流 (关键接口保护)
{
"dimension": "path",
"limit": 200,
"window": 1,
"algorithm": "leaky_bucket",
"keys": {
"/api/payment": true,
"/api/checkout": true
}
}
4. 组合维度限流
{
"dimension": "ip_uid",
"limit": 5,
"window": 60,
"algorithm": "sliding_window",
"keys": {
"192.168.1.1:user123": true,
"10.0.0.5:user789": true
}
}
操作与管理
1. 添加限流规则
curl -X POST -H "Content-Type: application/json" \
-d '{
"dimension": "ip",
"limit": 100,
"window": 60,
"algorithm": "sliding_window",
"keys": {"*": true}
}' \
http://api.example.com/limit-api/rules
2. 查看当前规则
curl http://api.example.com/limit-api/rules
3. 删除规则
curl -X POST -H "Content-Type: application/json" \
-d '{"id": "RULE_ID"}' \
http://api.example.com/limit-api/rules/delete
4. 监控指标查看
curl http://api.example.com/metrics
生产环境优化建议
1)多级缓存策略:
- 一级缓存:Worker级LRU缓存(`lua_shared_dict`)
- 二级缓存:本地磁盘缓存(规则变化不频繁时)
- 三级缓存:Redis存储
2)降级策略:
-- Add this to the rate-limiting script.
-- NOTE(review): illustrative fragment — `red` and `apply_default_limiting`
-- are not defined in the scripts shown in this document; the caller is
-- expected to supply both.
if not red then
-- Fall back to default rules when Redis is unavailable
apply_default_limiting()
end
3)动态调整:
-- Adjust the rate-limit threshold according to the system load.
-- NOTE(review): illustrative fragment — confirm that the deployed ngx.process
-- module really provides get_memory_stats() with a `load` field; `rule` must
-- be supplied by the surrounding code.
local load = require("ngx.process").get_memory_stats().load
if load > 0.8 then
rule.limit = rule.limit * 0.8 -- lower the threshold by 20% under high load
end
4)智能限流:
- 对认证用户放宽限制
- 对爬虫UA加强限制
- 对API错误率高的客户端降低阈值
性能指标监控
指标名称 | 监控重点 | 告警阈值 |
---|---|---|
limit_rejected_total | 各维度限流次数 | 突增300% |
limit_active_rules | 活跃规则数量 | >100条 |
redis_connection_error | Redis连接错误 | >5次/分钟 |
limit_processing_time | 限流决策延迟 | P99>50ms |
通过本方案,企业可以实现细粒度、动态可调的限流策略,有效防护系统免受过载请求影响,同时保持核心业务的可用性。