feat: 实现 IoT 卡轮询系统(支持千万级卡规模)
All checks were successful
构建并部署到测试环境(无 SSH) / build-and-deploy (push) Successful in 6m35s

实现功能:
- 实名状态检查轮询(可配置间隔)
- 卡流量检查轮询(支持跨月流量追踪)
- 套餐检查与超额自动停机
- 分布式并发控制(Redis 信号量)
- 手动触发轮询(单卡/批量/条件筛选)
- 数据清理配置与执行
- 告警规则与历史记录
- 实时监控统计(队列/性能/并发)

性能优化:
- Redis 缓存卡信息,减少 DB 查询
- Pipeline 批量写入 Redis
- 异步流量记录写入
- 渐进式初始化(10万卡/批)

压测工具(scripts/benchmark/):
- Mock Gateway 模拟上游服务
- 测试卡生成器
- 配置初始化脚本
- 实时监控脚本

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-05 17:32:44 +08:00
parent b11edde720
commit 931e140e8e
104 changed files with 16883 additions and 87 deletions

View File

@@ -0,0 +1,54 @@
# 轮询系统压测指南
## 目标
模拟 1000 万张卡的轮询场景,测试系统性能。
## 环境要求
- Docker(用于运行本地 Redis)
- 测试环境 PostgreSQL(已有)
- 10+ CPU 核心
- 16GB+ 内存
## 压测步骤
### Step 1: 启动本地 Redis
```bash
./scripts/benchmark/start_redis.sh
```
### Step 2: 启动 Mock Gateway(模拟上游接口)
```bash
go run ./scripts/benchmark/mock_gateway.go
```
### Step 3: 生成测试数据(1000 万张卡)
```bash
go run ./scripts/benchmark/generate_cards.go
```
### Step 4: 启动 Worker 进行压测
```bash
# 使用本地 Redis 配置 + Mock Gateway
source .env.local && \
JUNHONG_REDIS_ADDRESS=127.0.0.1 \
JUNHONG_REDIS_PORT=6379 \
JUNHONG_REDIS_PASSWORD="" \
JUNHONG_REDIS_DB=0 \
JUNHONG_GATEWAY_BASE_URL=http://127.0.0.1:8888 \
JUNHONG_GATEWAY_APP_ID=test \
JUNHONG_GATEWAY_APP_SECRET=testsecret123456 \
JUNHONG_GATEWAY_TIMEOUT=30 \
go run ./cmd/worker/...
```
**注意**:可以启动多个 Worker 实例来增加并发处理能力。单个 Worker 通过 Asynq 已支持并发任务处理。
### Step 5: 监控压测状态
```bash
./scripts/benchmark/monitor.sh
```
## 预期结果
- 初始化时间:约 50 秒(1000 万卡)
- 调度吞吐:5 万张/秒
- 任务处理:取决于 Gateway 响应时间

View File

@@ -0,0 +1,223 @@
// +build ignore
package main
import (
"context"
"flag"
"fmt"
"log"
"math/rand"
"os"
"sync"
"sync/atomic"
"time"
"gorm.io/driver/postgres"
"gorm.io/gorm"
"gorm.io/gorm/logger"
)
// IotCard is a simplified card model mapped to the tb_iot_card table. It
// declares only the columns this generator writes.
type IotCard struct {
ID uint `gorm:"primaryKey"`
// ICCID is declared with a unique index (idx_iot_card_iccid) that is limited
// to rows where deleted_at IS NULL.
ICCID string `gorm:"column:iccid;uniqueIndex:idx_iot_card_iccid,where:deleted_at IS NULL"`
// CardCategory is "normal" or "industry" in the data produced by this script.
CardCategory string `gorm:"column:card_category;default:normal"`
CarrierID uint `gorm:"column:carrier_id"`
Status int `gorm:"column:status;default:1"`
ActivationStatus int `gorm:"column:activation_status;default:0"`
// RealNameStatus: per the notes in generateBatch, 0 = not real-named,
// 1 = in progress, 2 = real-named.
RealNameStatus int `gorm:"column:real_name_status;default:0"`
// NetworkStatus: per the notes in generateBatch, 0 = stopped, 1 = normal.
NetworkStatus int `gorm:"column:network_status;default:0"`
EnablePolling bool `gorm:"column:enable_polling;default:true"`
Creator uint `gorm:"column:creator"`
Updater uint `gorm:"column:updater"`
CreatedAt time.Time
UpdatedAt time.Time
// DeletedAt is a plain nullable timestamp; this script never sets it.
DeletedAt *time.Time `gorm:"index"`
}
// TableName returns the table name GORM should use for IotCard.
func (IotCard) TableName() string {
return "tb_iot_card"
}
// Command-line flags and shared run state for the card generator.
var (
// totalCards is the number of cards to generate (-total).
totalCards = flag.Int("total", 10000000, "要生成的卡数量")
// batchSize is the number of cards per generated batch (-batch).
batchSize = flag.Int("batch", 10000, "每批插入数量")
// workers is the number of concurrent insert goroutines (-workers).
workers = flag.Int("workers", 10, "并行 worker 数量")
// startICCID is the ICCID prefix (-start); generateBatch appends an
// 11-digit zero-padded sequence number to it.
startICCID = flag.String("start", "898600000", "起始 ICCID 前缀9位总长度不超过20位")
// clearOld requests deletion of previously generated test cards (-clear).
clearOld = flag.Bool("clear", false, "是否清空现有测试卡")
// insertedCount is the running total of inserted cards; it is updated with
// sync/atomic because several workers write it concurrently.
insertedCount int64
// startTime is set in main just before generation begins and is used for
// speed and ETA reporting.
startTime time.Time
)
func main() {
flag.Parse()
fmt.Println("=== 生成测试卡数据 ===")
fmt.Printf("目标数量: %d 张\n", *totalCards)
fmt.Printf("批次大小: %d\n", *batchSize)
fmt.Printf("并行数: %d\n", *workers)
fmt.Println("")
// 连接数据库
dsn := fmt.Sprintf("host=%s port=%s user=%s password=%s dbname=%s sslmode=disable",
os.Getenv("JUNHONG_DATABASE_HOST"),
os.Getenv("JUNHONG_DATABASE_PORT"),
os.Getenv("JUNHONG_DATABASE_USER"),
os.Getenv("JUNHONG_DATABASE_PASSWORD"),
os.Getenv("JUNHONG_DATABASE_DBNAME"),
)
db, err := gorm.Open(postgres.Open(dsn), &gorm.Config{
Logger: logger.Default.LogMode(logger.Silent),
})
if err != nil {
log.Fatalf("连接数据库失败: %v", err)
}
// 配置连接池
sqlDB, _ := db.DB()
sqlDB.SetMaxOpenConns(50)
sqlDB.SetMaxIdleConns(25)
fmt.Println("✓ 数据库连接成功")
// 检查现有卡数量
var existingCount int64
db.Model(&IotCard{}).Count(&existingCount)
fmt.Printf("现有卡数量: %d\n", existingCount)
if *clearOld {
fmt.Println("清空现有测试卡...")
// 只删除 ICCID 以 898600000 开头的测试卡
db.Exec("DELETE FROM tb_iot_card WHERE iccid LIKE '898600000%'")
fmt.Println("✓ 清空完成")
}
// 开始生成
startTime = time.Now()
ctx := context.Background()
// 创建任务通道
taskCh := make(chan int, *workers*2)
var wg sync.WaitGroup
// 启动 worker
for i := 0; i < *workers; i++ {
wg.Add(1)
go func(workerID int) {
defer wg.Done()
worker(ctx, db, workerID, taskCh)
}(i)
}
// 分发任务
batches := *totalCards / *batchSize
for i := 0; i < batches; i++ {
taskCh <- i
}
close(taskCh)
// 等待完成
wg.Wait()
elapsed := time.Since(startTime)
fmt.Println("")
fmt.Println("=== 生成完成 ===")
fmt.Printf("总插入: %d 张\n", atomic.LoadInt64(&insertedCount))
fmt.Printf("耗时: %v\n", elapsed)
fmt.Printf("速度: %.0f 张/秒\n", float64(atomic.LoadInt64(&insertedCount))/elapsed.Seconds())
// 验证
var finalCount int64
db.Model(&IotCard{}).Count(&finalCount)
fmt.Printf("数据库总卡数: %d\n", finalCount)
}
// worker consumes batch indexes from taskCh until the channel is closed. For
// each index it generates one batch of cards and inserts it in chunks of 1000
// rows. A failed batch is logged and skipped; successful batches are added to
// the shared insertedCount.
//
// Progress is printed whenever the shared counter crosses a multiple of
// 100000. Testing for a crossing, rather than for an exact multiple, keeps
// the report working for batch sizes that do not divide 100000.
func worker(ctx context.Context, db *gorm.DB, workerID int, taskCh <-chan int) {
	const reportEvery = 100000

	// Each worker owns its generator: *rand.Rand is not safe for concurrent use.
	rng := rand.New(rand.NewSource(time.Now().UnixNano() + int64(workerID)))

	for batchIndex := range taskCh {
		cards := generateBatch(rng, *startICCID, batchIndex, *batchSize)

		// Bulk insert.
		if err := db.WithContext(ctx).CreateInBatches(cards, 1000).Error; err != nil {
			log.Printf("Worker %d 插入失败: %v", workerID, err)
			continue
		}

		added := int64(len(cards))
		count := atomic.AddInt64(&insertedCount, added)

		// Report only when this batch moved the total across a boundary.
		if count/reportEvery == (count-added)/reportEvery {
			continue
		}
		elapsed := time.Since(startTime).Seconds()
		speed := float64(count) / elapsed
		eta := float64(int64(*totalCards)-count) / speed
		fmt.Printf("进度: %d/%d (%.1f%%) | 速度: %.0f/秒 | ETA: %.0f秒\n",
			count, *totalCards, float64(count)*100/float64(*totalCards), speed, eta)
	}
}
// generateBatch builds `size` cards for batch number batchIndex.
//
// The ICCID is iccidPrefix followed by the card's global sequence number
// (batchIndex*size + position) zero-padded to 11 digits, which gives 20
// characters with the default 9-character prefix.
//
// The status mix is chosen to match the polling configuration conditions:
//   - 10%: not real-named (all three status fields 0) -> not_real_name
//   - 20%: real-named but not activated (RealNameStatus 2) -> real_name
//   - 70%: activated (RealNameStatus 2, activation 1, network 1) -> activated
//
// About 5% of cards are marked "industry", and the carrier ID is uniform in 1-3.
func generateBatch(rng *rand.Rand, iccidPrefix string, batchIndex int, size int) []IotCard {
	stamp := time.Now()
	firstIndex := batchIndex * size
	batch := make([]IotCard, size)

	for offset := range batch {
		card := &batch[offset]
		card.ICCID = fmt.Sprintf("%s%011d", iccidPrefix, firstIndex+offset)

		// One draw selects the status bucket. The zero value of the struct
		// already describes the "not real-named" case.
		switch roll := rng.Float64(); {
		case roll < 0.10:
			// Not real-named: every status field stays 0.
		case roll < 0.30:
			// Real-named, not yet activated.
			card.RealNameStatus = 2
		default:
			// Activated: eligible for traffic and package checks.
			card.RealNameStatus = 2
			card.ActivationStatus = 1
			card.NetworkStatus = 1
		}

		// The second draw selects the card category.
		card.CardCategory = "normal"
		if rng.Float64() < 0.05 {
			card.CardCategory = "industry"
		}

		card.CarrierID = uint(rng.Intn(3) + 1) // carriers 1-3
		card.Status = 1
		card.EnablePolling = true
		card.Creator = 1
		card.Updater = 1
		card.CreatedAt = stamp
		card.UpdatedAt = stamp
	}
	return batch
}

View File

@@ -0,0 +1,156 @@
//go:build ignore
// +build ignore
package main
import (
"fmt"
"log"
"os"
"gorm.io/driver/postgres"
"gorm.io/gorm"
"gorm.io/gorm/logger"
)
// PollingConfig is one row of tb_polling_config: a rule that selects a group
// of cards and gives the check intervals for that group. Pointer fields are
// optional; a nil pointer is stored as NULL.
type PollingConfig struct {
ID uint `gorm:"primaryKey"`
ConfigName string `gorm:"column:config_name"`
// CardCondition values used in this script: "not_real_name", "real_name",
// "activated".
CardCondition *string `gorm:"column:card_condition"`
CardCategory *string `gorm:"column:card_category"`
CarrierID *uint `gorm:"column:carrier_id"`
// Priority: presumably a lower number is matched first — TODO confirm
// against the scheduler.
Priority int `gorm:"column:priority"`
// The intervals are in seconds (300 is annotated as 5 minutes in main).
RealnameCheckInterval *int `gorm:"column:realname_check_interval"`
CarddataCheckInterval *int `gorm:"column:carddata_check_interval"`
PackageCheckInterval *int `gorm:"column:package_check_interval"`
Status int `gorm:"column:status;default:1"`
Description string `gorm:"column:description"`
}
// TableName returns the table name GORM should use for PollingConfig.
func (PollingConfig) TableName() string {
return "tb_polling_config"
}
// PollingConcurrencyConfig is one row of tb_polling_concurrency_config: the
// maximum concurrency allowed for a task type.
type PollingConcurrencyConfig struct {
ID uint `gorm:"primaryKey"`
// TaskType values used in this script: "realname", "carddata", "package",
// "stop_start".
TaskType string `gorm:"column:task_type"`
MaxConcurrency int `gorm:"column:max_concurrency"`
Description string `gorm:"column:description"`
}
// TableName returns the table name GORM should use for PollingConcurrencyConfig.
func (PollingConcurrencyConfig) TableName() string {
return "tb_polling_concurrency_config"
}
// ptr returns a pointer to a fresh copy of v. It lets literals be used for
// optional (pointer-typed) struct fields.
func ptr[T any](v T) *T {
	p := new(T)
	*p = v
	return p
}
// main resets the polling configuration used for benchmarking. It connects to
// PostgreSQL using the JUNHONG_DATABASE_* environment variables, deletes every
// row of tb_polling_config and tb_polling_concurrency_config, inserts the
// benchmark presets, and prints the resulting row counts.
//
// A failed delete is fatal, because inserting on top of the old rows would
// leave duplicate configurations. A failed insert is logged and the remaining
// rows are still attempted.
func main() {
	fmt.Println("=== 初始化轮询配置 ===")

	// Connect to the database.
	dsn := fmt.Sprintf("host=%s port=%s user=%s password=%s dbname=%s sslmode=disable",
		os.Getenv("JUNHONG_DATABASE_HOST"),
		os.Getenv("JUNHONG_DATABASE_PORT"),
		os.Getenv("JUNHONG_DATABASE_USER"),
		os.Getenv("JUNHONG_DATABASE_PASSWORD"),
		os.Getenv("JUNHONG_DATABASE_DBNAME"),
	)
	db, err := gorm.Open(postgres.Open(dsn), &gorm.Config{
		Logger: logger.Default.LogMode(logger.Silent),
	})
	if err != nil {
		log.Fatalf("连接数据库失败: %v", err)
	}
	fmt.Println("✓ 数据库连接成功")

	// Remove the existing configuration.
	if err := db.Exec("DELETE FROM tb_polling_config").Error; err != nil {
		log.Fatalf("清空轮询配置失败: %v", err)
	}
	if err := db.Exec("DELETE FROM tb_polling_concurrency_config").Error; err != nil {
		log.Fatalf("清空并发配置失败: %v", err)
	}
	fmt.Println("✓ 清空现有配置")

	// Polling configurations.
	configs := []PollingConfig{
		{
			ConfigName:            "未实名卡轮询",
			CardCondition:         ptr("not_real_name"),
			Priority:              10,
			RealnameCheckInterval: ptr(300), // 5 minutes
			Status:                1,
			Description:           "未实名卡每5分钟检查一次实名状态",
		},
		{
			ConfigName:            "行业卡轮询",
			CardCategory:          ptr("industry"),
			Priority:              15,
			CarddataCheckInterval: ptr(3600), // 1 hour
			PackageCheckInterval:  ptr(3600),
			Status:                1,
			Description:           "行业卡无需实名检查,每小时检查流量和套餐",
		},
		{
			ConfigName:            "已实名卡轮询",
			CardCondition:         ptr("real_name"),
			Priority:              20,
			RealnameCheckInterval: ptr(86400), // 1 day
			Status:                1,
			Description:           "已实名卡每天检查一次实名状态",
		},
		{
			ConfigName:            "已激活卡轮询",
			CardCondition:         ptr("activated"),
			Priority:              30,
			CarddataCheckInterval: ptr(3600), // 1 hour
			PackageCheckInterval:  ptr(3600),
			Status:                1,
			Description:           "已激活卡每小时检查流量和套餐",
		},
		{
			ConfigName:            "默认轮询配置",
			Priority:              100,
			RealnameCheckInterval: ptr(86400),
			CarddataCheckInterval: ptr(86400),
			PackageCheckInterval:  ptr(86400),
			Status:                1,
			Description:           "默认配置,每天检查一次",
		},
	}
	for i := range configs {
		cfg := &configs[i]
		if err := db.Create(cfg).Error; err != nil {
			log.Printf("插入配置失败 [%s]: %v", cfg.ConfigName, err)
			continue
		}
		fmt.Printf(" + %s (优先级: %d)\n", cfg.ConfigName, cfg.Priority)
	}
	fmt.Println("✓ 轮询配置初始化完成")

	// Concurrency limits, sized for a benchmark with 5+ workers.
	concurrencyConfigs := []PollingConcurrencyConfig{
		{TaskType: "realname", MaxConcurrency: 5000, Description: "实名检查任务最大并发数"},
		{TaskType: "carddata", MaxConcurrency: 5000, Description: "流量检查任务最大并发数"},
		{TaskType: "package", MaxConcurrency: 5000, Description: "套餐检查任务最大并发数"},
		{TaskType: "stop_start", MaxConcurrency: 5000, Description: "停复机操作最大并发数"},
	}
	for i := range concurrencyConfigs {
		cfg := &concurrencyConfigs[i]
		if err := db.Create(cfg).Error; err != nil {
			log.Printf("插入并发配置失败 [%s]: %v", cfg.TaskType, err)
			continue
		}
		fmt.Printf(" + %s (最大并发: %d)\n", cfg.TaskType, cfg.MaxConcurrency)
	}
	fmt.Println("✓ 并发控制配置初始化完成")

	// Verify what is in the tables now.
	var pollingCount, concurrencyCount int64
	if err := db.Model(&PollingConfig{}).Count(&pollingCount).Error; err != nil {
		log.Printf("统计轮询配置失败: %v", err)
	}
	if err := db.Model(&PollingConcurrencyConfig{}).Count(&concurrencyCount).Error; err != nil {
		log.Printf("统计并发配置失败: %v", err)
	}
	fmt.Printf("\n=== 初始化完成 ===\n")
	fmt.Printf("轮询配置: %d 条\n", pollingCount)
	fmt.Printf("并发配置: %d 条\n", concurrencyCount)
}

View File

@@ -0,0 +1,263 @@
// +build ignore
package main
import (
"encoding/json"
"fmt"
"log"
"math/rand"
"net/http"
"os"
"sync/atomic"
"time"
)
// Request counters and process-wide settings for the mock gateway. The three
// counters are updated with sync/atomic by the HTTP handlers.
var (
totalRequests int64
successRequests int64
failedRequests int64
// startTime is set at the beginning of main; handleStats uses it for uptime.
startTime time.Time
fastMode bool // fast mode: low simulated latency
)
// GatewayResponse is the JSON envelope returned by every mocked gateway
// endpoint. The handlers in this file use Code 200 for success and 500 for a
// simulated failure.
type GatewayResponse struct {
Code int `json:"code"`
Msg string `json:"msg"`
TraceID string `json:"traceId"`
// Data carries the endpoint-specific payload as pre-built JSON. The failure
// responses leave it unset.
Data json.RawMessage `json:"data"`
}
// main starts the mock gateway on port 8888. Setting FAST_MODE to "1" or
// "true" switches the simulated latency from the realistic profile to the
// fast one. The startup banner reports the latency range of the active mode.
func main() {
	startTime = time.Now()
	// Explicit seeding is needed on Go versions before 1.20 for the global
	// generator to differ between runs.
	rand.Seed(time.Now().UnixNano())

	// Check whether fast mode is enabled.
	latencyRange := "200ms - 4s"
	if mode := os.Getenv("FAST_MODE"); mode == "1" || mode == "true" {
		fastMode = true
		latencyRange = "10ms - 50ms"
		fmt.Println("⚡ 快速模式已启用(延迟: 10-50ms")
	} else {
		fmt.Println("🐢 真实模式(延迟: 200ms-4s")
		fmt.Println(" 提示: 设置 FAST_MODE=1 可启用快速模式")
	}

	// Real-name query (path matches the gateway client).
	http.HandleFunc("/flow-card/realname-status", handleRealnameQuery)
	// Traffic usage query.
	http.HandleFunc("/flow-card/flow", handleFlowQuery)
	// Stop (suspend) a card.
	http.HandleFunc("/flow-card/cardStop", handleStopCard)
	// Start (resume) a card.
	http.HandleFunc("/flow-card/cardStart", handleStartCard)
	// Card status query.
	http.HandleFunc("/flow-card/status", handleCardStatus)
	// Statistics.
	http.HandleFunc("/stats", handleStats)

	fmt.Println("=== Mock Gateway 服务器启动 ===")
	fmt.Println("监听端口: 8888")
	fmt.Println("模拟响应时间: " + latencyRange)
	fmt.Println("")
	fmt.Println("接口列表:")
	fmt.Println(" POST /flow-card/realname-status - 实名查询")
	fmt.Println(" POST /flow-card/flow - 流量查询")
	fmt.Println(" POST /flow-card/status - 卡状态查询")
	fmt.Println(" POST /flow-card/cardStop - 停机操作")
	fmt.Println(" POST /flow-card/cardStart - 复机操作")
	fmt.Println(" GET /stats - 查看统计")
	fmt.Println("")
	fmt.Println("按 Ctrl+C 停止服务器")

	log.Fatal(http.ListenAndServe(":8888", nil))
}
// simulateLatency blocks the calling handler for a random time that imitates
// the upstream gateway.
//
// Fast mode sleeps 10-49ms. Otherwise the delay comes from three buckets:
// 80% of calls take 200-499ms (normal), 15% take 500-1999ms (slow) and 5%
// take 2000-3999ms (very slow).
func simulateLatency() {
	var millis int
	if fastMode {
		millis = 10 + rand.Intn(40)
	} else {
		switch bucket := rand.Float64(); {
		case bucket < 0.80:
			millis = 200 + rand.Intn(300)
		case bucket < 0.95:
			millis = 500 + rand.Intn(1500)
		default:
			millis = 2000 + rand.Intn(2000)
		}
	}
	time.Sleep(time.Duration(millis) * time.Millisecond)
}
// handleRealnameQuery mocks the real-name status query. After the simulated
// latency, 90% of calls return code 200 with a randomly chosen status string
// and 10% return code 500 ("upstream error"). The HTTP status is 200 in both
// cases; the outcome is carried in the JSON envelope.
func handleRealnameQuery(w http.ResponseWriter, r *http.Request) {
	atomic.AddInt64(&totalRequests, 1)
	simulateLatency()

	resp := GatewayResponse{TraceID: fmt.Sprintf("trace-%d", time.Now().UnixNano())}
	if rand.Float64() < 0.90 {
		atomic.AddInt64(&successRequests, 1)
		// Pick a random real-name status.
		statuses := []string{"未实名", "实名中", "已实名"}
		status := statuses[rand.Intn(len(statuses))]
		resp.Code = 200
		resp.Msg = "success"
		resp.Data = json.RawMessage(fmt.Sprintf(`{"status": "%s"}`, status))
	} else {
		atomic.AddInt64(&failedRequests, 1)
		resp.Code = 500
		resp.Msg = "upstream error"
	}

	// Declare the body as JSON, as handleStats does; otherwise net/http
	// sniffs the content and labels it text/plain.
	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(resp); err != nil {
		log.Printf("handleRealnameQuery: write response: %v", err)
	}
}
// handleFlowQuery mocks the traffic usage query. After the simulated latency,
// 90% of calls return code 200 with a random usedFlow in [0, 10000) MB and 10%
// return code 500 ("upstream error"). The HTTP status is 200 in both cases.
func handleFlowQuery(w http.ResponseWriter, r *http.Request) {
	atomic.AddInt64(&totalRequests, 1)
	simulateLatency()

	resp := GatewayResponse{TraceID: fmt.Sprintf("trace-%d", time.Now().UnixNano())}
	if rand.Float64() < 0.90 {
		atomic.AddInt64(&successRequests, 1)
		// Random usage figure. The payload is meant to match the client's
		// FlowUsageResp structure (not visible in this file).
		usedFlow := rand.Intn(10000)
		resp.Code = 200
		resp.Msg = "success"
		resp.Data = json.RawMessage(fmt.Sprintf(`{"usedFlow": %d, "unit": "MB"}`, usedFlow))
	} else {
		atomic.AddInt64(&failedRequests, 1)
		resp.Code = 500
		resp.Msg = "upstream error"
	}

	// Declare the body as JSON, as handleStats does; otherwise net/http
	// sniffs the content and labels it text/plain.
	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(resp); err != nil {
		log.Printf("handleFlowQuery: write response: %v", err)
	}
}
// handleStopCard mocks the stop (suspend) operation. After the simulated
// latency, 95% of calls return code 200 with {"result": "stopped"} and 5%
// return code 500 ("stop failed"). The HTTP status is 200 in both cases.
func handleStopCard(w http.ResponseWriter, r *http.Request) {
	atomic.AddInt64(&totalRequests, 1)
	simulateLatency()

	resp := GatewayResponse{TraceID: fmt.Sprintf("trace-%d", time.Now().UnixNano())}
	if rand.Float64() < 0.95 {
		atomic.AddInt64(&successRequests, 1)
		resp.Code = 200
		resp.Msg = "success"
		resp.Data = json.RawMessage(`{"result": "stopped"}`)
	} else {
		atomic.AddInt64(&failedRequests, 1)
		resp.Code = 500
		resp.Msg = "stop failed"
	}

	// Declare the body as JSON, as handleStats does; otherwise net/http
	// sniffs the content and labels it text/plain.
	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(resp); err != nil {
		log.Printf("handleStopCard: write response: %v", err)
	}
}
// handleStartCard mocks the start (resume) operation. After the simulated
// latency, 95% of calls return code 200 with {"result": "started"} and 5%
// return code 500 ("start failed"). The HTTP status is 200 in both cases.
func handleStartCard(w http.ResponseWriter, r *http.Request) {
	atomic.AddInt64(&totalRequests, 1)
	simulateLatency()

	resp := GatewayResponse{TraceID: fmt.Sprintf("trace-%d", time.Now().UnixNano())}
	if rand.Float64() < 0.95 {
		atomic.AddInt64(&successRequests, 1)
		resp.Code = 200
		resp.Msg = "success"
		resp.Data = json.RawMessage(`{"result": "started"}`)
	} else {
		atomic.AddInt64(&failedRequests, 1)
		resp.Code = 500
		resp.Msg = "start failed"
	}

	// Declare the body as JSON, as handleStats does; otherwise net/http
	// sniffs the content and labels it text/plain.
	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(resp); err != nil {
		log.Printf("handleStartCard: write response: %v", err)
	}
}
// handleCardStatus mocks the card status query. After the simulated latency,
// 90% of calls return code 200 with a random status (1 = normal, 0 = stopped)
// and 10% return code 500 ("query failed"). The HTTP status is 200 in both
// cases.
func handleCardStatus(w http.ResponseWriter, r *http.Request) {
	atomic.AddInt64(&totalRequests, 1)
	simulateLatency()

	resp := GatewayResponse{TraceID: fmt.Sprintf("trace-%d", time.Now().UnixNano())}
	if rand.Float64() < 0.90 {
		atomic.AddInt64(&successRequests, 1)
		// Random card status: 1 = normal, 0 = stopped.
		cardStatus := rand.Intn(2)
		resp.Code = 200
		resp.Msg = "success"
		resp.Data = json.RawMessage(fmt.Sprintf(`{"status": %d}`, cardStatus))
	} else {
		atomic.AddInt64(&failedRequests, 1)
		resp.Code = 500
		resp.Msg = "query failed"
	}

	// Declare the body as JSON, as handleStats does; otherwise net/http
	// sniffs the content and labels it text/plain.
	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(resp); err != nil {
		log.Printf("handleCardStatus: write response: %v", err)
	}
}
// handleStats reports the counters collected since startup as JSON: uptime in
// seconds, total / success / failed request counts, average QPS over the
// whole uptime, and the success rate formatted as a percentage string.
func handleStats(w http.ResponseWriter, r *http.Request) {
	elapsed := time.Since(startTime).Seconds()
	total := atomic.LoadInt64(&totalRequests)
	success := atomic.LoadInt64(&successRequests)
	failed := atomic.LoadInt64(&failedRequests)

	// Guard the divisions: encoding/json refuses NaN and Inf, which would
	// turn the whole response into an empty body.
	var qps float64
	if elapsed > 0 {
		qps = float64(total) / elapsed
	}
	var successRate float64
	if total > 0 {
		successRate = float64(success) * 100 / float64(total)
	}

	stats := map[string]interface{}{
		"uptime_seconds": elapsed,
		"total_requests": total,
		"success_count":  success,
		"failed_count":   failed,
		"qps":            qps,
		"success_rate":   fmt.Sprintf("%.2f%%", successRate),
	}

	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(stats); err != nil {
		log.Printf("handleStats: write response: %v", err)
	}
}

224
scripts/benchmark/monitor.sh Executable file
View File

@@ -0,0 +1,224 @@
#!/bin/bash
# Polling benchmark monitor (enhanced).
#
# Redraws a dashboard every 5 seconds showing the polling queue depths,
# per-task throughput, concurrency usage, Mock Gateway statistics and Redis
# memory usage.
#
# Environment:
#   JUNHONG_REDIS_ADDRESS  Redis host (default 127.0.0.1)
#   JUNHONG_REDIS_PORT     Redis port (default 6379)
#
# Requires redis-cli; curl and python3 are used only for the gateway section.
set -e

REDIS_HOST="${JUNHONG_REDIS_ADDRESS:-127.0.0.1}"
REDIS_PORT="${JUNHONG_REDIS_PORT:-6379}"
REDIS_CLI="redis-cli -h $REDIS_HOST -p $REDIS_PORT"
GATEWAY_STATS_URL="http://127.0.0.1:8888/stats"

# redis_int DEFAULT COMMAND [ARG...]
# Runs one Redis command and prints the reply as an integer (a decimal part,
# if any, is dropped). Prints DEFAULT when Redis is unreachable, the key or
# field is missing, or the reply is not numeric. This keeps the shell
# arithmetic below from aborting the script under `set -e`.
redis_int() {
    local fallback="$1" reply
    shift
    reply=$($REDIS_CLI "$@" 2>/dev/null | tr -d '\r' || true)
    if [[ "$reply" =~ ^-?[0-9]+(\.[0-9]+)?$ ]]; then
        echo "${reply%%.*}"
    else
        echo "$fallback"
    fi
}

# rate_per_sec CURRENT PREVIOUS SECONDS
# Prints the per-second rate between two counter samples. A negative delta
# (the counter was reset or rolled over) is reported as 0.
rate_per_sec() {
    local delta=$(( $1 - $2 ))
    if [ "$delta" -lt 0 ]; then
        delta=0
    fi
    echo $(( delta / $3 ))
}

# success_rate SUCCESS FAILURE
# Prints the success percentage with one decimal, truncated, using integer
# arithmetic only.
success_rate() {
    local total=$(( $1 + $2 ))
    if [ "$total" -gt 0 ]; then
        local tenths=$(( $1 * 1000 / total ))
        printf '%d.%d' $(( tenths / 10 )) $(( tenths % 10 ))
    else
        printf '0.0'
    fi
}

# avg_ms DURATION SUCCESS
# Prints the total duration divided by the success count, or 0 when there
# were no successes.
avg_ms() {
    if [ "$2" -gt 0 ]; then
        echo $(( $1 / $2 ))
    else
        echo 0
    fi
}

# print_task_row LABEL SUCCESS FAILURE QPS DURATION
print_task_row() {
    printf " %-10s | %'8d | %'8d | %5.1f%% | %6d | %6dms\n" \
        "$1" "$2" "$3" "$(success_rate "$2" "$3")" "$4" "$(avg_ms "$5" "$2")"
}

# Previous samples, used to compute the per-interval QPS.
LAST_REALNAME_SUCCESS=0
LAST_CARDDATA_SUCCESS=0
LAST_PACKAGE_SUCCESS=0
LAST_TIME=$(date +%s)
# On the first refresh there is no previous sample, so the current counters
# become the baseline and the QPS is shown as 0.
FIRST_PASS=1

echo "=== 轮询系统压测监控(增强版)==="
echo "Redis 地址: $REDIS_HOST:$REDIS_PORT"
echo ""

# Monitoring loop.
while true; do
    clear || true
    NOW=$(date +%s)
    INTERVAL=$((NOW - LAST_TIME))
    if [ "$INTERVAL" -le 0 ]; then
        INTERVAL=1
    fi

    echo "╔══════════════════════════════════════════════════════════════════════╗"
    echo "║ 轮询系统压测监控 $(date '+%Y-%m-%d %H:%M:%S')"
    echo "╚══════════════════════════════════════════════════════════════════════╝"
    echo ""

    # ========== Redis queue status ==========
    echo "【📊 Redis 队列状态】"
    REALNAME_QUEUE=$(redis_int 0 ZCARD "polling:queue:realname")
    CARDDATA_QUEUE=$(redis_int 0 ZCARD "polling:queue:carddata")
    PACKAGE_QUEUE=$(redis_int 0 ZCARD "polling:queue:package")
    MANUAL_REALNAME=$(redis_int 0 LLEN "polling:manual:realname")
    MANUAL_CARDDATA=$(redis_int 0 LLEN "polling:manual:carddata")
    MANUAL_PACKAGE=$(redis_int 0 LLEN "polling:manual:package")
    printf " %-20s %'12d\n" "实名检查队列:" "$REALNAME_QUEUE"
    printf " %-20s %'12d\n" "流量检查队列:" "$CARDDATA_QUEUE"
    printf " %-20s %'12d\n" "套餐检查队列:" "$PACKAGE_QUEUE"
    printf " %-20s %'12d\n" "手动触发(实名):" "$MANUAL_REALNAME"
    printf " %-20s %'12d\n" "手动触发(流量):" "$MANUAL_CARDDATA"
    printf " %-20s %'12d\n" "手动触发(套餐):" "$MANUAL_PACKAGE"
    echo ""

    # ========== Processing statistics ==========
    echo "【⚡ 处理性能统计】"
    # Note: the stats key format is polling:stats:polling:<task>.
    REALNAME_SUCCESS=$(redis_int 0 HGET "polling:stats:polling:realname" "success_count_1h")
    REALNAME_FAILURE=$(redis_int 0 HGET "polling:stats:polling:realname" "failure_count_1h")
    REALNAME_DURATION=$(redis_int 0 HGET "polling:stats:polling:realname" "total_duration_1h")
    CARDDATA_SUCCESS=$(redis_int 0 HGET "polling:stats:polling:carddata" "success_count_1h")
    CARDDATA_FAILURE=$(redis_int 0 HGET "polling:stats:polling:carddata" "failure_count_1h")
    CARDDATA_DURATION=$(redis_int 0 HGET "polling:stats:polling:carddata" "total_duration_1h")
    PACKAGE_SUCCESS=$(redis_int 0 HGET "polling:stats:polling:package" "success_count_1h")
    PACKAGE_FAILURE=$(redis_int 0 HGET "polling:stats:polling:package" "failure_count_1h")
    PACKAGE_DURATION=$(redis_int 0 HGET "polling:stats:polling:package" "total_duration_1h")

    if [ "$FIRST_PASS" -eq 1 ]; then
        LAST_REALNAME_SUCCESS=$REALNAME_SUCCESS
        LAST_CARDDATA_SUCCESS=$CARDDATA_SUCCESS
        LAST_PACKAGE_SUCCESS=$PACKAGE_SUCCESS
        FIRST_PASS=0
    fi

    REALNAME_QPS=$(rate_per_sec "$REALNAME_SUCCESS" "$LAST_REALNAME_SUCCESS" "$INTERVAL")
    CARDDATA_QPS=$(rate_per_sec "$CARDDATA_SUCCESS" "$LAST_CARDDATA_SUCCESS" "$INTERVAL")
    PACKAGE_QPS=$(rate_per_sec "$PACKAGE_SUCCESS" "$LAST_PACKAGE_SUCCESS" "$INTERVAL")
    TOTAL_QPS=$((REALNAME_QPS + CARDDATA_QPS + PACKAGE_QPS))

    printf " %-10s | %8s | %8s | %6s | %6s | %8s\n" "任务类型" "成功" "失败" "成功率" "QPS" "平均延迟"
    printf " %-10s | %8s | %8s | %6s | %6s | %8s\n" "----------" "--------" "--------" "------" "------" "--------"
    print_task_row "实名检查" "$REALNAME_SUCCESS" "$REALNAME_FAILURE" "$REALNAME_QPS" "$REALNAME_DURATION"
    print_task_row "流量检查" "$CARDDATA_SUCCESS" "$CARDDATA_FAILURE" "$CARDDATA_QPS" "$CARDDATA_DURATION"
    print_task_row "套餐检查" "$PACKAGE_SUCCESS" "$PACKAGE_FAILURE" "$PACKAGE_QPS" "$PACKAGE_DURATION"
    printf " %-10s | %8s | %8s | %6s | %6d | %8s\n" "总计" "-" "-" "-" "$TOTAL_QPS" "-"
    echo ""

    # Remember this sample for the next refresh.
    LAST_REALNAME_SUCCESS=$REALNAME_SUCCESS
    LAST_CARDDATA_SUCCESS=$CARDDATA_SUCCESS
    LAST_PACKAGE_SUCCESS=$PACKAGE_SUCCESS
    LAST_TIME=$NOW

    # ========== Concurrency control ==========
    echo "【🔒 并发控制状态】"
    # Note: the "current" keys carry a polling: prefix on the task name; the
    # "config" keys do not.
    REALNAME_CURRENT=$(redis_int 0 GET "polling:concurrency:current:polling:realname")
    CARDDATA_CURRENT=$(redis_int 0 GET "polling:concurrency:current:polling:carddata")
    PACKAGE_CURRENT=$(redis_int 0 GET "polling:concurrency:current:polling:package")
    # An empty realname limit means the config key is not in Redis yet.
    REALNAME_MAX=$(redis_int "" GET "polling:concurrency:config:realname")
    CARDDATA_MAX=$(redis_int 50 GET "polling:concurrency:config:carddata")
    PACKAGE_MAX=$(redis_int 50 GET "polling:concurrency:config:package")
    if [ -z "$REALNAME_MAX" ]; then
        echo " (未启动 Worker并发配置未加载)"
    else
        printf " 实名检查: %d / %s\n" "$REALNAME_CURRENT" "$REALNAME_MAX"
        printf " 流量检查: %d / %s\n" "$CARDDATA_CURRENT" "$CARDDATA_MAX"
        printf " 套餐检查: %d / %s\n" "$PACKAGE_CURRENT" "$PACKAGE_MAX"
    fi
    echo ""

    # ========== Mock Gateway statistics ==========
    # Fetched once per refresh; the section is skipped when the gateway is
    # not reachable.
    GATEWAY_STATS=$(curl -s --max-time 2 "$GATEWAY_STATS_URL" 2>/dev/null || true)
    if [ -n "$GATEWAY_STATS" ]; then
        echo "【🌐 Mock Gateway 统计】"
        echo "$GATEWAY_STATS" | python3 -c "
import sys, json
try:
    data = json.load(sys.stdin)
    uptime = data.get('uptime_seconds', 0)
    total = data.get('total_requests', 0)
    success = data.get('success_count', 0)
    failed = data.get('failed_count', 0)
    qps = data.get('qps', 0)
    rate = data.get('success_rate', '0%')
    print(f' 运行时长: {uptime:.0f}s | 总请求: {total:,} | QPS: {qps:.1f} | 成功率: {rate}')
except Exception as e:
    print(f' 解析失败: {e}')
" 2>/dev/null || echo " 解析失败"
        echo ""
    fi

    # ========== Redis memory ==========
    echo "【💾 Redis 内存使用】"
    # `|| true` matters: under `set -e` a failing command substitution in a
    # plain assignment would end the script before the fallback could run.
    REDIS_INFO=$($REDIS_CLI INFO memory 2>/dev/null || true)
    if [ -n "$REDIS_INFO" ]; then
        USED_MEMORY=$(echo "$REDIS_INFO" | grep "used_memory_human:" | cut -d: -f2 | tr -d '\r')
        MAX_MEMORY=$(echo "$REDIS_INFO" | grep "maxmemory_human:" | cut -d: -f2 | tr -d '\r')
        printf " 已用: %s / 最大: %s\n" "$USED_MEMORY" "$MAX_MEMORY"
    else
        echo " 无法获取 Redis 信息"
    fi
    echo ""

    # ========== Card statistics (derived from queue sizes) ==========
    echo "【📦 卡统计(队列推算)】"
    TOTAL_QUEUE=$((REALNAME_QUEUE + CARDDATA_QUEUE + PACKAGE_QUEUE))
    # Approximation only: not-real-named cards sit in the real-name queue and
    # activated cards sit in the traffic and package queues. Exact numbers
    # need a database query.
    printf " 队列总卡数: %'d\n" "$TOTAL_QUEUE"
    printf " 未实名(估): %'d | 已激活(估): %'d\n" "$REALNAME_QUEUE" "$CARDDATA_QUEUE"
    echo " (注: 精确统计需要数据库连接)"
    echo ""

    echo "────────────────────────────────────────────────────────────────────────"
    echo "按 Ctrl+C 退出监控... (每 5 秒刷新)"
    sleep 5
done

View File

@@ -0,0 +1,48 @@
#!/bin/bash
# Start a local Redis container for the polling benchmark.
#
# Reuses a running container named polling-redis; otherwise removes any
# stopped leftover with that name, starts a fresh one, and waits until the
# server answers PING. Requires docker and a host redis-cli.
set -e

CONTAINER_NAME="polling-redis"

echo "=== 启动本地 Redis ==="

# Check for a running container. The name is matched exactly so that a
# container such as "polling-redis-old" is not mistaken for this one.
if docker ps --format '{{.Names}}' | grep -qx "$CONTAINER_NAME"; then
    echo "Redis 容器已在运行"
    docker ps | grep "$CONTAINER_NAME"
    exit 0
fi

# Remove a stopped leftover container, if any.
docker rm -f "$CONTAINER_NAME" 2>/dev/null || true

# Start the Redis container:
# - maxmemory 8GB (benchmark sizing)
# - persistence disabled (AOF and RDB snapshots off) for throughput
# NOTE(review): allkeys-lru lets Redis evict any key once the limit is hit,
# which would presumably include the polling queues — confirm this is wanted.
docker run -d \
    --name "$CONTAINER_NAME" \
    -p 6379:6379 \
    redis:7-alpine \
    redis-server \
    --maxmemory 8gb \
    --maxmemory-policy allkeys-lru \
    --appendonly no \
    --save ""

echo ""
echo "等待 Redis 启动..."

# Poll for readiness for up to about 10 seconds. A fixed sleep followed by a
# single ping reports a false failure when the container starts slowly.
READY=0
for _ in 1 2 3 4 5 6 7 8 9 10; do
    if redis-cli ping 2>/dev/null | grep -q PONG; then
        READY=1
        break
    fi
    sleep 1
done

# Verify the connection.
if [ "$READY" -eq 1 ]; then
    echo "✓ Redis 启动成功"
    echo ""
    echo "连接信息:"
    echo " 地址: 127.0.0.1:6379"
    echo " 密码: (无)"
    echo ""
    echo "Redis 内存配置:"
    redis-cli CONFIG GET maxmemory
else
    echo "✗ Redis 启动失败"
    exit 1
fi

View File

@@ -0,0 +1,156 @@
-- 轮询系统初始化配置脚本
-- 设计目标: 支持一亿张卡规模
-- 执行: psql -U user -d database -f scripts/init_polling_config.sql
-- ========================================
-- 1. Polling configuration
-- Design principles:
-- - Cards that are not real-named: checked every 5 minutes (avoid excessive frequency)
-- - Real-named cards: checked once a day (status is stable)
-- - Activated cards, traffic: checked once an hour
--
-- The DELETE and the INSERTs run in one transaction, so a failing INSERT
-- rolls everything back and the previous configuration stays in place
-- instead of leaving the table empty or half-filled.
-- ========================================
BEGIN;
-- Remove the existing configuration (if any)
DELETE FROM tb_polling_config;
-- Priority 10: cards that are not real-named, checked every 5 minutes
-- Estimate: 10 million cards x 12 checks/hour = 120 million checks/hour
INSERT INTO tb_polling_config (config_name, card_condition, card_category, carrier_id, priority, realname_check_interval, carddata_check_interval, package_check_interval, status, description)
VALUES
('未实名卡轮询', 'not_real_name', NULL, NULL, 10, 300, NULL, NULL, 1, '未实名卡每5分钟检查一次实名状态一亿卡规模优化');
-- Priority 15: industry cards (no real-name check needed)
INSERT INTO tb_polling_config (config_name, card_condition, card_category, carrier_id, priority, realname_check_interval, carddata_check_interval, package_check_interval, status, description)
VALUES
('行业卡轮询', NULL, 'industry', NULL, 15, NULL, 3600, 3600, 1, '行业卡无需实名检查,每小时检查流量和套餐');
-- Priority 20: real-named cards (low frequency, once a day)
-- Estimate: 30 million cards x 1 check/day, a small load
INSERT INTO tb_polling_config (config_name, card_condition, card_category, carrier_id, priority, realname_check_interval, carddata_check_interval, package_check_interval, status, description)
VALUES
('已实名卡轮询', 'real_name', NULL, NULL, 20, 86400, NULL, NULL, 1, '已实名卡每天检查一次实名状态(状态稳定,无需频繁检查)');
-- Priority 30: activated cards (traffic and package checks, hourly)
-- Estimate: 60 million cards x 1 check/hour = 60 million checks/hour
INSERT INTO tb_polling_config (config_name, card_condition, card_category, carrier_id, priority, realname_check_interval, carddata_check_interval, package_check_interval, status, description)
VALUES
('已激活卡轮询', 'activated', NULL, NULL, 30, NULL, 3600, 3600, 1, '已激活卡每小时检查流量和套餐(一亿卡规模优化)');
-- Priority 100: default configuration (conservative fallback)
INSERT INTO tb_polling_config (config_name, card_condition, card_category, carrier_id, priority, realname_check_interval, carddata_check_interval, package_check_interval, status, description)
VALUES
('默认轮询配置', NULL, NULL, NULL, 100, 86400, 86400, 86400, 1, '默认配置,每天检查一次(未匹配其他配置的卡)');
COMMIT;
-- ========================================
-- 2. Concurrency control configuration
-- Design target: sustain 50,000 QPS
-- Suggested per worker: 500-1000 concurrent tasks
-- Multi-worker deployment: 8-16 workers
--
-- The DELETE and the INSERTs run in one transaction, so a failing INSERT
-- rolls everything back and the previous limits stay in place.
-- ========================================
BEGIN;
-- Remove the existing configuration (if any)
DELETE FROM tb_polling_concurrency_config;
-- Real-name check concurrency (per worker)
INSERT INTO tb_polling_concurrency_config (task_type, max_concurrency, description)
VALUES
('realname', 500, '实名检查任务最大并发数(单 Worker可部署多个 Worker 水平扩展)');
-- Traffic check concurrency (per worker)
INSERT INTO tb_polling_concurrency_config (task_type, max_concurrency, description)
VALUES
('carddata', 1000, '流量检查任务最大并发数(单 Worker流量检查占比最大');
-- Package check concurrency (per worker)
INSERT INTO tb_polling_concurrency_config (task_type, max_concurrency, description)
VALUES
('package', 500, '套餐检查任务最大并发数(单 Worker');
-- Stop/start operation concurrency (per worker)
INSERT INTO tb_polling_concurrency_config (task_type, max_concurrency, description)
VALUES
('stop_start', 100, '停复机操作最大并发数(需要谨慎控制)');
COMMIT;
-- ========================================
-- 3. Data cleanup configuration
-- At the 100-million-card scale a large volume of data is produced daily
-- and has to be purged promptly.
--
-- The DELETE and the INSERTs run in one transaction, so a failing INSERT
-- rolls everything back and the previous cleanup rules stay in place.
-- ========================================
BEGIN;
-- Remove the existing configuration (if any)
DELETE FROM tb_data_cleanup_config;
-- Traffic history records (short retention)
INSERT INTO tb_data_cleanup_config (table_name, retention_days, enabled, batch_size, description)
VALUES
('tb_data_usage_record', 30, 1, 50000, '保留30天流量历史每批删除5万条一亿卡每天产生大量数据');
-- Operation logs
INSERT INTO tb_data_cleanup_config (table_name, retention_days, enabled, batch_size, description)
VALUES
('tb_account_operation_log', 90, 1, 50000, '保留90天操作日志每批删除5万条');
-- Alert history
INSERT INTO tb_data_cleanup_config (table_name, retention_days, enabled, batch_size, description)
VALUES
('tb_polling_alert_history', 14, 1, 50000, '保留14天告警历史每批删除5万条');
-- Manual trigger logs
INSERT INTO tb_data_cleanup_config (table_name, retention_days, enabled, batch_size, description)
VALUES
('tb_polling_manual_trigger_log', 30, 1, 50000, '保留30天手动触发日志每批删除5万条');
-- Logs of the cleanup job itself
INSERT INTO tb_data_cleanup_config (table_name, retention_days, enabled, batch_size, description)
VALUES
('tb_data_cleanup_log', 60, 1, 10000, '保留60天数据清理日志');
COMMIT;
-- ========================================
-- 4. Alert rules (100-million-card scale)
--
-- The DELETE and the INSERTs run in one transaction, so a failing INSERT
-- rolls everything back and the previous rules stay in place.
-- ========================================
BEGIN;
-- Remove the existing rules (if any)
DELETE FROM tb_polling_alert_rule;
-- Queue backlog alerts (thresholds raised for large scale)
INSERT INTO tb_polling_alert_rule (rule_name, task_type, metric_type, operator, threshold, alert_level, cooldown_minutes, status, notify_channels, description)
VALUES
('实名检查队列积压', 'polling:realname', 'queue_size', '>', 500000, 'warning', 10, 1, 'log', '实名检查队列超过50万时告警'),
('流量检查队列积压', 'polling:carddata', 'queue_size', '>', 1000000, 'warning', 10, 1, 'log', '流量检查队列超过100万时告警'),
('实名检查队列严重积压', 'polling:realname', 'queue_size', '>', 2000000, 'critical', 5, 1, 'log', '实名检查队列超过200万时严重告警');
-- Failure rate alerts
INSERT INTO tb_polling_alert_rule (rule_name, task_type, metric_type, operator, threshold, alert_level, cooldown_minutes, status, notify_channels, description)
VALUES
('实名检查失败率过高', 'polling:realname', 'failure_rate', '>', 20, 'warning', 10, 1, 'log', '实名检查失败率超过20%时告警'),
('流量检查失败率过高', 'polling:carddata', 'failure_rate', '>', 20, 'warning', 10, 1, 'log', '流量检查失败率超过20%时告警');
COMMIT;
-- ========================================
-- Initialization finished
-- ========================================
-- Verify the result: print the row count of each table populated above.
SELECT '轮询配置初始化完成' AS message, COUNT(*) AS count FROM tb_polling_config;
SELECT '并发控制配置初始化完成' AS message, COUNT(*) AS count FROM tb_polling_concurrency_config;
SELECT '数据清理配置初始化完成' AS message, COUNT(*) AS count FROM tb_data_cleanup_config;
SELECT '告警规则初始化完成' AS message, COUNT(*) AS count FROM tb_polling_alert_rule;
-- ========================================
-- 容量规划参考(一亿卡)
-- ========================================
--
-- 检查次数估算(按上述配置):
-- - 未实名卡(10%):1000万 × 12次/小时 = 1.2亿次/小时
-- - 已实名卡(30%):3000万 × 1次/天 ≈ 125万次/小时
-- - 激活卡流量(60%):6000万 × 1次/小时 = 6000万次/小时
-- - 激活卡套餐(60%):6000万 × 1次/小时 = 6000万次/小时
-- 总计:约 2.4 亿次/小时 ≈ 6.7万次/秒
--
-- 推荐部署:
-- - Worker 数量:16 个(每个处理约 4000 QPS)
-- - Redis 内存:16GB+(缓存 + 队列)
-- - 数据库连接池:每 Worker 50 连接
-- - Asynq 队列:critical/default/low 三个队列
--
-- 初始化时间估算:
-- - 1000 万卡:约 50 秒(10万/批,500ms间隔)
-- - 1 亿卡:约 500 秒 ≈ 8 分钟