kingbase教程FG183-金仓数据库CPU过高优化处理
内容简介:本文档详细介绍金仓数据库CPU过高的原因及优化处理方法,包括CPU使用监控、性能分析、优化策略等。风哥教程参考kingbase官方文档kingbase8系统管理员手册、kingbase8性能优化指南等。
Part01-基础概念与理论知识
1.1 CPU使用概述
CPU是数据库系统的核心资源之一,负责执行数据库的各种操作,如SQL解析、执行计划生成、数据处理等。CPU使用过高会导致数据库性能下降,影响业务正常运行。,风哥提示:
CPU使用的主要来源:
- SQL执行:复杂的SQL查询会消耗大量CPU资源
- 排序操作:大数据量的排序会消耗大量CPU资源
- 哈希操作:哈希连接等操作会消耗大量CPU资源
- 并发操作:高并发场景下,CPU资源竞争加剧
- 后台进程:如autovacuum、checkpointer等后台进程会消耗CPU资源
1.2 CPU过高的原因
CPU过高的原因:
- SQL语句优化不足:复杂的SQL语句或缺少索引的查询会导致CPU使用率过高
- 并发连接数过多:过多的并发连接会导致CPU资源竞争
- 后台进程异常:如autovacuum进程异常会导致CPU使用率过高,学习交流加群风哥微信: itpux-com
- 系统资源不足:CPU资源本身不足,无法满足数据库的需求
- 参数配置不当:如并行度设置过高会导致CPU使用率过高
1.3 CPU使用监控
CPU使用监控:
- 系统级监控:使用top、vmstat、mpstat等命令监控系统CPU使用
- 数据库级监控:使用pg_stat_activity、pg_stat_database等系统视图监控数据库CPU使用
- 性能视图:使用pg_stat_statements等视图监控SQL语句的CPU使用
- 监控工具:使用Prometheus、Zabbix等监控工具监控CPU使用
Part02-生产环境规划与建议
2.1 CPU资源规划
CPU资源规划:
- 根据业务需求选择合适的CPU:根据数据库的负载类型和规模,选择合适的CPU型号和核心数
- 考虑并发度:根据并发连接数和SQL复杂度,合理规划CPU资源
- 考虑并行度:根据CPU核心数,合理设置并行度参数,学习交流加群风哥QQ113257174
- 预留足够的CPU资源:为操作系统和其他应用预留足够的CPU资源
2.2 优化策略
优化策略:
- SQL语句优化:优化复杂的SQL语句,添加必要的索引
- 并发控制:控制并发连接数,避免过多的并发连接
- 参数优化:合理设置并行度、work_mem等参数
- 后台进程管理:合理配置autovacuum等后台进程
- 硬件升级:在必要时升级CPU硬件
2.3 配置建议
配置建议:
- max_connections:根据系统资源和业务需求合理设置
- max_parallel_workers:根据CPU核心数合理设置,更多视频教程www.fgedu.net.cn
- max_parallel_workers_per_gather:根据CPU核心数合理设置
- work_mem:根据内存大小和并发度合理设置
- autovacuum_max_workers:根据系统资源合理设置
Part03-生产环境项目实施方案
3.1 CPU使用监控
CPU使用监控步骤:
- 系统级监控:使用top、vmstat、mpstat等命令监控系统CPU使用
- 数据库级监控:使用pg_stat_activity、pg_stat_database等系统视图监控数据库CPU使用
- SQL级监控:使用pg_stat_statements等视图监控SQL语句的CPU使用
- 设置告警:设置CPU使用率过高的告警
3.2 性能分析
性能分析步骤:
- 识别高CPU进程:使用top命令识别消耗CPU资源最多的进程
- 识别高CPU SQL:使用pg_stat_statements视图识别消耗CPU资源最多的SQL语句,更多学习教程公众号风哥教程itpux_com
- 分析执行计划:分析高CPU SQL的执行计划,找出性能瓶颈
- 识别后台进程:检查后台进程的CPU使用情况
3.3 优化调优
优化调优步骤:
- 优化SQL语句:优化复杂的SQL语句,添加必要的索引
- 调整参数:合理设置并行度、work_mem等参数
- 控制并发:控制并发连接数,避免过多的并发连接
- 管理后台进程:合理配置autovacuum等后台进程
- 验证优化效果:验证优化后的效果
Part04-生产案例与实战讲解
4.1 CPU过高诊断
CPU过高诊断:
# 系统级CPU监控
$ top
# 输出日志(示例)
top – 10:00:00 up 10 days, 8:00, 2 users, load average: 8.00, 7.50, 7.00
Tasks: 200 total, 5 running, 195 sleeping, 0 stopped, 0 zombie
%Cpu(s): 90.0 us, 5.0 sy, 0.0 ni, 5.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
MiB Mem : 65536.0 total, 51200.0 free, 8192.0 used, 6144.0 buff/cache
MiB Swap: 0.0 total, 0.0 free, 0.0 used. 56320.0 avail Mem
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
1234 kingbase 20 0 20.0g 16.0g 15.5g R 80.0 25.0 1:00.00 kbserver
5678 kingbase 20 0 2.0g 1.0g 0.8g R 10.0 1.5 0:30.00 postgres: autovacuum launcher process
9012 kingbase 20 0 2.0g 1.0g 0.8g S 5.0 1.5 0:20.00 postgres: checkpointer process
3456 kingbase 20 0 2.0g 1.0g 0.8g S 5.0 1.5 0:15.00 postgres: writer process
# 数据库级CPU监控
$ psql -h fgedu.localhost -p 54321 -U fgedu -d fgedudb -c “SELECT pid, usename, state, query, backend_start, client_addr FROM pg_stat_activity WHERE state = ‘active’;”
# 输出日志
pid | usename | state | query | backend_start | client_addr
——+———+——–+————————————-+——————————-+————- 1234 | fgedu | active | SELECT * FROM fgedu_sales WHERE sale_date BETWEEN ‘2026-01-01’ AND ‘2026-03-31’ ORDER BY amount DESC | 2026-04-09 10:00:00.000000+08 | 192.168.1.1
5678 | fgedu | active | SELECT * FROM fgedu_employee WHERE department = ‘技术部’ | 2026-04-09 10:00:01.000000+08 | 192.168.1.2
# SQL级CPU监控
$ psql -h fgedu.localhost -p 54321 -U fgedu -d fgedudb -c “SELECT queryid, query, mean_exec_time, max_exec_time, mean_rows, shared_blks_hit, shared_blks_read FROM pg_stat_statements ORDER BY mean_exec_time DESC LIMIT 10;”
# 输出日志
queryid | query | mean_exec_time | max_exec_time | mean_rows | shared_blks_hit | shared_blks_read
———+————————————-+—————-+—————-+———–+—————–+——————- 123456 | SELECT * FROM fgedu_sales WHERE sale_date BETWEEN $1 AND $2 ORDER BY amount DESC | 500.5 | 1000.0 | 1000 | 5000 | 1000
789012 | SELECT * FROM fgedu_employee WHERE department = $1 | 50.2 | 100.0 | 50 | 1000 | 0
4.2 优化实战
优化实战:,from DB视频:www.itpux.com
# 分析高CPU SQL的执行计划
$ psql -h fgedu.localhost -p 54321 -U fgedu -d fgedudb -c “EXPLAIN ANALYZE SELECT * FROM fgedu_sales WHERE sale_date BETWEEN ‘2026-01-01’ AND ‘2026-03-31’ ORDER BY amount DESC;”
# 输出日志
QUERY PLAN
———————————————————————————————————————-
Gather Merge (cost=10000000000.00..10000005000.00 rows=10000 width=20) (actual time=100.000..500.000 rows=10000 loops=1)
Workers Planned: 4
Workers Launched: 4
-> Sort (cost=10000000000.00..10000000012.50 rows=2500 width=20) (actual time=50.000..200.000 rows=2500 loops=4)
Sort Key: amount DESC
Sort Method: quicksort Memory: 512kB
-> Parallel Seq Scan on fgedu_sales (cost=0.00..10000000000.00 rows=2500 width=20) (actual time=0.000..50.000 rows=2500 loops=4)
Filter: ((sale_date >= ‘2026-01-01’::date) AND (sale_date <= '2026-03-31'::date))
Rows Removed by Filter: 7500
Planning Time: 0.100 ms
Execution Time: 500.100 ms
# 优化措施1:添加索引
$ psql -h fgedu.localhost -p 54321 -U fgedu -d fgedudb -c “CREATE INDEX idx_fgedu_sales_sale_date ON fgedu_sales(sale_date);”
# 优化措施2:添加排序索引
$ psql -h fgedu.localhost -p 54321 -U fgedu -d fgedudb -c “CREATE INDEX idx_fgedu_sales_amount ON fgedu_sales(amount DESC);”
# 验证优化效果
$ psql -h fgedu.localhost -p 54321 -U fgedu -d fgedudb -c “EXPLAIN ANALYZE SELECT * FROM fgedu_sales WHERE sale_date BETWEEN ‘2026-01-01’ AND ‘2026-03-31’ ORDER BY amount DESC;”
# 输出日志
QUERY PLAN
———————————————————————————————————————-
Index Scan using idx_fgedu_sales_amount on fgedu_sales (cost=0.29..8.31 rows=10000 width=20) (actual time=0.050..100.000 rows=10000 loops=1)
Filter: ((sale_date >= ‘2026-01-01’::date) AND (sale_date <= '2026-03-31'::date))
Rows Removed by Filter: 90000
Planning Time: 0.100 ms
Execution Time: 100.100 ms
# 优化措施3:调整并行度参数
$ vi /kingbase/data/postgresql.conf
# 修改并行度参数
max_parallel_workers = 4 # 并行工作线程数
max_parallel_workers_per_gather = 2 # 每个gather操作的并行工作线程数
# 重启数据库
$ systemctl restart kingbase
# 验证配置
$ psql -h fgedu.localhost -p 54321 -U fgedu -d fgedudb -c “SHOW max_parallel_workers;”
# 输出日志
max_parallel_workers
———————-
4
$ psql -h fgedu.localhost -p 54321 -U fgedu -d fgedudb -c “SHOW max_parallel_workers_per_gather;”
# 输出日志
max_parallel_workers_per_gather
———————————-
2
4.3 监控与验证
监控与验证:
# 系统级CPU监控(优化后)
$ top
# 输出日志(示例)
top – 10:00:00 up 10 days, 8:00, 2 users, load average: 2.00, 2.50, 3.00
Tasks: 200 total, 1 running, 199 sleeping, 0 stopped, 0 zombie
%Cpu(s): 20.0 us, 5.0 sy, 0.0 ni, 75.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
MiB Mem : 65536.0 total, 51200.0 free, 8192.0 used, 6144.0 buff/cache
MiB Swap: 0.0 total, 0.0 free, 0.0 used. 56320.0 avail Mem
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
1234 kingbase 20 0 20.0g 16.0g 15.5g S 15.0 25.0 1:00.00 kbserver
5678 kingbase 20 0 2.0g 1.0g 0.8g S 2.0 1.5 0:30.00 postgres: autovacuum launcher process
9012 kingbase 20 0 2.0g 1.0g 0.8g S 1.0 1.5 0:20.00 postgres: checkpointer process
3456 kingbase 20 0 2.0g 1.0g 0.8g S 1.0 1.5 0:15.00 postgres: writer process
# 数据库级CPU监控(优化后)
$ psql -h fgedu.localhost -p 54321 -U fgedu -d fgedudb -c “SELECT pid, usename, state, query, backend_start, client_addr FROM pg_stat_activity WHERE state = ‘active’;”
# 输出日志
pid | usename | state | query | backend_start | client_addr
——+———+——–+————————————-+——————————-+————- 1234 | fgedu | active | SELECT * FROM fgedu_sales WHERE sale_date BETWEEN ‘2026-01-01’ AND ‘2026-03-31’ ORDER BY amount DESC | 2026-04-09 10:00:00.000000+08 | 192.168.1.1
5678 | fgedu | active | SELECT * FROM fgedu_employee WHERE department = ‘技术部’ | 2026-04-09 10:00:01.000000+08 | 192.168.1.2
# SQL级CPU监控(优化后)
$ psql -h fgedu.localhost -p 54321 -U fgedu -d fgedudb -c “SELECT queryid, query, mean_exec_time, max_exec_time, mean_rows, shared_blks_hit, shared_blks_read FROM pg_stat_statements ORDER BY mean_exec_time DESC LIMIT 10;”
# 输出日志
queryid | query | mean_exec_time | max_exec_time | mean_rows | shared_blks_hit | shared_blks_read
———+————————————-+—————-+—————-+———–+—————–+——————- 123456 | SELECT * FROM fgedu_sales WHERE sale_date BETWEEN $1 AND $2 ORDER BY amount DESC | 100.5 | 200.0 | 1000 | 5000 | 100
789012 | SELECT * FROM fgedu_employee WHERE department = $1 | 50.2 | 100.0 | 50 | 1000 | 0
4.4 性能测试
性能测试:
# 测试查询性能(优化前)
$ psql -h fgedu.localhost -p 54321 -U fgedu -d fgedudb -c “EXPLAIN ANALYZE SELECT * FROM fgedu_sales WHERE sale_date BETWEEN ‘2026-01-01’ AND ‘2026-03-31’ ORDER BY amount DESC;”
# 输出日志(优化前)
QUERY PLAN
———————————————————————————————————————-
Gather Merge (cost=10000000000.00..10000005000.00 rows=10000 width=20) (actual time=100.000..500.000 rows=10000 loops=1)
Workers Planned: 4
Workers Launched: 4
-> Sort (cost=10000000000.00..10000000012.50 rows=2500 width=20) (actual time=50.000..200.000 rows=2500 loops=4)
Sort Key: amount DESC
Sort Method: quicksort Memory: 512kB
-> Parallel Seq Scan on fgedu_sales (cost=0.00..10000000000.00 rows=2500 width=20) (actual time=0.000..50.000 rows=2500 loops=4)
Filter: ((sale_date >= ‘2026-01-01’::date) AND (sale_date <= '2026-03-31'::date))
Rows Removed by Filter: 7500
Planning Time: 0.100 ms
Execution Time: 500.100 ms
# 测试查询性能(优化后)
$ psql -h fgedu.localhost -p 54321 -U fgedu -d fgedudb -c “EXPLAIN ANALYZE SELECT * FROM fgedu_sales WHERE sale_date BETWEEN ‘2026-01-01’ AND ‘2026-03-31’ ORDER BY amount DESC;”
# 输出日志(优化后)
QUERY PLAN
———————————————————————————————————————-
Index Scan using idx_fgedu_sales_amount on fgedu_sales (cost=0.29..8.31 rows=10000 width=20) (actual time=0.050..100.000 rows=10000 loops=1)
Filter: ((sale_date >= ‘2026-01-01’::date) AND (sale_date <= '2026-03-31'::date))
Rows Removed by Filter: 90000
Planning Time: 0.100 ms
Execution Time: 100.100 ms
# 测试并发性能
$ ab -n 1000 -c 100 http://fgedu.localhost:8080/api/query
# 输出日志(示例)
This is ApacheBench, Version 2.3 <$Revision: 1879490 $>
Copyright 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/
Licensed to The Apache Software Foundation, http://www.apache.org/
Benchmarking fgedu.localhost (be patient)
Completed 100 requests
Completed 200 requests
Completed 300 requests
Completed 400 requests
Completed 500 requests
Completed 600 requests
Completed 700 requests
Completed 800 requests
Completed 900 requests
Completed 1000 requests
Finished 1000 requests
Server Software: nginx/1.20.1
Server Hostname: fgedu.localhost
Server Port: 8080
Document Path: /api/query
Document Length: 1000 bytes
Concurrency Level: 100
Time taken for tests: 10.000 seconds
Complete requests: 1000
Failed requests: 0
Total transferred: 1234567 bytes
HTML transferred: 1000000 bytes
Requests per second: 100.00 [#/sec] (mean)
Time per request: 1000.00 [ms] (mean)
Time per request: 10.00 [ms] (mean, across all concurrent requests)
Transfer rate: 123.46 [Kbytes/sec] received
Connection Times (ms)
min mean[+/-sd] median max
Connect: 0 0 0.1 0 1
Processing: 10 500 100.0 500 1000
Waiting: 0 400 100.0 400 900
Total: 10 500 100.0 500 1000
Percentage of the requests served within a certain time (ms)
50% 500
66% 600
75% 700
80% 800
90% 900
95% 950
98% 980
99% 990
100% 1000 (longest request)
Part05-风哥经验总结与分享
5.1 CPU过高常见问题与解决方案
CPU过高常见问题与解决方案:
- SQL语句优化不足:优化复杂的SQL语句,添加必要的索引
- 并发连接数过多:控制并发连接数,使用连接池
- 后台进程异常:合理配置autovacuum等后台进程
- 并行度设置过高:根据CPU核心数合理设置并行度
- 系统资源不足:升级CPU硬件或增加CPU核心数
5.2 CPU优化最佳实践
CPU优化最佳实践:
- SQL语句优化:优化复杂的SQL语句,添加必要的索引
- 并发控制:控制并发连接数,使用连接池
- 参数优化:合理设置并行度、work_mem等参数
- 后台进程管理:合理配置autovacuum等后台进程
- 监控与告警:建立完善的监控和告警机制
- 定期分析:定期分析CPU使用情况,及时发现问题
5.3 CPU优化脚本分享
以下是一个CPU优化脚本示例:
#!/bin/bash
# cpu_optimization.sh
# from:www.itpux.com.qq113257174.wx:itpux-com
# web: `http://www.fgedu.net.cn`
# 配置信息
DB_HOME=”/kingbase”
DB_DATA=”${DB_HOME}/data”
LOG_FILE=”${DB_HOME}/log/cpu_optimization.log”
# 记录日志
log() {
echo “$(date ‘+%Y-%m-%d %H:%M:%S’) – $1” >> $LOG_FILE
}
# 检查CPU使用情况
check_cpu_usage() {
log “检查CPU使用情况…”
# 查看系统CPU使用率
CPU_USAGE=$(top -b -n 1 | grep “%Cpu(s)” | awk ‘{print $2 + $4}’)
log “系统CPU使用率: ${CPU_USAGE}%”
# 查看数据库进程CPU使用率
DB_CPU_USAGE=$(top -b -n 1 | grep kbserver | awk ‘{print $9}’)
log “数据库进程CPU使用率: ${DB_CPU_USAGE}%”
# 查看活跃连接数
ACTIVE_CONNECTIONS=$(psql -h fgedu.localhost -p 54321 -U fgedu -d fgedudb -c “SELECT COUNT(*) FROM pg_stat_activity WHERE state = ‘active’;” -t)
log “活跃连接数: ${ACTIVE_CONNECTIONS}”
# 查看高CPU SQL
log “高CPU SQL:”
psql -h fgedu.localhost -p 54321 -U fgedu -d fgedudb -c “SELECT queryid, query, mean_exec_time, max_exec_time FROM pg_stat_statements ORDER BY mean_exec_time DESC LIMIT 5;” >> $LOG_FILE
}
# 优化SQL语句
optimize_sql() {
log “优化SQL语句…”
# 分析高CPU SQL
HIGH_CPU_SQL=$(psql -h fgedu.localhost -p 54321 -U fgedu -d fgedudb -c “SELECT query FROM pg_stat_statements ORDER BY mean_exec_time DESC LIMIT 1;” -t)
if [ -n “$HIGH_CPU_SQL” ]; then
log “发现高CPU SQL: $HIGH_CPU_SQL”
# 这里可以添加自动优化SQL的逻辑
else
log “未发现高CPU SQL”
fi
}
# 调整参数
adjust_parameters() {
log “调整参数…”
# 备份配置文件
cp ${DB_DATA}/postgresql.conf ${DB_DATA}/postgresql.conf.bak
# 调整并行度参数
sed -i “s/^max_parallel_workers =.*/max_parallel_workers = 4/” ${DB_DATA}/postgresql.conf
sed -i “s/^max_parallel_workers_per_gather =.*/max_parallel_workers_per_gather = 2/” ${DB_DATA}/postgresql.conf
log “参数调整完成”
}
# 重启数据库
restart_database() {
log “重启数据库…”
systemctl restart kingbase
if [ $? -eq 0 ]; then
log “数据库重启成功”
else
log “数据库重启失败”
fi
}
# 验证优化效果
verify_optimization() {
log “验证优化效果…”
# 等待数据库启动
sleep 10
# 检查CPU使用情况
check_cpu_usage
log “优化效果验证完成”
}
# 主函数
main() {
log “开始CPU优化”
check_cpu_usage
optimize_sql
adjust_parameters
restart_database
verify_optimization
log “CPU优化完成”
}
# 执行主函数
main
风哥提示:CPU过高会严重影响数据库性能,通过合理的SQL优化、参数调整和并发控制,可以有效降低CPU使用率,提高数据库性能。
本文由风哥教程整理发布,仅用于学习测试使用,转载注明出处:http://www.fgedu.net.cn/10327.html
