风哥教程参考DB2官方文档Batch Processing、Performance Tuning、LOAD等内容,详细介绍批处理作业优化、大数据加载、批量更新、性能调优等。更多视频教程www.fgedu.net.cn
目录大纲
Part01-批处理作业架构
1.1 批处理场景
常见的批处理作业场景:
- 日终/月末结算
- 数据清洗与转换
- 报表生成
- 数据归档
- 批量数据导入导出
- MQT刷新
- 统计信息更新
1.2 批处理优化原则
- 使用批量操作,减少SQL语句数量
- 合理使用LOAD替代INSERT
- 批量更新使用FOR UPDATE或MERGE
- 合理的提交频率
- 临时表使用优化
- 并行处理大作业
Part02-批量数据加载
2.1 LOAD命令优化
LOAD FROM /data/batch_data.csv OF DEL
INSERT INTO TARGET_TABLE;
# 优化的LOAD命令
LOAD FROM /data/batch_data.csv OF DEL
INSERT INTO TARGET_TABLE
FASTPARSE
NOCHECK CONSTRAINT
NONRECOVERABLE
CPU_PARALLELISM 4
DISK_PARALLELISM 4
BUFFER 1000
SAVECOUNT 10000
ROWS_PER_COMMIT 10000;
# 使用CURSOR加载
DECLARE c1 CURSOR FOR
SELECT * FROM SOURCE_TABLE;
LOAD FROM c1 OF CURSOR
INSERT INTO TARGET_TABLE
NONRECOVERABLE;
# 分区加载
LOAD FROM /data/partition1.csv OF DEL
INSERT INTO TARGET_TABLE PARTITION (PART1);
LOAD FROM /data/partition2.csv OF DEL
INSERT INTO TARGET_TABLE PARTITION (PART2);
2.2 临时表使用
CREATE USER TEMPORARY TABLESPACE TEMP_TS
PAGESIZE 8K
MANAGED BY DATABASE
USING (FILE ‘/db2data/temp/temp_ts.dms’ 5G);
# 创建临时表
DECLARE GLOBAL TEMPORARY TABLE SESSION.TEMP_ORDER (
ORDER_ID VARCHAR(32),
CUSTOMER_ID VARCHAR(32),
TOTAL_AMOUNT DECIMAL(18, 2),
ORDER_STATUS VARCHAR(20)
) ON COMMIT PRESERVE ROWS
NOT LOGGED
IN TEMP_TS;
# 使用临时表处理数据
INSERT INTO SESSION.TEMP_ORDER
SELECT
ORDER_ID,
CUSTOMER_ID,
TOTAL_AMOUNT,
ORDER_STATUS
FROM ORDERS
WHERE CREATE_TIME BETWEEN ‘2026-04-01’ AND ‘2026-04-30’;
# 更新临时表
UPDATE SESSION.TEMP_ORDER
SET ORDER_STATUS = ‘PROCESSED’
WHERE TOTAL_AMOUNT > 1000;
# 合并到目标表
MERGE INTO ORDER_SUMMARY s
USING SESSION.TEMP_ORDER t
ON s.ORDER_ID = t.ORDER_ID
WHEN MATCHED THEN
UPDATE SET
s.TOTAL_AMOUNT = t.TOTAL_AMOUNT,
s.ORDER_STATUS = t.ORDER_STATUS
WHEN NOT MATCHED THEN
INSERT (ORDER_ID, CUSTOMER_ID, TOTAL_AMOUNT, ORDER_STATUS)
VALUES (t.ORDER_ID, t.CUSTOMER_ID, t.TOTAL_AMOUNT, t.ORDER_STATUS);
# 清理临时表
DROP TABLE SESSION.TEMP_ORDER;
Part03-批量更新优化
3.1 MERGE语句
MERGE INTO CUSTOMER_SUMMARY s
USING (
SELECT
CUSTOMER_ID,
COUNT(*) AS ORDER_COUNT,
SUM(TOTAL_AMOUNT) AS TOTAL_AMOUNT,
MAX(CREATE_TIME) AS LAST_ORDER_TIME
FROM ORDERS
WHERE CREATE_TIME BETWEEN ‘2026-04-01’ AND ‘2026-04-30’
GROUP BY CUSTOMER_ID
) t
ON s.CUSTOMER_ID = t.CUSTOMER_ID
WHEN MATCHED THEN
UPDATE SET
s.ORDER_COUNT = t.ORDER_COUNT,
s.TOTAL_AMOUNT = t.TOTAL_AMOUNT,
s.LAST_ORDER_TIME = t.LAST_ORDER_TIME,
s.UPDATE_TIME = CURRENT TIMESTAMP
WHEN NOT MATCHED THEN
INSERT (
CUSTOMER_ID, ORDER_COUNT, TOTAL_AMOUNT,
LAST_ORDER_TIME, CREATE_TIME
) VALUES (
t.CUSTOMER_ID, t.ORDER_COUNT, t.TOTAL_AMOUNT,
t.LAST_ORDER_TIME, CURRENT TIMESTAMP
);
# 使用FOR UPDATE批量更新
UPDATE (
SELECT
o.ORDER_ID,
o.ORDER_STATUS,
p.PAYMENT_STATUS
FROM ORDERS o
INNER JOIN PAYMENT p ON o.ORDER_ID = p.ORDER_ID
WHERE o.ORDER_STATUS = ‘PENDING’
AND p.PAYMENT_STATUS = ‘SUCCESS’
WITH RS USE AND KEEP UPDATE LOCKS
) t
SET t.ORDER_STATUS = ‘PAID’;
# 分批次更新
DECLARE v_offset INTEGER DEFAULT 0;
DECLARE v_batch_size INTEGER DEFAULT 10000;
DECLARE v_updated_count INTEGER;
SET v_updated_count = 1;
WHILE v_updated_count > 0 DO
UPDATE ORDERS
SET ORDER_STATUS = ‘PROCESSED’
WHERE ORDER_STATUS = ‘PENDING’
AND ORDER_ID IN (
SELECT ORDER_ID FROM ORDERS
WHERE ORDER_STATUS = ‘PENDING’
ORDER BY ORDER_ID
FETCH FIRST v_batch_size ROWS ONLY
);
GET DIAGNOSTICS v_updated_count = ROW_COUNT;
SET v_offset = v_offset + v_batch_size;
COMMIT;
END WHILE;
3.2 批量删除
DECLARE v_date DATE;
SET v_date = CURRENT DATE – 365 DAYS;
WHILE 1 = 1 DO
DELETE FROM ORDERS
WHERE CREATE_TIME < v_date
AND ORDER_ID IN (
SELECT ORDER_ID FROM ORDERS
WHERE CREATE_TIME < v_date
FETCH FIRST 10000 ROWS ONLY
);
IF SQLCODE = 100 THEN
LEAVE;
END IF;
COMMIT;
END WHILE;
# 使用ALTER TABLE删除分区(更快)
ALTER TABLE ORDERS
DETACH PARTITION PART_OLD INTO TABLE ORDERS_OLD;
DROP TABLE ORDERS_OLD;
# 使用LOAD替换
# 1. 导出需要保留的数据
EXPORT TO /data/orders_keep.del OF DEL
SELECT * FROM ORDERS WHERE CREATE_TIME >= v_date;
# 2. 清空表
ALTER TABLE ORDERS ACTIVATE NOT LOGGED INITIALLY WITH EMPTY TABLE;
# 3. 重新导入
LOAD FROM /data/orders_keep.del OF DEL
INSERT INTO ORDERS
NONRECOVERABLE;
Part04-作业调度与监控
4.1 作业调度脚本
#!/bin/bash
# batch_job.sh
LOG_DIR=”/db2logs/batch”
DATA_DIR=”/db2data/batch”
DATE=$(date +%Y%m%d)
LOG_FILE=”${LOG_DIR}/batch_job_${DATE}.log”
mkdir -p ${LOG_DIR}
echo “=== 批处理作业开始: $(date) ===” >> ${LOG_FILE}
# 连接数据库
db2 connect to FGEDB >> ${LOG_FILE} 2>&1
if [ $? -ne 0 ]; then
echo “数据库连接失败” >> ${LOG_FILE}
exit 1
fi
# 步骤1: 加载数据
echo “步骤1: 开始加载数据” >> ${LOG_FILE}
db2 “LOAD FROM ${DATA_DIR}/orders_${DATE}.csv OF DEL
INSERT INTO ORDERS
NONRECOVERABLE” >> ${LOG_FILE} 2>&1
if [ $? -ne 0 ]; then
echo “数据加载失败” >> ${LOG_FILE}
db2 connect reset
exit 1
fi
# 步骤2: 客户汇总
echo “步骤2: 开始客户汇总” >> ${LOG_FILE}
db2 -f /db2scripts/customer_summary.sql >> ${LOG_FILE} 2>&1
if [ $? -ne 0 ]; then
echo “客户汇总失败” >> ${LOG_FILE}
db2 connect reset
exit 1
fi
# 步骤3: 刷新MQT
echo “步骤3: 开始刷新MQT” >> ${LOG_FILE}
db2 “REFRESH TABLE MQT_DAILY_SALES” >> ${LOG_FILE} 2>&1
db2 “REFRESH TABLE MQT_CUSTOMER_SUMMARY” >> ${LOG_FILE} 2>&1
# 步骤4: 更新统计信息
echo “步骤4: 开始更新统计信息” >> ${LOG_FILE}
db2 “RUNSTATS ON TABLE ORDERS WITH DISTRIBUTION AND DETAILED INDEXES ALL” >> ${LOG_FILE} 2>&1
db2 “RUNSTATS ON TABLE CUSTOMER_SUMMARY WITH DISTRIBUTION” >> ${LOG_FILE} 2>&1
# 步骤5: 清理历史数据
echo “步骤5: 开始清理历史数据” >> ${LOG_FILE}
db2 -f /db2scripts/cleanup_history.sql >> ${LOG_FILE} 2>&1
# 断开连接
db2 connect reset
echo “=== 批处理作业完成: $(date) ===” >> ${LOG_FILE}
# 发送邮件通知
# mail -s “批处理作业完成” dba@company.com < ${LOG_FILE}
# crontab配置
# 0 2 * * * /db2scripts/batch_job.sh >> /db2logs/batch_job_cron.log 2>&1
4.2 作业监控
SELECT
AGENT_ID,
SUBSTR(APPL_NAME, 1, 30) AS APPL_NAME,
SUBSTR(PRIMARY_AUTH_ID, 1, 20) AS AUTH_ID,
APPL_STATUS,
UOW_ID,
ACTIVITY_ID,
TOTAL_UOW_TIME,
TOTAL_ACT_TIME
FROM SYSIBMADM.APPLICATIONS
ORDER BY TOTAL_ACT_TIME DESC;
# 查看当前运行的SQL
SELECT
SUBSTR(STMT_TEXT, 1, 100) AS STMT_TEXT,
TOTAL_EXEC_TIME,
TOTAL_USR_CPU_TIME,
TOTAL_SYS_CPU_TIME,
STMT_EXEC_TIME
FROM SYSIBMADM.TOP_DYNAMIC_SQL
ORDER BY STMT_EXEC_TIME DESC
FETCH FIRST 10 ROWS ONLY;
# 查看锁等待
SELECT
LOCK_NAME,
LOCK_WAIT_MODE,
LOCK_OBJECT_TYPE,
SUBSTR(TABSCHEMA, 1, 20) AS TABSCHEMA,
SUBSTR(TABNAME, 1, 30) AS TABNAME
FROM SYSIBMADM.LOCKS_HELD
WHERE LOCK_WAIT_MODE IS NOT NULL;
# 监控表空间使用
SELECT
SUBSTR(TBSP_NAME, 1, 20) AS TBSP_NAME,
TBSP_TYPE,
TBSP_STATE,
TBSP_PAGE_SIZE,
TBSP_USED_PAGES,
TBSP_TOTAL_PAGES,
ROUND(TBSP_USED_PAGES * 100.0 / TBSP_TOTAL_PAGES, 2) AS USAGE_PERCENT
FROM SYSIBMADM.TBSP_UTILIZATION;
# 查看批处理日志
tail -100 /db2logs/batch/batch_job_$(date +%Y%m%d).log
Part05-风哥经验总结与分享
5.1 批处理优化要点
- 使用LOAD替代INSERT加载大量数据
- 使用MERGE替代UPDATE+INSERT
- 分批次处理大数据,避免长事务
- 合理使用临时表
- 删除数据优先考虑分区删除
- 作业步骤化,便于监控和重试
5.2 性能优化建议
| 场景 | 优化方案 |
|---|---|
| 批量加载 | LOAD命令,并行加载 |
| 批量更新 | MERGE语句,分批次提交 |
| 批量删除 | 分区删除,分批次提交 |
| 中间结果 | 临时表,NOT LOGGED |
5.3 运维要点
- 批处理脚本日志要详细
- 作业失败要有告警通知
- 作业要有重试机制
- 定期归档批处理日志
- 监控批处理作业执行时间
- 建立作业执行时间基线
学习交流加群风哥微信: itpux-com
风哥Oracle/MySQL/PostgreSQL/Greenplum/DB2/Redis等数据库培训课程,10年一线实战经验,企业级培训,真正掌握数据库核心技术!
本文由风哥教程整理发布,仅用于学习测试使用,转载注明出处:http://www.fgedu.net.cn/10327.html
