1. 首页 > DB2教程 > 正文

DB2教程FG064-DB2批处理作业优化实战

风哥教程参考DB2官方文档Batch Processing、Performance Tuning、LOAD等内容,详细介绍批处理作业优化、大数据加载、批量更新、性能调优等。更多视频教程www.fgedu.net.cn

目录大纲

Part01-批处理作业架构

1.1 批处理场景

常见的批处理作业场景:

  • 日终/月末结算
  • 数据清洗与转换
  • 报表生成
  • 数据归档
  • 批量数据导入导出
  • MQT刷新
  • 统计信息更新

1.2 批处理优化原则

  • 使用批量操作,减少SQL语句数量
  • 合理使用LOAD替代INSERT
  • 批量更新使用FOR UPDATE或MERGE
  • 合理的提交频率
  • 临时表使用优化
  • 并行处理大作业

Part02-批量数据加载

2.1 LOAD命令优化

# 基本LOAD命令
LOAD FROM /data/batch_data.csv OF DEL
INSERT INTO TARGET_TABLE;

# 优化的LOAD命令
LOAD FROM /data/batch_data.csv OF DEL
INSERT INTO TARGET_TABLE
FASTPARSE
NOCHECK CONSTRAINT
NONRECOVERABLE
CPU_PARALLELISM 4
DISK_PARALLELISM 4
BUFFER 1000
SAVECOUNT 10000
ROWS_PER_COMMIT 10000;

# 使用CURSOR加载
DECLARE c1 CURSOR FOR
SELECT * FROM SOURCE_TABLE;

LOAD FROM c1 OF CURSOR
INSERT INTO TARGET_TABLE
NONRECOVERABLE;

# 分区加载
LOAD FROM /data/partition1.csv OF DEL
INSERT INTO TARGET_TABLE PARTITION (PART1);

LOAD FROM /data/partition2.csv OF DEL
INSERT INTO TARGET_TABLE PARTITION (PART2);

2.2 临时表使用

# 创建临时表空间
CREATE USER TEMPORARY TABLESPACE TEMP_TS
PAGESIZE 8K
MANAGED BY DATABASE
USING (FILE ‘/db2data/temp/temp_ts.dms’ 5G);

# 创建临时表
DECLARE GLOBAL TEMPORARY TABLE SESSION.TEMP_ORDER (
ORDER_ID VARCHAR(32),
CUSTOMER_ID VARCHAR(32),
TOTAL_AMOUNT DECIMAL(18, 2),
ORDER_STATUS VARCHAR(20)
) ON COMMIT PRESERVE ROWS
NOT LOGGED
IN TEMP_TS;

# 使用临时表处理数据
INSERT INTO SESSION.TEMP_ORDER
SELECT
ORDER_ID,
CUSTOMER_ID,
TOTAL_AMOUNT,
ORDER_STATUS
FROM ORDERS
WHERE CREATE_TIME BETWEEN ‘2026-04-01’ AND ‘2026-04-30’;

# 更新临时表
UPDATE SESSION.TEMP_ORDER
SET ORDER_STATUS = ‘PROCESSED’
WHERE TOTAL_AMOUNT > 1000;

# 合并到目标表
MERGE INTO ORDER_SUMMARY s
USING SESSION.TEMP_ORDER t
ON s.ORDER_ID = t.ORDER_ID
WHEN MATCHED THEN
UPDATE SET
s.TOTAL_AMOUNT = t.TOTAL_AMOUNT,
s.ORDER_STATUS = t.ORDER_STATUS
WHEN NOT MATCHED THEN
INSERT (ORDER_ID, CUSTOMER_ID, TOTAL_AMOUNT, ORDER_STATUS)
VALUES (t.ORDER_ID, t.CUSTOMER_ID, t.TOTAL_AMOUNT, t.ORDER_STATUS);

# 清理临时表
DROP TABLE SESSION.TEMP_ORDER;

Part03-批量更新优化

3.1 MERGE语句

# 使用MERGE批量更新
MERGE INTO CUSTOMER_SUMMARY s
USING (
SELECT
CUSTOMER_ID,
COUNT(*) AS ORDER_COUNT,
SUM(TOTAL_AMOUNT) AS TOTAL_AMOUNT,
MAX(CREATE_TIME) AS LAST_ORDER_TIME
FROM ORDERS
WHERE CREATE_TIME BETWEEN ‘2026-04-01’ AND ‘2026-04-30’
GROUP BY CUSTOMER_ID
) t
ON s.CUSTOMER_ID = t.CUSTOMER_ID
WHEN MATCHED THEN
UPDATE SET
s.ORDER_COUNT = t.ORDER_COUNT,
s.TOTAL_AMOUNT = t.TOTAL_AMOUNT,
s.LAST_ORDER_TIME = t.LAST_ORDER_TIME,
s.UPDATE_TIME = CURRENT TIMESTAMP
WHEN NOT MATCHED THEN
INSERT (
CUSTOMER_ID, ORDER_COUNT, TOTAL_AMOUNT,
LAST_ORDER_TIME, CREATE_TIME
) VALUES (
t.CUSTOMER_ID, t.ORDER_COUNT, t.TOTAL_AMOUNT,
t.LAST_ORDER_TIME, CURRENT TIMESTAMP
);

# 使用FOR UPDATE批量更新
UPDATE (
SELECT
o.ORDER_ID,
o.ORDER_STATUS,
p.PAYMENT_STATUS
FROM ORDERS o
INNER JOIN PAYMENT p ON o.ORDER_ID = p.ORDER_ID
WHERE o.ORDER_STATUS = ‘PENDING’
AND p.PAYMENT_STATUS = ‘SUCCESS’
WITH RS USE AND KEEP UPDATE LOCKS
) t
SET t.ORDER_STATUS = ‘PAID’;

# 分批次更新
DECLARE v_offset INTEGER DEFAULT 0;
DECLARE v_batch_size INTEGER DEFAULT 10000;
DECLARE v_updated_count INTEGER;

SET v_updated_count = 1;

WHILE v_updated_count > 0 DO
UPDATE ORDERS
SET ORDER_STATUS = ‘PROCESSED’
WHERE ORDER_STATUS = ‘PENDING’
AND ORDER_ID IN (
SELECT ORDER_ID FROM ORDERS
WHERE ORDER_STATUS = ‘PENDING’
ORDER BY ORDER_ID
FETCH FIRST v_batch_size ROWS ONLY
);

GET DIAGNOSTICS v_updated_count = ROW_COUNT;

SET v_offset = v_offset + v_batch_size;

COMMIT;
END WHILE;

3.2 批量删除

# 批量删除历史数据
DECLARE v_date DATE;
SET v_date = CURRENT DATE – 365 DAYS;

WHILE 1 = 1 DO
DELETE FROM ORDERS
WHERE CREATE_TIME < v_date AND ORDER_ID IN ( SELECT ORDER_ID FROM ORDERS WHERE CREATE_TIME < v_date FETCH FIRST 10000 ROWS ONLY ); IF SQLCODE = 100 THEN LEAVE; END IF; COMMIT; END WHILE; # 使用ALTER TABLE删除分区(更快) ALTER TABLE ORDERS DETACH PARTITION PART_OLD INTO TABLE ORDERS_OLD; DROP TABLE ORDERS_OLD; # 使用LOAD替换 # 1. 导出需要保留的数据 EXPORT TO /data/orders_keep.del OF DEL SELECT * FROM ORDERS WHERE CREATE_TIME >= v_date;

# 2. 清空表
ALTER TABLE ORDERS ACTIVATE NOT LOGGED INITIALLY WITH EMPTY TABLE;

# 3. 重新导入
LOAD FROM /data/orders_keep.del OF DEL
INSERT INTO ORDERS
NONRECOVERABLE;

Part04-作业调度与监控

4.1 作业调度脚本

# 创建批处理脚本
#!/bin/bash
# batch_job.sh

LOG_DIR=”/db2logs/batch”
DATA_DIR=”/db2data/batch”
DATE=$(date +%Y%m%d)
LOG_FILE=”${LOG_DIR}/batch_job_${DATE}.log”

mkdir -p ${LOG_DIR}

echo “=== 批处理作业开始: $(date) ===” >> ${LOG_FILE}

# 连接数据库
db2 connect to FGEDB >> ${LOG_FILE} 2>&1
if [ $? -ne 0 ]; then
echo “数据库连接失败” >> ${LOG_FILE}
exit 1
fi

# 步骤1: 加载数据
echo “步骤1: 开始加载数据” >> ${LOG_FILE}
db2 “LOAD FROM ${DATA_DIR}/orders_${DATE}.csv OF DEL
INSERT INTO ORDERS
NONRECOVERABLE” >> ${LOG_FILE} 2>&1
if [ $? -ne 0 ]; then
echo “数据加载失败” >> ${LOG_FILE}
db2 connect reset
exit 1
fi

# 步骤2: 客户汇总
echo “步骤2: 开始客户汇总” >> ${LOG_FILE}
db2 -f /db2scripts/customer_summary.sql >> ${LOG_FILE} 2>&1
if [ $? -ne 0 ]; then
echo “客户汇总失败” >> ${LOG_FILE}
db2 connect reset
exit 1
fi

# 步骤3: 刷新MQT
echo “步骤3: 开始刷新MQT” >> ${LOG_FILE}
db2 “REFRESH TABLE MQT_DAILY_SALES” >> ${LOG_FILE} 2>&1
db2 “REFRESH TABLE MQT_CUSTOMER_SUMMARY” >> ${LOG_FILE} 2>&1

# 步骤4: 更新统计信息
echo “步骤4: 开始更新统计信息” >> ${LOG_FILE}
db2 “RUNSTATS ON TABLE ORDERS WITH DISTRIBUTION AND DETAILED INDEXES ALL” >> ${LOG_FILE} 2>&1
db2 “RUNSTATS ON TABLE CUSTOMER_SUMMARY WITH DISTRIBUTION” >> ${LOG_FILE} 2>&1

# 步骤5: 清理历史数据
echo “步骤5: 开始清理历史数据” >> ${LOG_FILE}
db2 -f /db2scripts/cleanup_history.sql >> ${LOG_FILE} 2>&1

# 断开连接
db2 connect reset

echo “=== 批处理作业完成: $(date) ===” >> ${LOG_FILE}

# 发送邮件通知
# mail -s “批处理作业完成” dba@company.com < ${LOG_FILE} # crontab配置 # 0 2 * * * /db2scripts/batch_job.sh >> /db2logs/batch_job_cron.log 2>&1

4.2 作业监控

# 查看当前活动的应用
SELECT
AGENT_ID,
SUBSTR(APPL_NAME, 1, 30) AS APPL_NAME,
SUBSTR(PRIMARY_AUTH_ID, 1, 20) AS AUTH_ID,
APPL_STATUS,
UOW_ID,
ACTIVITY_ID,
TOTAL_UOW_TIME,
TOTAL_ACT_TIME
FROM SYSIBMADM.APPLICATIONS
ORDER BY TOTAL_ACT_TIME DESC;

# 查看当前运行的SQL
SELECT
SUBSTR(STMT_TEXT, 1, 100) AS STMT_TEXT,
TOTAL_EXEC_TIME,
TOTAL_USR_CPU_TIME,
TOTAL_SYS_CPU_TIME,
STMT_EXEC_TIME
FROM SYSIBMADM.TOP_DYNAMIC_SQL
ORDER BY STMT_EXEC_TIME DESC
FETCH FIRST 10 ROWS ONLY;

# 查看锁等待
SELECT
LOCK_NAME,
LOCK_WAIT_MODE,
LOCK_OBJECT_TYPE,
SUBSTR(TABSCHEMA, 1, 20) AS TABSCHEMA,
SUBSTR(TABNAME, 1, 30) AS TABNAME
FROM SYSIBMADM.LOCKS_HELD
WHERE LOCK_WAIT_MODE IS NOT NULL;

# 监控表空间使用
SELECT
SUBSTR(TBSP_NAME, 1, 20) AS TBSP_NAME,
TBSP_TYPE,
TBSP_STATE,
TBSP_PAGE_SIZE,
TBSP_USED_PAGES,
TBSP_TOTAL_PAGES,
ROUND(TBSP_USED_PAGES * 100.0 / TBSP_TOTAL_PAGES, 2) AS USAGE_PERCENT
FROM SYSIBMADM.TBSP_UTILIZATION;

# 查看批处理日志
tail -100 /db2logs/batch/batch_job_$(date +%Y%m%d).log

Part05-风哥经验总结与分享

5.1 批处理优化要点

  • 使用LOAD替代INSERT加载大量数据
  • 使用MERGE替代UPDATE+INSERT
  • 分批次处理大数据,避免长事务
  • 合理使用临时表
  • 删除数据优先考虑分区删除
  • 作业步骤化,便于监控和重试

5.2 性能优化建议

场景 优化方案
批量加载 LOAD命令,并行加载
批量更新 MERGE语句,分批次提交
批量删除 分区删除,分批次提交
中间结果 临时表,NOT LOGGED

5.3 运维要点

  • 批处理脚本日志要详细
  • 作业失败要有告警通知
  • 作业要有重试机制
  • 定期归档批处理日志
  • 监控批处理作业执行时间
  • 建立作业执行时间基线
更多视频教程www.fgedu.net.cn
学习交流加群风哥微信: itpux-com
风哥Oracle/MySQL/PostgreSQL/Greenplum/DB2/Redis等数据库培训课程,10年一线实战经验,企业级培训,真正掌握数据库核心技术!

本文由风哥教程整理发布,仅用于学习测试使用,转载注明出处:http://www.fgedu.net.cn/10327.html

联系我们

在线咨询:点击这里给我发消息

微信号:itpux-com

工作日:9:30-18:30,节假日休息