1. 首页 > DB2教程 > 正文

DB2教程FG062-DB2数据仓库设计实战

风哥教程参考DB2官方文档Data Warehousing、Analytics、Column Storage等内容,详细介绍DB2数据仓库设计、星型/雪花模型、列式存储、物化查询表等。更多视频教程www.fgedu.net.cn

目录大纲

Part01-数据仓库架构

1.1 数据仓库架构

DB2数据仓库架构:

  • 数据源层:OLTP系统、日志文件、外部数据
  • ETL层:抽取、转换、加载
  • 数据仓库层:ODS层、DWD层、DWS层、ADS层
  • 应用层:报表、OLAP分析、数据挖掘

1.2 DB2数据仓库特性

  • 列式存储(BLU Acceleration)
  • 物化查询表(MQT)
  • 数据分区
  • 多维集群表(MDC)
  • 查询并行处理(SMP/MPP)
  • 工作负载管理(WLM)

Part02-星型模型设计

2.1 维度表设计

# 日期维度表
CREATE TABLE DIM_DATE (
DATE_KEY INTEGER NOT NULL,
DATE_VALUE DATE NOT NULL,
YEAR INTEGER,
QUARTER INTEGER,
MONTH INTEGER,
MONTH_NAME VARCHAR(20),
DAY_OF_MONTH INTEGER,
DAY_OF_WEEK INTEGER,
DAY_NAME VARCHAR(20),
WEEK_OF_YEAR INTEGER,
IS_WEEKEND CHAR(1),
IS_HOLIDAY CHAR(1),
CONSTRAINT PK_DIM_DATE PRIMARY KEY (DATE_KEY)
) ORGANIZE BY COLUMN;

# 产品维度表
CREATE TABLE DIM_PRODUCT (
PRODUCT_KEY INTEGER NOT NULL,
PRODUCT_ID VARCHAR(50) NOT NULL,
PRODUCT_NAME VARCHAR(200) NOT NULL,
CATEGORY_ID VARCHAR(50),
CATEGORY_NAME VARCHAR(100),
BRAND VARCHAR(100),
PRICE DECIMAL(18, 2),
START_DATE DATE,
END_DATE DATE,
IS_CURRENT CHAR(1) DEFAULT ‘Y’,
CONSTRAINT PK_DIM_PRODUCT PRIMARY KEY (PRODUCT_KEY)
) ORGANIZE BY COLUMN;

# 客户维度表
CREATE TABLE DIM_CUSTOMER (
CUSTOMER_KEY INTEGER NOT NULL,
CUSTOMER_ID VARCHAR(50) NOT NULL,
CUSTOMER_NAME VARCHAR(200) NOT NULL,
GENDER VARCHAR(10),
AGE INTEGER,
CITY VARCHAR(100),
PROVINCE VARCHAR(100),
COUNTRY VARCHAR(100),
SEGMENT VARCHAR(50),
REGISTRATION_DATE DATE,
CONSTRAINT PK_DIM_CUSTOMER PRIMARY KEY (CUSTOMER_KEY)
) ORGANIZE BY COLUMN;

# 商店维度表
CREATE TABLE DIM_STORE (
STORE_KEY INTEGER NOT NULL,
STORE_ID VARCHAR(50) NOT NULL,
STORE_NAME VARCHAR(200) NOT NULL,
CITY VARCHAR(100),
PROVINCE VARCHAR(100),
REGION VARCHAR(50),
STORE_TYPE VARCHAR(50),
OPEN_DATE DATE,
CONSTRAINT PK_DIM_STORE PRIMARY KEY (STORE_KEY)
) ORGANIZE BY COLUMN;

2.2 事实表设计

# 销售事实表
CREATE TABLE FACT_SALES (
DATE_KEY INTEGER NOT NULL,
PRODUCT_KEY INTEGER NOT NULL,
CUSTOMER_KEY INTEGER NOT NULL,
STORE_KEY INTEGER NOT NULL,
ORDER_ID VARCHAR(50),
QUANTITY INTEGER,
UNIT_PRICE DECIMAL(18, 2),
TOTAL_AMOUNT DECIMAL(18, 2),
DISCOUNT DECIMAL(18, 2),
TAX DECIMAL(18, 2),
PROFIT DECIMAL(18, 2),
CREATE_TIME TIMESTAMP DEFAULT CURRENT TIMESTAMP,
CONSTRAINT PK_FACT_SALES PRIMARY KEY (DATE_KEY, PRODUCT_KEY, CUSTOMER_KEY, STORE_KEY)
) ORGANIZE BY COLUMN
PARTITION BY RANGE (DATE_KEY)
(
STARTING 20260101 ENDING 20261231 EVERY 1 MONTH
);

# 创建索引
CREATE INDEX IDX_FACT_SALES_DATE ON FACT_SALES(DATE_KEY);
CREATE INDEX IDX_FACT_SALES_PRODUCT ON FACT_SALES(PRODUCT_KEY);
CREATE INDEX IDX_FACT_SALES_CUSTOMER ON FACT_SALES(CUSTOMER_KEY);
CREATE INDEX IDX_FACT_SALES_STORE ON FACT_SALES(STORE_KEY);

# 库存事实表
CREATE TABLE FACT_INVENTORY (
DATE_KEY INTEGER NOT NULL,
PRODUCT_KEY INTEGER NOT NULL,
STORE_KEY INTEGER NOT NULL,
BEGINNING_QUANTITY INTEGER,
ENDING_QUANTITY INTEGER,
QUANTITY_IN INTEGER,
QUANTITY_OUT INTEGER,
CONSTRAINT PK_FACT_INVENTORY PRIMARY KEY (DATE_KEY, PRODUCT_KEY, STORE_KEY)
) ORGANIZE BY COLUMN
PARTITION BY RANGE (DATE_KEY)
(
STARTING 20260101 ENDING 20261231 EVERY 1 MONTH
);

Part03-列式存储与MQT

3.1 列式存储(BLU)

# 创建列式表空间
CREATE TABLESPACE DW_COL_TS
PAGESIZE 32K
MANAGED BY DATABASE
USING (FILE ‘/db2data/dw/dw_col_ts.dms’ 10G)
EXTENTSIZE 4
PREFETCHSIZE 32
BUFFERPOOL BP32K;

# 创建列式缓冲池
CREATE BUFFERPOOL BP32K ALL DBPARTITIONNUMS SIZE 10000 PAGESIZE 32K;

# 创建列式组织表
CREATE TABLE FACT_SALES_COL (
DATE_KEY INTEGER NOT NULL,
PRODUCT_KEY INTEGER NOT NULL,
CUSTOMER_KEY INTEGER NOT NULL,
STORE_KEY INTEGER NOT NULL,
QUANTITY INTEGER,
UNIT_PRICE DECIMAL(18, 2),
TOTAL_AMOUNT DECIMAL(18, 2),
DISCOUNT DECIMAL(18, 2),
TAX DECIMAL(18, 2),
PROFIT DECIMAL(18, 2),
CONSTRAINT PK_FACT_SALES_COL PRIMARY KEY (DATE_KEY, PRODUCT_KEY, CUSTOMER_KEY, STORE_KEY)
) ORGANIZE BY COLUMN
IN DW_COL_TS;

# 启用BLU加速
UPDATE DATABASE CONFIGURATION USING DFT_TABLE_ORG COLUMN;

# 检查表组织方式
SELECT
SUBSTR(TABSCHEMA, 1, 20) AS TABSCHEMA,
SUBSTR(TABNAME, 1, 30) AS TABNAME,
TABLEORG
FROM SYSCAT.TABLES
WHERE TABSCHEMA = CURRENT SCHEMA
AND TABNAME LIKE ‘FACT%’;

3.2 物化查询表(MQT)

# 按日期和产品聚合的MQT
CREATE TABLE MQT_SALES_DATE_PRODUCT AS (
SELECT
DATE_KEY,
PRODUCT_KEY,
SUM(QUANTITY) AS TOTAL_QUANTITY,
SUM(TOTAL_AMOUNT) AS TOTAL_AMOUNT,
SUM(PROFIT) AS TOTAL_PROFIT,
COUNT(*) AS ORDER_COUNT
FROM FACT_SALES
GROUP BY DATE_KEY, PRODUCT_KEY
) DATA INITIALLY DEFERRED REFRESH DEFERRED
ORGANIZE BY COLUMN;

# 按日期和商店聚合的MQT
CREATE TABLE MQT_SALES_DATE_STORE AS (
SELECT
DATE_KEY,
STORE_KEY,
SUM(QUANTITY) AS TOTAL_QUANTITY,
SUM(TOTAL_AMOUNT) AS TOTAL_AMOUNT,
SUM(PROFIT) AS TOTAL_PROFIT,
COUNT(*) AS ORDER_COUNT
FROM FACT_SALES
GROUP BY DATE_KEY, STORE_KEY
) DATA INITIALLY DEFERRED REFRESH DEFERRED
ORGANIZE BY COLUMN;

# 按月份和类别聚合的MQT
CREATE TABLE MQT_SALES_MONTH_CATEGORY AS (
SELECT
d.YEAR,
d.MONTH,
p.CATEGORY_ID,
p.CATEGORY_NAME,
SUM(f.QUANTITY) AS TOTAL_QUANTITY,
SUM(f.TOTAL_AMOUNT) AS TOTAL_AMOUNT,
SUM(f.PROFIT) AS TOTAL_PROFIT
FROM FACT_SALES f
INNER JOIN DIM_DATE d ON f.DATE_KEY = d.DATE_KEY
INNER JOIN DIM_PRODUCT p ON f.PRODUCT_KEY = p.PRODUCT_KEY
GROUP BY d.YEAR, d.MONTH, p.CATEGORY_ID, p.CATEGORY_NAME
) DATA INITIALLY DEFERRED REFRESH DEFERRED
ORGANIZE BY COLUMN;

# 刷新MQT
REFRESH TABLE MQT_SALES_DATE_PRODUCT;
REFRESH TABLE MQT_SALES_DATE_STORE;
REFRESH TABLE MQT_SALES_MONTH_CATEGORY;

# 启用查询优化器自动使用MQT
SET CURRENT MAINTAINED TABLE TYPES FOR OPTIMIZATION = ALL;

# 检查MQT状态
SELECT
SUBSTR(TABSCHEMA, 1, 20) AS TABSCHEMA,
SUBSTR(TABNAME, 1, 40) AS TABNAME,
REFRESH,
INITIAL_REFRESH,
STATS_TIME
FROM SYSCAT.TABLES
WHERE TABSCHEMA = CURRENT SCHEMA
AND TABNAME LIKE ‘MQT%’;

Part04-ETL与数据加载

4.1 数据加载

# 使用IMPORT加载数据
IMPORT FROM /data/sales_202604.csv OF DEL
INSERT INTO FACT_SALES;

# 使用LOAD加载数据(更快)
LOAD FROM /data/sales_202604.csv OF DEL
INSERT INTO FACT_SALES
NONRECOVERABLE;

# 带分区的LOAD
LOAD FROM /data/sales_202604.csv OF DEL
INSERT INTO FACT_SALES PARTITION (202604);

# 创建加载控制脚本
#!/bin/bash
# load_data.sh

DATE=$1
if [ -z “$DATE” ]; then
DATE=$(date -d “yesterday” +%Y%m%d)
fi

echo “开始加载数据: $DATE”

# 连接数据库
db2 connect to FGEDB

# 设置日期KEY
DATE_KEY=$(date -d “$DATE” +%Y%m%d)

# 加载销售数据
db2 “LOAD FROM /data/sales_${DATE}.csv OF DEL
INSERT INTO FACT_SALES
NONRECOVERABLE”

# 刷新MQT
db2 “REFRESH TABLE MQT_SALES_DATE_PRODUCT”
db2 “REFRESH TABLE MQT_SALES_DATE_STORE”

# 更新统计信息
db2 “RUNSTATS ON TABLE FACT_SALES WITH DISTRIBUTION AND DETAILED INDEXES ALL”
db2 “RUNSTATS ON TABLE MQT_SALES_DATE_PRODUCT WITH DISTRIBUTION”
db2 “RUNSTATS ON TABLE MQT_SALES_DATE_STORE WITH DISTRIBUTION”

# 断开连接
db2 connect reset

echo “数据加载完成: $DATE”

# 缓慢变化维度处理
# 类型1:覆盖更新
UPDATE DIM_PRODUCT
SET PRICE = 99.99,
UPDATE_TIME = CURRENT TIMESTAMP
WHERE PRODUCT_ID = ‘P001’
AND IS_CURRENT = ‘Y’;

# 类型2:新增版本
UPDATE DIM_PRODUCT
SET END_DATE = CURRENT DATE – 1 DAY,
IS_CURRENT = ‘N’
WHERE PRODUCT_ID = ‘P001’
AND IS_CURRENT = ‘Y’;

INSERT INTO DIM_PRODUCT (
PRODUCT_KEY, PRODUCT_ID, PRODUCT_NAME, CATEGORY_ID,
CATEGORY_NAME, BRAND, PRICE, START_DATE, IS_CURRENT
) VALUES (
10001, ‘P001’, ‘Product A’, ‘C001’,
‘Category 1’, ‘Brand A’, 99.99, CURRENT DATE, ‘Y’
);

4.2 数据分析查询

# 按日期查询销售趋势
SELECT
d.DATE_VALUE,
SUM(f.TOTAL_AMOUNT) AS DAILY_SALES,
SUM(f.QUANTITY) AS DAILY_QUANTITY,
SUM(f.PROFIT) AS DAILY_PROFIT
FROM FACT_SALES f
INNER JOIN DIM_DATE d ON f.DATE_KEY = d.DATE_KEY
WHERE d.DATE_VALUE BETWEEN ‘2026-04-01’ AND ‘2026-04-30’
GROUP BY d.DATE_VALUE
ORDER BY d.DATE_VALUE;

# 按产品类别查询销售
SELECT
p.CATEGORY_NAME,
p.BRAND,
SUM(f.TOTAL_AMOUNT) AS CATEGORY_SALES,
SUM(f.QUANTITY) AS CATEGORY_QUANTITY,
SUM(f.PROFIT) AS CATEGORY_PROFIT
FROM FACT_SALES f
INNER JOIN DIM_PRODUCT p ON f.PRODUCT_KEY = p.PRODUCT_KEY
INNER JOIN DIM_DATE d ON f.DATE_KEY = d.DATE_KEY
WHERE d.YEAR = 2026
AND d.MONTH = 4
GROUP BY p.CATEGORY_NAME, p.BRAND
ORDER BY CATEGORY_SALES DESC;

# 按商店查询销售排名
SELECT
s.STORE_NAME,
s.CITY,
s.REGION,
SUM(f.TOTAL_AMOUNT) AS STORE_SALES,
RANK() OVER (ORDER BY SUM(f.TOTAL_AMOUNT) DESC) AS SALES_RANK
FROM FACT_SALES f
INNER JOIN DIM_STORE s ON f.STORE_KEY = s.STORE_KEY
INNER JOIN DIM_DATE d ON f.DATE_KEY = d.DATE_KEY
WHERE d.YEAR = 2026
AND d.MONTH = 4
GROUP BY s.STORE_NAME, s.CITY, s.REGION
ORDER BY SALES_RANK;

# 同比环比分析
SELECT
d.YEAR,
d.MONTH,
SUM(f.TOTAL_AMOUNT) AS MONTHLY_SALES,
LAG(SUM(f.TOTAL_AMOUNT), 1) OVER (ORDER BY d.YEAR, d.MONTH) AS PREV_MONTH_SALES,
LAG(SUM(f.TOTAL_AMOUNT), 12) OVER (ORDER BY d.YEAR, d.MONTH) AS PREV_YEAR_SALES,
ROUND((SUM(f.TOTAL_AMOUNT) – LAG(SUM(f.TOTAL_AMOUNT), 1) OVER (ORDER BY d.YEAR, d.MONTH))
/ LAG(SUM(f.TOTAL_AMOUNT), 1) OVER (ORDER BY d.YEAR, d.MONTH) * 100, 2) AS MONTH_OVER_MONTH,
ROUND((SUM(f.TOTAL_AMOUNT) – LAG(SUM(f.TOTAL_AMOUNT), 12) OVER (ORDER BY d.YEAR, d.MONTH))
/ LAG(SUM(f.TOTAL_AMOUNT), 12) OVER (ORDER BY d.YEAR, d.MONTH) * 100, 2) AS YEAR_OVER_YEAR
FROM FACT_SALES f
INNER JOIN DIM_DATE d ON f.DATE_KEY = d.DATE_KEY
GROUP BY d.YEAR, d.MONTH
ORDER BY d.YEAR, d.MONTH;

Part05-风哥经验总结与分享

5.1 数据仓库设计要点

  • 采用星型或雪花模型,简化查询
  • 事实表使用列式存储提高查询性能
  • 维度表使用代理键,支持缓慢变化维度
  • 使用MQT预计算聚合数据
  • 事实表按时间分区
  • 合理使用MDC表提高查询效率

5.2 性能优化建议

场景 优化方案
大数据量查询 列式存储(BLU),MQT预聚合
多维度分析 MDC表,合理的维度组合
数据加载 LOAD命令,分区加载
历史数据 按时间分区,定期归档

5.3 运维要点

  • 定期刷新MQT
  • 定期RUNSTATS更新统计信息
  • 监控表空间增长
  • 定期归档历史分区
  • ETL作业监控和告警
  • 完善的备份和恢复方案
更多视频教程www.fgedu.net.cn
学习交流加群风哥微信: itpux-com
风哥Oracle/MySQL/PostgreSQL/Greenplum/DB2/Redis等数据库培训课程,10年一线实战经验,企业级培训,真正掌握数据库核心技术!

本文由风哥教程整理发布,仅用于学习测试使用,转载注明出处:http://www.fgedu.net.cn/10327.html

联系我们

在线咨询:点击这里给我发消息

微信号:itpux-com

工作日:9:30-18:30,节假日休息