1. 首页 > DB2教程 > 正文

DB2教程FG069-DB2与BI工具集成实战

风哥教程参考DB2官方文档Business Intelligence、Analytics等内容,详细介绍DB2与BI工具集成、数据仓库设计、OLAP分析等。更多视频教程www.fgedu.net.cn

目录大纲

Part01-BI架构概述

1.1 BI架构层次

BI系统架构:

  • 数据源层:OLTP系统、日志、外部数据
  • ETL层:数据抽取、转换、加载
  • 数据仓库层:ODS、DWD、DWS、ADS
  • OLAP层:多维分析、数据立方体
  • 展现层:报表、仪表板、可视化

1.2 DB2 BI特性

  • 列式存储(BLU Acceleration)
  • 物化查询表(MQT)
  • 多维集群表(MDC)
  • 数据分区
  • OLAP函数
  • 工作负载管理(WLM)

Part02-数据仓库建模

2.1 星型模型

# 维度表
CREATE TABLE DIM_DATE (
DATE_KEY INTEGER NOT NULL,
DATE_VALUE DATE NOT NULL,
YEAR INTEGER,
QUARTER INTEGER,
MONTH INTEGER,
MONTH_NAME VARCHAR(20),
DAY_OF_MONTH INTEGER,
DAY_OF_WEEK INTEGER,
CONSTRAINT PK_DIM_DATE PRIMARY KEY (DATE_KEY)
) ORGANIZE BY COLUMN;

CREATE TABLE DIM_PRODUCT (
PRODUCT_KEY INTEGER NOT NULL,
PRODUCT_ID VARCHAR(50) NOT NULL,
PRODUCT_NAME VARCHAR(200) NOT NULL,
CATEGORY VARCHAR(100),
BRAND VARCHAR(100),
CONSTRAINT PK_DIM_PRODUCT PRIMARY KEY (PRODUCT_KEY)
) ORGANIZE BY COLUMN;

CREATE TABLE DIM_CUSTOMER (
CUSTOMER_KEY INTEGER NOT NULL,
CUSTOMER_ID VARCHAR(50) NOT NULL,
CUSTOMER_NAME VARCHAR(200) NOT NULL,
CITY VARCHAR(100),
PROVINCE VARCHAR(100),
SEGMENT VARCHAR(50),
CONSTRAINT PK_DIM_CUSTOMER PRIMARY KEY (CUSTOMER_KEY)
) ORGANIZE BY COLUMN;

# 事实表
CREATE TABLE FACT_SALES (
DATE_KEY INTEGER NOT NULL,
PRODUCT_KEY INTEGER NOT NULL,
CUSTOMER_KEY INTEGER NOT NULL,
QUANTITY INTEGER,
TOTAL_AMOUNT DECIMAL(18, 2),
PROFIT DECIMAL(18, 2),
CONSTRAINT PK_FACT_SALES PRIMARY KEY (DATE_KEY, PRODUCT_KEY, CUSTOMER_KEY)
) ORGANIZE BY COLUMN
PARTITION BY RANGE (DATE_KEY)
(
STARTING 20260101 ENDING 20261231 EVERY 1 MONTH
);

2.2 MQT预计算

# 按月聚合MQT
CREATE TABLE MQT_SALES_MONTH AS (
SELECT
d.YEAR,
d.MONTH,
p.CATEGORY,
SUM(f.QUANTITY) AS TOTAL_QUANTITY,
SUM(f.TOTAL_AMOUNT) AS TOTAL_AMOUNT,
SUM(f.PROFIT) AS TOTAL_PROFIT
FROM FACT_SALES f
INNER JOIN DIM_DATE d ON f.DATE_KEY = d.DATE_KEY
INNER JOIN DIM_PRODUCT p ON f.PRODUCT_KEY = p.PRODUCT_KEY
GROUP BY d.YEAR, d.MONTH, p.CATEGORY
) DATA INITIALLY DEFERRED REFRESH DEFERRED
ORGANIZE BY COLUMN;

REFRESH TABLE MQT_SALES_MONTH;

# 按客户聚合MQT
CREATE TABLE MQT_SALES_CUSTOMER AS (
SELECT
c.CUSTOMER_KEY,
c.CUSTOMER_NAME,
c.SEGMENT,
SUM(f.TOTAL_AMOUNT) AS TOTAL_AMOUNT,
COUNT(*) AS ORDER_COUNT
FROM FACT_SALES f
INNER JOIN DIM_CUSTOMER c ON f.CUSTOMER_KEY = c.CUSTOMER_KEY
GROUP BY c.CUSTOMER_KEY, c.CUSTOMER_NAME, c.SEGMENT
) DATA INITIALLY DEFERRED REFRESH DEFERRED
ORGANIZE BY COLUMN;

REFRESH TABLE MQT_SALES_CUSTOMER;

Part03-OLAP分析

3.1 OLAP函数

# 排名函数
SELECT
PRODUCT_NAME,
TOTAL_AMOUNT,
RANK() OVER (ORDER BY TOTAL_AMOUNT DESC) AS SALES_RANK,
DENSE_RANK() OVER (ORDER BY TOTAL_AMOUNT DESC) AS DENSE_RANK,
ROW_NUMBER() OVER (ORDER BY TOTAL_AMOUNT DESC) AS ROW_NUM
FROM (
SELECT
p.PRODUCT_NAME,
SUM(f.TOTAL_AMOUNT) AS TOTAL_AMOUNT
FROM FACT_SALES f
INNER JOIN DIM_PRODUCT p ON f.PRODUCT_KEY = p.PRODUCT_KEY
GROUP BY p.PRODUCT_NAME
);

# 聚合函数
SELECT
PRODUCT_NAME,
TOTAL_AMOUNT,
SUM(TOTAL_AMOUNT) OVER (PARTITION BY CATEGORY) AS CATEGORY_TOTAL,
AVG(TOTAL_AMOUNT) OVER (PARTITION BY CATEGORY) AS CATEGORY_AVG,
TOTAL_AMOUNT * 100.0 / SUM(TOTAL_AMOUNT) OVER (PARTITION BY CATEGORY) AS PERCENTAGE
FROM (
SELECT
p.PRODUCT_NAME,
p.CATEGORY,
SUM(f.TOTAL_AMOUNT) AS TOTAL_AMOUNT
FROM FACT_SALES f
INNER JOIN DIM_PRODUCT p ON f.PRODUCT_KEY = p.PRODUCT_KEY
GROUP BY p.PRODUCT_NAME, p.CATEGORY
);

# 移动窗口函数
SELECT
DATE_VALUE,
DAILY_SALES,
AVG(DAILY_SALES) OVER (
ORDER BY DATE_VALUE
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) AS MOVING_AVG_7DAY,
SUM(DAILY_SALES) OVER (
ORDER BY DATE_VALUE
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) AS MOVING_SUM_7DAY
FROM (
SELECT
d.DATE_VALUE,
SUM(f.TOTAL_AMOUNT) AS DAILY_SALES
FROM FACT_SALES f
INNER JOIN DIM_DATE d ON f.DATE_KEY = d.DATE_KEY
GROUP BY d.DATE_VALUE
);

# LAG/LEAD函数
SELECT
DATE_VALUE,
DAILY_SALES,
LAG(DAILY_SALES, 1) OVER (ORDER BY DATE_VALUE) AS PREV_DAY_SALES,
LAG(DAILY_SALES, 7) OVER (ORDER BY DATE_VALUE) AS PREV_WEEK_SALES,
LEAD(DAILY_SALES, 1) OVER (ORDER BY DATE_VALUE) AS NEXT_DAY_SALES
FROM (
SELECT
d.DATE_VALUE,
SUM(f.TOTAL_AMOUNT) AS DAILY_SALES
FROM FACT_SALES f
INNER JOIN DIM_DATE d ON f.DATE_KEY = d.DATE_KEY
GROUP BY d.DATE_VALUE
);

3.2 多维分析

# CUBE多维分析
SELECT
COALESCE(d.YEAR, ‘ALL’) AS YEAR,
COALESCE(d.MONTH, ‘ALL’) AS MONTH,
COALESCE(p.CATEGORY, ‘ALL’) AS CATEGORY,
SUM(f.TOTAL_AMOUNT) AS TOTAL_AMOUNT
FROM FACT_SALES f
INNER JOIN DIM_DATE d ON f.DATE_KEY = d.DATE_KEY
INNER JOIN DIM_PRODUCT p ON f.PRODUCT_KEY = p.PRODUCT_KEY
GROUP BY CUBE(d.YEAR, d.MONTH, p.CATEGORY);

# ROLLUP层次分析
SELECT
COALESCE(c.PROVINCE, ‘ALL’) AS PROVINCE,
COALESCE(c.CITY, ‘ALL’) AS CITY,
SUM(f.TOTAL_AMOUNT) AS TOTAL_AMOUNT
FROM FACT_SALES f
INNER JOIN DIM_CUSTOMER c ON f.CUSTOMER_KEY = c.CUSTOMER_KEY
GROUP BY ROLLUP(c.PROVINCE, c.CITY);

# GROUPING SETS自定义分组
SELECT
COALESCE(d.YEAR, ‘ALL’) AS YEAR,
COALESCE(p.CATEGORY, ‘ALL’) AS CATEGORY,
COALESCE(c.SEGMENT, ‘ALL’) AS SEGMENT,
SUM(f.TOTAL_AMOUNT) AS TOTAL_AMOUNT
FROM FACT_SALES f
INNER JOIN DIM_DATE d ON f.DATE_KEY = d.DATE_KEY
INNER JOIN DIM_PRODUCT p ON f.PRODUCT_KEY = p.PRODUCT_KEY
INNER JOIN DIM_CUSTOMER c ON f.CUSTOMER_KEY = c.CUSTOMER_KEY
GROUP BY GROUPING SETS (
(d.YEAR, p.CATEGORY),
(d.YEAR, c.SEGMENT),
(p.CATEGORY, c.SEGMENT)
);

Part04-BI工具集成

4.1 JDBC/ODBC连接

# JDBC连接字符串
jdbc:db2://hostname:50000/FGEDB:currentSchema=BI;

# ODBC配置
# 在/etc/odbcinst.ini中添加:
[DB2]
Description = IBM DB2 ODBC Driver
Driver = /opt/ibm/db2/V12.1/lib64/libdb2o.so
FileUsage = 1

# 在/etc/odbc.ini中添加:
[FGEDB]
Description = FGEDB Database
Driver = DB2
Database = FGEDB
Servername = hostname
Port = 50000

# 测试连接
isql -v FGEDB username password

# Python连接示例
import ibm_db

conn_str = “DATABASE=FGEDB;HOSTNAME=hostname;PORT=50000;PROTOCOL=TCPIP;UID=username;PWD=password;”
conn = ibm_db.connect(conn_str, ”, ”)

sql = “SELECT * FROM FACT_SALES FETCH FIRST 10 ROWS ONLY”
stmt = ibm_db.exec_immediate(conn, sql)
result = ibm_db.fetch_both(stmt)
while result:
print(result)
result = ibm_db.fetch_both(stmt)

ibm_db.close(conn)

4.2 BI工具配置

# Tableau连接
# 1. 选择”IBM DB2″数据源
# 2. 输入服务器、端口、数据库名称
# 3. 输入用户名和密码
# 4. 选择Schema和表
# 5. 创建数据提取或实时连接

# Power BI连接
# 1. 选择”IBM DB2数据库”数据源
# 2. 输入服务器和数据库名称
# 3. 选择连接模式(导入或DirectQuery)
# 4. 输入凭据
# 5. 选择表并加载数据

# 创建视图简化BI工具访问
CREATE VIEW BI_SALES_SUMMARY AS
SELECT
d.DATE_VALUE,
d.YEAR,
d.MONTH,
d.DAY_NAME,
p.PRODUCT_NAME,
p.CATEGORY,
p.BRAND,
c.CUSTOMER_NAME,
c.CITY,
c.PROVINCE,
c.SEGMENT,
f.QUANTITY,
f.TOTAL_AMOUNT,
f.PROFIT
FROM FACT_SALES f
INNER JOIN DIM_DATE d ON f.DATE_KEY = d.DATE_KEY
INNER JOIN DIM_PRODUCT p ON f.PRODUCT_KEY = p.PRODUCT_KEY
INNER JOIN DIM_CUSTOMER c ON f.CUSTOMER_KEY = c.CUSTOMER_KEY;

# 授权BI用户
GRANT SELECT ON BI_SALES_SUMMARY TO bi_user;
GRANT SELECT ON MQT_SALES_MONTH TO bi_user;
GRANT SELECT ON MQT_SALES_CUSTOMER TO bi_user;

Part05-风哥经验总结与分享

5.1 BI集成要点

  • 采用星型/雪花模型设计数据仓库
  • 使用MQT预计算聚合数据
  • 列式存储提高查询性能
  • 合理使用OLAP函数
  • 创建视图简化BI工具访问
  • 工作负载管理隔离BI和OLTP

5.2 性能优化建议

场景 优化方案
复杂报表查询 MQT预计算,列式存储
多维分析 OLAP函数,CUBE/ROLLUP
BI工具连接 创建视图,合理授权
并发查询 WLM工作负载管理

5.3 运维要点

  • 定期刷新MQT
  • 监控BI查询性能
  • 管理BI用户权限
  • 定期更新统计信息
  • 监控并发和资源使用
  • 建立查询性能基线
更多视频教程www.fgedu.net.cn
学习交流加群风哥微信: itpux-com
风哥Oracle/MySQL/PostgreSQL/Greenplum/DB2/Redis等数据库培训课程,10年一线实战经验,企业级培训,真正掌握数据库核心技术!

本文由风哥教程整理发布,仅用于学习测试使用,转载注明出处:http://www.fgedu.net.cn/10327.html

联系我们

在线咨询:点击这里给我发消息

微信号:itpux-com

工作日:9:30-18:30,节假日休息