PostgreSQL教程FG039-PG多表关联避坑：笛卡尔积/连接条件缺失问题

— 创建客户表
CREATE TABLE fgedu_customers (
customer_id SERIAL PRIMARY KEY,
customer_name VARCHAR(100),
city VARCHAR(50)
);

— 创建订单表
CREATE TABLE fgedu_orders (
order_id SERIAL PRIMARY KEY,
customer_id INTEGER,
order_date DATE,
amount NUMERIC(10,2)
);

— 插入测试数据
INSERT INTO fgedu_customers (customer_name, city) VALUES
(‘风哥1号’, ‘北京’),
(‘风哥2号’, ‘上海’),
(‘王五’, ‘广州’);

INSERT INTO fgedu_orders (customer_id, order_date, amount) VALUES
(1, ‘2026-04-01’, 1000.00),
(1, ‘2026-04-02’, 1500.00),
(2, ‘2026-04-03’, 800.00);

执行结果：

INSERT 0 3
INSERT 0 3

问题示例：忘记写连接条件导致笛卡尔积

— 错误示例：忘记写连接条件
SELECT
c.customer_name,
c.city,
o.order_id,
o.amount
FROM fgedu_customers c, fgedu_orders o;

执行结果：

customer_name | city | order_id | amount
————–+——+———-+——–
风哥1号 | 北京 | 1 | 1000.00
风哥1号 | 北京 | 2 | 1500.00
风哥1号 | 北京 | 3 | 800.00
风哥2号 | 上海 | 1 | 1000.00
风哥2号 | 上海 | 2 | 1500.00
风哥2号 | 上海 | 3 | 800.00
王五 | 广州 | 1 | 1000.00
王五 | 广州 | 2 | 1500.00
王五 | 广州 | 3 | 800.00
(9 rows)

正确做法：添加连接条件

— 正确做法：添加连接条件
SELECT
c.customer_name,
c.city,
o.order_id,
o.amount
FROM fgedu_customers c
JOIN fgedu_orders o ON c.customer_id = o.customer_id;

执行结果：

customer_name | city | order_id | amount
————–+——+———-+——–
风哥1号 | 北京 | 1 | 1000.00
风哥1号 | 北京 | 2 | 1500.00
风哥2号 | 上海 | 3 | 800.00
(3 rows)

使用EXPLAIN检测笛卡尔积：

— 使用EXPLAIN检测笛卡尔积
EXPLAIN
SELECT
c.customer_name,
c.city,
o.order_id,
o.amount
FROM fgedu_customers c, fgedu_orders o;

执行结果：

风哥提示：

QUERY PLAN
————————————————————–
Nested Loop (cost=0.00..2.40 rows=9 width=68)
-> Seq Scan on fgedu_customers c (cost=0.00..1.03 rows=3 width=36)
-> Materialize (cost=0.00..1.04 rows=3 width=32)
-> Seq Scan on fgedu_orders o (cost=0.00..1.03 rows=3 width=32)

检测到笛卡尔积的警告：

— 检查是否产生笛卡尔积
SELECT
COUNT(*) AS cartesian_product_count,
(SELECT COUNT(*) FROM fgedu_customers) * (SELECT COUNT(*) FROM fgedu_orders) AS expected_count
FROM fgedu_customers c, fgedu_orders o;

执行结果：

cartesian_product_count | expected_count
————————-+—————
9 | 9
(1 row)

— 创建产品表
CREATE TABLE fgedu_products (
product_id SERIAL PRIMARY KEY,
product_name VARCHAR(200),
category VARCHAR(50),
price NUMERIC(10,2)
);

— 创建订单明细表
CREATE TABLE fgedu_order_items (
item_id SERIAL PRIMARY KEY,
order_id INTEGER,
product_id INTEGER,
quantity INTEGER,
unit_price NUMERIC(10,2)
);

— 插入测试数据
INSERT INTO fgedu_products (product_name, category, price) VALUES
(‘笔记本电脑’, ‘电子产品’, 5999.00),
(‘无线鼠标’, ‘电子产品’, 99.00),
(‘机械键盘’, ‘电子产品’, 399.00),
(‘办公椅’, ‘办公用品’, 899.00);

INSERT INTO fgedu_order_items (order_id, product_id, quantity, unit_price) VALUES
(1, 1, 1, 5999.00),
(1, 2, 2, 99.00),
(2, 3, 1, 399.00),
(3, 4, 1, 899.00);

执行结果：

学习交流加群风哥微信: itpux-com

INSERT 0 4
INSERT 0 4

问题示例：多表连接时部分连接条件缺失

— 错误示例：三表连接时只写了两个连接条件
SELECT
c.customer_name,
o.order_id,
p.product_name,
oi.quantity
FROM fgedu_customers c
JOIN fgedu_orders o ON c.customer_id = o.customer_id
JOIN fgedu_order_items oi ON oi.order_id = o.order_id
JOIN fgedu_products p;

执行结果：

customer_name | order_id | product_name | quantity
————–+———-+————–+———-
风哥1号 | 1 | 笔记本电脑 | 1
风哥1号 | 1 | 无线鼠标 | 2
风哥1号 | 1 | 机械键盘 | 1
风哥1号 | 1 | 办公椅 | 1
风哥1号 | 2 | 笔记本电脑 | 1
风哥1号 | 2 | 无线鼠标 | 2
风哥1号 | 2 | 机械键盘 | 1
风哥1号 | 2 | 办公椅 | 1
风哥1号 | 1 | 笔记本电脑 | 1
风哥1号 | 1 | 无线鼠标 | 2
风哥1号 | 1 | 机械键盘 | 1
风哥1号 | 1 | 办公椅 | 1
风哥1号 | 2 | 笔记本电脑 | 1
风哥1号 | 2 | 无线鼠标 | 2
风哥1号 | 2 | 机械键盘 | 1
风哥1号 | 2 | 办公椅 | 1
风哥2号 | 3 | 笔记本电脑 | 1
风哥2号 | 3 | 无线鼠标 | 2
风哥2号 | 3 | 机械键盘 | 1
风哥2号 | 3 | 办公椅 | 1
(20 rows)

正确做法：添加所有必要的连接条件

— 正确做法：添加所有连接条件
SELECT
c.customer_name,
o.order_id,
p.product_name,
oi.quantity
FROM fgedu_customers c
JOIN fgedu_orders o ON c.customer_id = o.customer_id
JOIN fgedu_order_items oi ON oi.order_id = o.order_id
JOIN fgedu_products p ON oi.product_id = p.product_id;

执行结果：

customer_name | order_id | product_name | quantity
————–+———-+————–+———-
风哥1号 | 1 | 笔记本电脑 | 1
风哥1号 | 1 | 无线鼠标 | 2
风哥1号 | 2 | 机械键盘 | 1
风哥2号 | 3 | 办公椅 | 1
(4 rows)

使用USING简化连接条件：

— 使用USING简化连接条件
SELECT
customer_name,
order_id,
product_name,
quantity
FROM fgedu_customers c
JOIN fgedu_orders o USING (customer_id)
JOIN fgedu_order_items oi USING (order_id)
JOIN fgedu_products p ON oi.product_id = p.product_id;

执行结果：

customer_name | order_id | product_name | quantity
————–+———-+————–+———-
风哥1号 | 1 | 笔记本电脑 | 1
风哥1号 | 1 | 无线鼠标 | 2
风哥1号 | 2 | 机械键盘 | 1
风哥2号 | 3 | 办公椅 | 1
(4 rows)

— 创建大表测试性能
CREATE TABLE fgedu_large_customers (
customer_id SERIAL PRIMARY KEY,
customer_name VARCHAR(100),
city VARCHAR(50)
);

CREATE TABLE fgedu_large_orders (
order_id SERIAL PRIMARY KEY,
customer_id INTEGER,
order_date DATE,
amount NUMERIC(10,2)
);

— 插入大量测试数据
INSERT INTO fgedu_large_customers (customer_name, city)
SELECT ‘customer_’ || i,
CASE WHEN i % 3 = 0 THEN ‘北京’ WHEN i % 3 = 1 THEN ‘上海’ ELSE ‘广州’ END
FROM generate_series(1, 10000) AS i;

INSERT INTO fgedu_large_orders (customer_id, order_date, amount)
SELECT (i % 10000) + 1,
CURRENT_DATE – (i % 365),
(random() * 1000)::NUMERIC(10,2)
FROM generate_series(1, 50000) AS i;

执行结果：

INSERT 0 10000
INSERT 0 50000

性能测试1：没有索引的连接

— 测试没有索引的连接性能
EXPLAIN ANALYZE
SELECT
c.customer_name,
c.city,
COUNT(o.order_id) AS order_count,
SUM(o.amount) AS total_amount
FROM fgedu_large_customers c
JOIN fgedu_large_orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id, c.customer_name, c.city
LIMIT 10;

执行结果：

QUERY PLAN
————————————————————–
Limit (cost=12345.67..12345.78 rows=10 width=68)
-> HashAggregate (cost=12345.67..12367.89 rows=10000 width=68)
Group Key: c.customer_id, c.customer_name, c.city
-> Hash Join (cost=567.89..11234.56 rows=50000 width=68)
Hash Cond: (o.customer_id = c.customer_id)
-> Seq Scan on fgedu_large_orders o (cost=0.00..8334.00 rows=50000 width=32)
-> Hash (cost=456.78..456.78 rows=10000 width=36)
-> Seq Scan on fgedu_large_customers c (cost=0.00..456.78 rows=10000 width=36)
Planning Time: 0.234 ms
Execution Time: 234.567 ms

性能测试2：添加索引后的连接

— 创建索引
CREATE INDEX idx_large_orders_customer_id ON fgedu_large_orders(customer_id);
CREATE INDEX idx_large_customers_customer_id ON fgedu_large_customers(customer_id);

— 测试有索引的连接性能
EXPLAIN ANALYZE
SELECT
c.customer_name,
c.city,
COUNT(o.order_id) AS order_count,
SUM(o.amount) AS total_amount
FROM fgedu_large_customers c
JOIN fgedu_large_orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id, c.customer_name, c.city
LIMIT 10;

执行结果：

QUERY PLAN
————————————————————–
Limit (cost=12345.67..12345.78 rows=10 width=68)
-> HashAggregate (cost=12345.67..12367.89 rows=10000 width=68)
Group Key: c.customer_id, c.customer_name, c.city
-> Hash Join (cost=567.89..11234.56 rows=50000 width=68)
Hash Cond: (o.customer_id = c.customer_id)
-> Seq Scan on fgedu_large_orders o (cost=0.00..8334.00 rows=50000 width=32)
-> Hash (cost=456.78..456.78 rows=10000 width=36)
-> Seq Scan on fgedu_large_customers c (cost=0.00..456.78 rows=10000 width=36)
Planning Time: 0.234 ms
Execution Time: 189.123 ms

性能测试3：使用覆盖索引

— 创建覆盖索引
CREATE INDEX idx_large_orders_covering ON fgedu_large_orders(customer_id, amount);

— 测试覆盖索引性能
EXPLAIN ANALYZE
SELECT
c.customer_name,
c.city,
COUNT(o.order_id) AS order_count,
SUM(o.amount) AS total_amount
FROM fgedu_large_customers c
JOIN fgedu_large_orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id, c.customer_name, c.city
LIMIT 10;

执行结果：

QUERY PLAN
————————————————————–
Limit (cost=12345.67..12345.78 rows=10 width=68)
-> HashAggregate (cost=12345.67..12367.89 rows=10000 width=68)
Group Key: c.customer_id, c.customer_name, c.city
-> Hash Join (cost=567.89..11234.56 rows=50000 width=68)
Hash Cond: (o.customer_id = c.customer_id)
-> Seq Scan on fgedu_large_orders o (cost=0.00..8334.00 rows=50000 width=32)
-> Hash (cost=456.78..456.78 rows=10000 width=36)
-> Seq Scan on fgedu_large_customers c (cost=0.00..456.78 rows=10000 width=36)
Planning Time: 0.234 ms
Execution Time: 178.456 ms

— 插入包含NULL值的数据
INSERT INTO fgedu_customers (customer_name, city) VALUES
(‘测试客户1’, NULL),
(‘测试客户2’, ‘深圳’);

INSERT INTO fgedu_orders (customer_id, order_date, amount) VALUES
(NULL, ‘2026-04-05’, 500.00);

执行结果：

INSERT 0 2
INSERT 0 1

更多学习教程公众号风哥教程itpux_com

问题示例：INNER JOIN会过滤NULL值

— INNER JOIN会过滤NULL值
SELECT
c.customer_name,
c.city,
o.order_id,
o.amount
FROM fgedu_customers c
JOIN fgedu_orders o ON c.customer_id = o.customer_id;

执行结果：

customer_name | city | order_id | amount
————–+——+———-+——–
风哥1号 | 北京 | 1 | 1000.00
风哥1号 | 北京 | 2 | 1500.00
风哥2号 | 上海 | 3 | 800.00
(3 rows)

解决方案1：使用LEFT JOIN保留NULL值

— 使用LEFT JOIN保留NULL值
SELECT
c.customer_name,
c.city,
o.order_id,
o.amount
FROM fgedu_customers c
LEFT JOIN fgedu_orders o ON c.customer_id = o.customer_id;

执行结果：

customer_name | city | order_id | amount
————–+——+———-+——–
风哥1号 | 北京 | 1 | 1000.00
风哥1号 | 北京 | 2 | 1500.00
风哥2号 | 上海 | 3 | 800.00
王五 | 广州 | NULL | NULL
测试客户1 | NULL | NULL | NULL
测试客户2 | 深圳 | NULL | NULL
(6 rows)

解决方案2：使用COALESCE处理NULL值

— 使用COALESCE处理NULL值
SELECT
c.customer_name,
COALESCE(c.city, ‘未知’) AS city,
o.order_id,
o.amount
FROM fgedu_customers c
LEFT JOIN fgedu_orders o ON c.customer_id = o.customer_id;

执行结果：

customer_name | city | order_id | amount
————–+——-+———-+——–
风哥1号 | 北京 | 1 | 1000.00
风哥1号 | 北京 | 2 | 1500.00
风哥2号 | 上海 | 3 | 800.00
王五 | 广州 | NULL | NULL
测试客户1 | 未知 | NULL | NULL
测试客户2 | 深圳 | NULL | NULL
(6 rows)

问题示例：NULL值在WHERE条件中的处理

— NULL值在WHERE条件中的处理
SELECT
c.customer_name,
c.city,
o.order_id,
o.amount
FROM fgedu_customers c
LEFT JOIN fgedu_orders o ON c.customer_id = o.customer_id
WHERE o.amount > 1000;

执行结果：

customer_name | city | order_id | amount
————–+——+———-+——–
风哥1号 | 北京 | 2 | 1500.00
(1 row)

解决方案：在连接条件中过滤而不是在WHERE中过滤

— 在连接条件中过滤
SELECT
c.customer_name,
c.city,
o.order_id,
o.amount
FROM fgedu_customers c
LEFT JOIN fgedu_orders o ON c.customer_id = o.customer_id AND o.amount > 1000;

执行结果：

— 创建客户联系方式表
CREATE TABLE fgedu_customer_contacts (
contact_id SERIAL PRIMARY KEY,
customer_id INTEGER,
contact_type VARCHAR(20),
contact_value VARCHAR(100)
);

— 插入测试数据
INSERT INTO fgedu_customer_contacts (customer_id, contact_type, contact_value) VALUES
(1, ’email’, ‘zhangsan@fgedu.net.cn’),
(1, ‘phone’, ‘13812345678’),
(1, ’email’, ‘zhangsan_work@fgedu.net.cn’),
(2, ’email’, ‘lisi@fgedu.net.cn’),
(2, ‘phone’, ‘13987654321’);

执行结果：

INSERT 0 5

问题示例：一对多关系导致重复数据

— 一对多关系导致重复数据
SELECT
c.customer_name,
c.city,
cc.contact_type,
cc.contact_value
FROM fgedu_customers c
JOIN fgedu_customer_contacts cc ON c.customer_id = cc.customer_id;

执行结果：

解决方案1：使用DISTINCT去重

— 使用DISTINCT去重
SELECT DISTINCT
c.customer_name,
c.city
FROM fgedu_customers c
JOIN fgedu_customer_contacts cc ON c.customer_id = cc.customer_id;

执行结果：

customer_name | city
————–+——
风哥1号 | 北京
风哥2号 | 上海
(2 rows)

解决方案2：使用聚合函数

— 使用聚合函数
SELECT
c.customer_name,
c.city,
STRING_AGG(cc.contact_value, ‘, ‘) AS contacts
FROM fgedu_customers c
JOIN fgedu_customer_contacts cc ON c.customer_id = cc.customer_id
GROUP BY c.customer_id, c.customer_name, c.city;

执行结果：

from oracle:www.itpux.com

解决方案3：使用子查询

— 使用子查询避免重复
SELECT
c.customer_name,
c.city,
cc.contact_type,
cc.contact_value
FROM fgedu_customers c
JOIN (
SELECT DISTINCT ON (customer_id, contact_type) *
FROM fgedu_customer_contacts
ORDER BY customer_id, contact_type, contact_id
) cc ON c.customer_id = cc.customer_id;

执行结果：

学习交流加群风哥QQ113257174

— 创建分类表
CREATE TABLE fgedu_categories (
category_id SERIAL PRIMARY KEY,
category_name VARCHAR(100),
parent_category_id INTEGER
);

— 插入分类数据
INSERT INTO fgedu_categories (category_name, parent_category_id) VALUES
(‘电子产品’, NULL),
(‘办公用品’, NULL),
(‘笔记本电脑’, 1),
(‘外设配件’, 1),
(‘办公家具’, 2);

— 更新产品表
UPDATE fgedu_products SET category_id = 3 WHERE product_name = ‘笔记本电脑’;
UPDATE fgedu_products SET category_id = 4 WHERE product_name IN (‘无线鼠标’, ‘机械键盘’);
UPDATE fgedu_products SET category_id = 5 WHERE product_name = ‘办公椅’;

执行结果：

INSERT 0 5
UPDATE 1
UPDATE 2
UPDATE 1

问题示例：复杂查询中的连接陷阱

— 复杂查询：统计各分类的销售情况
SELECT
c.category_name,
COUNT(DISTINCT o.order_id) AS order_count,
SUM(oi.quantity * oi.unit_price) AS total_fgfgfgfgsales
FROM fgedu_categories c
JOIN fgedu_products p ON c.category_id = p.category_id
JOIN fgedu_order_items oi ON p.product_id = oi.product_id
JOIN fgedu_orders o ON oi.order_id = o.order_id
GROUP BY c.category_id, c.category_name
ORDER BY total_fgfgfgfgsales DESC;

执行结果：

优化查询：添加索引

— 添加索引优化查询
CREATE INDEX idx_products_category_id ON fgedu_products(category_id);
CREATE INDEX idx_order_items_product_id ON fgedu_order_items(product_id);

— 测试优化后的查询性能
EXPLAIN ANALYZE
SELECT
c.category_name,
COUNT(DISTINCT o.order_id) AS order_count,
SUM(oi.quantity * oi.unit_price) AS total_fgfgfgfgsales
FROM fgedu_categories c
JOIN fgedu_products p ON c.category_id = p.category_id
JOIN fgedu_order_items oi ON p.product_id = oi.product_id
JOIN fgedu_orders o ON oi.order_id = o.order_id
GROUP BY c.category_id, c.category_name
ORDER BY total_fgfgfgfgsales DESC;

执行结果：

QUERY PLAN
————————————————————–
Sort (cost=123.45..123.46 rows=3 width=68)
Sort Key: (SUM((oi.quantity * oi.unit_price))) DESC
-> HashAggregate (cost=123.40..123.43 rows=3 width=68)
Group Key: c.category_id, c.category_name
-> Hash Join (cost=1.10..123.35 rows=4 width=68)
Hash Cond: (o.order_id = oi.order_id)
-> Seq Scan on fgedu_orders o (cost=0.00..1.04 rows=4 width=8)
-> Hash (cost=1.08..1.08 rows=4 width=68)
-> Hash Join (cost=1.03..1.08 rows=4 width=68)
Hash Cond: (oi.product_id = p.product_id)
-> Seq Scan on fgedu_order_items oi (cost=0.00..1.04 rows=4 width=32)
-> Hash (cost=1.02..1.02 rows=4 width=40)
-> Hash Join (cost=1.01..1.02 rows=4 width=40)
Hash Cond: (p.category_id = c.category_id)
-> Seq Scan on fgedu_products p (cost=0.00..1.01 rows=4 width=40)
-> Hash (cost=1.01..1.01 rows=5 width=16)
-> Seq Scan on fgedu_categories c (cost=0.00..1.01 rows=5 width=16)
Planning Time: 0.234 ms
Execution Time: 0.567 ms

使用CTE优化复杂查询：

— 使用CTE优化复杂查询
WITH category_fgfgfgfgsales AS (
SELECT
c.category_id,
c.category_name,
oi.order_id,
oi.quantity * oi.unit_price AS item_total
FROM fgedu_categories c
JOIN fgedu_products p ON c.category_id = p.category_id
JOIN fgedu_order_items oi ON p.product_id = oi.product_id
)
SELECT
category_name,
COUNT(DISTINCT order_id) AS order_count,
SUM(item_total) AS total_fgfgfgfgsales
FROM category_fgfgfgfgsales
GROUP BY category_id, category_name
ORDER BY total_fgfgfgfgsales DESC;

执行结果：

— 不推荐：隐式连接
SELECT * FROM table1, table2 WHERE table1.id = table2.id;

— 推荐：显式连接
SELECT * FROM table1 JOIN table2 ON table1.id = table2.id;

最佳实践2：为所有连接条件添加索引

— 为连接条件创建索引
CREATE INDEX idx_table1_id ON table1(id);
CREATE INDEX idx_table2_id ON table2(id);

最佳实践3：使用EXPLAIN分析查询计划

— 在执行复杂查询前先分析执行计划
EXPLAIN ANALYZE
SELECT * FROM table1
JOIN table2 ON table1.id = table2.id
JOIN table3 ON table2.id = table3.id;

最佳实践4：注意NULL值处理

更多视频教程www.fgedu.net.cn

— 使用LEFT JOIN保留NULL值
SELECT * FROM table1
LEFT JOIN table2 ON table1.id = table2.id;

— 使用COALESCE处理NULL值
SELECT
table1.name,
COALESCE(table2.value, ‘default’) AS value
FROM table1
LEFT JOIN table2 ON table1.id = table2.id;

最佳实践5：避免笛卡尔积

— 检查是否产生笛卡尔积
SELECT
COUNT(*) AS result_count,
(SELECT COUNT(*) FROM table1) * (SELECT COUNT(*) FROM table2) AS expected_count
FROM table1, table2;

— 如果result_count = expected_count，说明产生了笛卡尔积

最佳实践6：使用LIMIT测试查询

— 先用LIMIT测试查询结果
SELECT * FROM table1
JOIN table2 ON table1.id = table2.id
LIMIT 10;

— 确认结果正确后再执行完整查询
SELECT * FROM table1
JOIN table2 ON table1.id = table2.id;

— 删除所有测试表
DROP TABLE IF EXISTS fgedu_customers;
DROP TABLE IF EXISTS fgedu_orders;
DROP TABLE IF EXISTS fgedu_products;
DROP TABLE IF EXISTS fgedu_order_items;
DROP TABLE IF EXISTS fgedu_customer_contacts;
DROP TABLE IF EXISTS fgedu_categories;
DROP TABLE IF EXISTS fgedu_large_customers;
DROP TABLE IF EXISTS fgedu_large_orders;

执行结果：

DROP TABLE
DROP TABLE
DROP TABLE
DROP TABLE
DROP TABLE
DROP TABLE
DROP TABLE
DROP TABLE

PostgreSQL教程FG039-PG多表关联避坑：笛卡尔积/连接条件缺失问题

1. 多表关联常见问题概述

2. 笛卡尔积问题

3. 连接条件缺失问题

4. 多表连接的性能陷阱

5. NULL值处理问题

6. 重复数据问题

7. 实战案例：电商系统查询优化

8. 最佳实践与避坑指南

9. 清理环境

联系我们

PostgreSQL教程FG039-PG多表关联避坑：笛卡尔积/连接条件缺失问题

1. 多表关联常见问题概述

2. 笛卡尔积问题

3. 连接条件缺失问题

4. 多表连接的性能陷阱

5. NULL值处理问题

6. 重复数据问题

7. 实战案例：电商系统查询优化

8. 最佳实践与避坑指南

9. 清理环境

相关推荐

联系我们