3.1.2 归档旧分区
# 备份旧分区数据
mysqldump -h 192.168.1.20 -P 4000 -u root -p test logs –where=”created_at < '2024-03-01'" > logs_archive.sql
mysqldump -h 192.168.1.20 -P 4000 -u root -p test logs –where=”created_at < '2024-03-01'" > logs_archive.sql
# 截断旧分区
ALTER TABLE test.logs TRUNCATE PARTITION p202401, p202402;
# 或者删除旧分区
ALTER TABLE test.logs DROP PARTITION p202401, p202402;
# 添加新分区
ALTER TABLE test.logs ADD PARTITION (PARTITION p202407 VALUES LESS THAN (TO_DAYS(‘2024-08-01’)));
3.2 数据迁移归档
3.2.1 使用TiDB Lightning归档数据
# 导出历史数据
mysqldump -h 192.168.1.20 -P 4000 -u root -p test old_data –where=”created_at < '2024-01-01'" > old_data.sql
mysqldump -h 192.168.1.20 -P 4000 -u root -p test old_data –where=”created_at < '2024-01-01'" > old_data.sql
# 导入到归档数据库
tiup lightning –config lightning.toml
3.2.2 配置TiDB Lightning
# lightning.toml [lightning] # 任务名称 task = "tidb lightning" # 日志级别 log-level = "info" # 数据目录 data-source-dir = "/tidb/app/backup" # 目标TiDB配置 [tidb] host = "192.168.1.21" port = 4000 user = "root" password = "password" db-name = "archive" # 并发配置 [mydumper] read-block-size = 67108864 # 导入配置 [tidb-lightning] region-concurrency = 16 index-concurrency = 8
3.3 数据清理
3.3.1 批量删除历史数据
# 批量删除历史数据
DELETE FROM test.logs WHERE created_at < '2024-01-01' LIMIT 10000; # 重复执行直到删除完毕 # 或者使用存储过程批量删除
DELETE FROM test.logs WHERE created_at < '2024-01-01' LIMIT 10000; # 重复执行直到删除完毕 # 或者使用存储过程批量删除
3.3.2 使用存储过程清理数据
-- 创建存储过程批量删除数据
DELIMITER //
CREATE PROCEDURE `clean_old_data`()
BEGIN
DECLARE done INT DEFAULT 0;
WHILE done = 0 DO
DELETE FROM test.logs WHERE created_at < '2024-01-01' LIMIT 10000;
IF ROW_COUNT() = 0 THEN
SET done = 1;
END IF;
-- 暂停一下,避免影响系统性能
DO SLEEP(0.1);
END WHILE;
END //
DELIMITER ;
-- 执行存储过程
CALL clean_old_data();
3.4 监控与验证
3.4.1 监控存储使用情况
# 查看表大小
SELECT table_schema, table_name, data_length, index_length, data_free
FROM information_schema.tables
WHERE table_schema = 'test'
ORDER BY data_length + index_length DESC;学习交流加群风哥QQ113257174
SELECT table_schema, table_name, data_length, index_length, data_free
FROM information_schema.tables
WHERE table_schema = 'test'
ORDER BY data_length + index_length DESC;学习交流加群风哥QQ113257174
# 查看分区大小
SELECT partition_name, table_rows, data_length, index_length
FROM information_schema.partitions
WHERE table_schema = 'test' AND table_name = 'logs'
ORDER BY partition_name;
3.4.2 验证归档数据
# 验证归档数据完整性
# 源表数据量
SELECT COUNT(*) FROM test.logs WHERE created_at < '2024-03-01'; # 归档表数据量 SELECT COUNT(*) FROM archive.logs; # 验证数据一致性 SELECT * FROM test.logs WHERE created_at < '2024-03-01' LIMIT 10; SELECT * FROM archive.logs LIMIT 10;
# 源表数据量
SELECT COUNT(*) FROM test.logs WHERE created_at < '2024-03-01'; # 归档表数据量 SELECT COUNT(*) FROM archive.logs; # 验证数据一致性 SELECT * FROM test.logs WHERE created_at < '2024-03-01' LIMIT 10; SELECT * FROM archive.logs LIMIT 10;
更多视频教程www.fgedu.net.cn
本文由风哥教程整理发布,仅用于学习测试使用,转载注明出处:http://www.fgedu.net.cn/10327.html
