内容简介:本文风哥教程参考Linux官方文档、Red Hat Enterprise Linux官方文档、Ansible Automation Platform官方文档、Docker官方文档、Kubernetes官方文档和Podman官方文档等内容,详细介绍了相关技术的配置和使用方法。
本文档介绍Linux系统磁盘满额故障的排查方法和解决策略。
风哥提示:学习交流加群风哥微信: itpux-com
Part01-磁盘空间检查
1.1 检查磁盘使用情况
[root@fgedu-server ~]# df -h
Filesystem Size Used Avail Use% Mounted on
/dev/sda1 50G 48G 1.2G 98% /
/dev/sda2 200G 80G 110G 43% /data
/dev/sdb1 500G 200G 280G 42% /backup
# 查看inode使用情况
[root@fgedu-server ~]# df -i
Filesystem Inodes IUsed IFree IUse% Mounted on
/dev/sda1 3276800 280000 2996800 9% /
/dev/sda2 13107200 500000 12607200 4% /data
/dev/sdb1 32768000 100000 32668000 1% /backup
# 查看根目录下各目录大小
[root@fgedu-server ~]# du -h --max-depth=1 /
4.0K /dev
4.0K /proc
16G /usr
8.0G /var
12G /home
4.0K /sys
4.0K /tmp
4.0K /mnt
4.0K /media
4.0K /opt
4.0K /srv
40K /root
36G /
Part02-定位大文件
2.1 查找大文件
[root@fgedu-server ~]# find / -type f -size +100M -exec ls -lh {} \; 2>/dev/null
-rw-r--r--. 1 root root 1.2G Jan 15 10:00 /var/log/messages
-rw-r--r--. 1 root root 850M Jan 15 09:30 /var/log/secure
-rw-r--r--. 1 root root 2.5G Jan 15 08:00 /home/user/data.log
-rw-r--r--. 1 mysql mysql 1.8G Jan 15 07:00 /var/lib/mysql/ibdata1
# from PG视频:www.itpux.com
# 查找最近修改的大文件
[root@fgedu-server ~]# find / -type f -size +50M -mtime -7 -exec ls -lh {} \; 2>/dev/null
-rw-r--r--. 1 root root 650M Jan 14 16:00 /var/log/nginx/access.log
-rw-r--r--. 1 root root 450M Jan 14 15:30 /var/log/httpd/error.log
# 按大小排序显示目录
[root@fgedu-server ~]# du -h --max-depth=2 /var | sort -hr
8.0G /var
4.5G /var/log
2.0G /var/lib
1.5G /var/cache
# 查看具体日志文件大小
[root@fgedu-server ~]# ls -lh /var/log/
-rw-r--r--. 1 root root 1.2G Jan 15 10:00 messages
-rw-r--r--. 1 root root 850M Jan 15 09:30 secure
-rw-r--r--. 1 root root 650M Jan 14 16:00 nginx/access.log
-rw-r--r--. 1 root root 450M Jan 14 15:30 httpd/error.log
Part03-清理磁盘空间
3.1 清理日志文件
[root@fgedu-server ~]# echo "" > /var/log/messages
[root@fgedu-server ~]# echo "" > /var/log/secure
[root@fgedu-server ~]# echo "" > /var/log/nginx/access.log
[root@fgedu-server ~]# echo "" > /var/log/httpd/error.log
# 配置日志轮转
[root@fgedu-server ~]# cat > /etc/logrotate.d/custom << 'EOF'
/var/log/messages {
daily
rotate 7
compress
delaycompress
missingok
notifempty
postrotate
systemctl reload rsyslog > /dev/null 2>&1 || true
endscript
}
/var/log/secure {
daily
rotate 7
compress
delaycompress
missingok
notifempty
postrotate
systemctl reload rsyslog > /dev/null 2>&1 || true
endscript
}
/var/log/nginx/*.log {
daily
rotate 7
compress
delaycompress
missingok
notifempty
create 0644 nginx nginx
postrotate
systemctl reload nginx > /dev/null 2>&1 || true
endscript
}
EOF
# 执行日志轮转
[root@fgedu-server ~]# logrotate -f /etc/logrotate.d/custom
# 清理旧的轮转日志
[root@fgedu-server ~]# find /var/log -name "*.gz" -mtime +14 -delete
3.2 清理临时文件
[root@fgedu-server ~]# find /tmp -type f -mtime +7 -delete
[root@fgedu-server ~]# find /tmp -type d -empty -delete
# 清理/var/tmp目录
[root@fgedu-server ~]# find /var/tmp -type f -mtime +7 -delete
[root@fgedu-server ~]# find /var/tmp -type d -empty -delete
# 清理缓存文件
[root@fgedu-server ~]# sync && echo 3 > /proc/sys/vm/drop_caches
# 清理包管理器缓存
[root@fgedu-server ~]# dnf clean all
# 清理Docker镜像和容器
[root@fgedu-server ~]# docker system prune -f
[root@fgedu-server ~]# docker volume prune -f
Part04-深度清理
4.1 查找并清理重复文件
[root@fgedu-server ~]# dnf install -y fdupes
# 查找重复文件
[root@fgedu-server ~]# fdupes -r /home
# 自动删除重复文件(-d 删除,-N 不提示确认,每组重复文件仅保留第一个)
[root@fgedu-server ~]# fdupes -rdN /home
# 清理用户回收站
[root@fgedu-server ~]# find /home -name ".Trash" -type d -exec rm -rf {} \; 2>/dev/null
# 清理大的邮件文件
[root@fgedu-server ~]# find /var/spool/mail -type f -size +10M -exec ls -lh {} \;
[root@fgedu-server ~]# echo "" > /var/spool/mail/root
Part05-空间不足应急处理
5.1 紧急释放空间
[root@fgedu-server ~]# dd if=/dev/zero of=/bigfile bs=1M count=1024
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB) copied, 1.234567 s, 870 MB/s
# 删除临时文件释放空间
[root@fgedu-server ~]# rm -f /bigfile
# 检查已删除但仍被进程占用(未释放空间)的文件
[root@fgedu-server ~]# lsof | grep deleted
# 重启占用已删除文件的进程
[root@fgedu-server ~]# systemctl restart nginx
[root@fgedu-server ~]# systemctl restart httpd
# 检查并清理僵尸进程
[root@fgedu-server ~]# ps aux | grep defunct
[root@fgedu-server ~]# kill -9 $(ps aux | grep defunct | awk '{print $2}')
Part06-预防措施
6.1 监控与告警
[root@fgedu-server ~]# cat > /usr/local/bin/disk-monitor.sh << 'EOF'
#!/bin/bash
# disk-monitor.sh
# from:www.itpux.com.更多学习教程公众号风哥教程itpux_comqq113257174.wx:itpux-com
# web: http://www.fgedu.net.cn
ALERT_EMAIL="admin@fgedu.net.cn"
THRESHOLD=90
while read line; do
  mount=$(echo $line | awk '{print $6}')
  usage=$(echo $line | awk '{print $5}' | sed 's/%//')
  if [ $usage -ge $THRESHOLD ]; then
    echo "告警: $mount 磁盘使用率达到 $usage%"
    echo "$mount 磁盘使用率达到 $usage%,请及时处理" | mail -s "告警: 磁盘空间不足" $ALERT_EMAIL
  fi
done <<< "$(df -h | grep -v tmpfs | grep -v devtmpfs)"
echo "磁盘监控完成: $(date)"
EOF
[root@fgedu-server ~]# chmod +x /usr/local/bin/disk-monitor.sh
# 学习交流加群风哥QQ113257174
# 配置定时任务
[root@fgedu-server ~]# cat > /etc/cron.d/disk-monitor << 'EOF'
# 磁盘空间监控
0 * * * * root /usr/local/bin/disk-monitor.sh
EOF
# 配置自动清理任务
[root@fgedu-server ~]# cat > /usr/local/bin/auto-clean.sh << 'EOF'
#!/bin/bash
# auto-clean.sh
# from:www.itpux.com.qq113257174.wx:itpux-com
# web: http://www.fgedu.net.cn
# 清理日志文件
echo "清理日志文件..."
find /var/log -name "*.log" -size +100M -exec truncate -s 0 {} \;
# 清理临时文件
echo "清理临时文件..."
find /tmp -type f -mtime +7 -delete
find /var/tmp -type f -mtime +7 -delete
# 清理包管理器缓存
echo "清理包管理器缓存..."
dnf clean all > /dev/null 2>&1
# 清理Docker资源
echo "清理Docker资源..."
docker system prune -f > /dev/null 2>&1
docker volume prune -f > /dev/null 2>&1
echo "自动清理完成: $(date)"
EOF
[root@fgedu-server ~]# chmod +x /usr/local/bin/auto-clean.sh
# 配置定时清理任务
[root@fgedu-server ~]# cat > /etc/cron.d/auto-clean << 'EOF'
# 自动清理任务
0 2 * * * root /usr/local/bin/auto-clean.sh
EOF
- 定期监控磁盘使用情况
- 配置合理的日志轮转策略
- 及时清理临时文件和缓存
- 设置磁盘空间告警阈值
- 考虑使用LVM进行灵活的空间管理
- 对于重要数据,实施定期备份策略
本文由风哥教程整理发布,仅用于学习测试使用,转载注明出处:http://www.fgedu.net.cn/10327.html
