内容简介:本文风哥教程参考Linux官方文档、Red Hat Enterprise Linux官方文档、Ansible Automation Platform官方文档、Docker官方文档、Kubernetes官方文档和Podman官方文档等内容,详细介绍了相关技术的配置和使用方法。
风哥提示:
本文档介绍企业级任务调度系统部署综合实战案例。
Part01-Jenkins任务调度
1.1 Jenkins定时任务配置
[root@fgedu-jenkins ~]# cat > /var/lib/jenkins/jobs/fgedu-backup/config.xml << 'EOF'
#!/bin/bash
# Daily backup job: dumps all MySQL databases, archives the application and
# its configuration, syncs the day's directory to the remote rsync module,
# and prunes backups older than 30 days.
# NOTE(review): a Jenkins job config.xml is normally XML; this script
# presumably belongs inside the job's shell build step - confirm.
DATE=$(date +%Y%m%d)
BACKUP_DIR="/backup/daily/$DATE"
mkdir -p "$BACKUP_DIR"
# Back up the databases
# NOTE(review): a password passed on the command line is visible in the
# process list; consider a client option file instead.
mysqldump -u root -p'MySQL@123' --all-databases | gzip > "$BACKUP_DIR/database.sql.gz"
# Back up the application
tar -czf "$BACKUP_DIR/app.tar.gz" /var/www/fgedu
# Back up the configuration
tar -czf "$BACKUP_DIR/config.tar.gz" /etc/nginx /etc/php-fpm
# Sync to remote storage
rsync -avz "$BACKUP_DIR" backup-server::backup/
# Remove old backups (-mindepth 1 keeps the /backup/daily root itself)
find /backup/daily -mindepth 1 -mtime +30 -delete
echo "备份完成: $BACKUP_DIR"
EOF
# 配置Pipeline定时任务
[root@fgedu-jenkins ~]# cat > /var/lib/jenkins/jobs/fgedu-report/Jenkinsfile << 'EOF'
// Daily operations report pipeline: collects host facts and Prometheus
// metrics, generates the report, then mails it to the ops list.
pipeline {
    agent any
    triggers {
        // Run once a day at 06:00.
        cron('0 6 * * *')
    }
    stages {
        stage('收集数据') {
            steps {
                sh '''
                    echo "收集系统数据..."
                    # Collect server facts
                    ansible all -m setup --tree /tmp/facts/
                    # Collect monitoring data (URL quoted so "?" is not treated as a glob)
                    curl -s "http://prometheus:9090/api/v1/query?query=up" > /tmp/metrics.json
                '''
            }
        }
        stage('生成报告') {
            steps {
                sh '''
                    echo "生成运维报告..."
                    python3 /opt/scripts/generate_report.py
                '''
            }
        }
        stage('发送通知') {
            steps {
                // NOTE(review): BUILD_DATE is presumably supplied by a plugin
                // or a global environment setting - confirm it is defined.
                emailext (
                    subject: "FGEDU日报 - ${env.BUILD_DATE}",
                    body: '${FILE,path="report.html"}',
                    to: 'ops@fgedu.net.cn'
                )
            }
        }
    }
}
EOF
Part02-Cron任务管理
2.1 系统定时任务配置
[root@fgedu-server ~]# cat > /etc/cron.d/fgedu-tasks << 'EOF'
# FGEDU system scheduled tasks
# from:www.itpux.com.qq113257174.wx:itpux-com
# web: http://www.fgedu.net.cn
# Hourly log-rotation check
0 * * * * root /usr/local/bin/log-rotate-check.sh
# Health check every 5 minutes
*/5 * * * * root /usr/local/bin/health-check.sh
# Clean temporary files every day at 02:00
0 2 * * * root find /tmp -type f -mtime +7 -delete
# Renew SSL certificates every day at 03:00
0 3 * * * root /usr/local/bin/cert-renew.sh
# Optimize the database every Sunday at 04:00
0 4 * * 0 root /usr/local/bin/db-optimize.sh
# Clean up logs on the 1st of every month at 05:00
0 5 1 * * root /usr/local/bin/log-cleanup.sh
EOF
# Create the health-check script
[root@fgedu-server ~]# cat > /usr/local/bin/health-check.sh << 'EOF'
#!/bin/bash
# health-check.sh
# from:www.itpux.com.qq113257174.wx:itpux-com
# web: http://www.fgedu.net.cn
LOG_FILE="/var/log/health-check.log"
DATE=$(date '+%Y-%m-%d %H:%M:%S')

# Logs the state of one systemd unit and restarts it when it is not active.
# Globals:   LOG_FILE (appended to), DATE (read)
# Arguments: $1 - systemd unit name
check_service() {
  local service=$1
  if systemctl is-active --quiet "$service"; then
    echo "[$DATE] $service: OK" >> "$LOG_FILE"
  else
    echo "[$DATE] $service: FAILED" >> "$LOG_FILE"
    # Only report RESTARTED when the restart command actually succeeded.
    if systemctl restart "$service"; then
      echo "[$DATE] $service: RESTARTED" >> "$LOG_FILE"
    else
      echo "[$DATE] $service: RESTART FAILED" >> "$LOG_FILE"
    fi
  fi
}
# Logs a warning when the root filesystem is more than 90% full.
# Globals: LOG_FILE (appended to), DATE (read)
check_disk() {
  local usage
  # -P keeps each filesystem on a single line; field 5 is the use percentage.
  usage=$(df -P / | awk 'NR == 2 { sub(/%/, "", $5); print $5 }')
  # Skip silently when df produced nothing numeric to compare.
  [[ "$usage" =~ ^[0-9]+$ ]] || return 0
  if [ "$usage" -gt 90 ]; then
    echo "[$DATE] DISK WARNING: Usage at $usage%" >> "$LOG_FILE"
  fi
}
# Logs a warning when used memory exceeds 90% of total memory.
# Globals: LOG_FILE (appended to), DATE (read)
check_memory() {
  local usage
  # On the "Mem:" row of free, field 2 is total and field 3 is used.
  usage=$(free | awk '/^Mem:/ && $2 > 0 { printf "%.0f", $3 / $2 * 100 }')
  # Skip silently when free produced nothing numeric to compare.
  [[ "$usage" =~ ^[0-9]+$ ]] || return 0
  if [ "$usage" -gt 90 ]; then
    echo "[$DATE] MEMORY WARNING: Usage at $usage%" >> "$LOG_FILE"
  fi
}
# Run every check in order: the three services first, then disk and memory.
check_service nginx
check_service php-fpm
check_service mysql
check_disk
check_memory
EOF
[root@fgedu-server ~]# chmod +x /usr/local/bin/health-check.sh
Part03-Ansible定时任务
3.1 Ansible Tower调度
[root@fgedu-ansible ~]# cat > /etc/ansible/playbooks/scheduled-tasks.yml << 'EOF'
---
# Maintenance playbook: package update, old-kernel cleanup, log compression
# and pruning, clock step, and a report of services that are not running.
- name: 系统维护任务
  hosts: all
  become: yes
  tasks:
    - name: 清理包缓存
      yum:
        name: '*'
        state: latest
        update_cache: yes
      when: ansible_os_family == 'RedHat'

    - name: 清理旧内核
      shell: |
        current_kernel=$(uname -r)
        package-cleanup --oldkernels --count=2 -y
      when: ansible_os_family == 'RedHat'

    - name: 清理日志文件
      shell: |
        find /var/log -type f -name "*.log" -mtime +30 -exec gzip {} \;
        find /var/log -type f -name "*.gz" -mtime +90 -delete

    - name: 更新时间同步
      command: chronyc makestep
      when: ansible_os_family == 'RedHat'

    - name: 检查服务状态
      service_facts:
      register: services_state

    - name: 报告异常服务
      debug:
        msg: "服务 {{ item }} 状态异常"
      when: services_state.ansible_facts.services[item].state != 'running'
      loop: "{{ services_state.ansible_facts.services.keys() | list }}"
EOF
# Configure the Tower schedules
[root@fgedu-tower ~]# cat > /var/lib/awx/projects/schedules/maintenance.yml << 'EOF'
---
# Registers the daily, weekly and monthly schedules in Tower.
# NOTE(review): the controller may reject a DTSTART without a time zone and
# an RRULE without INTERVAL - confirm against the schedule API validation.
- name: 配置维护任务调度
  hosts: localhost
  gather_facts: no
  tasks:
    - name: 每日系统检查
      tower_schedule:
        name: "Daily System Check"
        state: present
        unified_job_template: "System Health Check"
        rrule: "DTSTART:20260101T000000 RRULE:FREQ=DAILY;INTERVAL=1"

    - name: 每周安全扫描
      tower_schedule:
        name: "Weekly Security Scan"
        state: present
        unified_job_template: "Security Scan"
        rrule: "DTSTART:20260101T010000 RRULE:FREQ=WEEKLY;BYDAY=SU"

    - name: 每月补丁更新
      tower_schedule:
        name: "Monthly Patch Update"
        state: present
        unified_job_template: "System Update"
        rrule: "DTSTART:20260101T020000 RRULE:FREQ=MONTHLY;BYMONTHDAY=1"
EOF
Part04-任务监控
4.1 任务执行监控
[root@fgedu-server ~]# cat > /usr/local/bin/task-monitor.sh << 'EOF'
#!/bin/bash
# task-monitor.sh
# from:www.itpux.com.qq113257174.wx:itpux-com
# web: http://www.fgedu.net.cn
#
# Prints a one-shot overview of scheduled-task activity: cron.d entries,
# today's cron log lines, red Jenkins jobs, the FAILED count in the cron log,
# processes in state D, and task durations derived from /var/log/tasks.log.
echo "=== 任务调度监控 ==="
echo "监控时间: $(date)"
echo ""
echo "1. Cron任务列表"
ls -la /etc/cron.d/
echo ""
echo "2. 今日Cron执行日志"
# Syslog timestamps use English month names and space-pad single-digit days,
# hence the C locale and %e rather than %d.
grep CRON /var/log/cron | grep "$(LC_ALL=C date '+%b %e')" | tail -20
echo ""
echo "3. Jenkins任务状态"
# The URL is quoted so that "?" and "[...]" are not treated as glob patterns.
curl -s 'http://localhost:8080/api/json?tree=jobs[name,color]' | jq '.jobs[] | select(.color | contains("red")) | .name'
echo ""
echo "4. 失败任务统计"
# grep -c already prints 0 (with exit status 1) when nothing matches, so only
# fall back to 0 when it printed nothing at all (for example, a missing log).
failed_count=$(grep -c "FAILED" /var/log/cron 2>/dev/null)
echo "${failed_count:-0}"
echo ""
echo "5. 长时间运行任务"
ps aux | awk '$8 ~ /D/ {print $0}'
echo ""
echo "6. 任务执行时间分析"
awk '/^Start:/ {start=$2} /^End:/ {print "Duration: " ($2 - start) " seconds"}' /var/log/tasks.log 2>/dev/null | tail -10
echo ""
echo "=== 监控完成 ==="
EOF
[root@fgedu-server ~]# chmod +x /usr/local/bin/task-monitor.sh
# 配置任务告警
[root@fgedu-server ~]# cat > /usr/local/bin/task-alert.sh << 'EOF'
#!/bin/bash
# task-alert.sh
# from:www.itpux.com.qq113257174.wx:itpux-com
# web: http://www.fgedu.net.cn
#
# Mails ops@fgedu.net.cn when today's cron log contains FAILED entries, or
# when more than five processes are in state D.

# Today's date as written in syslog timestamps: English month abbreviation
# and a space-padded day (%e), so days 1-9 match as well.
TODAY=$(LC_ALL=C date '+%b %e')

# Check for failed tasks
FAILED_TASKS=$(grep "FAILED" /var/log/cron | grep -c "$TODAY")
if [[ "$FAILED_TASKS" -gt 0 ]]; then
  mail -s "任务执行失败告警" ops@fgedu.net.cn << EMAIL
检测到 $FAILED_TASKS 个任务执行失败
失败详情:
$(grep "FAILED" /var/log/cron | grep "$TODAY")
请及时处理。
EMAIL
fi

# Check for long-running tasks (processes whose STAT column contains D)
LONG_TASKS=$(ps aux | awk '$8 ~ /D/ {n++} END {print n + 0}')
if [[ "$LONG_TASKS" -gt 5 ]]; then
  mail -s "长时间运行任务告警" ops@fgedu.net.cn << EMAIL
检测到 $LONG_TASKS 个长时间运行任务
任务列表:
$(ps aux | awk '$8 ~ /D/')
请检查是否正常。
EMAIL
fi
EOF
[root@fgedu-server ~]# chmod +x /usr/local/bin/task-alert.sh
- 合理规划任务执行时间
- 配置任务失败告警
- 记录任务执行日志
- 定期审查任务列表
- 监控任务执行时间
本文由风哥教程整理发布,仅用于学习测试使用,转载注明出处:http://www.fgedu.net.cn/10327.html
