内容简介:本文风哥教程参考Linux官方文档、Red Hat Enterprise Linux官方文档、Ansible Automation Platform官方文档、Docker官方文档、Kubernetes官方文档和Podman官方文档等内容,详细介绍了相关技术的配置和使用方法。
风哥提示:
本文档介绍Python运维开发的基础知识和实践方法。
Part01-Python环境配置
1.1 安装Python
[root@linux ~]# dnf install -y python3 python3-pip python3-devel
# 查看Python版本
[root@linux ~]# python3 --version
Python 3.9.16
# 配置pip镜像源
[root@linux ~]# mkdir -p ~/.pip
[root@linux ~]# cat > ~/.pip/pip.conf << 'EOF'
[global]
index-url = https://pypi.tuna.tsinghua.edu.cn/simple
trusted-host = pypi.tuna.tsinghua.edu.cn
EOF
# 安装常用运维库
[root@linux ~]# pip3 install paramiko requests psutil fabric
# 创建虚拟环境
[root@linux ~]# python3 -m venv /fglinux/venv
[root@linux ~]# source /fglinux/venv/bin/activate
(venv) [root@linux ~]# python --version
Python 3.9.16
# 退出虚拟环境
(venv) [root@linux ~]# deactivate
1.2 Python基础语法
[root@linux ~]# cat > /fglinux/python_basics.py << 'EOF'
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# python_basics.py
# from:www.itpux.com.qq113257174.wx:itpux-com
# web: http://www.fgedu.net.cn
"""Tour of basic Python syntax for operations work.

Walks through variables, lists, dictionaries, conditionals, functions,
classes and exception handling, printing the result of each step.
"""

import os
import sys
import subprocess
from datetime import datetime

# Variables and data types
name = "风哥"
age = 30
score = 95.5
is_admin = True
print(f"姓名: {name}, 年龄: {age}, 分数: {score}")

# List operations
servers = ["web1", "web2", "db1", "cache1"]
print(f"服务器列表: {servers}")
print(f"第一个服务器: {servers[0]}")
print(f"服务器数量: {len(servers)}")

# Iterating over a list
print("=== 遍历服务器 ===")
for server in servers:
    print(f"服务器: {server}")

# Dictionary operations
server_info = {
    "hostname": "linux.fgedu.net.cn",
    "ip": "192.168.1.10",
    "cpu_cores": 8,
    "memory_gb": 32
}
print(f"服务器信息: {server_info}")
print(f"主机名: {server_info['hostname']}")

# Conditionals
print("=== 条件判断 ===")
if age >= 18:
    print(f"{name} 已成年")
else:
    print(f"{name} 未成年")


# Function definitions
def get_system_info():
    """Return a dict with the host's node name, OS name, release and machine type."""
    # Query the kernel once and reuse the result for all four fields.
    uname = os.uname()
    info = {
        "hostname": uname.nodename,
        "system": uname.sysname,
        "release": uname.release,
        "machine": uname.machine
    }
    return info


def execute_command(cmd):
    """Run ``cmd`` through the shell and return its stdout, stripped.

    The command string is handed to the shell unchanged (``shell=True``),
    so it must come from a trusted source. The exit status is not checked.
    """
    result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    return result.stdout.strip()


# Calling the functions
print("=== 系统信息 ===")
sys_info = get_system_info()
for key, value in sys_info.items():
    print(f"{key}: {value}")

# Running a command
print("=== 执行命令 ===")
uptime = execute_command("uptime")
print(f"系统运行时间: {uptime}")


# Class definition
class ServerMonitor:
    """Reachability check for one host, identified by hostname and IP."""

    def __init__(self, hostname, ip):
        self.hostname = hostname
        self.ip = ip

    def ping(self):
        """Return True when ``ping -c 1 <ip>`` exits with status 0."""
        # NOTE: self.ip is interpolated into a shell command line; only pass
        # trusted addresses.
        result = subprocess.run(f"ping -c 1 {self.ip}", shell=True, capture_output=True)
        return result.returncode == 0

    def get_status(self):
        """Return hostname, IP and the current ping result as a dict."""
        return {
            "hostname": self.hostname,
            "ip": self.ip,
            "reachable": self.ping()
        }


# Using the class
monitor = ServerMonitor("web-server", "192.168.1.20")
print(f"监控状态: {monitor.get_status()}")

# Exception handling
print("=== 异常处理 ===")
try:
    with open("/etc/passwd", "r") as f:
        content = f.read()
    print(f"文件大小: {len(content)} 字节")
except FileNotFoundError:
    print("文件不存在")
except Exception as e:
    print(f"发生错误: {e}")

print("脚本执行完成")
EOF
# 执行Python脚本
[root@linux ~]# python3 /fglinux/python_basics.py
姓名: 风哥, 年龄: 30, 分数: 95.5
服务器列表: ['web1', 'web2', 'db1', 'cache1']
第一个服务器: web1
服务器数量: 4
=== 遍历服务器 ===
服务器: web1
服务器: web2
服务器: db1
服务器: cache1
服务器信息: {'hostname': 'linux.fgedu.net.cn', 'ip': '192.168.1.10', 'cpu_cores': 8, 'memory_gb': 32}
主机名: linux.fgedu.net.cn
=== 条件判断 ===
风哥 已成年
=== 系统信息 ===
hostname: linux.fgedu.net.cn
system: Linux
release: 5.14.0-284.11.1.el9_2.x86_64
machine: x86_64
=== 执行命令 ===
系统运行时间: 00:50:00 up 10 days, 2:30, 1 user, load average: 0.00, 0.01, 0.05
监控状态: {'hostname': 'web-server', 'ip': '192.168.1.20', 'reachable': True}
=== 异常处理 ===
文件大小: 2456 字节
脚本执行完成
Part02-系统监控脚本
2.1 服务器监控脚本
[root@linux ~]# cat > /fglinux/server_monitor.py << 'EOF'
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# server_monitor.py
# from:www.itpux.com.qq113257174.wx:itpux-com
# web: http://www.fgedu.net.cn
"""Collect CPU, memory, disk, network and process metrics with psutil
and print them as a plain-text report."""

import psutil
import socket
import json
from datetime import datetime


class SystemMonitor:
    """Gathers host metrics through psutil and evaluates simple usage alerts."""

    def __init__(self):
        self.hostname = socket.gethostname()

    def get_cpu_info(self):
        """Return CPU usage (sampled with interval=1), core counts and frequency.

        ``cpu_count`` is the physical core count and ``cpu_count_logical`` the
        logical one; ``cpu_freq`` is None when psutil reports no frequency.
        """
        # Query the frequency once instead of once for the test and once for the value.
        freq = psutil.cpu_freq()
        return {
            "cpu_percent": psutil.cpu_percent(interval=1),
            # cpu_count() defaults to logical=True; ask for physical cores
            # explicitly so the two counts are not identical.
            "cpu_count": psutil.cpu_count(logical=False),
            "cpu_count_logical": psutil.cpu_count(logical=True),
            "cpu_freq": freq.current if freq else None
        }

    def get_memory_info(self):
        """Return total/available/used memory in GiB (2 decimals) and percent used."""
        mem = psutil.virtual_memory()
        return {
            "total_gb": round(mem.total / (1024**3), 2),
            "available_gb": round(mem.available / (1024**3), 2),
            "used_gb": round(mem.used / (1024**3), 2),
            "percent": mem.percent
        }

    def get_disk_info(self):
        """Return one usage dict per partition, skipping unreadable mount points."""
        disks = []
        for partition in psutil.disk_partitions():
            try:
                usage = psutil.disk_usage(partition.mountpoint)
                disks.append({
                    "device": partition.device,
                    "mountpoint": partition.mountpoint,
                    "total_gb": round(usage.total / (1024**3), 2),
                    "used_gb": round(usage.used / (1024**3), 2),
                    "free_gb": round(usage.free / (1024**3), 2),
                    "percent": usage.percent
                })
            except PermissionError:
                continue
        return disks

    def get_network_info(self):
        """Return system-wide bytes (in MiB) and packet counts sent and received."""
        net = psutil.net_io_counters()
        return {
            "bytes_sent_mb": round(net.bytes_sent / (1024**2), 2),
            "bytes_recv_mb": round(net.bytes_recv / (1024**2), 2),
            "packets_sent": net.packets_sent,
            "packets_recv": net.packets_recv
        }

    def get_process_info(self, top_n=10):
        """Return the ``top_n`` processes ordered by CPU usage, highest first.

        Fields psutil could not read come back as None; they are replaced by
        0.0 (or an empty name) so that rounding and formatted printing of the
        result cannot raise TypeError.
        """
        processes = []
        ranked = sorted(
            psutil.process_iter(['pid', 'name', 'cpu_percent', 'memory_percent']),
            key=lambda p: p.info['cpu_percent'] or 0,
            reverse=True
        )
        for proc in ranked[:top_n]:
            processes.append({
                "pid": proc.info['pid'],
                "name": proc.info['name'] or "",
                "cpu_percent": proc.info['cpu_percent'] or 0.0,
                "memory_percent": round(proc.info['memory_percent'] or 0.0, 2)
            })
        return processes

    def check_alerts(self, cpu_threshold=80, mem_threshold=80, disk_threshold=80):
        """Return alert messages for every metric above its percentage threshold.

        Metrics are sampled again through the get_* methods on each call.
        """
        alerts = []
        cpu = self.get_cpu_info()
        if cpu['cpu_percent'] > cpu_threshold:
            alerts.append(f"CPU使用率过高: {cpu['cpu_percent']}%")
        mem = self.get_memory_info()
        if mem['percent'] > mem_threshold:
            alerts.append(f"内存使用率过高: {mem['percent']}%")
        for disk in self.get_disk_info():
            if disk['percent'] > disk_threshold:
                alerts.append(f"磁盘 {disk['mountpoint']} 使用率过高: {disk['percent']}%")
        return alerts

    def generate_report(self):
        """Return a dict with a timestamp, the hostname, all metrics and alerts."""
        report = {
            "timestamp": datetime.now().isoformat(),
            "hostname": self.hostname,
            "cpu": self.get_cpu_info(),
            "memory": self.get_memory_info(),
            "disk": self.get_disk_info(),
            "network": self.get_network_info(),
            "top_processes": self.get_process_info(),
            "alerts": self.check_alerts()
        }
        return report


if __name__ == "__main__":
    monitor = SystemMonitor()
    report = monitor.generate_report()

    print("=" * 60)
    print(f"系统监控报告 – {report['hostname']}")
    print("=" * 60)
    print(f"时间: {report['timestamp']}")
    print()

    print("【CPU信息】")
    cpu = report['cpu']
    print(f" 使用率: {cpu['cpu_percent']}%")
    print(f" 核心数: {cpu['cpu_count']} (逻辑: {cpu['cpu_count_logical']})")
    print()

    print("【内存信息】")
    mem = report['memory']
    print(f" 总计: {mem['total_gb']} GB")
    print(f" 已用: {mem['used_gb']} GB ({mem['percent']}%)")
    print(f" 可用: {mem['available_gb']} GB")
    print()

    print("【磁盘信息】")
    for disk in report['disk']:
        print(f" {disk['mountpoint']}: {disk['used_gb']}/{disk['total_gb']} GB ({disk['percent']}%)")
    print()

    print("【网络信息】")
    net = report['network']
    print(f" 发送: {net['bytes_sent_mb']} MB")
    print(f" 接收: {net['bytes_recv_mb']} MB")
    print()

    print("【Top进程】")
    # Only the first five of the collected processes are shown.
    for proc in report['top_processes'][:5]:
        print(f" {proc['pid']:6} {proc['name'][:20]:20} CPU:{proc['cpu_percent']:5}% MEM:{proc['memory_percent']:5}%")
    print()

    if report['alerts']:
        print("【告警信息】")
        for alert in report['alerts']:
            print(f" ⚠ {alert}")
    else:
        print("【告警信息】无告警")
    print("=" * 60)
EOF
# 执行监控脚本
[root@linux ~]# python3 /fglinux/server_monitor.py
============================================================
系统监控报告 – linux.fgedu.net.cn
============================================================
时间: 2026-04-04T00:55:00.123456
【CPU信息】
使用率: 15.5%
核心数: 4 (逻辑: 8)
【内存信息】
总计: 31.25 GB
已用: 8.52 GB (27.3%)
可用: 22.73 GB
【磁盘信息】
/: 45.2/100.0 GB (45.2%)
/boot: 0.5/1.0 GB (50.0%)
/data: 200.5/500.0 GB (40.1%)
【网络信息】
发送: 1234.56 MB
接收: 5678.90 MB
【Top进程】
1 systemd CPU: 2.5% MEM: 0.5%
456 nginx CPU: 1.8% MEM: 1.2%
789 mysql CPU: 1.5% MEM: 15.3%
1234 python3 CPU: 1.0% MEM: 0.8%
2345 sshd CPU: 0.5% MEM: 0.3%
【告警信息】无告警
============================================================
- 使用虚拟环境隔离项目依赖
- 编写模块化和可复用的代码
- 添加异常处理提高健壮性
- 使用psutil库获取系统信息
- 配置日志记录便于调试
本文由风哥教程整理发布,仅用于学习测试使用,转载注明出处:http://www.fgedu.net.cn/10327.html
