内容大纲
内容简介:本文风哥教程参考Linux官方文档、Red Hat Enterprise Linux官方文档、Ansible Automation Platform官方文档、Docker官方文档、Kubernetes官方文档和Podman官方文档等内容,详细介绍了相关技术的配置和使用方法。
1. 工具概述
系统监控与性能调优工具是用于监控系统性能、分析性能瓶颈、进行性能调优的工具集合。
学习交流加群风哥微信: itpux-com
# 系统监控工具:实时监控系统性能
# 性能分析工具:分析性能瓶颈
# 故障排查工具:排查系统故障
# 性能调优工具:进行性能调优
# 综合工具:集成多种功能
2. 系统监控工具
实时监控系统性能的工具。
更多视频教程www.fgedu.net.cn
# 1. top
[root@localhost ~]# top -bn1 | head -20
top - 10:00:00 up 1 day, 2:30, 4 users, load average: 0.10, 0.15, 0.12
Tasks: 123 total, 1 running, 122 sleeping, 0 stopped, 0 zombie
%Cpu(s): 2.5 us, 1.0 sy, 0.0 ni, 96.0 id, 0.5 wa, 0.0 hi, 0.0 si, 0.0 st
MiB Mem : 8192.0 total, 4096.0 free, 2048.0 used, 2048.0 buff/cache
MiB Swap: 4096.0 total, 4096.0 free, 0.0 used. 8192.0 avail Mem
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
1234 root 20 0 123456 10240 4096 S 2.5 0.1 0:00.05 systemd
2345 root 20 0 65536 5120 2048 S 1.0 0.1 0:00.03 kworker/u4:0
3456 mysql 20 0 2097152 524288 102400 S 0.5 6.3 1:23.45 mysqld
4567 nginx 20 0 262144 51200 20480 S 0.3 0.6 0:12.34 nginx
5678 root 20 0 131072 10240 4096 R 0.2 0.1 0:00.01 top
# 2. htop
[root@localhost ~]# htop
# htop启动后,显示交互式系统监控界面
# 3. vmstat
[root@localhost ~]# vmstat 1 3
procs -----------memory---------- ---swap-- -----io---- -system-- ------cpu-----
r b swpd free buff cache si so bi bo in cs us sy id wa st
1 0 0 4096000 102400 2048000 0 0 0 0 10 20 2 1 96 1 0
0 0 0 4096000 102400 2048000 0 0 0 0 12 25 3 1 95 1 0
1 0 0 4096000 102400 2048000 0 0 0 0 11 22 2 1 96 1 0
# 4. iostat
[root@localhost ~]# iostat -x 1 3
Linux 5.14.0-362.el9.x86_64 (localhost.localdomain) 04/03/2026 _x86_64_ (4 CPU)
avg-cpu: %user %nice %system %iowait %steal %idle
2.50 0.00 1.00 0.50 0.00 96.00
Device r/s w/s rkB/s wkB/s rrqm/s wrqm/s %rrqm %wrqm r_await w_await aqu-sz rareq-sz wareq-sz svctm %util
sda 0.50 0.50 5.00 2.50 0.00 0.00 0.00 0.00 0.00 0.00 0.00 10.00 5.00 0.00 0.00
sdb 1.00 1.00 10.00 5.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 10.00 5.00 0.00 0.00
sdc 1.50 1.50 15.00 7.50 0.00 0.00 0.00 0.00 0.00 0.00 0.00 10.00 5.00 0.00 0.00
# 5. sar
[root@localhost ~]# sar -u 1 3
Linux 5.14.0-362.el9.x86_64 (localhost.localdomain) 04/03/2026 _x86_64_ (4 CPU)
10:00:00 AM CPU %user %nice %system %iowait %steal %idle
10:00:01 AM all 2.50 0.00 1.00 0.50 0.00 96.00
10:00:01 AM 0 2.00 0.00 1.00 0.00 0.00 97.00
10:00:01 AM 1 3.00 0.00 1.00 0.00 0.00 96.00
10:00:01 AM 2 2.00 0.00 1.00 0.00 0.00 97.00
10:00:01 AM 3 3.00 0.00 1.00 0.00 0.00 96.00
# 6. netstat
[root@localhost ~]# netstat -tuln
Active Internet connections (only servers)
Proto Recv-Q Send-Q Local Address Foreign Address State
tcp 0 0 0.0.0.0:22 0.0.0.0:* LISTEN
tcp 0 0 0.0.0.0:80 0.0.0.0:* LISTEN
tcp 0 0 0.0.0.0:443 0.0.0.0:* LISTEN
udp 0 0 0.0.0.0:123 0.0.0.0:*
udp 0 0 0.0.0.0:514 0.0.0.0:*
# 7. ss
[root@localhost ~]# ss -tulnp
Netid State Recv-Q Send-Q Local Address:Port Peer Address:PortProcess
udp UNCONN 0 0 0.0.0.0:123 0.0.0.0:* users:(("chronyd",pid=1234,fd=3))
udp UNCONN 0 0 0.0.0.0:514 0.0.0.0:* users:(("rsyslogd",pid=1235,fd=4))
tcp LISTEN 0 128 0.0.0.0:22 0.0.0.0:* users:(("sshd",pid=1236,fd=3))
tcp LISTEN 0 128 0.0.0.0:80 0.0.0.0:* users:(("nginx",pid=1237,fd=6))
tcp LISTEN 0 128 0.0.0.0:443 0.0.0.0:* users:(("nginx",pid=1237,fd=7))
# 8. nmon
[root@localhost ~]# nmon
# nmon启动后,显示交互式系统监控界面
# 9. dstat
[root@localhost ~]# dstat 1 3
----total-cpu-usage---- -dsk/total- -net/total- ---paging-- ---system--
usr sys idl wai stl| read writ| recv send| in out | csw int
2 1 96 1 0| 0B 0B| 0B 0B| 0 0B| 20 10
3 1 95 1 0| 0B 0B| 0B 0B| 0 0B| 25 12
2 1 96 1 0| 0B 0B| 0B 0B| 0 0B| 22 11
# 10. glances
[root@localhost ~]# glances
# glances启动后,显示交互式系统监控界面
from PG视频:www.itpux.com
3. 性能分析工具
分析性能瓶颈的工具。
# 1. perf
[root@localhost ~]# perf top
# perf top启动后,显示性能分析界面
# 2. strace
[root@localhost ~]# strace -p 1234
strace: Process 1234 attached
epoll_wait(4, [], 1, 1000) = 0
epoll_wait(4, [], 1, 1000) = 0
epoll_wait(4, [], 1, 1000) = 0
# 3. ltrace
[root@localhost ~]# ltrace -p 1234
# ltrace启动后,显示库函数调用跟踪
# 4. valgrind
[root@localhost ~]# valgrind --leak-check=full ./myapp
# valgrind启动后,显示内存泄漏检测
# 5. gdb
[root@localhost ~]# gdb -p 1234
# gdb启动后,显示调试界面
# 6. tcpdump
[root@localhost ~]# tcpdump -i eth0 -n
tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
listening on eth0, link-type EN10MB (Ethernet), capture size 262144 bytes
10:00:00.123456 IP 192.168.1.100.12345 > 192.168.1.200.80: Flags [S], seq 0, win 65535, options [mss 1460,nop,wscale 6,nop,nop,TS val 1234567890 ecr 0,sackOK,eol], length 0
10:00:00.123457 IP 192.168.1.200.80 > 192.168.1.100.12345: Flags [S.], seq 0, ack 1, win 65535, options [mss 1460,nop,wscale 6,nop,nop,TS val 1234567891 ecr 1234567890,sackOK,eol], length 0
10:00:00.123458 IP 192.168.1.100.12345 > 192.168.1.200.80: Flags [.], ack 1, win 65535, options [nop,nop,TS val 1234567892 ecr 1234567891], length 0
# 7. wireshark
[root@localhost ~]# wireshark
# wireshark启动后,显示网络协议分析界面
# 8. iotop
[root@localhost ~]# iotop -o
Total DISK READ: 0.00 B/s | Total DISK WRITE: 0.00 B/s
TID PRIO USER DISK READ DISK WRITE SWAPIN IO> COMMAND
1234 be/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % systemd
2345 be/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % kworker/u4:0
3456 be/4 mysql 0.00 B/s 0.00 B/s 0.00 % 0.00 % mysqld
4567 be/4 nginx 0.00 B/s 0.00 B/s 0.00 % 0.00 % nginx
# 9. nethogs
[root@localhost ~]# nethogs eth0
# nethogs启动后,显示进程网络流量
# 10. pidstat
[root@localhost ~]# pidstat -p ALL 1 3
Linux 5.14.0-362.el9.x86_64 (localhost.localdomain) 04/03/2026 _x86_64_ (4 CPU)
10:00:00 AM UID PID %usr %system %guest %wait %CPU CPU Command
10:00:01 AM 0 1 0.00 0.00 0.00 0.00 0.00 0.00 systemd
10:00:01 AM 0 123 0.00 0.00 0.00 0.00 0.00 0.00 kworker/0:0H
10:00:01 AM 0 3456 0.50 0.00 0.00 0.00 0.50 0.00 mysqld
风哥提示:
4. 故障排查工具
排查系统故障的工具。
# 1. journalctl
[root@localhost ~]# journalctl -b
Apr 03 09:00:00 localhost systemd[1]: Starting system…
Apr 03 09:00:00 localhost kernel: Linux version 5.14.0-362.el9.x86_64 (mockbuild@x86-05.bsys.centos.org) (gcc version 11.2.1 20220127 (Red Hat 11.2.1-9), GNU ld version 2.35-9.el9) #1 SMP PREEMPT Wed Jan 11 18:35:18 UTC 2023
Apr 03 09:00:00 localhost kernel: Command line: BOOT_IMAGE=/vmlinuz-5.14.0-362.el9.x86_64 root=/dev/mapper/rootvg-rootlv ro crashkernel=auto resume=/dev/mapper/rootvg-swaplv rd.lvm.lv=rootvg/rootlv rd.lvm.lv=rootvg/swaplv rhgb quiet
# 2. dmesg
[root@localhost ~]# dmesg | tail -20
[12345.678901] ACPI: Core revision 20220331
[12345.678902] ACPI: All ACPI Tables successfully acquired
[12345.678903] ACPI: DSDT 0000000000000000 v02 BOCHS BXPCDSDT 00000001 BXPC 00000001
[12345.678904] ACPI: XSDT 0000000000000000 v01 BOCHS BXPCXSDT 00000001 BXPC 00000001
# 3. systemctl
[root@localhost ~]# systemctl list-units --failed
UNIT LOAD ACTIVE SUB DESCRIPTION
● mysqld.service loaded failed failed MySQL Database Server
LOAD = Reflects whether unit definition was properly loaded.
ACTIVE = The high-level unit activation state, i.e. generalization of SUB.
SUB = The low-level unit activation state, values depend on unit type.
1 loaded units listed.
# 4. ping
[root@localhost ~]# ping -c 4 192.168.1.1
PING 192.168.1.1 (192.168.1.1) 56(84) bytes of data.
64 bytes from 192.168.1.1: icmp_seq=1 ttl=64 time=0.123 ms
64 bytes from 192.168.1.1: icmp_seq=2 ttl=64 time=0.123 ms
64 bytes from 192.168.1.1: icmp_seq=3 ttl=64 time=0.123 ms
64 bytes from 192.168.1.1: icmp_seq=4 ttl=64 time=0.123 ms
--- 192.168.1.1 ping statistics ---
4 packets transmitted, 4 received, 0% packet loss
rtt min/avg/max/mdev = 0.123/0.123/0.123/0.000 ms
# 5. traceroute
[root@localhost ~]# traceroute www.fgedu.net.cn
traceroute to www.fgedu.net.cn (93.184.216.34), 30 hops max, 60 byte packets
1 192.168.1.1 (192.168.1.1) 0.123 ms 0.123 ms 0.123 ms
2 192.168.1.254 (192.168.1.254) 0.456 ms 0.456 ms 0.456 ms
3 10.0.0.1 (10.0.0.1) 1.234 ms 1.234 ms 1.234 ms
4 93.184.216.34 (93.184.216.34) 12.345 ms 12.345 ms 12.345 ms
# 6. nslookup
[root@localhost ~]# nslookup www.fgedu.net.cn
Server: 192.168.1.1
Address: 192.168.1.1#53
Non-authoritative answer:
Name: www.fgedu.net.cn
Address: 93.184.216.34
# 7. smartctl
[root@localhost ~]# smartctl -H /dev/sda
smartctl 7.2 2020-12-30 r5155 [x86_64-linux-5.14.0-362.el9.x86_64] (local build)
Copyright (C) 2002-20, Bruce Allen, Christian Franke, www.smartmontools.org
=== START OF READ SMART DATA SECTION ===
SMART overall-health self-assessment test result: PASSED
# 8. lsblk
[root@localhost ~]# lsblk
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
sda 8:0 0 931G 0 disk
└─sda1 8:1 0 931G 0 part /
sdb 8:16 0 931G 0 disk
└─sdb1 8:17 0 931G 0 part /data
sdc 8:32 0 931G 0 disk
└─sdc1 8:33 0 931G 0 part /backup
# 9. lsof
[root@localhost ~]# lsof -p 1234
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
mysqld 1234 mysql cwd DIR 8,1 4096 2 /
mysqld 1234 mysql rtd DIR 8,1 4096 2 /
mysqld 1234 mysql txt REG 8,1 12345678 1234567 /usr/libexec/mysqld
# 10. ps
[root@localhost ~]# ps aux
USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
root 1 0.0 0.1 123456 10240 ? Ss 09:00 0:01 /usr/lib/systemd/systemd --switched-root --system --deserialize 17
root 123 0.0 0.0 65536 5120 ? S< 09:00 0:00 [kworker/0:0H]
root 3456 0.5 6.3 2097152 524288 ? S 09:00 1:23 /usr/libexec/mysqld --basedir=/usr --datadir=/var/lib/mysql --plugin-dir=/usr/lib64/mysql/plugin --user=mysql --log-error=/var/log/mysqld.log --pid-file=/var/run/mysqld/mysqld.pid
学习交流加群风哥QQ113257174
5. 性能调优工具
进行性能调优的工具。
# 1. cpupower
[root@localhost ~]# cpupower frequency-set -g performance
Setting cpu: 0
Setting cpu: 1
Setting cpu: 2
Setting cpu: 3
# 2. taskset
[root@localhost ~]# taskset -pc 0,1 1234
pid 1234's new affinity list: 0,1
# 3. renice
[root@localhost ~]# renice -n -5 -p 1234
1234 (process ID) old priority 0, new priority -5
# 4. sysctl
[root@localhost ~]# sysctl -w vm.swappiness=10
vm.swappiness = 10
# 5. ethtool
[root@localhost ~]# ethtool -s eth0 speed 1000 duplex full autoneg on
# 6. blockdev
[root@localhost ~]# blockdev --setra 1024 /dev/sda
# 7. iptables
[root@localhost ~]# iptables -A INPUT -p tcp --dport 80 -j ACCEPT
# 8. firewalld
[root@localhost ~]# firewall-cmd --permanent --add-service=http
success
# 9. tuned
[root@localhost ~]# tuned-adm profile throughput-performance
# 10. ulimit
[root@localhost ~]# ulimit -n 65535
更多学习教程公众号风哥教程itpux_com
6. 工具选择建议
根据场景选择合适的工具。
# 1. 系统监控工具选择
# 实时监控:top、htop、glances
# 历史监控:sar、sysstat
# 磁盘I/O监控:iostat、iotop
# 网络监控:netstat、ss、nethogs
# 综合监控:nmon、dstat
# 2. 性能分析工具选择
# CPU性能分析:perf、pidstat
# 内存性能分析:valgrind、smem
# 磁盘I/O性能分析:iotop、blktrace
# 网络性能分析:tcpdump、wireshark
# 进程性能分析:strace、ltrace
# 3. 故障排查工具选择
# 系统日志:journalctl、dmesg
# 网络故障:ping、traceroute、nslookup
# 磁盘故障:smartctl、badblocks
# 进程故障:ps、top、htop
# 服务故障:systemctl、service
# 4. 性能调优工具选择
# CPU调优:cpupower、taskset、renice
# 内存调优:sysctl、ulimit
# 磁盘I/O调优:blockdev、ionice
# 网络调优:ethtool、sysctl、firewall-cmd
# 系统调优:tuned、sysctl
# 5. 工具组合使用
# 监控+分析:top + perf
# 监控+调优:htop + cpupower
# 分析+调优:strace + renice
# 排查+调优:journalctl + sysctl
# 综合:nmon + tuned
提示
系统监控与性能调优工具需要根据实际应用场景选择。建议熟悉常用工具的使用方法,掌握工具的组合使用技巧,提高监控与调优效率。
