Linux教程FG327-集群性能优化

内容简介：本文风哥教程参考Linux官方文档、Red Hat Enterprise Linux官方文档、Ansible Automation Platform官方文档、Docker官方文档、Kubernetes官方文档和Podman官方文档等内容，详细介绍了相关技术的配置和使用方法。

本文档详细介绍集群性能优化的方法和技巧。

风哥提示：

Part01-集群属性优化

1.1 优化集群超时

# 查看当前集群属性
[root@ha-node1 ~]# pcs property list
Cluster Properties:
cluster-infrastructure: corosync
cluster-name: mycluster
dc-version: 2.1.6-1.el9
have-watchdog: false
stonith-enabled: true
stonith-timeout: 60s
cluster-delay: 60s
dc-deadtime: 20s

# 优化集群延迟
[root@ha-node1 ~]# pcs property set cluster-delay=30s

# 优化DC等待时间（dc-deadtime：启动时等待其他节点响应的最长时间）
[root@ha-node1 ~]# pcs property set dc-deadtime=10s

# 优化stonith超时
[root@ha-node1 ~]# pcs property set stonith-timeout=30s

# 验证优化
[root@ha-node1 ~]# pcs property list | grep -E "delay|timeout|deadtime"
cluster-delay: 30s
dc-deadtime: 10s
stonith-timeout: 30s

# 优化批量限制
[root@ha-node1 ~]# pcs property set batch-limit=30

# 优化节点动作限制
[root@ha-node1 ~]# pcs property set node-action-limit=20

# 验证设置
[root@ha-node1 ~]# pcs property list | grep limit
batch-limit: 30
node-action-limit: 20

1.2 优化资源监控

# 查看资源配置
[root@ha-node1 ~]# pcs resource show vip
Resource: vip (class=ocf provider=heartbeat type=IPaddr2)
Attributes: cidr_netmask=24 ip=192.168.1.100
Operations: monitor interval=30s (vip-monitor-interval-30s)

# 优化监控间隔
[root@ha-node1 ~]# pcs resource update vip op monitor interval=15s timeout=10s

# 验证更新
[root@ha-node1 ~]# pcs resource show vip
Resource: vip (class=ocf provider=heartbeat type=IPaddr2)
Attributes: cidr_netmask=24 ip=192.168.1.100
Operations: monitor interval=15s timeout=10s (vip-monitor-interval-15s)

# 批量更新资源监控
[root@ha-node1 ~]# for res in vip nginx mysql; do
pcs resource update $res op monitor interval=15s timeout=10s
done

# 验证更新
[root@ha-node1 ~]# pcs resource show
vip (ocf::heartbeat:IPaddr2): Started
nginx (systemd:nginx): Started
mysql (systemd:mariadb): Started

Part02-Corosync优化

2.1 优化Corosync配置

# 查看当前Corosync配置
[root@ha-node1 ~]# pcs cluster corosync
totem {
version: 2
cluster_name: mycluster
secauth: off
transport: knet
token: 1000
token_retransmits_before_loss_const: 10
join: 50
consensus: 1200
max_messages: 20
}

nodelist {
node {
ring0_addr: ha-node1
name: ha-node1
nodeid: 1
}
node {
ring0_addr: ha-node2
name: ha-node2
nodeid: 2
}
}

quorum {
provider: corosync_votequorum
two_node: 1
}

# 优化token超时
[root@ha-node1 ~]# pcs cluster corosync node add-attr ha-node1 token=5000

# 优化consensus超时
[root@ha-node1 ~]# pcs cluster corosync node add-attr ha-node1 consensus=6000

# 优化join超时
[root@ha-node1 ~]# pcs cluster corosync node add-attr ha-node1 join=100

# 验证优化
[root@ha-node1 ~]# pcs cluster corosync | grep -E "token|consensus|join"
token: 5000
join: 100
consensus: 6000

2.2 优化网络传输

# 查看网络配置
[root@ha-node1 ~]# pcs cluster corosync | grep -A 10 "totem"
totem {
version: 2
cluster_name: mycluster
secauth: off
transport: knet
token: 5000
token_retransmits_before_loss_const: 10
join: 100
consensus: 6000
max_messages: 20
}

# 优化max_messages
[root@ha-node1 ~]# pcs cluster corosync node add-attr ha-node1 max_messages=100

# 优化token重传
[root@ha-node1 ~]# pcs cluster corosync node add-attr ha-node1 token_retransmits_before_loss_const=20

# 启用加密认证
[root@ha-node1 ~]# pcs cluster corosync node add-attr ha-node1 secauth=on

# 验证优化
[root@ha-node1 ~]# pcs cluster corosync | grep -E "max_messages|token_retransmits|secauth"
secauth: on
token_retransmits_before_loss_const: 20
max_messages: 100

# 重启集群应用配置
[root@ha-node1 ~]# pcs cluster restart --all
ha-node1: Stopping Cluster (pacemaker)...
ha-node1: Stopping Cluster (corosync)...
ha-node2: Stopping Cluster (pacemaker)...
ha-node2: Stopping Cluster (corosync)...
ha-node1: Starting Cluster (corosync)...
ha-node1: Starting Cluster (pacemaker)...
ha-node2: Starting Cluster (corosync)...
ha-node2: Starting Cluster (pacemaker)...

Part03-资源优化

3.1 优化资源粘性

# 设置默认资源粘性
[root@ha-node1 ~]# pcs resource defaults resource-stickiness=100

# 查看默认值
[root@ha-node1 ~]# pcs resource defaults
resource-stickiness: 100

# 为特定资源设置粘性
[root@ha-node1 ~]# pcs resource update vip meta resource-stickiness=200

# 验证设置
[root@ha-node1 ~]# pcs resource show vip
Resource: vip (class=ocf provider=heartbeat type=IPaddr2)
Attributes: cidr_netmask=24 ip=192.168.1.100
Meta Attrs: resource-stickiness=200
Operations: monitor interval=15s timeout=10s (vip-monitor-interval-15s)

# 设置迁移阈值
[root@ha-node1 ~]# pcs resource update nginx meta migration-threshold=3

# 设置失败超时
[root@ha-node1 ~]# pcs resource update nginx meta failure-timeout=60s

# 验证设置
[root@ha-node1 ~]# pcs resource show nginx
Resource: nginx (class=systemd type=nginx)
Meta Attrs: failure-timeout=60s migration-threshold=3
Operations: monitor interval=15s timeout=10s (nginx-monitor-interval-15s)

3.2 优化资源启动顺序

# 查看当前约束
[root@ha-node1 ~]# pcs constraint
Location Constraints:
No location constraints found
Ordering Constraints:
No ordering constraints found
Colocation Constraints:
No colocation constraints found

# 设置启动顺序
[root@ha-node1 ~]# pcs constraint order vip then nginx
Adding vip nginx (kind: Mandatory) (Options: first-action=start then-action=start)

[root@ha-node1 ~]# pcs constraint order nginx then mysql
Adding nginx mysql (kind: Mandatory) (Options: first-action=start then-action=start)

# 设置共置约束
[root@ha-node1 ~]# pcs constraint colocation add nginx with vip INFINITY
[root@ha-node1 ~]# pcs constraint colocation add mysql with nginx INFINITY

# 查看约束
[root@ha-node1 ~]# pcs constraint
Location Constraints:
No location constraints found
Ordering Constraints:
start vip then start nginx (Mandatory)
start nginx then start mysql (Mandatory)
Colocation Constraints:
nginx with vip (score:INFINITY)
mysql with nginx (score:INFINITY)

# 优化启动超时
[root@ha-node1 ~]# pcs resource update nginx op start timeout=30s
[root@ha-node1 ~]# pcs resource update mysql op start timeout=60s

# 验证优化
[root@ha-node1 ~]# pcs resource show nginx
Resource: nginx (class=systemd type=nginx)
Meta Attrs: failure-timeout=60s migration-threshold=3
Operations: monitor interval=15s timeout=10s (nginx-monitor-interval-15s)
start interval=0s timeout=30s (nginx-start-interval-0s)

Part04-性能监控

4.1 监控集群性能

# 监控集群状态
[root@ha-node1 ~]# pcs status
Cluster name: mycluster
Cluster Summary:
* Stack: corosync
* Current DC: ha-node1 (version 2.1.6-1.el9)
* Last updated: Fri Apr 4 12:40:00 2026
* Last change: Fri Apr 4 12:35:00 2026
* 2 nodes configured
* 3 resource instances configured

# 监控资源操作时间
[root@ha-node1 ~]# pcs status operations
Operations:
* vip: monitor interval=15s last-rc-change=Fri Apr 4 12:40:00 2026 exec-time=10ms
* nginx: monitor interval=15s last-rc-change=Fri Apr 4 12:40:00 2026 exec-time=5ms
* mysql: monitor interval=15s last-rc-change=Fri Apr 4 12:40:00 2026 exec-time=8ms

# 监控系统资源
[root@ha-node1 ~]# top -b -n 1 | head -20
top - 12:40:00 up 2:40, 1 user, load average: 0.00, 0.01, 0.05
Tasks: 150 total, 1 running, 149 sleeping, 0 stopped, 0 zombie
%Cpu(s): 0.0 us, 0.0 sy, 0.0 ni,100.0 id, 0.0 wa, 0.0 hi, 0.0 si
MiB Mem : 3861.9 total, 2500.5 free, 500.2 used, 861.2 buff/cache
MiB Swap: 2048.0 total, 2048.0 free, 0.0 used. 3100.5 avail Mem

PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
12345 root 20 0 150000 50000 10000 S 0.0 1.3 0:05.00 pacemaker
12340 root 20 0 100000 30000 5000 S 0.0 0.8 0:03.00 corosync

# 监控网络延迟
[root@ha-node1 ~]# ping -c 10 ha-node2 | tail -2
--- ha-node2 ping statistics ---
10 packets transmitted, 10 received, 0% packet loss, time 9010ms
rtt min/avg/max/mdev = 0.389/0.407/0.421/0.013 ms

风哥针对性能优化建议：

根据网络环境调整超时
优化资源监控间隔
设置合理的资源粘性
配置正确的启动顺序
定期监控集群性能

本文由风哥教程整理发布,仅用于学习测试使用,转载注明出处:http://www.fgedu.net.cn/10327.html