内容简介:本文风哥教程参考Linux官方文档、Red Hat Enterprise Linux官方文档、Ansible Automation Platform官方文档、Docker官方文档、Kubernetes官方文档和Podman官方文档等内容,详细介绍了相关技术的配置和使用方法。
风哥提示:
本文档介绍监控系统的部署和配置方法。
Part01-Prometheus部署
1.1 安装Prometheus
[root@prometheus ~]# useradd -r -s /sbin/nologin prometheus
# 下载Prometheus
[root@prometheus ~]# cd /opt
[root@prometheus opt]# wget https://github.com/prometheus/prometheus/releases/download/v2.42.0/prometheus-2.42.0.linux-amd64.tar.gz
[root@prometheus opt]# tar xzf prometheus-2.42.0.linux-amd64.tar.gz
[root@prometheus opt]# ln -s prometheus-2.42.0.linux-amd64 prometheus
# 创建目录
[root@prometheus ~]# mkdir -p /var/lib/prometheus
[root@prometheus ~]# chown prometheus:prometheus /var/lib/prometheus
# 配置Prometheus
[root@prometheus ~]# cat > /opt/prometheus/prometheus.yml << 'EOF'
global:
  scrape_interval: 15s
  evaluation_interval: 15s
alerting:
  alertmanagers:
    - static_configs:
        - targets: []
rule_files: []
scrape_configs:
  - job_name: "prometheus"
    static_configs:
      - targets: ["localhost:9090"]
  - job_name: "node_exporter"
    static_configs:
      - targets:
          - "192.168.1.11:9100"
          - "192.168.1.12:9100"
          - "192.168.1.13:9100"
  - job_name: "nginx"
    static_configs:
      - targets:
          - "192.168.1.11:9113"
          - "192.168.1.12:9113"
          - "192.168.1.13:9113"
EOF
# 创建systemd服务
[root@prometheus ~]# cat > /etc/systemd/system/prometheus.service << 'EOF'
[Unit]
Description=Prometheus
Wants=network-online.target
After=network-online.target
[Service]
User=prometheus
Group=prometheus
Type=simple
ExecStart=/opt/prometheus/prometheus \
--config.file /opt/prometheus/prometheus.yml \
--storage.tsdb.path /var/lib/prometheus/ \
--web.console.templates=/opt/prometheus/consoles \
--web.console.libraries=/opt/prometheus/console_libraries
[Install]
WantedBy=multi-user.target
EOF
# 启动Prometheus
[root@prometheus ~]# systemctl daemon-reload
[root@prometheus ~]# systemctl enable --now prometheus
Created symlink /etc/systemd/system/multi-user.target.wants/prometheus.service → /etc/systemd/system/prometheus.service.
# 测试访问
[root@prometheus ~]# curl http://localhost:9090/-/healthy
Prometheus is Healthy.
1.2 安装Node Exporter
[root@node1 ~]# useradd -r -s /sbin/nologin node_exporter
[root@node1 ~]# cd /opt
[root@node1 opt]# wget https://github.com/prometheus/node_exporter/releases/download/v1.5.0/node_exporter-1.5.0.linux-amd64.tar.gz
[root@node1 opt]# tar xzf node_exporter-1.5.0.linux-amd64.tar.gz
[root@node1 opt]# ln -s node_exporter-1.5.0.linux-amd64 node_exporter
# 创建systemd服务
[root@node1 ~]# cat > /etc/systemd/system/node_exporter.service << 'EOF'
[Unit]
Description=Node Exporter
Wants=network-online.target
After=network-online.target
[Service]
User=node_exporter
Group=node_exporter
Type=simple
ExecStart=/opt/node_exporter/node_exporter
[Install]
WantedBy=multi-user.target
EOF
# 启动Node Exporter
[root@node1 ~]# systemctl daemon-reload
[root@node1 ~]# systemctl enable --now node_exporter
# 测试访问
[root@node1 ~]# curl http://localhost:9100/metrics | head
# HELP go_gc_duration_seconds A summary of the pause duration of garbage collection cycles.
# TYPE go_gc_duration_seconds summary
go_gc_duration_seconds{quantile="0"} 0.000123456
go_gc_duration_seconds{quantile="0.25"} 0.000234567
go_gc_duration_seconds{quantile="0.5"} 0.000345678
go_gc_duration_seconds{quantile="0.75"} 0.000456789
go_gc_duration_seconds{quantile="1"} 0.000567890
go_gc_duration_seconds_sum 0.001234567
go_gc_duration_seconds_count 5
# HELP go_goroutines Number of goroutines that currently exist.
# TYPE go_goroutines gauge
go_goroutines 10
Part02-Grafana部署
2.1 安装Grafana
[root@grafana ~]# cat > /etc/yum.repos.d/grafana.repo << 'EOF'
[grafana]
name=grafana
baseurl=https://packages.grafana.com/oss/rpm
repo_gpgcheck=1
enabled=1
gpgcheck=1
gpgkey=https://packages.grafana.com/gpg.key
sslverify=1
sslcacert=/etc/pki/tls/certs/ca-bundle.crt
EOF
# 安装Grafana
[root@grafana ~]# dnf install -y grafana
# 启动Grafana
[root@grafana ~]# systemctl enable --now grafana-server
Created symlink /etc/systemd/system/multi-user.target.wants/grafana-server.service → /usr/lib/systemd/system/grafana-server.service.
# 配置防火墙
[root@grafana ~]# firewall-cmd --permanent --add-port=3000/tcp
success
[root@grafana ~]# firewall-cmd --reload
success
# 访问Web界面
# http://192.168.1.10:3000
# 默认用户名/密码: admin/admin
# 添加Prometheus数据源
# 注意:grafana-cli 实际并不提供 data-sources create 子命令,以下命令仅为示意;实际环境请通过Web界面、provisioning配置文件(/etc/grafana/provisioning/datasources/)或HTTP API添加数据源
[root@grafana ~]# grafana-cli admin data-sources create << 'EOF'
{
  "name": "Prometheus",
  "type": "prometheus",
  "url": "http://192.168.1.10:9090",
  "access": "proxy",
  "isDefault": true
}
EOF
# 安装仪表盘
[root@grafana ~]# grafana-cli plugins install grafana-clock-panel
installing grafana-clock-panel @ 2.0.1
from url: https://grafana.com/api/plugins/grafana-clock-panel/versions/2.0.1/download
into: /var/lib/grafana/plugins
Installed grafana-clock-panel successfully
# 重启Grafana
[root@grafana ~]# systemctl restart grafana-server
# 常用PromQL查询
[root@prometheus ~]# cat > /root/promql-examples.txt << 'EOF'
PromQL常用查询
==============
# CPU使用率
100 - (avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)
# 内存使用率
(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100
# 磁盘使用率
(1 - (node_filesystem_avail_bytes{fstype!="tmpfs"} / node_filesystem_size_bytes{fstype!="tmpfs"})) * 100
# 网络流量
rate(node_network_receive_bytes_total{device!="lo"}[5m])
rate(node_network_transmit_bytes_total{device!="lo"}[5m])
# 磁盘I/O
rate(node_disk_read_bytes_total[5m])
rate(node_disk_written_bytes_total[5m])
# 系统负载
node_load1
node_load5
node_load15
EOF
- 配置合理的采集间隔
- 设置告警规则
- 使用Grafana可视化
- 配置数据保留策略
- 监控监控系统本身
本文由风哥教程整理发布,仅用于学习测试使用,转载注明出处:http://www.fgedu.net.cn/10327.html
