1. 首页 > Linux教程 > 正文

Linux教程FG376-监控系统部署

内容简介:本文风哥教程参考Linux官方文档、Red Hat Enterprise Linux官方文档、Ansible Automation Platform官方文档、Docker官方文档、Kubernetes官方文档和Podman官方文档等内容,详细介绍了相关技术的配置和使用方法。

风哥提示:

本文档介绍监控系统的部署和配置方法。

Part01-Prometheus部署

1.1 安装Prometheus

# 创建用户
[root@prometheus ~]# useradd -r -s /sbin/nologin prometheus

# 下载Prometheus
[root@prometheus ~]# cd /opt
[root@prometheus opt]# wget https://github.com/prometheus/prometheus/releases/download/v2.42.0/prometheus-2.42.0.linux-amd64.tar.gz
[root@prometheus opt]# tar xzf prometheus-2.42.0.linux-amd64.tar.gz
[root@prometheus opt]# ln -s prometheus-2.42.0.linux-amd64 prometheus

# 创建目录
[root@prometheus ~]# mkdir -p /var/lib/prometheus
[root@prometheus ~]# chown prometheus:prometheus /var/lib/prometheus

# 配置Prometheus
[root@prometheus ~]# cat > /opt/prometheus/prometheus.yml << 'EOF'
global:
  scrape_interval: 15s
  evaluation_interval: 15s

alerting:
  alertmanagers:
    - static_configs:
        - targets: []

rule_files: []

scrape_configs:
  - job_name: "prometheus"
    static_configs:
      - targets: ["localhost:9090"]

  - job_name: "node_exporter"
    static_configs:
      - targets:
          - "192.168.1.11:9100"
          - "192.168.1.12:9100"
          - "192.168.1.13:9100"

  - job_name: "nginx"
    static_configs:
      - targets:
          - "192.168.1.11:9113"
          - "192.168.1.12:9113"
          - "192.168.1.13:9113"
EOF

# 创建systemd服务
[root@prometheus ~]# cat > /etc/systemd/system/prometheus.service << 'EOF'
[Unit]
Description=Prometheus
Wants=network-online.target
After=network-online.target

[Service]
User=prometheus
Group=prometheus
Type=simple
ExecStart=/opt/prometheus/prometheus \
  --config.file /opt/prometheus/prometheus.yml \
  --storage.tsdb.path /var/lib/prometheus/ \
  --web.console.templates=/opt/prometheus/consoles \
  --web.console.libraries=/opt/prometheus/console_libraries

[Install]
WantedBy=multi-user.target
EOF

# 启动Prometheus
[root@prometheus ~]# systemctl daemon-reload
[root@prometheus ~]# systemctl enable --now prometheus
Created symlink /etc/systemd/system/multi-user.target.wants/prometheus.service → /etc/systemd/system/prometheus.service.

# 测试访问
[root@prometheus ~]# curl http://localhost:9090/-/healthy
Prometheus is Healthy.

1.2 安装Node Exporter

# 在所有被监控节点安装Node Exporter
[root@node1 ~]# useradd -r -s /sbin/nologin node_exporter

[root@node1 ~]# cd /opt
[root@node1 opt]# wget https://github.com/prometheus/node_exporter/releases/download/v1.5.0/node_exporter-1.5.0.linux-amd64.tar.gz
[root@node1 opt]# tar xzf node_exporter-1.5.0.linux-amd64.tar.gz
[root@node1 opt]# ln -s node_exporter-1.5.0.linux-amd64 node_exporter

# 创建systemd服务
[root@node1 ~]# cat > /etc/systemd/system/node_exporter.service << 'EOF'
[Unit]
Description=Node Exporter
Wants=network-online.target
After=network-online.target

[Service]
User=node_exporter
Group=node_exporter
Type=simple
ExecStart=/opt/node_exporter/node_exporter

[Install]
WantedBy=multi-user.target
EOF

# 启动Node Exporter
[root@node1 ~]# systemctl daemon-reload
[root@node1 ~]# systemctl enable --now node_exporter

# 测试访问
[root@node1 ~]# curl http://localhost:9100/metrics | head
# HELP go_gc_duration_seconds A summary of the pause duration of garbage collection cycles.
# TYPE go_gc_duration_seconds summary
go_gc_duration_seconds{quantile="0"} 0.000123456
go_gc_duration_seconds{quantile="0.25"} 0.000234567
go_gc_duration_seconds{quantile="0.5"} 0.000345678
go_gc_duration_seconds{quantile="0.75"} 0.000456789
go_gc_duration_seconds{quantile="1"} 0.000567890
go_gc_duration_seconds_sum 0.001234567
go_gc_duration_seconds_count 5
# HELP go_goroutines Number of goroutines that currently exist.
# TYPE go_goroutines gauge
go_goroutines 10

Part02-Grafana部署

2.1 安装Grafana

# 配置Grafana源
[root@grafana ~]# cat > /etc/yum.repos.d/grafana.repo << 'EOF'
[grafana]
name=grafana
baseurl=https://packages.grafana.com/oss/rpm
repo_gpgcheck=1
enabled=1
gpgcheck=1
gpgkey=https://packages.grafana.com/gpg.key
sslverify=1
sslcacert=/etc/pki/tls/certs/ca-bundle.crt
EOF

# 安装Grafana
[root@grafana ~]# dnf install -y grafana

# 启动Grafana
[root@grafana ~]# systemctl enable --now grafana-server
Created symlink /etc/systemd/system/multi-user.target.wants/grafana-server.service → /usr/lib/systemd/system/grafana-server.service.

# 配置防火墙
[root@grafana ~]# firewall-cmd --permanent --add-port=3000/tcp
success
[root@grafana ~]# firewall-cmd --reload
success

# 访问Web界面
# http://192.168.1.10:3000
# 默认用户名/密码: admin/admin

# 添加Prometheus数据源
[root@grafana ~]# grafana-cli admin data-sources create << 'EOF'
{
  "name": "Prometheus",
  "type": "prometheus",
  "url": "http://192.168.1.10:9090",
  "access": "proxy",
  "isDefault": true
}
EOF

# 安装仪表盘
[root@grafana ~]# grafana-cli plugins install grafana-clock-panel
installing grafana-clock-panel @ 2.0.1
from url: https://grafana.com/api/plugins/grafana-clock-panel/versions/2.0.1/download
into: /var/lib/grafana/plugins

Installed grafana-clock-panel successfully

# 重启Grafana
[root@grafana ~]# systemctl restart grafana-server

# 常用PromQL查询
[root@prometheus ~]# cat > /root/promql-examples.txt << 'EOF'
PromQL常用查询
==============

# CPU使用率
100 - (avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)

# 内存使用率
(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100

# 磁盘使用率
(1 - (node_filesystem_avail_bytes{fstype!="tmpfs"} / node_filesystem_size_bytes{fstype!="tmpfs"})) * 100

# 网络流量
rate(node_network_receive_bytes_total{device!="lo"}[5m])
rate(node_network_transmit_bytes_total{device!="lo"}[5m])

# 磁盘I/O
rate(node_disk_read_bytes_total[5m])
rate(node_disk_written_bytes_total[5m])

# 系统负载
node_load1
node_load5
node_load15
EOF

风哥针对监控系统建议:

  • 配置合理的采集间隔
  • 设置告警规则
  • 使用Grafana可视化
  • 配置数据保留策略
  • 监控监控系统本身

本文由风哥教程整理发布,仅用于学习测试使用,转载注明出处:http://www.fgedu.net.cn/10327.html

联系我们

在线咨询:点击这里给我发消息

微信号:itpux-com

工作日:9:30-18:30,节假日休息