Prometheus
镜像加速
{
  "registry-mirrors": [
    "https://hub-mirror.c.163.com/",
    "https://dockerhub.icu",
    "https://doublezonline.cloud",
    "https://docker.m.daocloud.io",
    "https://0wu3ajmi.mirror.aliyuncs.com"
  ],
  "insecure-registries": ["sea.registry.com"]
}
镜像pull
# Fetch the Prometheus server image and the Grafana image.
docker pull prom/prometheus
docker pull grafana/grafana
配置prometheus.yml
# Create the directory that holds the Prometheus configuration.
# -p makes the command succeed when the directory already exists.
mkdir -p /var/prometheus
# Edit the config inside that directory instead of whatever the current
# working directory happens to be.
vim /var/prometheus/prometheus.yml
# Prometheus server configuration.
global:
  # Defaults applied to every scrape job and to rule evaluation.
  scrape_interval: 15s
  evaluation_interval: 15s

# Glob of rule files to load.
rule_files:
  - "rules/*.rules"

scrape_configs:
  # Scrape target on port 9090 of 192.168.50.100.
  - job_name: 'prometheus'
    static_configs:
      - targets: ['192.168.50.100:9090']

  # Scrape targets on port 9100 of three hosts; this job overrides the
  # global scrape interval with 10s.
  - job_name: 'node_exporter'
    scrape_interval: 10s
    static_configs:
      - targets:
          - '192.168.50.100:9100'
          - '192.168.50.130:9100'
          - '192.168.50.131:9100'
Grafana
# Fetch the Grafana image.
docker pull grafana/grafana
# Start Grafana detached (-d), publishing container port 3000 on host port 3000.
docker run -d -p 3000:3000 grafana/grafana
设置数据源并导入面板
占位
自定义告警rule
# Host-level alerting rules.
# Metric names follow node_exporter >= 0.16, which renamed node_cpu to
# node_cpu_seconds_total and added a _bytes suffix to the memory gauges.
groups:
  - name: hostAlerts
    rules:
      # Fires when the summed non-idle CPU rate (averaged across cores)
      # of an instance stays above 0.85 for one minute.
      - alert: hostCpuUsageAlert
        expr: sum(avg without (cpu)(irate(node_cpu_seconds_total{mode!='idle'}[5m]))) by (instance) > 0.85
        for: 1m
        labels:
          severity: page
        annotations:
          summary: "Instance {{ $labels.instance }} CPU usage high"
          description: "{{ $labels.instance }} CPU usage above 85% (current value: {{ $value }})"

      # Fires when (total - available) / total memory stays above 0.85
      # for one minute.
      - alert: hostMemUsageAlert
        expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes > 0.85
        for: 1m
        labels:
          severity: page
        annotations:
          summary: "Instance {{ $labels.instance }} MEM usage high"
          description: "{{ $labels.instance }} MEM usage above 85% (current value: {{ $value }})"