fengwuxichen 2020-01-14
一、说明
Prometheus负责收集数据,Grafana负责展示数据。其中采用Prometheus 中的 Exporter含:
1)Node Exporter,负责收集 host 硬件和操作系统数据。它将以容器方式运行在所有 host 上。
2)cAdvisor,负责收集容器数据。它将以容器方式运行在所有 host 上。
3)Alertmanager,负责告警。它将以容器方式运行在所有 host 上。
完整Exporter列表请参考:https://prometheus.io/docs/instrumenting/exporters/
二、添加配置文件
1、alertmanager.yaml
# Alertmanager configuration: deliver every alert by e-mail to one receiver.
global:
  # SMTP relay used for outgoing mail, written as host:port.
  smtp_smarthost: 'smtp.sina.com:25'
  # Sender address. Left blank in this example; fill in the real
  # sending mailbox before use.
  smtp_from: ''
  # SMTP login name, i.e. the account of the sending mailbox.
  smtp_auth_username: 'dogotsn'
  # SMTP password of the sending mailbox (redacted in this example).
  smtp_auth_password: '35ea02c*****'
  # Do not require TLS on the SMTP connection.
  smtp_require_tls: false

route:
  # Alerts sharing the same alertname are batched into one notification.
  group_by: ['alertname']
  # Wait before sending the first notification for a new group.
  group_wait: 10s
  # Wait before notifying about new alerts added to an existing group.
  group_interval: 10s
  # Wait before re-sending a notification that is still firing.
  repeat_interval: 10m
  # Default receiver; must match a name under `receivers`.
  receiver: live-monitoring

receivers:
  - name: 'live-monitoring'
    email_configs:
      # Recipient address (redacted in this example).
      - to: 'zhangc***'
2、node_down.yml
# Alerting rules: fire InstanceDown when a scrape target stays unreachable.
groups:
  - name: node_down
    rules:
      - alert: InstanceDown
        # `up` is 0 when the last scrape of the target failed.
        expr: up == 0
        # The condition must hold for this long before the alert fires.
        for: 1m
        labels:
          user: test
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minutes."
3、prometheus.yml
# my global config
global:
  scrape_interval: 15s  # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s  # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets: ['193.168.1.39:9093']
          # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global
# 'evaluation_interval'.
rule_files:
  - "node_down.yml"
  # - "first_rules.yml"
  # - "second_rules.yml"

# Scrape configurations: Prometheus itself, cAdvisor and Node Exporter.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries
  # scraped from this config.
  - job_name: 'prometheus'
    static_configs:
      - targets: ['193.168.1.39:9090']

  - job_name: 'cadvisor'
    static_configs:
      - targets: ['193.168.1.39:8080']

  - job_name: 'node'
    # Overrides the global scrape_interval for this job only.
    scrape_interval: 8s
    static_configs:
      - targets: ['193.168.1.39:9100']
三、编写docker-compose
# Compose stack: Prometheus, Alertmanager, Grafana, Node Exporter and
# cAdvisor, all attached to one user-defined bridge network.
version: '2'

networks:
  mynet:
    driver: bridge

services:
  prometheus:
    image: prom/prometheus
    container_name: prometheus
    hostname: prometheus
    restart: always
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - ./node_down.yml:/etc/prometheus/node_down.yml
    ports:
      - "9090:9090"
    networks:
      - mynet

  alertmanager:
    image: prom/alertmanager
    container_name: alertmanager
    hostname: alertmanager
    restart: always
    volumes:
      # The prom/alertmanager image starts with
      # --config.file=/etc/alertmanager/alertmanager.yml, so the local file
      # has to be mounted under that exact name (.yml) to be picked up.
      - ./alertmanager.yaml:/etc/alertmanager/alertmanager.yml
    ports:
      - "9093:9093"
    networks:
      - mynet

  grafana:
    image: grafana/grafana
    container_name: grafana
    hostname: grafana
    restart: always
    ports:
      - "3000:3000"
    networks:
      - mynet

  node-exporter:
    image: prom/node-exporter
    # image: quay.io/prometheus/node-exporter
    # NOTE(review): no host paths are mounted here, so the exporter may
    # report the container's view rather than the host's - confirm whether
    # host mounts and --path.rootfs are wanted.
    container_name: node-exporter
    hostname: node-exporter
    restart: always
    ports:
      - "9100:9100"
    networks:
      - mynet

  cadvisor:
    image: google/cadvisor:latest
    container_name: cadvisor
    hostname: cadvisor
    restart: always
    volumes:
      # Host paths cAdvisor reads to inspect containers; read-only except
      # /var/run.
      - /:/rootfs:ro
      - /var/run:/var/run:rw
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
    ports:
      - "8080:8080"
    networks:
      - mynet
容器启动如下:

四、配置Prometheus
prometheus targets界面如下:

五、配置Grafana
1、 添加Prometheus数据源

然后选择Prometheus

然后save&test
2、配置dashboards

说明:可以用自带模板,也可以去https://grafana.com/dashboards,下载对应的模板。



六、防火墙配置
6.1 关闭selinux
# Switch SELinux to permissive mode on the running system.
setenforce 0
# Edit the SELinux config file so the change survives a reboot
# (presumably by setting SELINUX=disabled or permissive - confirm).
vim /etc/sysconfig/selinux
6.2 配置iptables
# Remove the stock firewall (firewalld).
systemctl stop firewalld.service
systemctl disable firewalld.service

# Install iptables.
yum install -y iptables-services

# Configure: edit the rules file so that it contains the rule set below.
vim /etc/sysconfig/iptables
# --- contents of /etc/sysconfig/iptables ---
*filter
:INPUT ACCEPT [0:0]
:FORWARD ACCEPT [0:0]
:OUTPUT ACCEPT [24:11326]
-A INPUT -m state --state RELATED,ESTABLISHED -j ACCEPT
-A INPUT -p icmp -j ACCEPT
-A INPUT -i lo -j ACCEPT
-A INPUT -p tcp -m state --state NEW -m tcp --dport 22 -j ACCEPT
-A INPUT -p tcp -m state --state NEW -m tcp --dport 9090 -j ACCEPT
-A INPUT -p tcp -m state --state NEW -m tcp --dport 8080 -j ACCEPT
-A INPUT -p tcp -m state --state NEW -m tcp --dport 3000 -j ACCEPT
-A INPUT -p tcp -m state --state NEW -m tcp --dport 9093 -j ACCEPT
-A INPUT -p tcp -m state --state NEW -m tcp --dport 9100 -j ACCEPT
-A INPUT -j REJECT --reject-with icmp-host-prohibited
-A FORWARD -j REJECT --reject-with icmp-host-prohibited
COMMIT
# --- end of /etc/sysconfig/iptables ---

# Start iptables now and enable it at boot.
systemctl restart iptables.service
systemctl enable iptables.service