prometheus监控docker容器
导入grafana模板ID:16314,模板json文件下载地址:https://grafana.com/grafana/dashboards/16314-docker-container-os-node-node-exporter-cadvisor/ 。运行cadvisor容器时,将主机的 /var/lib/docker/ 目录(本机的容器数据目录)挂载到容器的 /var/lib/docker 目录,且为只读模式。
文章目录
一、prometheus监控docker容器
1.1 docker安装
采用二进制包安装docker,下载地址:https://download.docker.com/linux/static/stable/x86_64/

下载解压
[root@localhost docker]# tar -xf docker-18.09.3.tgz
[root@localhost docker]# cd docker/
[root@localhost docker]# ls
containerd ctr dockerd docker-proxy
containerd-shim docker docker-init runc
查看docker版本
[root@localhost docker]# ./docker -v
Docker version 18.09.3, build 774a1f4
# 安装完成
将docker目录下的可执行文件复制到/usr/bin目录下,方便执行docker命令
[root@localhost docker]# cp docker/* /usr/bin/
[root@localhost docker]# cd
[root@localhost ~]# docker -v
Docker version 18.09.3, build 774a1f4
配置docker服务
[root@localhost ~]# vim /usr/lib/systemd/system/docker.service
[Unit]
Description=Docker Application Container Engine
Documentation=https://docs.docker.com
After=network.target
[Service]
ExecStart=/usr/bin/dockerd --host=unix:///var/run/docker.sock
ExecReload=/bin/kill -s HUP $MAINPID
KillMode=process
TimeoutStartSec=0
Restart=on-failure
RestartSec=5s
LimitNOFILE=1048576
LimitNPROC=1048576
LimitCORE=infinity
[Install]
WantedBy=multi-user.target
设置服务自启
[root@localhost ~]# systemctl daemon-reload
[root@localhost ~]# systemctl restart docker.service
[root@localhost ~]# systemctl enable docker.service
确认是否配置成功,输入docker info命令,应该输出相关信息

docker官方的镜像源通常在国内无法访问,需要配置国内镜像源
[root@localhost ~]# vim /etc/docker/daemon.json
{
"registry-mirrors": [
"https://do.nark.eu.org",
"https://dc.j8.work",
"https://docker.m.daocloud.io",
"https://dockerproxy.com",
"https://docker.nju.edu.cn",
"https://docker.mirrors.ustc.edu.cn"
]
}
# 重启docker服务
[root@localhost ~]# systemctl restart docker.service
使用docker info 查看是否配置成功
1.2 nginx容器安装
拉取docker镜像
[root@localhost ~]# docker pull nginx
[root@localhost ~]# docker images
REPOSITORY TAG IMAGE ID CREATED SIZE
nginx latest 576306625d79 2 weeks ago 152MB
运行nginx容器
[root@localhost ~]# docker run -itd -p 8000:80 --name="nginx-docker" nginx:latest
187b3913fb67ee4b4875bcc1c9f1f72f81cb9777ebeb00a1c268315411cbf668
参数含义:-p 8000:80:将容器的80端口映射到宿主机的8000端口;--name:容器的名称;nginx:latest:容器运行使用的镜像;-d:后台运行
查看容器
[root@localhost ~]# docker ps
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
187b3913fb67 nginx:latest "/docker-entrypoint.…" 3 seconds ago Up 2 seconds 0.0.0.0:8000->80/tcp nginx-docker
容器挂载本地配置
停止并删除运行的容器
也可采用重新运行一个容器,名称不一样即可
[root@localhost ~]# docker stop nginx-docker
[root@localhost ~]# docker rm nginx-docker
创建挂载目录
[root@localhost ~]# mkdir -p /data/nginx/{html,conf,logs}
创建配置文件
[root@localhost ~]# vim /data/nginx/html/index.html
hello nginx
[root@localhost ~]# vim /data/nginx/conf/nginx.conf
user nginx;
worker_processes auto;
error_log /var/log/nginx/error.log;
pid /run/nginx.pid;
include /usr/share/nginx/modules/*.conf;
events {
worker_connections 1024;
}
http {
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for"';
access_log /var/log/nginx/access.log main;
sendfile on;
tcp_nopush on;
tcp_nodelay on;
keepalive_timeout 65;
types_hash_max_size 4096;
include /etc/nginx/mime.types;
default_type application/octet-stream;
include /etc/nginx/conf.d/*.conf;
server {
listen 80;
listen [::]:80;
server_name _;
root /usr/share/nginx/html;
include /etc/nginx/default.d/*.conf;
error_page 404 /404.html;
location = /404.html {
}
error_page 500 502 503 504 /50x.html;
location = /50x.html {
}
}
}
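运行容器(注意:-v /data/nginx/conf:/etc/nginx 会用宿主机目录整体覆盖容器内的 /etc/nginx 目录,因此 /data/nginx/conf 中除 nginx.conf 外,还需包含其引用的 mime.types 等文件,否则nginx无法启动)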
[root@localhost ~]# docker run -d --name="nginx-docker" -p 8000:80 -v /data/nginx/html:/usr/share/nginx/html -v /data/nginx/conf:/etc/nginx -v /data/nginx/logs:/var/log/nginx nginx:latest
访问本地端口

1.3 cadvisor容器安装
cAdvisor是Google开源的容器监控工具,专门用于收集、处理、导出容器运行时的资源使用情况和性能数据,包括CPU数据、内存数据、磁盘数据等。默认端口为8080
cadvisor镜像地址:cadvisor
拉取镜像
[root@localhost ~]# docker pull swr.cn-north-4.myhuaweicloud.com/ddn-k8s/gcr.io/cadvisor/cadvisor:v0.52.1
查看镜像
[root@localhost ~]# docker images | grep cadvisor
swr.cn-north-4.myhuaweicloud.com/ddn-k8s/gcr.io/cadvisor/cadvisor v0.52.1 de1f4a4d7753 9 months ago 80.7MB
运行cadvisor容器
docker run -itd --name="cadvisor" -p 8080:8080 \
-v /:/rootfs:ro \
-v /var/run:/var/run:ro \
-v /sys:/sys:ro \
-v /var/lib/docker/:/var/lib/docker:ro \
-v /dev/disk/:/dev/disk:ro \
swr.cn-north-4.myhuaweicloud.com/ddn-k8s/gcr.io/cadvisor/cadvisor:v0.52.1
参数含义:-v /:/rootfs:ro:将主机的根文件系统 / 挂载到容器的 /rootfs 目录下,且为只读模式(ro);-v /var/run:/var/run:ro:将主机上的 /var/run 目录挂载到容器内的同样路径,且为只读模式;-v /sys:/sys:ro:将主机的 /sys 文件系统挂载到容器的 /sys 目录,且为只读模式;-v /var/lib/docker/:/var/lib/docker:ro:将主机的 /var/lib/docker/ 目录(本机的容器数据目录)挂载到容器的 /var/lib/docker 目录,且为只读模式;-v /dev/disk/:/dev/disk:ro:将主机的 /dev/disk/ 目录挂载到容器内的 /dev/disk 目录,且为只读模式。
查看cadvisor容器
[root@localhost ~]# docker ps | grep cad
a851a9c22967 swr.cn-north-4.myhuaweicloud.com/ddn-k8s/gcr.io/cadvisor/cadvisor:v0.52.1 "/usr/bin/cadvisor -…" 2 minutes ago Up 2 minutes (healthy) 0.0.0.0:8080->8080/tcp cadvisor
在浏览器中访问宿主机的8080端口,可以看到当前宿主机上运行的两个容器
1.4 prometheus配置
prometheus.yml文件配置
# 监控docker的数据采集组件
- job_name: "cadvisor"
static_configs:
- targets: ["192.168.56.112:8080"] #cadvisor的ip+端口
配置完成重启prometheus
docker_alerts.yml文件配置
[root@k8s-node prometheus]# vim rules/docker_alerts.yml
groups:
- name: docker-alerts
rules:
# cadvisor服务宕机告警
- alert: CadvisorDown
expr: up{job="cadvisor"} == 0
for: 1m
labels:
severity: critical
annotations:
summary: "cadvisor 服务宕机 (实例 {{ $labels.instance }})"
description: "cadvisor服务已经宕机超过1min"
# 容器cpu使用率过高告警
- alert: ContainerCPUHigh
expr: (rate(container_cpu_usage_seconds_total{name!=""}[5m]) * 100) > 80
for: 1m
labels:
severity: warning
annotations:
summary: "容器CPU使用率过高 ({{ $labels.name }})"
description: "容器 {{ $labels.name }} CPU使用率超过80%持续1分钟"
# 容器内存使用率过高,容器启动时需要指定内存限制,否则监控无效
- alert: ContainerMemoryHigh
expr: (container_memory_usage_bytes{name!=""} / container_spec_memory_limit_bytes{name!=""}) * 100 > 85
for: 1m
labels:
severity: warning
annotations:
summary: "容器内存使用率过高 ({{ $labels.name }})"
description: "容器 {{ $labels.name }} 内存使用率超过85%"
# 容器因为内存不足,被停止
- alert: ContainerOOMKilled
expr: increase(container_oom_events_total{name!=""}[5m]) > 0
labels:
severity: critical
annotations:
summary: "容器发生OOM Kill ({{ $labels.name }})"
description: "容器 {{ $labels.name }} 在过去5分钟内被OOM Killer终止"
# 网络流量异常
- alert: ContainerNetworkTrafficHigh
expr: rate(container_network_receive_bytes_total[5m]) > 100000000 # > 100MB/s
for: 2m
labels:
severity: warning
annotations:
summary: "容器网络接收流量异常 ({{ $labels.name }})"
description: "容器 {{ $labels.name }} 网络接收流量超过100MB/s"
# 磁盘IO过高
- alert: ContainerDiskIOHigh
expr: rate(container_fs_reads_bytes_total[5m]) + rate(container_fs_writes_bytes_total[5m]) > 50000000 # > 50MB/s
for: 2m
labels:
severity: warning
annotations:
summary: "容器磁盘IO过高 ({{ $labels.name }})"
description: "容器 {{ $labels.name }} 磁盘IO超过50MB/s"
配置完成重启prometheus
1.5 Grafana配置
导入grafana模板ID:16314,模板json文件下载地址:https://grafana.com/grafana/dashboards/16314-docker-container-os-node-node-exporter-cadvisor/
同时监控宿主机和容器需要做以下配置:
更改prometheus.yml文件,为同一台主机上的node_exporter和cadvisor设置相同的instance标签
- job_name: "nodes" # 每个job是一个分组,可以配置多个被监控服务器
static_configs:
- targets: ["192.168.56.109:9100","192.168.56.110:9100"] # 主机上数据采集组件的ip和端口
- targets: ["192.168.56.112:9100"]
labels:
instance: 'node-112'
- job_name: "cadvisor"
static_configs:
- targets: ["192.168.56.112:8080"]
labels:
instance: 'node-112'
重启prometheus后查看
更多推荐
所有评论(0)