监控外部Etcd
导入etcd证书
# Package the etcd CA file, certificate and private key into a generic Secret
# named etcd-certs in the monitoring namespace. The keys inside the Secret are
# ca.pem, etcd.pem and etcd-key.pem.
kubectl create secret generic etcd-certs \
  --namespace monitoring \
  --from-file=ca.pem=/etc/ssl/etcd/ssl/ca.pem \
  --from-file=etcd.pem=/etc/ssl/etcd/ssl/admin-master1.pem \
  --from-file=etcd-key.pem=/etc/ssl/etcd/ssl/admin-master1-key.pem
prometheus挂载etcd-certs
cd /root/kube-prometheus/manifests/
vim prometheus-prometheus.yaml
# In the editor, add the Secret name to the `secrets` list nested under `spec`
# of the Prometheus resource (the `...` stands for the existing spec fields):
spec:
  ...
  secrets:
  - etcd-certs
# Re-apply the manifest so the Secret gets mounted into the Prometheus pods
# (it shows up under /etc/prometheus/secrets/etcd-certs/).
kubectl apply -f prometheus-prometheus.yaml
验证
[root@master1 manifests]# kubectl exec -it -n monitoring prometheus-k8s-0 -- ls /etc/prometheus/secrets/etcd-certs/
ca.pem etcd-key.pem etcd.pem
创建etcd Endpoints和Service
# Write and apply the manifest that exposes the external etcd members inside
# the cluster. The Service has no selector, so the Endpoints object with the
# same name and namespace supplies the backend addresses by hand.
# The heredoc delimiter is quoted so the shell never expands the YAML content.
cat << 'EOF' > etcd-service.yaml
apiVersion: v1
kind: Endpoints
metadata:
  labels:
    k8s-app: etcd-external
  name: etcd-external
  namespace: kube-system
subsets:
- addresses:
  - ip: 192.168.10.221
  - ip: 192.168.10.222
  - ip: 192.168.10.223
  ports:
  # The port name must match the Service port name below.
  - name: etcd-http
    port: 2379
    protocol: TCP
---
apiVersion: v1
kind: Service
metadata:
  labels:
    k8s-app: etcd-external
  name: etcd-external
  namespace: kube-system
spec:
  ports:
  - name: etcd-http
    port: 2379
    targetPort: 2379
  sessionAffinity: None
  type: ClusterIP
EOF
kubectl apply -f etcd-service.yaml
验证etcd metrics
[root@master1 ~]# kubectl get svc -n kube-system | grep etcd
etcd-external ClusterIP 10.230.62.128 <none> 2379/TCP 104s
# Fetch the etcd metrics through the Service ClusterIP, authenticating with the
# etcd certificate and key. --insecure (same as -k) skips verification of the
# server certificate.
curl --insecure \
  --cert /etc/ssl/etcd/ssl/admin-master1.pem \
  --key /etc/ssl/etcd/ssl/admin-master1-key.pem \
  https://10.230.62.128:2379/metrics
绑定角色权限使Prometheus有权限访问kube-system命名空间
# Write and apply the RBAC objects that let the Prometheus ServiceAccount
# discover services, endpoints and pods in kube-system.
# The heredoc delimiter is quoted so the shell never expands the YAML content.
cat << 'EOF' > etcd-serviceaccount.yaml
# Role created in the namespace that Prometheus needs to read (kube-system)
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: prometheus-k8s
  namespace: kube-system
rules:
- apiGroups:
  - ""
  resources:
  - services
  - endpoints
  - pods
  verbs:
  - get
  - list
  - watch
---
# Bind the Role above to the prometheus-k8s ServiceAccount.
# A RoleBinding resolves its roleRef Role in its OWN namespace and grants
# access only there, so it must live in kube-system next to the Role.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: prometheus-k8s
  namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: prometheus-k8s
subjects:
- kind: ServiceAccount
  name: prometheus-k8s  # ServiceAccount used by the Prometheus pods; kube-prometheus uses prometheus-k8s by default
  namespace: monitoring
EOF
kubectl apply -f etcd-serviceaccount.yaml
配置ServiceMonitor服务自动发现规则
# Write and apply the ServiceMonitor that makes Prometheus scrape the
# etcd-external Service in kube-system over HTTPS with client certificates.
# The heredoc delimiter is quoted so the shell never expands the YAML content.
cat << 'EOF' > etcd-servicemonitor.yaml
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: etcd-external
  namespace: monitoring
  labels:
    k8s-app: etcd-external
spec:
  # jobLabel is the KEY of a label on the selected Service; that label's value
  # becomes the `job` label of the scraped targets (here: etcd-external).
  jobLabel: k8s-app
  selector:
    matchLabels:
      k8s-app: etcd-external
  namespaceSelector:
    matchNames:
    - kube-system
  endpoints:
  # Must match the port name declared on the Service/Endpoints.
  - port: etcd-http
    interval: 15s
    scheme: https
    tlsConfig:
      # Paths of the etcd-certs Secret as mounted inside the Prometheus pod.
      caFile: /etc/prometheus/secrets/etcd-certs/ca.pem
      certFile: /etc/prometheus/secrets/etcd-certs/etcd.pem
      keyFile: /etc/prometheus/secrets/etcd-certs/etcd-key.pem
EOF
kubectl apply -f etcd-servicemonitor.yaml
配置报警
# Create the PrometheusRule holding the etcd availability alert and pipe it
# straight into kubectl. The heredoc delimiter is quoted so the shell never
# expands anything inside the rule text.
cat << 'EOF' | kubectl apply -f -
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    prometheus: k8s
    role: alert-rules
  name: etcd-rules
  namespace: monitoring
spec:
  groups:
  - name: etcd
    rules:
    - alert: EtcdClusterUnavailable
      annotations:
        summary: etcd cluster small
        description: If one more etcd peer goes down the cluster will be unavailable
      # The job matcher must equal the job label of the etcd targets; targets
      # scraped via the etcd-external Service carry job="etcd-external".
      # Fires when the number of down members exceeds (total / 2 - 1).
      expr: |
        count(up{job="etcd-external"} == 0) > (count(up{job="etcd-external"}) / 2 - 1)
      for: 3m
      labels:
        severity: critical
EOF
注意：PrometheusRule 的 label 标签必须与 Prometheus 资源中 ruleSelector 的匹配标签一致，否则规则不会被加载；此处至少要有 prometheus=k8s 和 role=alert-rules