Kube Prometheus 监控 Etcd

Kube Prometheus Etcd

Posted by BlueFat on Sunday, November 29, 2020

监控外部Etcd

导入etcd证书

# Package the etcd CA certificate and the client certificate/key pair into a
# generic secret named "etcd-certs" in the monitoring namespace.
kubectl create secret generic etcd-certs \
  --namespace=monitoring \
  --from-file=ca.pem=/etc/ssl/etcd/ssl/ca.pem \
  --from-file=etcd.pem=/etc/ssl/etcd/ssl/admin-master1.pem \
  --from-file=etcd-key.pem=/etc/ssl/etcd/ssl/admin-master1-key.pem

prometheus挂载etcd-certs

# Open the manifest in the kube-prometheus manifests directory
# (presumably the Prometheus custom resource -- confirm against your checkout).
cd /root/kube-prometheus/manifests/
vim prometheus-prometheus.yaml 
# Excerpt of the edited file: add the secret name to the list under
# spec.secrets ("..." stands for the parts of the file not shown here).
spec:
...
  secrets:
  - etcd-certs
# Apply the modified manifest to the cluster.
kubectl apply -f prometheus-prometheus.yaml 

验证

[root@master1 manifests]# kubectl exec -it -n monitoring prometheus-k8s-0 -- ls /etc/prometheus/secrets/etcd-certs/
ca.pem        etcd-key.pem  etcd.pem

创建etcd service

# Write a manifest with a hand-maintained Endpoints object listing the three
# etcd member IPs and a same-named, selector-less Service in kube-system,
# then apply it. The heredoc delimiter is quoted so the YAML is written
# verbatim without any shell expansion.
cat > etcd-service.yaml <<'EOF'
apiVersion: v1
kind: Endpoints
metadata:
  labels:
    k8s-app: etcd-external
  name: etcd-external
  namespace: kube-system
subsets:
- addresses:
  - ip: 192.168.10.221
  - ip: 192.168.10.222
  - ip: 192.168.10.223
  ports:
  - name: etcd-http
    port: 2379
    protocol: TCP
---
apiVersion: v1
kind: Service
metadata:
  labels:
    k8s-app: etcd-external
  name: etcd-external
  namespace: kube-system
spec:
  ports:
  - name: etcd-http
    port: 2379
    targetPort: 2379
  sessionAffinity: None
  type: ClusterIP
EOF
kubectl apply --filename=etcd-service.yaml

验证etcd metrics

[root@master1 ~]# kubectl get svc -n kube-system | grep etcd
etcd-external         ClusterIP   10.230.62.128   <none>        2379/TCP                       104s

# Fetch the etcd metrics through the Service ClusterIP using the client
# certificate; --insecure skips verification of the server certificate.
curl --insecure \
  --cert /etc/ssl/etcd/ssl/admin-master1.pem \
  --key /etc/ssl/etcd/ssl/admin-master1-key.pem \
  https://10.230.62.128:2379/metrics

绑定角色权限使Prometheus有权限访问kube-system命名空间

# Grant the Prometheus service account read access (get/list/watch) to
# services, endpoints and pods in kube-system so that it can discover the
# etcd-external endpoints there.
# Fix: the RoleBinding now lives in kube-system. A RoleBinding can only
# reference a Role in its own namespace, so binding from "default" did not
# grant the kube-system Role defined in this manifest.
cat > etcd-serviceaccount.yaml <<'EOF'
# Role created in the namespace Prometheus has to read from.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: prometheus-k8s
  namespace: kube-system
rules:
- apiGroups:
  - ""
  resources:
  - services
  - endpoints
  - pods
  verbs:
  - get
  - list
  - watch
---
# Bind the prometheus-k8s Role to the Prometheus service account.
# Must be in the same namespace as the Role it references.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: prometheus-k8s
  namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: prometheus-k8s
subjects:
- kind: ServiceAccount
  name: prometheus-k8s # service account used by the Prometheus pods; kube-prometheus uses prometheus-k8s by default
  namespace: monitoring
EOF
kubectl apply -f etcd-serviceaccount.yaml

配置ServiceMonitor服务自动发现规则(Service 已在前面的步骤中创建)

# Create a ServiceMonitor that scrapes the "etcd-http" port of every Service
# labelled k8s-app=etcd-external in kube-system, over HTTPS with the client
# certificate files under /etc/prometheus/secrets/etcd-certs/.
# Fix: jobLabel must name a label KEY on the Service. The Service carries
# k8s-app=etcd-external, so jobLabel is "k8s-app" and the resulting Prometheus
# job is "etcd-external". The old value "etcd-external" matched no label key
# and only worked through the fallback to the Service name.
cat > etcd-servicemonitor.yaml <<'EOF'
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: etcd-external
  namespace: monitoring
  labels:
    k8s-app: etcd-external
spec:
  jobLabel: k8s-app
  selector:
    matchLabels:
      k8s-app: etcd-external
  namespaceSelector:
    matchNames:
    - kube-system
  endpoints:
  - port: etcd-http
    interval: 15s
    scheme: https
    tlsConfig:
      caFile: /etc/prometheus/secrets/etcd-certs/ca.pem
      certFile: /etc/prometheus/secrets/etcd-certs/etcd.pem
      keyFile: /etc/prometheus/secrets/etcd-certs/etcd-key.pem
EOF
kubectl apply -f etcd-servicemonitor.yaml

配置报警

# Create an alerting rule that fires when so many etcd members are down that
# losing one more would break quorum.
# Fix: the selector uses job="etcd-external", the job name produced by the
# etcd-external ServiceMonitor/Service. The previous job="etcd" matched no
# series, so the alert could never fire.
# The heredoc delimiter is quoted so the PromQL expression is passed verbatim.
cat <<'EOF' | kubectl apply -f -
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    prometheus: k8s
    role: alert-rules
  name: etcd-rules
  namespace: monitoring
spec:
  groups:
    - name: etcd
      rules:
        - alert: EtcdClusterUnavailable
          annotations:
            summary: etcd cluster small
            description: If one more etcd peer goes down the cluster will be unavailable
          expr: |
            count(up{job="etcd-external"} == 0) > (count(up{job="etcd-external"}) / 2 - 1)
          for: 3m
          labels:
            severity: critical
EOF

注意:PrometheusRule 的 labels 中至少要包含 prometheus=k8s 和 role=alert-rules,否则该规则不会被 Prometheus 资源的 ruleSelector 选中,告警规则不会生效。

配置 PrometheusRule