交付prometheus

prometheus官方dockerhub地址

prometheus官方github地址

准备镜像

[root@ops-200 ~]# docker pull prom/prometheus:v2.17.1
[root@ops-200 ~]# docker images|grep prometheus
prom/prometheus v2.17.1 358a0d2395fe 2 weeks ago 135MB
[root@ops-200 ~]# docker tag 358a0d2395fe harbor.od.com/infra/prometheus:v2.17.1
[root@ops-200 ~]# docker push harbor.od.com/infra/prometheus:v2.17.1

准备资源配置清单

创建资源目录

[root@ops-200 ~]# mkdir /data/k8s-yaml/prometheus
[root@ops-200 ~]# cd /data/k8s-yaml/prometheus

编写yaml

[root@ops-200 prometheus]# vim rbac.yaml 
# RBAC for Prometheus: a ServiceAccount in the "infra" namespace plus
# cluster-wide read access to the objects Prometheus discovers and scrapes.
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    addonmanager.kubernetes.io/mode: Reconcile
    kubernetes.io/cluster-service: "true"
  name: prometheus
  namespace: infra
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    addonmanager.kubernetes.io/mode: Reconcile
    kubernetes.io/cluster-service: "true"
  name: prometheus
rules:
# Read access for service discovery and node/pod metrics.
- apiGroups:
  - ""
  resources:
  - nodes
  - nodes/metrics
  - services
  - endpoints
  - pods
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - configmaps
  verbs:
  - get
# Allow scraping the /metrics non-resource URL (e.g. on the apiserver).
- nonResourceURLs:
  - /metrics
  verbs:
  - get
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    addonmanager.kubernetes.io/mode: Reconcile
    kubernetes.io/cluster-service: "true"
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
- kind: ServiceAccount
  name: prometheus
  namespace: infra

[root@ops-200 prometheus]# vim deployment.yaml
# Prometheus Deployment: single replica, pinned to one node, data on NFS.
apiVersion: apps/v1
kind: Deployment
metadata:
  annotations:
    # NOTE(review): deployment.kubernetes.io/revision is normally managed by
    # the controller; carried over from a live-object export.
    deployment.kubernetes.io/revision: "5"
  labels:
    name: prometheus
  name: prometheus
  namespace: infra
spec:
  progressDeadlineSeconds: 600
  replicas: 1
  revisionHistoryLimit: 7
  selector:
    matchLabels:
      app: prometheus
  strategy:
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 1
    type: RollingUpdate
  template:
    metadata:
      labels:
        app: prometheus
    spec:
      nodeName: k8s01-21.host.com  # pin to node 21 (node 22 is already over-committed)
      containers:
      - name: prometheus
        image: harbor.od.com/infra/prometheus:v2.17.1
        imagePullPolicy: IfNotPresent
        command:
        - /bin/prometheus
        args:
        - --config.file=/data/etc/prometheus.yml  # main configuration file
        - --storage.tsdb.path=/data/prom-db
        - --storage.tsdb.min-block-duration=10m  # small head block for demo; raise (e.g. 2h) in production
        - --storage.tsdb.retention=72h  # TSDB retention; raise (e.g. 100h) in production
        - --web.enable-lifecycle  # allows reloading config later via an HTTP call (curl)
        ports:
        - containerPort: 9090
          protocol: TCP
        volumeMounts:
        - mountPath: /data
          name: data
        resources:
          requests:  # resources guaranteed to the container at scheduling time
            cpu: "1000m"
            memory: "1.5Gi"
          limits:  # hard caps for the container
            cpu: "2000m"
            memory: "3Gi"
      imagePullSecrets:
      - name: harbor
      securityContext:
        runAsUser: 0
      serviceAccountName: prometheus
      volumes:
      - name: data
        nfs:
          server: ops-200.host.com
          path: /data/nfs-volume/prometheus

[root@ops-200 prometheus]# vim service.yaml
# ClusterIP Service exposing the Prometheus web UI/API on port 9090.
apiVersion: v1
kind: Service
metadata:
  name: prometheus
  namespace: infra
spec:
  ports:
  - port: 9090
    protocol: TCP
    targetPort: 9090
  selector:
    app: prometheus

[root@ops-200 prometheus]# vim ingress.yaml
# Traefik Ingress routing prometheus.od.com to the prometheus Service.
# NOTE(review): extensions/v1beta1 Ingress is removed in Kubernetes 1.22+;
# migrate to networking.k8s.io/v1 when the cluster is upgraded.
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
  annotations:
    kubernetes.io/ingress.class: traefik
  name: prometheus
  namespace: infra
spec:
  rules:
  - host: prometheus.od.com
    http:
      paths:
      - path: /
        backend:
          serviceName: prometheus
          servicePort: 9090

准备配置文件

创建配置目录

[root@ops-200 prometheus]# mkdir /data/nfs-volume/prometheus
[root@ops-200 prometheus]# mkdir -p /data/nfs-volume/prometheus/{etc,prom-db}

拷贝证书,需要与apiserver通信

[root@ops-200 prometheus]# cd /data/nfs-volume/prometheus/etc/        
[root@ops-200 etc]# cp /opt/certs/ca.pem /opt/certs/client.pem /opt/certs/client-key.pem .

准备配置

[root@ops-200 etc]# vim prometheus.yml
# Prometheus main configuration: etcd scraped statically over mTLS, the rest
# discovered through the Kubernetes API (kubernetes_sd_configs).
global:
  scrape_interval: 15s       # default scrape frequency
  evaluation_interval: 15s   # rule evaluation frequency
scrape_configs:

# etcd, scraped over HTTPS with client certificates copied to /data/etc.
- job_name: 'etcd'
  tls_config:
    ca_file: /data/etc/ca.pem
    cert_file: /data/etc/client.pem
    key_file: /data/etc/client-key.pem
  scheme: https
  static_configs:  # static target list
  - targets:
    - '192.168.1.12:2379'
    - '192.168.1.21:2379'
    - '192.168.1.22:2379'

# The apiserver itself, via the in-cluster ServiceAccount credentials.
- job_name: 'kubernetes-apiservers'
  kubernetes_sd_configs:
  - role: endpoints
  scheme: https
  tls_config:
    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
  relabel_configs:
  # Keep only the default/kubernetes endpoint on its https port.
  - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
    action: keep
    regex: default;kubernetes;https

# Pods that opt in with the prometheus_io_scrape/path/port annotations.
- job_name: 'kubernetes-pods'
  kubernetes_sd_configs:
  - role: pod
  relabel_configs:
  - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
    action: keep
    regex: true
  - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
    action: replace
    target_label: __metrics_path__
    regex: (.+)
  # Rewrite the scrape address to use the annotated port.
  - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
    action: replace
    regex: ([^:]+)(?::\d+)?;(\d+)
    replacement: $1:$2
    target_label: __address__
  - action: labelmap
    regex: __meta_kubernetes_pod_label_(.+)
  - source_labels: [__meta_kubernetes_namespace]
    action: replace
    target_label: kubernetes_namespace
  - source_labels: [__meta_kubernetes_pod_name]
    action: replace
    target_label: kubernetes_pod_name

# Kubelet stats via the read-only port on every node.
- job_name: 'kubernetes-kubelet'
  kubernetes_sd_configs:
  - role: node
  relabel_configs:
  - action: labelmap
    regex: __meta_kubernetes_node_label_(.+)
  - source_labels: [__meta_kubernetes_node_name]
    regex: (.+)
    target_label: __address__
    replacement: ${1}:10255  # kubelet read-only port

# Container metrics from cAdvisor on every node.
- job_name: 'kubernetes-cadvisor'
  kubernetes_sd_configs:
  - role: node
  relabel_configs:
  - action: labelmap
    regex: __meta_kubernetes_node_label_(.+)
  - source_labels: [__meta_kubernetes_node_name]
    regex: (.+)
    target_label: __address__
    replacement: ${1}:4194  # cAdvisor port

# kube-state-metrics pods, selected by the grafanak8sapp label.
- job_name: 'kubernetes-kube-state'
  kubernetes_sd_configs:
  - role: pod
  relabel_configs:
  - action: labelmap
    regex: __meta_kubernetes_pod_label_(.+)
  - source_labels: [__meta_kubernetes_namespace]
    action: replace
    target_label: kubernetes_namespace
  - source_labels: [__meta_kubernetes_pod_name]
    action: replace
    target_label: kubernetes_pod_name
  - source_labels: [__meta_kubernetes_pod_label_grafanak8sapp]
    regex: .*true.*
    action: keep
  # For node-exporter daemon pods, record the node name as "nodename".
  - source_labels: ['__meta_kubernetes_pod_label_daemon', '__meta_kubernetes_pod_node_name']
    regex: 'node-exporter;(.*)'
    action: replace
    target_label: nodename

# HTTP liveness probes through blackbox-exporter (module http_2xx),
# driven by blackbox_scheme/port/path pod annotations.
- job_name: 'blackbox_http_pod_probe'
  metrics_path: /probe
  kubernetes_sd_configs:
  - role: pod
  params:
    module: [http_2xx]
  relabel_configs:
  - source_labels: [__meta_kubernetes_pod_annotation_blackbox_scheme]
    action: keep
    regex: http
  # Build the probe target as host:port/path from the annotations.
  - source_labels: [__address__, __meta_kubernetes_pod_annotation_blackbox_port, __meta_kubernetes_pod_annotation_blackbox_path]
    action: replace
    regex: ([^:]+)(?::\d+)?;(\d+);(.+)
    replacement: $1:$2$3
    target_label: __param_target
  # Send the actual scrape to the blackbox exporter.
  - action: replace
    target_label: __address__
    replacement: blackbox-exporter.kube-system:9115
  - source_labels: [__param_target]
    target_label: instance
  - action: labelmap
    regex: __meta_kubernetes_pod_label_(.+)
  - source_labels: [__meta_kubernetes_namespace]
    action: replace
    target_label: kubernetes_namespace
  - source_labels: [__meta_kubernetes_pod_name]
    action: replace
    target_label: kubernetes_pod_name

# TCP liveness probes through blackbox-exporter (module tcp_connect).
- job_name: 'blackbox_tcp_pod_probe'
  metrics_path: /probe
  kubernetes_sd_configs:
  - role: pod
  params:
    module: [tcp_connect]
  relabel_configs:
  - source_labels: [__meta_kubernetes_pod_annotation_blackbox_scheme]
    action: keep
    regex: tcp
  - source_labels: [__address__, __meta_kubernetes_pod_annotation_blackbox_port]
    action: replace
    regex: ([^:]+)(?::\d+)?;(\d+)
    replacement: $1:$2
    target_label: __param_target
  - action: replace
    target_label: __address__
    replacement: blackbox-exporter.kube-system:9115
  - source_labels: [__param_target]
    target_label: instance
  - action: labelmap
    regex: __meta_kubernetes_pod_label_(.+)
  - source_labels: [__meta_kubernetes_namespace]
    action: replace
    target_label: kubernetes_namespace
  - source_labels: [__meta_kubernetes_pod_name]
    action: replace
    target_label: kubernetes_pod_name

# Traefik pods annotated with prometheus_io_scheme: "traefik".
- job_name: 'traefik'
  kubernetes_sd_configs:
  - role: pod
  relabel_configs:
  - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme]
    action: keep
    regex: traefik
  - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
    action: replace
    target_label: __metrics_path__
    regex: (.+)
  - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
    action: replace
    regex: ([^:]+)(?::\d+)?;(\d+)
    replacement: $1:$2
    target_label: __address__
  - action: labelmap
    regex: __meta_kubernetes_pod_label_(.+)
  - source_labels: [__meta_kubernetes_namespace]
    action: replace
    target_label: kubernetes_namespace
  - source_labels: [__meta_kubernetes_pod_name]
    action: replace
    target_label: kubernetes_pod_name

应用资源配置清单

[root@ops-200 etc]# cd /data/k8s-yaml/prometheus/
[root@ops-200 prometheus]# kubectl apply -f rbac.yaml
serviceaccount/prometheus created
clusterrole.rbac.authorization.k8s.io/prometheus created
clusterrolebinding.rbac.authorization.k8s.io/prometheus created
[root@ops-200 prometheus]# kubectl apply -f deployment.yaml
deployment.apps/prometheus created
[root@ops-200 prometheus]# kubectl apply -f service.yaml
service/prometheus created
[root@ops-200 prometheus]# kubectl apply -f ingress.yaml
ingress.extensions/prometheus created

annotation注释

Annotations就是注解,Annotation与Label类似,也使用key/value键值对的形式进行定义.
Label具有严格的命名规则,它定义的是Kubernetes对象的元数据(Metadata),并且用于Label Selector.
Annotation则是用户任意定义的“附加”信息,以便客户端(如工具和库)可以检索此metadata.

annotation使用场景

  1. 声明配置层管理的字段。使用annotation关联这类字段可以用于区分以下几种配置来源:客户端或服务器设置的默认值,自动生成的字段或自动生成的 auto-scaling 和 auto-sizing 系统配置的字段。
  2. 创建信息、版本信息或镜像信息。例如时间戳、版本号、git分支、PR序号、镜像哈希值以及仓库地址。
  3. 记录日志、监控、分析或审计存储仓库的指针
  4. 可以用于debug的客户端(库或工具)信息,例如名称、版本和创建信息。
  5. 用户信息,以及工具或系统来源信息、例如来自非Kubernetes生态的相关对象的URL信息。
  6. 轻量级部署工具元数据,例如配置或检查点。
  7. 负责人的电话或联系方式,或能找到相关信息的目录条目信息,例如团队网站。

prometheus添加traefik监控

spec->template->metadata下,添加annotations注释

[root@ops-200 etc]# cd /data/k8s-yaml/traefik/
[root@ops-200 traefik]# vim daemonset.yaml
# Fragment of the traefik DaemonSet: annotations added under
# spec.template.metadata so Prometheus's 'traefik' job discovers the pods.
spec:
  selector:
    matchLabels:
      app: traefik
  template:
    metadata:
      name: traefik
      labels:
        app: traefik
      annotations:
        prometheus_io_scheme: "traefik"
        prometheus_io_path: "/metrics"
        prometheus_io_port: "8080"
[root@ops-200 traefik]# kubectl apply -f daemonset.yaml
daemonset.apps/traefik-ingress-controller configured

prometheus

image-20200413133958312

dubbo服务接入blackbox存活检查

提供者

Spinnaker添加annotation

TCP

# Pod annotations enabling the blackbox TCP probe on the dubbo port.
annotations:
  blackbox_port: "20880"
  blackbox_scheme: "tcp"

image-20200413135358057

prometheus

image-20200413144240407

消费者

Spinnaker添加annotation

HTTP

# Pod annotations enabling the blackbox HTTP probe against a health endpoint.
annotations:
  blackbox_path: "/hello?name=health"
  blackbox_port: "8080"
  blackbox_scheme: "http"

image-20200413145038983

prometheus

image-20200413145200497

dubbo服务接入jvm监控

提供者

Spinnaker添加annotation jvm

# Pod annotations enabling JVM metrics scraping via the 'kubernetes-pods' job.
annotations:
  prometheus_io_scrape: "true"
  prometheus_io_port: "12346"
  prometheus_io_path: "/"

image-20200413154211442

消费者

Spinnaker添加annotation

# Pod annotations enabling JVM metrics scraping via the 'kubernetes-pods' job.
annotations:
  prometheus_io_scrape: "true"
  prometheus_io_port: "12346"
  prometheus_io_path: "/"

image-20200413154506826

prometheus

image-20200413155039658