c97c592216
- Update alertmanger and prometheus discovery port from 6783 to 9094 - Update to support fqdn for discovery hostname - Add one test alert to Prometheus to test alert pipeline - update container name from alertmanger to prometheus-alertmanager Change-Id: Iec5e758e4b576dff01e84591a2440d030d5ff3c4
1074 lines
41 KiB
YAML
1074 lines
41 KiB
YAML
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
# Default values for prometheus.
|
|
# This is a YAML-formatted file.
|
|
# Declare name/value pairs to be passed into your templates.
|
|
# name: value
|
|
|
|
---
|
|
images:
|
|
tags:
|
|
apache_proxy: docker.io/httpd:2.4
|
|
prometheus: docker.io/prom/prometheus:v2.12.0
|
|
helm_tests: docker.io/openstackhelm/heat:newton-ubuntu_xenial
|
|
dep_check: quay.io/airshipit/kubernetes-entrypoint:v1.0.0
|
|
image_repo_sync: docker.io/docker:17.07.0
|
|
pull_policy: IfNotPresent
|
|
local_registry:
|
|
active: false
|
|
exclude:
|
|
- dep_check
|
|
- image_repo_sync
|
|
|
|
labels:
|
|
prometheus:
|
|
node_selector_key: openstack-control-plane
|
|
node_selector_value: enabled
|
|
job:
|
|
node_selector_key: openstack-control-plane
|
|
node_selector_value: enabled
|
|
test:
|
|
node_selector_key: openstack-control-plane
|
|
node_selector_value: enabled
|
|
|
|
pod:
|
|
env:
|
|
prometheus: null
|
|
security_context:
|
|
api:
|
|
pod:
|
|
runAsUser: 65534
|
|
container:
|
|
prometheus_perms:
|
|
runAsUser: 0
|
|
readOnlyRootFilesystem: false
|
|
apache_proxy:
|
|
runAsUser: 0
|
|
readOnlyRootFilesystem: false
|
|
prometheus:
|
|
allowPrivilegeEscalation: false
|
|
readOnlyRootFilesystem: true
|
|
test:
|
|
pod:
|
|
runAsUser: 65534
|
|
container:
|
|
prometheus_helm_tests:
|
|
readOnlyRootFilesystem: true
|
|
allowPrivilegeEscalation: false
|
|
affinity:
|
|
anti:
|
|
type:
|
|
default: preferredDuringSchedulingIgnoredDuringExecution
|
|
topologyKey:
|
|
default: kubernetes.io/hostname
|
|
weight:
|
|
default: 10
|
|
mounts:
|
|
prometheus:
|
|
prometheus:
|
|
init_container: null
|
|
replicas:
|
|
prometheus: 1
|
|
lifecycle:
|
|
upgrades:
|
|
statefulsets:
|
|
pod_replacement_strategy: RollingUpdate
|
|
termination_grace_period:
|
|
prometheus:
|
|
timeout: 30
|
|
resources:
|
|
enabled: false
|
|
prometheus:
|
|
limits:
|
|
memory: "1024Mi"
|
|
cpu: "2000m"
|
|
requests:
|
|
memory: "128Mi"
|
|
cpu: "500m"
|
|
jobs:
|
|
image_repo_sync:
|
|
requests:
|
|
memory: "128Mi"
|
|
cpu: "100m"
|
|
limits:
|
|
memory: "1024Mi"
|
|
cpu: "2000m"
|
|
tests:
|
|
requests:
|
|
memory: "128Mi"
|
|
cpu: "100m"
|
|
limits:
|
|
memory: "1024Mi"
|
|
cpu: "2000m"
|
|
|
|
endpoints:
|
|
cluster_domain_suffix: cluster.local
|
|
local_image_registry:
|
|
name: docker-registry
|
|
namespace: docker-registry
|
|
hosts:
|
|
default: localhost
|
|
internal: docker-registry
|
|
node: localhost
|
|
host_fqdn_override:
|
|
default: null
|
|
port:
|
|
registry:
|
|
node: 5000
|
|
monitoring:
|
|
name: prometheus
|
|
namespace: null
|
|
auth:
|
|
admin:
|
|
username: admin
|
|
password: changeme
|
|
federate:
|
|
username: federate
|
|
password: changeme
|
|
hosts:
|
|
default: prom-metrics
|
|
public: prometheus
|
|
host_fqdn_override:
|
|
default: null
|
|
# NOTE(srwilkers): this chart supports TLS for fqdn over-ridden public
|
|
# endpoints using the following format:
|
|
# public:
|
|
# host: null
|
|
# tls:
|
|
# crt: null
|
|
# key: null
|
|
path:
|
|
default: null
|
|
scheme:
|
|
default: 'http'
|
|
port:
|
|
api:
|
|
default: 9090
|
|
http:
|
|
default: 80
|
|
alerts:
|
|
name: alertmanager
|
|
namespace: null
|
|
hosts:
|
|
default: alerts-engine
|
|
public: alertmanager
|
|
discovery: alertmanager-discovery
|
|
host_fqdn_override:
|
|
default: null
|
|
path:
|
|
default: null
|
|
scheme:
|
|
default: 'http'
|
|
port:
|
|
api:
|
|
default: 9093
|
|
public: 80
|
|
mesh:
|
|
default: 9094
|
|
ldap:
|
|
hosts:
|
|
default: ldap
|
|
auth:
|
|
admin:
|
|
bind: "cn=admin,dc=cluster,dc=local"
|
|
password: password
|
|
host_fqdn_override:
|
|
default: null
|
|
path:
|
|
default: "/ou=People,dc=cluster,dc=local"
|
|
scheme:
|
|
default: ldap
|
|
port:
|
|
ldap:
|
|
default: 389
|
|
|
|
dependencies:
|
|
dynamic:
|
|
common:
|
|
local_image_registry:
|
|
jobs:
|
|
- prometheus-image-repo-sync
|
|
services:
|
|
- endpoint: node
|
|
service: local_image_registry
|
|
static:
|
|
image_repo_sync:
|
|
services:
|
|
- endpoint: internal
|
|
service: local_image_registry
|
|
prometheus:
|
|
services: null
|
|
tests:
|
|
services:
|
|
- endpoint: internal
|
|
service: monitoring
|
|
|
|
monitoring:
|
|
prometheus:
|
|
enabled: true
|
|
prometheus:
|
|
scrape: true
|
|
|
|
network:
|
|
prometheus:
|
|
ingress:
|
|
public: true
|
|
classes:
|
|
namespace: "nginx"
|
|
cluster: "nginx-cluster"
|
|
annotations:
|
|
nginx.ingress.kubernetes.io/rewrite-target: /
|
|
nginx.ingress.kubernetes.io/affinity: cookie
|
|
nginx.ingress.kubernetes.io/session-cookie-name: kube-ingress-session-prometheus
|
|
nginx.ingress.kubernetes.io/session-cookie-hash: sha1
|
|
nginx.ingress.kubernetes.io/session-cookie-expires: "600"
|
|
nginx.ingress.kubernetes.io/session-cookie-max-age: "600"
|
|
node_port:
|
|
enabled: false
|
|
port: 30900
|
|
|
|
network_policy:
|
|
prometheus:
|
|
ingress:
|
|
- {}
|
|
egress:
|
|
- {}
|
|
|
|
secrets:
|
|
tls:
|
|
monitoring:
|
|
prometheus:
|
|
public: prometheus-tls-public
|
|
|
|
storage:
|
|
enabled: true
|
|
pvc:
|
|
name: prometheus-pvc
|
|
access_mode: ["ReadWriteOnce"]
|
|
requests:
|
|
storage: 5Gi
|
|
storage_class: general
|
|
|
|
manifests:
|
|
configmap_bin: true
|
|
configmap_etc: true
|
|
ingress: true
|
|
helm_tests: true
|
|
job_image_repo_sync: true
|
|
network_policy: true
|
|
secret_ingress_tls: true
|
|
secret_prometheus: true
|
|
service_ingress: true
|
|
service: true
|
|
statefulset_prometheus: true
|
|
|
|
conf:
|
|
httpd: |
|
|
ServerRoot "/usr/local/apache2"
|
|
|
|
Listen 80
|
|
|
|
LoadModule mpm_event_module modules/mod_mpm_event.so
|
|
LoadModule authn_file_module modules/mod_authn_file.so
|
|
LoadModule authn_core_module modules/mod_authn_core.so
|
|
LoadModule authz_host_module modules/mod_authz_host.so
|
|
LoadModule authz_groupfile_module modules/mod_authz_groupfile.so
|
|
LoadModule authz_user_module modules/mod_authz_user.so
|
|
LoadModule authz_core_module modules/mod_authz_core.so
|
|
LoadModule access_compat_module modules/mod_access_compat.so
|
|
LoadModule auth_basic_module modules/mod_auth_basic.so
|
|
LoadModule ldap_module modules/mod_ldap.so
|
|
LoadModule authnz_ldap_module modules/mod_authnz_ldap.so
|
|
LoadModule reqtimeout_module modules/mod_reqtimeout.so
|
|
LoadModule filter_module modules/mod_filter.so
|
|
LoadModule proxy_html_module modules/mod_proxy_html.so
|
|
LoadModule log_config_module modules/mod_log_config.so
|
|
LoadModule env_module modules/mod_env.so
|
|
LoadModule headers_module modules/mod_headers.so
|
|
LoadModule setenvif_module modules/mod_setenvif.so
|
|
LoadModule version_module modules/mod_version.so
|
|
LoadModule proxy_module modules/mod_proxy.so
|
|
LoadModule proxy_connect_module modules/mod_proxy_connect.so
|
|
LoadModule proxy_http_module modules/mod_proxy_http.so
|
|
LoadModule proxy_balancer_module modules/mod_proxy_balancer.so
|
|
LoadModule slotmem_shm_module modules/mod_slotmem_shm.so
|
|
LoadModule slotmem_plain_module modules/mod_slotmem_plain.so
|
|
LoadModule unixd_module modules/mod_unixd.so
|
|
LoadModule status_module modules/mod_status.so
|
|
LoadModule autoindex_module modules/mod_autoindex.so
|
|
|
|
<IfModule unixd_module>
|
|
User daemon
|
|
Group daemon
|
|
</IfModule>
|
|
|
|
<Directory />
|
|
AllowOverride none
|
|
Require all denied
|
|
</Directory>
|
|
|
|
<Files ".ht*">
|
|
Require all denied
|
|
</Files>
|
|
|
|
ErrorLog /dev/stderr
|
|
|
|
LogLevel warn
|
|
|
|
<IfModule log_config_module>
|
|
LogFormat "%a %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined
|
|
LogFormat "%{X-Forwarded-For}i %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" proxy
|
|
LogFormat "%h %l %u %t \"%r\" %>s %b" common
|
|
|
|
<IfModule logio_module>
|
|
LogFormat "%a %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" %I %O" combinedio
|
|
</IfModule>
|
|
|
|
SetEnvIf X-Forwarded-For "^.*\..*\..*\..*" forwarded
|
|
CustomLog /dev/stdout common
|
|
CustomLog /dev/stdout combined
|
|
CustomLog /dev/stdout proxy env=forwarded
|
|
</IfModule>
|
|
|
|
<Directory "/usr/local/apache2/cgi-bin">
|
|
AllowOverride None
|
|
Options None
|
|
Require all granted
|
|
</Directory>
|
|
|
|
<IfModule headers_module>
|
|
RequestHeader unset Proxy early
|
|
</IfModule>
|
|
|
|
<IfModule proxy_html_module>
|
|
Include conf/extra/proxy-html.conf
|
|
</IfModule>
|
|
|
|
<VirtualHost *:80>
|
|
# Expose metrics to all users, as this is not sensitive information and
|
|
# circumvents the inability of Prometheus to interpolate environment vars
|
|
# in its configuration file
|
|
<Location /metrics>
|
|
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/metrics
|
|
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/metrics
|
|
Satisfy Any
|
|
Allow from all
|
|
</Location>
|
|
# Expose the /federate endpoint to all users, as this is also not
|
|
# sensitive information and circumvents the inability of Prometheus to
|
|
# interpolate environment vars in its configuration file
|
|
<Location /federate>
|
|
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/metrics
|
|
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/metrics
|
|
Satisfy Any
|
|
Allow from all
|
|
</Location>
|
|
# Restrict general user (LDAP) access to the /graph endpoint, as general trusted
|
|
# users should only be able to query Prometheus for metrics and not have access
|
|
# to information like targets, configuration, flags or build info for Prometheus
|
|
<Location />
|
|
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/
|
|
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/
|
|
AuthName "Prometheus"
|
|
AuthType Basic
|
|
AuthBasicProvider file ldap
|
|
AuthUserFile /usr/local/apache2/conf/.htpasswd
|
|
AuthLDAPBindDN {{ .Values.endpoints.ldap.auth.admin.bind }}
|
|
AuthLDAPBindPassword {{ .Values.endpoints.ldap.auth.admin.password }}
|
|
AuthLDAPURL {{ tuple "ldap" "default" "ldap" . | include "helm-toolkit.endpoints.keystone_endpoint_uri_lookup" | quote }}
|
|
Require valid-user
|
|
</Location>
|
|
<Location /graph>
|
|
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/graph
|
|
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/graph
|
|
AuthName "Prometheus"
|
|
AuthType Basic
|
|
AuthBasicProvider file ldap
|
|
AuthUserFile /usr/local/apache2/conf/.htpasswd
|
|
AuthLDAPBindDN {{ .Values.endpoints.ldap.auth.admin.bind }}
|
|
AuthLDAPBindPassword {{ .Values.endpoints.ldap.auth.admin.password }}
|
|
AuthLDAPURL {{ tuple "ldap" "default" "ldap" . | include "helm-toolkit.endpoints.keystone_endpoint_uri_lookup" | quote }}
|
|
Require valid-user
|
|
</Location>
|
|
# Restrict access to the /config (dashboard) and /api/v1/status/config (http) endpoints
|
|
# to the admin user
|
|
<Location /config>
|
|
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/config
|
|
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/config
|
|
AuthName "Prometheus"
|
|
AuthType Basic
|
|
AuthBasicProvider file
|
|
AuthUserFile /usr/local/apache2/conf/.htpasswd
|
|
Require valid-user
|
|
</Location>
|
|
<Location /api/v1/status/config>
|
|
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/status/config
|
|
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/status/config
|
|
AuthName "Prometheus"
|
|
AuthType Basic
|
|
AuthBasicProvider file
|
|
AuthUserFile /usr/local/apache2/conf/.htpasswd
|
|
Require valid-user
|
|
</Location>
|
|
# Restrict access to the /flags (dashboard) and /api/v1/status/flags (http) endpoints
|
|
# to the admin user
|
|
<Location /flags>
|
|
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/flags
|
|
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/flags
|
|
AuthName "Prometheus"
|
|
AuthType Basic
|
|
AuthBasicProvider file
|
|
AuthUserFile /usr/local/apache2/conf/.htpasswd
|
|
Require valid-user
|
|
</Location>
|
|
<Location /api/v1/status/flags>
|
|
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/status/flags
|
|
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/status/flags
|
|
AuthName "Prometheus"
|
|
AuthType Basic
|
|
AuthBasicProvider file
|
|
AuthUserFile /usr/local/apache2/conf/.htpasswd
|
|
Require valid-user
|
|
</Location>
|
|
# Restrict access to the /status (dashboard) endpoint to the admin user
|
|
<Location /status>
|
|
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/status
|
|
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/status
|
|
AuthName "Prometheus"
|
|
AuthType Basic
|
|
AuthBasicProvider file
|
|
AuthUserFile /usr/local/apache2/conf/.htpasswd
|
|
Require valid-user
|
|
</Location>
|
|
# Restrict access to the /rules (dashboard) endpoint to the admin user
|
|
<Location /rules>
|
|
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/rules
|
|
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/rules
|
|
AuthName "Prometheus"
|
|
AuthType Basic
|
|
AuthBasicProvider file
|
|
AuthUserFile /usr/local/apache2/conf/.htpasswd
|
|
Require valid-user
|
|
</Location>
|
|
# Restrict access to the /targets (dashboard) and /api/v1/targets (http) endpoints
|
|
# to the admin user
|
|
<Location /targets>
|
|
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/targets
|
|
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/targets
|
|
AuthName "Prometheus"
|
|
AuthType Basic
|
|
AuthBasicProvider file
|
|
AuthUserFile /usr/local/apache2/conf/.htpasswd
|
|
Require valid-user
|
|
</Location>
|
|
<Location /api/v1/targets>
|
|
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/targets
|
|
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/targets
|
|
AuthName "Prometheus"
|
|
AuthType Basic
|
|
AuthBasicProvider file
|
|
AuthUserFile /usr/local/apache2/conf/.htpasswd
|
|
Require valid-user
|
|
</Location>
|
|
# Restrict access to the /api/v1/admin/tsdb/ endpoints (http) to the admin user.
|
|
# These endpoints are disabled by default, but are included here to ensure only
|
|
# an admin user has access to these endpoints when enabled
|
|
<Location /api/v1/admin/tsdb/>
|
|
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/admin/tsdb/
|
|
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/admin/tsdb/
|
|
AuthName "Prometheus"
|
|
AuthType Basic
|
|
AuthBasicProvider file
|
|
AuthUserFile /usr/local/apache2/conf/.htpasswd
|
|
Require valid-user
|
|
</Location>
|
|
</VirtualHost>
|
|
prometheus:
|
|
# Consumed by a prometheus helper function to generate the command line flags
|
|
# for configuring the prometheus service
|
|
command_line_flags:
|
|
log.level: info
|
|
query.max_concurrency: 20
|
|
query.timeout: 2m
|
|
storage.tsdb.path: /var/lib/prometheus/data
|
|
storage.tsdb.retention: 7d
|
|
# NOTE(srwilkers): These settings default to false, but they are
|
|
# exposed here to allow enabling if desired. Please note the security
|
|
# impacts of enabling these flags. More information regarding the impacts
|
|
# can be found here: https://prometheus.io/docs/operating/security/
|
|
#
|
|
# If set to true, all administrative functionality is exposed via the http
|
|
# /api/*/admin/ path
|
|
web.enable_admin_api: false
|
|
# If set to true, allows for http reloads and shutdown of Prometheus
|
|
web.enable_lifecycle: false
|
|
scrape_configs:
|
|
template: |
|
|
{{- $promHost := tuple "monitoring" "public" . | include "helm-toolkit.endpoints.hostname_fqdn_endpoint_lookup" }}
|
|
{{- if not (empty .Values.conf.prometheus.rules)}}
|
|
rule_files:
|
|
{{- $rulesKeys := keys .Values.conf.prometheus.rules -}}
|
|
{{- range $rule := $rulesKeys }}
|
|
{{ printf "- /etc/config/rules/%s.rules" $rule }}
|
|
{{- end }}
|
|
{{- end }}
|
|
global:
|
|
scrape_interval: 60s
|
|
evaluation_interval: 60s
|
|
external_labels:
|
|
prometheus_host: {{$promHost}}
|
|
scrape_configs:
|
|
- job_name: kubelet
|
|
scheme: https
|
|
# This TLS & bearer token file config is used to connect to the actual scrape
|
|
# endpoints for cluster components. This is separate to discovery auth
|
|
# configuration because discovery & scraping are two separate concerns in
|
|
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
|
|
# the cluster. Otherwise, more config options have to be provided within the
|
|
# <kubernetes_sd_config>.
|
|
tls_config:
|
|
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
|
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
|
kubernetes_sd_configs:
|
|
- role: node
|
|
scrape_interval: 45s
|
|
relabel_configs:
|
|
- action: labelmap
|
|
regex: __meta_kubernetes_node_label_(.+)
|
|
- target_label: __address__
|
|
replacement: kubernetes.default.svc:443
|
|
- source_labels:
|
|
- __meta_kubernetes_node_name
|
|
regex: (.+)
|
|
target_label: __metrics_path__
|
|
replacement: /api/v1/nodes/${1}/proxy/metrics
|
|
- source_labels:
|
|
- __meta_kubernetes_node_name
|
|
action: replace
|
|
target_label: kubernetes_io_hostname
|
|
# Scrape config for Kubelet cAdvisor.
|
|
#
|
|
# This is required for Kubernetes 1.7.3 and later, where cAdvisor metrics
|
|
# (those whose names begin with 'container_') have been removed from the
|
|
# Kubelet metrics endpoint. This job scrapes the cAdvisor endpoint to
|
|
# retrieve those metrics.
|
|
#
|
|
# In Kubernetes 1.7.0-1.7.2, these metrics are only exposed on the cAdvisor
|
|
# HTTP endpoint; use "replacement: /api/v1/nodes/${1}:4194/proxy/metrics"
|
|
# in that case (and ensure cAdvisor's HTTP server hasn't been disabled with
|
|
# the --cadvisor-port=0 Kubelet flag).
|
|
#
|
|
# This job is not necessary and should be removed in Kubernetes 1.6 and
|
|
# earlier versions, or it will cause the metrics to be scraped twice.
|
|
- job_name: 'kubernetes-cadvisor'
|
|
|
|
# Default to scraping over https. If required, just disable this or change to
|
|
# `http`.
|
|
scheme: https
|
|
|
|
# This TLS & bearer token file config is used to connect to the actual scrape
|
|
# endpoints for cluster components. This is separate to discovery auth
|
|
# configuration because discovery & scraping are two separate concerns in
|
|
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
|
|
# the cluster. Otherwise, more config options have to be provided within the
|
|
# <kubernetes_sd_config>.
|
|
tls_config:
|
|
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
|
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
|
|
|
kubernetes_sd_configs:
|
|
- role: node
|
|
|
|
relabel_configs:
|
|
- action: labelmap
|
|
regex: __meta_kubernetes_node_label_(.+)
|
|
- target_label: __address__
|
|
replacement: kubernetes.default.svc:443
|
|
- source_labels:
|
|
- __meta_kubernetes_node_name
|
|
regex: (.+)
|
|
target_label: __metrics_path__
|
|
replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
|
|
metric_relabel_configs:
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_network_tcp_usage_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_tasks_state'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_network_udp_usage_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_memory_failures_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_cpu_load_average_10s'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_cpu_system_seconds_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_cpu_user_seconds_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_fs_inodes_free'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_fs_inodes_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_fs_io_current'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_fs_io_time_seconds_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_fs_io_time_weighted_seconds_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_fs_read_seconds_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_fs_reads_merged_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_fs_reads_merged_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_fs_reads_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_fs_sector_reads_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_fs_sector_writes_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_fs_write_seconds_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_fs_writes_bytes_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_fs_writes_merged_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_fs_writes_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_last_seen'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_memory_cache'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_memory_failcnt'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_memory_max_usage_bytes'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_memory_rss'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_memory_swap'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_memory_usage_bytes'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_network_receive_errors_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_network_receive_packets_dropped_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_network_receive_packets_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_network_transmit_errors_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_network_transmit_packets_dropped_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_network_transmit_packets_total'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_spec_cpu_period'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_spec_cpu_shares'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_spec_memory_limit_bytes'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_spec_memory_reservation_limit_bytes'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_spec_memory_swap_limit_bytes'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'container_start_time_seconds'
|
|
action: drop
|
|
# Scrape config for API servers.
|
|
#
|
|
# Kubernetes exposes API servers as endpoints to the default/kubernetes
|
|
# service so this uses `endpoints` role and uses relabelling to only keep
|
|
# the endpoints associated with the default/kubernetes service using the
|
|
# default named port `https`. This works for single API server deployments as
|
|
# well as HA API server deployments.
|
|
- job_name: 'apiserver'
|
|
kubernetes_sd_configs:
|
|
- role: endpoints
|
|
scrape_interval: 45s
|
|
# Default to scraping over https. If required, just disable this or change to
|
|
# `http`.
|
|
scheme: https
|
|
# This TLS & bearer token file config is used to connect to the actual scrape
|
|
# endpoints for cluster components. This is separate to discovery auth
|
|
# configuration because discovery & scraping are two separate concerns in
|
|
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
|
|
# the cluster. Otherwise, more config options have to be provided within the
|
|
# <kubernetes_sd_config>.
|
|
tls_config:
|
|
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
|
# If your node certificates are self-signed or use a different CA to the
|
|
# master CA, then disable certificate verification below. Note that
|
|
# certificate verification is an integral part of a secure infrastructure
|
|
# so this should only be disabled in a controlled environment. You can
|
|
# disable certificate verification by uncommenting the line below.
|
|
#
|
|
# insecure_skip_verify: true
|
|
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
|
# Keep only the default/kubernetes service endpoints for the https port. This
|
|
# will add targets for each API server which Kubernetes adds an endpoint to
|
|
# the default/kubernetes service.
|
|
relabel_configs:
|
|
- source_labels:
|
|
- __meta_kubernetes_namespace
|
|
- __meta_kubernetes_service_name
|
|
- __meta_kubernetes_endpoint_port_name
|
|
action: keep
|
|
regex: default;kubernetes;https
|
|
metric_relabel_configs:
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'apiserver_admission_controller_admission_latencies_seconds_bucket'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'rest_client_request_latency_seconds_bucket'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'apiserver_response_sizes_bucket'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'apiserver_admission_step_admission_latencies_seconds_bucket'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'apiserver_admission_controller_admission_latencies_seconds_count'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'apiserver_admission_controller_admission_latencies_seconds_sum'
|
|
action: drop
|
|
- source_labels:
|
|
- __name__
|
|
regex: 'apiserver_request_latencies_summary'
|
|
action: drop
|
|
# Scrape config for service endpoints.
|
|
#
|
|
# The relabeling allows the actual service scrape endpoint to be configured
|
|
# via the following annotations:
|
|
#
|
|
# * `prometheus.io/scrape`: Only scrape services that have a value of `true`
|
|
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
|
|
# to set this to `https` & most likely set the `tls_config` of the scrape config.
|
|
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
|
|
# * `prometheus.io/port`: If the metrics are exposed on a different port to the
|
|
# service then set this appropriately.
|
|
- job_name: 'openstack-exporter'
|
|
kubernetes_sd_configs:
|
|
- role: endpoints
|
|
scrape_interval: 60s
|
|
relabel_configs:
|
|
- source_labels:
|
|
- __meta_kubernetes_service_name
|
|
action: keep
|
|
regex: "openstack-metrics"
|
|
- source_labels:
|
|
- __meta_kubernetes_service_annotation_prometheus_io_scrape
|
|
action: keep
|
|
regex: true
|
|
- source_labels:
|
|
- __meta_kubernetes_service_annotation_prometheus_io_scheme
|
|
action: replace
|
|
target_label: __scheme__
|
|
regex: (https?)
|
|
- source_labels:
|
|
- __meta_kubernetes_service_annotation_prometheus_io_path
|
|
action: replace
|
|
target_label: __metrics_path__
|
|
regex: (.+)
|
|
- source_labels:
|
|
- __address__
|
|
- __meta_kubernetes_service_annotation_prometheus_io_port
|
|
action: replace
|
|
target_label: __address__
|
|
regex: ([^:]+)(?::\d+)?;(\d+)
|
|
replacement: $1:$2
|
|
- action: labelmap
|
|
regex: __meta_kubernetes_service_label_(.+)
|
|
- source_labels:
|
|
- __meta_kubernetes_namespace
|
|
action: replace
|
|
target_label: kubernetes_namespace
|
|
- source_labels:
|
|
- __meta_kubernetes_service_name
|
|
action: replace
|
|
target_label: instance
|
|
- source_labels:
|
|
- __meta_kubernetes_service_name
|
|
action: replace
|
|
target_label: kubernetes_name
|
|
- source_labels:
|
|
- __meta_kubernetes_service_name
|
|
target_label: job
|
|
replacement: ${1}
|
|
- job_name: 'node-exporter'
|
|
kubernetes_sd_configs:
|
|
- role: endpoints
|
|
scrape_interval: 60s
|
|
relabel_configs:
|
|
- source_labels:
|
|
- __meta_kubernetes_service_name
|
|
action: keep
|
|
regex: 'node-exporter'
|
|
- source_labels:
|
|
- __meta_kubernetes_pod_node_name
|
|
action: replace
|
|
target_label: hostname
|
|
- job_name: 'kubernetes-service-endpoints'
|
|
kubernetes_sd_configs:
|
|
- role: endpoints
|
|
scrape_interval: 60s
|
|
relabel_configs:
|
|
- source_labels:
|
|
- __meta_kubernetes_service_name
|
|
action: drop
|
|
regex: '(openstack-metrics|prom-metrics|ceph-mgr|node-exporter)'
|
|
- source_labels:
|
|
- __meta_kubernetes_service_annotation_prometheus_io_scrape
|
|
action: keep
|
|
regex: true
|
|
- source_labels:
|
|
- __meta_kubernetes_service_annotation_prometheus_io_scheme
|
|
action: replace
|
|
target_label: __scheme__
|
|
regex: (https?)
|
|
- source_labels:
|
|
- __meta_kubernetes_service_annotation_prometheus_io_path
|
|
action: replace
|
|
target_label: __metrics_path__
|
|
regex: (.+)
|
|
- source_labels:
|
|
- __address__
|
|
- __meta_kubernetes_service_annotation_prometheus_io_port
|
|
action: replace
|
|
target_label: __address__
|
|
regex: ([^:]+)(?::\d+)?;(\d+)
|
|
replacement: $1:$2
|
|
- action: labelmap
|
|
regex: __meta_kubernetes_service_label_(.+)
|
|
- source_labels:
|
|
- __meta_kubernetes_namespace
|
|
action: replace
|
|
target_label: kubernetes_namespace
|
|
- source_labels:
|
|
- __meta_kubernetes_service_name
|
|
action: replace
|
|
target_label: kubernetes_name
|
|
- source_labels:
|
|
- __meta_kubernetes_service_name
|
|
target_label: job
|
|
replacement: ${1}
|
|
# Example scrape config for pods
|
|
#
|
|
# The relabeling allows the actual pod scrape endpoint to be configured via the
|
|
# following annotations:
|
|
#
|
|
# * `prometheus.io/scrape`: Only scrape pods that have a value of `true`
|
|
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
|
|
# * `prometheus.io/port`: Scrape the pod on the indicated port instead of the
|
|
# pod's declared ports (default is a port-free target if none are declared).
|
|
- job_name: 'kubernetes-pods'
|
|
kubernetes_sd_configs:
|
|
- role: pod
|
|
relabel_configs:
|
|
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
|
|
action: keep
|
|
regex: true
|
|
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
|
|
action: replace
|
|
target_label: __metrics_path__
|
|
regex: (.+)
|
|
- source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
|
|
action: replace
|
|
regex: ([^:]+)(?::\d+)?;(\d+)
|
|
replacement: $1:$2
|
|
target_label: __address__
|
|
- action: labelmap
|
|
regex: __meta_kubernetes_pod_label_(.+)
|
|
- source_labels: [__meta_kubernetes_namespace]
|
|
action: replace
|
|
target_label: kubernetes_namespace
|
|
- source_labels: [__meta_kubernetes_pod_name]
|
|
action: replace
|
|
target_label: kubernetes_pod_name
|
|
- job_name: calico-etcd
|
|
kubernetes_sd_configs:
|
|
- role: service
|
|
scrape_interval: 20s
|
|
relabel_configs:
|
|
- action: labelmap
|
|
regex: __meta_kubernetes_service_label_(.+)
|
|
- action: keep
|
|
source_labels:
|
|
- __meta_kubernetes_service_name
|
|
regex: "calico-etcd"
|
|
- action: keep
|
|
source_labels:
|
|
- __meta_kubernetes_namespace
|
|
regex: kube-system
|
|
target_label: namespace
|
|
- source_labels:
|
|
- __meta_kubernetes_pod_name
|
|
target_label: pod
|
|
- source_labels:
|
|
- __meta_kubernetes_service_name
|
|
target_label: service
|
|
- source_labels:
|
|
- __meta_kubernetes_service_name
|
|
target_label: job
|
|
replacement: ${1}
|
|
- source_labels:
|
|
- __meta_kubernetes_service_label
|
|
target_label: job
|
|
regex: calico-etcd
|
|
replacement: ${1}
|
|
- target_label: endpoint
|
|
replacement: "calico-etcd"
|
|
- job_name: ceph-mgr
|
|
kubernetes_sd_configs:
|
|
- role: service
|
|
scrape_interval: 20s
|
|
relabel_configs:
|
|
- action: labelmap
|
|
regex: __meta_kubernetes_service_label_(.+)
|
|
- action: keep
|
|
source_labels:
|
|
- __meta_kubernetes_service_name
|
|
regex: "ceph-mgr"
|
|
- source_labels:
|
|
- __meta_kubernetes_service_port_name
|
|
action: drop
|
|
regex: 'ceph-mgr'
|
|
- action: keep
|
|
source_labels:
|
|
- __meta_kubernetes_namespace
|
|
regex: ceph
|
|
target_label: namespace
|
|
- source_labels:
|
|
- __meta_kubernetes_pod_name
|
|
target_label: pod
|
|
- source_labels:
|
|
- __meta_kubernetes_service_name
|
|
target_label: service
|
|
- source_labels:
|
|
- __meta_kubernetes_service_name
|
|
target_label: job
|
|
replacement: ${1}
|
|
- source_labels:
|
|
- __meta_kubernetes_service_label
|
|
target_label: job
|
|
regex: ceph-mgr
|
|
replacement: ${1}
|
|
- target_label: endpoint
|
|
replacement: "ceph-mgr"
|
|
alerting:
|
|
alertmanagers:
|
|
- kubernetes_sd_configs:
|
|
- role: pod
|
|
tls_config:
|
|
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
|
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
|
relabel_configs:
|
|
- source_labels: [__meta_kubernetes_pod_label_application]
|
|
regex: alertmanager
|
|
action: keep
|
|
- source_labels: [__meta_kubernetes_pod_container_port_name]
|
|
regex: alerts-api
|
|
action: keep
|
|
- source_labels: [__meta_kubernetes_pod_container_port_name]
|
|
regex: peer-mesh
|
|
action: drop
|
|
rules: []
|
|
...
|