Steve Wilkerson fbd34421f2 Prometheus: Update chart to support federation
This updates the Prometheus chart to support federation. This
moves to defining the Prometheus configuration file via a template
in the values.yaml file instead of through raw yaml. This allows
for overriding the chart's default configuration wholesale, as
this would be required for a hierarchical federated setup. This
also strips out all of the default rules defined in the chart for
the same reason. There are example rules defined for the various
aspects of OSH's infrastructure in the prometheus/values_overrides
directory that are executed as part of the normal CI jobs. This
also adds a nonvoting federated-monitoring job that vets out the
ability to federate prometheus in a hierarchical fashion with
extremely basic overrides

Change-Id: I0f121ad5e4f80be4c790dc869955c6b299ca9f26
Signed-off-by: Steve Wilkerson <sw5822@att.com>
2019-11-21 12:39:56 +00:00

1064 lines
40 KiB
YAML

# Copyright 2017 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Default values for prometheus.
# This is a YAML-formatted file.
# Declare name/value pairs to be passed into your templates.
# name: value
images:
tags:
apache_proxy: docker.io/httpd:2.4
prometheus: docker.io/prom/prometheus:v2.12.0
helm_tests: docker.io/openstackhelm/heat:newton-ubuntu_xenial
dep_check: quay.io/airshipit/kubernetes-entrypoint:v1.0.0
image_repo_sync: docker.io/docker:17.07.0
pull_policy: IfNotPresent
local_registry:
active: false
exclude:
- dep_check
- image_repo_sync
labels:
prometheus:
node_selector_key: openstack-control-plane
node_selector_value: enabled
job:
node_selector_key: openstack-control-plane
node_selector_value: enabled
test:
node_selector_key: openstack-control-plane
node_selector_value: enabled
pod:
env:
prometheus: null
security_context:
api:
pod:
runAsUser: 65534
container:
prometheus_perms:
runAsUser: 0
readOnlyRootFilesystem: false
apache_proxy:
runAsUser: 0
readOnlyRootFilesystem: false
prometheus:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
affinity:
anti:
type:
default: preferredDuringSchedulingIgnoredDuringExecution
topologyKey:
default: kubernetes.io/hostname
weight:
default: 10
mounts:
prometheus:
prometheus:
init_container: null
replicas:
prometheus: 1
lifecycle:
upgrades:
statefulsets:
pod_replacement_strategy: RollingUpdate
termination_grace_period:
prometheus:
timeout: 30
resources:
enabled: false
prometheus:
limits:
memory: "1024Mi"
cpu: "2000m"
requests:
memory: "128Mi"
cpu: "500m"
jobs:
image_repo_sync:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
tests:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
endpoints:
cluster_domain_suffix: cluster.local
local_image_registry:
name: docker-registry
namespace: docker-registry
hosts:
default: localhost
internal: docker-registry
node: localhost
host_fqdn_override:
default: null
port:
registry:
node: 5000
monitoring:
name: prometheus
namespace: null
auth:
admin:
username: admin
password: changeme
hosts:
default: prom-metrics
public: prometheus
host_fqdn_override:
default: null
# NOTE(srwilkers): this chart supports TLS for fqdn over-ridden public
# endpoints using the following format:
# public:
# host: null
# tls:
# crt: null
# key: null
path:
default: null
scheme:
default: 'http'
port:
api:
default: 9090
http:
default: 80
alerts:
name: alertmanager
namespace: null
hosts:
default: alerts-engine
public: alertmanager
discovery: alertmanager-discovery
host_fqdn_override:
default: null
path:
default: null
scheme:
default: 'http'
port:
api:
default: 9093
public: 80
mesh:
default: 6783
ldap:
hosts:
default: ldap
auth:
admin:
bind: "cn=admin,dc=cluster,dc=local"
password: password
host_fqdn_override:
default: null
path:
default: "/ou=People,dc=cluster,dc=local"
scheme:
default: ldap
port:
ldap:
default: 389
dependencies:
dynamic:
common:
local_image_registry:
jobs:
- prometheus-image-repo-sync
services:
- endpoint: node
service: local_image_registry
static:
image_repo_sync:
services:
- endpoint: internal
service: local_image_registry
prometheus:
services: null
tests:
services:
- endpoint: internal
service: monitoring
monitoring:
prometheus:
enabled: true
prometheus:
scrape: true
network:
prometheus:
ingress:
public: true
classes:
namespace: "nginx"
cluster: "nginx-cluster"
annotations:
nginx.ingress.kubernetes.io/rewrite-target: /
nginx.ingress.kubernetes.io/affinity: cookie
nginx.ingress.kubernetes.io/session-cookie-name: kube-ingress-session-prometheus
nginx.ingress.kubernetes.io/session-cookie-hash: sha1
nginx.ingress.kubernetes.io/session-cookie-expires: "600"
nginx.ingress.kubernetes.io/session-cookie-max-age: "600"
node_port:
enabled: false
port: 30900
network_policy:
prometheus:
ingress:
- {}
egress:
- {}
secrets:
tls:
monitoring:
prometheus:
public: prometheus-tls-public
storage:
enabled: true
pvc:
name: prometheus-pvc
access_mode: [ "ReadWriteOnce" ]
requests:
storage: 5Gi
storage_class: general
manifests:
configmap_bin: true
configmap_etc: true
ingress: true
helm_tests: true
job_image_repo_sync: true
network_policy: true
secret_ingress_tls: true
secret_prometheus: true
service_ingress: true
service: true
statefulset_prometheus: true
conf:
httpd: |
ServerRoot "/usr/local/apache2"
Listen 80
LoadModule mpm_event_module modules/mod_mpm_event.so
LoadModule authn_file_module modules/mod_authn_file.so
LoadModule authn_core_module modules/mod_authn_core.so
LoadModule authz_host_module modules/mod_authz_host.so
LoadModule authz_groupfile_module modules/mod_authz_groupfile.so
LoadModule authz_user_module modules/mod_authz_user.so
LoadModule authz_core_module modules/mod_authz_core.so
LoadModule access_compat_module modules/mod_access_compat.so
LoadModule auth_basic_module modules/mod_auth_basic.so
LoadModule ldap_module modules/mod_ldap.so
LoadModule authnz_ldap_module modules/mod_authnz_ldap.so
LoadModule reqtimeout_module modules/mod_reqtimeout.so
LoadModule filter_module modules/mod_filter.so
LoadModule proxy_html_module modules/mod_proxy_html.so
LoadModule log_config_module modules/mod_log_config.so
LoadModule env_module modules/mod_env.so
LoadModule headers_module modules/mod_headers.so
LoadModule setenvif_module modules/mod_setenvif.so
LoadModule version_module modules/mod_version.so
LoadModule proxy_module modules/mod_proxy.so
LoadModule proxy_connect_module modules/mod_proxy_connect.so
LoadModule proxy_http_module modules/mod_proxy_http.so
LoadModule proxy_balancer_module modules/mod_proxy_balancer.so
LoadModule slotmem_shm_module modules/mod_slotmem_shm.so
LoadModule slotmem_plain_module modules/mod_slotmem_plain.so
LoadModule unixd_module modules/mod_unixd.so
LoadModule status_module modules/mod_status.so
LoadModule autoindex_module modules/mod_autoindex.so
<IfModule unixd_module>
User daemon
Group daemon
</IfModule>
<Directory />
AllowOverride none
Require all denied
</Directory>
<Files ".ht*">
Require all denied
</Files>
ErrorLog /dev/stderr
LogLevel warn
<IfModule log_config_module>
LogFormat "%a %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined
LogFormat "%{X-Forwarded-For}i %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" proxy
LogFormat "%h %l %u %t \"%r\" %>s %b" common
<IfModule logio_module>
LogFormat "%a %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" %I %O" combinedio
</IfModule>
SetEnvIf X-Forwarded-For "^.*\..*\..*\..*" forwarded
CustomLog /dev/stdout common
CustomLog /dev/stdout combined
CustomLog /dev/stdout proxy env=forwarded
</IfModule>
<Directory "/usr/local/apache2/cgi-bin">
AllowOverride None
Options None
Require all granted
</Directory>
<IfModule headers_module>
RequestHeader unset Proxy early
</IfModule>
<IfModule proxy_html_module>
Include conf/extra/proxy-html.conf
</IfModule>
<VirtualHost *:80>
# Expose metrics to all users, as this is not sensitive information and
# circumvents the inability of Prometheus to interpolate environment vars
# in its configuration file
<Location /metrics>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/metrics
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/metrics
Satisfy Any
Allow from all
</Location>
# Expose the /federate endpoint to all users, as this is also not
# sensitive information and circumvents the inability of Prometheus to
# interpolate environment vars in its configuration file
<Location /federate>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/metrics
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/metrics
Satisfy Any
Allow from all
</Location>
# Restrict general user (LDAP) access to the /graph endpoint, as general trusted
# users should only be able to query Prometheus for metrics and not have access
# to information like targets, configuration, flags or build info for Prometheus
<Location />
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file ldap
AuthUserFile /usr/local/apache2/conf/.htpasswd
AuthLDAPBindDN {{ .Values.endpoints.ldap.auth.admin.bind }}
AuthLDAPBindPassword {{ .Values.endpoints.ldap.auth.admin.password }}
AuthLDAPURL {{ tuple "ldap" "default" "ldap" . | include "helm-toolkit.endpoints.keystone_endpoint_uri_lookup" | quote }}
Require valid-user
</Location>
<Location /graph>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/graph
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/graph
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file ldap
AuthUserFile /usr/local/apache2/conf/.htpasswd
AuthLDAPBindDN {{ .Values.endpoints.ldap.auth.admin.bind }}
AuthLDAPBindPassword {{ .Values.endpoints.ldap.auth.admin.password }}
AuthLDAPURL {{ tuple "ldap" "default" "ldap" . | include "helm-toolkit.endpoints.keystone_endpoint_uri_lookup" | quote }}
Require valid-user
</Location>
# Restrict access to the /config (dashboard) and /api/v1/status/config (http) endpoints
# to the admin user
<Location /config>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/config
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/config
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file
AuthUserFile /usr/local/apache2/conf/.htpasswd
Require valid-user
</Location>
<Location /api/v1/status/config>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/status/config
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/status/config
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file
AuthUserFile /usr/local/apache2/conf/.htpasswd
Require valid-user
</Location>
# Restrict access to the /flags (dashboard) and /api/v1/status/flags (http) endpoints
# to the admin user
<Location /flags>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/flags
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/flags
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file
AuthUserFile /usr/local/apache2/conf/.htpasswd
Require valid-user
</Location>
<Location /api/v1/status/flags>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/status/flags
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/status/flags
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file
AuthUserFile /usr/local/apache2/conf/.htpasswd
Require valid-user
</Location>
# Restrict access to the /status (dashboard) endpoint to the admin user
<Location /status>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/status
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/status
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file
AuthUserFile /usr/local/apache2/conf/.htpasswd
Require valid-user
</Location>
# Restrict access to the /rules (dashboard) endpoint to the admin user
<Location /rules>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/rules
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/rules
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file
AuthUserFile /usr/local/apache2/conf/.htpasswd
Require valid-user
</Location>
# Restrict access to the /targets (dashboard) and /api/v1/targets (http) endpoints
# to the admin user
<Location /targets>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/targets
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/targets
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file
AuthUserFile /usr/local/apache2/conf/.htpasswd
Require valid-user
</Location>
<Location /api/v1/targets>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/targets
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/targets
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file
AuthUserFile /usr/local/apache2/conf/.htpasswd
Require valid-user
</Location>
# Restrict access to the /api/v1/admin/tsdb/ endpoints (http) to the admin user.
# These endpoints are disabled by default, but are included here to ensure only
# an admin user has access to these endpoints when enabled
<Location /api/v1/admin/tsdb/>
ProxyPass http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/admin/tsdb/
ProxyPassReverse http://localhost:{{ tuple "monitoring" "internal" "api" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}/api/v1/admin/tsdb/
AuthName "Prometheus"
AuthType Basic
AuthBasicProvider file
AuthUserFile /usr/local/apache2/conf/.htpasswd
Require valid-user
</Location>
</VirtualHost>
prometheus:
# Consumed by a prometheus helper function to generate the command line flags
# for configuring the prometheus service
command_line_flags:
log.level: info
query.max_concurrency: 20
query.timeout: 2m
storage.tsdb.path: /var/lib/prometheus/data
storage.tsdb.retention: 7d
# NOTE(srwilkers): These settings default to false, but they are
# exposed here to allow enabling if desired. Please note the security
# impacts of enabling these flags. More information regarding the impacts
# can be found here: https://prometheus.io/docs/operating/security/
#
# If set to true, all administrative functionality is exposed via the http
# /api/*/admin/ path
web.enable_admin_api: false
# If set to true, allows for http reloads and shutdown of Prometheus
web.enable_lifecycle: false
scrape_configs:
template: |
{{- $promHost := tuple "monitoring" "public" . | include "helm-toolkit.endpoints.hostname_fqdn_endpoint_lookup" }}
{{- if not (empty .Values.conf.prometheus.rules)}}
rule_files:
{{- $rulesKeys := keys .Values.conf.prometheus.rules -}}
{{- range $rule := $rulesKeys }}
{{ printf "- /etc/config/rules/%s.rules" $rule }}
{{- end }}
{{- end }}
global:
scrape_interval: 60s
evaluation_interval: 60s
external_labels:
prometheus_host: {{$promHost}}
scrape_configs:
- job_name: kubelet
scheme: https
# This TLS & bearer token file config is used to connect to the actual scrape
# endpoints for cluster components. This is separate to discovery auth
# configuration because discovery & scraping are two separate concerns in
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
# the cluster. Otherwise, more config options have to be provided within the
# <kubernetes_sd_config>.
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
scrape_interval: 45s
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels:
- __meta_kubernetes_node_name
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics
- source_labels:
- __meta_kubernetes_node_name
action: replace
target_label: kubernetes_io_hostname
# Scrape config for Kubelet cAdvisor.
#
# This is required for Kubernetes 1.7.3 and later, where cAdvisor metrics
# (those whose names begin with 'container_') have been removed from the
# Kubelet metrics endpoint. This job scrapes the cAdvisor endpoint to
# retrieve those metrics.
#
# In Kubernetes 1.7.0-1.7.2, these metrics are only exposed on the cAdvisor
# HTTP endpoint; use "replacement: /api/v1/nodes/${1}:4194/proxy/metrics"
# in that case (and ensure cAdvisor's HTTP server hasn't been disabled with
# the --cadvisor-port=0 Kubelet flag).
#
# This job is not necessary and should be removed in Kubernetes 1.6 and
# earlier versions, or it will cause the metrics to be scraped twice.
- job_name: 'kubernetes-cadvisor'
# Default to scraping over https. If required, just disable this or change to
# `http`.
scheme: https
# This TLS & bearer token file config is used to connect to the actual scrape
# endpoints for cluster components. This is separate to discovery auth
# configuration because discovery & scraping are two separate concerns in
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
# the cluster. Otherwise, more config options have to be provided within the
# <kubernetes_sd_config>.
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels:
- __meta_kubernetes_node_name
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
metric_relabel_configs:
- source_labels:
- __name__
regex: 'container_network_tcp_usage_total'
action: drop
- source_labels:
- __name__
regex: 'container_tasks_state'
action: drop
- source_labels:
- __name__
regex: 'container_network_udp_usage_total'
action: drop
- source_labels:
- __name__
regex: 'container_memory_failures_total'
action: drop
- source_labels:
- __name__
regex: 'container_cpu_load_average_10s'
action: drop
- source_labels:
- __name__
regex: 'container_cpu_system_seconds_total'
action: drop
- source_labels:
- __name__
regex: 'container_cpu_user_seconds_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_inodes_free'
action: drop
- source_labels:
- __name__
regex: 'container_fs_inodes_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_io_current'
action: drop
- source_labels:
- __name__
regex: 'container_fs_io_time_seconds_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_io_time_weighted_seconds_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_read_seconds_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_reads_merged_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_reads_merged_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_reads_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_sector_reads_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_sector_writes_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_write_seconds_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_writes_bytes_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_writes_merged_total'
action: drop
- source_labels:
- __name__
regex: 'container_fs_writes_total'
action: drop
- source_labels:
- __name__
regex: 'container_last_seen'
action: drop
- source_labels:
- __name__
regex: 'container_memory_cache'
action: drop
- source_labels:
- __name__
regex: 'container_memory_failcnt'
action: drop
- source_labels:
- __name__
regex: 'container_memory_max_usage_bytes'
action: drop
- source_labels:
- __name__
regex: 'container_memory_rss'
action: drop
- source_labels:
- __name__
regex: 'container_memory_swap'
action: drop
- source_labels:
- __name__
regex: 'container_memory_usage_bytes'
action: drop
- source_labels:
- __name__
regex: 'container_network_receive_errors_total'
action: drop
- source_labels:
- __name__
regex: 'container_network_receive_packets_dropped_total'
action: drop
- source_labels:
- __name__
regex: 'container_network_receive_packets_total'
action: drop
- source_labels:
- __name__
regex: 'container_network_transmit_errors_total'
action: drop
- source_labels:
- __name__
regex: 'container_network_transmit_packets_dropped_total'
action: drop
- source_labels:
- __name__
regex: 'container_network_transmit_packets_total'
action: drop
- source_labels:
- __name__
regex: 'container_spec_cpu_period'
action: drop
- source_labels:
- __name__
regex: 'container_spec_cpu_shares'
action: drop
- source_labels:
- __name__
regex: 'container_spec_memory_limit_bytes'
action: drop
- source_labels:
- __name__
regex: 'container_spec_memory_reservation_limit_bytes'
action: drop
- source_labels:
- __name__
regex: 'container_spec_memory_swap_limit_bytes'
action: drop
- source_labels:
- __name__
regex: 'container_start_time_seconds'
action: drop
# Scrape config for API servers.
#
# Kubernetes exposes API servers as endpoints to the default/kubernetes
# service so this uses `endpoints` role and uses relabelling to only keep
# the endpoints associated with the default/kubernetes service using the
# default named port `https`. This works for single API server deployments as
# well as HA API server deployments.
- job_name: 'apiserver'
kubernetes_sd_configs:
- role: endpoints
scrape_interval: 45s
# Default to scraping over https. If required, just disable this or change to
# `http`.
scheme: https
# This TLS & bearer token file config is used to connect to the actual scrape
# endpoints for cluster components. This is separate to discovery auth
# configuration because discovery & scraping are two separate concerns in
# Prometheus. The discovery auth config is automatic if Prometheus runs inside
# the cluster. Otherwise, more config options have to be provided within the
# <kubernetes_sd_config>.
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
# If your node certificates are self-signed or use a different CA to the
# master CA, then disable certificate verification below. Note that
# certificate verification is an integral part of a secure infrastructure
# so this should only be disabled in a controlled environment. You can
# disable certificate verification by uncommenting the line below.
#
# insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
# Keep only the default/kubernetes service endpoints for the https port. This
# will add targets for each API server which Kubernetes adds an endpoint to
# the default/kubernetes service.
relabel_configs:
- source_labels:
- __meta_kubernetes_namespace
- __meta_kubernetes_service_name
- __meta_kubernetes_endpoint_port_name
action: keep
regex: default;kubernetes;https
metric_relabel_configs:
- source_labels:
- __name__
regex: 'apiserver_admission_controller_admission_latencies_seconds_bucket'
action: drop
- source_labels:
- __name__
regex: 'rest_client_request_latency_seconds_bucket'
action: drop
- source_labels:
- __name__
regex: 'apiserver_response_sizes_bucket'
action: drop
- source_labels:
- __name__
regex: 'apiserver_admission_step_admission_latencies_seconds_bucket'
action: drop
- source_labels:
- __name__
regex: 'apiserver_admission_controller_admission_latencies_seconds_count'
action: drop
- source_labels:
- __name__
regex: 'apiserver_admission_controller_admission_latencies_seconds_sum'
action: drop
- source_labels:
- __name__
regex: 'apiserver_request_latencies_summary'
action: drop
# Scrape config for service endpoints.
#
# The relabeling allows the actual service scrape endpoint to be configured
# via the following annotations:
#
# * `prometheus.io/scrape`: Only scrape services that have a value of `true`
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
# to set this to `https` & most likely set the `tls_config` of the scrape config.
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: If the metrics are exposed on a different port to the
# service then set this appropriately.
- job_name: 'openstack-exporter'
kubernetes_sd_configs:
- role: endpoints
scrape_interval: 60s
relabel_configs:
- source_labels:
- __meta_kubernetes_service_name
action: keep
regex: "openstack-metrics"
- source_labels:
- __meta_kubernetes_service_annotation_prometheus_io_scrape
action: keep
regex: true
- source_labels:
- __meta_kubernetes_service_annotation_prometheus_io_scheme
action: replace
target_label: __scheme__
regex: (https?)
- source_labels:
- __meta_kubernetes_service_annotation_prometheus_io_path
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels:
- __address__
- __meta_kubernetes_service_annotation_prometheus_io_port
action: replace
target_label: __address__
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels:
- __meta_kubernetes_namespace
action: replace
target_label: kubernetes_namespace
- source_labels:
- __meta_kubernetes_service_name
action: replace
target_label: instance
- source_labels:
- __meta_kubernetes_service_name
action: replace
target_label: kubernetes_name
- source_labels:
- __meta_kubernetes_service_name
target_label: job
replacement: ${1}
- job_name: 'node-exporter'
kubernetes_sd_configs:
- role: endpoints
scrape_interval: 60s
relabel_configs:
- source_labels:
- __meta_kubernetes_service_name
action: keep
regex: 'node-exporter'
- source_labels:
- __meta_kubernetes_pod_node_name
action: replace
target_label: hostname
- job_name: 'kubernetes-service-endpoints'
kubernetes_sd_configs:
- role: endpoints
scrape_interval: 60s
relabel_configs:
- source_labels:
- __meta_kubernetes_service_name
action: drop
regex: '(openstack-metrics|prom-metrics|ceph-mgr|node-exporter)'
- source_labels:
- __meta_kubernetes_service_annotation_prometheus_io_scrape
action: keep
regex: true
- source_labels:
- __meta_kubernetes_service_annotation_prometheus_io_scheme
action: replace
target_label: __scheme__
regex: (https?)
- source_labels:
- __meta_kubernetes_service_annotation_prometheus_io_path
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels:
- __address__
- __meta_kubernetes_service_annotation_prometheus_io_port
action: replace
target_label: __address__
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels:
- __meta_kubernetes_namespace
action: replace
target_label: kubernetes_namespace
- source_labels:
- __meta_kubernetes_service_name
action: replace
target_label: kubernetes_name
- source_labels:
- __meta_kubernetes_service_name
target_label: job
replacement: ${1}
# Example scrape config for pods
#
# The relabeling allows the actual pod scrape endpoint to be configured via the
# following annotations:
#
# * `prometheus.io/scrape`: Only scrape pods that have a value of `true`
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: Scrape the pod on the indicated port instead of the
# pod's declared ports (default is a port-free target if none are declared).
- job_name: 'kubernetes-pods'
kubernetes_sd_configs:
- role: pod
relabel_configs:
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
action: replace
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
target_label: __address__
- action: labelmap
regex: __meta_kubernetes_pod_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: kubernetes_pod_name
- job_name: calico-etcd
kubernetes_sd_configs:
- role: service
scrape_interval: 20s
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- action: keep
source_labels:
- __meta_kubernetes_service_name
regex: "calico-etcd"
- action: keep
source_labels:
- __meta_kubernetes_namespace
regex: kube-system
target_label: namespace
- source_labels:
- __meta_kubernetes_pod_name
target_label: pod
- source_labels:
- __meta_kubernetes_service_name
target_label: service
- source_labels:
- __meta_kubernetes_service_name
target_label: job
replacement: ${1}
- source_labels:
- __meta_kubernetes_service_label
target_label: job
regex: calico-etcd
replacement: ${1}
- target_label: endpoint
replacement: "calico-etcd"
- job_name: ceph-mgr
kubernetes_sd_configs:
- role: service
scrape_interval: 20s
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- action: keep
source_labels:
- __meta_kubernetes_service_name
regex: "ceph-mgr"
- source_labels:
- __meta_kubernetes_service_port_name
action: drop
regex: 'ceph-mgr'
- action: keep
source_labels:
- __meta_kubernetes_namespace
regex: ceph
target_label: namespace
- source_labels:
- __meta_kubernetes_pod_name
target_label: pod
- source_labels:
- __meta_kubernetes_service_name
target_label: service
- source_labels:
- __meta_kubernetes_service_name
target_label: job
replacement: ${1}
- source_labels:
- __meta_kubernetes_service_label
target_label: job
regex: ceph-mgr
replacement: ${1}
- target_label: endpoint
replacement: "ceph-mgr"
alerting:
alertmanagers:
- kubernetes_sd_configs:
- role: pod
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- source_labels: [__meta_kubernetes_pod_label_application]
regex: alertmanager
action: keep
- source_labels: [__meta_kubernetes_pod_container_port_name]
regex: alerts-api
action: keep
- source_labels: [__meta_kubernetes_pod_container_port_name]
regex: peer-mesh
action: drop
rules: []