From 9d2e08f1a41b68286d68f58ef47d2863ddf08fdb Mon Sep 17 00:00:00 2001 From: Steven Fitzpatrick Date: Fri, 20 Mar 2020 11:28:19 -0500 Subject: [PATCH] Fluentd: Switch to Native Metrics Plugin This change updates the fluentd chart to use the native fluent-plugin-prometheus for metric production. This plugin provides more detailed metrics about fluentd's operations, specifically regarding input and output statistics. https://github.com/fluent/fluent-plugin-prometheus Using the plugin, each fluentd pod produces metrics, so scrape annotations have been added to the pod spec. The zuul check on metric producers has been updated to account for this. Depends-On: https://review.opendev.org/714167 Change-Id: I809356d92b0cff1e31cb2062102bbedefd4843fd --- fluentd/templates/deployment-fluentd.yaml | 4 + .../prometheus/bin/_fluentd-exporter.sh.tpl | 30 ------- .../prometheus/exporter-configmap-bin.yaml | 27 ------ .../prometheus/exporter-deployment.yaml | 85 ------------------- .../prometheus/exporter-network-policy.yaml | 20 ----- .../prometheus/exporter-service.yaml | 37 -------- fluentd/templates/service-fluentd.yaml | 2 - fluentd/values.yaml | 80 +++++------------ roles/gather-prom-metrics/tasks/main.yaml | 19 ++++- .../armada/manifests/armada-lma.yaml | 3 - tools/deployment/common/fluentd-daemonset.sh | 23 +++-- tools/deployment/common/fluentd-deployment.sh | 7 -- 12 files changed, 56 insertions(+), 281 deletions(-) delete mode 100644 fluentd/templates/monitoring/prometheus/bin/_fluentd-exporter.sh.tpl delete mode 100644 fluentd/templates/monitoring/prometheus/exporter-configmap-bin.yaml delete mode 100644 fluentd/templates/monitoring/prometheus/exporter-deployment.yaml delete mode 100644 fluentd/templates/monitoring/prometheus/exporter-network-policy.yaml delete mode 100644 fluentd/templates/monitoring/prometheus/exporter-service.yaml diff --git a/fluentd/templates/deployment-fluentd.yaml b/fluentd/templates/deployment-fluentd.yaml index a8ef2be95..8befe3e4e 100644 --- 
a/fluentd/templates/deployment-fluentd.yaml +++ b/fluentd/templates/deployment-fluentd.yaml @@ -18,6 +18,7 @@ limitations under the License. {{- $envAll := . }} {{- $mounts_fluentd := .Values.pod.mounts.fluentd.fluentd }} +{{- $prometheus_annotations := $envAll.Values.monitoring.prometheus.fluentd }} {{- $kafkaBroker := tuple "kafka" "internal" . | include "helm-toolkit.endpoints.hostname_fqdn_endpoint_lookup" }} {{- $kafkaBrokerPort := tuple "kafka" "internal" "broker" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }} @@ -100,6 +101,9 @@ spec: {{ tuple $envAll | include "helm-toolkit.snippets.release_uuid" | indent 8 }} configmap-etc-hash: {{ tuple "configmap-etc.yaml" . | include "helm-toolkit.utils.hash" }} configmap-bin-hash: {{ tuple "configmap-bin.yaml" . | include "helm-toolkit.utils.hash" }} +{{- if .Values.monitoring.prometheus.enabled }} +{{ tuple $prometheus_annotations | include "helm-toolkit.snippets.prometheus_pod_annotations" | indent 8 }} +{{- end }} {{ dict "envAll" $envAll "podName" "fluentd" "containerNames" (list "fluentd" "init") | include "helm-toolkit.snippets.kubernetes_mandatory_access_control_annotation" | indent 8 }} spec: {{ dict "envAll" $envAll "application" "fluentd" | include "helm-toolkit.snippets.kubernetes_pod_security_context" | indent 6 }} diff --git a/fluentd/templates/monitoring/prometheus/bin/_fluentd-exporter.sh.tpl b/fluentd/templates/monitoring/prometheus/bin/_fluentd-exporter.sh.tpl deleted file mode 100644 index a9cd2a3c3..000000000 --- a/fluentd/templates/monitoring/prometheus/bin/_fluentd-exporter.sh.tpl +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash - -{{/* -Copyright 2017 The Openstack-Helm Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/}} - -set -ex -COMMAND="${@:-start}" - -function start () { - exec fluentd_exporter --scrape_uri "$FLUENTD_METRICS_HOST" -} - -function stop () { - kill -TERM 1 -} - -$COMMAND diff --git a/fluentd/templates/monitoring/prometheus/exporter-configmap-bin.yaml b/fluentd/templates/monitoring/prometheus/exporter-configmap-bin.yaml deleted file mode 100644 index 8a9a1ca13..000000000 --- a/fluentd/templates/monitoring/prometheus/exporter-configmap-bin.yaml +++ /dev/null @@ -1,27 +0,0 @@ -{{/* -Copyright 2017 The Openstack-Helm Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/}} - -{{- if and .Values.manifests.monitoring.prometheus.configmap_bin_exporter .Values.monitoring.prometheus.enabled }} -{{- $envAll := . }} ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ printf "%s-%s" $envAll.Release.Name "fluentd-exporter-bin" }} -data: - fluentd-exporter.sh: | -{{ tuple "bin/_fluentd-exporter.sh.tpl" . 
| include "helm-toolkit.utils.template" | indent 4 }} -{{- end }} diff --git a/fluentd/templates/monitoring/prometheus/exporter-deployment.yaml b/fluentd/templates/monitoring/prometheus/exporter-deployment.yaml deleted file mode 100644 index 3812a5c75..000000000 --- a/fluentd/templates/monitoring/prometheus/exporter-deployment.yaml +++ /dev/null @@ -1,85 +0,0 @@ -{{/* -Copyright 2017 The Openstack-Helm Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/}} - -{{- if and .Values.manifests.monitoring.prometheus.deployment_exporter .Values.monitoring.prometheus.enabled }} -{{- $envAll := . 
}} - -{{ $fluentd_host := tuple "fluentd" "internal" "metrics" $envAll | include "helm-toolkit.endpoints.host_and_port_endpoint_uri_lookup" }} -{{ $fluentd_metrics_path := "api/plugins.json" }} -{{ $fluentd_metrics_host := printf "http://%s/%s" $fluentd_host $fluentd_metrics_path }} - -{{- $rcControllerName := printf "%s-%s" $envAll.Release.Name "fluentd-exporter" }} - -{{ tuple $envAll "prometheus_fluentd_exporter" $rcControllerName | include "helm-toolkit.snippets.kubernetes_pod_rbac_serviceaccount" }} ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ $rcControllerName | quote }} - labels: -{{ tuple $envAll "prometheus-fluentd-exporter" "exporter" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 4 }} -spec: - replicas: {{ .Values.pod.replicas.prometheus_fluentd_exporter }} - selector: - matchLabels: -{{ tuple $envAll "prometheus-fluentd-exporter" "exporter" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 6 }} -{{ tuple $envAll | include "helm-toolkit.snippets.kubernetes_upgrades_deployment" | indent 2 }} - template: - metadata: - labels: -{{ tuple $envAll "prometheus-fluentd-exporter" "exporter" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }} - spec: -{{ dict "envAll" $envAll "application" "exporter" | include "helm-toolkit.snippets.kubernetes_pod_security_context" | indent 6 }} - serviceAccountName: {{ $rcControllerName | quote }} - nodeSelector: - {{ .Values.labels.prometheus_fluentd_exporter.node_selector_key }}: {{ .Values.labels.prometheus_fluentd_exporter.node_selector_value | quote }} - terminationGracePeriodSeconds: {{ .Values.pod.lifecycle.termination_grace_period.prometheus_fluentd_exporter.timeout | default "30" }} - initContainers: -{{ tuple $envAll "prometheus_fluentd_exporter" list | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }} - containers: - - name: fluentd-exporter -{{ tuple $envAll "prometheus_fluentd_exporter" | include 
"helm-toolkit.snippets.image" | indent 10 }} -{{ tuple $envAll $envAll.Values.pod.resources.prometheus_fluentd_exporter | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }} -{{ dict "envAll" $envAll "application" "exporter" "container" "fluentd_exporter" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 10 }} - command: - - /tmp/fluentd-exporter.sh - - start - ports: - - name: metrics - containerPort: {{ tuple "prometheus_fluentd_exporter" "internal" "metrics" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }} - readinessProbe: - tcpSocket: - port: {{ tuple "prometheus_fluentd_exporter" "internal" "metrics" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }} - initialDelaySeconds: 20 - periodSeconds: 10 - env: - - name: FLUENTD_METRICS_HOST - value: {{ $fluentd_metrics_host }} - volumeMounts: - - name: pod-tmp - mountPath: /tmp - - name: fluentd-exporter-bin - mountPath: /tmp/fluentd-exporter.sh - subPath: fluentd-exporter.sh - readOnly: true - volumes: - - name: pod-tmp - emptyDir: {} - - name: fluentd-exporter-bin - configMap: - name: {{ printf "%s-%s" $envAll.Release.Name "fluentd-exporter-bin" | quote }} - defaultMode: 0555 -{{- end }} diff --git a/fluentd/templates/monitoring/prometheus/exporter-network-policy.yaml b/fluentd/templates/monitoring/prometheus/exporter-network-policy.yaml deleted file mode 100644 index 560dd4cbe..000000000 --- a/fluentd/templates/monitoring/prometheus/exporter-network-policy.yaml +++ /dev/null @@ -1,20 +0,0 @@ -{{/* -Copyright 2019 The Openstack-Helm Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/}} - -{{- if and .Values.manifests.monitoring.prometheus.network_policy_exporter .Values.monitoring.prometheus.enabled -}} -{{- $netpol_opts := dict "envAll" . "name" "application" "label" "prometheus-fluentd-exporter" -}} -{{ $netpol_opts | include "helm-toolkit.manifests.kubernetes_network_policy" }} -{{- end -}} diff --git a/fluentd/templates/monitoring/prometheus/exporter-service.yaml b/fluentd/templates/monitoring/prometheus/exporter-service.yaml deleted file mode 100644 index db6fdce04..000000000 --- a/fluentd/templates/monitoring/prometheus/exporter-service.yaml +++ /dev/null @@ -1,37 +0,0 @@ -{{/* -Copyright 2017 The Openstack-Helm Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/}} - -{{- if and .Values.manifests.monitoring.prometheus.service_exporter .Values.monitoring.prometheus.enabled }} -{{- $envAll := . }} -{{- $prometheus_annotations := $envAll.Values.monitoring.prometheus.fluentd_exporter }} ---- -apiVersion: v1 -kind: Service -metadata: - name: {{ tuple "prometheus_fluentd_exporter" "internal" . 
| include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }} - labels: -{{ tuple $envAll "prometheus-fluentd-exporter" "metrics" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 4 }} - annotations: -{{- if .Values.monitoring.prometheus.enabled }} -{{ tuple $prometheus_annotations | include "helm-toolkit.snippets.prometheus_service_annotations" | indent 4 }} -{{- end }} -spec: - ports: - - name: metrics - port: {{ tuple "prometheus_fluentd_exporter" "internal" "metrics" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }} - selector: -{{ tuple $envAll "prometheus-fluentd-exporter" "exporter" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 4 }} -{{- end }} diff --git a/fluentd/templates/service-fluentd.yaml b/fluentd/templates/service-fluentd.yaml index 4d7fc2bd8..d038a796e 100644 --- a/fluentd/templates/service-fluentd.yaml +++ b/fluentd/templates/service-fluentd.yaml @@ -28,8 +28,6 @@ spec: {{ if .Values.network.fluentd.node_port.enabled }} nodePort: {{ .Values.network.fluentd.node_port.port }} {{ end }} - - name: metrics - port: {{ tuple "fluentd" "internal" "metrics" . 
| include "helm-toolkit.endpoints.endpoint_port_lookup" }} selector: {{ tuple $envAll "fluentd" "internal" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 4 }} {{ if .Values.network.fluentd.node_port.enabled }} diff --git a/fluentd/values.yaml b/fluentd/values.yaml index 564239caf..cd95a2b46 100644 --- a/fluentd/values.yaml +++ b/fluentd/values.yaml @@ -25,14 +25,10 @@ labels: fluentd: node_selector_key: openstack-control-plane node_selector_value: enabled - prometheus_fluentd_exporter: - node_selector_key: openstack-control-plane - node_selector_value: enabled images: tags: - fluentd: docker.io/openstackhelm/fluentd:debian-20190903 - prometheus_fluentd_exporter: docker.io/bitnami/fluentd-exporter:0.2.0 + fluentd: docker.io/openstackhelm/fluentd:debian-20200324 dep_check: quay.io/airshipit/kubernetes-entrypoint:v1.0.0 helm_tests: docker.io/openstackhelm/heat:newton-ubuntu_xenial elasticsearch_template: docker.io/openstackhelm/heat:newton-ubuntu_xenial @@ -60,18 +56,25 @@ dependencies: services: - endpoint: internal service: local_image_registry - prometheus_fluentd_exporter: - services: - - endpoint: internal - service: fluentd conf: fluentd: template: | - bind 0.0.0.0 - port 24220 - @type monitor_agent + @type prometheus + port 24231 + + + + @type prometheus_monitor + + + + @type prometheus_output_monitor + + + + @type prometheus_tail_monitor @@ -291,10 +294,6 @@ conf: type_name fluent user "#{ENV['ELASTICSEARCH_USERNAME']}" - fluentd_exporter: - log: - format: "logger:stdout?json=true" - level: "info" endpoints: cluster_domain_suffix: cluster.local @@ -346,7 +345,7 @@ endpoints: service: default: 24224 metrics: - default: 24220 + default: 24231 kafka: namespace: null name: kafka @@ -367,25 +366,13 @@ endpoints: broker: default: 9092 public: 80 - prometheus_fluentd_exporter: - namespace: null - hosts: - default: fluentd-exporter - host_fqdn_override: - default: null - path: - default: /metrics - scheme: - default: 'http' - port: - metrics: - 
default: 9309 monitoring: prometheus: - enabled: false - fluentd_exporter: + enabled: true + fluentd: scrape: true + port: 24231 network: fluentd: @@ -394,11 +381,6 @@ network: port: 32329 network_policy: - prometheus-fluentd-exporter: - ingress: - - {} - egress: - - {} fluentd: ingress: - {} @@ -421,13 +403,7 @@ pod: fluentd: allowPrivilegeEscalation: false readOnlyRootFilesystem: true - exporter: - pod: - runAsUser: 65534 - container: - fluentd_exporter: - allowPrivilegeEscalation: false - readOnlyRootFilesystem: true + affinity: anti: type: @@ -453,11 +429,8 @@ pod: termination_grace_period: fluentd: timeout: 30 - prometheus_fluentd_exporter: - timeout: 30 replicas: fluentd: 3 - prometheus_fluentd_exporter: 1 resources: enabled: false fluentd: @@ -467,13 +440,6 @@ pod: requests: memory: '128Mi' cpu: '500m' - prometheus_fluentd_exporter: - limits: - memory: "1024Mi" - cpu: "2000m" - requests: - memory: "128Mi" - cpu: "500m" mounts: fluentd: fluentd: @@ -483,12 +449,6 @@ manifests: configmap_etc: true deployment_fluentd: true job_image_repo_sync: true - monitoring: - prometheus: - configmap_bin_exporter: true - deployment_exporter: true - network_policy_exporter: false - service_exporter: true network_policy: false secret_elasticsearch: true secret_fluentd_env: true diff --git a/roles/gather-prom-metrics/tasks/main.yaml b/roles/gather-prom-metrics/tasks/main.yaml index a5ba9a1b4..0f22b2bef 100644 --- a/roles/gather-prom-metrics/tasks/main.yaml +++ b/roles/gather-prom-metrics/tasks/main.yaml @@ -15,7 +15,7 @@ path: "{{ logs_dir }}/prometheus" state: directory -- name: "Get prometheus metrics from exporters in all namespaces" +- name: "Get metrics from exporter services in all namespaces" shell: |- set -e NAMESPACES=$(kubectl get namespaces -o json | jq -r '.items[].metadata.name') @@ -23,6 +23,7 @@ SERVICES=$(kubectl get svc -n $NS -o json | jq -r '.items[] | select(.spec.ports[].name=="metrics") | .metadata.name') for SVC in $SERVICES; do PORT=$(kubectl get svc 
$SVC -n $NS -o json | jq -r '.spec.ports[] | select(.name=="metrics") | .port') + echo "Scraping $SVC.$NS:$PORT/metrics:" curl "$SVC.$NS:$PORT/metrics" >> "{{ logs_dir }}"/prometheus/$NS-$SVC.txt || true done done @@ -58,6 +59,22 @@ executable: /bin/bash ignore_errors: True +- name: "Get metrics from fluentd pods" + shell: |- + set -e + NAMESPACE="osh-infra" + APP_LABEL="fluentd" + PODS=$(kubectl get pods -n $NAMESPACE -l application=$APP_LABEL -o json | jq -r '.items[].metadata.name') + for POD in $PODS; do + IP=$(kubectl get pod -n $NAMESPACE $POD -o json | jq -r '.status.podIP') + PORT=$(kubectl get pod -n $NAMESPACE $POD -o json | jq -r '.spec.containers[0].ports[] | select(.name=="metrics") | .containerPort') + echo "Scraping $POD at $IP:$PORT/metrics" + curl "$IP:$PORT/metrics" >> "{{ logs_dir }}"/prometheus/$POD.txt || true + done + args: + executable: /bin/bash + ignore_errors: True + - name: "Downloads logs to executor" synchronize: src: "{{ logs_dir }}/prometheus" diff --git a/tools/deployment/armada/manifests/armada-lma.yaml b/tools/deployment/armada/manifests/armada-lma.yaml index f17b8ab46..9840eea28 100644 --- a/tools/deployment/armada/manifests/armada-lma.yaml +++ b/tools/deployment/armada/manifests/armada-lma.yaml @@ -524,9 +524,6 @@ data: fluentd: node_selector_key: openstack-control-plane node_selector_value: enabled - prometheus_fluentd_exporter: - node_selector_key: openstack-control-plane - node_selector_value: enabled job: node_selector_key: openstack-control-plane node_selector_value: enabled diff --git a/tools/deployment/common/fluentd-daemonset.sh b/tools/deployment/common/fluentd-daemonset.sh index 4e47348cc..9b1e6aeac 100755 --- a/tools/deployment/common/fluentd-daemonset.sh +++ b/tools/deployment/common/fluentd-daemonset.sh @@ -25,12 +25,6 @@ endpoints: fluentd: hosts: default: fluentd-daemonset - prometheus_fluentd_exporter: - hosts: - default: fluentd-daemonset-exporter -monitoring: - prometheus: - enabled: true pod: env: fluentd: @@ 
-48,9 +42,20 @@ conf: fluentd: template: | - bind 0.0.0.0 - port 24220 - @type monitor_agent + @type prometheus + port 24231 + + + + @type prometheus_monitor + + + + @type prometheus_output_monitor + + + + @type prometheus_tail_monitor diff --git a/tools/deployment/common/fluentd-deployment.sh b/tools/deployment/common/fluentd-deployment.sh index 1f39a3466..27183b6a6 100755 --- a/tools/deployment/common/fluentd-deployment.sh +++ b/tools/deployment/common/fluentd-deployment.sh @@ -25,9 +25,6 @@ if [ ! -d "/var/log/journal" ]; then tee /tmp/fluentd.yaml << EOF deployment: type: Deployment -monitoring: - prometheus: - enabled: true pod: replicas: fluentd: 1 @@ -52,9 +49,6 @@ else tee /tmp/fluentd.yaml << EOF deployment: type: Deployment -monitoring: - prometheus: - enabled: true pod: replicas: fluentd: 1 @@ -66,7 +60,6 @@ helm upgrade --install fluentd ./fluentd \ ${OSH_INFRA_EXTRA_HELM_ARGS} \ ${OSH_INFRA_EXTRA_HELM_ARGS_FLUENTD} - #NOTE: Wait for deploy ./tools/deployment/common/wait-for-pods.sh osh-infra