Doug Aaser 9efb353b83 Patroni inclusion work for HA Postgres
This patchset aims to add HA Clustering support for Postgres. HA Clustering
provides automatic failover in the event of the database going down in addition
to keeping replicas of the database for rebuilding in the event of a node
going down. To achieve this clustering we use
[Patroni](https://github.com/zalando/patroni) which offers HA clustering
support for Postgres.

Patroni is a daemon that runs in the background and keeps track of which
node in your cluster is currently the leader node and routes all traffic
on the Postgresql endpoint to that node. If the leader node goes down,
Patroni holds an election to chose a new leader and updates the endpoint
to route traffic accordingly. All communication between nodes is done by
a Patroni created endpoint, seperate from the externally facing Postgres
endpoint.

Note that, although the postgresql helm chart can be upgraded from
non-patroni to patroni clustering, the previous `postgresql`
endpoints object (which is not directly managed by helm) must be
deleted via an out-of-band mechanism so that it may be replaced by the
patroni-managed endpoints.  If Postgres itself is leveraged for the
deployment process, this must be done with careful timing.  Note that
the old endpoints had a port named "db", and the new endpoints has
a port named "postgresql".

- Picking up patchset: https://review.openstack.org/#/c/591663

Co-authored-by: Tony Sorrentino <as1413@att.com>
Co-authored-by: Randeep Jalli <rj2083@att.com>
Co-authored-by: Pete Birley <pete@port.direct>
Co-authored-by: Matt McEuen <mm9745@att.com>

Change-Id: I721b745017dc1ea7ae05dfd9f8d5dd08d0965985
2019-05-28 19:13:13 +00:00

434 lines
12 KiB
YAML

# Copyright 2019 The Openstack-Helm Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Default values for postgresql.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
release_group: null
pod:
security_context:
prometheus_postgresql_exporter:
pod:
runAsUser: 65534
container:
postgresql_exporter:
readOnlyRootFilesystem: true
allowPrivilegeEscalation: false
server:
pod:
runAsUser: 999
container:
set_volume_perms:
runAsUser: 0
readOnlyRootFilesystem: true
postgresql:
readOnlyRootFilesystem: true
allowPrivilegeEscalation: false
patroni_conversion:
runAsUser: 999
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
pod:
runAsUser: 999
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
affinity:
anti:
type:
default: preferredDuringSchedulingIgnoredDuringExecution
topologyKey:
default: kubernetes.io/hostname
weight:
default: 10
replicas:
server: 3
prometheus_postgresql_exporter: 1
lifecycle:
upgrades:
deployments:
revision_history: 3
pod_replacement_strategy: RollingUpdate
rolling_update:
max_unavailable: 1
max_surge: 3
termination_grace_period:
prometheus_postgresql_exporter:
timeout: 30
resources:
enabled: false
server:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
test:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
prometheus_postgresql_exporter:
limits:
memory: "1024Mi"
cpu: "2000m"
requests:
memory: "128Mi"
cpu: "500m"
jobs:
image_repo_sync:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
prometheus_postgresql_exporter_create_user:
limits:
memory: "1024Mi"
cpu: "2000m"
requests:
memory: "128Mi"
cpu: "100m"
postgresql_backup:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
# using dockerhub patroni: https://hub.docker.com/r/openstackhelm/patroni/tags/
images:
tags:
postgresql: "docker.io/openstackhelm/patroni:latest-ubuntu_xenial"
dep_check: quay.io/stackanetes/kubernetes-entrypoint:v0.3.1
image_repo_sync: docker.io/docker:17.07.0
prometheus_postgresql_exporter: docker.io/wrouesnel/postgres_exporter:v0.4.6
prometheus_postgresql_exporter_create_user: "docker.io/postgres:9.5"
postgresql_backup: "docker.io/postgres:9.5"
pull_policy: "IfNotPresent"
local_registry:
active: false
exclude:
- dep_check
- image_repo_sync
storage:
pvc:
enabled: true
size: 5Gi
class_name: general
class_path: volume.beta.kubernetes.io/storage-class
host:
host_path: /data/openstack-helm/postgresql
mount:
path: /var/lib/postgresql
subpath: .
labels:
server:
node_selector_key: openstack-control-plane
node_selector_value: enabled
test:
node_selectory_key: openstack-control-plane
node_selector_value: enabled
prometheus_postgresql_exporter:
node_selector_key: openstack-control-plane
node_selector_value: enabled
job:
node_selector_key: openstack-control-plane
node_selector_value: enabled
dependencies:
dynamic:
common:
local_image_registry:
jobs:
- postgresql-image-repo-sync
services:
- endpoint: node
service: local_image_registry
static:
postgresql:
jobs: null
tests:
services:
- endpoint: internal
service: postgresql
image_repo_sync:
services:
- endpoint: internal
service: local_image_registry
prometheus_postgresql_exporter_create_user:
services:
- endpoint: internal
service: postgresql
prometheus_postgresql_exporter:
services:
- endpoint: internal
service: postgresql
jobs:
- prometheus-postgresql-exporter-create-user
postgresql-backup:
services:
- endpoint: internal
service: postgresql
monitoring:
prometheus:
enabled: false
postgresql_exporter:
scrape: true
volume:
backup:
enabled: true
class_name: general
size: 5Gi
jobs:
backup_postgresql:
cron: "0 0 * * *"
history:
success: 3
failed: 1
conf:
debug: false
postgresql:
shared_buffers: 128MB
max_connections: 100
patroni: |
scope: {{ tuple "postgresql" "internal" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}
kubernetes:
labels:
application: {{ tuple "postgresql" "internal" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}
component: server
use_endpoints: true
ports:
- name: {{ tuple "postgresql" "internal" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}
port: {{ tuple "postgresql" "internal" "postgresql" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
bootstrap:
dcs:
ttl: 30
loop_wait: 10
retry_timeout: 10
maximum_lag_on_failover: 1048576
postgresql:
data_dir: '{{ .Values.storage.mount.path }}/pgdata'
pgpass: '{{ .Values.storage.mount.path }}/pgpass'
use_pg_rewind: true
parameters:
archive_mode: 'on'
archive_timeout: 1800s
autovacuum_analyze_scale_factor: 0.02
autovacuum_max_workers: 5
autovacuum_vacuum_scale_factor: 0.05
checkpoint_completion_target: 0.9
datestyle: 'iso, mdy'
default_text_search_config: 'pg_catalog.english'
external_pid_file: '/tmp/postgres.pid'
hot_standby: 'on'
lc_messages: 'en_US.utf8'
lc_monetary: 'en_US.utf8'
lc_numeric: 'en_US.utf8'
lc_time: 'en_US.utf8'
log_autovacuum_min_duration: 0
log_checkpoints: 'on'
log_connections: 'on'
log_disconnections: 'on'
log_line_prefix: 'postgresql: %t [%p]: [%l-1] %c %x %d %u %a %h %m '
log_lock_waits: 'on'
log_min_duration_statement: 500
log_statement: none
log_temp_files: 0
log_timezone: 'UTC'
max_connections: {{ .Values.conf.postgresql.max_connections }}
max_replication_slots: 10
max_wal_senders: 10
max_worker_processes: 10
tcp_keepalives_idle: 900
tcp_keepalives_interval: 100
timezone: 'UTC'
track_commit_timestamp: 'on'
track_functions: all
wal_keep_segments: 100
wal_level: 'logical'
wal_log_hints: 'on'
initdb:
- auth-host: md5
- auth-local: trust
- encoding: UTF8
- locale: en_US.UTF-8
- data-checksums
pg_hba:
- host all all 127.0.0.1/32 trust
- host all all 0.0.0.0/0 md5
- host replication {{ .Values.endpoints.postgresql.auth.replica.username }} 127.0.0.1/32 md5 # Fixes issue with Postgres 9.5
- host replication {{ .Values.endpoints.postgresql.auth.replica.username }} POD_IP_PATTERN/0 md5
- local replication {{ .Values.endpoints.postgresql.auth.admin.username }} md5
- local all all trust
postgresql:
{{/* Note: the postgres pod mounts a volume at /var/lib/postgresql/data,
so let's just avoid it and use /var/lib/postgresql/pgdata instead.
Patroni moves this directory to a backup under the parent directory
(/var/lib/postgresql) under certain failure recovery scenarios, so
/var/lib/postgres itself must be exposed to the pod as a pvc mount.*/}}
data_dir: '{{ .Values.storage.mount.path }}/pgdata'
pgpass: '{{ .Values.storage.mount.path }}/pgpass'
callbacks:
on_role_change: /tmp/set_password.sh
on_start: /tmp/set_password.sh
use_pg_rewind: true
parameters:
archive_mode: 'on'
archive_timeout: 1800s
autovacuum_analyze_scale_factor: 0.02
autovacuum_max_workers: 5
autovacuum_vacuum_scale_factor: 0.05
checkpoint_completion_target: 0.9
datestyle: 'iso, mdy'
default_text_search_config: 'pg_catalog.english'
external_pid_file: '/tmp/postgres.pid'
hot_standby: 'on'
lc_messages: 'en_US.utf8'
lc_monetary: 'en_US.utf8'
lc_numeric: 'en_US.utf8'
lc_time: 'en_US.utf8'
log_autovacuum_min_duration: 0
log_checkpoints: 'on'
log_connections: 'on'
log_disconnections: 'on'
log_line_prefix: 'postgresql: %t [%p]: [%l-1] %c %x %d %u %a %h %m '
log_lock_waits: 'on'
log_min_duration_statement: 500
log_statement: none
log_temp_files: 0
log_timezone: 'UTC'
max_connections: {{ .Values.conf.postgresql.max_connections }}
max_replication_slots: 10
max_wal_senders: 10
max_worker_processes: 10
tcp_keepalives_idle: 900
tcp_keepalives_interval: 100
timezone: 'UTC'
track_commit_timestamp: 'on'
track_functions: all
shared_buffers: {{ .Values.conf.postgresql.shared_buffers }}
wal_keep_segments: 100
wal_level: 'logical'
wal_log_hints: 'on'
pg_hba:
- host all all 127.0.0.1/32 trust
- host all all 0.0.0.0/0 md5
- host replication {{ .Values.endpoints.postgresql.auth.replica.username }} 127.0.0.1/32 md5 # Fixes issue with Postgres 9.5
- host replication {{ .Values.endpoints.postgresql.auth.replica.username }} POD_IP_PATTERN/0 md5
- local replication {{ .Values.endpoints.postgresql.auth.admin.username }} md5
- local all all trust
watchdog:
mode: off # Allowed values: off, automatic, required
backup:
enabled: true
base_path: /var/backup
days_of_backup_to_keep: 3
pg_dumpall_options: null
secrets:
postgresql:
admin: postgresql-admin
replica: postgresql-replication
exporter: postgresql-exporter
endpoints:
cluster_domain_suffix: cluster.local
local_image_registry:
name: docker-registry
namespace: docker-registry
hosts:
default: localhost
internal: docker-registry
node: localhost
host_fqdn_override:
default: null
port:
registry:
node: 5000
postgresql:
auth:
admin:
username: postgres
password: password
replica:
username: standby
password: password
exporter:
username: psql_exporter
password: psql_exp_pass
hosts:
default: postgresql
host_fqdn_override:
default: null
path: null
scheme: postgresql
port:
postgresql:
default: 5432
postgresql_restapi:
hosts:
default: postgresql-restapi
host_fqdn_override:
default: null
path: null
scheme: postgresql
port:
restapi:
default: 8008
prometheus_postgresql_exporter:
namespace: null
hosts:
default: postgresql-exporter
host_fqdn_override:
default: null
path:
default: /metrics
scheme:
default: 'http'
port:
metrics:
default: 9187
manifests:
configmap_bin: true
configmap_etc: true
job_image_repo_sync: true
secret_admin: true
secret_replica: true
secret_etc: true
service: true
statefulset: true
cron_job_postgresql_backup: false
pvc_backup: false
monitoring:
prometheus:
configmap_bin: true
deployment_exporter: true
job_user_create: true
secret_etc: true
service_exporter: true