Parsons, Cliff (cp769u) 5a2babd514 Backup/restore enhancements
This patchset introduces the framework by which all OSH-based database
systems can use to backup and restore their databases. The framework
is refactored from the Postgresql backup and restore logic. This will
prevent alot of code duplication in the backup restore scripts across
each cluster.

In the process, some improvements needed to be made:
1) Removing the need for 2 separate containers to do the backup
   and restore work to a remote gateway. This simplifies the design
   and enables a higher level of robustness.
2) Adding separate "days to keep" config value for remote backup files,
   as there may be different requirements for the remote files than the
   local backup files.
3) Adding capability to send Storage_Policy when creating the remote
   RGW swift container.
4) Making coding style improvement for readability and maintainability.
5) Fixing a deployment bug that occurs when remote backup is disabled.

Change-Id: I3a3482ad67320e89f04305b17da79abf7ad6eb45
2020-05-13 16:34:21 +00:00

546 lines
16 KiB
YAML

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Default values for postgresql.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
release_group: null
pod:
security_context:
prometheus_postgresql_exporter:
pod:
runAsUser: 65534
container:
postgresql_exporter:
readOnlyRootFilesystem: true
allowPrivilegeEscalation: false
server:
pod:
runAsUser: 999
# fsGroup used to allows cert file be witten to file.
fsGroup: 999
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
container:
set_volume_perms:
runAsUser: 0
readOnlyRootFilesystem: true
postgresql:
readOnlyRootFilesystem: true
allowPrivilegeEscalation: false
patroni_conversion:
runAsUser: 999
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
affinity:
anti:
type:
default: preferredDuringSchedulingIgnoredDuringExecution
topologyKey:
default: kubernetes.io/hostname
weight:
default: 10
replicas:
server: 3
prometheus_postgresql_exporter: 1
lifecycle:
upgrades:
statefulsets:
pod_replacement_strategy: OnDelete
partition: 0
deployments:
revision_history: 3
pod_replacement_strategy: RollingUpdate
rolling_update:
max_unavailable: 1
max_surge: 3
termination_grace_period:
prometheus_postgresql_exporter:
timeout: 30
server:
timeout: 180
resources:
enabled: false
server:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
test:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
prometheus_postgresql_exporter:
limits:
memory: "1024Mi"
cpu: "2000m"
requests:
memory: "128Mi"
cpu: "500m"
jobs:
image_repo_sync:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
prometheus_postgresql_exporter_create_user:
limits:
memory: "1024Mi"
cpu: "2000m"
requests:
memory: "128Mi"
cpu: "100m"
postgresql_backup:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
ks_user:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "1024Mi"
cpu: "2000m"
# using dockerhub patroni: https://hub.docker.com/r/openstackhelm/patroni/tags/
images:
tags:
postgresql: "docker.io/openstackhelm/patroni:latest-ubuntu_xenial"
dep_check: quay.io/airshipit/kubernetes-entrypoint:v1.0.0
image_repo_sync: docker.io/docker:17.07.0
ks_user: docker.io/openstackhelm/heat:stein-ubuntu_bionic
prometheus_postgresql_exporter: docker.io/wrouesnel/postgres_exporter:v0.4.6
prometheus_postgresql_exporter_create_user: "docker.io/postgres:9.5"
postgresql_backup: "quay.io/airshipit/porthole-postgresql-utility:latest-ubuntu_bionic"
pull_policy: "IfNotPresent"
local_registry:
active: false
exclude:
- dep_check
- image_repo_sync
storage:
pvc:
enabled: true
size: 5Gi
class_name: general
class_path: volume.beta.kubernetes.io/storage-class
host:
host_path: /data/openstack-helm/postgresql
mount:
path: /var/lib/postgresql
subpath: .
labels:
server:
node_selector_key: openstack-control-plane
node_selector_value: enabled
test:
node_selectory_key: openstack-control-plane
node_selector_value: enabled
prometheus_postgresql_exporter:
node_selector_key: openstack-control-plane
node_selector_value: enabled
job:
node_selector_key: openstack-control-plane
node_selector_value: enabled
dependencies:
dynamic:
common:
local_image_registry:
jobs:
- postgresql-image-repo-sync
services:
- endpoint: node
service: local_image_registry
static:
backup_postgresql:
jobs:
- postgresql-ks-user
tests:
services:
- endpoint: internal
service: postgresql
image_repo_sync:
services:
- endpoint: internal
service: local_image_registry
prometheus_postgresql_exporter_create_user:
services:
- endpoint: internal
service: postgresql
prometheus_postgresql_exporter:
services:
- endpoint: internal
service: postgresql
jobs:
- prometheus-postgresql-exporter-create-user
postgresql-backup:
services:
- endpoint: internal
service: postgresql
monitoring:
prometheus:
enabled: false
postgresql_exporter:
scrape: true
volume:
backup:
enabled: true
class_name: general
size: 5Gi
jobs:
backup_postgresql:
# activeDeadlineSeconds == 0 means no deadline
activeDeadlineSeconds: 0
backoffLimit: 6
cron: "0 0 * * *"
history:
success: 3
failed: 1
ks_user:
# activeDeadlineSeconds == 0 means no deadline
activeDeadlineSeconds: 0
backoffLimit: 6
network_policy:
postgresql:
ingress:
- {}
egress:
- {}
conf:
debug: false
postgresql:
shared_buffers: 128MB
max_connections: 100
patroni: |
scope: {{ tuple "postgresql" "internal" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}
kubernetes:
labels:
application: {{ tuple "postgresql" "internal" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}
component: server
use_endpoints: true
ports:
- name: {{ tuple "postgresql" "internal" . | include "helm-toolkit.endpoints.hostname_short_endpoint_lookup" }}
port: {{ tuple "postgresql" "internal" "postgresql" . | include "helm-toolkit.endpoints.endpoint_port_lookup" }}
bootstrap:
users:
{{ .Values.endpoints.postgresql.auth.admin.username }}:
password: {{ .Values.endpoints.postgresql.auth.admin.password }}
options:
- createrole
- createdb
dcs:
ttl: 30
loop_wait: 10
retry_timeout: 10
maximum_lag_on_failover: 1048576
postgresql:
data_dir: '{{ .Values.storage.mount.path }}/pgdata'
pgpass: '{{ .Values.storage.mount.path }}/pgpass'
use_pg_rewind: true
parameters:
archive_mode: 'off'
datestyle: 'iso, mdy'
external_pid_file: '/tmp/postgres.pid'
hot_standby: 'on'
log_checkpoints: 'on'
log_connections: 'on'
log_disconnections: 'on'
log_line_prefix: 'postgresql: %t [%p]: [%l-1] %c %x %d %u %a %h %m '
log_lock_waits: 'on'
log_temp_files: 0
log_timezone: 'UTC'
max_connections: {{ .Values.conf.postgresql.max_connections }}
max_replication_slots: 10
max_wal_senders: 10
max_worker_processes: 10
ssl: 'on'
# These relative paths are relative to data_dir
ssl_cert_file: {{ .Values.secrets.pki.server_cert_path }}/server.crt
ssl_ca_file: {{ .Values.secrets.pki.server_cert_path }}/ca.crt
ssl_key_file: {{ .Values.secrets.pki.server_cert_path }}/server.key
ssl_ciphers: 'HIGH:+3DES:!aNULL'
tcp_keepalives_idle: 900
tcp_keepalives_interval: 100
timezone: 'UTC'
track_commit_timestamp: 'on'
track_functions: all
wal_keep_segments: 8
wal_level: hot_standby
wal_log_hints: 'on'
initdb:
- auth-host: md5
- auth-local: trust
- encoding: UTF8
- locale: en_US.UTF-8
- data-checksums
pg_hba:
- host all all 127.0.0.1/32 trust
- host all all 0.0.0.0/0 md5
- hostssl replication {{ .Values.endpoints.postgresql.auth.replica.username }} {{ .Values.secrets.pki.pod_cidr }} cert clientcert=1
- hostssl replication {{ .Values.endpoints.postgresql.auth.replica.username }} 127.0.0.1/32 cert clientcert=1
- local all all trust
postgresql:
{{/* Note: the postgres pod mounts a volume at /var/lib/postgresql/data,
so let's just avoid it and use /var/lib/postgresql/pgdata instead.
Patroni moves this directory to a backup under the parent directory
(/var/lib/postgresql) under certain failure recovery scenarios, so
/var/lib/postgres itself must be exposed to the pod as a pvc mount.*/}}
authentication:
superuser:
username: {{ .Values.endpoints.postgresql.auth.admin.username }}
password: {{ .Values.endpoints.postgresql.auth.admin.password }}
data_dir: '{{ .Values.storage.mount.path }}/pgdata'
pgpass: '{{ .Values.storage.mount.path }}/pgpass'
callbacks:
on_role_change: /tmp/set_password.sh
on_start: /tmp/set_password.sh
use_pg_rewind: true
remove_data_directory_on_rewind_failure: true
remove_data_directory_on_diverged_timelines: true
parameters:
archive_mode: 'off'
datestyle: 'iso, mdy'
external_pid_file: '/tmp/postgres.pid'
hot_standby: 'on'
log_checkpoints: 'on'
log_connections: 'on'
log_disconnections: 'on'
log_line_prefix: 'postgresql: %t [%p]: [%l-1] %c %x %d %u %a %h %m '
log_lock_waits: 'on'
log_temp_files: 0
log_timezone: 'UTC'
max_connections: {{ .Values.conf.postgresql.max_connections }}
max_replication_slots: 10
max_wal_senders: 10
max_worker_processes: 10
ssl: 'on'
# These relative paths are relative to data_dir
ssl_cert_file: {{ .Values.secrets.pki.server_cert_path }}/server.crt
ssl_ca_file: {{ .Values.secrets.pki.server_cert_path }}/ca.crt
ssl_key_file: {{ .Values.secrets.pki.server_cert_path }}/server.key
ssl_ciphers: 'HIGH:+3DES:!aNULL'
tcp_keepalives_idle: 900
tcp_keepalives_interval: 100
timezone: 'UTC'
track_commit_timestamp: 'on'
track_functions: all
wal_keep_segments: 8
wal_level: hot_standby
wal_log_hints: 'on'
pg_hba:
- host all all 127.0.0.1/32 trust
- host all all 0.0.0.0/0 md5
- hostssl replication {{ .Values.endpoints.postgresql.auth.replica.username }} {{ .Values.secrets.pki.pod_cidr }} cert clientcert=1
- hostssl replication {{ .Values.endpoints.postgresql.auth.replica.username }} 127.0.0.1/32 cert clientcert=1
- local all all trust
watchdog:
mode: off # Allowed values: off, automatic, required
backup:
enabled: false
base_path: /var/backup
days_to_keep: 3
pg_dumpall_options: null
remote_backup:
enabled: false
container_name: postgresql
days_to_keep: 14
storage_policy: default-placement
exporter:
queries:
pg_replication:
query: "SELECT EXTRACT(epoch FROM (now() - pg_last_xact_replay_timestamp()))::int AS lag, CASE WHEN pg_is_in_recovery() THEN 1 ELSE 0 END AS is_replica"
master: true
metrics:
- lag:
usage: "GAUGE"
description: "Replication lag behind master in seconds"
- is_replica:
usage: "GAUGE"
description: "Indicates if this host is a replica"
pg_postmaster:
query: "SELECT pg_postmaster_start_time as start_time_seconds from pg_postmaster_start_time()"
master: true
metrics:
- start_time_seconds:
usage: "GAUGE"
description: "Time at which postmaster started"
secrets:
pki:
client_cert_path: /client_certs
server_cert_path: /server_certs
pod_cidr: 0.0.0.0/0
server:
hosts:
names:
# this name should be the service name for postgresql
- postgresql.ucp.svc.cluster.local
life: 365
replication:
hosts:
names:
# this name needs to be the same as endpoints.postgres.auth.replica.username
- standby
life: 365
postgresql:
admin: postgresql-admin
replica: postgresql-replication-pki
server: postgresql-server-pki
exporter: postgresql-exporter
audit: postgresql-audit
identity:
admin: keystone-admin-user
postgresql: postgresql-backup-user
endpoints:
cluster_domain_suffix: cluster.local
local_image_registry:
name: docker-registry
namespace: docker-registry
hosts:
default: localhost
internal: docker-registry
node: localhost
host_fqdn_override:
default: null
port:
registry:
node: 5000
postgresql:
auth:
admin:
username: postgres
password: password
replica:
username: standby
exporter:
username: psql_exporter
password: psql_exp_pass
audit:
username: audit
password: password
hosts:
default: postgresql
host_fqdn_override:
default: null
path: null
scheme: postgresql
port:
postgresql:
default: 5432
postgresql_restapi:
hosts:
default: postgresql-restapi
host_fqdn_override:
default: null
path: null
scheme: postgresql
port:
restapi:
default: 8008
prometheus_postgresql_exporter:
namespace: null
hosts:
default: postgresql-exporter
host_fqdn_override:
default: null
path:
default: /metrics
scheme:
default: 'http'
port:
metrics:
default: 9187
identity:
name: backup-storage-auth
namespace: openstack
auth:
admin:
# Auth URL of null indicates local authentication
# HTK will form the URL unless specified here
auth_url: null
region_name: RegionOne
username: admin
password: password
project_name: admin
user_domain_name: default
project_domain_name: default
postgresql:
# Auth URL of null indicates local authentication
# HTK will form the URL unless specified here
auth_url: null
role: admin
region_name: RegionOne
username: postgresql-backup-user
password: password
project_name: service
user_domain_name: service
project_domain_name: service
hosts:
default: keystone
internal: keystone-api
host_fqdn_override:
default: null
path:
default: /v3
scheme:
default: 'http'
port:
api:
default: 80
internal: 5000
manifests:
configmap_bin: true
configmap_etc: true
job_image_repo_sync: true
network_policy: false
job_ks_user: false
secret_admin: true
secret_replica: true
secret_server: true
secret_etc: true
secret_audit: true
service: true
statefulset: true
cron_job_postgresql_backup: false
pvc_backup: false
monitoring:
prometheus:
configmap_bin: true
configmap_etc: true
deployment_exporter: true
job_user_create: true
secret_etc: true
service_exporter: true