From 6a96c4fe72c3e51dea601d5d8a762efb34c659dd Mon Sep 17 00:00:00 2001
From: Litao Gao <litao.gao@windriver.com>
Date: Tue, 30 Jul 2019 09:25:42 -0400
Subject: [PATCH] ceph: add bbappend to port StarlingX ceph packages

The ceph recipe is provided by meta-virtualization; however, StarlingX
has made many modifications to it, so extra effort is needed to port
them using a bbappend.
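
For reference, the bbappend extends the base recipe using the standard
BitBake override mechanisms, roughly as in this minimal sketch
(illustrative only; the complete file is added by this patch):

    FILESEXTRAPATHS_prepend := "${THISDIR}/files:"
    SRC_URI_append = " file://ceph.conf"
    do_install_append () {
        install -m 0644 ${WORKDIR}/ceph.conf ${D}${sysconfdir}/ceph/
    }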

Signed-off-by: Litao Gao <litao.gao@windriver.com>
---
 .../stx-integ-ceph/ceph_13.2.2.bbappend       |   85 ++
 .../stx-integ-ceph/files/ceph-init-wrapper.sh |  282 +++++
 .../files/ceph-manage-journal.py              |  334 ++++++
 .../stx-integ-ceph/files/ceph-preshutdown.sh  |   30 +
 .../stx-integ-ceph/files/ceph-radosgw.service |   18 +
 recipes-core/stx-integ-ceph/files/ceph.conf   |   58 +
 .../stx-integ-ceph/files/ceph.conf.pmon       |   26 +
 .../stx-integ-ceph/files/ceph.service         |   16 +
 recipes-core/stx-integ-ceph/files/ceph.sh     |   67 ++
 .../files/mgr-restful-plugin.py               | 1056 +++++++++++++++++
 .../files/mgr-restful-plugin.service          |   15 +
 .../files/starlingx-docker-override.conf      |    3 +
 ...s-for-orderly-shutdown-on-controller.patch |   59 +
 13 files changed, 2049 insertions(+)
 create mode 100644 recipes-core/stx-integ-ceph/ceph_13.2.2.bbappend
 create mode 100755 recipes-core/stx-integ-ceph/files/ceph-init-wrapper.sh
 create mode 100644 recipes-core/stx-integ-ceph/files/ceph-manage-journal.py
 create mode 100644 recipes-core/stx-integ-ceph/files/ceph-preshutdown.sh
 create mode 100644 recipes-core/stx-integ-ceph/files/ceph-radosgw.service
 create mode 100644 recipes-core/stx-integ-ceph/files/ceph.conf
 create mode 100644 recipes-core/stx-integ-ceph/files/ceph.conf.pmon
 create mode 100644 recipes-core/stx-integ-ceph/files/ceph.service
 create mode 100644 recipes-core/stx-integ-ceph/files/ceph.sh
 create mode 100644 recipes-core/stx-integ-ceph/files/mgr-restful-plugin.py
 create mode 100644 recipes-core/stx-integ-ceph/files/mgr-restful-plugin.service
 create mode 100644 recipes-core/stx-integ-ceph/files/starlingx-docker-override.conf
 create mode 100644 recipes-core/stx-integ-ceph/patches/0001-Add-hooks-for-orderly-shutdown-on-controller.patch

diff --git a/recipes-core/stx-integ-ceph/ceph_13.2.2.bbappend b/recipes-core/stx-integ-ceph/ceph_13.2.2.bbappend
new file mode 100644
index 0000000..251342f
--- /dev/null
+++ b/recipes-core/stx-integ-ceph/ceph_13.2.2.bbappend
@@ -0,0 +1,85 @@
+FILESEXTRAPATHS_prepend := "${THISDIR}/patches:${THISDIR}/files:"
+SRC_URI_append = "file://0001-Add-hooks-for-orderly-shutdown-on-controller.patch \
+                  file://ceph.conf   \
+                  file://ceph-init-wrapper.sh   \
+                  file://ceph-preshutdown.sh   \
+                  file://ceph.service   \
+                  file://mgr-restful-plugin.py   \
+                  file://starlingx-docker-override.conf   \
+                  file://ceph.conf.pmon   \
+                  file://ceph-manage-journal.py   \
+                  file://ceph-radosgw.service   \
+                  file://ceph.sh   \
+                  file://mgr-restful-plugin.service   \
+"
+    
+
+do_install_append () {
+    install -d ${D}${sysconfdir}/ceph
+    install -m 0644 ${WORKDIR}/ceph.conf ${D}${sysconfdir}/ceph/
+    install -m 0644 ${WORKDIR}/ceph-radosgw.service ${D}${systemd_system_unitdir}/ceph-radosgw@.service
+    install -m 0644 ${WORKDIR}/ceph.service ${D}${systemd_system_unitdir}
+    install -m 0644 ${WORKDIR}/mgr-restful-plugin.service ${D}${systemd_system_unitdir}
+
+    install -m 0700 ${WORKDIR}/ceph-manage-journal.py ${D}${sbindir}/ceph-manage-journal
+    install -Dm 0750 ${WORKDIR}/mgr-restful-plugin.py  ${D}${sysconfdir}/rc.d/init.d/mgr-restful-plugin
+    install -m 0750 ${WORKDIR}/ceph.conf.pmon ${D}${sysconfdir}/ceph/
+
+    install -d -m 0750 ${D}${sysconfdir}/services.d/controller
+    install -d -m 0750 ${D}${sysconfdir}/services.d/storage
+    install -d -m 0750 ${D}${sysconfdir}/services.d/worker
+
+    install -m 0750 ${WORKDIR}/ceph.sh ${D}${sysconfdir}/services.d/controller
+    install -m 0750 ${WORKDIR}/ceph.sh ${D}${sysconfdir}/services.d/storage
+    install -m 0750 ${WORKDIR}/ceph.sh ${D}${sysconfdir}/services.d/worker
+
+    install -Dm 0750 ${WORKDIR}/ceph-init-wrapper.sh ${D}${sysconfdir}/rc.d/init.d/ceph-init-wrapper
+    install -m 0700 ${WORKDIR}/ceph-preshutdown.sh ${D}${sbindir}/ceph-preshutdown.sh
+    
+    install -Dm 0644 ${WORKDIR}/starlingx-docker-override.conf ${D}${systemd_system_unitdir}/docker.service.d/starlingx-docker-override.conf
+
+    install -m 0644 -D ${S}/src/etc-rbdmap ${D}${sysconfdir}/ceph/rbdmap 
+    install -m 0644 -D ${S}/etc/sysconfig/ceph ${D}${sysconfdir}/sysconfig/ceph
+    install -m 0644 -D ${S}/src/logrotate.conf ${D}${sysconfdir}/logrotate.d/ceph
+
+    install -m 0644 -D ${S}/COPYING ${D}${docdir}/ceph/COPYING    
+    install -m 0644 -D ${S}/etc/sysctl/90-ceph-osd.conf ${D}${libdir}/sysctl.d/90-ceph-osd.conf
+    install -m 0644 -D ${S}/udev/50-rbd.rules ${D}${libdir}/udev/rules.d/50-rbd.rules
+    install -m 0644 -D ${S}/udev/60-ceph-by-parttypeuuid.rules ${D}${libdir}/udev/rules.d/60-ceph-by-parttypeuuid.rules
+
+    mkdir -p ${D}${localstatedir}/ceph
+    mkdir -p ${D}${localstatedir}/run/ceph
+    mkdir -p ${D}${localstatedir}/log/ceph
+    mkdir -p ${D}${localstatedir}/lib/ceph/tmp
+    mkdir -p ${D}${localstatedir}/lib/ceph/mon
+    mkdir -p ${D}${localstatedir}/lib/ceph/osd
+    mkdir -p ${D}${localstatedir}/lib/ceph/mds
+    mkdir -p ${D}${localstatedir}/lib/ceph/mgr
+    mkdir -p ${D}${localstatedir}/lib/ceph/radosgw
+    mkdir -p ${D}${localstatedir}/lib/ceph/bootstrap-osd
+    mkdir -p ${D}${localstatedir}/lib/ceph/bootstrap-mds
+    mkdir -p ${D}${localstatedir}/lib/ceph/bootstrap-rgw
+    mkdir -p ${D}${localstatedir}/lib/ceph/bootstrap-mgr
+    mkdir -p ${D}${localstatedir}/lib/ceph/bootstrap-rbd
+
+    install -m 0750 -D ${S}/src/init-radosgw ${D}${sysconfdir}/rc.d/init.d/ceph-radosgw
+    sed -i '/### END INIT INFO/a SYSTEMCTL_SKIP_REDIRECT=1' ${D}${sysconfdir}/rc.d/init.d/ceph-radosgw
+    install -m 0750 -D ${S}/src/init-rbdmap ${D}${sysconfdir}/rc.d/init.d/rbdmap
+    install -m 0750 -D ${B}/bin/init-ceph ${D}${sysconfdir}/rc.d/init.d/ceph
+    install -m 0750 -D ${B}/bin/init-ceph ${D}${sysconfdir}/init.d/ceph
+    install -d -m 0750 ${D}${localstatedir}/log/radosgw 
+}
+
+TARGET_CC_ARCH += "${LDFLAGS}"
+RDEPENDS_${PN} += "\
+        bash \
+"
+
+FILES_${PN} += "\
+        ${localstatedir} ${libdir} ${docdir} \
+        ${systemd_system_unitdir}/mgr-restful-plugin.service \
+        ${systemd_system_unitdir}/ceph-radosgw@.service \
+        ${systemd_system_unitdir}/ceph.service \
+        ${systemd_system_unitdir}/docker.service.d \
+        /run \
+"
diff --git a/recipes-core/stx-integ-ceph/files/ceph-init-wrapper.sh b/recipes-core/stx-integ-ceph/files/ceph-init-wrapper.sh
new file mode 100755
index 0000000..0a5cd53
--- /dev/null
+++ b/recipes-core/stx-integ-ceph/files/ceph-init-wrapper.sh
@@ -0,0 +1,282 @@
+#!/bin/bash
+#
+# Copyright (c) 2019 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# This script is a helper wrapper for pmon monitoring of ceph
+# processes. The "/etc/init.d/ceph" script does not know if ceph is
+# running on the node. For example when the node is locked, ceph
+# processes are not running. In that case we do not want pmond to
+# monitor these processes.
+#
+# The script "/etc/services.d/<node>/ceph.sh" will create the file
+# "/var/run/.ceph_started" when ceph is running and remove it when
+# it is not.
+#
+# The script also extracts one or more ceph process names that are
+# reported as 'not running', 'dead' or 'failed' by '/etc/init.d/ceph status'
+# and writes the names to a text file: /tmp/ceph_status_failure.txt for
+# pmond to access. The pmond adds the text to logs and alarms. Examples of
+# text written to the file by this script are:
+#   'osd.1'
+#   'osd.1, osd.2'
+#   'mon.storage-0'
+#   'mon.storage-0, osd.2'
+#
+# Moreover, for processes that are reported as 'hung' by '/etc/init.d/ceph status'
+# the script will try to increase their logging to 'debug' for a configurable
+# interval. With logging increased it outputs a few stack traces and then, at the
+# end of this interval, it dumps the process core and kills it.
+#
+# Return values:
+# zero -   /etc/init.d/ceph returned success or ceph is not running on the node
+# non-zero /etc/init.d/ceph returned a failure or invalid syntax
+#
+
+source /usr/bin/tsconfig
+source /etc/platform/platform.conf
+
+CEPH_SCRIPT="/etc/init.d/ceph"
+CEPH_FILE="$VOLATILE_PATH/.ceph_started"
+CEPH_RESTARTING_FILE="$VOLATILE_PATH/.ceph_restarting"
+CEPH_GET_STATUS_FILE="$VOLATILE_PATH/.ceph_getting_status"
+CEPH_STATUS_FAILURE_TEXT_FILE="/tmp/ceph_status_failure.txt"
+
+BINDIR=/usr/bin
+SBINDIR=/usr/sbin
+LIBDIR=/usr/lib64/ceph
+ETCDIR=/etc/ceph
+source $LIBDIR/ceph_common.sh
+
+LOG_PATH=/var/log/ceph
+LOG_FILE=$LOG_PATH/ceph-process-states.log
+LOG_LEVEL=NORMAL  # DEBUG
+verbose=0
+
+DATA_PATH=$VOLATILE_PATH/ceph_hang    # folder where we keep state information
+mkdir -p $DATA_PATH                   # make sure folder exists
+
+MONITORING_INTERVAL=15
+TRACE_LOOP_INTERVAL=5
+GET_STATUS_TIMEOUT=120
+CEPH_STATUS_TIMEOUT=20
+
+WAIT_FOR_CMD=1
+
+RC=0
+
+args=("$@")
+
+if [ ! -z $ARGS ]; then
+    IFS=";" read -r -a new_args <<< "$ARGS"
+    args+=("${new_args[@]}")
+fi
+
+wait_for_status ()
+{
+    timeout=$GET_STATUS_TIMEOUT  # wait for status no more than $timeout seconds
+    while [ -f ${CEPH_GET_STATUS_FILE} ] && [ $timeout -gt 0 ]; do
+        sleep 1
+        let timeout-=1
+    done
+    if [ $timeout -eq 0 ]; then
+        wlog "-" "WARN" "Getting status takes more than ${GET_STATUS_TIMEOUT}s, continuing"
+        rm -f $CEPH_GET_STATUS_FILE
+    fi
+}
+
+start ()
+{
+    if [ -f ${CEPH_FILE} ]; then
+        wait_for_status
+        ${CEPH_SCRIPT} start $1
+        RC=$?
+    else
+        # Ceph is not running on this node, return success
+        exit 0
+    fi
+}
+
+stop ()
+{
+    wait_for_status
+    ${CEPH_SCRIPT} stop $1
+}
+
+restart ()
+{
+    if [ -f ${CEPH_FILE} ]; then
+        wait_for_status
+        touch $CEPH_RESTARTING_FILE
+        ${CEPH_SCRIPT} restart $1
+        rm -f $CEPH_RESTARTING_FILE
+    else
+        # Ceph is not running on this node, return success
+        exit 0
+    fi
+
+}
+
+log_and_restart_blocked_osds ()
+{
+    # Log info about the blocked osd daemons and then restart them
+    local names=$1
+    for name in $names; do
+        wlog $name "INFO" "Restarting OSD with blocked operations"
+        ${CEPH_SCRIPT} restart $name
+    done
+}
+
+log_and_kill_hung_procs ()
+{
+    # Log info about the hung processes and then kill them; later on pmon will restart them
+    local names=$1
+    for name in $names; do
+        type=`echo $name | cut -c 1-3`   # e.g. 'mon', if $name is 'mon1'
+        id=`echo $name | cut -c 4- | sed 's/^\\.//'`
+        get_conf run_dir "/var/run/ceph" "run dir"
+        get_conf pid_file "$run_dir/$type.$id.pid" "pid file"
+        pid=$(cat $pid_file)
+        wlog $name "INFO" "Dealing with hung process (pid:$pid)"
+
+        # monitoring interval
+        wlog $name "INFO" "Increasing log level"
+        execute_ceph_cmd ret $name "ceph daemon $name config set debug_$type 20/20"
+        monitoring=$MONITORING_INTERVAL
+        while [ $monitoring -gt 0 ]; do
+            if [ $(($monitoring % $TRACE_LOOP_INTERVAL)) -eq 0 ]; then
+                date=$(date "+%Y-%m-%d_%H-%M-%S")
+                log_file="$LOG_PATH/hang_trace_${name}_${pid}_${date}.log"
+                wlog $name "INFO" "Dumping stack trace to: $log_file"
+                $(pstack $pid >$log_file) &
+            fi
+            let monitoring-=1
+            sleep 1
+        done
+        wlog $name "INFO" "Trigger core dump"
+        kill -ABRT $pid &>/dev/null
+        rm -f $pid_file # process is dead, core dump is archiving, preparing for restart
+        # Wait for pending systemd core dumps
+        sleep 2 # hope systemd_coredump has started meanwhile
+        deadline=$(( $(date '+%s') + 300 ))
+        while [[ $(date '+%s') -lt "${deadline}" ]]; do
+            systemd_coredump_pid=$(pgrep -f "systemd-coredump.*${pid}.*ceph-${type}")
+            [[ -z "${systemd_coredump_pid}" ]] && break
+            wlog $name "INFO" "systemd-coredump ceph-${type} in progress: pid ${systemd_coredump_pid}"
+            sleep 2
+        done
+        kill -KILL $pid &>/dev/null
+    done
+}
+
+
+status ()
+{
+    if [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]] && [[ "$1" == "osd" ]]; then
+        timeout $CEPH_STATUS_TIMEOUT ceph -s
+        if [ "$?" -ne 0 ]; then
+            # Ceph cluster is not accessible. Don't panic, controller swact
+            # may be in progress.
+            wlog "-" INFO "Ceph is down, ignoring OSD status."
+            exit 0
+        fi
+    fi
+
+    if [ -f ${CEPH_RESTARTING_FILE} ]; then
+        # Ceph is restarting, we don't report state changes on the first pass
+        rm -f ${CEPH_RESTARTING_FILE}
+        exit 0
+    fi
+    if [ -f ${CEPH_FILE} ]; then
+        # Make sure the script does not 'exit' between here and the 'rm -f' below
+        # or the checkpoint file will be left behind
+        touch -f ${CEPH_GET_STATUS_FILE}
+        result=`${CEPH_SCRIPT} status $1`
+        RC=$?
+        if [ "$RC" -ne 0 ]; then
+            erred_procs=`echo "$result" | sort | uniq | awk ' /not running|dead|failed/ {printf "%s ", $1}' | sed 's/://g' | sed 's/, $//g'`
+            hung_procs=`echo "$result" | sort | uniq | awk ' /hung/ {printf "%s ", $1}' | sed 's/://g' | sed 's/, $//g'`
+            blocked_ops_procs=`echo "$result" | sort | uniq | awk ' /blocked ops/ {printf "%s ", $1}' | sed 's/://g' | sed 's/, $//g'`
+            invalid=0
+            host=`hostname`
+            if [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]]; then
+                # On 2 node configuration we have a floating monitor
+                host="controller"
+            fi
+            for i in $(echo $erred_procs $hung_procs); do
+                if [[ "$i" =~ osd.?[0-9]?[0-9]|mon.$host ]]; then
+                    continue
+                else
+                    invalid=1
+                fi
+            done
+
+            log_and_restart_blocked_osds $blocked_ops_procs
+            log_and_kill_hung_procs $hung_procs
+
+            hung_procs_text=""
+            for i in $(echo $hung_procs); do
+                hung_procs_text+="$i(process hung) "
+            done
+
+            rm -f $CEPH_STATUS_FAILURE_TEXT_FILE
+            if [ $invalid -eq 0 ]; then
+                text=""
+                for i in $erred_procs; do
+                    text+="$i, "
+                done
+                for i in $hung_procs; do
+                    text+="$i (process hang), "
+                done
+                echo "$text" | tr -d '\n' > $CEPH_STATUS_FAILURE_TEXT_FILE
+            else
+                echo "$host: '${CEPH_SCRIPT} status $1' result contains invalid process names: $erred_procs"
+                echo "Undetermined osd or monitor id" > $CEPH_STATUS_FAILURE_TEXT_FILE
+            fi
+        fi
+
+        rm -f ${CEPH_GET_STATUS_FILE}
+
+        if [[ $RC == 0 ]] && [[ "$1" == "mon" ]] && [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" != "simplex" ]]; then
+            # SM needs exit code != 0 from 'status mon' argument of the init script on
+            # standby controller otherwise it thinks that the monitor is running and
+            # tries to stop it.
+            # '/etc/init.d/ceph status mon' checks the status of monitors configured in
+            # /etc/ceph/ceph.conf and if it should be running on current host.
+            # If it should not be running it just exits with code 0. This is what
+            # happens on the standby controller.
+            # When floating monitor is running on active controller /var/lib/ceph/mon of
+            # standby is not mounted (Ceph monitor partition is DRBD synced).
+            test -e "/var/lib/ceph/mon/ceph-controller"
+            if [ "$?" -ne 0 ]; then
+                exit 3
+            fi
+        fi
+    else
+        # Ceph is not running on this node, return success
+        exit 0
+    fi
+}
+
+
+case "${args[0]}" in
+    start)
+        start ${args[1]}
+        ;;
+    stop)
+        stop ${args[1]}
+        ;;
+    restart)
+        restart ${args[1]}
+        ;;
+    status)
+        status ${args[1]}
+        ;;
+    *)
+        echo "Usage: $0 {start|stop|restart|status} [{mon|osd|osd.<number>|mon.<hostname>}]"
+        exit 1
+        ;;
+esac
+
+exit $RC
diff --git a/recipes-core/stx-integ-ceph/files/ceph-manage-journal.py b/recipes-core/stx-integ-ceph/files/ceph-manage-journal.py
new file mode 100644
index 0000000..f91cbc1
--- /dev/null
+++ b/recipes-core/stx-integ-ceph/files/ceph-manage-journal.py
@@ -0,0 +1,334 @@
+#!/usr/bin/python
+#
+# Copyright (c) 2019 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+import ast
+import os
+import os.path
+import re
+import subprocess
+import sys
+
+DEVICE_NAME_NVME = "nvme"
+
+#########
+# Utils #
+#########
+
+
+def command(arguments, **kwargs):
+    """Execute a command and capture stdout, stderr & return code"""
+    process = subprocess.Popen(
+        arguments,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        **kwargs)
+    out, err = process.communicate()
+    return out, err, process.returncode
+
+
+def get_input(arg, valid_keys):
+    """Convert the input to a dict and perform basic validation"""
+    json_string = arg.replace("\\n", "\n")
+    try:
+        input_dict = ast.literal_eval(json_string)
+        if not all(k in input_dict for k in valid_keys):
+            return None
+    except Exception:
+        return None
+
+    return input_dict
+
+
+def get_partition_uuid(dev):
+    output, _, _ = command(['blkid', dev])
+    try:
+        return re.search('PARTUUID=\"(.+?)\"', output).group(1)
+    except AttributeError:
+        return None
+
+
+def device_path_to_device_node(device_path):
+    try:
+        output, _, _ = command(["udevadm", "settle", "-E", device_path])
+        out, err, retcode = command(["readlink", "-f", device_path])
+        out = out.rstrip()
+    except Exception as e:
+        return None
+
+    return out
+
+
+###########################################
+# Manage Journal Disk Partitioning Scheme #
+###########################################
+
+DISK_BY_PARTUUID = "/dev/disk/by-partuuid/"
+JOURNAL_UUID = '45b0969e-9b03-4f30-b4c6-b4b80ceff106'  # Type of a journal partition
+
+
+def is_partitioning_correct(disk_path, partition_sizes):
+    """Validate the existence and size of journal partitions"""
+
+    # Obtain the device node from the device path.
+    disk_node = device_path_to_device_node(disk_path)
+
+    # Check that partition table format is GPT
+    output, _, _ = command(["udevadm", "settle", "-E", disk_node])
+    output, _, _ = command(["parted", "-s", disk_node, "print"])
+    if not re.search('Partition Table: gpt', output):
+        print("Format of disk node %s is not GPT, zapping disk" % disk_node)
+        return False
+
+    # Check each partition size
+    partition_index = 1
+    for size in partition_sizes:
+        # Check that each partition size matches the one in input
+        if DEVICE_NAME_NVME in disk_node:
+            partition_node = '{}p{}'.format(disk_node, str(partition_index))
+        else:
+            partition_node = '{}{}'.format(disk_node, str(partition_index))
+
+        output, _, _ = command(["udevadm", "settle", "-E", partition_node])
+        cmd = ["parted", "-s", partition_node, "unit", "MiB", "print"]
+        output, _, _ = command(cmd)
+
+        regex = ("^Disk " + str(partition_node) + ":\\s*" +
+                 str(size) + "[\\.0]*MiB")
+        if not re.search(regex, output, re.MULTILINE):
+            print("Journal partition %(node)s size is not %(size)s, "
+                  "zapping disk" % {"node": partition_node, "size": size})
+            return False
+
+        partition_index += 1
+
+    output, _, _ = command(["udevadm", "settle", "-t", "10"])
+    return True
+
+
+def create_partitions(disk_path, partition_sizes):
+    """Recreate partitions"""
+
+    # Obtain the device node from the device path.
+    disk_node = device_path_to_device_node(disk_path)
+
+    # Issue: After creating a new partition table on a device, Udev does not
+    # always remove old symlinks (i.e. to previous partitions on that device).
+    # Also, even if links are erased before zapping the disk, some of them will
+    # be recreated even though there is no partition to back them!
+    # Therefore, we have to remove the links AFTER we erase the partition table
+    # Issue: DISK_BY_PARTUUID directory is not present at all if there are no
+    # GPT partitions on the storage node so nothing to remove in this case
+    links = []
+    if os.path.isdir(DISK_BY_PARTUUID):
+        links = [os.path.join(DISK_BY_PARTUUID, l) for l in os.listdir(DISK_BY_PARTUUID)
+                 if os.path.islink(os.path.join(DISK_BY_PARTUUID, l))]
+
+    # Erase all partitions on current node by creating a new GPT table
+    _, err, ret = command(["parted", "-s", disk_node, "mktable", "gpt"])
+    if ret:
+        print("Error erasing partition table of %(node)s\n"
+              "Return code: %(ret)s reason: %(reason)s" %
+              {"node": disk_node, "ret": ret, "reason": err})
+        exit(1)
+
+    # Erase old symlinks
+    for l in links:
+        if disk_node in os.path.realpath(l):
+            os.remove(l)
+
+    # Create partitions in order
+    used_space_mib = 1  # leave 1 MiB at the beginning of the disk
+    num = 1
+    for size in partition_sizes:
+        cmd = ['parted', '-s', disk_node, 'unit', 'mib',
+               'mkpart', 'primary',
+               str(used_space_mib), str(used_space_mib + size)]
+        _, err, ret = command(cmd)
+        parms = {"disk_node": disk_node,
+                 "start": used_space_mib,
+                 "end": used_space_mib + size,
+                 "reason": err}
+        print("Created partition from start=%(start)s MiB to end=%(end)s MiB"
+              " on %(disk_node)s" % parms)
+        if ret:
+            print("Failed to create partition with "
+                  "start=%(start)s, end=%(end)s "
+                  "on %(disk_node)s reason: %(reason)s" % parms)
+            exit(1)
+        # Set partition type to ceph journal
+        # noncritical operation, it makes 'ceph-disk list' output correct info
+        cmd = ['sgdisk',
+               '--change-name={num}:ceph journal'.format(num=num),
+               '--typecode={num}:{uuid}'.format(
+                   num=num,
+                   uuid=JOURNAL_UUID,
+               ),
+               disk_node]
+        _, err, ret = command(cmd)
+        if ret:
+            print("WARNING: Failed to set partition name and typecode")
+        used_space_mib += size
+        num += 1
+
+
+###########################
+# Manage Journal Location #
+###########################
+
+OSD_PATH = "/var/lib/ceph/osd/"
+
+
+def mount_data_partition(data_path, osdid):
+    """Mount an OSD data partition and return the mounted path"""
+
+    # Obtain the device node from the device path.
+    data_node = device_path_to_device_node(data_path)
+
+    mount_path = OSD_PATH + "ceph-" + str(osdid)
+    output, _, _ = command(['mount'])
+    regex = "^" + data_node + ".*" + mount_path
+    if not re.search(regex, output, re.MULTILINE):
+        cmd = ['mount', '-t', 'xfs', data_node, mount_path]
+        _, _, ret = command(cmd)
+        params = {"node": data_node, "path": mount_path}
+        if ret:
+            print("Failed to mount %(node)s to %(path)s, aborting" % params)
+            exit(1)
+        else:
+            print("Mounted %(node)s to %(path)s" % params)
+    return mount_path
+
+
+def is_location_correct(path, journal_path, osdid):
+    """Check if location points to the correct device"""
+
+    # Obtain the device node from the device path.
+    journal_node = device_path_to_device_node(journal_path)
+
+    cur_node = os.path.realpath(path + "/journal")
+    if cur_node == journal_node:
+        return True
+    else:
+        return False
+
+
+def fix_location(mount_point, journal_path, osdid):
+    """Move the journal to the new partition"""
+
+    # Obtain the device node from the device path.
+    journal_node = device_path_to_device_node(journal_path)
+
+    # Fix symlink
+    path = mount_point + "/journal"  # 'journal' symlink path used by ceph-osd
+    journal_uuid = get_partition_uuid(journal_node)
+    new_target = DISK_BY_PARTUUID + journal_uuid
+    params = {"path": path, "target": new_target}
+    try:
+        if os.path.lexists(path):
+            os.unlink(path)  # delete the old symlink
+        os.symlink(new_target, path)
+        print("Symlink created: %(path)s -> %(target)s" % params)
+    except:
+        print("Failed to create symlink: %(path)s -> %(target)s" % params)
+        exit(1)
+    # Fix journal_uuid
+    path = mount_point + "/journal_uuid"
+    try:
+        with open(path, 'w') as f:
+            f.write(journal_uuid)
+    except Exception as ex:
+        # The operation is noncritical, it only makes 'ceph-disk list'
+        # display complete output. We log and continue.
+        params = {"path": path, "uuid": journal_uuid}
+        print("WARNING: Failed to set uuid of %(path)s to %(uuid)s" % params)
+
+    # Clean the journal partition
+    # Even if the partition table was erased, a journal previously present
+    # here would be reused. Journals are always bigger than 100MB.
+    command(['dd', 'if=/dev/zero', 'of=%s' % journal_node,
+             'bs=1M', 'count=100'])
+
+    # Format the journal
+    cmd = ['/usr/bin/ceph-osd', '-i', str(osdid),
+           '--pid-file', '/var/run/ceph/osd.%s.pid' % osdid,
+           '-c', '/etc/ceph/ceph.conf',
+           '--cluster', 'ceph',
+           '--mkjournal']
+    out, err, ret = command(cmd)
+    params = {"journal_node": journal_node,
+              "osdid": osdid,
+              "ret": ret,
+              "reason": err}
+    if not ret:
+        print("Prepared new journal partition: %(journal_node)s "
+              "for osd id: %(osdid)s" % params)
+    else:
+        print("Error initializing journal node: "
+              "%(journal_node)s for osd id: %(osdid)s "
+              "ceph-osd return code: %(ret)s reason: %(reason)s" % params)
+
+
+########
+# Main #
+########
+
+def main(argv):
+    # parse and validate arguments
+    err = False
+    partitions = None
+    location = None
+    if len(argv) != 2:
+        err = True
+    elif argv[0] == "partitions":
+        valid_keys = ['disk_path', 'journals']
+        partitions = get_input(argv[1], valid_keys)
+        if not partitions:
+            err = True
+        elif not isinstance(partitions['journals'], list):
+            err = True
+    elif argv[0] == "location":
+        valid_keys = ['data_path', 'journal_path', 'osdid']
+        location = get_input(argv[1], valid_keys)
+        if not location:
+            err = True
+        elif not isinstance(location['osdid'], int):
+            err = True
+    else:
+        err = True
+    if err:
+        print("Command intended for internal use only")
+        exit(-1)
+
+    if partitions:
+        # Recreate partitions only if the existing ones don't match input
+        if not is_partitioning_correct(partitions['disk_path'],
+                                       partitions['journals']):
+            create_partitions(partitions['disk_path'], partitions['journals'])
+        else:
+            print("Partition table for %s is correct, "
+                  "no need to repartition" %
+                  device_path_to_device_node(partitions['disk_path']))
+    elif location:
+        # we need to have the data partition mounted & we can leave it mounted
+        mount_point = mount_data_partition(location['data_path'],
+                                           location['osdid'])
+        # Update journal location only if the link points to another partition
+        if not is_location_correct(mount_point,
+                                   location['journal_path'],
+                                   location['osdid']):
+            print("Fixing journal location for "
+                  "OSD id: %(id)s" % {"node": location['data_path'],
+                                      "id": location['osdid']})
+            fix_location(mount_point,
+                         location['journal_path'],
+                         location['osdid'])
+        else:
+            print("Journal location for %s is correct, "
+                  "no need to change it" % location['data_path'])
+
+
+main(sys.argv[1:])
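+
+# Illustrative usage (arguments are Python dict literals parsed with
+# ast.literal_eval; device paths below are placeholders and journal
+# sizes are in MiB):
+#   ceph-manage-journal partitions "{'disk_path': '/dev/disk/by-path/...', 'journals': [1024, 1024]}"
+#   ceph-manage-journal location "{'data_path': '...', 'journal_path': '...', 'osdid': 0}"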
diff --git a/recipes-core/stx-integ-ceph/files/ceph-preshutdown.sh b/recipes-core/stx-integ-ceph/files/ceph-preshutdown.sh
new file mode 100644
index 0000000..5f59bd1
--- /dev/null
+++ b/recipes-core/stx-integ-ceph/files/ceph-preshutdown.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+#
+# Copyright (c) 2019 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+script=$(basename $0)
+
+# Set nullglob so wildcards will return empty string if no match
+shopt -s nullglob
+
+for dev in /dev/rbd[0-9]*; do
+    for mnt in $(mount | awk -v dev=$dev '($1 == dev) {print $3}'); do
+        logger -t ${script} "Unmounting $mnt"
+        /usr/bin/umount $mnt
+    done
+    logger -t ${script} "Unmounted $dev"
+done
+
+for dev in /dev/rbd[0-9]*; do
+    /usr/bin/rbd unmap -o force $dev
+    logger -t ${script} "Unmapped $dev"
+done
+
+lsmod | grep -q '^rbd\>' && /usr/sbin/modprobe -r rbd
+lsmod | grep -q '^libceph\>' && /usr/sbin/modprobe -r libceph
+
+exit 0
+
diff --git a/recipes-core/stx-integ-ceph/files/ceph-radosgw.service b/recipes-core/stx-integ-ceph/files/ceph-radosgw.service
new file mode 100644
index 0000000..391ecf6
--- /dev/null
+++ b/recipes-core/stx-integ-ceph/files/ceph-radosgw.service
@@ -0,0 +1,18 @@
+[Unit]
+Description=radosgw RESTful rados gateway
+After=network.target
+#After=remote-fs.target nss-lookup.target network-online.target time-sync.target
+#Wants=network-online.target
+
+[Service]
+Type=forking
+Restart=no
+KillMode=process
+RemainAfterExit=yes
+ExecStart=/etc/rc.d/init.d/ceph-radosgw start
+ExecStop=/etc/rc.d/init.d/ceph-radosgw stop
+ExecReload=/etc/rc.d/init.d/ceph-radosgw reload
+
+[Install]
+WantedBy=multi-user.target
+
diff --git a/recipes-core/stx-integ-ceph/files/ceph.conf b/recipes-core/stx-integ-ceph/files/ceph.conf
new file mode 100644
index 0000000..29e0711
--- /dev/null
+++ b/recipes-core/stx-integ-ceph/files/ceph.conf
@@ -0,0 +1,58 @@
+[global]
+	# Unique ID for the cluster.
+	fsid = %CLUSTER_UUID%
+	# Public network the monitor is connected to, e.g., 128.224.0.0/16
+	#public network = 127.0.0.1/24
+	# For version 0.55 and beyond, you must explicitly enable
+	# or disable authentication with "auth" entries in [global].
+	auth_cluster_required = cephx
+	auth_service_required = cephx
+	auth_client_required = cephx
+	osd_journal_size = 1024
+
+	# Uncomment the following line if you are mounting with ext4
+	# filestore xattr use omap = true
+
+	# Number of replicas of objects. Write an object 2 times.
+	# Cluster cannot reach an active + clean state until there are enough OSDs
+	# to handle the number of copies of an object. In this case, it requires
+	# at least 2 OSDs
+	osd_pool_default_size = 2
+
+	# Allow writing one copy in a degraded state.
+	osd_pool_default_min_size = 1
+
+	# Ensure you have a realistic number of placement groups. We recommend
+	# approximately 100 per OSD. E.g., total number of OSDs multiplied by 100
+	# divided by the number of replicas (i.e., osd pool default size). So for
+	# 2 OSDs and osd pool default size = 2, we'd recommend approximately
+	# (100 * 2) / 2 = 100.
+	osd_pool_default_pg_num = 64
+	osd_pool_default_pgp_num = 64
+	osd_crush_chooseleaf_type = 1
+	setuser match path = /var/lib/ceph/$type/$cluster-$id
+
+	# Override Jewel default of 2 reporters. StarlingX has replication factor 2
+	mon_osd_min_down_reporters = 1
+
+	# Use Hammer's report interval default value
+	osd_mon_report_interval_max = 120
+
+    # Configure max PGs per OSD to cover worst-case scenario of all possible
+    # StarlingX deployments i.e. AIO-SX with one OSD. Otherwise using
+    # the default value provided by Ceph Mimic leads to "too many PGs per OSD"
+    # health warning as the pools needed by stx-openstack are being created.
+    mon_max_pg_per_osd = 2048
+    osd_max_pg_per_osd_hard_ratio = 1.2
+
+[osd]
+	osd_mkfs_type = xfs
+	osd_mkfs_options_xfs = "-f"
+	osd_mount_options_xfs = "rw,noatime,inode64,logbufs=8,logbsize=256k"
+
+[mon]
+    mon warn on legacy crush tunables = false
+    # Quiet new warnings on move to Hammer
+    mon pg warn max per osd = 2048
+    mon pg warn max object skew = 0
+    mgr initial modules = restful
diff --git a/recipes-core/stx-integ-ceph/files/ceph.conf.pmon b/recipes-core/stx-integ-ceph/files/ceph.conf.pmon
new file mode 100644
index 0000000..00418b2
--- /dev/null
+++ b/recipes-core/stx-integ-ceph/files/ceph.conf.pmon
@@ -0,0 +1,26 @@
+[process]
+process  = ceph
+script   = /etc/init.d/ceph-init-wrapper
+
+style    = lsb
+severity = major          ; minor, major, critical
+restarts = 3              ; restart retries before error assertion
+interval = 30             ; number of seconds to wait between restarts
+
+mode = status             ; Monitoring mode: passive (default) or active
+                          ; passive: process death monitoring (default: always)
+                          ; active : heartbeat monitoring, i.e. request / response messaging
+                          ; status : determine process health by executing the "status" command
+                          ;          "start" is used to start the process(es) again
+                          ; ignore : do not monitor or stop monitoring
+
+; Status and Active Monitoring Options
+
+period     = 30           ; monitor period in seconds
+timeout    = 120          ; for active mode, messaging timeout period in seconds, must be shorter than period
+                          ; for status mode, max amount of time for a command to execute
+
+; Status Monitoring Options
+start_arg      = start        ; start argument for the script
+status_arg     = status       ; status argument for the script
+status_failure_text = /tmp/ceph_status_failure.txt   ; text to be added to alarms or logs, this is optional
diff --git a/recipes-core/stx-integ-ceph/files/ceph.service b/recipes-core/stx-integ-ceph/files/ceph.service
new file mode 100644
index 0000000..d3c2acc
--- /dev/null
+++ b/recipes-core/stx-integ-ceph/files/ceph.service
@@ -0,0 +1,16 @@
+[Unit]
+Description=StarlingX Ceph Startup
+After=network.target
+
+[Service]
+Type=forking
+Restart=no
+KillMode=process
+RemainAfterExit=yes
+ExecStart=/etc/rc.d/init.d/ceph start
+ExecStop=/etc/rc.d/init.d/ceph stop
+PIDFile=/var/run/ceph/ceph.pid
+
+[Install]
+WantedBy=multi-user.target
+
diff --git a/recipes-core/stx-integ-ceph/files/ceph.sh b/recipes-core/stx-integ-ceph/files/ceph.sh
new file mode 100644
index 0000000..e7e6ecd
--- /dev/null
+++ b/recipes-core/stx-integ-ceph/files/ceph.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+INITDIR=/etc/init.d
+LOGFILE=/var/log/ceph/ceph-init.log
+CEPH_FILE=/var/run/.ceph_started
+
+# Get our nodetype
+. /etc/platform/platform.conf
+
+# Exit immediately if ceph not configured (i.e. no mon in the config file)
+if ! grep -q "mon\." /etc/ceph/ceph.conf
+then
+    exit 0
+fi
+
+logecho ()
+{
+    echo $1
+    date >> ${LOGFILE}
+    echo $1 >> ${LOGFILE}
+}
+
+start ()
+{
+    logecho "Starting ceph services..."
+    ${INITDIR}/ceph start >> ${LOGFILE} 2>&1
+    RC=$?
+
+    if [ ! -f ${CEPH_FILE} ]; then
+        touch ${CEPH_FILE}
+    fi
+}
+
+stop ()
+{
+    if [[ "$system_type" == "All-in-one" ]] && [[ "$system_mode" == "simplex" ]]; then
+        logecho "Ceph services will continue to run on node"
+        exit 0
+    fi
+
+    logecho "Stopping ceph services..."
+
+    if [ -f ${CEPH_FILE} ]; then
+        rm -f ${CEPH_FILE}
+    fi
+
+    ${INITDIR}/ceph stop >> ${LOGFILE} 2>&1
+    RC=$?
+}
+
+RC=0
+
+case "$1" in
+    start)
+        start
+        ;;
+    stop)
+        stop
+        ;;
+    *)
+        echo "Usage: $0 {start|stop}"
+        exit 1
+        ;;
+esac
+
+logecho "RC was: $RC"
+exit $RC
diff --git a/recipes-core/stx-integ-ceph/files/mgr-restful-plugin.py b/recipes-core/stx-integ-ceph/files/mgr-restful-plugin.py
new file mode 100644
index 0000000..c1cae60
--- /dev/null
+++ b/recipes-core/stx-integ-ceph/files/mgr-restful-plugin.py
@@ -0,0 +1,1056 @@
+#!/usr/bin/python
+#
+# Copyright (c) 2019 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+
+### BEGIN INIT INFO
+# Provides:          ceph/mgr RESTful API plugin
+# Required-Start:    $ceph
+# Required-Stop:     $ceph
+# Default-Start:     2 3 4 5
+# Default-Stop:      0 1 6
+# Short-Description: Ceph MGR RESTful API plugin
+# Description:       Ceph MGR RESTful API plugin
+### END INIT INFO
+
+import argparse
+import contextlib
+import errno
+import fcntl
+import inspect
+import json
+import logging
+import multiprocessing
+import os
+import signal
+import socket
+import subprocess
+import sys
+import time
+
+import daemon
+import psutil
+import requests
+
+# 'timeout' command returns exit status 124
+# if command times out (see man page)
+GNU_TIMEOUT_EXPIRED_RETCODE = 124
+
+
+def psutil_terminate_kill(target, timeout):
+
+    """Extend psutil functionality to stop a process.
+
+       SIGTERM is sent to each target, then after a grace period SIGKILL
+       is sent to the ones that are still running.
+    """
+
+    if not isinstance(target, list):
+        target = [target]
+    _, target = psutil.wait_procs(target, timeout=0)
+    for action in [lambda p: p.terminate(), lambda p: p.kill()]:
+        for proc in target:
+            action(proc)
+        _, target = psutil.wait_procs(
+            target, timeout=timeout)
+
+
+class Config(object):
+
+    """ceph-mgr service wrapper configuration options.
+
+        In the future we may want to load them from a configuration file
+        (for example /etc/ceph/mgr-restful-plugin.conf )
+    """
+
+    def __init__(self):
+        self.log_level = logging.INFO
+        self.log_dir = '/var/log'
+
+        self.ceph_mgr_service = '/usr/bin/ceph-mgr'
+        self.ceph_mgr_cluster = 'ceph'
+        self.ceph_mgr_rundir = '/var/run/ceph/mgr'
+        self.ceph_mgr_identity = socket.gethostname()
+
+        self.service_name = 'mgr-restful-plugin'
+        self.service_socket = os.path.join(
+            self.ceph_mgr_rundir, '{}.socket'.format(self.service_name))
+        self.service_lock = os.path.join(
+            self.ceph_mgr_rundir, '{}.lock'.format(self.service_name))
+        self.service_pid_file = os.path.join(
+            '/var/run/ceph', '{}.pid'.format(self.service_name))
+
+        self.restful_plugin_port = 5001
+
+        # maximum size of a message received/sent via
+        # service monitor control socket
+        self.service_socket_bufsize = 1024
+
+        # maximum time to wait for ceph cli to exit
+        self.ceph_cli_timeout_sec = 30
+
+        # how much time to wait after ceph cli commands fail with timeout
+        # before running any other commands
+        self.cluster_grace_period_sec = 30
+
+        # after ceph-mgr is started it goes through an internal initialization
+        # phase; this is how much time to wait before querying ceph-mgr
+        self.ceph_mgr_grace_period_sec = 15
+
+        # after sending SIGTERM to ceph-mgr how much time to wait before
+        # sending SIGKILL (maximum time allowed for ceph-mgr cleanup)
+        self.ceph_mgr_kill_delay_sec = 5
+
+        # if service monitor is running a recovery procedure it reports
+        # status OK even if ceph-mgr is currently down. This sets the
+        # maximum number of consecutive ceph-mgr failures before reporting
+        # status error
+        self.ceph_mgr_fail_count_report_error = 3
+
+        # maximum number of consecutive ceph-mgr failures before
+        # stopping mgr-restful-plugin service
+        self.ceph_mgr_fail_count_exit = 5
+
+        # maximum time allowed for ceph-mgr to respond to a REST API request
+        self.rest_api_timeout_sec = 15
+
+        # interval between consecutive REST API requests (ping's). A smaller
+        # value here triggers more requests to ceph-mgr restful plugin. A
+        # higher value makes recovery slower when services become unavailable
+        self.restful_plugin_ping_delay_sec = 3
+
+        # where to save the self-signed certificate generated by ceph-mgr
+        self.restful_plugin_cert_path = os.path.join(
+            self.ceph_mgr_rundir, 'restful.crt')
+
+        # time to wait after enabling restful plugin
+        self.restful_plugin_grace_period_sec = 3
+
+        # after how many REST API ping failures to restart ceph-mgr
+        self.ping_fail_count_restart_mgr = 3
+
+        # after how many REST API ping failures to report status error.
+        # Until then service monitor reports status OK just in case
+        # restful plugin recovers
+        self.ping_fail_count_report_error = 5
+
+    @staticmethod
+    def load():
+        return Config()
+
+
+def setup_logging(name=None, cleanup_handlers=False):
+    if not name:
+        name = CONFIG.service_name
+    log = logging.getLogger(name)
+    log.setLevel(CONFIG.log_level)
+    if cleanup_handlers:
+        try:
+            for handler in log.handlers:
+                if isinstance(handler, logging.StreamHandler):
+                    handler.flush()
+                if isinstance(handler, logging.FileHandler):
+                    handler.close()
+            log.handlers = []
+        except Exception:
+            pass
+    elif log.handlers:
+        return log
+    handler = logging.FileHandler(
+        os.path.join(CONFIG.log_dir,
+                     '{}.log'.format(CONFIG.service_name)))
+    handler.setFormatter(
+        logging.Formatter('%(asctime)s %(process)s %(levelname)s %(name)s %(message)s'))
+    log.addHandler(handler)
+    return log
+
+
+CONFIG = Config.load()
+LOG = setup_logging(name='init-wrapper')
+
+
+class ServiceException(Exception):
+
+    """Generic mgr-restful-plugin service exception.
+
+       Build exception string based on static (per exception class)
+       string plus args, keyword args passed to exception constructor.
+    """
+
+    message = ""
+
+    def __init__(self, *args, **kwargs):
+        if "message" not in kwargs:
+            try:
+                message = self.message.format(*args, **kwargs)
+            except Exception:   # noqa
+                message = '{}, args:{}, kwargs: {}'.format(
+                    self.message, args, kwargs)
+        else:
+            message = kwargs["message"]
+        super(ServiceException, self).__init__(message)
+
+
+class ServiceAlreadyStarted(ServiceException):
+    message = ('Service monitor already started')
+
+
+class ServiceLockFailed(ServiceException):
+    message = ('Unable to lock service monitor: '
+               'reason={reason}')
+
+
+class ServiceNoSocket(ServiceException):
+    message = ('Unable to create service monitor socket: '
+               'reason={reason}')
+
+
+class ServiceSocketBindFailed(ServiceException):
+    message = ('Failed to bind service monitor socket: '
+               'path={path}, reason={reason}')
+
+
+class ServiceNoPidFile(ServiceException):
+    message = ('Failed to update pid file: '
+               'path={path}, reason={reason}')
+
+
+class CommandFailed(ServiceException):
+    message = ('Command failed: command={command}, '
+               'reason={reason}, out={out}')
+
+
+class CommandTimeout(ServiceException):
+    message = ('Command timeout: command={command}, '
+               'timeout={timeout}')
+
+
+class CephMgrStartFailed(ServiceException):
+    message = ('Failed to start ceph_mgr: '
+               'reason={reason}')
+
+
+class CephRestfulPluginFailed(ServiceException):
+    message = ('Failed to start restful plugin: '
+               'reason={reason}')
+
+
+class RestApiPingFailed(ServiceException):
+    message = ('REST API ping failed: '
+               'reason={reason}')
+
+
+class ServiceMonitor(object):
+
+    """Configure and monitor ceph-mgr and restful plugin (Ceph REST API)
+
+       1. process init script service requests: status, stop. Requests are
+          received via a control socket. Stop has priority over whatever
+          the monitor is doing currently. Any ceph command that may be running
+          is terminated/killed. Note that while ceph-mgr and restful plugin
+          configuration is in progress ServiceMonitor reports status OK to
+          avoid being restarted by SM.
+
+       2. configure ceph-mgr and mgr restful plugin: authentication, REST API
+          service port, self signed certificate. This runs as a separate
+          process so it can be stopped when init script requests it.
+
+       3. periodically check (ping) REST API responds to HTTPS requests.
+          Recovery actions are taken if REST API fails to respond: restart
+          ceph-mgr, wait for cluster to become available again.
+    """
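+
+    # Illustrative client side of the control-socket protocol described
+    # above (a sketch; the init script is assumed to do something
+    # equivalent, it is not part of this file):
+    #
+    #   sock = socket.socket(socket.AF_UNIX, socket.SOCK_SEQPACKET)
+    #   sock.connect(CONFIG.service_socket)
+    #   sock.send('status')
+    #   reply = sock.recv(CONFIG.service_socket_bufsize)  # 'OK' or 'ERR.<reason>'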
+
+    def __init__(self):
+        # process running configuration & REST API ping loop
+        self.monitor = None
+
+        # command socket used by init script
+        self.command = None
+
+        # ceph-mgr process
+        self.ceph_mgr = None
+
+        # consecutive ceph-mgr/restful-plugin start failures. Service monitor
+        # reports failure after CONFIG.ceph_mgr_fail_count_report_error
+        self.ceph_mgr_failure_count = 0
+
+        # consecutive REST API ping failures. ceph-mgr service is restarted
+        # after CONFIG.ping_fail_count_restart_mgr threshold is exceeded
+        self.ping_failure_count = 0
+
+        # REST API url reported by ceph-mgr after enabling restful plugin
+        self.restful_plugin_url = ''
+
+        # REST API self signed certificate generated by restful plugin
+        self.certificate = ''
+
+    def run(self):
+        self.disable_certificate_check()
+        with self.service_lock(), self.service_socket(), \
+                self.service_pid_file():
+            self.start_monitor()
+            self.server_loop()
+
+    def disable_certificate_check(self):
+        # ceph-mgr restful plugin is configured with a self-signed
+        # certificate. Certificate host is hard-coded to "ceph-restful"
+        # which causes HTTPS requests to fail because they don't
+        # match current host name ("controller-..."). Disable HTTPS
+        # certificates check in urllib3
+        LOG.warning('Disable urllib3 certificates check')
+        requests.packages.urllib3.disable_warnings()
+
+    def server_loop(self):
+        self.command.listen(2)
+        while True:
+            try:
+                client, _ = self.command.accept()
+                request = client.recv(CONFIG.service_socket_bufsize)
+                LOG.debug('Monitor command socket: request=%s', str(request))
+                cmd = request.split(' ')
+                cmd, args = cmd[0], cmd[1:]
+                if cmd == 'status':
+                    self.send_response(client, request, self.status())
+                elif cmd == 'stop':
+                    self.stop()
+                    self.send_response(client, request, 'OK')
+                    break
+                elif cmd == 'restful-url':
+                    try:
+                        self.restful_plugin_url = args[0]
+                        self.send_response(client, request, 'OK')
+                    except IndexError:
+                        LOG.warning('Failed to update restful plugin url: '
+                                    'args=%s', str(args))
+                        self.send_response(client, request, 'ERR')
+                elif cmd == 'certificate':
+                    try:
+                        self.certificate = args[0] if args else ''
+                        self.send_response(client, request, 'OK')
+                    except IndexError:
+                        LOG.warning('Failed to update certificate path: '
+                                    'args=%s', str(args))
+                        self.send_response(client, request, 'ERR')
+                elif cmd == 'ceph-mgr-failures':
+                    try:
+                        self.ceph_mgr_failure_count = int(args[0])
+                        self.send_response(client, request, 'OK')
+                        if self.ceph_mgr_failure_count >= CONFIG.ceph_mgr_fail_count_exit:
+                            self.stop()
+                            break
+                    except (IndexError, ValueError):
+                        LOG.warning('Failed to update ceph-mgr failures: '
+                                    'args=%s', str(args))
+                        self.send_response(client, request, 'ERR')
+                elif cmd == 'ping-failures':
+                    try:
+                        self.ping_failure_count = int(args[0])
+                        self.send_response(client, request, 'OK')
+                    except (IndexError, ValueError):
+                        LOG.warning('Failed to update ping failures: '
+                                    'args=%s', str(args))
+                        self.send_response(client, request, 'ERR')
+            except Exception as err:
+                LOG.exception(err)
+
+    @staticmethod
+    def send_response(client, request, response):
+        try:
+            client.send(response)
+        except socket.error as err:
+            LOG.warning('Failed to send response back. '
+                        'request=%s, response=%s, reason=%s',
+                        request, response, err)
+
+    def status(self):
+        if not self.restful_plugin_url:
+            if self.ceph_mgr_failure_count < CONFIG.ceph_mgr_fail_count_report_error \
+               and self.ping_failure_count < CONFIG.ping_fail_count_report_error:
+                LOG.debug('Monitor is starting services. Report status OK')
+                return 'OK'
+            LOG.debug('Too many failures: '
+                      'ceph_mgr=%d < %d, ping=%d < %d. '
+                      'Report status ERR',
+                      self.ceph_mgr_failure_count,
+                      CONFIG.ceph_mgr_fail_count_report_error,
+                      self.ping_failure_count,
+                      CONFIG.ping_fail_count_report_error)
+            return 'ERR.down'
+        try:
+            self.restful_plugin_ping()
+            LOG.debug('Restful plugin ping successful. Report status OK')
+            return 'OK'
+        except (CommandFailed, RestApiPingFailed):
+            if self.ceph_mgr_failure_count < CONFIG.ceph_mgr_fail_count_report_error \
+               and self.ping_failure_count < CONFIG.ping_fail_count_report_error:
+                LOG.info('Restful plugin does not respond but failure '
+                         'count is within acceptable limits: '
+                         ' ceph_mgr=%d < %d, ping=%d < %d. '
+                         'Report status OK',
+                         self.ceph_mgr_failure_count,
+                         CONFIG.ceph_mgr_fail_count_report_error,
+                         self.ping_failure_count,
+                         CONFIG.ping_fail_count_report_error)
+                return 'OK'
+            LOG.debug('Restful does not respond (ping failure count %d). '
+                      'Report status ERR', self.ping_failure_count)
+            return 'ERR.ping_failed'
+
+    def stop(self):
+        if not self.monitor:
+            return
+        LOG.info('Stop monitor with SIGTERM to process group %d',
+                 self.monitor.pid)
+        try:
+            os.killpg(self.monitor.pid, signal.SIGTERM)
+        except OSError as err:
+            LOG.info('Stop monitor failed: reason=%s', str(err))
+            return
+        time.sleep(CONFIG.ceph_mgr_kill_delay_sec)
+        LOG.info('Stop monitor with SIGKILL to process group %d',
+                 self.monitor.pid)
+        try:
+            os.killpg(self.monitor.pid, signal.SIGKILL)
+            os.waitpid(self.monitor.pid, 0)
+        except OSError as err:
+            LOG.info('Stop monitor failed: reason=%s', str(err))
+            return
+        LOG.info('Monitor stopped: pid=%d', self.monitor.pid)
+
+    @contextlib.contextmanager
+    def service_lock(self):
+        LOG.info('Take service lock: path=%s', CONFIG.service_lock)
+        try:
+            os.makedirs(os.path.dirname(CONFIG.service_lock))
+        except OSError:
+            pass
+        lock_file = open(CONFIG.service_lock, 'w')
+        try:
+            fcntl.flock(lock_file.fileno(),
+                        fcntl.LOCK_EX | fcntl.LOCK_NB)
+        except (IOError, OSError) as err:
+            if err.errno == errno.EAGAIN:
+                raise ServiceAlreadyStarted()
+            else:
+                raise ServiceLockFailed(reason=str(err))
+        # even if we have the lock here there might be another service manager
+        # running whose CONFIG.ceph_mgr_rundir was removed before starting
+        # this instance. Make sure there is only one service manager running
+        self.stop_other_service_managers()
+        try:
+            yield
+        finally:
+            os.unlink(CONFIG.service_lock)
+            lock_file.close()
+            LOG.info('Release service lock: path=%s', CONFIG.service_lock)
+
+    def stop_other_service_managers(self):
+        service = os.path.join('/etc/init.d', CONFIG.service_name)
+        for p in psutil.process_iter():
+            if p.cmdline()[:2] not in [[service], ['/usr/bin/python', service]]:
+                continue
+            if p.pid == os.getpid():
+                continue
+            p.kill()
+
+    @contextlib.contextmanager
+    def service_socket(self):
+        LOG.info('Create service socket')
+        try:
+            self.command = socket.socket(socket.AF_UNIX, socket.SOCK_SEQPACKET)
+        except socket.error as err:
+            raise ServiceNoSocket(reason=str(err))
+        LOG.info('Remove existing socket files')
+        try:
+            os.unlink(CONFIG.service_socket)
+        except OSError:
+            pass
+        LOG.info('Bind service socket: path=%s', CONFIG.service_socket)
+        try:
+            self.command.bind(CONFIG.service_socket)
+        except socket.error as err:
+            raise ServiceSocketBindFailed(
+                path=CONFIG.service_socket, reason=str(err))
+        try:
+            yield
+        finally:
+            LOG.info('Close service socket and remove file: path=%s',
+                     CONFIG.service_socket)
+            self.command.close()
+            os.unlink(CONFIG.service_socket)
+
+    @contextlib.contextmanager
+    def service_pid_file(self):
+        LOG.info('Update service pid file: path=%s', CONFIG.service_pid_file)
+        try:
+            pid_file = open(CONFIG.service_pid_file, 'w')
+            pid_file.write(str(os.getpid()))
+        except OSError as err:
+            raise ServiceNoPidFile(
+                path=CONFIG.service_pid_file, reason=str(err))
+        try:
+            yield
+        finally:
+            LOG.info('Remove service pid file: path=%s',
+                     CONFIG.service_pid_file)
+            try:
+                os.unlink(CONFIG.service_pid_file)
+            except OSError:
+                pass
+
+    def start_monitor(self):
+        LOG.info('Start monitor loop')
+        self.monitor = multiprocessing.Process(target=self.monitor_loop)
+        self.monitor.start()
+
+    def stop_unmanaged_ceph_mgr(self):
+        LOG.info('Stop unmanaged running ceph-mgr processes')
+        service_name = os.path.basename(CONFIG.ceph_mgr_service)
+        if self.ceph_mgr:
+            psutil_terminate_kill(
+                [proc for proc in psutil.process_iter()
+                 if (proc.name() == service_name
+                     and proc.pid != self.ceph_mgr.pid)],
+                CONFIG.ceph_mgr_kill_delay_sec)
+        else:
+            psutil_terminate_kill(
+                [proc for proc in psutil.process_iter()
+                 if proc.name() == service_name],
+                CONFIG.ceph_mgr_kill_delay_sec)
+
+    def monitor_loop(self):
+
+        """Bring up and monitor ceph-mgr restful plugin.
+
+           Steps:
+           - wait for Ceph cluster to become available
+           - configure and start ceph-mgr
+           - configure and enable restful plugin
+           - send periodic requests to REST API
+           - recover from failures
+
+           Note: because this runs as a separate process it
+               must send status updates to the service monitor
+               via the control socket for: ping_failure_count,
+               restful_plugin_url and certificate.
+        """
+
+        # Promote to process group leader so the parent (service monitor)
+        # can kill the monitor plus any processes spawned by it. Otherwise
+        # children of monitor_loop() will keep running in the background and
+        # will be reaped by init when they finish, but by then they might
+        # interfere with any new service instance.
+        os.setpgrp()
+
+        # Ignoring SIGTERM here ensures process group is not reused by
+        # the time parent (service monitor) issues the final SIGKILL.
+        signal.signal(signal.SIGTERM, signal.SIG_IGN)
+
+        while True:
+            try:
+                # steps to configure/start ceph-mgr and restful plugin
+                self.ceph_fsid_get()
+                self.ceph_mgr_auth_create()
+                self.ceph_mgr_start()
+                self.restful_plugin_set_server_port()
+                self.restful_plugin_enable()
+                self.restful_plugin_create_certificate()
+                self.restful_plugin_create_admin_key()
+                self.restful_plugin_get_url()
+                self.restful_plugin_get_certificate()
+
+                # REST API should be available now
+                # start making periodic requests (ping)
+                while True:
+                    try:
+                        self.restful_plugin_ping()
+                        self.ping_failure_count = 0
+                        self.request_update_ping_failures(
+                            self.ping_failure_count)
+                        self.ceph_mgr_failure_count = 0
+                        self.request_update_ceph_mgr_failures(
+                            self.ceph_mgr_failure_count)
+                        time.sleep(CONFIG.restful_plugin_ping_delay_sec)
+                        continue
+                    except RestApiPingFailed as err:
+                        LOG.warning(str(err))
+
+                    LOG.info('REST API ping failure count=%d',
+                             self.ping_failure_count)
+                    self.ping_failure_count += 1
+                    self.request_update_ping_failures(
+                        self.ping_failure_count)
+
+                    # maybe request failed because ceph-mgr is not running
+                    if not self.ceph_mgr_is_running():
+                        self.ceph_mgr_failure_count += 1
+                        self.request_update_ceph_mgr_failures(
+                            self.ceph_mgr_failure_count)
+                        self.ceph_mgr_start()
+                        time.sleep(CONFIG.ceph_mgr_grace_period_sec)
+                        continue
+
+                    # maybe request failed because cluster health is not ok
+                    if not self.ceph_fsid_get():
+                        LOG.info('Unable to get cluster fsid. '
+                                 'Sleep for a while')
+                        time.sleep(CONFIG.cluster_grace_period_sec)
+                        break
+
+                    # too many failures? Restart ceph-mgr and go again
+                    # through configuration steps
+                    if (self.ping_failure_count
+                            % CONFIG.ping_fail_count_restart_mgr == 0):
+                        LOG.info('Too many consecutive REST API failures. '
+                                 'Restart ceph-mgr. Update service '
+                                 'url and certificate')
+                        self.ceph_mgr_stop()
+                        self.restful_plugin_url = ''
+                        self.request_update_plugin_url(self.restful_plugin_url)
+                        self.certificate = ''
+                        self.request_update_certificate(self.certificate)
+                        break
+
+                    time.sleep(CONFIG.restful_plugin_ping_delay_sec)
+
+            except CommandFailed as err:
+                LOG.warning(str(err))
+                time.sleep(CONFIG.cluster_grace_period_sec)
+            except CommandTimeout as err:
+                LOG.warning(str(err))
+            except (CephMgrStartFailed, CephRestfulPluginFailed) as err:
+                LOG.warning(str(err))
+                self.ceph_mgr_failure_count += 1
+                self.request_update_ceph_mgr_failures(
+                    self.ceph_mgr_failure_count)
+                time.sleep(CONFIG.ceph_mgr_grace_period_sec)
+            except Exception as err:
+                LOG.exception(err)
+                time.sleep(CONFIG.cluster_grace_period_sec)
+
+    @staticmethod
+    def run_with_timeout(command, timeout, stderr=subprocess.STDOUT):
+        try:
+            LOG.info('Run command: %s', ' '.join(command))
+            return subprocess.check_output(
+                ['/usr/bin/timeout', str(timeout)] + command,
+                stderr=stderr, shell=False).strip()
+        except subprocess.CalledProcessError as err:
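+            # GNU timeout(1) reports an expired time limit with a
+            # dedicated exit status (124)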
+            if err.returncode == GNU_TIMEOUT_EXPIRED_RETCODE:
+                raise CommandTimeout(command=err.cmd, timeout=timeout)
+            raise CommandFailed(command=err.cmd, reason=str(err),
+                                out=err.output)
+
+    def ceph_fsid_get(self):
+        return self.run_with_timeout(['/usr/bin/ceph', 'fsid'],
+                                     CONFIG.ceph_cli_timeout_sec)
+
+    def ceph_mgr_has_auth(self):
+        path = '{}/ceph-{}'.format(
+            CONFIG.ceph_mgr_rundir, CONFIG.ceph_mgr_identity)
+        try:
+            os.makedirs(path)
+        except OSError:
+            pass
+        try:
+            self.run_with_timeout(
+                ['/usr/bin/ceph', 'auth', 'get',
+                 'mgr.{}'.format(CONFIG.ceph_mgr_identity),
+                 '-o', '{}/keyring'.format(path)],
+                CONFIG.ceph_cli_timeout_sec)
+            return True
+        except CommandFailed as err:
+            if 'ENOENT' in str(err):
+                return False
+            raise
+
+    def ceph_mgr_auth_create(self):
+        if self.ceph_mgr_has_auth():
+            return
+        LOG.info('Create ceph-mgr authentication')
+        self.run_with_timeout(
+            ['/usr/bin/ceph', 'auth', 'get-or-create',
+             'mgr.{}'.format(CONFIG.ceph_mgr_identity),
+             'mon', 'allow *', 'osd', 'allow *'],
+            CONFIG.ceph_cli_timeout_sec)
+
+    def ceph_mgr_is_running(self):
+        if not self.ceph_mgr:
+            return None
+        try:
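+            # psutil's wait(timeout=0) raises TimeoutExpired while the
+            # child process is still running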
+            self.ceph_mgr.wait(timeout=0)
+        except psutil.TimeoutExpired:
+            return True
+        return False
+
+    def ceph_mgr_start(self):
+        if self.ceph_mgr_is_running():
+            return
+        self.stop_unmanaged_ceph_mgr()
+        LOG.info('Start ceph-mgr daemon')
+        try:
+            with open(os.devnull, 'wb') as null:
+                self.ceph_mgr = psutil.Popen(
+                    [CONFIG.ceph_mgr_service,
+                     '--cluster', CONFIG.ceph_mgr_cluster,
+                     '--id', CONFIG.ceph_mgr_identity,
+                     '-f'],
+                    close_fds=True,
+                    stdout=null,
+                    stderr=null,
+                    shell=False)
+        except (OSError, ValueError) as err:
+            raise CephMgrStartFailed(reason=str(err))
+        time.sleep(CONFIG.ceph_mgr_grace_period_sec)
+
+    def ceph_mgr_stop(self):
+        if not self.ceph_mgr:
+            return
+        LOG.info('Stop ceph-mgr')
+        psutil_terminate_kill(self.ceph_mgr, CONFIG.ceph_mgr_kill_delay_sec)
+
+    def restful_plugin_has_server_port(self):
+        try:
+            with open(os.devnull, 'wb') as null:
+                out = self.run_with_timeout(
+                    ['/usr/bin/ceph', 'config-key', 'get',
+                     'config/mgr/mgr/restful/server_port'],
+                    CONFIG.ceph_cli_timeout_sec, stderr=null)
+            if out == str(CONFIG.restful_plugin_port):
+                return True
+            LOG.warning('Restful plugin port mismatch: '
+                        'current=%s, expected=%d', out,
+                        CONFIG.restful_plugin_port)
+        except CommandFailed as err:
+            LOG.warning('Failed to get restful plugin port: '
+                        'reason=%s', str(err))
+        return False
+
+    def restful_plugin_set_server_port(self):
+        if self.restful_plugin_has_server_port():
+            return
+        LOG.info('Set restful plugin port=%d', CONFIG.restful_plugin_port)
+        self.run_with_timeout(
+            ['/usr/bin/ceph', 'config', 'set', 'mgr',
+             'mgr/restful/server_port', str(CONFIG.restful_plugin_port)],
+            CONFIG.ceph_cli_timeout_sec)
+
+    def restful_plugin_has_admin_key(self):
+        try:
+            self.run_with_timeout(
+                ['/usr/bin/ceph', 'config-key', 'get',
+                 'mgr/restful/keys/admin'],
+                CONFIG.ceph_cli_timeout_sec)
+            return True
+        except CommandFailed:
+            pass
+        return False
+
+    def restful_plugin_create_admin_key(self):
+        if self.restful_plugin_has_admin_key():
+            return
+        LOG.info('Create restful plugin admin key')
+        self.run_with_timeout(
+            ['/usr/bin/ceph', 'restful',
+             'create-key', 'admin'],
+            CONFIG.ceph_cli_timeout_sec)
+
+    def restful_plugin_has_certificate(self):
+        try:
+            self.run_with_timeout(
+                ['/usr/bin/ceph', 'config-key', 'get',
+                 'mgr/restful/{}/crt'.format(CONFIG.ceph_mgr_identity)],
+                CONFIG.ceph_cli_timeout_sec)
+            return True
+        except CommandFailed:
+            pass
+        return False
+
+    def restful_plugin_create_certificate(self):
+        if self.restful_plugin_has_certificate():
+            return
+        LOG.info('Create restful plugin self signed certificate')
+        self.run_with_timeout(
+            ['/usr/bin/ceph', 'restful',
+             'create-self-signed-cert'],
+            CONFIG.ceph_cli_timeout_sec)
+
+    def restful_plugin_is_enabled(self):
+        command = ['/usr/bin/ceph', 'mgr', 'module', 'ls',
+                   '--format', 'json']
+        with open(os.devnull, 'wb') as null:
+            out = self.run_with_timeout(
+                command, CONFIG.ceph_cli_timeout_sec, stderr=null)
+        try:
+            if 'restful' in json.loads(out)['enabled_modules']:
+                return True
+        except ValueError as err:
+            raise CommandFailed(
+                command=' '.join(command),
+                reason='unable to decode json: {}'.format(err), out=out)
+        except KeyError as err:
+            raise CommandFailed(
+                command=' '.join(command),
+                reason='missing expected key: {}'.format(err), out=out)
+        return False
+
+    def restful_plugin_enable(self):
+        if not self.restful_plugin_is_enabled():
+            LOG.info('Enable restful plugin')
+            self.run_with_timeout(
+                ['/usr/bin/ceph', 'mgr',
+                 'module', 'enable', 'restful'],
+                CONFIG.ceph_cli_timeout_sec)
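+        # give the restful plugin time to come up before its URL and
+        # certificate are queried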
+        time.sleep(CONFIG.restful_plugin_grace_period_sec)
+
+    def restful_plugin_get_url(self):
+        command = ['/usr/bin/ceph', 'mgr', 'services',
+                   '--format', 'json']
+        with open(os.devnull, 'wb') as null:
+            out = self.run_with_timeout(
+                command, CONFIG.ceph_cli_timeout_sec, stderr=null)
+        try:
+            self.restful_plugin_url = json.loads(out)['restful']
+        except ValueError as err:
+            raise CephRestfulPluginFailed(
+                reason='unable to decode json: {} output={}'.format(err, out))
+        except KeyError as err:
+            raise CephRestfulPluginFailed(
+                reason='missing expected key: {} in output={}'.format(err, out))
+        self.request_update_plugin_url(self.restful_plugin_url)
+
+    def restful_plugin_get_certificate(self):
+        command = ['/usr/bin/ceph', 'config-key', 'get',
+                   'mgr/restful/controller-0/crt']
+        with open(os.devnull, 'wb') as null:
+            certificate = self.run_with_timeout(
+                command, CONFIG.ceph_cli_timeout_sec, stderr=null)
+            with open(CONFIG.restful_plugin_cert_path, 'wb') as cert_file:
+                cert_file.write(certificate)
+            self.certificate = CONFIG.restful_plugin_cert_path
+            self.request_update_certificate(
+                self.certificate)
+
+    def restful_plugin_ping(self):
+        if not self.restful_plugin_url:
+            raise RestApiPingFailed(reason='missing service url')
+        if not self.certificate:
+            raise RestApiPingFailed(reason='missing certificate')
+        LOG.debug('Ping restful plugin: url=%s', self.restful_plugin_url)
+        try:
+            response = requests.request(
+                'GET', self.restful_plugin_url, verify=False,
+                timeout=CONFIG.rest_api_timeout_sec)
+            if not response.ok:
+                raise RestApiPingFailed(
+                    reason='response not ok ({})'.format(response))
+            LOG.debug('Ping restful plugin OK')
+        except (requests.ConnectionError,
+                requests.Timeout,
+                requests.HTTPError) as err:
+            raise RestApiPingFailed(reason=str(err))
+
+    @staticmethod
+    def _make_client_socket():
+        sock = socket.socket(
+            socket.AF_UNIX, socket.SOCK_SEQPACKET)
+        sock.settimeout(2 * CONFIG.rest_api_timeout_sec)
+        sock.connect(CONFIG.service_socket)
+        return sock
+
+    @staticmethod
+    def request_status():
+        try:
+            with contextlib.closing(
+                    ServiceMonitor._make_client_socket()) as sock:
+                sock.send('status')
+                status = sock.recv(CONFIG.service_socket_bufsize)
+                LOG.debug('Status %s', status)
+                return status.startswith('OK')
+        except socket.error as err:
+            LOG.error('Status error: reason=%s', err)
+            return False
+
+    @staticmethod
+    def request_stop():
+        try:
+            with contextlib.closing(
+                    ServiceMonitor._make_client_socket()) as sock:
+                sock.send('stop')
+                response = sock.recv(CONFIG.service_socket_bufsize)
+                LOG.debug('Stop response: %s', response)
+                return True
+        except socket.error as err:
+            LOG.error('Stop error: reason=%s', err)
+            return False
+
+    @staticmethod
+    def request_update_ceph_mgr_failures(count):
+        try:
+            with contextlib.closing(
+                    ServiceMonitor._make_client_socket()) as sock:
+                sock.send('ceph-mgr-failures {}'.format(count))
+                sock.recv(CONFIG.service_socket_bufsize)
+                return True
+        except socket.error as err:
+            LOG.error('Update ceph-mgr failures error: reason=%s', err)
+            return False
+
+    @staticmethod
+    def request_update_ping_failures(count):
+        try:
+            with contextlib.closing(
+                    ServiceMonitor._make_client_socket()) as sock:
+                sock.send('ping-failures {}'.format(count))
+                sock.recv(CONFIG.service_socket_bufsize)
+                return True
+        except socket.error as err:
+            LOG.error('Update ping failures error: reason=%s', err)
+            return False
+
+    @staticmethod
+    def request_update_plugin_url(url):
+        try:
+            with contextlib.closing(
+                    ServiceMonitor._make_client_socket()) as sock:
+                sock.send('restful-url {}'.format(url))
+                sock.recv(CONFIG.service_socket_bufsize)
+                return True
+        except socket.error as err:
+            LOG.error('Update restful plugin url error: reason=%s', err)
+            return False
+
+    @staticmethod
+    def request_update_certificate(path):
+        try:
+            with contextlib.closing(
+                    ServiceMonitor._make_client_socket()) as sock:
+                sock.send('certificate {}'.format(path))
+                sock.recv(CONFIG.service_socket_bufsize)
+                return True
+        except socket.error as err:
+            LOG.error('Update certificate error: reason=%s', err)
+            return False
+
+
+class InitWrapper(object):
+
+    """Handle System V init script actions: start, stop, restart, etc. """
+
+    def __init__(self):
+
+        """Dispatch command line action to the corresponding function.
+
+           Candidate action functions are all class methods except ones
+           that start with an underscore.
+        """
+
+        parser = argparse.ArgumentParser()
+        actions = [m[0]
+                   for m in inspect.getmembers(self)
+                   if (inspect.ismethod(m[1])
+                       and not m[0].startswith('_'))]
+        parser.add_argument(
+            'action',
+            choices=actions)
+        self.args = parser.parse_args()
+        getattr(self, self.args.action)()
+
+    def start(self):
+
+        """Start ServiceMonitor as a daemon unless one is already running.
+
+           Use a pipe to report monitor status back to this process.
+        """
+
+        pipe = os.pipe()
+        child = os.fork()
+        if child == 0:
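+            # child process: daemonize, start the service monitor and
+            # report its startup status back through the pipe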
+            os.close(pipe[0])
+            with daemon.DaemonContext(files_preserve=[pipe[1]]):
+                # prevent duplication of messages in log
+                global LOG
+                LOG = setup_logging(cleanup_handlers=True)
+                try:
+                    monitor = ServiceMonitor()
+                    status = 'OK'
+                except ServiceAlreadyStarted:
+                    os.write(pipe[1], 'OK')
+                    os.close(pipe[1])
+                    return
+                except Exception as err:
+                    status = str(err)
+                os.write(pipe[1], status)
+                os.close(pipe[1])
+                if status == 'OK':
+                    try:
+                        monitor.run()
+                    except ServiceException as err:
+                        LOG.warning(str(err))
+                    except Exception as err:
+                        LOG.exception('Service monitor error: reason=%s', err)
+        else:
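+            # parent process: wait for the daemonized child to report its
+            # startup status, then exit accordingly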
+            os.close(pipe[1])
+            try:
+                status = os.read(pipe[0], CONFIG.service_socket_bufsize)
+                if status == 'OK':
+                    sys.exit(0)
+                else:
+                    LOG.warning('Service monitor failed to start: '
+                                'status=%s', status)
+            except IOError as err:
+                LOG.warning('Failed to read monitor status: reason=%s', err)
+            os.close(pipe[0])
+            os.waitpid(child, 0)
+            sys.exit(1)
+
+    def stop(self):
+
+        """Tell ServiceMonitor daemon to stop running.
+
+           If the request fails, stop the ServiceMonitor and ceph-mgr processes
+           using SIGTERM followed by SIGKILL.
+        """
+
+        result = ServiceMonitor.request_stop()
+        if not result:
+            ceph_mgr = os.path.basename(CONFIG.ceph_mgr_service)
+            procs = []
+            for proc in psutil.process_iter():
+                name = proc.name()
+                if name == CONFIG.service_name:
+                    procs.append(proc)
+                if name == ceph_mgr:
+                    procs.append(proc)
+            psutil_terminate_kill(procs, CONFIG.ceph_mgr_kill_delay_sec)
+
+    def restart(self):
+        self.stop()
+        self.start()
+
+    def force_reload(self):
+        self.stop()
+        self.start()
+
+    def reload(self):
+        self.stop()
+        self.start()
+
+    def status(self):
+
+        """Report status from ServiceMonitor.
+
+           We don't just try to access the REST API here because ServiceMonitor
+           may still be starting/configuring ceph-mgr and the restful plugin,
+           in which case we report OK to avoid being restarted by SM.
+        """
+
+        status = ServiceMonitor.request_status()
+        sys.exit(0 if status is True else 1)
+
+
+if __name__ == '__main__':
+    InitWrapper()
diff --git a/recipes-core/stx-integ-ceph/files/mgr-restful-plugin.service b/recipes-core/stx-integ-ceph/files/mgr-restful-plugin.service
new file mode 100644
index 0000000..b3e61f0
--- /dev/null
+++ b/recipes-core/stx-integ-ceph/files/mgr-restful-plugin.service
@@ -0,0 +1,15 @@
+[Unit]
+Description=Ceph MGR RESTful API Plugin
+After=network-online.target sw-patch.service
+
+[Service]
+Type=forking
+Restart=no
+KillMode=process
+RemainAfterExit=yes
+ExecStart=/etc/rc.d/init.d/mgr-restful-plugin start
+ExecStop=/etc/rc.d/init.d/mgr-restful-plugin stop
+ExecReload=/etc/rc.d/init.d/mgr-restful-plugin reload
+
+[Install]
+WantedBy=multi-user.target
diff --git a/recipes-core/stx-integ-ceph/files/starlingx-docker-override.conf b/recipes-core/stx-integ-ceph/files/starlingx-docker-override.conf
new file mode 100644
index 0000000..5ffd859
--- /dev/null
+++ b/recipes-core/stx-integ-ceph/files/starlingx-docker-override.conf
@@ -0,0 +1,3 @@
+[Service]
+ExecStopPost=/usr/sbin/ceph-preshutdown.sh
+
diff --git a/recipes-core/stx-integ-ceph/patches/0001-Add-hooks-for-orderly-shutdown-on-controller.patch b/recipes-core/stx-integ-ceph/patches/0001-Add-hooks-for-orderly-shutdown-on-controller.patch
new file mode 100644
index 0000000..15bb7c3
--- /dev/null
+++ b/recipes-core/stx-integ-ceph/patches/0001-Add-hooks-for-orderly-shutdown-on-controller.patch
@@ -0,0 +1,59 @@
+From 03340eaf0004e3cc8e3f8991ea96a46757d92830 Mon Sep 17 00:00:00 2001
+From: Don Penney <don.penney@windriver.com>
+Date: Sat, 26 Jan 2019 13:34:55 -0500
+Subject: [PATCH] Add hooks for orderly shutdown on controller
+
+Hook the ceph init script to add systemd overrides to define
+an orderly shutdown for StarlingX controllers.
+
+Signed-off-by: Don Penney <don.penney@windriver.com>
+---
+ src/init-ceph.in | 32 ++++++++++++++++++++++++++++++++
+ 1 file changed, 32 insertions(+)
+
+diff --git a/src/init-ceph.in b/src/init-ceph.in
+index 1fdb4b3..515d818 100644
+--- a/src/init-ceph.in
++++ b/src/init-ceph.in
+@@ -861,6 +861,38 @@ for name in $what; do
+ 		fi
+ 	    fi
+ 
++            . /etc/platform/platform.conf
++            if [ "${nodetype}" = "controller" ]; then
++                # StarlingX: Hook the transient services launched by systemd-run
++                # to allow for proper cleanup and orderly shutdown
++
++                # Set nullglob so wildcards will return empty string if no match
++                shopt -s nullglob
++
++                OSD_SERVICES=$(for svc in /run/systemd/system/ceph-osd*.service; do basename $svc; done | xargs echo)
++                for d in /run/systemd/system/ceph-osd*.d; do
++                    cat <<EOF > $d/starlingx-overrides.conf
++[Unit]
++Before=docker.service
++After=sm-shutdown.service
++
++EOF
++                done
++
++                for d in /run/systemd/system/ceph-mon*.d; do
++                    cat <<EOF > $d/starlingx-overrides.conf
++[Unit]
++Before=docker.service
++After=sm-shutdown.service ${OSD_SERVICES}
++
++EOF
++                done
++
++                shopt -u nullglob
++
++                systemctl daemon-reload
++            fi
++
+ 	    [ -n "$post_start" ] && do_cmd "$post_start"
+ 	    [ -n "$lockfile" ] && [ "$?" -eq 0 ] && touch $lockfile
+ 	    ;;
+-- 
+1.8.3.1
+