a8790f2303
This patch adds an action to replace a hard drive for a particular OSD server. The user executing the action will give the OSD number and also the device name of the replacement drive. The rest is taken care of by the action. The action will attempt to go through all the OSD removal steps for the failed drive. It will force unmount the drive and, if that fails, it will lazy unmount the drive. This force and then lazy pattern comes from experience with dead hard drives not behaving nicely with umount. Change-Id: I914cd484280ac3f9b9f1fad8b35ee53e92438a0a
85 lines
2.6 KiB
Python
Executable File
85 lines
2.6 KiB
Python
Executable File
#!/usr/bin/python
|
|
|
|
from charmhelpers.core.hookenv import action_get, log, config, action_fail
|
|
|
|
__author__ = 'chris'
|
|
|
|
import os
|
|
import sys
|
|
|
|
sys.path.append('hooks')
|
|
|
|
import ceph
|
|
|
|
"""
|
|
Given an OSD number, this script will attempt to turn that back into a mount
|
|
point and then replace the OSD with a new one.
|
|
"""
|
|
|
|
|
|
def get_disk_stats():
    """
    Read the kernel's per-device I/O statistics table.

    :return: list of strings, one per line of /proc/diskstats.
             Returns None if the file could not be opened; in that case the
             failure is logged and the action is marked as failed.
    """
    try:
        # https://www.kernel.org/doc/Documentation/iostats.txt
        with open('/proc/diskstats', 'r') as diskstats:
            return diskstats.readlines()
    except IOError as err:
        # Format the exception itself rather than err.message: for an
        # errno-style IOError, .message is an empty string on Python 2 and
        # the attribute does not exist at all on Python 3.
        log('Could not open /proc/diskstats. Error: {}'.format(err))
        action_fail('replace-osd failed because /proc/diskstats could not '
                    'be opened {}'.format(err))
        return None
|
|
|
|
|
|
def lookup_device_name(major_number, minor_number, diskstats=None):
    """
    Translate a (major, minor) device number pair into a device name.

    :param major_number: int. The major device number
    :param minor_number: int. The minor device number
    :param diskstats: optional iterable of strings in /proc/diskstats
                      format. When omitted, the lines are obtained from
                      get_disk_stats().
    :return: string. The name of the device exactly as it appears in the
             third column of the matching line. Example: sda (no /dev/
             prefix is added here).
             Returns None on error or when no line matches.
    """
    if diskstats is None:
        diskstats = get_disk_stats()
    if diskstats is None:
        # get_disk_stats() returns None when /proc/diskstats is unreadable;
        # there is nothing to search, so report "not found" to the caller.
        return None

    for line in diskstats:
        parts = line.split()
        if len(parts) <= 3:
            # Skip bogus lines
            continue
        try:
            # Compare by value. Identity comparison (`is`) only happens to
            # work for small cached integers and misses device numbers such
            # as major 259 or minors above 256.
            if (int(parts[0]) == major_number and
                    int(parts[1]) == minor_number):
                # Found our device. Return its name
                return parts[2]
        except ValueError as value_err:
            # Format the exception itself; .message is not available on
            # Python 3.
            log('Could not convert {} or {} into an integer. Error: {}'
                .format(parts[0], parts[1], value_err))
            continue
    return None
|
|
|
|
|
|
def get_device_number(osd_number):
    """
    Report the device number of the filesystem holding an OSD's directory.

    The directory inspected is /var/lib/ceph/osd/ceph-<osd_number>.

    :param osd_number: int
    :rtype : (major_number,minor_number)
    """
    osd_dir = "/var/lib/ceph/osd/ceph-{}".format(osd_number)
    # lstat does not follow a symlink at osd_dir itself.
    # NOTE(review): confirm that is intended if the OSD directory can be a
    # symlink rather than a mount point.
    st_dev = os.lstat(osd_dir).st_dev
    return os.major(st_dev), os.minor(st_dev)
|
|
|
|
|
|
def main():
    """
    Entry point for the replace-osd action.

    Reads the "osd-number" and "replacement-device" action parameters,
    resolves the dead OSD's directory to a device name, and passes
    everything, together with the "osd-format" and "osd-journal" config
    values, to ceph.replace_osd.

    If the device cannot be resolved the action is marked as failed and
    ceph.replace_osd is not called.
    """
    dead_osd_number = action_get("osd-number")
    replacement_device = action_get("replacement-device")

    try:
        major, minor = get_device_number(dead_osd_number)
    except OSError as err:
        # The OSD directory could not be stat'ed (for example it does not
        # exist); report that through the action instead of a traceback.
        log('Could not stat the directory of osd {}. Error: {}'
            .format(dead_osd_number, err))
        action_fail('replace-osd failed because the directory of osd {} '
                    'could not be inspected {}'.format(dead_osd_number, err))
        return

    device_name = lookup_device_name(major, minor)
    if device_name is None:
        # Without this guard the string "/dev/None" would be handed to
        # ceph.replace_osd as the dead device.
        action_fail('replace-osd failed because the device for osd {} '
                    '(device number {}:{}) could not be determined'
                    .format(dead_osd_number, major, minor))
        return

    osd_format = config('osd-format')
    osd_journal = config('osd-journal')

    ceph.replace_osd(dead_osd_number=dead_osd_number,
                     dead_osd_device="/dev/{}".format(device_name),
                     new_osd_device=replacement_device,
                     osd_format=osd_format,
                     osd_journal=osd_journal)


if __name__ == '__main__':
    main()
|