Added tool to capture L3 agent status
Change-Id: Ica6be14059dfd38662993265164817661d4a68d2
This commit is contained in:
parent
c697f1da8c
commit
14e08c59fe
2
bindep.txt
Normal file
2
bindep.txt
Normal file
@ -0,0 +1,2 @@
|
||||
gcc [compile]
|
||||
libc-dev [compile]
|
0
openstack_tools/cmd/prometheus/__init__.py
Normal file
0
openstack_tools/cmd/prometheus/__init__.py
Normal file
80
openstack_tools/cmd/prometheus/routers_l3_ha.py
Normal file
80
openstack_tools/cmd/prometheus/routers_l3_ha.py
Normal file
@ -0,0 +1,80 @@
|
||||
# Copyright (c) 2020 CLOUD&HEAT GmbH https://www.cloudandheat.com
|
||||
# Copyright 2020 VEXXHOST, Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Generate data for all routers and their L3 HA states.
|
||||
|
||||
There are scenarios where an L3 HA router can end up being active in many
|
||||
different L3 agents. This can be tricky to find and cause chaos in the system,
|
||||
while effort should be done in finding the root cause of this, this will help
|
||||
alert and catch any occurrences of it.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import glob
|
||||
import time
|
||||
import os
|
||||
|
||||
import psutil
|
||||
|
||||
|
||||
def _read_pid(pid_file):
    """Return the PID stored in *pid_file*, or None if it is unusable.

    The file may vanish between the directory glob and the open (router
    deleted) or be empty/garbled (keepalived still starting up); neither
    case should abort the whole collection pass.
    """
    try:
        with open(pid_file) as pid_fd:
            return int(pid_fd.read())
    except (FileNotFoundError, ValueError):
        return None


def _read_master(state_file):
    """Return 1 if *state_file* contains 'master', 0 if not, None if missing."""
    try:
        with open(state_file) as state_fd:
            return 1 if 'master' in state_fd.read() else 0
    except FileNotFoundError:
        # No state file (yet, or any more): nothing to report for it.
        return None


def main():
    """Entry-point for script.

    Scans ``<state>/ha_confs`` for ``*.pid.keepalived-vrrp`` files and, for
    every router whose recorded keepalived PID is still running, emits one
    line of the form ``<metric>{router_id="<id>"} <0|1>`` where the value is
    1 when the router's ``state`` file contains ``master``.

    The result is written to ``--output`` when given and is also printed to
    STDOUT. With ``--loop N`` the scan repeats every N seconds; otherwise it
    runs once. Routers whose pid or state file is missing or unreadable are
    skipped for that pass instead of aborting the run.
    """

    parser = argparse.ArgumentParser()
    parser.add_argument("--metric", default="node_openstack_l3_router_master",
                        help="Name of metric")
    parser.add_argument("--state", default="/var/lib/neutron",
                        help="Neutron state path")
    parser.add_argument("--loop", type=int, help="Loop every N seconds")
    parser.add_argument("--output", help="Output file (default to STDOUT)")
    args = parser.parse_args()

    # These do not change between passes, so build them once.
    pid_suffix = '.pid.keepalived-vrrp'
    ha_conf_dir = os.path.join(args.state, 'ha_confs')
    pid_glob = os.path.join(ha_conf_dir, '*' + pid_suffix)

    while True:
        lines = []
        for pid_file in glob.glob(pid_glob):
            pid = _read_pid(pid_file)

            # Check if the process is _actually_ running
            if pid is None or not psutil.pid_exists(pid):
                continue

            # The glob guarantees the suffix, so strip exactly that tail.
            state_path = pid_file[:-len(pid_suffix)]
            router_id = os.path.basename(state_path)

            master = _read_master(os.path.join(state_path, 'state'))
            if master is None:
                continue

            lines.append('%s{router_id="%s"} %d\n' % (
                args.metric,
                router_id,
                master
            ))

        output = "".join(lines)

        if args.output:
            with open(args.output, 'w') as output_fd:
                output_fd.write(output)

        # NOTE(review): output is echoed to STDOUT even when --output is set;
        # kept as-is to preserve existing behaviour -- confirm this is wanted.
        print(output)

        if not args.loop:
            break
        time.sleep(args.loop)
|
@ -1 +1,2 @@
|
||||
ovs
|
||||
psutil
|
Loading…
Reference in New Issue
Block a user