Merge "Prevent pmond process recovery when system is not running"
This commit is contained in:
commit
a316fea461
@ -1835,33 +1835,69 @@ int execute_pipe_cmd(const char *command, char *result, unsigned int result_size
|
||||
#define PIPE_COMMAND_RESPON_LEN (100)
|
||||
#endif
|
||||
|
||||
system_state_enum get_system_state ( void )
|
||||
const char * get_system_state_str ( system_state_enum system_state )
|
||||
{
|
||||
char pipe_cmd_output [PIPE_COMMAND_RESPON_LEN] ;
|
||||
execute_pipe_cmd ( "systemctl is-system-running", &pipe_cmd_output[0], PIPE_COMMAND_RESPON_LEN );
|
||||
if ( strnlen ( pipe_cmd_output, PIPE_COMMAND_RESPON_LEN ) > 0 )
|
||||
switch(system_state)
|
||||
{
|
||||
ilog ("systemctl reports host as '%s'\n", pipe_cmd_output );
|
||||
string temp = pipe_cmd_output ;
|
||||
if ( temp.find ("stopping") != string::npos )
|
||||
return MTC_SYSTEM_STATE__STOPPING;
|
||||
if ( temp.find ("running") != string::npos )
|
||||
return MTC_SYSTEM_STATE__RUNNING;
|
||||
if ( temp.find ("degraded") != string::npos )
|
||||
return MTC_SYSTEM_STATE__DEGRADED;
|
||||
if ( temp.find ("starting") != string::npos )
|
||||
return MTC_SYSTEM_STATE__STARTING;
|
||||
if ( temp.find ("initializing") != string::npos )
|
||||
return MTC_SYSTEM_STATE__INITIALIZING;
|
||||
if ( temp.find ("offline") != string::npos )
|
||||
return MTC_SYSTEM_STATE__OFFLINE;
|
||||
if ( temp.find ("maintenance") != string::npos )
|
||||
return MTC_SYSTEM_STATE__MAINTENANCE;
|
||||
slog ("unexpected response: <%s>\n", temp.c_str());
|
||||
case MTC_SYSTEM_STATE__RUNNING: return("running");
|
||||
case MTC_SYSTEM_STATE__STOPPING: return("stopping");
|
||||
case MTC_SYSTEM_STATE__STARTING: return("starting");
|
||||
case MTC_SYSTEM_STATE__DEGRADED: return("degraded");
|
||||
case MTC_SYSTEM_STATE__INITIALIZING: return("initializing");
|
||||
case MTC_SYSTEM_STATE__OFFLINE: return("offline");
|
||||
case MTC_SYSTEM_STATE__MAINTENANCE: return("maintenance");
|
||||
default: return("unknown");
|
||||
}
|
||||
else
|
||||
{
|
||||
wlog ("systemctl is-system-running yielded no response\n");
|
||||
}
|
||||
return MTC_SYSTEM_STATE__UNKNOWN ;
|
||||
}
|
||||
|
||||
/**
 * Query systemd for the overall system state.
 *
 * Runs 'systemctl is-system-running' through execute_pipe_cmd and maps the
 * text it returns onto a system_state_enum value. The query is attempted up
 * to 3 times; the loop stops at the first attempt whose response contains a
 * recognized state name.
 *
 * @param verbose  when true, the resolved state is logged on every call;
 *                 when false, only an unrecognized response is logged.
 *
 * @return the matching MTC_SYSTEM_STATE__xxx value, or
 *         MTC_SYSTEM_STATE__UNKNOWN when no attempt produced a recognized
 *         response.
 */
system_state_enum get_system_state ( bool verbose )
{
    int retry = 0 ;
    bool unexpected_response = false ;
    string temp = "" ;
    system_state_enum system_state = MTC_SYSTEM_STATE__UNKNOWN ;

    for ( ; retry < 3 ; retry++ )
    {
        /* Zero the buffer so the length check below never reads
         * indeterminate stack contents if the command wrote nothing. */
        char pipe_cmd_output [PIPE_COMMAND_RESPON_LEN] = { 0 } ;

        execute_pipe_cmd ( "systemctl is-system-running",
                           &pipe_cmd_output[0], PIPE_COMMAND_RESPON_LEN );

        size_t response_len = strnlen ( pipe_cmd_output, PIPE_COMMAND_RESPON_LEN );
        if ( response_len > 0 )
        {
            /* Copy by explicit length so a response that fills the whole
             * buffer without a terminator cannot be over-read. */
            temp.assign ( pipe_cmd_output, response_len );

            if ( temp.find ("stopping") != string::npos )
                system_state = MTC_SYSTEM_STATE__STOPPING;
            else if ( temp.find ("running") != string::npos )
                system_state = MTC_SYSTEM_STATE__RUNNING;
            else if ( temp.find ("degraded") != string::npos )
                system_state = MTC_SYSTEM_STATE__DEGRADED;
            else if ( temp.find ("starting") != string::npos )
                system_state = MTC_SYSTEM_STATE__STARTING;
            else if ( temp.find ("initializing") != string::npos )
                system_state = MTC_SYSTEM_STATE__INITIALIZING;
            else if ( temp.find ("offline") != string::npos )
                system_state = MTC_SYSTEM_STATE__OFFLINE;
            else if ( temp.find ("maintenance") != string::npos )
                system_state = MTC_SYSTEM_STATE__MAINTENANCE;
            else
                unexpected_response = true ;
        }

        if ( system_state != MTC_SYSTEM_STATE__UNKNOWN )
        {
            /* A recognized state supersedes any unrecognized response seen
             * on an earlier attempt; without this the valid response held
             * in 'temp' would be logged below as unexpected. */
            unexpected_response = false ;
            break ;
        }
    }

    if ( verbose || unexpected_response )
    {
        if ( unexpected_response )
        {
            ilog ("systemctl provided unexpected response:'%s'", temp.c_str());
        }
        else
        {
            /* the trailing number is the count of retries that were needed */
            ilog ("systemctl reports host in '%s' state (%d)",
                   get_system_state_str(system_state), retry);
        }
    }
    return system_state ;
}
|
||||
|
@ -127,6 +127,7 @@ typedef enum
|
||||
MTC_SYSTEM_STATE__UNKNOWN
|
||||
} system_state_enum ;
|
||||
|
||||
system_state_enum get_system_state ( void );
|
||||
system_state_enum get_system_state ( bool verbose=true );
|
||||
const char * get_system_state_str ( system_state_enum system_state );
|
||||
|
||||
#endif
|
||||
|
@ -1807,7 +1807,7 @@ void pmon_service ( pmon_ctrl_type * ctrl_ptr )
|
||||
ilog ("Starting 'Degrade Audit' timer (%d secs)\n", degrade_period );
|
||||
mtcTimer_start ( pmonTimer_degrade, pmon_timer_handler, degrade_period );
|
||||
|
||||
ilog ("Starting 'Pulse' timer (%d secs)\n", pulse_period );
|
||||
ilog ("Starting 'Pulse' timer (%d msecs)\n", pulse_period );
|
||||
mtcTimer_start_msec ( pmonTimer_pulse, pmon_timer_handler, pulse_period );
|
||||
|
||||
ilog ("Starting 'Host Watchdog' timer (%d secs)\n", hostwd_period );
|
||||
@ -1887,17 +1887,6 @@ void pmon_service ( pmon_ctrl_type * ctrl_ptr )
|
||||
}
|
||||
}
|
||||
|
||||
/* Avoid pmond thrashing trying to recover processes during
|
||||
* system shutdown. */
|
||||
if ( _pmon_ctrl_ptr->system_state == MTC_SYSTEM_STATE__STOPPING )
|
||||
{
|
||||
wlog_throttled ( shutdown_log_throttle, 500,
|
||||
"process monitoring disabled during system shutdown\n");
|
||||
usleep (500);
|
||||
continue ;
|
||||
}
|
||||
if ( shutdown_log_throttle ) shutdown_log_throttle = 0 ;
|
||||
|
||||
if ( inotify_fault == false )
|
||||
{
|
||||
if ( get_inotify_events ( ctrl_ptr->fd ) == true )
|
||||
@ -1992,9 +1981,48 @@ void pmon_service ( pmon_ctrl_type * ctrl_ptr )
|
||||
_get_events ( );
|
||||
}
|
||||
|
||||
/* Check system state before managing processes.
|
||||
*
|
||||
* Prevent process recovery while not in the
|
||||
* running or degraded state. */
|
||||
if (( _pmon_ctrl_ptr->system_state != MTC_SYSTEM_STATE__RUNNING ) &&
|
||||
( _pmon_ctrl_ptr->system_state != MTC_SYSTEM_STATE__DEGRADED ))
|
||||
{
|
||||
system_state_enum system_state = get_system_state(false);
|
||||
if ( system_state != _pmon_ctrl_ptr->system_state )
|
||||
{
|
||||
_pmon_ctrl_ptr->system_state = system_state ;
|
||||
if (( system_state != MTC_SYSTEM_STATE__RUNNING ) &&
|
||||
( system_state != MTC_SYSTEM_STATE__DEGRADED ))
|
||||
{
|
||||
/* log every state change that is not running / degraded */
|
||||
wlog ("process monitoring disabled while in '%s' state",
|
||||
get_system_state_str(system_state));
|
||||
}
|
||||
else
|
||||
{
|
||||
/* log the state change back to running / degraded */
|
||||
wlog ("process monitoring re-enabled while in '%s' state",
|
||||
get_system_state_str(system_state));
|
||||
}
|
||||
}
|
||||
|
||||
/* throttle the disabled state during shutdown log */
|
||||
if ( _pmon_ctrl_ptr->system_state == MTC_SYSTEM_STATE__STOPPING )
|
||||
{
|
||||
wlog_throttled ( shutdown_log_throttle, 60,
|
||||
"process monitoring disabled during system shutdown\n");
|
||||
}
|
||||
sleep (1);
|
||||
continue ;
|
||||
}
|
||||
else if ( shutdown_log_throttle )
|
||||
shutdown_log_throttle = 0 ;
|
||||
|
||||
/* Monitor Processes */
|
||||
for ( int i = 0 ; i < ctrl_ptr->processes ; i++ )
|
||||
{
|
||||
|
||||
/* Allow a process to be ignored */
|
||||
if ( process_config[i].ignore == true )
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user