Merge "Prevent pmond process recovery when system is not running"
This commit is contained in:
commit
a316fea461
@ -1835,33 +1835,69 @@ int execute_pipe_cmd(const char *command, char *result, unsigned int result_size
|
|||||||
#define PIPE_COMMAND_RESPON_LEN (100)
|
#define PIPE_COMMAND_RESPON_LEN (100)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
system_state_enum get_system_state ( void )
|
const char * get_system_state_str ( system_state_enum system_state )
|
||||||
{
|
{
|
||||||
char pipe_cmd_output [PIPE_COMMAND_RESPON_LEN] ;
|
switch(system_state)
|
||||||
execute_pipe_cmd ( "systemctl is-system-running", &pipe_cmd_output[0], PIPE_COMMAND_RESPON_LEN );
|
|
||||||
if ( strnlen ( pipe_cmd_output, PIPE_COMMAND_RESPON_LEN ) > 0 )
|
|
||||||
{
|
{
|
||||||
ilog ("systemctl reports host as '%s'\n", pipe_cmd_output );
|
case MTC_SYSTEM_STATE__RUNNING: return("running");
|
||||||
string temp = pipe_cmd_output ;
|
case MTC_SYSTEM_STATE__STOPPING: return("stopping");
|
||||||
if ( temp.find ("stopping") != string::npos )
|
case MTC_SYSTEM_STATE__STARTING: return("starting");
|
||||||
return MTC_SYSTEM_STATE__STOPPING;
|
case MTC_SYSTEM_STATE__DEGRADED: return("degraded");
|
||||||
if ( temp.find ("running") != string::npos )
|
case MTC_SYSTEM_STATE__INITIALIZING: return("initializing");
|
||||||
return MTC_SYSTEM_STATE__RUNNING;
|
case MTC_SYSTEM_STATE__OFFLINE: return("offline");
|
||||||
if ( temp.find ("degraded") != string::npos )
|
case MTC_SYSTEM_STATE__MAINTENANCE: return("maintenance");
|
||||||
return MTC_SYSTEM_STATE__DEGRADED;
|
default: return("unknown");
|
||||||
if ( temp.find ("starting") != string::npos )
|
|
||||||
return MTC_SYSTEM_STATE__STARTING;
|
|
||||||
if ( temp.find ("initializing") != string::npos )
|
|
||||||
return MTC_SYSTEM_STATE__INITIALIZING;
|
|
||||||
if ( temp.find ("offline") != string::npos )
|
|
||||||
return MTC_SYSTEM_STATE__OFFLINE;
|
|
||||||
if ( temp.find ("maintenance") != string::npos )
|
|
||||||
return MTC_SYSTEM_STATE__MAINTENANCE;
|
|
||||||
slog ("unexpected response: <%s>\n", temp.c_str());
|
|
||||||
}
|
}
|
||||||
else
|
}
|
||||||
{
|
|
||||||
wlog ("systemctl is-system-running yielded no response\n");
|
system_state_enum get_system_state ( bool verbose )
|
||||||
}
|
{
|
||||||
return MTC_SYSTEM_STATE__UNKNOWN ;
|
|
||||||
|
int retry = 0 ;
|
||||||
|
bool unexpected_response = false ;
|
||||||
|
string temp = "" ;
|
||||||
|
system_state_enum system_state = MTC_SYSTEM_STATE__UNKNOWN ;
|
||||||
|
for ( ; retry < 3 ; retry++ )
|
||||||
|
{
|
||||||
|
char pipe_cmd_output [PIPE_COMMAND_RESPON_LEN] ;
|
||||||
|
execute_pipe_cmd ( "systemctl is-system-running",
|
||||||
|
&pipe_cmd_output[0], PIPE_COMMAND_RESPON_LEN );
|
||||||
|
if ( strnlen ( pipe_cmd_output, PIPE_COMMAND_RESPON_LEN ) > 0 )
|
||||||
|
{
|
||||||
|
temp = pipe_cmd_output ;
|
||||||
|
if ( temp.find ("stopping") != string::npos )
|
||||||
|
system_state = MTC_SYSTEM_STATE__STOPPING;
|
||||||
|
else if ( temp.find ("running") != string::npos )
|
||||||
|
system_state = MTC_SYSTEM_STATE__RUNNING;
|
||||||
|
else if ( temp.find ("degraded") != string::npos )
|
||||||
|
system_state = MTC_SYSTEM_STATE__DEGRADED;
|
||||||
|
else if ( temp.find ("starting") != string::npos )
|
||||||
|
system_state = MTC_SYSTEM_STATE__STARTING;
|
||||||
|
else if ( temp.find ("initializing") != string::npos )
|
||||||
|
system_state = MTC_SYSTEM_STATE__INITIALIZING;
|
||||||
|
else if ( temp.find ("offline") != string::npos )
|
||||||
|
system_state = MTC_SYSTEM_STATE__OFFLINE;
|
||||||
|
else if ( temp.find ("maintenance") != string::npos )
|
||||||
|
system_state = MTC_SYSTEM_STATE__MAINTENANCE;
|
||||||
|
else
|
||||||
|
unexpected_response = true ;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( system_state != MTC_SYSTEM_STATE__UNKNOWN )
|
||||||
|
break ;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( verbose || unexpected_response )
|
||||||
|
{
|
||||||
|
if ( unexpected_response )
|
||||||
|
{
|
||||||
|
ilog ("systemctl provided unexpected response:'%s'", temp.c_str());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ilog ("systemctl reports host in '%s' state (%d)",
|
||||||
|
get_system_state_str(system_state), retry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return system_state ;
|
||||||
}
|
}
|
||||||
|
@ -127,6 +127,7 @@ typedef enum
|
|||||||
MTC_SYSTEM_STATE__UNKNOWN
|
MTC_SYSTEM_STATE__UNKNOWN
|
||||||
} system_state_enum ;
|
} system_state_enum ;
|
||||||
|
|
||||||
system_state_enum get_system_state ( void );
|
system_state_enum get_system_state ( bool verbose=true );
|
||||||
|
const char * get_system_state_str ( system_state_enum system_state );
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1807,7 +1807,7 @@ void pmon_service ( pmon_ctrl_type * ctrl_ptr )
|
|||||||
ilog ("Starting 'Degrade Audit' timer (%d secs)\n", degrade_period );
|
ilog ("Starting 'Degrade Audit' timer (%d secs)\n", degrade_period );
|
||||||
mtcTimer_start ( pmonTimer_degrade, pmon_timer_handler, degrade_period );
|
mtcTimer_start ( pmonTimer_degrade, pmon_timer_handler, degrade_period );
|
||||||
|
|
||||||
ilog ("Starting 'Pulse' timer (%d secs)\n", pulse_period );
|
ilog ("Starting 'Pulse' timer (%d msecs)\n", pulse_period );
|
||||||
mtcTimer_start_msec ( pmonTimer_pulse, pmon_timer_handler, pulse_period );
|
mtcTimer_start_msec ( pmonTimer_pulse, pmon_timer_handler, pulse_period );
|
||||||
|
|
||||||
ilog ("Starting 'Host Watchdog' timer (%d secs)\n", hostwd_period );
|
ilog ("Starting 'Host Watchdog' timer (%d secs)\n", hostwd_period );
|
||||||
@ -1887,17 +1887,6 @@ void pmon_service ( pmon_ctrl_type * ctrl_ptr )
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Avoid pmond thrashing trying to recover processes during
|
|
||||||
* system shutdown. */
|
|
||||||
if ( _pmon_ctrl_ptr->system_state == MTC_SYSTEM_STATE__STOPPING )
|
|
||||||
{
|
|
||||||
wlog_throttled ( shutdown_log_throttle, 500,
|
|
||||||
"process monitoring disabled during system shutdown\n");
|
|
||||||
usleep (500);
|
|
||||||
continue ;
|
|
||||||
}
|
|
||||||
if ( shutdown_log_throttle ) shutdown_log_throttle = 0 ;
|
|
||||||
|
|
||||||
if ( inotify_fault == false )
|
if ( inotify_fault == false )
|
||||||
{
|
{
|
||||||
if ( get_inotify_events ( ctrl_ptr->fd ) == true )
|
if ( get_inotify_events ( ctrl_ptr->fd ) == true )
|
||||||
@ -1992,9 +1981,48 @@ void pmon_service ( pmon_ctrl_type * ctrl_ptr )
|
|||||||
_get_events ( );
|
_get_events ( );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Check system state before managing processes.
|
||||||
|
*
|
||||||
|
* Prevent process recovery while not in the
|
||||||
|
* running or degraded state. */
|
||||||
|
if (( _pmon_ctrl_ptr->system_state != MTC_SYSTEM_STATE__RUNNING ) &&
|
||||||
|
( _pmon_ctrl_ptr->system_state != MTC_SYSTEM_STATE__DEGRADED ))
|
||||||
|
{
|
||||||
|
system_state_enum system_state = get_system_state(false);
|
||||||
|
if ( system_state != _pmon_ctrl_ptr->system_state )
|
||||||
|
{
|
||||||
|
_pmon_ctrl_ptr->system_state = system_state ;
|
||||||
|
if (( system_state != MTC_SYSTEM_STATE__RUNNING ) &&
|
||||||
|
( system_state != MTC_SYSTEM_STATE__DEGRADED ))
|
||||||
|
{
|
||||||
|
/* log every state change that is not running / degraded */
|
||||||
|
wlog ("process monitoring disabled while in '%s' state",
|
||||||
|
get_system_state_str(system_state));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* log the state change back to running / degraded */
|
||||||
|
wlog ("process monitoring re-enabled while in '%s' state",
|
||||||
|
get_system_state_str(system_state));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* throttle the disabled state during shutdown log */
|
||||||
|
if ( _pmon_ctrl_ptr->system_state == MTC_SYSTEM_STATE__STOPPING )
|
||||||
|
{
|
||||||
|
wlog_throttled ( shutdown_log_throttle, 60,
|
||||||
|
"process monitoring disabled during system shutdown\n");
|
||||||
|
}
|
||||||
|
sleep (1);
|
||||||
|
continue ;
|
||||||
|
}
|
||||||
|
else if ( shutdown_log_throttle )
|
||||||
|
shutdown_log_throttle = 0 ;
|
||||||
|
|
||||||
/* Monitor Processes */
|
/* Monitor Processes */
|
||||||
for ( int i = 0 ; i < ctrl_ptr->processes ; i++ )
|
for ( int i = 0 ; i < ctrl_ptr->processes ; i++ )
|
||||||
{
|
{
|
||||||
|
|
||||||
/* Allow a process to be ignored */
|
/* Allow a process to be ignored */
|
||||||
if ( process_config[i].ignore == true )
|
if ( process_config[i].ignore == true )
|
||||||
{
|
{
|
||||||
|
Loading…
x
Reference in New Issue
Block a user