root/daemons/controld/controld_timers.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. do_timer_control
  2. get_timer_desc
  3. crm_timer_popped
  4. controld_init_fsa_timers
  5. controld_free_fsa_timers
  6. is_timer_started
  7. controld_start_timer
  8. controld_start_recheck_timer
  9. controld_stop_timer

   1 /*
   2  * Copyright 2004-2021 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <time.h>
  13 #include <stdlib.h>
  14 
  15 #include <crm/crm.h>
  16 #include <crm/msg_xml.h>
  17 #include <pacemaker-controld.h>
  18 
  19 // Wait before retrying a failed CIB or executor connection (set up with a 2000ms period, injects I_NULL)
  20 fsa_timer_t *wait_timer = NULL;
  21 
  22 // Periodically re-run scheduler (for date_spec evaluation and as a failsafe); injects I_PE_CALC when it pops
  23 fsa_timer_t *recheck_timer = NULL;
  24 
  25 // Wait at start-up, or after an election, for DC to make contact (injects I_DC_TIMEOUT when it pops)
  26 fsa_timer_t *election_trigger = NULL;
  27 
  28 // Delay start of new transition with expectation something else might happen (injects I_PE_CALC when it pops)
  29 fsa_timer_t *transition_timer = NULL;
  30 
  31 // join-integration-timeout (injects I_INTEGRATED when it pops; popping is logged as an error)
  32 fsa_timer_t *integration_timer = NULL;
  33 
  34 // join-finalization-timeout (injects I_ELECTION, not I_FINALIZED, when it pops)
  35 fsa_timer_t *finalization_timer = NULL;
  36 
  37 // Wait for DC to stop all resources and give us the all-clear to shut down (injects I_STOP when it pops)
  38 fsa_timer_t *shutdown_escalation_timer = NULL;
  39 
  40 // Cluster recheck interval in milliseconds (from configuration); upper bound for the recheck timer period
  41 guint recheck_interval_ms = 0;
  42 
  43 // When scheduler should be re-run (from most recent transition graph); compared against time(), ignored unless > 0
  44 time_t recheck_by = 0;
  45 
  46 /*      A_DC_TIMER_STOP, A_DC_TIMER_START,
  47  *      A_FINALIZE_TIMER_STOP, A_FINALIZE_TIMER_START
  48  *      A_INTEGRATE_TIMER_STOP, A_INTEGRATE_TIMER_START
  49  */
  50 void
  51 do_timer_control(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
  52                  enum crmd_fsa_cause cause,
  53                  enum crmd_fsa_state cur_state,
  54                  enum crmd_fsa_input current_input, fsa_data_t * msg_data)
  55 {
  56     gboolean timer_op_ok = TRUE;
  57 
  58     if (action & A_DC_TIMER_STOP) {
  59         timer_op_ok = controld_stop_timer(election_trigger);
  60 
  61     } else if (action & A_FINALIZE_TIMER_STOP) {
  62         timer_op_ok = controld_stop_timer(finalization_timer);
  63 
  64     } else if (action & A_INTEGRATE_TIMER_STOP) {
  65         timer_op_ok = controld_stop_timer(integration_timer);
  66     }
  67 
  68     /* don't start a timer that wasn't already running */
  69     if (action & A_DC_TIMER_START && timer_op_ok) {
  70         controld_start_timer(election_trigger);
  71         if (AM_I_DC) {
  72             /* there can be only one */
  73             register_fsa_input(cause, I_ELECTION, NULL);
  74         }
  75 
  76     } else if (action & A_FINALIZE_TIMER_START) {
  77         controld_start_timer(finalization_timer);
  78 
  79     } else if (action & A_INTEGRATE_TIMER_START) {
  80         controld_start_timer(integration_timer);
  81     }
  82 }
  83 
  84 const char *
  85 get_timer_desc(fsa_timer_t * timer)
     /* [previous][next][first][last][top][bottom][index][help] */
  86 {
  87     if (timer == election_trigger) {
  88         return "Election Trigger";
  89 
  90     } else if (timer == shutdown_escalation_timer) {
  91         return "Shutdown Escalation";
  92 
  93     } else if (timer == integration_timer) {
  94         return "Integration Timer";
  95 
  96     } else if (timer == finalization_timer) {
  97         return "Finalization Timer";
  98 
  99     } else if (timer == transition_timer) {
 100         return "New Transition Timer";
 101 
 102     } else if (timer == wait_timer) {
 103         return "Wait Timer";
 104 
 105     } else if (timer == recheck_timer) {
 106         return "Cluster Recheck Timer";
 107 
 108     }
 109     return "Unknown Timer";
 110 }
 111 
 112 static gboolean
 113 crm_timer_popped(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 114 {
 115     fsa_timer_t *timer = (fsa_timer_t *) data;
 116 
 117     if (timer->log_error) {
 118         crm_err("%s just popped in state %s! " CRM_XS " input=%s time=%ums",
 119                 get_timer_desc(timer), fsa_state2string(fsa_state),
 120                 fsa_input2string(timer->fsa_input), timer->period_ms);
 121     } else {
 122         crm_info("%s just popped " CRM_XS " input=%s time=%ums",
 123                  get_timer_desc(timer), fsa_input2string(timer->fsa_input),
 124                  timer->period_ms);
 125         timer->counter++;
 126     }
 127 
 128     if (timer == election_trigger && election_trigger->counter > 5) {
 129         crm_notice("We appear to be in an election loop, something may be wrong");
 130         crm_write_blackbox(0, NULL);
 131         election_trigger->counter = 0;
 132     }
 133 
 134     controld_stop_timer(timer);  // Make timer _not_ go off again
 135 
 136     if (timer->fsa_input == I_INTEGRATED) {
 137         crm_info("Welcomed: %d, Integrated: %d",
 138                  crmd_join_phase_count(crm_join_welcomed),
 139                  crmd_join_phase_count(crm_join_integrated));
 140         if (crmd_join_phase_count(crm_join_welcomed) == 0) {
 141             // If we don't even have ourselves, start again
 142             register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, NULL,
 143                                    __func__);
 144 
 145         } else {
 146             register_fsa_input_before(C_TIMER_POPPED, timer->fsa_input, NULL);
 147         }
 148 
 149     } else if (timer == recheck_timer && fsa_state != S_IDLE) {
 150         crm_debug("Discarding %s event in state: %s",
 151                   fsa_input2string(timer->fsa_input), fsa_state2string(fsa_state));
 152 
 153     } else if (timer == finalization_timer && fsa_state != S_FINALIZE_JOIN) {
 154         crm_debug("Discarding %s event in state: %s",
 155                   fsa_input2string(timer->fsa_input), fsa_state2string(fsa_state));
 156 
 157     } else if (timer->fsa_input != I_NULL) {
 158         register_fsa_input(C_TIMER_POPPED, timer->fsa_input, NULL);
 159     }
 160 
 161     crm_trace("Triggering FSA: %s", __func__);
 162     mainloop_set_trigger(fsa_source);
 163 
 164     return TRUE;
 165 }
 166 
 167 bool
 168 controld_init_fsa_timers()
     /* [previous][next][first][last][top][bottom][index][help] */
 169 {
 170     transition_timer = calloc(1, sizeof(fsa_timer_t));
 171     if (transition_timer == NULL) {
 172         return FALSE;
 173     }
 174 
 175     integration_timer = calloc(1, sizeof(fsa_timer_t));
 176     if (integration_timer == NULL) {
 177         return FALSE;
 178     }
 179 
 180     finalization_timer = calloc(1, sizeof(fsa_timer_t));
 181     if (finalization_timer == NULL) {
 182         return FALSE;
 183     }
 184 
 185     election_trigger = calloc(1, sizeof(fsa_timer_t));
 186     if (election_trigger == NULL) {
 187         return FALSE;
 188     }
 189 
 190     shutdown_escalation_timer = calloc(1, sizeof(fsa_timer_t));
 191     if (shutdown_escalation_timer == NULL) {
 192         return FALSE;
 193     }
 194 
 195     wait_timer = calloc(1, sizeof(fsa_timer_t));
 196     if (wait_timer == NULL) {
 197         return FALSE;
 198     }
 199 
 200     recheck_timer = calloc(1, sizeof(fsa_timer_t));
 201     if (recheck_timer == NULL) {
 202         return FALSE;
 203     }
 204 
 205     election_trigger->source_id = 0;
 206     election_trigger->period_ms = 0;
 207     election_trigger->fsa_input = I_DC_TIMEOUT;
 208     election_trigger->callback = crm_timer_popped;
 209     election_trigger->log_error = FALSE;
 210 
 211     transition_timer->source_id = 0;
 212     transition_timer->period_ms = 0;
 213     transition_timer->fsa_input = I_PE_CALC;
 214     transition_timer->callback = crm_timer_popped;
 215     transition_timer->log_error = FALSE;
 216 
 217     integration_timer->source_id = 0;
 218     integration_timer->period_ms = 0;
 219     integration_timer->fsa_input = I_INTEGRATED;
 220     integration_timer->callback = crm_timer_popped;
 221     integration_timer->log_error = TRUE;
 222 
 223     finalization_timer->source_id = 0;
 224     finalization_timer->period_ms = 0;
 225     finalization_timer->fsa_input = I_FINALIZED;
 226     finalization_timer->callback = crm_timer_popped;
 227     finalization_timer->log_error = FALSE;
 228 
 229     /* We can't use I_FINALIZED here, because that creates a bug in the join
 230      * process where a joining node can be stuck in S_PENDING while we think it
 231      * is in S_NOT_DC. This created an infinite transition loop in which we
 232      * continually send probes which the node NACKs because it's pending.
 233      *
 234      * If we have nodes where the cluster layer is active but the controller is
 235      * not, we can avoid this causing an election/join loop, in the integration
 236      * phase.
 237      */
 238     finalization_timer->fsa_input = I_ELECTION;
 239 
 240     shutdown_escalation_timer->source_id = 0;
 241     shutdown_escalation_timer->period_ms = 0;
 242     shutdown_escalation_timer->fsa_input = I_STOP;
 243     shutdown_escalation_timer->callback = crm_timer_popped;
 244     shutdown_escalation_timer->log_error = TRUE;
 245 
 246     wait_timer->source_id = 0;
 247     wait_timer->period_ms = 2000;
 248     wait_timer->fsa_input = I_NULL;
 249     wait_timer->callback = crm_timer_popped;
 250     wait_timer->log_error = FALSE;
 251 
 252     recheck_timer->source_id = 0;
 253     recheck_timer->period_ms = 0;
 254     recheck_timer->fsa_input = I_PE_CALC;
 255     recheck_timer->callback = crm_timer_popped;
 256     recheck_timer->log_error = FALSE;
 257 
 258     return TRUE;
 259 }
 260 
 261 void
 262 controld_free_fsa_timers()
     /* [previous][next][first][last][top][bottom][index][help] */
 263 {
 264     controld_stop_timer(transition_timer);
 265     controld_stop_timer(integration_timer);
 266     controld_stop_timer(finalization_timer);
 267     controld_stop_timer(election_trigger);
 268     controld_stop_timer(shutdown_escalation_timer);
 269     controld_stop_timer(wait_timer);
 270     controld_stop_timer(recheck_timer);
 271 
 272     free(transition_timer); transition_timer = NULL;
 273     free(integration_timer); integration_timer = NULL;
 274     free(finalization_timer); finalization_timer = NULL;
 275     free(election_trigger); election_trigger = NULL;
 276     free(shutdown_escalation_timer); shutdown_escalation_timer = NULL;
 277     free(wait_timer); wait_timer = NULL;
 278     free(recheck_timer); recheck_timer = NULL;
 279 }
 280 
 281 gboolean
 282 is_timer_started(fsa_timer_t * timer)
     /* [previous][next][first][last][top][bottom][index][help] */
 283 {
 284     return (timer->period_ms > 0) && (timer->source_id != 0);
 285 }
 286 
 287 void
 288 controld_start_timer(fsa_timer_t *timer)
     /* [previous][next][first][last][top][bottom][index][help] */
 289 {
 290     if (timer->source_id == 0 && timer->period_ms > 0) {
 291         timer->source_id = g_timeout_add(timer->period_ms, timer->callback, (void *)timer);
 292         CRM_ASSERT(timer->source_id != 0);
 293         crm_debug("Started %s (inject %s if pops after %ums, source=%d)",
 294                   get_timer_desc(timer), fsa_input2string(timer->fsa_input),
 295                   timer->period_ms, timer->source_id);
 296     } else {
 297         crm_debug("%s already running (inject %s if pops after %ums, source=%d)",
 298                   get_timer_desc(timer), fsa_input2string(timer->fsa_input),
 299                   timer->period_ms, timer->source_id);
 300     }
 301 }
 302 
 303 void
 304 controld_start_recheck_timer()
     /* [previous][next][first][last][top][bottom][index][help] */
 305 {
 306     // Default to recheck interval configured in CIB (if any)
 307     guint period_ms = recheck_interval_ms;
 308 
 309     // If scheduler supplied a "recheck by" time, check whether that's sooner
 310     if (recheck_by > 0) {
 311         time_t diff_seconds = recheck_by - time(NULL);
 312 
 313         if (diff_seconds < 1) {
 314             // We're already past the desired time
 315             period_ms = 500;
 316         } else {
 317             period_ms = (guint) diff_seconds * 1000;
 318         }
 319 
 320         // Use "recheck by" only if it's sooner than interval from CIB
 321         if (period_ms > recheck_interval_ms) {
 322             period_ms = recheck_interval_ms;
 323         }
 324     }
 325 
 326     if (period_ms > 0) {
 327         recheck_timer->period_ms = period_ms;
 328         controld_start_timer(recheck_timer);
 329     }
 330 }
 331 
 332 gboolean
 333 controld_stop_timer(fsa_timer_t *timer)
     /* [previous][next][first][last][top][bottom][index][help] */
 334 {
 335     CRM_CHECK(timer != NULL, return FALSE);
 336 
 337     if (timer->source_id != 0) {
 338         crm_trace("Stopping %s (would inject %s if popped after %ums, src=%d)",
 339                   get_timer_desc(timer), fsa_input2string(timer->fsa_input),
 340                   timer->period_ms, timer->source_id);
 341         g_source_remove(timer->source_id);
 342         timer->source_id = 0;
 343 
 344     } else {
 345         crm_trace("%s already stopped (would inject %s if popped after %ums)",
 346                   get_timer_desc(timer), fsa_input2string(timer->fsa_input),
 347                   timer->period_ms);
 348         return FALSE;
 349     }
 350     return TRUE;
 351 }

/* [previous][next][first][last][top][bottom][index][help] */