root/daemons/controld/controld_timers.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. get_timer_desc
  2. controld_stop_timer
  3. controld_start_timer
  4. do_timer_control
  5. crm_timer_popped
  6. controld_init_fsa_timers
  7. controld_configure_fsa_timers
  8. controld_free_fsa_timers
  9. controld_is_started_transition_timer
  10. controld_start_recheck_timer
  11. controld_start_wait_timer
  12. controld_stop_recheck_timer
  13. controld_get_period_transition_timer
  14. controld_reset_counter_election_timer
  15. controld_stop_transition_timer
  16. controld_start_transition_timer
  17. controld_shutdown_start_countdown

   1 /*
   2  * Copyright 2004-2022 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <time.h>
  13 #include <stdlib.h>
  14 
  15 #include <crm/crm.h>
  16 #include <crm/msg_xml.h>
  17 #include <pacemaker-controld.h>
  18 
  19 //! FSA mainloop timer type
  20 typedef struct fsa_timer_s {
  21     guint source_id;                        //!< Timer source ID
  22     guint period_ms;                        //!< Timer period
  23     enum crmd_fsa_input fsa_input;          //!< Input to register if timer pops
  24     gboolean (*callback) (gpointer data);   //!< What do if timer pops
  25     bool log_error;                         //!< Timer popping indicates error
  26     int counter;                            //!< For detecting loops
  27 } fsa_timer_t;
  28 
  29 //! Wait before retrying a failed cib or executor connection
  30 static fsa_timer_t *wait_timer = NULL;
  31 
  32 //! Periodically re-run scheduler (for date_spec evaluation and as a failsafe)
  33 static fsa_timer_t *recheck_timer = NULL;
  34 
  35 //! Wait at start-up, or after an election, for DC to make contact
  36 static fsa_timer_t *election_timer = NULL;
  37 
  38 //! Delay start of new transition with expectation something else might happen
  39 static fsa_timer_t *transition_timer = NULL;
  40 
  41 //! join-integration-timeout
  42 static fsa_timer_t *integration_timer = NULL;
  43 
  44 //! join-finalization-timeout
  45 static fsa_timer_t *finalization_timer = NULL;
  46 
  47 // Wait for DC to stop all resources and give us the all-clear to shut down
  48 fsa_timer_t *shutdown_escalation_timer = NULL;
  49 
  50 //! Cluster recheck interval (from configuration)
  51 static guint recheck_interval_ms = 0;
  52 
  53 static const char *
  54 get_timer_desc(fsa_timer_t * timer)
     /* [previous][next][first][last][top][bottom][index][help] */
  55 {
  56     if (timer == election_timer) {
  57         return "Election Trigger";
  58 
  59     } else if (timer == shutdown_escalation_timer) {
  60         return "Shutdown Escalation";
  61 
  62     } else if (timer == integration_timer) {
  63         return "Integration Timer";
  64 
  65     } else if (timer == finalization_timer) {
  66         return "Finalization Timer";
  67 
  68     } else if (timer == transition_timer) {
  69         return "New Transition Timer";
  70 
  71     } else if (timer == wait_timer) {
  72         return "Wait Timer";
  73 
  74     } else if (timer == recheck_timer) {
  75         return "Cluster Recheck Timer";
  76 
  77     }
  78     return "Unknown Timer";
  79 }
  80 
  81 /*!
  82  * \internal
  83  * \brief Stop an FSA timer
  84  *
  85  * \param[in,out] timer  Timer to stop
  86  *
  87  * \return true if the timer was running, or false otherwise
  88  */
  89 static bool
  90 controld_stop_timer(fsa_timer_t *timer)
     /* [previous][next][first][last][top][bottom][index][help] */
  91 {
  92     CRM_CHECK(timer != NULL, return false);
  93 
  94     if (timer->source_id != 0) {
  95         crm_trace("Stopping %s (would inject %s if popped after %ums, src=%d)",
  96                   get_timer_desc(timer), fsa_input2string(timer->fsa_input),
  97                   timer->period_ms, timer->source_id);
  98         g_source_remove(timer->source_id);
  99         timer->source_id = 0;
 100 
 101     } else {
 102         crm_trace("%s already stopped (would inject %s if popped after %ums)",
 103                   get_timer_desc(timer), fsa_input2string(timer->fsa_input),
 104                   timer->period_ms);
 105         return false;
 106     }
 107     return true;
 108 }
 109 
 110 /*!
 111  * \internal
 112  * \brief Start an FSA timer
 113  *
 114  * \param[in,out] timer  Timer to start
 115  */
 116 static void
 117 controld_start_timer(fsa_timer_t *timer)
     /* [previous][next][first][last][top][bottom][index][help] */
 118 {
 119     if (timer->source_id == 0 && timer->period_ms > 0) {
 120         timer->source_id = g_timeout_add(timer->period_ms, timer->callback, (void *)timer);
 121         CRM_ASSERT(timer->source_id != 0);
 122         crm_debug("Started %s (inject %s if pops after %ums, source=%d)",
 123                   get_timer_desc(timer), fsa_input2string(timer->fsa_input),
 124                   timer->period_ms, timer->source_id);
 125     } else {
 126         crm_debug("%s already running (inject %s if pops after %ums, source=%d)",
 127                   get_timer_desc(timer), fsa_input2string(timer->fsa_input),
 128                   timer->period_ms, timer->source_id);
 129     }
 130 }
 131 
 132 /*      A_DC_TIMER_STOP, A_DC_TIMER_START,
 133  *      A_FINALIZE_TIMER_STOP, A_FINALIZE_TIMER_START
 134  *      A_INTEGRATE_TIMER_STOP, A_INTEGRATE_TIMER_START
 135  */
 136 void
 137 do_timer_control(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 138                  enum crmd_fsa_cause cause,
 139                  enum crmd_fsa_state cur_state,
 140                  enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 141 {
 142     gboolean timer_op_ok = TRUE;
 143 
 144     if (action & A_DC_TIMER_STOP) {
 145         timer_op_ok = controld_stop_timer(election_timer);
 146 
 147     } else if (action & A_FINALIZE_TIMER_STOP) {
 148         timer_op_ok = controld_stop_timer(finalization_timer);
 149 
 150     } else if (action & A_INTEGRATE_TIMER_STOP) {
 151         timer_op_ok = controld_stop_timer(integration_timer);
 152     }
 153 
 154     /* don't start a timer that wasn't already running */
 155     if (action & A_DC_TIMER_START && timer_op_ok) {
 156         controld_start_timer(election_timer);
 157         if (AM_I_DC) {
 158             /* there can be only one */
 159             register_fsa_input(cause, I_ELECTION, NULL);
 160         }
 161 
 162     } else if (action & A_FINALIZE_TIMER_START) {
 163         controld_start_timer(finalization_timer);
 164 
 165     } else if (action & A_INTEGRATE_TIMER_START) {
 166         controld_start_timer(integration_timer);
 167     }
 168 }
 169 
 170 static gboolean
 171 crm_timer_popped(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 172 {
 173     fsa_timer_t *timer = (fsa_timer_t *) data;
 174 
 175     if (timer->log_error) {
 176         crm_err("%s just popped in state %s! " CRM_XS " input=%s time=%ums",
 177                 get_timer_desc(timer),
 178                 fsa_state2string(controld_globals.fsa_state),
 179                 fsa_input2string(timer->fsa_input), timer->period_ms);
 180     } else {
 181         crm_info("%s just popped " CRM_XS " input=%s time=%ums",
 182                  get_timer_desc(timer), fsa_input2string(timer->fsa_input),
 183                  timer->period_ms);
 184         timer->counter++;
 185     }
 186 
 187     if ((timer == election_timer) && (election_timer->counter > 5)) {
 188         crm_notice("We appear to be in an election loop, something may be wrong");
 189         crm_write_blackbox(0, NULL);
 190         election_timer->counter = 0;
 191     }
 192 
 193     controld_stop_timer(timer);  // Make timer _not_ go off again
 194 
 195     if (timer->fsa_input == I_INTEGRATED) {
 196         crm_info("Welcomed: %d, Integrated: %d",
 197                  crmd_join_phase_count(crm_join_welcomed),
 198                  crmd_join_phase_count(crm_join_integrated));
 199         if (crmd_join_phase_count(crm_join_welcomed) == 0) {
 200             // If we don't even have ourselves, start again
 201             register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, NULL,
 202                                    __func__);
 203 
 204         } else {
 205             register_fsa_input_before(C_TIMER_POPPED, timer->fsa_input, NULL);
 206         }
 207 
 208     } else if ((timer == recheck_timer)
 209                && (controld_globals.fsa_state != S_IDLE)) {
 210         crm_debug("Discarding %s event in state: %s",
 211                   fsa_input2string(timer->fsa_input),
 212                   fsa_state2string(controld_globals.fsa_state));
 213 
 214     } else if ((timer == finalization_timer)
 215                && (controld_globals.fsa_state != S_FINALIZE_JOIN)) {
 216         crm_debug("Discarding %s event in state: %s",
 217                   fsa_input2string(timer->fsa_input),
 218                   fsa_state2string(controld_globals.fsa_state));
 219 
 220     } else if (timer->fsa_input != I_NULL) {
 221         register_fsa_input(C_TIMER_POPPED, timer->fsa_input, NULL);
 222     }
 223 
 224     controld_trigger_fsa();
 225 
 226     return TRUE;
 227 }
 228 
 229 bool
 230 controld_init_fsa_timers(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 231 {
 232     transition_timer = calloc(1, sizeof(fsa_timer_t));
 233     if (transition_timer == NULL) {
 234         return FALSE;
 235     }
 236 
 237     integration_timer = calloc(1, sizeof(fsa_timer_t));
 238     if (integration_timer == NULL) {
 239         return FALSE;
 240     }
 241 
 242     finalization_timer = calloc(1, sizeof(fsa_timer_t));
 243     if (finalization_timer == NULL) {
 244         return FALSE;
 245     }
 246 
 247     election_timer = calloc(1, sizeof(fsa_timer_t));
 248     if (election_timer == NULL) {
 249         return FALSE;
 250     }
 251 
 252     shutdown_escalation_timer = calloc(1, sizeof(fsa_timer_t));
 253     if (shutdown_escalation_timer == NULL) {
 254         return FALSE;
 255     }
 256 
 257     wait_timer = calloc(1, sizeof(fsa_timer_t));
 258     if (wait_timer == NULL) {
 259         return FALSE;
 260     }
 261 
 262     recheck_timer = calloc(1, sizeof(fsa_timer_t));
 263     if (recheck_timer == NULL) {
 264         return FALSE;
 265     }
 266 
 267     election_timer->source_id = 0;
 268     election_timer->period_ms = 0;
 269     election_timer->fsa_input = I_DC_TIMEOUT;
 270     election_timer->callback = crm_timer_popped;
 271     election_timer->log_error = FALSE;
 272 
 273     transition_timer->source_id = 0;
 274     transition_timer->period_ms = 0;
 275     transition_timer->fsa_input = I_PE_CALC;
 276     transition_timer->callback = crm_timer_popped;
 277     transition_timer->log_error = FALSE;
 278 
 279     integration_timer->source_id = 0;
 280     integration_timer->period_ms = 0;
 281     integration_timer->fsa_input = I_INTEGRATED;
 282     integration_timer->callback = crm_timer_popped;
 283     integration_timer->log_error = TRUE;
 284 
 285     finalization_timer->source_id = 0;
 286     finalization_timer->period_ms = 0;
 287     finalization_timer->fsa_input = I_FINALIZED;
 288     finalization_timer->callback = crm_timer_popped;
 289     finalization_timer->log_error = FALSE;
 290 
 291     /* We can't use I_FINALIZED here, because that creates a bug in the join
 292      * process where a joining node can be stuck in S_PENDING while we think it
 293      * is in S_NOT_DC. This created an infinite transition loop in which we
 294      * continually send probes which the node NACKs because it's pending.
 295      *
 296      * If we have nodes where the cluster layer is active but the controller is
 297      * not, we can avoid this causing an election/join loop, in the integration
 298      * phase.
 299      */
 300     finalization_timer->fsa_input = I_ELECTION;
 301 
 302     shutdown_escalation_timer->source_id = 0;
 303     shutdown_escalation_timer->period_ms = 0;
 304     shutdown_escalation_timer->fsa_input = I_STOP;
 305     shutdown_escalation_timer->callback = crm_timer_popped;
 306     shutdown_escalation_timer->log_error = TRUE;
 307 
 308     wait_timer->source_id = 0;
 309     wait_timer->period_ms = 2000;
 310     wait_timer->fsa_input = I_NULL;
 311     wait_timer->callback = crm_timer_popped;
 312     wait_timer->log_error = FALSE;
 313 
 314     recheck_timer->source_id = 0;
 315     recheck_timer->period_ms = 0;
 316     recheck_timer->fsa_input = I_PE_CALC;
 317     recheck_timer->callback = crm_timer_popped;
 318     recheck_timer->log_error = FALSE;
 319 
 320     return TRUE;
 321 }
 322 
 323 /*!
 324  * \internal
 325  * \brief Configure timers based on the CIB
 326  *
 327  * \param[in,out] options  Name/value pairs for configured options
 328  */
 329 void
 330 controld_configure_fsa_timers(GHashTable *options)
     /* [previous][next][first][last][top][bottom][index][help] */
 331 {
 332     const char *value = NULL;
 333 
 334     // Election timer
 335     value = g_hash_table_lookup(options, XML_CONFIG_ATTR_DC_DEADTIME);
 336     election_timer->period_ms = crm_parse_interval_spec(value);
 337 
 338     // Integration timer
 339     value = g_hash_table_lookup(options, "join-integration-timeout");
 340     integration_timer->period_ms = crm_parse_interval_spec(value);
 341 
 342     // Finalization timer
 343     value = g_hash_table_lookup(options, "join-finalization-timeout");
 344     finalization_timer->period_ms = crm_parse_interval_spec(value);
 345 
 346     // Shutdown escalation timer
 347     value = g_hash_table_lookup(options, XML_CONFIG_ATTR_FORCE_QUIT);
 348     shutdown_escalation_timer->period_ms = crm_parse_interval_spec(value);
 349     crm_debug("Shutdown escalation occurs if DC has not responded to request "
 350               "in %ums", shutdown_escalation_timer->period_ms);
 351 
 352     // Transition timer
 353     value = g_hash_table_lookup(options, "transition-delay");
 354     transition_timer->period_ms = crm_parse_interval_spec(value);
 355 
 356     // Recheck interval
 357     value = g_hash_table_lookup(options, XML_CONFIG_ATTR_RECHECK);
 358     recheck_interval_ms = crm_parse_interval_spec(value);
 359     crm_debug("Re-run scheduler after %dms of inactivity", recheck_interval_ms);
 360 }
 361 
 362 void
 363 controld_free_fsa_timers(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 364 {
 365     controld_stop_timer(transition_timer);
 366     controld_stop_timer(integration_timer);
 367     controld_stop_timer(finalization_timer);
 368     controld_stop_timer(election_timer);
 369     controld_stop_timer(shutdown_escalation_timer);
 370     controld_stop_timer(wait_timer);
 371     controld_stop_timer(recheck_timer);
 372 
 373     free(transition_timer); transition_timer = NULL;
 374     free(integration_timer); integration_timer = NULL;
 375     free(finalization_timer); finalization_timer = NULL;
 376     free(election_timer); election_timer = NULL;
 377     free(shutdown_escalation_timer); shutdown_escalation_timer = NULL;
 378     free(wait_timer); wait_timer = NULL;
 379     free(recheck_timer); recheck_timer = NULL;
 380 }
 381 
 382 /*!
 383  * \internal
 384  * \brief Check whether the transition timer is started
 385  * \return true if the transition timer is started, or false otherwise
 386  */
 387 bool
 388 controld_is_started_transition_timer(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 389 {
 390     return (transition_timer->period_ms > 0)
 391            && (transition_timer->source_id != 0);
 392 }
 393 
 394 /*!
 395  * \internal
 396  * \brief Start the recheck timer
 397  */
 398 void
 399 controld_start_recheck_timer(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 400 {
 401     // Default to recheck interval configured in CIB (if any)
 402     guint period_ms = recheck_interval_ms;
 403 
 404     // If scheduler supplied a "recheck by" time, check whether that's sooner
 405     if (controld_globals.transition_graph->recheck_by > 0) {
 406         time_t diff_seconds = controld_globals.transition_graph->recheck_by
 407                               - time(NULL);
 408 
 409         if (diff_seconds < 1) {
 410             // We're already past the desired time
 411             period_ms = 500;
 412         } else {
 413             period_ms = (guint) diff_seconds * 1000;
 414         }
 415 
 416         // Use "recheck by" only if it's sooner than interval from CIB
 417         if (period_ms > recheck_interval_ms) {
 418             period_ms = recheck_interval_ms;
 419         }
 420     }
 421 
 422     if (period_ms > 0) {
 423         recheck_timer->period_ms = period_ms;
 424         controld_start_timer(recheck_timer);
 425     }
 426 }
 427 
 428 /*!
 429  * \internal
 430  * \brief Start the wait timer
 431  */
 432 void
 433 controld_start_wait_timer(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 434 {
 435     controld_start_timer(wait_timer);
 436 }
 437 
 438 /*!
 439  * \internal
 440  * \brief Stop the recheck timer
 441  *
 442  * \return true if the recheck timer was running, or false otherwise
 443  */
 444 bool
 445 controld_stop_recheck_timer(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 446 {
 447     return controld_stop_timer(recheck_timer);
 448 }
 449 
 450 /*!
 451  * \brief Get the transition timer's configured period
 452  * \return The transition_timer's period
 453  */
 454 guint
 455 controld_get_period_transition_timer(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 456 {
 457     return transition_timer->period_ms;
 458 }
 459 
 460 /*!
 461  * \internal
 462  * \brief Reset the election timer's counter to 0
 463  */
 464 void
 465 controld_reset_counter_election_timer(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 466 {
 467     election_timer->counter = 0;
 468 }
 469 
 470 /*!
 471  * \internal
 472  * \brief Stop the transition timer
 473  *
 474  * \return true if the transition timer was running, or false otherwise
 475  */
 476 bool
 477 controld_stop_transition_timer(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 478 {
 479     return controld_stop_timer(transition_timer);
 480 }
 481 
 482 /*!
 483  * \internal
 484  * \brief Start the transition timer
 485  */
 486 void
 487 controld_start_transition_timer(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 488 {
 489     controld_start_timer(transition_timer);
 490 }
 491 
 492 /*!
 493  * \internal
 494  * \brief Start the countdown sequence for a shutdown
 495  *
 496  * \param[in] default_period_ms  Period to use if the shutdown escalation
 497  *                               timer's period is 0
 498  */
 499 void
 500 controld_shutdown_start_countdown(guint default_period_ms)
     /* [previous][next][first][last][top][bottom][index][help] */
 501 {
 502     if (shutdown_escalation_timer->period_ms == 0) {
 503         shutdown_escalation_timer->period_ms = default_period_ms;
 504     }
 505 
 506     crm_notice("Initiating controller shutdown sequence " CRM_XS " limit=%ums",
 507                shutdown_escalation_timer->period_ms);
 508     controld_start_timer(shutdown_escalation_timer);
 509 }

/* [previous][next][first][last][top][bottom][index][help] */