root/daemons/controld/controld_timers.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. get_timer_desc
  2. controld_stop_timer
  3. controld_start_timer
  4. do_timer_control
  5. crm_timer_popped
  6. controld_init_fsa_timers
  7. controld_configure_fsa_timers
  8. controld_free_fsa_timers
  9. controld_is_started_transition_timer
  10. controld_start_recheck_timer
  11. controld_start_wait_timer
  12. controld_stop_recheck_timer
  13. controld_get_period_transition_timer
  14. controld_reset_counter_election_timer
  15. controld_stop_transition_timer
  16. controld_start_transition_timer
  17. controld_shutdown_start_countdown

   1 /*
   2  * Copyright 2004-2024 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <time.h>
  13 #include <stdlib.h>
  14 #include <glib.h>
  15 
  16 #include <crm/crm.h>
  17 #include <crm/common/xml.h>
  18 #include <pacemaker-controld.h>
  19 
  20 //! FSA mainloop timer type
  21 typedef struct fsa_timer_s {
  22     guint source_id;                        //!< Timer source ID
  23     guint period_ms;                        //!< Timer period
  24     enum crmd_fsa_input fsa_input;          //!< Input to register if timer pops
  25     gboolean (*callback) (gpointer data);   //!< What do if timer pops
  26     bool log_error;                         //!< Timer popping indicates error
  27     int counter;                            //!< For detecting loops
  28 } fsa_timer_t;
  29 
  30 //! Wait before retrying a failed cib or executor connection
  31 static fsa_timer_t *wait_timer = NULL;
  32 
  33 //! Periodically re-run scheduler (for date_spec evaluation and as a failsafe)
  34 static fsa_timer_t *recheck_timer = NULL;
  35 
  36 //! Wait at start-up, or after an election, for DC to make contact
  37 static fsa_timer_t *election_timer = NULL;
  38 
  39 //! Delay start of new transition with expectation something else might happen
  40 static fsa_timer_t *transition_timer = NULL;
  41 
  42 //! \c PCMK_OPT_JOIN_INTEGRATION_TIMEOUT
  43 static fsa_timer_t *integration_timer = NULL;
  44 
  45 //! \c PCMK_OPT_JOIN_FINALIZATION_TIMEOUT
  46 static fsa_timer_t *finalization_timer = NULL;
  47 
  48 // Wait for DC to stop all resources and give us the all-clear to shut down
  49 fsa_timer_t *shutdown_escalation_timer = NULL;
  50 
  51 //! Cluster recheck interval (from configuration)
  52 static guint recheck_interval_ms = 0;
  53 
  54 static const char *
  55 get_timer_desc(fsa_timer_t * timer)
     /* [previous][next][first][last][top][bottom][index][help] */
  56 {
  57     if (timer == election_timer) {
  58         return "Election Trigger";
  59 
  60     } else if (timer == shutdown_escalation_timer) {
  61         return "Shutdown Escalation";
  62 
  63     } else if (timer == integration_timer) {
  64         return "Integration Timer";
  65 
  66     } else if (timer == finalization_timer) {
  67         return "Finalization Timer";
  68 
  69     } else if (timer == transition_timer) {
  70         return "New Transition Timer";
  71 
  72     } else if (timer == wait_timer) {
  73         return "Wait Timer";
  74 
  75     } else if (timer == recheck_timer) {
  76         return "Cluster Recheck Timer";
  77 
  78     }
  79     return "Unknown Timer";
  80 }
  81 
  82 /*!
  83  * \internal
  84  * \brief Stop an FSA timer
  85  *
  86  * \param[in,out] timer  Timer to stop
  87  *
  88  * \return true if the timer was running, or false otherwise
  89  */
  90 static bool
  91 controld_stop_timer(fsa_timer_t *timer)
     /* [previous][next][first][last][top][bottom][index][help] */
  92 {
  93     CRM_CHECK(timer != NULL, return false);
  94 
  95     if (timer->source_id != 0) {
  96         crm_trace("Stopping %s (would inject %s if popped after %ums, src=%d)",
  97                   get_timer_desc(timer), fsa_input2string(timer->fsa_input),
  98                   timer->period_ms, timer->source_id);
  99         g_source_remove(timer->source_id);
 100         timer->source_id = 0;
 101 
 102     } else {
 103         crm_trace("%s already stopped (would inject %s if popped after %ums)",
 104                   get_timer_desc(timer), fsa_input2string(timer->fsa_input),
 105                   timer->period_ms);
 106         return false;
 107     }
 108     return true;
 109 }
 110 
 111 /*!
 112  * \internal
 113  * \brief Start an FSA timer
 114  *
 115  * \param[in,out] timer  Timer to start
 116  */
 117 static void
 118 controld_start_timer(fsa_timer_t *timer)
     /* [previous][next][first][last][top][bottom][index][help] */
 119 {
 120     if (timer->source_id == 0 && timer->period_ms > 0) {
 121         timer->source_id = g_timeout_add(timer->period_ms, timer->callback, (void *)timer);
 122         pcmk__assert(timer->source_id != 0);
 123         crm_debug("Started %s (inject %s if pops after %ums, source=%d)",
 124                   get_timer_desc(timer), fsa_input2string(timer->fsa_input),
 125                   timer->period_ms, timer->source_id);
 126     } else {
 127         crm_debug("%s already running (inject %s if pops after %ums, source=%d)",
 128                   get_timer_desc(timer), fsa_input2string(timer->fsa_input),
 129                   timer->period_ms, timer->source_id);
 130     }
 131 }
 132 
 133 /*      A_DC_TIMER_STOP, A_DC_TIMER_START,
 134  *      A_FINALIZE_TIMER_STOP, A_FINALIZE_TIMER_START
 135  *      A_INTEGRATE_TIMER_STOP, A_INTEGRATE_TIMER_START
 136  */
 137 void
 138 do_timer_control(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 139                  enum crmd_fsa_cause cause,
 140                  enum crmd_fsa_state cur_state,
 141                  enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 142 {
 143     gboolean timer_op_ok = TRUE;
 144 
 145     if (action & A_DC_TIMER_STOP) {
 146         timer_op_ok = controld_stop_timer(election_timer);
 147 
 148     } else if (action & A_FINALIZE_TIMER_STOP) {
 149         timer_op_ok = controld_stop_timer(finalization_timer);
 150 
 151     } else if (action & A_INTEGRATE_TIMER_STOP) {
 152         timer_op_ok = controld_stop_timer(integration_timer);
 153     }
 154 
 155     /* don't start a timer that wasn't already running */
 156     if (action & A_DC_TIMER_START && timer_op_ok) {
 157         controld_start_timer(election_timer);
 158         if (AM_I_DC) {
 159             /* there can be only one */
 160             register_fsa_input(cause, I_ELECTION, NULL);
 161         }
 162 
 163     } else if (action & A_FINALIZE_TIMER_START) {
 164         controld_start_timer(finalization_timer);
 165 
 166     } else if (action & A_INTEGRATE_TIMER_START) {
 167         controld_start_timer(integration_timer);
 168     }
 169 }
 170 
 171 static gboolean
 172 crm_timer_popped(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 173 {
 174     fsa_timer_t *timer = (fsa_timer_t *) data;
 175 
 176     if (timer->log_error) {
 177         crm_err("%s just popped in state %s! " CRM_XS " input=%s time=%ums",
 178                 get_timer_desc(timer),
 179                 fsa_state2string(controld_globals.fsa_state),
 180                 fsa_input2string(timer->fsa_input), timer->period_ms);
 181     } else {
 182         crm_info("%s just popped " CRM_XS " input=%s time=%ums",
 183                  get_timer_desc(timer), fsa_input2string(timer->fsa_input),
 184                  timer->period_ms);
 185         timer->counter++;
 186     }
 187 
 188     if ((timer == election_timer) && (election_timer->counter > 5)) {
 189         crm_notice("We appear to be in an election loop, something may be wrong");
 190         crm_write_blackbox(0, NULL);
 191         election_timer->counter = 0;
 192     }
 193 
 194     controld_stop_timer(timer);  // Make timer _not_ go off again
 195 
 196     if (timer->fsa_input == I_INTEGRATED) {
 197         crm_info("Welcomed: %d, Integrated: %d",
 198                  crmd_join_phase_count(crm_join_welcomed),
 199                  crmd_join_phase_count(crm_join_integrated));
 200         if (crmd_join_phase_count(crm_join_welcomed) == 0) {
 201             // If we don't even have ourselves, start again
 202             register_fsa_error_adv(C_FSA_INTERNAL, I_ELECTION, NULL, NULL,
 203                                    __func__);
 204 
 205         } else {
 206             register_fsa_input_before(C_TIMER_POPPED, timer->fsa_input, NULL);
 207         }
 208 
 209     } else if ((timer == recheck_timer)
 210                && (controld_globals.fsa_state != S_IDLE)) {
 211         crm_debug("Discarding %s event in state: %s",
 212                   fsa_input2string(timer->fsa_input),
 213                   fsa_state2string(controld_globals.fsa_state));
 214 
 215     } else if ((timer == finalization_timer)
 216                && (controld_globals.fsa_state != S_FINALIZE_JOIN)) {
 217         crm_debug("Discarding %s event in state: %s",
 218                   fsa_input2string(timer->fsa_input),
 219                   fsa_state2string(controld_globals.fsa_state));
 220 
 221     } else if (timer->fsa_input != I_NULL) {
 222         register_fsa_input(C_TIMER_POPPED, timer->fsa_input, NULL);
 223     }
 224 
 225     controld_trigger_fsa();
 226 
 227     return TRUE;
 228 }
 229 
 230 bool
 231 controld_init_fsa_timers(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 232 {
 233     transition_timer = pcmk__assert_alloc(1, sizeof(fsa_timer_t));
 234     integration_timer = pcmk__assert_alloc(1, sizeof(fsa_timer_t));
 235     finalization_timer = pcmk__assert_alloc(1, sizeof(fsa_timer_t));
 236     election_timer = pcmk__assert_alloc(1, sizeof(fsa_timer_t));
 237     shutdown_escalation_timer = pcmk__assert_alloc(1, sizeof(fsa_timer_t));
 238     wait_timer = pcmk__assert_alloc(1, sizeof(fsa_timer_t));
 239     recheck_timer = pcmk__assert_alloc(1, sizeof(fsa_timer_t));
 240 
 241     election_timer->source_id = 0;
 242     election_timer->period_ms = 0;
 243     election_timer->fsa_input = I_DC_TIMEOUT;
 244     election_timer->callback = crm_timer_popped;
 245     election_timer->log_error = FALSE;
 246 
 247     transition_timer->source_id = 0;
 248     transition_timer->period_ms = 0;
 249     transition_timer->fsa_input = I_PE_CALC;
 250     transition_timer->callback = crm_timer_popped;
 251     transition_timer->log_error = FALSE;
 252 
 253     integration_timer->source_id = 0;
 254     integration_timer->period_ms = 0;
 255     integration_timer->fsa_input = I_INTEGRATED;
 256     integration_timer->callback = crm_timer_popped;
 257     integration_timer->log_error = TRUE;
 258 
 259     finalization_timer->source_id = 0;
 260     finalization_timer->period_ms = 0;
 261     finalization_timer->fsa_input = I_FINALIZED;
 262     finalization_timer->callback = crm_timer_popped;
 263     finalization_timer->log_error = FALSE;
 264 
 265     /* We can't use I_FINALIZED here, because that creates a bug in the join
 266      * process where a joining node can be stuck in S_PENDING while we think it
 267      * is in S_NOT_DC. This created an infinite transition loop in which we
 268      * continually send probes which the node NACKs because it's pending.
 269      *
 270      * If we have nodes where the cluster layer is active but the controller is
 271      * not, we can avoid this causing an election/join loop, in the integration
 272      * phase.
 273      */
 274     finalization_timer->fsa_input = I_ELECTION;
 275 
 276     shutdown_escalation_timer->source_id = 0;
 277     shutdown_escalation_timer->period_ms = 0;
 278     shutdown_escalation_timer->fsa_input = I_STOP;
 279     shutdown_escalation_timer->callback = crm_timer_popped;
 280     shutdown_escalation_timer->log_error = TRUE;
 281 
 282     wait_timer->source_id = 0;
 283     wait_timer->period_ms = 2000;
 284     wait_timer->fsa_input = I_NULL;
 285     wait_timer->callback = crm_timer_popped;
 286     wait_timer->log_error = FALSE;
 287 
 288     recheck_timer->source_id = 0;
 289     recheck_timer->period_ms = 0;
 290     recheck_timer->fsa_input = I_PE_CALC;
 291     recheck_timer->callback = crm_timer_popped;
 292     recheck_timer->log_error = FALSE;
 293 
 294     return TRUE;
 295 }
 296 
 297 /*!
 298  * \internal
 299  * \brief Configure timers based on the CIB
 300  *
 301  * \param[in,out] options  Name/value pairs for configured options
 302  */
 303 void
 304 controld_configure_fsa_timers(GHashTable *options)
     /* [previous][next][first][last][top][bottom][index][help] */
 305 {
 306     const char *value = NULL;
 307 
 308     // Election timer
 309     value = g_hash_table_lookup(options, PCMK_OPT_DC_DEADTIME);
 310     pcmk_parse_interval_spec(value, &(election_timer->period_ms));
 311 
 312     // Integration timer
 313     value = g_hash_table_lookup(options, PCMK_OPT_JOIN_INTEGRATION_TIMEOUT);
 314     pcmk_parse_interval_spec(value, &(integration_timer->period_ms));
 315 
 316     // Finalization timer
 317     value = g_hash_table_lookup(options, PCMK_OPT_JOIN_FINALIZATION_TIMEOUT);
 318     pcmk_parse_interval_spec(value, &(finalization_timer->period_ms));
 319 
 320     // Shutdown escalation timer
 321     value = g_hash_table_lookup(options, PCMK_OPT_SHUTDOWN_ESCALATION);
 322     pcmk_parse_interval_spec(value, &(shutdown_escalation_timer->period_ms));
 323     crm_debug("Shutdown escalation occurs if DC has not responded to request "
 324               "in %ums", shutdown_escalation_timer->period_ms);
 325 
 326     // Transition timer
 327     value = g_hash_table_lookup(options, PCMK_OPT_TRANSITION_DELAY);
 328     pcmk_parse_interval_spec(value, &(transition_timer->period_ms));
 329 
 330     // Recheck interval
 331     value = g_hash_table_lookup(options, PCMK_OPT_CLUSTER_RECHECK_INTERVAL);
 332     pcmk_parse_interval_spec(value, &recheck_interval_ms);
 333     crm_debug("Re-run scheduler after %dms of inactivity", recheck_interval_ms);
 334 }
 335 
 336 void
 337 controld_free_fsa_timers(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 338 {
 339     controld_stop_timer(transition_timer);
 340     controld_stop_timer(integration_timer);
 341     controld_stop_timer(finalization_timer);
 342     controld_stop_timer(election_timer);
 343     controld_stop_timer(shutdown_escalation_timer);
 344     controld_stop_timer(wait_timer);
 345     controld_stop_timer(recheck_timer);
 346 
 347     free(transition_timer); transition_timer = NULL;
 348     free(integration_timer); integration_timer = NULL;
 349     free(finalization_timer); finalization_timer = NULL;
 350     free(election_timer); election_timer = NULL;
 351     free(shutdown_escalation_timer); shutdown_escalation_timer = NULL;
 352     free(wait_timer); wait_timer = NULL;
 353     free(recheck_timer); recheck_timer = NULL;
 354 }
 355 
 356 /*!
 357  * \internal
 358  * \brief Check whether the transition timer is started
 359  * \return true if the transition timer is started, or false otherwise
 360  */
 361 bool
 362 controld_is_started_transition_timer(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 363 {
 364     return (transition_timer->period_ms > 0)
 365            && (transition_timer->source_id != 0);
 366 }
 367 
 368 /*!
 369  * \internal
 370  * \brief Start the recheck timer
 371  */
 372 void
 373 controld_start_recheck_timer(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 374 {
 375     // Default to recheck interval configured in CIB (if any)
 376     guint period_ms = recheck_interval_ms;
 377 
 378     // If scheduler supplied a "recheck by" time, check whether that's sooner
 379     if (controld_globals.transition_graph->recheck_by > 0) {
 380         time_t diff_seconds = controld_globals.transition_graph->recheck_by
 381                               - time(NULL);
 382 
 383         if (diff_seconds < 1) {
 384             // We're already past the desired time
 385             period_ms = 500;
 386         } else {
 387             period_ms = (guint) QB_MIN(G_MAXUINT, diff_seconds * 1000LL);
 388         }
 389 
 390         // Use "recheck by" only if it's sooner than interval from CIB
 391         if (period_ms > recheck_interval_ms) {
 392             period_ms = recheck_interval_ms;
 393         }
 394     }
 395 
 396     if (period_ms > 0) {
 397         recheck_timer->period_ms = period_ms;
 398         controld_start_timer(recheck_timer);
 399     }
 400 }
 401 
 402 /*!
 403  * \internal
 404  * \brief Start the wait timer
 405  */
 406 void
 407 controld_start_wait_timer(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 408 {
 409     controld_start_timer(wait_timer);
 410 }
 411 
 412 /*!
 413  * \internal
 414  * \brief Stop the recheck timer
 415  *
 416  * \return true if the recheck timer was running, or false otherwise
 417  */
 418 bool
 419 controld_stop_recheck_timer(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 420 {
 421     return controld_stop_timer(recheck_timer);
 422 }
 423 
 424 /*!
 425  * \brief Get the transition timer's configured period
 426  * \return The transition_timer's period
 427  */
 428 guint
 429 controld_get_period_transition_timer(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 430 {
 431     return transition_timer->period_ms;
 432 }
 433 
 434 /*!
 435  * \internal
 436  * \brief Reset the election timer's counter to 0
 437  */
 438 void
 439 controld_reset_counter_election_timer(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 440 {
 441     election_timer->counter = 0;
 442 }
 443 
 444 /*!
 445  * \internal
 446  * \brief Stop the transition timer
 447  *
 448  * \return true if the transition timer was running, or false otherwise
 449  */
 450 bool
 451 controld_stop_transition_timer(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 452 {
 453     return controld_stop_timer(transition_timer);
 454 }
 455 
 456 /*!
 457  * \internal
 458  * \brief Start the transition timer
 459  */
 460 void
 461 controld_start_transition_timer(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 462 {
 463     controld_start_timer(transition_timer);
 464 }
 465 
 466 /*!
 467  * \internal
 468  * \brief Start the countdown sequence for a shutdown
 469  *
 470  * \param[in] default_period_ms  Period to use if the shutdown escalation
 471  *                               timer's period is 0
 472  */
 473 void
 474 controld_shutdown_start_countdown(guint default_period_ms)
     /* [previous][next][first][last][top][bottom][index][help] */
 475 {
 476     if (shutdown_escalation_timer->period_ms == 0) {
 477         shutdown_escalation_timer->period_ms = default_period_ms;
 478     }
 479 
 480     crm_notice("Initiating controller shutdown sequence " CRM_XS " limit=%ums",
 481                shutdown_escalation_timer->period_ms);
 482     controld_start_timer(shutdown_escalation_timer);
 483 }

/* [previous][next][first][last][top][bottom][index][help] */