root/daemons/controld/controld_te_utils.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. stop_te_timer
  2. te_graph_trigger
  3. controld_init_transition_trigger
  4. controld_destroy_transition_trigger
  5. controld_trigger_graph_as
  6. abort_timer_popped
  7. abort_after_delay
  8. free_node_pending_timer
  9. node_pending_timer_popped
  10. init_node_pending_timer
  11. remove_node_pending_timer
  12. controld_node_pending_timer
  13. controld_free_node_pending_timers
  14. abort2text
  15. update_abort_priority
  16. abort_transition_graph

   1 /*
   2  * Copyright 2004-2024 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 #include <crm/crm.h>
  12 #include <crm/common/xml.h>
  13 
  14 #include <pacemaker-controld.h>
  15 
  16 //! Triggers transition graph processing
  17 static crm_trigger_t *transition_trigger = NULL;
  18 
  19 static GHashTable *node_pending_timers = NULL;
  20 
  21 gboolean
  22 stop_te_timer(pcmk__graph_action_t *action)
     /* [previous][next][first][last][top][bottom][index][help] */
  23 {
  24     if (action == NULL) {
  25         return FALSE;
  26     }
  27     if (action->timer != 0) {
  28         crm_trace("Stopping action timer");
  29         g_source_remove(action->timer);
  30         action->timer = 0;
  31     } else {
  32         crm_trace("Action timer was already stopped");
  33         return FALSE;
  34     }
  35     return TRUE;
  36 }
  37 
  38 static gboolean
  39 te_graph_trigger(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
  40 {
  41     if (controld_globals.transition_graph == NULL) {
  42         crm_debug("Nothing to do");
  43         return TRUE;
  44     }
  45 
  46     crm_trace("Invoking graph %d in state %s",
  47               controld_globals.transition_graph->id,
  48               fsa_state2string(controld_globals.fsa_state));
  49 
  50     switch (controld_globals.fsa_state) {
  51         case S_STARTING:
  52         case S_PENDING:
  53         case S_NOT_DC:
  54         case S_HALT:
  55         case S_ILLEGAL:
  56         case S_STOPPING:
  57         case S_TERMINATE:
  58             return TRUE;
  59         default:
  60             break;
  61     }
  62 
  63     if (!controld_globals.transition_graph->complete) {
  64         enum pcmk__graph_status graph_rc;
  65         int orig_limit = controld_globals.transition_graph->batch_limit;
  66         int throttled_limit = throttle_get_total_job_limit(orig_limit);
  67 
  68         controld_globals.transition_graph->batch_limit = throttled_limit;
  69         graph_rc = pcmk__execute_graph(controld_globals.transition_graph);
  70         controld_globals.transition_graph->batch_limit = orig_limit;
  71 
  72         if (graph_rc == pcmk__graph_active) {
  73             crm_trace("Transition not yet complete");
  74             return TRUE;
  75 
  76         } else if (graph_rc == pcmk__graph_pending) {
  77             crm_trace("Transition not yet complete - no actions fired");
  78             return TRUE;
  79         }
  80 
  81         if (graph_rc != pcmk__graph_complete) {
  82             crm_warn("Transition failed: %s",
  83                      pcmk__graph_status2text(graph_rc));
  84             pcmk__log_graph(LOG_NOTICE, controld_globals.transition_graph);
  85         }
  86     }
  87 
  88     crm_debug("Transition %d is now complete",
  89               controld_globals.transition_graph->id);
  90     controld_globals.transition_graph->complete = true;
  91     notify_crmd(controld_globals.transition_graph);
  92 
  93     return TRUE;
  94 }
  95 
  96 /*!
  97  * \internal
  98  * \brief Initialize transition trigger
  99  */
 100 void
 101 controld_init_transition_trigger(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 102 {
 103     transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger,
 104                                               NULL);
 105 }
 106 
 107 /*!
 108  * \internal
 109  * \brief Destroy transition trigger
 110  */
 111 void
 112 controld_destroy_transition_trigger(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 113 {
 114     mainloop_destroy_trigger(transition_trigger);
 115     transition_trigger = NULL;
 116 }
 117 
 118 void
 119 controld_trigger_graph_as(const char *fn, int line)
     /* [previous][next][first][last][top][bottom][index][help] */
 120 {
 121     crm_trace("%s:%d - Triggered graph processing", fn, line);
 122     mainloop_set_trigger(transition_trigger);
 123 }
 124 
 125 static struct abort_timer_s {
 126     bool aborted;
 127     guint id;
 128     int priority;
 129     enum pcmk__graph_next action;
 130     const char *text;
 131 } abort_timer = { 0, };
 132 
 133 static gboolean
 134 abort_timer_popped(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 135 {
 136     struct abort_timer_s *abort_timer = (struct abort_timer_s *) data;
 137 
 138     if (AM_I_DC && (abort_timer->aborted == FALSE)) {
 139         abort_transition(abort_timer->priority, abort_timer->action,
 140                          abort_timer->text, NULL);
 141     }
 142     abort_timer->id = 0;
 143     return FALSE; // do not immediately reschedule timer
 144 }
 145 
 146 /*!
 147  * \internal
 148  * \brief Abort transition after delay, if not already aborted in that time
 149  *
 150  * \param[in] abort_text  Must be literal string
 151  */
 152 void
 153 abort_after_delay(int abort_priority, enum pcmk__graph_next abort_action,
     /* [previous][next][first][last][top][bottom][index][help] */
 154                   const char *abort_text, guint delay_ms)
 155 {
 156     if (abort_timer.id) {
 157         // Timer already in progress, stop and reschedule
 158         g_source_remove(abort_timer.id);
 159     }
 160     abort_timer.aborted = FALSE;
 161     abort_timer.priority = abort_priority;
 162     abort_timer.action = abort_action;
 163     abort_timer.text = abort_text;
 164     abort_timer.id = g_timeout_add(delay_ms, abort_timer_popped, &abort_timer);
 165 }
 166 
 167 static void
 168 free_node_pending_timer(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 169 {
 170     struct abort_timer_s *node_pending_timer = (struct abort_timer_s *) data;
 171 
 172     if (node_pending_timer->id != 0) {
 173         g_source_remove(node_pending_timer->id);
 174         node_pending_timer->id = 0;
 175     }
 176 
 177     free(node_pending_timer);
 178 }
 179 
 180 static gboolean
 181 node_pending_timer_popped(gpointer key)
     /* [previous][next][first][last][top][bottom][index][help] */
 182 {
 183     struct abort_timer_s *node_pending_timer = NULL;
 184 
 185     if (node_pending_timers == NULL) {
 186         return FALSE;
 187     }
 188 
 189     node_pending_timer = g_hash_table_lookup(node_pending_timers, key);
 190     if (node_pending_timer == NULL) {
 191         return FALSE;
 192     }
 193 
 194     crm_warn("Node with " PCMK_XA_ID " '%s' pending timed out (%us) "
 195              "on joining the process group",
 196              (const char *) key, controld_globals.node_pending_timeout);
 197 
 198     if (controld_globals.node_pending_timeout > 0) {
 199         abort_timer_popped(node_pending_timer);
 200     }
 201 
 202     g_hash_table_remove(node_pending_timers, key);
 203 
 204     return FALSE; // do not reschedule timer
 205 }
 206 
 207 static void
 208 init_node_pending_timer(const crm_node_t *node, guint timeout)
     /* [previous][next][first][last][top][bottom][index][help] */
 209 {
 210     struct abort_timer_s *node_pending_timer = NULL;
 211     char *key = NULL;
 212 
 213     if (node->uuid == NULL) {
 214         return;
 215     }
 216 
 217     if (node_pending_timers == NULL) {
 218         node_pending_timers = pcmk__strikey_table(free,
 219                                                   free_node_pending_timer);
 220 
 221     // The timer is somehow already existing
 222     } else if (g_hash_table_lookup(node_pending_timers, node->uuid) != NULL) {
 223         return;
 224     }
 225 
 226     crm_notice("Waiting for pending %s with " PCMK_XA_ID " '%s' "
 227                "to join the process group (timeout=%us)",
 228                node->uname ? node->uname : "node", node->uuid,
 229                controld_globals.node_pending_timeout);
 230 
 231     key = pcmk__str_copy(node->uuid);
 232     node_pending_timer = pcmk__assert_alloc(1, sizeof(struct abort_timer_s));
 233 
 234     node_pending_timer->aborted = FALSE;
 235     node_pending_timer->priority = PCMK_SCORE_INFINITY;
 236     node_pending_timer->action = pcmk__graph_restart;
 237     node_pending_timer->text = "Node pending timed out";
 238 
 239     g_hash_table_replace(node_pending_timers, key, node_pending_timer);
 240 
 241     node_pending_timer->id = g_timeout_add_seconds(timeout,
 242                                                    node_pending_timer_popped,
 243                                                    key);
 244     CRM_ASSERT(node_pending_timer->id != 0);
 245 }
 246 
 247 static void
 248 remove_node_pending_timer(const char *node_uuid)
     /* [previous][next][first][last][top][bottom][index][help] */
 249 {
 250     if (node_pending_timers == NULL) {
 251         return;
 252     }
 253 
 254     g_hash_table_remove(node_pending_timers, node_uuid);
 255 }
 256 
 257 void
 258 controld_node_pending_timer(const crm_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
 259 {
 260     long long remaining_timeout = 0;
 261 
 262     /* If the node is not an active cluster node, is leaving the cluster, or is
 263      * already part of CPG, or PCMK_OPT_NODE_PENDING_TIMEOUT is disabled, free
 264      * any node pending timer for it.
 265      */
 266     if (pcmk_is_set(node->flags, crm_remote_node)
 267         || (node->when_member <= 1) || (node->when_online > 0)
 268         || (controld_globals.node_pending_timeout == 0)) {
 269         remove_node_pending_timer(node->uuid);
 270         return;
 271     }
 272 
 273     // Node is a cluster member but offline in CPG
 274 
 275     remaining_timeout = node->when_member - time(NULL)
 276                         + controld_globals.node_pending_timeout;
 277 
 278     /* It already passed node pending timeout somehow.
 279      * Free any node pending timer of it.
 280      */
 281     if (remaining_timeout <= 0) {
 282         remove_node_pending_timer(node->uuid);
 283         return;
 284     }
 285 
 286     init_node_pending_timer(node, remaining_timeout);
 287 }
 288 
 289 void
 290 controld_free_node_pending_timers(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 291 {
 292     if (node_pending_timers == NULL) {
 293         return;
 294     }
 295 
 296     g_hash_table_destroy(node_pending_timers);
 297     node_pending_timers = NULL;
 298 }
 299 
 300 static const char *
 301 abort2text(enum pcmk__graph_next abort_action)
     /* [previous][next][first][last][top][bottom][index][help] */
 302 {
 303     switch (abort_action) {
 304         case pcmk__graph_done:      return "done";
 305         case pcmk__graph_wait:      return "stop";
 306         case pcmk__graph_restart:   return "restart";
 307         case pcmk__graph_shutdown:  return "shutdown";
 308     }
 309     return "unknown";
 310 }
 311 
 312 static bool
 313 update_abort_priority(pcmk__graph_t *graph, int priority,
     /* [previous][next][first][last][top][bottom][index][help] */
 314                       enum pcmk__graph_next action, const char *abort_reason)
 315 {
 316     bool change = FALSE;
 317 
 318     if (graph == NULL) {
 319         return change;
 320     }
 321 
 322     if (graph->abort_priority < priority) {
 323         crm_debug("Abort priority upgraded from %d to %d", graph->abort_priority, priority);
 324         graph->abort_priority = priority;
 325         if (graph->abort_reason != NULL) {
 326             crm_debug("'%s' abort superseded by %s", graph->abort_reason, abort_reason);
 327         }
 328         graph->abort_reason = abort_reason;
 329         change = TRUE;
 330     }
 331 
 332     if (graph->completion_action < action) {
 333         crm_debug("Abort action %s superseded by %s: %s",
 334                   abort2text(graph->completion_action), abort2text(action), abort_reason);
 335         graph->completion_action = action;
 336         change = TRUE;
 337     }
 338 
 339     return change;
 340 }
 341 
 342 void
 343 abort_transition_graph(int abort_priority, enum pcmk__graph_next abort_action,
     /* [previous][next][first][last][top][bottom][index][help] */
 344                        const char *abort_text, const xmlNode *reason,
 345                        const char *fn, int line)
 346 {
 347     int add[] = { 0, 0, 0 };
 348     int del[] = { 0, 0, 0 };
 349     int level = LOG_INFO;
 350     const xmlNode *diff = NULL;
 351     const xmlNode *change = NULL;
 352 
 353     CRM_CHECK(controld_globals.transition_graph != NULL, return);
 354 
 355     switch (controld_globals.fsa_state) {
 356         case S_STARTING:
 357         case S_PENDING:
 358         case S_NOT_DC:
 359         case S_HALT:
 360         case S_ILLEGAL:
 361         case S_STOPPING:
 362         case S_TERMINATE:
 363             crm_info("Abort %s suppressed: state=%s (%scomplete)",
 364                      abort_text, fsa_state2string(controld_globals.fsa_state),
 365                      (controld_globals.transition_graph->complete? "" : "in"));
 366             return;
 367         default:
 368             break;
 369     }
 370 
 371     abort_timer.aborted = TRUE;
 372     controld_expect_sched_reply(NULL);
 373 
 374     if (!controld_globals.transition_graph->complete
 375         && update_abort_priority(controld_globals.transition_graph,
 376                                  abort_priority, abort_action,
 377                                  abort_text)) {
 378         level = LOG_NOTICE;
 379     }
 380 
 381     if (reason != NULL) {
 382         const xmlNode *search = NULL;
 383 
 384         for(search = reason; search; search = search->parent) {
 385             if (pcmk__xe_is(search, PCMK_XE_DIFF)) {
 386                 diff = search;
 387                 break;
 388             }
 389         }
 390 
 391         if(diff) {
 392             xml_patch_versions(diff, add, del);
 393             for(search = reason; search; search = search->parent) {
 394                 if (pcmk__xe_is(search, PCMK_XE_CHANGE)) {
 395                     change = search;
 396                     break;
 397                 }
 398             }
 399         }
 400     }
 401 
 402     if (reason == NULL) {
 403         do_crm_log(level,
 404                    "Transition %d aborted: %s " CRM_XS " source=%s:%d "
 405                    "complete=%s", controld_globals.transition_graph->id,
 406                    abort_text, fn, line,
 407                    pcmk__btoa(controld_globals.transition_graph->complete));
 408 
 409     } else if(change == NULL) {
 410         GString *local_path = pcmk__element_xpath(reason);
 411         CRM_ASSERT(local_path != NULL);
 412 
 413         do_crm_log(level, "Transition %d aborted by %s.%s: %s "
 414                    CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
 415                    controld_globals.transition_graph->id, reason->name,
 416                    pcmk__xe_id(reason), abort_text, add[0], add[1], add[2], fn,
 417                    line, (const char *) local_path->str,
 418                    pcmk__btoa(controld_globals.transition_graph->complete));
 419         g_string_free(local_path, TRUE);
 420 
 421     } else {
 422         const char *op = crm_element_value(change, PCMK_XA_OPERATION);
 423         const char *path = crm_element_value(change, PCMK_XA_PATH);
 424 
 425         if(change == reason) {
 426             if (strcmp(op, PCMK_VALUE_CREATE) == 0) {
 427                 reason = reason->children;
 428 
 429             } else if (strcmp(op, PCMK_VALUE_MODIFY) == 0) {
 430                 reason = pcmk__xe_first_child(reason, PCMK_XE_CHANGE_RESULT,
 431                                               NULL, NULL);
 432                 if(reason) {
 433                     reason = reason->children;
 434                 }
 435             }
 436             CRM_CHECK(reason != NULL, goto done);
 437         }
 438 
 439         if (strcmp(op, PCMK_VALUE_DELETE) == 0) {
 440             const char *shortpath = strrchr(path, '/');
 441 
 442             do_crm_log(level, "Transition %d aborted by deletion of %s: %s "
 443                        CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
 444                        controld_globals.transition_graph->id,
 445                        (shortpath? (shortpath + 1) : path), abort_text,
 446                        add[0], add[1], add[2], fn, line, path,
 447                        pcmk__btoa(controld_globals.transition_graph->complete));
 448 
 449         } else if (pcmk__xe_is(reason, PCMK_XE_NVPAIR)) {
 450             do_crm_log(level, "Transition %d aborted by %s doing %s %s=%s: %s "
 451                        CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
 452                        controld_globals.transition_graph->id,
 453                        crm_element_value(reason, PCMK_XA_ID), op,
 454                        crm_element_value(reason, PCMK_XA_NAME),
 455                        crm_element_value(reason, PCMK_XA_VALUE),
 456                        abort_text, add[0], add[1], add[2], fn, line, path,
 457                        pcmk__btoa(controld_globals.transition_graph->complete));
 458 
 459         } else if (pcmk__xe_is(reason, PCMK__XE_LRM_RSC_OP)) {
 460             const char *magic = crm_element_value(reason,
 461                                                   PCMK__XA_TRANSITION_MAGIC);
 462 
 463             do_crm_log(level, "Transition %d aborted by operation %s '%s' on %s: %s "
 464                        CRM_XS " magic=%s cib=%d.%d.%d source=%s:%d complete=%s",
 465                        controld_globals.transition_graph->id,
 466                        crm_element_value(reason, PCMK__XA_OPERATION_KEY), op,
 467                        crm_element_value(reason, PCMK__META_ON_NODE),
 468                        abort_text,
 469                        magic, add[0], add[1], add[2], fn, line,
 470                        pcmk__btoa(controld_globals.transition_graph->complete));
 471 
 472         } else if (pcmk__str_any_of((const char *) reason->name,
 473                    PCMK__XE_NODE_STATE, PCMK_XE_NODE, NULL)) {
 474             const char *uname = pcmk__node_name_from_uuid(pcmk__xe_id(reason));
 475 
 476             do_crm_log(level, "Transition %d aborted by %s '%s' on %s: %s "
 477                        CRM_XS " cib=%d.%d.%d source=%s:%d complete=%s",
 478                        controld_globals.transition_graph->id,
 479                        reason->name, op, pcmk__s(uname, pcmk__xe_id(reason)),
 480                        abort_text, add[0], add[1], add[2], fn, line,
 481                        pcmk__btoa(controld_globals.transition_graph->complete));
 482 
 483         } else {
 484             const char *id = pcmk__xe_id(reason);
 485 
 486             do_crm_log(level, "Transition %d aborted by %s.%s '%s': %s "
 487                        CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
 488                        controld_globals.transition_graph->id,
 489                        reason->name, pcmk__s(id, ""), pcmk__s(op, "change"),
 490                        abort_text, add[0], add[1], add[2], fn, line, path,
 491                        pcmk__btoa(controld_globals.transition_graph->complete));
 492         }
 493     }
 494 
 495 done:
 496     if (controld_globals.transition_graph->complete) {
 497         if (controld_get_period_transition_timer() > 0) {
 498             controld_stop_transition_timer();
 499             controld_start_transition_timer();
 500         } else {
 501             register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL);
 502         }
 503         return;
 504     }
 505 
 506     trigger_graph();
 507 }

/* [previous][next][first][last][top][bottom][index][help] */