root/daemons/controld/controld_te_utils.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. stop_te_timer
  2. te_graph_trigger
  3. controld_init_transition_trigger
  4. controld_destroy_transition_trigger
  5. controld_trigger_graph_as
  6. abort_timer_popped
  7. abort_after_delay
  8. free_node_pending_timer
  9. node_pending_timer_popped
  10. init_node_pending_timer
  11. remove_node_pending_timer
  12. controld_node_pending_timer
  13. controld_free_node_pending_timers
  14. abort2text
  15. update_abort_priority
  16. abort_transition_graph

   1 /*
   2  * Copyright 2004-2024 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 #include <crm/crm.h>
  12 #include <crm/common/xml.h>
  13 
  14 #include <pacemaker-controld.h>
  15 
  16 //! Triggers transition graph processing
  17 static crm_trigger_t *transition_trigger = NULL;
  18 
  19 static GHashTable *node_pending_timers = NULL;
  20 
  21 gboolean
  22 stop_te_timer(pcmk__graph_action_t *action)
     /* [previous][next][first][last][top][bottom][index][help] */
  23 {
  24     if (action == NULL) {
  25         return FALSE;
  26     }
  27     if (action->timer != 0) {
  28         crm_trace("Stopping action timer");
  29         g_source_remove(action->timer);
  30         action->timer = 0;
  31     } else {
  32         crm_trace("Action timer was already stopped");
  33         return FALSE;
  34     }
  35     return TRUE;
  36 }
  37 
  38 static gboolean
  39 te_graph_trigger(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
  40 {
  41     if (controld_globals.transition_graph == NULL) {
  42         crm_debug("Nothing to do");
  43         return TRUE;
  44     }
  45 
  46     crm_trace("Invoking graph %d in state %s",
  47               controld_globals.transition_graph->id,
  48               fsa_state2string(controld_globals.fsa_state));
  49 
  50     switch (controld_globals.fsa_state) {
  51         case S_STARTING:
  52         case S_PENDING:
  53         case S_NOT_DC:
  54         case S_HALT:
  55         case S_ILLEGAL:
  56         case S_STOPPING:
  57         case S_TERMINATE:
  58             return TRUE;
  59         default:
  60             break;
  61     }
  62 
  63     if (!controld_globals.transition_graph->complete) {
  64         enum pcmk__graph_status graph_rc;
  65         int orig_limit = controld_globals.transition_graph->batch_limit;
  66         int throttled_limit = throttle_get_total_job_limit(orig_limit);
  67 
  68         controld_globals.transition_graph->batch_limit = throttled_limit;
  69         graph_rc = pcmk__execute_graph(controld_globals.transition_graph);
  70         controld_globals.transition_graph->batch_limit = orig_limit;
  71 
  72         if (graph_rc == pcmk__graph_active) {
  73             crm_trace("Transition not yet complete");
  74             return TRUE;
  75 
  76         } else if (graph_rc == pcmk__graph_pending) {
  77             crm_trace("Transition not yet complete - no actions fired");
  78             return TRUE;
  79         }
  80 
  81         if (graph_rc != pcmk__graph_complete) {
  82             crm_warn("Transition failed: %s",
  83                      pcmk__graph_status2text(graph_rc));
  84             pcmk__log_graph(LOG_NOTICE, controld_globals.transition_graph);
  85         }
  86     }
  87 
  88     crm_debug("Transition %d is now complete",
  89               controld_globals.transition_graph->id);
  90     controld_globals.transition_graph->complete = true;
  91     notify_crmd(controld_globals.transition_graph);
  92 
  93     return TRUE;
  94 }
  95 
  96 /*!
  97  * \internal
  98  * \brief Initialize transition trigger
  99  */
 100 void
 101 controld_init_transition_trigger(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 102 {
 103     transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger,
 104                                               NULL);
 105 }
 106 
 107 /*!
 108  * \internal
 109  * \brief Destroy transition trigger
 110  */
 111 void
 112 controld_destroy_transition_trigger(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 113 {
 114     mainloop_destroy_trigger(transition_trigger);
 115     transition_trigger = NULL;
 116 }
 117 
 118 void
 119 controld_trigger_graph_as(const char *fn, int line)
     /* [previous][next][first][last][top][bottom][index][help] */
 120 {
 121     crm_trace("%s:%d - Triggered graph processing", fn, line);
 122     mainloop_set_trigger(transition_trigger);
 123 }
 124 
 125 static struct abort_timer_s {
 126     bool aborted;
 127     guint id;
 128     int priority;
 129     enum pcmk__graph_next action;
 130     const char *text;
 131 } abort_timer = { 0, };
 132 
 133 static gboolean
 134 abort_timer_popped(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 135 {
 136     struct abort_timer_s *abort_timer = (struct abort_timer_s *) data;
 137 
 138     if (AM_I_DC && (abort_timer->aborted == FALSE)) {
 139         abort_transition(abort_timer->priority, abort_timer->action,
 140                          abort_timer->text, NULL);
 141     }
 142     abort_timer->id = 0;
 143     return FALSE; // do not immediately reschedule timer
 144 }
 145 
 146 /*!
 147  * \internal
 148  * \brief Abort transition after delay, if not already aborted in that time
 149  *
 150  * \param[in] abort_text  Must be literal string
 151  */
 152 void
 153 abort_after_delay(int abort_priority, enum pcmk__graph_next abort_action,
     /* [previous][next][first][last][top][bottom][index][help] */
 154                   const char *abort_text, guint delay_ms)
 155 {
 156     if (abort_timer.id) {
 157         // Timer already in progress, stop and reschedule
 158         g_source_remove(abort_timer.id);
 159     }
 160     abort_timer.aborted = FALSE;
 161     abort_timer.priority = abort_priority;
 162     abort_timer.action = abort_action;
 163     abort_timer.text = abort_text;
 164     abort_timer.id = pcmk__create_timer(delay_ms, abort_timer_popped, &abort_timer);
 165 }
 166 
 167 static void
 168 free_node_pending_timer(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 169 {
 170     struct abort_timer_s *node_pending_timer = (struct abort_timer_s *) data;
 171 
 172     if (node_pending_timer->id != 0) {
 173         g_source_remove(node_pending_timer->id);
 174         node_pending_timer->id = 0;
 175     }
 176 
 177     free(node_pending_timer);
 178 }
 179 
 180 static gboolean
 181 node_pending_timer_popped(gpointer key)
     /* [previous][next][first][last][top][bottom][index][help] */
 182 {
 183     struct abort_timer_s *node_pending_timer = NULL;
 184 
 185     if (node_pending_timers == NULL) {
 186         return FALSE;
 187     }
 188 
 189     node_pending_timer = g_hash_table_lookup(node_pending_timers, key);
 190     if (node_pending_timer == NULL) {
 191         return FALSE;
 192     }
 193 
 194     crm_warn("Node with " PCMK_XA_ID " '%s' pending timed out (%us) "
 195              "on joining the process group",
 196              (const char *) key, controld_globals.node_pending_timeout);
 197 
 198     if (controld_globals.node_pending_timeout > 0) {
 199         abort_timer_popped(node_pending_timer);
 200     }
 201 
 202     g_hash_table_remove(node_pending_timers, key);
 203 
 204     return FALSE; // do not reschedule timer
 205 }
 206 
 207 static void
 208 init_node_pending_timer(const pcmk__node_status_t *node, guint timeout)
     /* [previous][next][first][last][top][bottom][index][help] */
 209 {
 210     struct abort_timer_s *node_pending_timer = NULL;
 211     char *key = NULL;
 212 
 213     if (node->xml_id == NULL) {
 214         return;
 215     }
 216 
 217     if (node_pending_timers == NULL) {
 218         node_pending_timers = pcmk__strikey_table(free,
 219                                                   free_node_pending_timer);
 220 
 221     // The timer is somehow already existing
 222     } else if (g_hash_table_lookup(node_pending_timers, node->xml_id) != NULL) {
 223         return;
 224     }
 225 
 226     crm_notice("Waiting for pending %s with " PCMK_XA_ID " '%s' "
 227                "to join the process group (timeout=%us)",
 228                pcmk__s(node->name, "node"), node->xml_id,
 229                controld_globals.node_pending_timeout);
 230 
 231     key = pcmk__str_copy(node->xml_id);
 232     node_pending_timer = pcmk__assert_alloc(1, sizeof(struct abort_timer_s));
 233 
 234     node_pending_timer->aborted = FALSE;
 235     node_pending_timer->priority = PCMK_SCORE_INFINITY;
 236     node_pending_timer->action = pcmk__graph_restart;
 237     node_pending_timer->text = "Node pending timed out";
 238 
 239     g_hash_table_replace(node_pending_timers, key, node_pending_timer);
 240 
 241     node_pending_timer->id = pcmk__create_timer(timeout * 1000,
 242                                                 node_pending_timer_popped,
 243                                                 key);
 244     pcmk__assert(node_pending_timer->id != 0);
 245 }
 246 
 247 static void
 248 remove_node_pending_timer(const char *node_uuid)
     /* [previous][next][first][last][top][bottom][index][help] */
 249 {
 250     if (node_pending_timers == NULL) {
 251         return;
 252     }
 253 
 254     g_hash_table_remove(node_pending_timers, node_uuid);
 255 }
 256 
 257 void
 258 controld_node_pending_timer(const pcmk__node_status_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
 259 {
 260     long long remaining_timeout = 0;
 261 
 262     /* If the node is not an active cluster node, is leaving the cluster, or is
 263      * already part of CPG, or PCMK_OPT_NODE_PENDING_TIMEOUT is disabled, free
 264      * any node pending timer for it.
 265      */
 266     if (pcmk_is_set(node->flags, pcmk__node_status_remote)
 267         || (node->when_member <= 1) || (node->when_online > 0)
 268         || (controld_globals.node_pending_timeout == 0)) {
 269 
 270         remove_node_pending_timer(node->xml_id);
 271         return;
 272     }
 273 
 274     // Node is a cluster member but offline in CPG
 275 
 276     remaining_timeout = node->when_member - time(NULL)
 277                         + controld_globals.node_pending_timeout;
 278 
 279     /* It already passed node pending timeout somehow.
 280      * Free any node pending timer of it.
 281      */
 282     if (remaining_timeout <= 0) {
 283         remove_node_pending_timer(node->xml_id);
 284         return;
 285     }
 286 
 287     init_node_pending_timer(node, remaining_timeout);
 288 }
 289 
 290 void
 291 controld_free_node_pending_timers(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 292 {
 293     if (node_pending_timers == NULL) {
 294         return;
 295     }
 296 
 297     g_hash_table_destroy(node_pending_timers);
 298     node_pending_timers = NULL;
 299 }
 300 
 301 static const char *
 302 abort2text(enum pcmk__graph_next abort_action)
     /* [previous][next][first][last][top][bottom][index][help] */
 303 {
 304     switch (abort_action) {
 305         case pcmk__graph_done:      return "done";
 306         case pcmk__graph_wait:      return "stop";
 307         case pcmk__graph_restart:   return "restart";
 308         case pcmk__graph_shutdown:  return "shutdown";
 309     }
 310     return "unknown";
 311 }
 312 
 313 static bool
 314 update_abort_priority(pcmk__graph_t *graph, int priority,
     /* [previous][next][first][last][top][bottom][index][help] */
 315                       enum pcmk__graph_next action, const char *abort_reason)
 316 {
 317     bool change = FALSE;
 318 
 319     if (graph == NULL) {
 320         return change;
 321     }
 322 
 323     if (graph->abort_priority < priority) {
 324         crm_debug("Abort priority upgraded from %d to %d", graph->abort_priority, priority);
 325         graph->abort_priority = priority;
 326         if (graph->abort_reason != NULL) {
 327             crm_debug("'%s' abort superseded by %s", graph->abort_reason, abort_reason);
 328         }
 329         graph->abort_reason = abort_reason;
 330         change = TRUE;
 331     }
 332 
 333     if (graph->completion_action < action) {
 334         crm_debug("Abort action %s superseded by %s: %s",
 335                   abort2text(graph->completion_action), abort2text(action), abort_reason);
 336         graph->completion_action = action;
 337         change = TRUE;
 338     }
 339 
 340     return change;
 341 }
 342 
 343 void
 344 abort_transition_graph(int abort_priority, enum pcmk__graph_next abort_action,
     /* [previous][next][first][last][top][bottom][index][help] */
 345                        const char *abort_text, const xmlNode *reason,
 346                        const char *fn, int line)
 347 {
 348     int add[] = { 0, 0, 0 };
 349     int del[] = { 0, 0, 0 };
 350     int level = LOG_INFO;
 351     const xmlNode *diff = NULL;
 352     const xmlNode *change = NULL;
 353 
 354     CRM_CHECK(controld_globals.transition_graph != NULL, return);
 355 
 356     switch (controld_globals.fsa_state) {
 357         case S_STARTING:
 358         case S_PENDING:
 359         case S_NOT_DC:
 360         case S_HALT:
 361         case S_ILLEGAL:
 362         case S_STOPPING:
 363         case S_TERMINATE:
 364             crm_info("Abort %s suppressed: state=%s (%scomplete)",
 365                      abort_text, fsa_state2string(controld_globals.fsa_state),
 366                      (controld_globals.transition_graph->complete? "" : "in"));
 367             return;
 368         default:
 369             break;
 370     }
 371 
 372     abort_timer.aborted = TRUE;
 373     controld_expect_sched_reply(NULL);
 374 
 375     if (!controld_globals.transition_graph->complete
 376         && update_abort_priority(controld_globals.transition_graph,
 377                                  abort_priority, abort_action,
 378                                  abort_text)) {
 379         level = LOG_NOTICE;
 380     }
 381 
 382     if (reason != NULL) {
 383         const xmlNode *search = NULL;
 384 
 385         for(search = reason; search; search = search->parent) {
 386             if (pcmk__xe_is(search, PCMK_XE_DIFF)) {
 387                 diff = search;
 388                 break;
 389             }
 390         }
 391 
 392         if(diff) {
 393             xml_patch_versions(diff, add, del);
 394             for(search = reason; search; search = search->parent) {
 395                 if (pcmk__xe_is(search, PCMK_XE_CHANGE)) {
 396                     change = search;
 397                     break;
 398                 }
 399             }
 400         }
 401     }
 402 
 403     if (reason == NULL) {
 404         do_crm_log(level,
 405                    "Transition %d aborted: %s " QB_XS " source=%s:%d "
 406                    "complete=%s", controld_globals.transition_graph->id,
 407                    abort_text, fn, line,
 408                    pcmk__btoa(controld_globals.transition_graph->complete));
 409 
 410     } else if(change == NULL) {
 411         GString *local_path = pcmk__element_xpath(reason);
 412         pcmk__assert(local_path != NULL);
 413 
 414         do_crm_log(level, "Transition %d aborted by %s.%s: %s "
 415                    QB_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
 416                    controld_globals.transition_graph->id, reason->name,
 417                    pcmk__xe_id(reason), abort_text, add[0], add[1], add[2], fn,
 418                    line, (const char *) local_path->str,
 419                    pcmk__btoa(controld_globals.transition_graph->complete));
 420         g_string_free(local_path, TRUE);
 421 
 422     } else {
 423         const char *op = crm_element_value(change, PCMK_XA_OPERATION);
 424         const char *path = crm_element_value(change, PCMK_XA_PATH);
 425 
 426         if(change == reason) {
 427             if (strcmp(op, PCMK_VALUE_CREATE) == 0) {
 428                 reason = reason->children;
 429 
 430             } else if (strcmp(op, PCMK_VALUE_MODIFY) == 0) {
 431                 reason = pcmk__xe_first_child(reason, PCMK_XE_CHANGE_RESULT,
 432                                               NULL, NULL);
 433                 if(reason) {
 434                     reason = reason->children;
 435                 }
 436             }
 437             CRM_CHECK(reason != NULL, goto done);
 438         }
 439 
 440         if (strcmp(op, PCMK_VALUE_DELETE) == 0) {
 441             const char *shortpath = strrchr(path, '/');
 442 
 443             do_crm_log(level, "Transition %d aborted by deletion of %s: %s "
 444                        QB_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
 445                        controld_globals.transition_graph->id,
 446                        (shortpath? (shortpath + 1) : path), abort_text,
 447                        add[0], add[1], add[2], fn, line, path,
 448                        pcmk__btoa(controld_globals.transition_graph->complete));
 449 
 450         } else if (pcmk__xe_is(reason, PCMK_XE_NVPAIR)) {
 451             do_crm_log(level, "Transition %d aborted by %s doing %s %s=%s: %s "
 452                        QB_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
 453                        controld_globals.transition_graph->id,
 454                        crm_element_value(reason, PCMK_XA_ID), op,
 455                        crm_element_value(reason, PCMK_XA_NAME),
 456                        crm_element_value(reason, PCMK_XA_VALUE),
 457                        abort_text, add[0], add[1], add[2], fn, line, path,
 458                        pcmk__btoa(controld_globals.transition_graph->complete));
 459 
 460         } else if (pcmk__xe_is(reason, PCMK__XE_LRM_RSC_OP)) {
 461             const char *magic = crm_element_value(reason,
 462                                                   PCMK__XA_TRANSITION_MAGIC);
 463 
 464             do_crm_log(level, "Transition %d aborted by operation %s '%s' on %s: %s "
 465                        QB_XS " magic=%s cib=%d.%d.%d source=%s:%d complete=%s",
 466                        controld_globals.transition_graph->id,
 467                        crm_element_value(reason, PCMK__XA_OPERATION_KEY), op,
 468                        crm_element_value(reason, PCMK__META_ON_NODE),
 469                        abort_text,
 470                        magic, add[0], add[1], add[2], fn, line,
 471                        pcmk__btoa(controld_globals.transition_graph->complete));
 472 
 473         } else if (pcmk__str_any_of((const char *) reason->name,
 474                    PCMK__XE_NODE_STATE, PCMK_XE_NODE, NULL)) {
 475             const char *uname = pcmk__node_name_from_uuid(pcmk__xe_id(reason));
 476 
 477             do_crm_log(level, "Transition %d aborted by %s '%s' on %s: %s "
 478                        QB_XS " cib=%d.%d.%d source=%s:%d complete=%s",
 479                        controld_globals.transition_graph->id,
 480                        reason->name, op, pcmk__s(uname, pcmk__xe_id(reason)),
 481                        abort_text, add[0], add[1], add[2], fn, line,
 482                        pcmk__btoa(controld_globals.transition_graph->complete));
 483 
 484         } else {
 485             const char *id = pcmk__xe_id(reason);
 486 
 487             do_crm_log(level, "Transition %d aborted by %s.%s '%s': %s "
 488                        QB_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
 489                        controld_globals.transition_graph->id,
 490                        reason->name, pcmk__s(id, ""), pcmk__s(op, "change"),
 491                        abort_text, add[0], add[1], add[2], fn, line, path,
 492                        pcmk__btoa(controld_globals.transition_graph->complete));
 493         }
 494     }
 495 
 496 done:
 497     if (controld_globals.transition_graph->complete) {
 498         if (controld_get_period_transition_timer() > 0) {
 499             controld_stop_transition_timer();
 500             controld_start_transition_timer();
 501         } else {
 502             register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL);
 503         }
 504         return;
 505     }
 506 
 507     trigger_graph();
 508 }

/* [previous][next][first][last][top][bottom][index][help] */