root/daemons/controld/controld_te_utils.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. stop_te_timer
  2. te_graph_trigger
  3. controld_init_transition_trigger
  4. controld_destroy_transition_trigger
  5. controld_trigger_graph_as
  6. abort_timer_popped
  7. abort_after_delay
  8. free_node_pending_timer
  9. node_pending_timer_popped
  10. init_node_pending_timer
  11. remove_node_pending_timer
  12. controld_node_pending_timer
  13. controld_free_node_pending_timers
  14. abort2text
  15. update_abort_priority
  16. abort_transition_graph

   1 /*
   2  * Copyright 2004-2023 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 #include <crm/crm.h>
  12 #include <crm/msg_xml.h>
  13 #include <crm/common/xml.h>
  14 
  15 #include <pacemaker-controld.h>
  16 
  17 //! Triggers transition graph processing
  18 static crm_trigger_t *transition_trigger = NULL;
  19 
  20 static GHashTable *node_pending_timers = NULL;
  21 
  22 gboolean
  23 stop_te_timer(pcmk__graph_action_t *action)
     /* [previous][next][first][last][top][bottom][index][help] */
  24 {
  25     if (action == NULL) {
  26         return FALSE;
  27     }
  28     if (action->timer != 0) {
  29         crm_trace("Stopping action timer");
  30         g_source_remove(action->timer);
  31         action->timer = 0;
  32     } else {
  33         crm_trace("Action timer was already stopped");
  34         return FALSE;
  35     }
  36     return TRUE;
  37 }
  38 
  39 static gboolean
  40 te_graph_trigger(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
  41 {
  42     if (controld_globals.transition_graph == NULL) {
  43         crm_debug("Nothing to do");
  44         return TRUE;
  45     }
  46 
  47     crm_trace("Invoking graph %d in state %s",
  48               controld_globals.transition_graph->id,
  49               fsa_state2string(controld_globals.fsa_state));
  50 
  51     switch (controld_globals.fsa_state) {
  52         case S_STARTING:
  53         case S_PENDING:
  54         case S_NOT_DC:
  55         case S_HALT:
  56         case S_ILLEGAL:
  57         case S_STOPPING:
  58         case S_TERMINATE:
  59             return TRUE;
  60         default:
  61             break;
  62     }
  63 
  64     if (!controld_globals.transition_graph->complete) {
  65         enum pcmk__graph_status graph_rc;
  66         int orig_limit = controld_globals.transition_graph->batch_limit;
  67         int throttled_limit = throttle_get_total_job_limit(orig_limit);
  68 
  69         controld_globals.transition_graph->batch_limit = throttled_limit;
  70         graph_rc = pcmk__execute_graph(controld_globals.transition_graph);
  71         controld_globals.transition_graph->batch_limit = orig_limit;
  72 
  73         if (graph_rc == pcmk__graph_active) {
  74             crm_trace("Transition not yet complete");
  75             return TRUE;
  76 
  77         } else if (graph_rc == pcmk__graph_pending) {
  78             crm_trace("Transition not yet complete - no actions fired");
  79             return TRUE;
  80         }
  81 
  82         if (graph_rc != pcmk__graph_complete) {
  83             crm_warn("Transition failed: %s",
  84                      pcmk__graph_status2text(graph_rc));
  85             pcmk__log_graph(LOG_NOTICE, controld_globals.transition_graph);
  86         }
  87     }
  88 
  89     crm_debug("Transition %d is now complete",
  90               controld_globals.transition_graph->id);
  91     controld_globals.transition_graph->complete = true;
  92     notify_crmd(controld_globals.transition_graph);
  93 
  94     return TRUE;
  95 }
  96 
  97 /*!
  98  * \internal
  99  * \brief Initialize transition trigger
 100  */
 101 void
 102 controld_init_transition_trigger(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 103 {
 104     transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger,
 105                                               NULL);
 106 }
 107 
 108 /*!
 109  * \internal
 110  * \brief Destroy transition trigger
 111  */
 112 void
 113 controld_destroy_transition_trigger(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 114 {
 115     mainloop_destroy_trigger(transition_trigger);
 116     transition_trigger = NULL;
 117 }
 118 
 119 void
 120 controld_trigger_graph_as(const char *fn, int line)
     /* [previous][next][first][last][top][bottom][index][help] */
 121 {
 122     crm_trace("%s:%d - Triggered graph processing", fn, line);
 123     mainloop_set_trigger(transition_trigger);
 124 }
 125 
 126 static struct abort_timer_s {
 127     bool aborted;
 128     guint id;
 129     int priority;
 130     enum pcmk__graph_next action;
 131     const char *text;
 132 } abort_timer = { 0, };
 133 
 134 static gboolean
 135 abort_timer_popped(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 136 {
 137     struct abort_timer_s *abort_timer = (struct abort_timer_s *) data;
 138 
 139     if (AM_I_DC && (abort_timer->aborted == FALSE)) {
 140         abort_transition(abort_timer->priority, abort_timer->action,
 141                          abort_timer->text, NULL);
 142     }
 143     abort_timer->id = 0;
 144     return FALSE; // do not immediately reschedule timer
 145 }
 146 
 147 /*!
 148  * \internal
 149  * \brief Abort transition after delay, if not already aborted in that time
 150  *
 151  * \param[in] abort_text  Must be literal string
 152  */
 153 void
 154 abort_after_delay(int abort_priority, enum pcmk__graph_next abort_action,
     /* [previous][next][first][last][top][bottom][index][help] */
 155                   const char *abort_text, guint delay_ms)
 156 {
 157     if (abort_timer.id) {
 158         // Timer already in progress, stop and reschedule
 159         g_source_remove(abort_timer.id);
 160     }
 161     abort_timer.aborted = FALSE;
 162     abort_timer.priority = abort_priority;
 163     abort_timer.action = abort_action;
 164     abort_timer.text = abort_text;
 165     abort_timer.id = g_timeout_add(delay_ms, abort_timer_popped, &abort_timer);
 166 }
 167 
 168 static void
 169 free_node_pending_timer(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 170 {
 171     struct abort_timer_s *node_pending_timer = (struct abort_timer_s *) data;
 172 
 173     if (node_pending_timer->id != 0) {
 174         g_source_remove(node_pending_timer->id);
 175         node_pending_timer->id = 0;
 176     }
 177 
 178     free(node_pending_timer);
 179 }
 180 
 181 static gboolean
 182 node_pending_timer_popped(gpointer key)
     /* [previous][next][first][last][top][bottom][index][help] */
 183 {
 184     struct abort_timer_s *node_pending_timer = NULL;
 185 
 186     if (node_pending_timers == NULL) {
 187         return FALSE;
 188     }
 189 
 190     node_pending_timer = g_hash_table_lookup(node_pending_timers, key);
 191     if (node_pending_timer == NULL) {
 192         return FALSE;
 193     }
 194 
 195     crm_warn("Node with id '%s' pending timed out (%us) on joining the process "
 196              "group",
 197              (const char *) key, controld_globals.node_pending_timeout);
 198 
 199     if (controld_globals.node_pending_timeout > 0) {
 200         abort_timer_popped(node_pending_timer);
 201     }
 202 
 203     g_hash_table_remove(node_pending_timers, key);
 204 
 205     return FALSE; // do not reschedule timer
 206 }
 207 
 208 static void
 209 init_node_pending_timer(const crm_node_t *node, guint timeout)
     /* [previous][next][first][last][top][bottom][index][help] */
 210 {
 211     struct abort_timer_s *node_pending_timer = NULL;
 212     char *key = NULL;
 213 
 214     if (node->uuid == NULL) {
 215         return;
 216     }
 217 
 218     if (node_pending_timers == NULL) {
 219         node_pending_timers = pcmk__strikey_table(free,
 220                                                   free_node_pending_timer);
 221 
 222     // The timer is somehow already existing
 223     } else if (g_hash_table_lookup(node_pending_timers, node->uuid) != NULL) {
 224         return;
 225     }
 226 
 227     crm_notice("Waiting for pending %s with id '%s' to join the process "
 228                "group (timeout=%us)",
 229                node->uname ? node->uname : "node", node->uuid,
 230                controld_globals.node_pending_timeout);
 231 
 232     node_pending_timer = calloc(1, sizeof(struct abort_timer_s));
 233     CRM_ASSERT(node_pending_timer != NULL);
 234 
 235     node_pending_timer->aborted = FALSE;
 236     node_pending_timer->priority = INFINITY;
 237     node_pending_timer->action = pcmk__graph_restart;
 238     node_pending_timer->text = "Node pending timed out";
 239 
 240     key = strdup(node->uuid);
 241     CRM_ASSERT(key != NULL);
 242 
 243     g_hash_table_replace(node_pending_timers, key, node_pending_timer);
 244 
 245     node_pending_timer->id = g_timeout_add_seconds(timeout,
 246                                                    node_pending_timer_popped,
 247                                                    key);
 248     CRM_ASSERT(node_pending_timer->id != 0);
 249 }
 250 
 251 static void
 252 remove_node_pending_timer(const char *node_uuid)
     /* [previous][next][first][last][top][bottom][index][help] */
 253 {
 254     if (node_pending_timers == NULL) {
 255         return;
 256     }
 257 
 258     g_hash_table_remove(node_pending_timers, node_uuid);
 259 }
 260 
 261 void
 262 controld_node_pending_timer(const crm_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
 263 {
 264     long long remaining_timeout = 0;
 265 
 266     /* If the node is not an active cluster node, is leaving the cluster, or is
 267      * already part of CPG, or node-pending-timeout is disabled, free any
 268      * node pending timer for it.
 269      */
 270     if (pcmk_is_set(node->flags, crm_remote_node)
 271         || (node->when_member <= 1) || (node->when_online > 0)
 272         || (controld_globals.node_pending_timeout == 0)) {
 273         remove_node_pending_timer(node->uuid);
 274         return;
 275     }
 276 
 277     // Node is a cluster member but offline in CPG
 278 
 279     remaining_timeout = node->when_member - time(NULL)
 280                         + controld_globals.node_pending_timeout;
 281 
 282     /* It already passed node pending timeout somehow.
 283      * Free any node pending timer of it.
 284      */
 285     if (remaining_timeout <= 0) {
 286         remove_node_pending_timer(node->uuid);
 287         return;
 288     }
 289 
 290     init_node_pending_timer(node, remaining_timeout);
 291 }
 292 
 293 void
 294 controld_free_node_pending_timers(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 295 {
 296     if (node_pending_timers == NULL) {
 297         return;
 298     }
 299 
 300     g_hash_table_destroy(node_pending_timers);
 301     node_pending_timers = NULL;
 302 }
 303 
 304 static const char *
 305 abort2text(enum pcmk__graph_next abort_action)
     /* [previous][next][first][last][top][bottom][index][help] */
 306 {
 307     switch (abort_action) {
 308         case pcmk__graph_done:      return "done";
 309         case pcmk__graph_wait:      return "stop";
 310         case pcmk__graph_restart:   return "restart";
 311         case pcmk__graph_shutdown:  return "shutdown";
 312     }
 313     return "unknown";
 314 }
 315 
 316 static bool
 317 update_abort_priority(pcmk__graph_t *graph, int priority,
     /* [previous][next][first][last][top][bottom][index][help] */
 318                       enum pcmk__graph_next action, const char *abort_reason)
 319 {
 320     bool change = FALSE;
 321 
 322     if (graph == NULL) {
 323         return change;
 324     }
 325 
 326     if (graph->abort_priority < priority) {
 327         crm_debug("Abort priority upgraded from %d to %d", graph->abort_priority, priority);
 328         graph->abort_priority = priority;
 329         if (graph->abort_reason != NULL) {
 330             crm_debug("'%s' abort superseded by %s", graph->abort_reason, abort_reason);
 331         }
 332         graph->abort_reason = abort_reason;
 333         change = TRUE;
 334     }
 335 
 336     if (graph->completion_action < action) {
 337         crm_debug("Abort action %s superseded by %s: %s",
 338                   abort2text(graph->completion_action), abort2text(action), abort_reason);
 339         graph->completion_action = action;
 340         change = TRUE;
 341     }
 342 
 343     return change;
 344 }
 345 
 346 void
 347 abort_transition_graph(int abort_priority, enum pcmk__graph_next abort_action,
     /* [previous][next][first][last][top][bottom][index][help] */
 348                        const char *abort_text, const xmlNode *reason,
 349                        const char *fn, int line)
 350 {
 351     int add[] = { 0, 0, 0 };
 352     int del[] = { 0, 0, 0 };
 353     int level = LOG_INFO;
 354     const xmlNode *diff = NULL;
 355     const xmlNode *change = NULL;
 356 
 357     CRM_CHECK(controld_globals.transition_graph != NULL, return);
 358 
 359     switch (controld_globals.fsa_state) {
 360         case S_STARTING:
 361         case S_PENDING:
 362         case S_NOT_DC:
 363         case S_HALT:
 364         case S_ILLEGAL:
 365         case S_STOPPING:
 366         case S_TERMINATE:
 367             crm_info("Abort %s suppressed: state=%s (%scomplete)",
 368                      abort_text, fsa_state2string(controld_globals.fsa_state),
 369                      (controld_globals.transition_graph->complete? "" : "in"));
 370             return;
 371         default:
 372             break;
 373     }
 374 
 375     abort_timer.aborted = TRUE;
 376     controld_expect_sched_reply(NULL);
 377 
 378     if (!controld_globals.transition_graph->complete
 379         && update_abort_priority(controld_globals.transition_graph,
 380                                  abort_priority, abort_action,
 381                                  abort_text)) {
 382         level = LOG_NOTICE;
 383     }
 384 
 385     if (reason != NULL) {
 386         const xmlNode *search = NULL;
 387 
 388         for(search = reason; search; search = search->parent) {
 389             if (pcmk__xe_is(search, XML_TAG_DIFF)) {
 390                 diff = search;
 391                 break;
 392             }
 393         }
 394 
 395         if(diff) {
 396             xml_patch_versions(diff, add, del);
 397             for(search = reason; search; search = search->parent) {
 398                 if (pcmk__xe_is(search, XML_DIFF_CHANGE)) {
 399                     change = search;
 400                     break;
 401                 }
 402             }
 403         }
 404     }
 405 
 406     if (reason == NULL) {
 407         do_crm_log(level,
 408                    "Transition %d aborted: %s " CRM_XS " source=%s:%d "
 409                    "complete=%s", controld_globals.transition_graph->id,
 410                    abort_text, fn, line,
 411                    pcmk__btoa(controld_globals.transition_graph->complete));
 412 
 413     } else if(change == NULL) {
 414         GString *local_path = pcmk__element_xpath(reason);
 415         CRM_ASSERT(local_path != NULL);
 416 
 417         do_crm_log(level, "Transition %d aborted by %s.%s: %s "
 418                    CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
 419                    controld_globals.transition_graph->id, reason->name,
 420                    ID(reason), abort_text, add[0], add[1], add[2], fn, line,
 421                    (const char *) local_path->str,
 422                    pcmk__btoa(controld_globals.transition_graph->complete));
 423         g_string_free(local_path, TRUE);
 424 
 425     } else {
 426         const char *op = crm_element_value(change, XML_DIFF_OP);
 427         const char *path = crm_element_value(change, XML_DIFF_PATH);
 428 
 429         if(change == reason) {
 430             if(strcmp(op, "create") == 0) {
 431                 reason = reason->children;
 432 
 433             } else if(strcmp(op, "modify") == 0) {
 434                 reason = first_named_child(reason, XML_DIFF_RESULT);
 435                 if(reason) {
 436                     reason = reason->children;
 437                 }
 438             }
 439             CRM_CHECK(reason != NULL, goto done);
 440         }
 441 
 442         if(strcmp(op, "delete") == 0) {
 443             const char *shortpath = strrchr(path, '/');
 444 
 445             do_crm_log(level, "Transition %d aborted by deletion of %s: %s "
 446                        CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
 447                        controld_globals.transition_graph->id,
 448                        (shortpath? (shortpath + 1) : path), abort_text,
 449                        add[0], add[1], add[2], fn, line, path,
 450                        pcmk__btoa(controld_globals.transition_graph->complete));
 451 
 452         } else if (pcmk__xe_is(reason, XML_CIB_TAG_NVPAIR)) {
 453             do_crm_log(level, "Transition %d aborted by %s doing %s %s=%s: %s "
 454                        CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
 455                        controld_globals.transition_graph->id,
 456                        crm_element_value(reason, XML_ATTR_ID), op,
 457                        crm_element_value(reason, XML_NVPAIR_ATTR_NAME),
 458                        crm_element_value(reason, XML_NVPAIR_ATTR_VALUE),
 459                        abort_text, add[0], add[1], add[2], fn, line, path,
 460                        pcmk__btoa(controld_globals.transition_graph->complete));
 461 
 462         } else if (pcmk__xe_is(reason, XML_LRM_TAG_RSC_OP)) {
 463             const char *magic = crm_element_value(reason, XML_ATTR_TRANSITION_MAGIC);
 464 
 465             do_crm_log(level, "Transition %d aborted by operation %s '%s' on %s: %s "
 466                        CRM_XS " magic=%s cib=%d.%d.%d source=%s:%d complete=%s",
 467                        controld_globals.transition_graph->id,
 468                        crm_element_value(reason, XML_LRM_ATTR_TASK_KEY), op,
 469                        crm_element_value(reason, XML_LRM_ATTR_TARGET), abort_text,
 470                        magic, add[0], add[1], add[2], fn, line,
 471                        pcmk__btoa(controld_globals.transition_graph->complete));
 472 
 473         } else if (pcmk__str_any_of((const char *) reason->name,
 474                    XML_CIB_TAG_STATE, XML_CIB_TAG_NODE, NULL)) {
 475             const char *uname = crm_peer_uname(ID(reason));
 476 
 477             do_crm_log(level, "Transition %d aborted by %s '%s' on %s: %s "
 478                        CRM_XS " cib=%d.%d.%d source=%s:%d complete=%s",
 479                        controld_globals.transition_graph->id,
 480                        reason->name, op, pcmk__s(uname, ID(reason)),
 481                        abort_text, add[0], add[1], add[2], fn, line,
 482                        pcmk__btoa(controld_globals.transition_graph->complete));
 483 
 484         } else {
 485             const char *id = ID(reason);
 486 
 487             do_crm_log(level, "Transition %d aborted by %s.%s '%s': %s "
 488                        CRM_XS " cib=%d.%d.%d source=%s:%d path=%s complete=%s",
 489                        controld_globals.transition_graph->id,
 490                        reason->name, pcmk__s(id, ""), pcmk__s(op, "change"),
 491                        abort_text, add[0], add[1], add[2], fn, line, path,
 492                        pcmk__btoa(controld_globals.transition_graph->complete));
 493         }
 494     }
 495 
 496 done:
 497     if (controld_globals.transition_graph->complete) {
 498         if (controld_get_period_transition_timer() > 0) {
 499             controld_stop_transition_timer();
 500             controld_start_transition_timer();
 501         } else {
 502             register_fsa_input(C_FSA_INTERNAL, I_PE_CALC, NULL);
 503         }
 504         return;
 505     }
 506 
 507     trigger_graph();
 508 }

/* [previous][next][first][last][top][bottom][index][help] */