root/daemons/controld/controld_te_callbacks.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. shutdown_lock_cleared
  2. process_lrm_resource_diff
  3. process_resource_updates
  4. extract_node_uuid
  5. abort_unless_down
  6. process_op_deletion
  7. process_delete_diff
  8. process_node_state_diff
  9. process_status_diff
  10. process_cib_diff
  11. te_update_diff_element
  12. te_update_diff
  13. process_te_message
  14. cib_action_updated
  15. action_timer_callback

   1 /*
   2  * Copyright 2004-2024 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <sys/stat.h>
  13 
  14 #include <crm/crm.h>
  15 #include <crm/common/xml.h>
  16 #include <crm/common/xml_internal.h>
  17 
  18 #include <pacemaker-controld.h>
  19 
  20 // An explicit PCMK_OPT_SHUTDOWN_LOCK of 0 means the lock has been cleared
  21 static bool
  22 shutdown_lock_cleared(xmlNode *lrm_resource)
     /* [previous][next][first][last][top][bottom][index][help] */
  23 {
  24     time_t shutdown_lock = 0;
  25 
  26     return (crm_element_value_epoch(lrm_resource, PCMK_OPT_SHUTDOWN_LOCK,
  27                                     &shutdown_lock) == pcmk_ok)
  28            && (shutdown_lock == 0);
  29 }
  30 
  31 static void
  32 process_lrm_resource_diff(xmlNode *lrm_resource, const char *node)
     /* [previous][next][first][last][top][bottom][index][help] */
  33 {
  34     for (xmlNode *rsc_op = pcmk__xe_first_child(lrm_resource, NULL, NULL, NULL);
  35          rsc_op != NULL; rsc_op = pcmk__xe_next(rsc_op, NULL)) {
  36         process_graph_event(rsc_op, node);
  37     }
  38     if (shutdown_lock_cleared(lrm_resource)) {
  39         // @TODO would be more efficient to abort once after transition done
  40         abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
  41                          "Shutdown lock cleared", lrm_resource);
  42     }
  43 }
  44 
  45 static void
  46 process_resource_updates(const char *node, xmlNode *xml, xmlNode *change,
     /* [previous][next][first][last][top][bottom][index][help] */
  47                          const char *op, const char *xpath)
  48 {
  49     xmlNode *rsc = NULL;
  50 
  51     if (xml == NULL) {
  52         return;
  53     }
  54 
  55     if (pcmk__xe_is(xml, PCMK__XE_LRM)) {
  56         xml = pcmk__xe_first_child(xml, PCMK__XE_LRM_RESOURCES, NULL, NULL);
  57         CRM_CHECK(xml != NULL, return);
  58     }
  59 
  60     CRM_CHECK(pcmk__xe_is(xml, PCMK__XE_LRM_RESOURCES), return);
  61 
  62     /*
  63      * Updates by, or in response to, TE actions will never contain updates
  64      * for more than one resource at a time, so such updates indicate an
  65      * LRM refresh.
  66      *
  67      * In that case, start a new transition rather than check each result
  68      * individually, which can result in _huge_ speedups in large clusters.
  69      *
  70      * Unfortunately, we can only do so when there are no pending actions.
  71      * Otherwise, we could mistakenly throw away those results here, and
  72      * the cluster will stall waiting for them and time out the operation.
  73      */
  74     if ((controld_globals.transition_graph->pending == 0)
  75         && (xml->children != NULL) && (xml->children->next != NULL)) {
  76 
  77         crm_log_xml_trace(change, "lrm-refresh");
  78         abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
  79                          "History refresh", NULL);
  80         return;
  81     }
  82 
  83     for (rsc = pcmk__xe_first_child(xml, NULL, NULL, NULL); rsc != NULL;
  84          rsc = pcmk__xe_next(rsc, NULL)) {
  85         crm_trace("Processing %s", pcmk__xe_id(rsc));
  86         process_lrm_resource_diff(rsc, node);
  87     }
  88 }
  89 
  90 static char *extract_node_uuid(const char *xpath) 
     /* [previous][next][first][last][top][bottom][index][help] */
  91 {
  92     char *mutable_path = pcmk__str_copy(xpath);
  93     char *node_uuid = NULL;
  94     char *search = NULL;
  95     char *match = NULL;
  96 
  97     match = strstr(mutable_path, PCMK__XE_NODE_STATE "[@" PCMK_XA_ID "=\'");
  98     if (match == NULL) {
  99         free(mutable_path);
 100         return NULL;
 101     }
 102     match += strlen(PCMK__XE_NODE_STATE "[@" PCMK_XA_ID "=\'");
 103 
 104     search = strchr(match, '\'');
 105     if (search == NULL) {
 106         free(mutable_path);
 107         return NULL;
 108     }
 109     search[0] = 0;
 110 
 111     node_uuid = pcmk__str_copy(match);
 112     free(mutable_path);
 113     return node_uuid;
 114 }
 115 
 116 static void
 117 abort_unless_down(const char *xpath, const char *op, xmlNode *change,
     /* [previous][next][first][last][top][bottom][index][help] */
 118                   const char *reason)
 119 {
 120     char *node_uuid = NULL;
 121     pcmk__graph_action_t *down = NULL;
 122 
 123     if (!pcmk__str_eq(op, PCMK_VALUE_DELETE, pcmk__str_none)) {
 124         abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart, reason,
 125                          change);
 126         return;
 127     }
 128 
 129     node_uuid = extract_node_uuid(xpath);
 130     if(node_uuid == NULL) {
 131         crm_err("Could not extract node ID from %s", xpath);
 132         abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart, reason,
 133                          change);
 134         return;
 135     }
 136 
 137     down = match_down_event(node_uuid);
 138     if (down == NULL) {
 139         crm_trace("Not expecting %s to be down (%s)", node_uuid, xpath);
 140         abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart, reason,
 141                          change);
 142     } else {
 143         crm_trace("Expecting changes to %s (%s)", node_uuid, xpath);
 144     }
 145     free(node_uuid);
 146 }
 147 
 148 static void
 149 process_op_deletion(const char *xpath, xmlNode *change)
     /* [previous][next][first][last][top][bottom][index][help] */
 150 {
 151     char *mutable_key = pcmk__str_copy(xpath);
 152     char *key;
 153     char *node_uuid;
 154 
 155     // Extract the part of xpath between last pair of single quotes
 156     key = strrchr(mutable_key, '\'');
 157     if (key != NULL) {
 158         *key = '\0';
 159         key = strrchr(mutable_key, '\'');
 160     }
 161     if (key == NULL) {
 162         crm_warn("Ignoring malformed CIB update (resource deletion of %s)",
 163                  xpath);
 164         free(mutable_key);
 165         return;
 166     }
 167     ++key;
 168 
 169     node_uuid = extract_node_uuid(xpath);
 170     if (confirm_cancel_action(key, node_uuid) == FALSE) {
 171         abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
 172                          "Resource operation removal", change);
 173     }
 174     free(mutable_key);
 175     free(node_uuid);
 176 }
 177 
 178 static void
 179 process_delete_diff(const char *xpath, const char *op, xmlNode *change)
     /* [previous][next][first][last][top][bottom][index][help] */
 180 {
 181     if (strstr(xpath, "/" PCMK__XE_LRM_RSC_OP "[")) {
 182         process_op_deletion(xpath, change);
 183 
 184     } else if (strstr(xpath, "/" PCMK__XE_LRM "[")) {
 185         abort_unless_down(xpath, op, change, "Resource state removal");
 186 
 187     } else if (strstr(xpath, "/" PCMK__XE_NODE_STATE "[")) {
 188         abort_unless_down(xpath, op, change, "Node state removal");
 189 
 190     } else {
 191         crm_trace("Ignoring delete of %s", xpath);
 192     }
 193 }
 194 
 195 static void
 196 process_node_state_diff(xmlNode *state, xmlNode *change, const char *op,
     /* [previous][next][first][last][top][bottom][index][help] */
 197                         const char *xpath)
 198 {
 199     xmlNode *lrm = pcmk__xe_first_child(state, PCMK__XE_LRM, NULL, NULL);
 200 
 201     process_resource_updates(pcmk__xe_id(state), lrm, change, op, xpath);
 202 }
 203 
 204 static void
 205 process_status_diff(xmlNode *status, xmlNode *change, const char *op,
     /* [previous][next][first][last][top][bottom][index][help] */
 206                     const char *xpath)
 207 {
 208     for (xmlNode *state = pcmk__xe_first_child(status, NULL, NULL, NULL);
 209          state != NULL; state = pcmk__xe_next(state, NULL)) {
 210 
 211         process_node_state_diff(state, change, op, xpath);
 212     }
 213 }
 214 
 215 static void
 216 process_cib_diff(xmlNode *cib, xmlNode *change, const char *op,
     /* [previous][next][first][last][top][bottom][index][help] */
 217                  const char *xpath)
 218 {
 219     xmlNode *status = pcmk__xe_first_child(cib, PCMK_XE_STATUS, NULL, NULL);
 220     xmlNode *config = pcmk__xe_first_child(cib, PCMK_XE_CONFIGURATION, NULL,
 221                                            NULL);
 222 
 223     if (status) {
 224         process_status_diff(status, change, op, xpath);
 225     }
 226     if (config) {
 227         abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
 228                          "Non-status-only change", change);
 229     }
 230 }
 231 
 232 static int
 233 te_update_diff_element(xmlNode *change, void *userdata)
     /* [previous][next][first][last][top][bottom][index][help] */
 234 {
 235     xmlNode *match = NULL;
 236     const char *name = NULL;
 237     const char *xpath = crm_element_value(change, PCMK_XA_PATH);
 238 
 239     // Possible ops: create, modify, delete, move
 240     const char *op = crm_element_value(change, PCMK_XA_OPERATION);
 241 
 242     // Ignore uninteresting updates
 243     if (op == NULL) {
 244         return pcmk_rc_ok;
 245 
 246     } else if (xpath == NULL) {
 247         crm_trace("Ignoring %s change for version field", op);
 248         return pcmk_rc_ok;
 249 
 250     } else if ((strcmp(op, PCMK_VALUE_MOVE) == 0)
 251                && (strstr(xpath,
 252                           "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION
 253                           "/" PCMK_XE_RESOURCES) == NULL)) {
 254         /* We still need to consider moves within the resources section,
 255          * since they affect placement order.
 256          */
 257         crm_trace("Ignoring move change at %s", xpath);
 258         return pcmk_rc_ok;
 259     }
 260 
 261     // Find the result of create/modify ops
 262     if (strcmp(op, PCMK_VALUE_CREATE) == 0) {
 263         match = change->children;
 264 
 265     } else if (strcmp(op, PCMK_VALUE_MODIFY) == 0) {
 266         match = pcmk__xe_first_child(change, PCMK_XE_CHANGE_RESULT, NULL, NULL);
 267         if(match) {
 268             match = match->children;
 269         }
 270 
 271     } else if (!pcmk__str_any_of(op,
 272                                  PCMK_VALUE_DELETE, PCMK_VALUE_MOVE,
 273                                  NULL)) {
 274         crm_warn("Ignoring malformed CIB update (%s operation on %s is unrecognized)",
 275                  op, xpath);
 276         return pcmk_rc_ok;
 277     }
 278 
 279     if (match) {
 280         if (match->type == XML_COMMENT_NODE) {
 281             crm_trace("Ignoring %s operation for comment at %s", op, xpath);
 282             return pcmk_rc_ok;
 283         }
 284         name = (const char *)match->name;
 285     }
 286 
 287     crm_trace("Handling %s operation for %s%s%s",
 288               op, (xpath? xpath : "CIB"),
 289               (name? " matched by " : ""), (name? name : ""));
 290 
 291     if (strstr(xpath, "/" PCMK_XE_CIB "/" PCMK_XE_CONFIGURATION)) {
 292         abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
 293                          "Configuration change", change);
 294         return pcmk_rc_cib_modified; // Won't be packaged with operation results we may be waiting for
 295 
 296     } else if (strstr(xpath, "/" PCMK_XE_TICKETS)
 297                || pcmk__str_eq(name, PCMK_XE_TICKETS, pcmk__str_none)) {
 298         abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
 299                          "Ticket attribute change", change);
 300         return pcmk_rc_cib_modified; // Won't be packaged with operation results we may be waiting for
 301 
 302     } else if (strstr(xpath, "/" PCMK__XE_TRANSIENT_ATTRIBUTES "[")
 303                || pcmk__str_eq(name, PCMK__XE_TRANSIENT_ATTRIBUTES,
 304                                pcmk__str_none)) {
 305         abort_unless_down(xpath, op, change, "Transient attribute change");
 306         return pcmk_rc_cib_modified; // Won't be packaged with operation results we may be waiting for
 307 
 308     } else if (strcmp(op, PCMK_VALUE_DELETE) == 0) {
 309         process_delete_diff(xpath, op, change);
 310 
 311     } else if (name == NULL) {
 312         crm_warn("Ignoring malformed CIB update (%s at %s has no result)",
 313                  op, xpath);
 314 
 315     } else if (strcmp(name, PCMK_XE_CIB) == 0) {
 316         process_cib_diff(match, change, op, xpath);
 317 
 318     } else if (strcmp(name, PCMK_XE_STATUS) == 0) {
 319         process_status_diff(match, change, op, xpath);
 320 
 321     } else if (strcmp(name, PCMK__XE_NODE_STATE) == 0) {
 322         process_node_state_diff(match, change, op, xpath);
 323 
 324     } else if (strcmp(name, PCMK__XE_LRM) == 0) {
 325         process_resource_updates(pcmk__xe_id(match), match, change, op,
 326                                  xpath);
 327 
 328     } else if (strcmp(name, PCMK__XE_LRM_RESOURCES) == 0) {
 329         char *local_node = pcmk__xpath_node_id(xpath, PCMK__XE_LRM);
 330 
 331         process_resource_updates(local_node, match, change, op, xpath);
 332         free(local_node);
 333 
 334     } else if (strcmp(name, PCMK__XE_LRM_RESOURCE) == 0) {
 335         char *local_node = pcmk__xpath_node_id(xpath, PCMK__XE_LRM);
 336 
 337         process_lrm_resource_diff(match, local_node);
 338         free(local_node);
 339 
 340     } else if (strcmp(name, PCMK__XE_LRM_RSC_OP) == 0) {
 341         char *local_node = pcmk__xpath_node_id(xpath, PCMK__XE_LRM);
 342 
 343         process_graph_event(match, local_node);
 344         free(local_node);
 345 
 346     } else {
 347         crm_warn("Ignoring malformed CIB update (%s at %s has unrecognized result %s)",
 348                  op, xpath, name);
 349     }
 350 
 351     return pcmk_rc_ok;
 352 }
 353 
 354 void
 355 te_update_diff(const char *event, xmlNode * msg)
     /* [previous][next][first][last][top][bottom][index][help] */
 356 {
 357     xmlNode *wrapper = NULL;
 358     xmlNode *diff = NULL;
 359     const char *op = NULL;
 360     int rc = -EINVAL;
 361     int format = 1;
 362     int p_add[] = { 0, 0, 0 };
 363     int p_del[] = { 0, 0, 0 };
 364 
 365     CRM_CHECK(msg != NULL, return);
 366     crm_element_value_int(msg, PCMK__XA_CIB_RC, &rc);
 367 
 368     if (controld_globals.transition_graph == NULL) {
 369         crm_trace("No graph");
 370         return;
 371 
 372     } else if (rc < pcmk_ok) {
 373         crm_trace("Filter rc=%d (%s)", rc, pcmk_strerror(rc));
 374         return;
 375 
 376     } else if (controld_globals.transition_graph->complete
 377                && (controld_globals.fsa_state != S_IDLE)
 378                && (controld_globals.fsa_state != S_TRANSITION_ENGINE)
 379                && (controld_globals.fsa_state != S_POLICY_ENGINE)) {
 380         crm_trace("Filter state=%s (complete)",
 381                   fsa_state2string(controld_globals.fsa_state));
 382         return;
 383     }
 384 
 385     op = crm_element_value(msg, PCMK__XA_CIB_OP);
 386 
 387     wrapper = pcmk__xe_first_child(msg, PCMK__XE_CIB_UPDATE_RESULT, NULL, NULL);
 388     diff = pcmk__xe_first_child(wrapper, NULL, NULL, NULL);
 389 
 390     xml_patch_versions(diff, p_add, p_del);
 391     crm_debug("Processing (%s) diff: %d.%d.%d -> %d.%d.%d (%s)", op,
 392               p_del[0], p_del[1], p_del[2], p_add[0], p_add[1], p_add[2],
 393               fsa_state2string(controld_globals.fsa_state));
 394 
 395     crm_element_value_int(diff, PCMK_XA_FORMAT, &format);
 396 
 397     if (format == 2) {
 398         crm_log_xml_trace(diff, "patch");
 399         pcmk__xe_foreach_child(diff, NULL, te_update_diff_element, NULL);
 400 
 401     } else {
 402         crm_warn("Ignoring malformed CIB update (unknown patch format %d)",
 403                  format);
 404     }
 405     controld_remove_all_outside_events();
 406 }
 407 
 408 void
 409 process_te_message(xmlNode * msg, xmlNode * xml_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 410 {
 411     const char *value = NULL;
 412     xmlXPathObject *xpathObj = NULL;
 413     int nmatches = 0;
 414 
 415     CRM_CHECK(msg != NULL, return);
 416 
 417     // Transition requests must specify transition engine as subsystem
 418     value = crm_element_value(msg, PCMK__XA_CRM_SYS_TO);
 419     if (pcmk__str_empty(value)
 420         || !pcmk__str_eq(value, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
 421         crm_info("Received invalid transition request: subsystem '%s' not '"
 422                  CRM_SYSTEM_TENGINE "'", pcmk__s(value, ""));
 423         return;
 424     }
 425 
 426     // Only the lrm_invoke command is supported as a transition request
 427     value = crm_element_value(msg, PCMK__XA_CRM_TASK);
 428     if (!pcmk__str_eq(value, CRM_OP_INVOKE_LRM, pcmk__str_none)) {
 429         crm_info("Received invalid transition request: command '%s' not '"
 430                  CRM_OP_INVOKE_LRM "'", pcmk__s(value, ""));
 431         return;
 432     }
 433 
 434     // Transition requests must be marked as coming from the executor
 435     value = crm_element_value(msg, PCMK__XA_CRM_SYS_FROM);
 436     if (!pcmk__str_eq(value, CRM_SYSTEM_LRMD, pcmk__str_none)) {
 437         crm_info("Received invalid transition request: from '%s' not '"
 438                  CRM_SYSTEM_LRMD "'", pcmk__s(value, ""));
 439         return;
 440     }
 441 
 442     crm_debug("Processing transition request with ref='%s' origin='%s'",
 443               pcmk__s(crm_element_value(msg, PCMK_XA_REFERENCE), ""),
 444               pcmk__s(crm_element_value(msg, PCMK__XA_SRC), ""));
 445 
 446     xpathObj = xpath_search(xml_data, "//" PCMK__XE_LRM_RSC_OP);
 447     nmatches = numXpathResults(xpathObj);
 448     if (nmatches == 0) {
 449         crm_err("Received transition request with no results (bug?)");
 450     } else {
 451         for (int lpc = 0; lpc < nmatches; lpc++) {
 452             xmlNode *rsc_op = getXpathResult(xpathObj, lpc);
 453             const char *node = get_node_id(rsc_op);
 454 
 455             process_graph_event(rsc_op, node);
 456         }
 457     }
 458     freeXpathObject(xpathObj);
 459 }
 460 
 461 void
 462 cib_action_updated(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 463 {
 464     if (rc < pcmk_ok) {
 465         crm_err("Update %d FAILED: %s", call_id, pcmk_strerror(rc));
 466     }
 467 }
 468 
 469 /*!
 470  * \brief Handle a timeout in node-to-node communication
 471  *
 472  * \param[in,out] data  Pointer to graph action
 473  *
 474  * \return FALSE (indicating that source should be not be re-added)
 475  */
 476 gboolean
 477 action_timer_callback(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 478 {
 479     pcmk__graph_action_t *action = (pcmk__graph_action_t *) data;
 480     const char *task = NULL;
 481     const char *on_node = NULL;
 482     const char *via_node = NULL;
 483 
 484     CRM_CHECK(data != NULL, return FALSE);
 485 
 486     stop_te_timer(action);
 487 
 488     task = crm_element_value(action->xml, PCMK_XA_OPERATION);
 489     on_node = crm_element_value(action->xml, PCMK__META_ON_NODE);
 490     via_node = crm_element_value(action->xml, PCMK__XA_ROUTER_NODE);
 491 
 492     if (controld_globals.transition_graph->complete) {
 493         crm_notice("Node %s did not send %s result (via %s) within %dms "
 494                    "(ignoring because transition not in progress)",
 495                    (on_node? on_node : ""), (task? task : "unknown action"),
 496                    (via_node? via_node : "controller"), action->timeout);
 497     } else {
 498         /* fail the action */
 499 
 500         crm_err("Node %s did not send %s result (via %s) within %dms "
 501                 "(action timeout plus " PCMK_OPT_CLUSTER_DELAY ")",
 502                 (on_node? on_node : ""), (task? task : "unknown action"),
 503                 (via_node? via_node : "controller"),
 504                 (action->timeout
 505                  + controld_globals.transition_graph->network_delay));
 506         pcmk__log_graph_action(LOG_ERR, action);
 507 
 508         pcmk__set_graph_action_flags(action, pcmk__graph_action_failed);
 509 
 510         te_action_confirmed(action, controld_globals.transition_graph);
 511         abort_transition(PCMK_SCORE_INFINITY, pcmk__graph_restart,
 512                          "Action lost", NULL);
 513 
 514         // Record timeout in the CIB if appropriate
 515         if ((action->type == pcmk__rsc_graph_action)
 516             && controld_action_is_recordable(task)) {
 517             controld_record_action_timeout(action);
 518         }
 519     }
 520 
 521     return FALSE;
 522 }

/* [previous][next][first][last][top][bottom][index][help] */