root/crmd/te_callbacks.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. get_node_id
  2. update_stonith_max_attempts
  3. te_legacy_update_diff
  4. process_resource_updates
  5. get_node_from_xpath
  6. extract_node_uuid
  7. abort_unless_down
  8. te_update_diff
  9. process_te_message
  10. too_many_st_failures
  11. st_fail_count_reset
  12. st_fail_count_increment
  13. abort_for_stonith_failure
  14. tengine_stonith_callback
  15. cib_fencing_updated
  16. cib_action_updated
  17. action_timer_callback

   1 /*
   2  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
   3  *
   4  * This program is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU General Public
   6  * License as published by the Free Software Foundation; either
   7  * version 2 of the License, or (at your option) any later version.
   8  *
   9  * This software is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public
  15  * License along with this library; if not, write to the Free Software
  16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  17  */
  18 
  19 #include <crm_internal.h>
  20 
  21 #include <sys/stat.h>
  22 
  23 #include <crm/crm.h>
  24 #include <crm/common/xml.h>
  25 #include <crm/msg_xml.h>
  26 
  27 #include <tengine.h>
  28 #include <te_callbacks.h>
  29 #include <crmd_fsa.h>
  30 
  31 #include <crm/cluster.h>        /* For ONLINESTATUS etc */
  32 
  33 void te_update_confirm(const char *event, xmlNode * msg);
  34 
  35 extern char *te_uuid;
  36 gboolean shuttingdown = FALSE;
  37 crm_graph_t *transition_graph;
  38 crm_trigger_t *transition_trigger = NULL;
  39 
  40 static unsigned long int stonith_max_attempts = 10;
  41 
  42 /* #define rsc_op_template "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_CIB_TAG_STATE"[@uname='%s']"//"XML_LRM_TAG_RSC_OP"[@id='%s]" */
  43 #define rsc_op_template "//"XML_TAG_DIFF_ADDED"//"XML_TAG_CIB"//"XML_LRM_TAG_RSC_OP"[@id='%s']"
  44 
  45 static const char *
  46 get_node_id(xmlNode * rsc_op)
     /* [previous][next][first][last][top][bottom][index][help] */
  47 {
  48     xmlNode *node = rsc_op;
  49 
  50     while (node != NULL && safe_str_neq(XML_CIB_TAG_STATE, TYPE(node))) {
  51         node = node->parent;
  52     }
  53 
  54     CRM_CHECK(node != NULL, return NULL);
  55     return ID(node);
  56 }
  57 
  58 void
  59 update_stonith_max_attempts(const char* value)
     /* [previous][next][first][last][top][bottom][index][help] */
  60 {
  61     if (safe_str_eq(value, INFINITY_S)) {
  62        stonith_max_attempts = node_score_infinity;
  63     }
  64     else {
  65        stonith_max_attempts = crm_int_helper(value, NULL);
  66     }
  67 }
  68 static void
  69 te_legacy_update_diff(const char *event, xmlNode * diff)
     /* [previous][next][first][last][top][bottom][index][help] */
  70 {
  71     int lpc, max;
  72     xmlXPathObject *xpathObj = NULL;
  73 
  74     CRM_CHECK(diff != NULL, return);
  75 
  76     xml_log_patchset(LOG_TRACE, __FUNCTION__, diff);
  77     if (cib_config_changed(NULL, NULL, &diff)) {
  78         abort_transition(INFINITY, tg_restart, "Non-status change", diff);
  79         goto bail;              /* configuration changed */
  80     }
  81 
  82     /* Tickets Attributes - Added/Updated */
  83     xpathObj =
  84         xpath_search(diff,
  85                      "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_CIB_TAG_TICKETS);
  86     if (numXpathResults(xpathObj) > 0) {
  87         xmlNode *aborted = getXpathResult(xpathObj, 0);
  88 
  89         abort_transition(INFINITY, tg_restart, "Ticket attribute: update", aborted);
  90         goto bail;
  91 
  92     }
  93     freeXpathObject(xpathObj);
  94 
  95     /* Tickets Attributes - Removed */
  96     xpathObj =
  97         xpath_search(diff,
  98                      "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//" XML_CIB_TAG_TICKETS);
  99     if (numXpathResults(xpathObj) > 0) {
 100         xmlNode *aborted = getXpathResult(xpathObj, 0);
 101 
 102         abort_transition(INFINITY, tg_restart, "Ticket attribute: removal", aborted);
 103         goto bail;
 104     }
 105     freeXpathObject(xpathObj);
 106 
 107     /* Transient Attributes - Added/Updated */
 108     xpathObj =
 109         xpath_search(diff,
 110                      "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//"
 111                      XML_TAG_TRANSIENT_NODEATTRS "//" XML_CIB_TAG_NVPAIR);
 112     max = numXpathResults(xpathObj);
 113 
 114     for (lpc = 0; lpc < max; lpc++) {
 115         xmlNode *attr = getXpathResult(xpathObj, lpc);
 116         const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME);
 117         const char *value = NULL;
 118 
 119         if (safe_str_eq(CRM_OP_PROBED, name)) {
 120             value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE);
 121         }
 122 
 123         if (crm_is_true(value) == FALSE) {
 124             abort_transition(INFINITY, tg_restart, "Transient attribute: update", attr);
 125             crm_log_xml_trace(attr, "Abort");
 126             goto bail;
 127         }
 128     }
 129 
 130     freeXpathObject(xpathObj);
 131 
 132     /* Transient Attributes - Removed */
 133     xpathObj =
 134         xpath_search(diff,
 135                      "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_REMOVED "//"
 136                      XML_TAG_TRANSIENT_NODEATTRS);
 137     if (numXpathResults(xpathObj) > 0) {
 138         xmlNode *aborted = getXpathResult(xpathObj, 0);
 139 
 140         abort_transition(INFINITY, tg_restart, "Transient attribute: removal", aborted);
 141         goto bail;
 142 
 143     }
 144     freeXpathObject(xpathObj);
 145 
 146     /*
 147      * Check for and fast-track the processing of LRM refreshes
 148      * In large clusters this can result in _huge_ speedups
 149      *
 150      * Unfortunately we can only do so when there are no pending actions
 151      * Otherwise we could miss updates we're waiting for and stall
 152      *
 153      */
 154     xpathObj = NULL;
 155     if (transition_graph->pending == 0) {
 156         xpathObj =
 157             xpath_search(diff,
 158                          "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//"
 159                          XML_LRM_TAG_RESOURCE);
 160     }
 161 
 162     max = numXpathResults(xpathObj);
 163     if (max > 1) {
 164         /* Updates by, or in response to, TE actions will never contain updates
 165          * for more than one resource at a time
 166          */
 167         crm_debug("Detected LRM refresh - %d resources updated: Skipping all resource events", max);
 168         crm_log_xml_trace(diff, "lrm-refresh");
 169         abort_transition(INFINITY, tg_restart, "LRM Refresh", NULL);
 170         goto bail;
 171     }
 172     freeXpathObject(xpathObj);
 173 
 174     /* Process operation updates */
 175     xpathObj =
 176         xpath_search(diff,
 177                      "//" F_CIB_UPDATE_RESULT "//" XML_TAG_DIFF_ADDED "//" XML_LRM_TAG_RSC_OP);
 178     if (numXpathResults(xpathObj)) {
 179 /*
 180     <status>
 181        <node_state id="node1" state=CRMD_JOINSTATE_MEMBER exp_state="active">
 182           <lrm>
 183              <lrm_resources>
 184                 <rsc_state id="" rsc_id="rsc4" node_id="node1" rsc_state="stopped"/>
 185 */
 186         int lpc = 0, max = numXpathResults(xpathObj);
 187 
 188         for (lpc = 0; lpc < max; lpc++) {
 189             xmlNode *rsc_op = getXpathResult(xpathObj, lpc);
 190             const char *node = get_node_id(rsc_op);
 191 
 192             process_graph_event(rsc_op, node);
 193         }
 194     }
 195     freeXpathObject(xpathObj);
 196 
 197     /* Detect deleted (as opposed to replaced or added) actions - eg. crm_resource -C */
 198     xpathObj = xpath_search(diff, "//" XML_TAG_DIFF_REMOVED "//" XML_LRM_TAG_RSC_OP);
 199     max = numXpathResults(xpathObj);
 200     for (lpc = 0; lpc < max; lpc++) {
 201         int path_max = 0;
 202         const char *op_id = NULL;
 203         char *rsc_op_xpath = NULL;
 204         xmlXPathObject *op_match = NULL;
 205         xmlNode *match = getXpathResult(xpathObj, lpc);
 206 
 207         CRM_LOG_ASSERT(match != NULL);
 208         if(match == NULL) { continue; };
 209 
 210         op_id = ID(match);
 211 
 212         path_max = strlen(rsc_op_template) + strlen(op_id) + 1;
 213         rsc_op_xpath = calloc(1, path_max);
 214         snprintf(rsc_op_xpath, path_max, rsc_op_template, op_id);
 215 
 216         op_match = xpath_search(diff, rsc_op_xpath);
 217         if (numXpathResults(op_match) == 0) {
 218             /* Prevent false positives by matching cancelations too */
 219             const char *node = get_node_id(match);
 220             crm_action_t *cancelled = get_cancel_action(op_id, node);
 221 
 222             if (cancelled == NULL) {
 223                 crm_debug("No match for deleted action %s (%s on %s)", rsc_op_xpath, op_id,
 224                           node);
 225                 abort_transition(INFINITY, tg_restart, "Resource op removal", match);
 226                 freeXpathObject(op_match);
 227                 free(rsc_op_xpath);
 228                 goto bail;
 229 
 230             } else {
 231                 crm_debug("Deleted lrm_rsc_op %s on %s was for graph event %d",
 232                           op_id, node, cancelled->id);
 233             }
 234         }
 235 
 236         freeXpathObject(op_match);
 237         free(rsc_op_xpath);
 238     }
 239 
 240   bail:
 241     freeXpathObject(xpathObj);
 242 }
 243 
 244 static void process_resource_updates(
     /* [previous][next][first][last][top][bottom][index][help] */
 245     const char *node, xmlNode *xml, xmlNode *change, const char *op, const char *xpath) 
 246 {
 247     xmlNode *cIter = NULL;
 248     xmlNode *rsc = NULL;
 249     xmlNode *rsc_op = NULL;
 250     int num_resources = 0;
 251 
 252     if(xml == NULL) {
 253         return;
 254 
 255     } else if(strcmp((const char*)xml->name, XML_CIB_TAG_LRM) == 0) {
 256         xml = first_named_child(xml, XML_LRM_TAG_RESOURCES);
 257         crm_trace("Got %p in %s", xml, XML_CIB_TAG_LRM);
 258     }
 259 
 260     CRM_ASSERT(strcmp((const char*)xml->name, XML_LRM_TAG_RESOURCES) == 0);
 261 
 262     for(cIter = xml->children; cIter; cIter = cIter->next) {
 263         num_resources++;
 264     }
 265 
 266     if(num_resources > 1) {
 267         /*
 268          * Check for and fast-track the processing of LRM refreshes
 269          * In large clusters this can result in _huge_ speedups
 270          *
 271          * Unfortunately we can only do so when there are no pending actions
 272          * Otherwise we could miss updates we're waiting for and stall
 273          *
 274          */
 275 
 276         crm_debug("Detected LRM refresh - %d resources updated", num_resources);
 277         crm_log_xml_trace(change, "lrm-refresh");
 278         abort_transition(INFINITY, tg_restart, "LRM Refresh", NULL);
 279         return;
 280     }
 281 
 282     for (rsc = __xml_first_child(xml); rsc != NULL; rsc = __xml_next(rsc)) {
 283         crm_trace("Processing %s", ID(rsc));
 284         for (rsc_op = __xml_first_child(rsc); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) {
 285             crm_trace("Processing %s", ID(rsc_op));
 286             process_graph_event(rsc_op, node);
 287         }
 288     }
 289 }
 290 
 291 #define NODE_PATT "/lrm[@id="
 292 static char *get_node_from_xpath(const char *xpath) 
     /* [previous][next][first][last][top][bottom][index][help] */
 293 {
 294     char *nodeid = NULL;
 295     char *tmp = strstr(xpath, NODE_PATT);
 296 
 297     if(tmp) {
 298         tmp += strlen(NODE_PATT);
 299         tmp += 1;
 300 
 301         nodeid = strdup(tmp);
 302         tmp = strstr(nodeid, "\'");
 303         CRM_ASSERT(tmp);
 304         tmp[0] = 0;
 305     }
 306     return nodeid;
 307 }
 308 
 309 static char *extract_node_uuid(const char *xpath) 
     /* [previous][next][first][last][top][bottom][index][help] */
 310 {
 311     char *mutable_path = strdup(xpath);
 312     char *node_uuid = NULL;
 313     char *search = NULL;
 314     char *match = NULL;
 315 
 316     match = strstr(mutable_path, "node_state[@id=\'");
 317     if (match == NULL) {
 318         free(mutable_path);
 319         return NULL;
 320     }
 321     match += strlen("node_state[@id=\'");
 322 
 323     search = strchr(match, '\'');
 324     if (search == NULL) {
 325         free(mutable_path);
 326         return NULL;
 327     }
 328     search[0] = 0;
 329 
 330     node_uuid = strdup(match);
 331     free(mutable_path);
 332     return node_uuid;
 333 }
 334 
 335 static void
 336 abort_unless_down(const char *xpath, const char *op, xmlNode *change,
     /* [previous][next][first][last][top][bottom][index][help] */
 337                   const char *reason)
 338 {
 339     char *node_uuid = NULL;
 340     crm_action_t *down = NULL;
 341 
 342     if(safe_str_neq(op, "delete")) {
 343         abort_transition(INFINITY, tg_restart, reason, change);
 344         return;
 345     }
 346 
 347     node_uuid = extract_node_uuid(xpath);
 348     if(node_uuid == NULL) {
 349         crm_err("Could not extract node ID from %s", xpath);
 350         abort_transition(INFINITY, tg_restart, reason, change);
 351         return;
 352     }
 353 
 354     down = match_down_event(node_uuid, TRUE);
 355     if(down == NULL || down->executed == false) {
 356         crm_trace("Not expecting %s to be down (%s)", node_uuid, xpath);
 357         abort_transition(INFINITY, tg_restart, reason, change);
 358     } else {
 359         crm_trace("Expecting changes to %s (%s)", node_uuid, xpath);
 360     }
 361     free(node_uuid);
 362 }
 363 
 364 void
 365 te_update_diff(const char *event, xmlNode * msg)
     /* [previous][next][first][last][top][bottom][index][help] */
 366 {
 367     int rc = -EINVAL;
 368     int format = 1;
 369     xmlNode *change = NULL;
 370     const char *op = NULL;
 371 
 372     xmlNode *diff = NULL;
 373 
 374     int p_add[] = { 0, 0, 0 };
 375     int p_del[] = { 0, 0, 0 };
 376 
 377     CRM_CHECK(msg != NULL, return);
 378     crm_element_value_int(msg, F_CIB_RC, &rc);
 379 
 380     if (transition_graph == NULL) {
 381         crm_trace("No graph");
 382         return;
 383 
 384     } else if (rc < pcmk_ok) {
 385         crm_trace("Filter rc=%d (%s)", rc, pcmk_strerror(rc));
 386         return;
 387 
 388     } else if (transition_graph->complete == TRUE
 389                && fsa_state != S_IDLE
 390                && fsa_state != S_TRANSITION_ENGINE && fsa_state != S_POLICY_ENGINE) {
 391         crm_trace("Filter state=%s, complete=%d", fsa_state2string(fsa_state),
 392                   transition_graph->complete);
 393         return;
 394     }
 395 
 396     op = crm_element_value(msg, F_CIB_OPERATION);
 397     diff = get_message_xml(msg, F_CIB_UPDATE_RESULT);
 398 
 399     xml_patch_versions(diff, p_add, p_del);
 400     crm_debug("Processing (%s) diff: %d.%d.%d -> %d.%d.%d (%s)", op,
 401               p_del[0], p_del[1], p_del[2], p_add[0], p_add[1], p_add[2],
 402               fsa_state2string(fsa_state));
 403 
 404     crm_element_value_int(diff, "format", &format);
 405     switch(format) {
 406         case 1:
 407             te_legacy_update_diff(event, diff);
 408             return;
 409         case 2:
 410             /* Cool, we know what to do here */
 411             crm_log_xml_trace(diff, "Patch:Raw");
 412             break;
 413         default:
 414             crm_warn("Unknown patch format: %d", format);
 415             return;
 416     }
 417 
 418     for (change = __xml_first_child(diff); change != NULL; change = __xml_next(change)) {
 419         const char *name = NULL;
 420         const char *op = crm_element_value(change, XML_DIFF_OP);
 421         const char *xpath = crm_element_value(change, XML_DIFF_PATH);
 422         xmlNode *match = NULL;
 423         const char *node = NULL;
 424 
 425         if(op == NULL) {
 426             continue;
 427 
 428         } else if(strcmp(op, "create") == 0) {
 429             match = change->children;
 430 
 431         } else if(strcmp(op, "move") == 0) {
 432             continue;
 433 
 434         } else if(strcmp(op, "modify") == 0) {
 435             match = first_named_child(change, XML_DIFF_RESULT);
 436             if(match) {
 437                 match = match->children;
 438             }
 439         }
 440 
 441         if(match) {
 442             if (match->type == XML_COMMENT_NODE) {
 443                 crm_trace("Ignoring %s operation for comment at %s", op, xpath);
 444                 continue;
 445             }
 446             name = (const char *)match->name;
 447         }
 448 
 449         crm_trace("Handling %s operation for %s%s%s",
 450                   op, (xpath? xpath : "CIB"),
 451                   (name? " matched by " : ""), (name? name : ""));
 452         if(xpath == NULL) {
 453             /* Version field, ignore */
 454 
 455         } else if(strstr(xpath, "/cib/configuration")) {
 456             abort_transition(INFINITY, tg_restart, "Configuration change", change);
 457             break; /* Won't be packaged with any resource operations we may be waiting for */
 458 
 459         } else if(strstr(xpath, "/"XML_CIB_TAG_TICKETS) || safe_str_eq(name, XML_CIB_TAG_TICKETS)) {
 460             abort_transition(INFINITY, tg_restart, "Ticket attribute change", change);
 461             break; /* Won't be packaged with any resource operations we may be waiting for */
 462 
 463         } else if(strstr(xpath, "/"XML_TAG_TRANSIENT_NODEATTRS"[") || safe_str_eq(name, XML_TAG_TRANSIENT_NODEATTRS)) {
 464             abort_unless_down(xpath, op, change, "Transient attribute change");
 465             break; /* Won't be packaged with any resource operations we may be waiting for */
 466 
 467         } else if(strstr(xpath, "/"XML_LRM_TAG_RSC_OP"[") && safe_str_eq(op, "delete")) {
 468             crm_action_t *cancel = NULL;
 469             char *mutable_key = strdup(xpath);
 470             char *key, *node_uuid;
 471 
 472             /* Extract the part of xpath between last pair of single quotes */
 473             key = strrchr(mutable_key, '\'');
 474             if (key != NULL) {
 475                 *key = '\0';
 476                 key = strrchr(mutable_key, '\'');
 477             }
 478             if (key == NULL) {
 479                 crm_warn("Ignoring malformed CIB update (resource deletion)");
 480                 free(mutable_key);
 481                 continue;
 482             }
 483             ++key;
 484 
 485             node_uuid = extract_node_uuid(xpath);
 486             cancel = get_cancel_action(key, node_uuid);
 487             if (cancel == NULL) {
 488                 abort_transition(INFINITY, tg_restart, "Resource operation removal", change);
 489 
 490             } else {
 491                 crm_info("Cancellation of %s on %s confirmed (%d)", key, node_uuid, cancel->id);
 492                 stop_te_timer(cancel->timer);
 493                 te_action_confirmed(cancel);
 494 
 495                 update_graph(transition_graph, cancel);
 496                 trigger_graph();
 497 
 498             }
 499             free(mutable_key);
 500             free(node_uuid);
 501 
 502         } else if(strstr(xpath, "/"XML_CIB_TAG_LRM"[") && safe_str_eq(op, "delete")) {
 503             abort_unless_down(xpath, op, change, "Resource state removal");
 504 
 505         } else if(strstr(xpath, "/"XML_CIB_TAG_STATE"[") && safe_str_eq(op, "delete")) {
 506             abort_unless_down(xpath, op, change, "Node state removal");
 507 
 508         } else if(name == NULL) {
 509             crm_debug("No result for %s operation to %s", op, xpath);
 510             CRM_ASSERT(strcmp(op, "delete") == 0 || strcmp(op, "move") == 0);
 511 
 512         } else if(strcmp(name, XML_TAG_CIB) == 0) {
 513             xmlNode *state = NULL;
 514             xmlNode *status = first_named_child(match, XML_CIB_TAG_STATUS);
 515             xmlNode *config = first_named_child(match, XML_CIB_TAG_CONFIGURATION);
 516 
 517             for (state = __xml_first_child(status); state != NULL; state = __xml_next(state)) {
 518                 xmlNode *lrm = first_named_child(state, XML_CIB_TAG_LRM);
 519 
 520                 node = ID(state);
 521                 process_resource_updates(node, lrm, change, op, xpath);
 522             }
 523 
 524             if(config) {
 525                 abort_transition(INFINITY, tg_restart, "Non-status-only change", change);
 526             }
 527 
 528         } else if(strcmp(name, XML_CIB_TAG_STATUS) == 0) {
 529             xmlNode *state = NULL;
 530 
 531             for (state = __xml_first_child(match); state != NULL; state = __xml_next(state)) {
 532                 xmlNode *lrm = first_named_child(state, XML_CIB_TAG_LRM);
 533 
 534                 node = ID(state);
 535                 process_resource_updates(node, lrm, change, op, xpath);
 536             }
 537 
 538         } else if(strcmp(name, XML_CIB_TAG_STATE) == 0) {
 539             xmlNode *lrm = first_named_child(match, XML_CIB_TAG_LRM);
 540 
 541             node = ID(match);
 542             process_resource_updates(node, lrm, change, op, xpath);
 543 
 544         } else if(strcmp(name, XML_CIB_TAG_LRM) == 0) {
 545             node = ID(match);
 546             process_resource_updates(node, match, change, op, xpath);
 547 
 548         } else if(strcmp(name, XML_LRM_TAG_RESOURCES) == 0) {
 549             char *local_node = get_node_from_xpath(xpath);
 550 
 551             process_resource_updates(local_node, match, change, op, xpath);
 552             free(local_node);
 553 
 554         } else if(strcmp(name, XML_LRM_TAG_RESOURCE) == 0) {
 555 
 556             xmlNode *rsc_op;
 557             char *local_node = get_node_from_xpath(xpath);
 558 
 559             for (rsc_op = __xml_first_child(match); rsc_op != NULL; rsc_op = __xml_next(rsc_op)) {
 560                 process_graph_event(rsc_op, local_node);
 561             }
 562             free(local_node);
 563 
 564         } else if(strcmp(name, XML_LRM_TAG_RSC_OP) == 0) {
 565             char *local_node = get_node_from_xpath(xpath);
 566 
 567             process_graph_event(match, local_node);
 568             free(local_node);
 569 
 570         } else {
 571             crm_err("Ignoring %s operation for %s %p, %s", op, xpath, match, name);
 572         }
 573     }
 574 }
 575 
 576 
 577 gboolean
 578 process_te_message(xmlNode * msg, xmlNode * xml_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 579 {
 580     const char *from = crm_element_value(msg, F_ORIG);
 581     const char *sys_to = crm_element_value(msg, F_CRM_SYS_TO);
 582     const char *sys_from = crm_element_value(msg, F_CRM_SYS_FROM);
 583     const char *ref = crm_element_value(msg, F_CRM_REFERENCE);
 584     const char *op = crm_element_value(msg, F_CRM_TASK);
 585     const char *type = crm_element_value(msg, F_CRM_MSG_TYPE);
 586 
 587     crm_trace("Processing %s (%s) message", op, ref);
 588     crm_log_xml_trace(msg, "ipc");
 589 
 590     if (op == NULL) {
 591         /* error */
 592 
 593     } else if (sys_to == NULL || strcasecmp(sys_to, CRM_SYSTEM_TENGINE) != 0) {
 594         crm_trace("Bad sys-to %s", crm_str(sys_to));
 595         return FALSE;
 596 
 597     } else if (safe_str_eq(op, CRM_OP_INVOKE_LRM)
 598                && safe_str_eq(sys_from, CRM_SYSTEM_LRMD)
 599 /*                && safe_str_eq(type, XML_ATTR_RESPONSE) */
 600         ) {
 601         xmlXPathObject *xpathObj = NULL;
 602 
 603         crm_log_xml_trace(msg, "Processing (N)ACK");
 604         crm_debug("Processing (N)ACK %s from %s", crm_element_value(msg, F_CRM_REFERENCE), from);
 605 
 606         xpathObj = xpath_search(xml_data, "//" XML_LRM_TAG_RSC_OP);
 607         if (numXpathResults(xpathObj)) {
 608             int lpc = 0, max = numXpathResults(xpathObj);
 609 
 610             for (lpc = 0; lpc < max; lpc++) {
 611                 xmlNode *rsc_op = getXpathResult(xpathObj, lpc);
 612                 const char *node = get_node_id(rsc_op);
 613 
 614                 process_graph_event(rsc_op, node);
 615             }
 616             freeXpathObject(xpathObj);
 617 
 618         } else {
 619             crm_log_xml_err(msg, "Invalid (N)ACK");
 620             freeXpathObject(xpathObj);
 621             return FALSE;
 622         }
 623 
 624     } else {
 625         crm_err("Unknown command: %s::%s from %s", type, op, sys_from);
 626     }
 627 
 628     crm_trace("finished processing message");
 629 
 630     return TRUE;
 631 }
 632 
 633 GHashTable *stonith_failures = NULL;
 634 struct st_fail_rec {
 635     int count;
 636 };
 637 
 638 static gboolean
 639 too_many_st_failures(const char *target)
     /* [previous][next][first][last][top][bottom][index][help] */
 640 {
 641     GHashTableIter iter;
 642     const char *key = NULL;
 643     struct st_fail_rec *value = NULL;
 644 
 645     if (stonith_failures == NULL) {
 646         return FALSE;
 647     }
 648 
 649     if (target == NULL) {
 650         g_hash_table_iter_init(&iter, stonith_failures);
 651         while (g_hash_table_iter_next(&iter, (gpointer *) & key, (gpointer *) & value)) {
 652             if (value->count >= stonith_max_attempts) {
 653                 target = (const char*)key;
 654                 goto too_many;
 655             }
 656         }
 657     } else {
 658         value = g_hash_table_lookup(stonith_failures, target);
 659         if ((value != NULL) && (value->count >= stonith_max_attempts)) {
 660             goto too_many;
 661         }
 662     }
 663     return FALSE;
 664 
 665 too_many:
 666     crm_warn("Too many failures (%d) to fence %s, giving up",
 667              value->count, target);
 668     return TRUE;
 669 }
 670 
 671 /*!
 672  * \internal
 673  * \brief Reset a stonith fail count
 674  *
 675  * \param[in] target  Name of node to reset, or NULL for all
 676  */
 677 void
 678 st_fail_count_reset(const char *target)
     /* [previous][next][first][last][top][bottom][index][help] */
 679 {
 680     if (stonith_failures == NULL) {
 681         return;
 682     }
 683 
 684     if (target) {
 685         struct st_fail_rec *rec = NULL;
 686 
 687         rec = g_hash_table_lookup(stonith_failures, target);
 688         if (rec) {
 689             rec->count = 0;
 690         }
 691     } else {
 692         GHashTableIter iter;
 693         const char *key = NULL;
 694         struct st_fail_rec *rec = NULL;
 695 
 696         g_hash_table_iter_init(&iter, stonith_failures);
 697         while (g_hash_table_iter_next(&iter, (gpointer *) &key,
 698                                       (gpointer *) &rec)) {
 699             rec->count = 0;
 700         }
 701     }
 702 }
 703 
 704 void
 705 st_fail_count_increment(const char *target)
     /* [previous][next][first][last][top][bottom][index][help] */
 706 {
 707     struct st_fail_rec *rec = NULL;
 708 
 709     if (stonith_failures == NULL) {
 710         stonith_failures = crm_str_table_new();
 711     }
 712 
 713     rec = g_hash_table_lookup(stonith_failures, target);
 714     if (rec) {
 715         rec->count++;
 716     } else {
 717         rec = malloc(sizeof(struct st_fail_rec));
 718         if(rec == NULL) {
 719             return;
 720         }
 721 
 722         rec->count = 1;
 723         g_hash_table_insert(stonith_failures, strdup(target), rec);
 724     }
 725 }
 726 
 727 /*!
 728  * \internal
 729  * \brief Abort transition due to stonith failure
 730  *
 731  * \param[in] abort_action  Whether to restart or stop transition
 732  * \param[in] target  Don't restart if this (NULL for any) has too many failures
 733  * \param[in] reason  Log this stonith action XML as abort reason (or NULL)
 734  */
 735 void
 736 abort_for_stonith_failure(enum transition_action abort_action,
     /* [previous][next][first][last][top][bottom][index][help] */
 737                           const char *target, xmlNode *reason)
 738 {
 739     /* If stonith repeatedly fails, we eventually give up on starting a new
 740      * transition for that reason.
 741      */
 742     if ((abort_action != tg_stop) && too_many_st_failures(target)) {
 743         abort_action = tg_stop;
 744     }
 745     abort_transition(INFINITY, abort_action, "Stonith failed", reason);
 746 }
 747 
 748 void
 749 tengine_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
     /* [previous][next][first][last][top][bottom][index][help] */
 750 {
 751     char *uuid = NULL;
 752     int target_rc = -1;
 753     int stonith_id = -1;
 754     int transition_id = -1;
 755     crm_action_t *action = NULL;
 756     int call_id = data->call_id;
 757     int rc = data->rc;
 758     char *userdata = data->userdata;
 759 
 760     CRM_CHECK(userdata != NULL, return);
 761     crm_notice("Stonith operation %d/%s: %s (%d)", call_id, (char *)userdata,
 762                pcmk_strerror(rc), rc);
 763 
 764     if (AM_I_DC == FALSE) {
 765         return;
 766     }
 767 
 768     /* crm_info("call=%d, optype=%d, node_name=%s, result=%d, node_list=%s, action=%s", */
 769     /*       op->call_id, op->optype, op->node_name, op->op_result, */
 770     /*       (char *)op->node_list, op->private_data); */
 771 
 772     /* filter out old STONITH actions */
 773     CRM_CHECK(decode_transition_key(userdata, &uuid, &transition_id, &stonith_id, &target_rc),
 774               crm_err("Invalid event detected");
 775               goto bail;
 776         );
 777 
 778     if (transition_graph->complete || stonith_id < 0 || safe_str_neq(uuid, te_uuid)
 779         || transition_graph->id != transition_id) {
 780         crm_info("Ignoring STONITH action initiated outside of the current transition");
 781         goto bail;
 782     }
 783 
 784     action = get_action(stonith_id, FALSE);
 785     if (action == NULL) {
 786         crm_err("Stonith action not matched");
 787         goto bail;
 788     }
 789 
 790     stop_te_timer(action->timer);
 791     if (rc == pcmk_ok) {
 792         const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
 793         const char *uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID);
 794         const char *op = crm_meta_value(action->params, "stonith_action"); 
 795 
 796         crm_info("Stonith operation %d for %s passed", call_id, target);
 797         if (action->confirmed == FALSE) {
 798             te_action_confirmed(action);
 799             if (safe_str_eq("on", op)) {
 800                 const char *value = NULL;
 801                 char *now = crm_itoa(time(NULL));
 802 
 803                 update_attrd(target, CRM_ATTR_UNFENCED, now, NULL, FALSE);
 804                 free(now);
 805 
 806                 value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL);
 807                 update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL, FALSE);
 808 
 809                 value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE);
 810                 update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL, FALSE);
 811 
 812             } else if (action->sent_update == FALSE) {
 813                 send_stonith_update(action, target, uuid);
 814                 action->sent_update = TRUE;
 815             }
 816         }
 817         st_fail_count_reset(target);
 818 
 819     } else {
 820         const char *target = crm_element_value_const(action->xml, XML_LRM_ATTR_TARGET);
 821         enum transition_action abort_action = tg_restart;
 822 
 823         action->failed = TRUE;
 824         crm_notice("Stonith operation %d for %s failed (%s): aborting transition.",
 825                    call_id, target, pcmk_strerror(rc));
 826 
 827         /* If no fence devices were available, there's no use in immediately
 828          * checking again, so don't start a new transition in that case.
 829          */
 830         if (rc == -ENODEV) {
 831             crm_warn("No devices found in cluster to fence %s, giving up",
 832                      target);
 833             abort_action = tg_stop;
 834         }
 835 
 836         /* Increment the fail count now, so abort_for_stonith_failure() can
 837          * check it. Non-DC nodes will increment it in tengine_stonith_notify().
 838          */
 839         st_fail_count_increment(target);
 840         abort_for_stonith_failure(abort_action, target, NULL);
 841     }
 842 
 843     update_graph(transition_graph, action);
 844     trigger_graph();
 845 
 846   bail:
 847     free(userdata);
 848     free(uuid);
 849     return;
 850 }
 851 
 852 void
 853 cib_fencing_updated(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 854 {
 855     if (rc < pcmk_ok) {
 856         crm_err("Fencing update %d for %s: failed - %s (%d)",
 857                 call_id, (char *)user_data, pcmk_strerror(rc), rc);
 858         crm_log_xml_warn(msg, "Failed update");
 859         abort_transition(INFINITY, tg_shutdown, "CIB update failed", NULL);
 860 
 861     } else {
 862         crm_info("Fencing update %d for %s: complete", call_id, (char *)user_data);
 863     }
 864 }
 865 
 866 void
 867 cib_action_updated(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 868 {
 869     if (rc < pcmk_ok) {
 870         crm_err("Update %d FAILED: %s", call_id, pcmk_strerror(rc));
 871     }
 872 }
 873 
 874 gboolean
 875 action_timer_callback(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 876 {
 877     crm_action_timer_t *timer = NULL;
 878 
 879     CRM_CHECK(data != NULL, return FALSE);
 880 
 881     timer = (crm_action_timer_t *) data;
 882     stop_te_timer(timer);
 883 
 884     crm_warn("Timer popped (timeout=%d, abort_level=%d, complete=%s)",
 885              timer->timeout,
 886              transition_graph->abort_priority, transition_graph->complete ? "true" : "false");
 887 
 888     CRM_CHECK(timer->action != NULL, return FALSE);
 889 
 890     if (transition_graph->complete) {
 891         crm_warn("Ignoring timeout while not in transition");
 892 
 893     } else if (timer->reason == timeout_action_warn) {
 894         print_action(LOG_WARNING, "Action missed its timeout: ", timer->action);
 895 
 896         /* Don't check the FSA state
 897          *
 898          * We might also be in S_INTEGRATION or some other state waiting for this
 899          * action so we can close the transition and continue
 900          */
 901 
 902     } else {
 903         /* fail the action */
 904         gboolean send_update = TRUE;
 905         const char *task = crm_element_value(timer->action->xml, XML_LRM_ATTR_TASK);
 906 
 907         print_action(LOG_ERR, "Aborting transition, action lost: ", timer->action);
 908 
 909         timer->action->failed = TRUE;
 910         te_action_confirmed(timer->action);
 911         abort_transition(INFINITY, tg_restart, "Action lost", NULL);
 912 
 913         update_graph(transition_graph, timer->action);
 914         trigger_graph();
 915 
 916         if (timer->action->type != action_type_rsc) {
 917             send_update = FALSE;
 918         } else if (safe_str_eq(task, RSC_CANCEL)) {
 919             /* we don't need to update the CIB with these */
 920             send_update = FALSE;
 921         }
 922 
 923         if (send_update) {
 924             cib_action_update(timer->action, PCMK_LRM_OP_TIMEOUT, PCMK_OCF_UNKNOWN_ERROR);
 925         }
 926     }
 927 
 928     return FALSE;
 929 }

/* [previous][next][first][last][top][bottom][index][help] */