root/daemons/controld/controld_execd.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. lrm_connection_destroy
  2. make_stop_id
  3. copy_instance_keys
  4. copy_meta_keys
  5. history_remove_recurring_op
  6. history_free_recurring_ops
  7. history_free
  8. update_history_cache
  9. send_task_ok_ack
  10. op_node_name
  11. lrm_op_callback
  12. try_local_executor_connect
  13. do_lrm_control
  14. lrm_state_verify_stopped
  15. is_rsc_active
  16. build_active_RAs
  17. controld_query_executor_state
  18. controld_rc2event
  19. controld_trigger_delete_refresh
  20. notify_deleted
  21. lrm_remove_deleted_rsc
  22. lrm_remove_deleted_op
  23. delete_rsc_entry
  24. last_failed_matches_op
  25. lrm_clear_last_failure
  26. cancel_op
  27. cancel_action_by_key
  28. cancel_op_key
  29. get_lrm_resource
  30. delete_resource
  31. get_fake_call_id
  32. fake_op_status
  33. force_reprobe
  34. synthesize_lrmd_failure
  35. lrm_op_target
  36. fail_lrm_resource
  37. handle_reprobe_op
  38. do_lrm_cancel
  39. do_lrm_delete
  40. new_metadata_cb_data
  41. free_metadata_cb_data
  42. metadata_complete
  43. do_lrm_invoke
  44. construct_op
  45. controld_ack_event_directly
  46. verify_stopped
  47. stop_recurring_action_by_rsc
  48. stop_recurring_actions
  49. should_cancel_recurring
  50. should_nack_action
  51. do_lrm_rsc_op
  52. do_lrm_event
  53. unescape_newlines
  54. did_lrm_rsc_op_fail
  55. log_executor_event
  56. process_lrm_event

   1 /*
   2  * Copyright 2004-2023 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <regex.h>
  13 #include <sys/param.h>
  14 #include <sys/types.h>
  15 #include <sys/wait.h>
  16 
  17 #include <crm/crm.h>
  18 #include <crm/lrmd.h>           // lrmd_event_data_t, lrmd_rsc_info_t, etc.
  19 #include <crm/services.h>
  20 #include <crm/msg_xml.h>
  21 #include <crm/common/xml.h>
  22 #include <crm/pengine/rules.h>
  23 #include <crm/lrmd_internal.h>
  24 
  25 #include <pacemaker-internal.h>
  26 #include <pacemaker-controld.h>
  27 
  28 #define START_DELAY_THRESHOLD 5 * 60 * 1000
  29 #define MAX_LRM_REG_FAILS 30
  30 
   31 struct delete_event_s {       // User data handed to lrm_remove_deleted_rsc()
   32     int rc;                   // Legacy return code of the deletion (pcmk_ok means deleted)
   33     const char *rsc;          // ID of the resource the deletion applied to
   34     lrm_state_t *lrm_state;   // Executor state passed on to notify_deleted()
   35 };
  36 
  37 static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id);
  38 static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list);
  39 static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data);
  40 
  41 static lrmd_event_data_t *construct_op(const lrm_state_t *lrm_state,
  42                                        const xmlNode *rsc_op,
  43                                        const char *rsc_id,
  44                                        const char *operation);
  45 static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
  46                           xmlNode *msg, struct ra_metadata_s *md);
  47 
  48 static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state,
  49                                          int log_level);
  50 
  51 static void
  52 lrm_connection_destroy(void)
     /* [previous][next][first][last][top][bottom][index][help] */
  53 {
  54     if (pcmk_is_set(controld_globals.fsa_input_register, R_LRM_CONNECTED)) {
  55         crm_crit("Connection to executor failed");
  56         register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
  57         controld_clear_fsa_input_flags(R_LRM_CONNECTED);
  58 
  59     } else {
  60         crm_info("Disconnected from executor");
  61     }
  62 
  63 }
  64 
  65 static char *
  66 make_stop_id(const char *rsc, int call_id)
     /* [previous][next][first][last][top][bottom][index][help] */
  67 {
  68     return crm_strdup_printf("%s:%d", rsc, call_id);
  69 }
  70 
  71 static void
  72 copy_instance_keys(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
  73 {
  74     if (strstr(key, CRM_META "_") == NULL) {
  75         g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value));
  76     }
  77 }
  78 
  79 static void
  80 copy_meta_keys(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
  81 {
  82     if (strstr(key, CRM_META "_") != NULL) {
  83         g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value));
  84     }
  85 }
  86 
  87 /*!
  88  * \internal
  89  * \brief Remove a recurring operation from a resource's history
  90  *
  91  * \param[in,out] history  Resource history to modify
  92  * \param[in]     op       Operation to remove
  93  *
  94  * \return TRUE if the operation was found and removed, FALSE otherwise
  95  */
  96 static gboolean
  97 history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op)
     /* [previous][next][first][last][top][bottom][index][help] */
  98 {
  99     GList *iter;
 100 
 101     for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) {
 102         lrmd_event_data_t *existing = iter->data;
 103 
 104         if ((op->interval_ms == existing->interval_ms)
 105             && pcmk__str_eq(op->rsc_id, existing->rsc_id, pcmk__str_none)
 106             && pcmk__str_eq(op->op_type, existing->op_type, pcmk__str_casei)) {
 107 
 108             history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter);
 109             lrmd_free_event(existing);
 110             return TRUE;
 111         }
 112     }
 113     return FALSE;
 114 }
 115 
 116 /*!
 117  * \internal
 118  * \brief Free all recurring operations in resource history
 119  *
 120  * \param[in,out] history  Resource history to modify
 121  */
 122 static void
 123 history_free_recurring_ops(rsc_history_t *history)
     /* [previous][next][first][last][top][bottom][index][help] */
 124 {
 125     GList *iter;
 126 
 127     for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) {
 128         lrmd_free_event(iter->data);
 129     }
 130     g_list_free(history->recurring_op_list);
 131     history->recurring_op_list = NULL;
 132 }
 133 
 134 /*!
 135  * \internal
 136  * \brief Free resource history
 137  *
 138  * \param[in,out] history  Resource history to free
 139  */
 140 void
 141 history_free(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 142 {
 143     rsc_history_t *history = (rsc_history_t*)data;
 144 
 145     if (history->stop_params) {
 146         g_hash_table_destroy(history->stop_params);
 147     }
 148 
 149     /* Don't need to free history->rsc.id because it's set to history->id */
 150     free(history->rsc.type);
 151     free(history->rsc.standard);
 152     free(history->rsc.provider);
 153 
 154     lrmd_free_event(history->failed);
 155     lrmd_free_event(history->last);
 156     free(history->id);
 157     history_free_recurring_ops(history);
 158     free(history);
 159 }
 160 
 161 static void
 162 update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op)
     /* [previous][next][first][last][top][bottom][index][help] */
 163 {
 164     int target_rc = 0;
 165     rsc_history_t *entry = NULL;
 166 
 167     if (op->rsc_deleted) {
 168         crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type);
 169         controld_delete_resource_history(op->rsc_id, lrm_state->node_name,
 170                                          NULL, crmd_cib_smart_opt());
 171         return;
 172     }
 173 
 174     if (pcmk__str_eq(op->op_type, RSC_NOTIFY, pcmk__str_casei)) {
 175         return;
 176     }
 177 
 178     crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type);
 179 
 180     entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id);
 181     if (entry == NULL && rsc) {
 182         entry = calloc(1, sizeof(rsc_history_t));
 183         entry->id = strdup(op->rsc_id);
 184         g_hash_table_insert(lrm_state->resource_history, entry->id, entry);
 185 
 186         entry->rsc.id = entry->id;
 187         entry->rsc.type = strdup(rsc->type);
 188         entry->rsc.standard = strdup(rsc->standard);
 189         pcmk__str_update(&entry->rsc.provider, rsc->provider);
 190 
 191     } else if (entry == NULL) {
 192         crm_info("Resource %s no longer exists, not updating cache", op->rsc_id);
 193         return;
 194     }
 195 
 196     entry->last_callid = op->call_id;
 197     target_rc = rsc_op_expected_rc(op);
 198     if (op->op_status == PCMK_EXEC_CANCELLED) {
 199         if (op->interval_ms > 0) {
 200             crm_trace("Removing cancelled recurring op: " PCMK__OP_FMT,
 201                       op->rsc_id, op->op_type, op->interval_ms);
 202             history_remove_recurring_op(entry, op);
 203             return;
 204         } else {
 205             crm_trace("Skipping " PCMK__OP_FMT " rc=%d, status=%d",
 206                       op->rsc_id, op->op_type, op->interval_ms, op->rc,
 207                       op->op_status);
 208         }
 209 
 210     } else if (did_rsc_op_fail(op, target_rc)) {
 211         /* Store failed monitors here, otherwise the block below will cause them
 212          * to be forgotten when a stop happens.
 213          */
 214         if (entry->failed) {
 215             lrmd_free_event(entry->failed);
 216         }
 217         entry->failed = lrmd_copy_event(op);
 218 
 219     } else if (op->interval_ms == 0) {
 220         if (entry->last) {
 221             lrmd_free_event(entry->last);
 222         }
 223         entry->last = lrmd_copy_event(op);
 224 
 225         if (op->params && pcmk__strcase_any_of(op->op_type, CRMD_ACTION_START,
 226                                                CRMD_ACTION_RELOAD,
 227                                                CRMD_ACTION_RELOAD_AGENT,
 228                                                CRMD_ACTION_STATUS, NULL)) {
 229             if (entry->stop_params) {
 230                 g_hash_table_destroy(entry->stop_params);
 231             }
 232             entry->stop_params = pcmk__strkey_table(free, free);
 233 
 234             g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params);
 235         }
 236     }
 237 
 238     if (op->interval_ms > 0) {
 239         /* Ensure there are no duplicates */
 240         history_remove_recurring_op(entry, op);
 241 
 242         crm_trace("Adding recurring op: " PCMK__OP_FMT,
 243                   op->rsc_id, op->op_type, op->interval_ms);
 244         entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op));
 245 
 246     } else if (entry->recurring_op_list && !pcmk__str_eq(op->op_type, RSC_STATUS, pcmk__str_casei)) {
 247         crm_trace("Dropping %d recurring ops because of: " PCMK__OP_FMT,
 248                   g_list_length(entry->recurring_op_list), op->rsc_id,
 249                   op->op_type, op->interval_ms);
 250         history_free_recurring_ops(entry);
 251     }
 252 }
 253 
 254 /*!
 255  * \internal
 256  * \brief Send a direct OK ack for a resource task
 257  *
 258  * \param[in] lrm_state  LRM connection
 259  * \param[in] input      Input message being ack'ed
 260  * \param[in] rsc_id     ID of affected resource
 261  * \param[in] rsc        Affected resource (if available)
 262  * \param[in] task       Operation task being ack'ed
 263  * \param[in] ack_host   Name of host to send ack to
 264  * \param[in] ack_sys    IPC system name to ack
 265  */
 266 static void
 267 send_task_ok_ack(const lrm_state_t *lrm_state, const ha_msg_input_t *input,
     /* [previous][next][first][last][top][bottom][index][help] */
 268                  const char *rsc_id, const lrmd_rsc_info_t *rsc,
 269                  const char *task, const char *ack_host, const char *ack_sys)
 270 {
 271     lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc_id, task);
 272 
 273     lrmd__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
 274     controld_ack_event_directly(ack_host, ack_sys, rsc, op, rsc_id);
 275     lrmd_free_event(op);
 276 }
 277 
 278 static inline const char *
 279 op_node_name(lrmd_event_data_t *op)
     /* [previous][next][first][last][top][bottom][index][help] */
 280 {
 281     return pcmk__s(op->remote_nodename, controld_globals.our_nodename);
 282 }
 283 
 284 void
 285 lrm_op_callback(lrmd_event_data_t * op)
     /* [previous][next][first][last][top][bottom][index][help] */
 286 {
 287     CRM_CHECK(op != NULL, return);
 288     switch (op->type) {
 289         case lrmd_event_disconnect:
 290             if (op->remote_nodename == NULL) {
 291                 /* If this is the local executor IPC connection, set the right
 292                  * bits in the controller when the connection goes down.
 293                  */
 294                 lrm_connection_destroy();
 295             }
 296             break;
 297 
 298         case lrmd_event_exec_complete:
 299             {
 300                 lrm_state_t *lrm_state = lrm_state_find(op_node_name(op));
 301 
 302                 CRM_ASSERT(lrm_state != NULL);
 303                 process_lrm_event(lrm_state, op, NULL, NULL);
 304             }
 305             break;
 306 
 307         default:
 308             break;
 309     }
 310 }
 311 
 312 static void
 313 try_local_executor_connect(long long action, fsa_data_t *msg_data,
     /* [previous][next][first][last][top][bottom][index][help] */
 314                            lrm_state_t *lrm_state)
 315 {
 316     int rc = pcmk_rc_ok;
 317 
 318     crm_debug("Connecting to the local executor");
 319 
 320     // If we can connect, great
 321     rc = controld_connect_local_executor(lrm_state);
 322     if (rc == pcmk_rc_ok) {
 323         controld_set_fsa_input_flags(R_LRM_CONNECTED);
 324         crm_info("Connection to the local executor established");
 325         return;
 326     }
 327 
 328     // Otherwise, if we can try again, set a timer to do so
 329     if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) {
 330         crm_warn("Failed to connect to the local executor %d time%s "
 331                  "(%d max): %s", lrm_state->num_lrm_register_fails,
 332                  pcmk__plural_s(lrm_state->num_lrm_register_fails),
 333                  MAX_LRM_REG_FAILS, pcmk_rc_str(rc));
 334         controld_start_wait_timer();
 335         crmd_fsa_stall(FALSE);
 336         return;
 337     }
 338 
 339     // Otherwise give up
 340     crm_err("Failed to connect to the executor the max allowed "
 341             "%d time%s: %s", lrm_state->num_lrm_register_fails,
 342             pcmk__plural_s(lrm_state->num_lrm_register_fails),
 343             pcmk_rc_str(rc));
 344     register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 345 }
 346 
 347 /*       A_LRM_CONNECT  */
 348 void
 349 do_lrm_control(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 350                enum crmd_fsa_cause cause,
 351                enum crmd_fsa_state cur_state,
 352                enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 353 {
 354     /* This only pertains to local executor connections. Remote connections are
 355      * handled as resources within the scheduler. Connecting and disconnecting
 356      * from remote executor instances is handled differently.
 357      */
 358 
 359     lrm_state_t *lrm_state = NULL;
 360 
 361     if (controld_globals.our_nodename == NULL) {
 362         return; /* Nothing to do */
 363     }
 364     lrm_state = lrm_state_find_or_create(controld_globals.our_nodename);
 365     if (lrm_state == NULL) {
 366         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 367         return;
 368     }
 369 
 370     if (action & A_LRM_DISCONNECT) {
 371         if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) {
 372             if (action == A_LRM_DISCONNECT) {
 373                 crmd_fsa_stall(FALSE);
 374                 return;
 375             }
 376         }
 377 
 378         controld_clear_fsa_input_flags(R_LRM_CONNECTED);
 379         crm_info("Disconnecting from the executor");
 380         lrm_state_disconnect(lrm_state);
 381         lrm_state_reset_tables(lrm_state, FALSE);
 382         crm_notice("Disconnected from the executor");
 383     }
 384 
 385     if (action & A_LRM_CONNECT) {
 386         try_local_executor_connect(action, msg_data, lrm_state);
 387     }
 388 
 389     if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) {
 390         crm_err("Unexpected action %s in %s", fsa_action2string(action),
 391                 __func__);
 392     }
 393 }
 394 
 395 static gboolean
 396 lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level)
     /* [previous][next][first][last][top][bottom][index][help] */
 397 {
 398     int counter = 0;
 399     gboolean rc = TRUE;
 400     const char *when = "lrm disconnect";
 401 
 402     GHashTableIter gIter;
 403     const char *key = NULL;
 404     rsc_history_t *entry = NULL;
 405     active_op_t *pending = NULL;
 406 
 407     crm_debug("Checking for active resources before exit");
 408 
 409     if (cur_state == S_TERMINATE) {
 410         log_level = LOG_ERR;
 411         when = "shutdown";
 412 
 413     } else if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
 414         when = "shutdown... waiting";
 415     }
 416 
 417     if ((lrm_state->active_ops != NULL) && lrm_state_is_connected(lrm_state)) {
 418         guint removed = g_hash_table_foreach_remove(lrm_state->active_ops,
 419                                                     stop_recurring_actions,
 420                                                     lrm_state);
 421         guint nremaining = g_hash_table_size(lrm_state->active_ops);
 422 
 423         if (removed || nremaining) {
 424             crm_notice("Stopped %u recurring operation%s at %s (%u remaining)",
 425                        removed, pcmk__plural_s(removed), when, nremaining);
 426         }
 427     }
 428 
 429     if (lrm_state->active_ops != NULL) {
 430         g_hash_table_iter_init(&gIter, lrm_state->active_ops);
 431         while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) {
 432             /* Ignore recurring actions in the shutdown calculations */
 433             if (pending->interval_ms == 0) {
 434                 counter++;
 435             }
 436         }
 437     }
 438 
 439     if (counter > 0) {
 440         do_crm_log(log_level, "%d pending executor operation%s at %s",
 441                    counter, pcmk__plural_s(counter), when);
 442 
 443         if ((cur_state == S_TERMINATE)
 444             || !pcmk_is_set(controld_globals.fsa_input_register,
 445                             R_SENT_RSC_STOP)) {
 446             g_hash_table_iter_init(&gIter, lrm_state->active_ops);
 447             while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) {
 448                 do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key);
 449             }
 450 
 451         } else {
 452             rc = FALSE;
 453         }
 454         return rc;
 455     }
 456 
 457     if (lrm_state->resource_history == NULL) {
 458         return rc;
 459     }
 460 
 461     if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
 462         /* At this point we're not waiting, we're just shutting down */
 463         when = "shutdown";
 464     }
 465 
 466     counter = 0;
 467     g_hash_table_iter_init(&gIter, lrm_state->resource_history);
 468     while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) {
 469         if (is_rsc_active(lrm_state, entry->id) == FALSE) {
 470             continue;
 471         }
 472 
 473         counter++;
 474         if (log_level == LOG_ERR) {
 475             crm_info("Found %s active at %s", entry->id, when);
 476         } else {
 477             crm_trace("Found %s active at %s", entry->id, when);
 478         }
 479         if (lrm_state->active_ops != NULL) {
 480             GHashTableIter hIter;
 481 
 482             g_hash_table_iter_init(&hIter, lrm_state->active_ops);
 483             while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) {
 484                 if (pcmk__str_eq(entry->id, pending->rsc_id, pcmk__str_none)) {
 485                     crm_notice("%sction %s (%s) incomplete at %s",
 486                                pending->interval_ms == 0 ? "A" : "Recurring a",
 487                                key, pending->op_key, when);
 488                 }
 489             }
 490         }
 491     }
 492 
 493     if (counter) {
 494         crm_err("%d resource%s active at %s",
 495                 counter, (counter == 1)? " was" : "s were", when);
 496     }
 497 
 498     return rc;
 499 }
 500 
 501 static gboolean
 502 is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id)
     /* [previous][next][first][last][top][bottom][index][help] */
 503 {
 504     rsc_history_t *entry = NULL;
 505 
 506     entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
 507     if (entry == NULL || entry->last == NULL) {
 508         return FALSE;
 509     }
 510 
 511     crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type,
 512               entry->last->interval_ms, entry->last->rc);
 513     if (entry->last->rc == PCMK_OCF_OK && pcmk__str_eq(entry->last->op_type, CRMD_ACTION_STOP, pcmk__str_casei)) {
 514         return FALSE;
 515 
 516     } else if (entry->last->rc == PCMK_OCF_OK
 517                && pcmk__str_eq(entry->last->op_type, CRMD_ACTION_MIGRATE, pcmk__str_casei)) {
 518         // A stricter check is too complex ... leave that to the scheduler
 519         return FALSE;
 520 
 521     } else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) {
 522         return FALSE;
 523 
 524     } else if ((entry->last->interval_ms == 0)
 525                && (entry->last->rc == PCMK_OCF_NOT_CONFIGURED)) {
 526         /* Badly configured resources can't be reliably stopped */
 527         return FALSE;
 528     }
 529 
 530     return TRUE;
 531 }
 532 
 533 static gboolean
 534 build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list)
     /* [previous][next][first][last][top][bottom][index][help] */
 535 {
 536     GHashTableIter iter;
 537     rsc_history_t *entry = NULL;
 538 
 539     g_hash_table_iter_init(&iter, lrm_state->resource_history);
 540     while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) {
 541 
 542         GList *gIter = NULL;
 543         xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE);
 544 
 545         crm_xml_add(xml_rsc, XML_ATTR_ID, entry->id);
 546         crm_xml_add(xml_rsc, XML_ATTR_TYPE, entry->rsc.type);
 547         crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, entry->rsc.standard);
 548         crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, entry->rsc.provider);
 549 
 550         if (entry->last && entry->last->params) {
 551             const char *container = g_hash_table_lookup(entry->last->params, CRM_META"_"XML_RSC_ATTR_CONTAINER);
 552             if (container) {
 553                 crm_trace("Resource %s is a part of container resource %s", entry->id, container);
 554                 crm_xml_add(xml_rsc, XML_RSC_ATTR_CONTAINER, container);
 555             }
 556         }
 557         controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->failed,
 558                                           lrm_state->node_name);
 559         controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->last,
 560                                           lrm_state->node_name);
 561         for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) {
 562             controld_add_resource_history_xml(xml_rsc, &(entry->rsc), gIter->data,
 563                                               lrm_state->node_name);
 564         }
 565     }
 566 
 567     return FALSE;
 568 }
 569 
 570 xmlNode *
 571 controld_query_executor_state(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 572 {
 573     xmlNode *xml_state = NULL;
 574     xmlNode *xml_data = NULL;
 575     xmlNode *rsc_list = NULL;
 576     crm_node_t *peer = NULL;
 577     lrm_state_t *lrm_state = lrm_state_find(controld_globals.our_nodename);
 578 
 579     if (!lrm_state) {
 580         crm_err("Could not find executor state for node %s",
 581                 controld_globals.our_nodename);
 582         return NULL;
 583     }
 584 
 585     peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY);
 586     CRM_CHECK(peer != NULL, return NULL);
 587 
 588     xml_state = create_node_state_update(peer,
 589                                          node_update_cluster|node_update_peer,
 590                                          NULL, __func__);
 591     if (xml_state == NULL) {
 592         return NULL;
 593     }
 594 
 595     xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM);
 596     crm_xml_add(xml_data, XML_ATTR_ID, peer->uuid);
 597     rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES);
 598 
 599     /* Build a list of active (not always running) resources */
 600     build_active_RAs(lrm_state, rsc_list);
 601 
 602     crm_log_xml_trace(xml_state, "Current executor state");
 603 
 604     return xml_state;
 605 }
 606 
 607 /*!
 608  * \internal
 609  * \brief Map standard Pacemaker return code to operation status and OCF code
 610  *
 611  * \param[out] event  Executor event whose status and return code should be set
 612  * \param[in]  rc     Standard Pacemaker return code
 613  */
 614 void
 615 controld_rc2event(lrmd_event_data_t *event, int rc)
     /* [previous][next][first][last][top][bottom][index][help] */
 616 {
 617     /* This is called for cleanup requests from controller peers/clients, not
 618      * for resource actions, so no exit reason is needed.
 619      */
 620     switch (rc) {
 621         case pcmk_rc_ok:
 622             lrmd__set_result(event, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
 623             break;
 624         case EACCES:
 625             lrmd__set_result(event, PCMK_OCF_INSUFFICIENT_PRIV,
 626                              PCMK_EXEC_ERROR, NULL);
 627             break;
 628         default:
 629             lrmd__set_result(event, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
 630                              NULL);
 631             break;
 632     }
 633 }
 634 
 635 /*!
 636  * \internal
 637  * \brief Trigger a new transition after CIB status was deleted
 638  *
 639  * If a CIB status delete was not expected (as part of the transition graph),
 640  * trigger a new transition by updating the (arbitrary) "last-lrm-refresh"
 641  * cluster property.
 642  *
 643  * \param[in] from_sys  IPC name that requested the delete
 644  * \param[in] rsc_id    Resource whose status was deleted (for logging only)
 645  */
 646 void
 647 controld_trigger_delete_refresh(const char *from_sys, const char *rsc_id)
     /* [previous][next][first][last][top][bottom][index][help] */
 648 {
 649     if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_casei)) {
 650         char *now_s = crm_strdup_printf("%lld", (long long) time(NULL));
 651 
 652         crm_debug("Triggering a refresh after %s cleaned %s", from_sys, rsc_id);
 653         cib__update_node_attr(controld_globals.logger_out,
 654                               controld_globals.cib_conn, cib_none,
 655                               XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL,
 656                               "last-lrm-refresh", now_s, NULL, NULL);
 657         free(now_s);
 658     }
 659 }
 660 
 661 static void
 662 notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc)
     /* [previous][next][first][last][top][bottom][index][help] */
 663 {
 664     lrmd_event_data_t *op = NULL;
 665     const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM);
 666     const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM);
 667 
 668     crm_info("Notifying %s on %s that %s was%s deleted",
 669              from_sys, (from_host? from_host : "localhost"), rsc_id,
 670              ((rc == pcmk_ok)? "" : " not"));
 671     op = construct_op(lrm_state, input->xml, rsc_id, CRMD_ACTION_DELETE);
 672     controld_rc2event(op, pcmk_legacy2rc(rc));
 673     controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id);
 674     lrmd_free_event(op);
 675     controld_trigger_delete_refresh(from_sys, rsc_id);
 676 }
 677 
 678 static gboolean
 679 lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 680 {
 681     struct delete_event_s *event = user_data;
 682     struct pending_deletion_op_s *op = value;
 683 
 684     if (pcmk__str_eq(event->rsc, op->rsc, pcmk__str_none)) {
 685         notify_deleted(event->lrm_state, op->input, event->rsc, event->rc);
 686         return TRUE;
 687     }
 688     return FALSE;
 689 }
 690 
 691 static gboolean
 692 lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 693 {
 694     const char *rsc = user_data;
 695     active_op_t *pending = value;
 696 
 697     if (pcmk__str_eq(rsc, pending->rsc_id, pcmk__str_none)) {
 698         crm_info("Removing op %s:%d for deleted resource %s",
 699                  pending->op_key, pending->call_id, rsc);
 700         return TRUE;
 701     }
 702     return FALSE;
 703 }
 704 
 705 static void
 706 delete_rsc_entry(lrm_state_t *lrm_state, ha_msg_input_t *input,
     /* [previous][next][first][last][top][bottom][index][help] */
 707                  const char *rsc_id, GHashTableIter *rsc_iter, int rc,
 708                  const char *user_name, bool from_cib)
 709 {
 710     struct delete_event_s event;
 711 
 712     CRM_CHECK(rsc_id != NULL, return);
 713 
 714     if (rc == pcmk_ok) {
 715         char *rsc_id_copy = strdup(rsc_id);
 716 
 717         if (rsc_iter) {
 718             g_hash_table_iter_remove(rsc_iter);
 719         } else {
 720             g_hash_table_remove(lrm_state->resource_history, rsc_id_copy);
 721         }
 722 
 723         if (from_cib) {
 724             controld_delete_resource_history(rsc_id_copy, lrm_state->node_name,
 725                                              user_name, crmd_cib_smart_opt());
 726         }
 727         g_hash_table_foreach_remove(lrm_state->active_ops,
 728                                     lrm_remove_deleted_op, rsc_id_copy);
 729         free(rsc_id_copy);
 730     }
 731 
 732     if (input) {
 733         notify_deleted(lrm_state, input, rsc_id, rc);
 734     }
 735 
 736     event.rc = rc;
 737     event.rsc = rsc_id;
 738     event.lrm_state = lrm_state;
 739     g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event);
 740 }
 741 
 742 static inline gboolean
 743 last_failed_matches_op(rsc_history_t *entry, const char *op, guint interval_ms)
     /* [previous][next][first][last][top][bottom][index][help] */
 744 {
 745     if (entry == NULL) {
 746         return FALSE;
 747     }
 748     if (op == NULL) {
 749         return TRUE;
 750     }
 751     return (pcmk__str_eq(op, entry->failed->op_type, pcmk__str_casei)
 752             && (interval_ms == entry->failed->interval_ms));
 753 }
 754 
 755 /*!
 756  * \internal
 757  * \brief Clear a resource's last failure
 758  *
 759  * Erase a resource's last failure on a particular node from both the
 760  * LRM resource history in the CIB, and the resource history remembered
 761  * for the LRM state.
 762  *
 763  * \param[in] rsc_id      Resource name
 764  * \param[in] node_name   Node name
 765  * \param[in] operation   If specified, only clear if matching this operation
 766  * \param[in] interval_ms If operation is specified, it has this interval
 767  */
 768 void
 769 lrm_clear_last_failure(const char *rsc_id, const char *node_name,
     /* [previous][next][first][last][top][bottom][index][help] */
 770                        const char *operation, guint interval_ms)
 771 {
 772     lrm_state_t *lrm_state = lrm_state_find(node_name);
 773 
 774     if (lrm_state == NULL) {
 775         return;
 776     }
 777     if (lrm_state->resource_history != NULL) {
 778         rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history,
 779                                                    rsc_id);
 780 
 781         if (last_failed_matches_op(entry, operation, interval_ms)) {
 782             lrmd_free_event(entry->failed);
 783             entry->failed = NULL;
 784         }
 785     }
 786 }
 787 
 788 /* Returns: gboolean - cancellation is in progress */
 789 static gboolean
 790 cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove)
     /* [previous][next][first][last][top][bottom][index][help] */
 791 {
 792     int rc = pcmk_ok;
 793     char *local_key = NULL;
 794     active_op_t *pending = NULL;
 795 
 796     CRM_CHECK(op != 0, return FALSE);
 797     CRM_CHECK(rsc_id != NULL, return FALSE);
 798     if (key == NULL) {
 799         local_key = make_stop_id(rsc_id, op);
 800         key = local_key;
 801     }
 802     pending = g_hash_table_lookup(lrm_state->active_ops, key);
 803 
 804     if (pending) {
 805         if (remove && !pcmk_is_set(pending->flags, active_op_remove)) {
 806             controld_set_active_op_flags(pending, active_op_remove);
 807             crm_debug("Scheduling %s for removal", key);
 808         }
 809 
 810         if (pcmk_is_set(pending->flags, active_op_cancelled)) {
 811             crm_debug("Operation %s already cancelled", key);
 812             free(local_key);
 813             return FALSE;
 814         }
 815         controld_set_active_op_flags(pending, active_op_cancelled);
 816 
 817     } else {
 818         crm_info("No pending op found for %s", key);
 819         free(local_key);
 820         return FALSE;
 821     }
 822 
 823     crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key);
 824     rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type,
 825                           pending->interval_ms);
 826     if (rc == pcmk_ok) {
 827         crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key);
 828         free(local_key);
 829         return TRUE;
 830     }
 831 
 832     crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key);
 833     /* The caller needs to make sure the entry is
 834      * removed from the active operations list
 835      *
 836      * Usually by returning TRUE inside the worker function
 837      * supplied to g_hash_table_foreach_remove()
 838      *
 839      * Not removing the entry from active operations will block
 840      * the node from shutting down
 841      */
 842     free(local_key);
 843     return FALSE;
 844 }
 845 
// State shared between cancel_op_key() and its cancel_action_by_key() worker
struct cancel_data {
    gboolean done;          // Set to TRUE once an operation with a matching key is seen
    gboolean remove;        // Forwarded to cancel_op() as its "remove" argument
    const char *key;        // Operation key compared against each entry's op_key
    lrmd_rsc_info_t *rsc;   // Resource whose ID is passed to cancel_op()
    lrm_state_t *lrm_state; // Executor state passed to cancel_op()
};
 853 
 854 static gboolean
 855 cancel_action_by_key(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 856 {
 857     gboolean remove = FALSE;
 858     struct cancel_data *data = user_data;
 859     active_op_t *op = value;
 860 
 861     if (pcmk__str_eq(op->op_key, data->key, pcmk__str_none)) {
 862         data->done = TRUE;
 863         remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove);
 864     }
 865     return remove;
 866 }
 867 
 868 static gboolean
 869 cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove)
     /* [previous][next][first][last][top][bottom][index][help] */
 870 {
 871     guint removed = 0;
 872     struct cancel_data data;
 873 
 874     CRM_CHECK(rsc != NULL, return FALSE);
 875     CRM_CHECK(key != NULL, return FALSE);
 876 
 877     data.key = key;
 878     data.rsc = rsc;
 879     data.done = FALSE;
 880     data.remove = remove;
 881     data.lrm_state = lrm_state;
 882 
 883     removed = g_hash_table_foreach_remove(lrm_state->active_ops,
 884                                           cancel_action_by_key, &data);
 885     crm_trace("Removed %u op cache entries, new size: %u",
 886               removed, g_hash_table_size(lrm_state->active_ops));
 887     return data.done;
 888 }
 889 
 890 /*!
 891  * \internal
 892  * \brief Retrieve resource information from LRM
 893  *
 894  * \param[in,out]  lrm_state  Executor connection state to use
 895  * \param[in]      rsc_xml    XML containing resource configuration
 896  * \param[in]      do_create  If true, register resource if not already
 897  * \param[out]     rsc_info   Where to store information obtained from executor
 898  *
 899  * \retval pcmk_ok   Success (and rsc_info holds newly allocated result)
 900  * \retval -EINVAL   Required information is missing from arguments
 901  * \retval -ENOTCONN No active connection to LRM
 902  * \retval -ENODEV   Resource not found
 903  * \retval -errno    Error communicating with executor when registering resource
 904  *
 905  * \note Caller is responsible for freeing result on success.
 906  */
 907 static int
 908 get_lrm_resource(lrm_state_t *lrm_state, const xmlNode *rsc_xml,
     /* [previous][next][first][last][top][bottom][index][help] */
 909                  gboolean do_create, lrmd_rsc_info_t **rsc_info)
 910 {
 911     const char *id = ID(rsc_xml);
 912 
 913     CRM_CHECK(lrm_state && rsc_xml && rsc_info, return -EINVAL);
 914     CRM_CHECK(id, return -EINVAL);
 915 
 916     if (lrm_state_is_connected(lrm_state) == FALSE) {
 917         return -ENOTCONN;
 918     }
 919 
 920     crm_trace("Retrieving resource information for %s from the executor", id);
 921     *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0);
 922 
 923     // If resource isn't known by ID, try clone name, if provided
 924     if (!*rsc_info) {
 925         const char *long_id = crm_element_value(rsc_xml, XML_ATTR_ID_LONG);
 926 
 927         if (long_id) {
 928             *rsc_info = lrm_state_get_rsc_info(lrm_state, long_id, 0);
 929         }
 930     }
 931 
 932     if ((*rsc_info == NULL) && do_create) {
 933         const char *class = crm_element_value(rsc_xml, XML_AGENT_ATTR_CLASS);
 934         const char *provider = crm_element_value(rsc_xml, XML_AGENT_ATTR_PROVIDER);
 935         const char *type = crm_element_value(rsc_xml, XML_ATTR_TYPE);
 936         int rc;
 937 
 938         crm_trace("Registering resource %s with the executor", id);
 939         rc = lrm_state_register_rsc(lrm_state, id, class, provider, type,
 940                                     lrmd_opt_drop_recurring);
 941         if (rc != pcmk_ok) {
 942             fsa_data_t *msg_data = NULL;
 943 
 944             crm_err("Could not register resource %s with the executor on %s: %s "
 945                     CRM_XS " rc=%d",
 946                     id, lrm_state->node_name, pcmk_strerror(rc), rc);
 947 
 948             /* Register this as an internal error if this involves the local
 949              * executor. Otherwise, we're likely dealing with an unresponsive
 950              * remote node, which is not an FSA failure.
 951              */
 952             if (lrm_state_is_local(lrm_state) == TRUE) {
 953                 register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
 954             }
 955             return rc;
 956         }
 957 
 958         *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0);
 959     }
 960     return *rsc_info? pcmk_ok : -ENODEV;
 961 }
 962 
 963 static void
 964 delete_resource(lrm_state_t *lrm_state, const char *id, lrmd_rsc_info_t *rsc,
     /* [previous][next][first][last][top][bottom][index][help] */
 965                 GHashTableIter *iter, const char *sys, const char *user,
 966                 ha_msg_input_t *request, bool unregister, bool from_cib)
 967 {
 968     int rc = pcmk_ok;
 969 
 970     crm_info("Removing resource %s from executor for %s%s%s",
 971              id, sys, (user? " as " : ""), (user? user : ""));
 972 
 973     if (rsc && unregister) {
 974         rc = lrm_state_unregister_rsc(lrm_state, id, 0);
 975     }
 976 
 977     if (rc == pcmk_ok) {
 978         crm_trace("Resource %s deleted from executor", id);
 979     } else if (rc == -EINPROGRESS) {
 980         crm_info("Deletion of resource '%s' from executor is pending", id);
 981         if (request) {
 982             struct pending_deletion_op_s *op = NULL;
 983             char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE);
 984 
 985             op = calloc(1, sizeof(struct pending_deletion_op_s));
 986             op->rsc = strdup(rsc->id);
 987             op->input = copy_ha_msg_input(request);
 988             g_hash_table_insert(lrm_state->deletion_ops, ref, op);
 989         }
 990         return;
 991     } else {
 992         crm_warn("Could not delete '%s' from executor for %s%s%s: %s "
 993                  CRM_XS " rc=%d", id, sys, (user? " as " : ""),
 994                  (user? user : ""), pcmk_strerror(rc), rc);
 995     }
 996 
 997     delete_rsc_entry(lrm_state, request, id, iter, rc, user, from_cib);
 998 }
 999 
1000 static int
1001 get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id)
     /* [previous][next][first][last][top][bottom][index][help] */
1002 {
1003     int call_id = 999999999;
1004     rsc_history_t *entry = NULL;
1005 
1006     if(lrm_state) {
1007         entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
1008     }
1009 
1010     /* Make sure the call id is greater than the last successful operation,
1011      * otherwise the failure will not result in a possible recovery of the resource
1012      * as it could appear the failure occurred before the successful start */
1013     if (entry) {
1014         call_id = entry->last_callid + 1;
1015     }
1016 
1017     if (call_id < 0) {
1018         call_id = 1;
1019     }
1020     return call_id;
1021 }
1022 
1023 static void
1024 fake_op_status(lrm_state_t *lrm_state, lrmd_event_data_t *op, int op_status,
     /* [previous][next][first][last][top][bottom][index][help] */
1025                enum ocf_exitcode op_exitcode, const char *exit_reason)
1026 {
1027     op->call_id = get_fake_call_id(lrm_state, op->rsc_id);
1028     op->t_run = time(NULL);
1029     op->t_rcchange = op->t_run;
1030     lrmd__set_result(op, op_exitcode, op_status, exit_reason);
1031 }
1032 
1033 static void
1034 force_reprobe(lrm_state_t *lrm_state, const char *from_sys,
     /* [previous][next][first][last][top][bottom][index][help] */
1035               const char *from_host, const char *user_name,
1036               gboolean is_remote_node, bool reprobe_all_nodes)
1037 {
1038     GHashTableIter gIter;
1039     rsc_history_t *entry = NULL;
1040 
1041     crm_info("Clearing resource history on node %s", lrm_state->node_name);
1042     g_hash_table_iter_init(&gIter, lrm_state->resource_history);
1043     while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
1044         /* only unregister the resource during a reprobe if it is not a remote connection
1045          * resource. otherwise unregistering the connection will terminate remote-node
1046          * membership */
1047         bool unregister = true;
1048 
1049         if (is_remote_lrmd_ra(NULL, NULL, entry->id)) {
1050             unregister = false;
1051 
1052             if (reprobe_all_nodes) {
1053                 lrm_state_t *remote_lrm_state = lrm_state_find(entry->id);
1054 
1055                 if (remote_lrm_state != NULL) {
1056                     /* If reprobing all nodes, be sure to reprobe the remote
1057                      * node before clearing its connection resource
1058                      */
1059                     force_reprobe(remote_lrm_state, from_sys, from_host,
1060                                   user_name, TRUE, reprobe_all_nodes);
1061                 }
1062             }
1063         }
1064 
1065         /* Don't delete from the CIB, since we'll delete the whole node's LRM
1066          * state from the CIB soon
1067          */
1068         delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys,
1069                         user_name, NULL, unregister, false);
1070     }
1071 
1072     /* Now delete the copy in the CIB */
1073     controld_delete_node_state(lrm_state->node_name, controld_section_lrm,
1074                                cib_scope_local);
1075 
1076     // @COMPAT DCs < 1.1.14 need this deleted (in case it was explicitly false)
1077     update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node);
1078 }
1079 
1080 /*!
1081  * \internal
1082  * \brief Fail a requested action without actually executing it
1083  *
1084  * For an action that can't be executed, process it similarly to an actual
1085  * execution result, with specified error status (except for notify actions,
1086  * which will always be treated as successful).
1087  *
1088  * \param[in,out] lrm_state    Executor connection that action is for
1089  * \param[in]     action       Action XML from request
1090  * \param[in]     rc           Desired return code to use
1091  * \param[in]     op_status    Desired operation status to use
1092  * \param[in]     exit_reason  Human-friendly detail, if error
1093  */
1094 static void
1095 synthesize_lrmd_failure(lrm_state_t *lrm_state, const xmlNode *action,
     /* [previous][next][first][last][top][bottom][index][help] */
1096                         int op_status, enum ocf_exitcode rc,
1097                         const char *exit_reason)
1098 {
1099     lrmd_event_data_t *op = NULL;
1100     const char *operation = crm_element_value(action, XML_LRM_ATTR_TASK);
1101     const char *target_node = crm_element_value(action, XML_LRM_ATTR_TARGET);
1102     xmlNode *xml_rsc = find_xml_node(action, XML_CIB_TAG_RESOURCE, TRUE);
1103 
1104     if ((xml_rsc == NULL) || (ID(xml_rsc) == NULL)) {
1105         /* @TODO Should we do something else, like direct ack? */
1106         crm_info("Can't fake %s failure (%d) on %s without resource configuration",
1107                  crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc,
1108                  target_node);
1109         return;
1110 
1111     } else if(operation == NULL) {
1112         /* This probably came from crm_resource -C, nothing to do */
1113         crm_info("Can't fake %s failure (%d) on %s without operation",
1114                  ID(xml_rsc), rc, target_node);
1115         return;
1116     }
1117 
1118     op = construct_op(lrm_state, action, ID(xml_rsc), operation);
1119 
1120     if (pcmk__str_eq(operation, RSC_NOTIFY, pcmk__str_casei)) { // Notifications can't fail
1121         fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_OK, NULL);
1122     } else {
1123         fake_op_status(lrm_state, op, op_status, rc, exit_reason);
1124     }
1125 
1126     crm_info("Faking " PCMK__OP_FMT " result (%d) on %s",
1127              op->rsc_id, op->op_type, op->interval_ms, op->rc, target_node);
1128 
1129     // Process the result as if it came from the LRM
1130     process_lrm_event(lrm_state, op, NULL, action);
1131     lrmd_free_event(op);
1132 }
1133 
1134 /*!
1135  * \internal
1136  * \brief Get target of an LRM operation (replacing \p NULL with local node
1137  *        name)
1138  *
1139  * \param[in] xml  LRM operation data XML
1140  *
1141  * \return LRM operation target node name (local node or Pacemaker Remote node)
1142  */
1143 static const char *
1144 lrm_op_target(const xmlNode *xml)
     /* [previous][next][first][last][top][bottom][index][help] */
1145 {
1146     const char *target = NULL;
1147 
1148     if (xml) {
1149         target = crm_element_value(xml, XML_LRM_ATTR_TARGET);
1150     }
1151     if (target == NULL) {
1152         target = controld_globals.our_nodename;
1153     }
1154     return target;
1155 }
1156 
1157 static void
1158 fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name,
     /* [previous][next][first][last][top][bottom][index][help] */
1159                   const char *from_host, const char *from_sys)
1160 {
1161     lrmd_event_data_t *op = NULL;
1162     lrmd_rsc_info_t *rsc = NULL;
1163     xmlNode *xml_rsc = find_xml_node(xml, XML_CIB_TAG_RESOURCE, TRUE);
1164 
1165     CRM_CHECK(xml_rsc != NULL, return);
1166 
1167     /* The executor simply executes operations and reports the results, without
1168      * any concept of success or failure, so to fail a resource, we must fake
1169      * what a failure looks like.
1170      *
1171      * To do this, we create a fake executor operation event for the resource,
1172      * and pass that event to the executor client callback so it will be
1173      * processed as if it came from the executor.
1174      */
1175     op = construct_op(lrm_state, xml, ID(xml_rsc), "asyncmon");
1176 
1177     free((char*) op->user_data);
1178     op->user_data = NULL;
1179     op->interval_ms = 0;
1180 
1181     if (user_name && !pcmk__is_privileged(user_name)) {
1182         crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc));
1183         fake_op_status(lrm_state, op, PCMK_EXEC_ERROR,
1184                        PCMK_OCF_INSUFFICIENT_PRIV,
1185                        "Unprivileged user cannot fail resources");
1186         controld_ack_event_directly(from_host, from_sys, NULL, op, ID(xml_rsc));
1187         lrmd_free_event(op);
1188         return;
1189     }
1190 
1191 
1192     if (get_lrm_resource(lrm_state, xml_rsc, TRUE, &rsc) == pcmk_ok) {
1193         crm_info("Failing resource %s...", rsc->id);
1194         fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_UNKNOWN_ERROR,
1195                        "Simulated failure");
1196         process_lrm_event(lrm_state, op, NULL, xml);
1197         op->rc = PCMK_OCF_OK; // The request to fail the resource succeeded
1198         lrmd_free_rsc_info(rsc);
1199 
1200     } else {
1201         crm_info("Cannot find/create resource in order to fail it...");
1202         crm_log_xml_warn(xml, "bad input");
1203         fake_op_status(lrm_state, op, PCMK_EXEC_ERROR, PCMK_OCF_UNKNOWN_ERROR,
1204                        "Cannot fail unknown resource");
1205     }
1206 
1207     controld_ack_event_directly(from_host, from_sys, NULL, op, ID(xml_rsc));
1208     lrmd_free_event(op);
1209 }
1210 
1211 static void
1212 handle_reprobe_op(lrm_state_t *lrm_state, const char *from_sys,
     /* [previous][next][first][last][top][bottom][index][help] */
1213                   const char *from_host, const char *user_name,
1214                   gboolean is_remote_node, bool reprobe_all_nodes)
1215 {
1216     crm_notice("Forcing the status of all resources to be redetected");
1217     force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node,
1218                   reprobe_all_nodes);
1219 
1220     if (!pcmk__strcase_any_of(from_sys, CRM_SYSTEM_PENGINE, CRM_SYSTEM_TENGINE, NULL)) {
1221 
1222         xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, NULL, from_host,
1223                                         from_sys, CRM_SYSTEM_LRMD,
1224                                         controld_globals.our_uuid);
1225 
1226         crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host);
1227 
1228         if (relay_message(reply, TRUE) == FALSE) {
1229             crm_log_xml_err(reply, "Unable to route reply");
1230         }
1231         free_xml(reply);
1232     }
1233 }
1234 
1235 static bool do_lrm_cancel(ha_msg_input_t *input, lrm_state_t *lrm_state,
     /* [previous][next][first][last][top][bottom][index][help] */
1236               lrmd_rsc_info_t *rsc, const char *from_host, const char *from_sys)
1237 {
1238     char *op_key = NULL;
1239     char *meta_key = NULL;
1240     int call = 0;
1241     const char *call_id = NULL;
1242     const char *op_task = NULL;
1243     guint interval_ms = 0;
1244     gboolean in_progress = FALSE;
1245     xmlNode *params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE);
1246 
1247     CRM_CHECK(params != NULL, return FALSE);
1248 
1249     meta_key = crm_meta_name(XML_LRM_ATTR_TASK);
1250     op_task = crm_element_value(params, meta_key);
1251     free(meta_key);
1252     CRM_CHECK(op_task != NULL, return FALSE);
1253 
1254     meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL_MS);
1255     if (crm_element_value_ms(params, meta_key, &interval_ms) != pcmk_ok) {
1256         free(meta_key);
1257         return FALSE;
1258     }
1259     free(meta_key);
1260 
1261     op_key = pcmk__op_key(rsc->id, op_task, interval_ms);
1262 
1263     meta_key = crm_meta_name(XML_LRM_ATTR_CALLID);
1264     call_id = crm_element_value(params, meta_key);
1265     free(meta_key);
1266 
1267     crm_debug("Scheduler requested op %s (call=%s) be cancelled",
1268               op_key, (call_id? call_id : "NA"));
1269     pcmk__scan_min_int(call_id, &call, 0);
1270     if (call == 0) {
1271         // Normal case when the scheduler cancels a recurring op
1272         in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE);
1273 
1274     } else {
1275         // Normal case when the scheduler cancels an orphan op
1276         in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE);
1277     }
1278 
1279     // Acknowledge cancellation operation if for a remote connection resource
1280     if (!in_progress || is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
1281         char *op_id = make_stop_id(rsc->id, call);
1282 
1283         if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) {
1284             crm_info("Nothing known about operation %d for %s", call, op_key);
1285         }
1286         controld_delete_action_history_by_key(rsc->id, lrm_state->node_name,
1287                                               op_key, call);
1288         send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task,
1289                          from_host, from_sys);
1290 
1291         /* needed at least for cancellation of a remote operation */
1292         if (lrm_state->active_ops != NULL) {
1293             g_hash_table_remove(lrm_state->active_ops, op_id);
1294         }
1295         free(op_id);
1296 
1297     } else {
1298         /* No ack is needed since abcdaa8, but peers with older versions
1299          * in a rolling upgrade need one. We didn't bump the feature set
1300          * at that commit, so we can only compare against the previous
1301          * CRM version (3.0.8). If any peers have feature set 3.0.9 but
1302          * not abcdaa8, they will time out waiting for the ack (no
1303          * released versions of Pacemaker are affected).
1304          */
1305         const char *peer_version = crm_element_value(params, XML_ATTR_CRM_VERSION);
1306 
1307         if (compare_version(peer_version, "3.0.8") <= 0) {
1308             crm_info("Sending compatibility ack for %s cancellation to %s (CRM version %s)",
1309                      op_key, from_host, peer_version);
1310             send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task,
1311                              from_host, from_sys);
1312         }
1313     }
1314 
1315     free(op_key);
1316     return TRUE;
1317 }
1318 
1319 static void
1320 do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state,
     /* [previous][next][first][last][top][bottom][index][help] */
1321               lrmd_rsc_info_t *rsc, const char *from_sys, const char *from_host,
1322               bool crm_rsc_delete, const char *user_name)
1323 {
1324     bool unregister = true;
1325     int cib_rc = controld_delete_resource_history(rsc->id, lrm_state->node_name,
1326                                                   user_name,
1327                                                   cib_dryrun|cib_sync_call);
1328 
1329     if (cib_rc != pcmk_rc_ok) {
1330         lrmd_event_data_t *op = NULL;
1331 
1332         op = construct_op(lrm_state, input->xml, rsc->id, CRMD_ACTION_DELETE);
1333 
1334         /* These are resource clean-ups, not actions, so no exit reason is
1335          * needed.
1336          */
1337         lrmd__set_result(op, pcmk_rc2ocf(cib_rc), PCMK_EXEC_ERROR, NULL);
1338         controld_ack_event_directly(from_host, from_sys, NULL, op, rsc->id);
1339         lrmd_free_event(op);
1340         return;
1341     }
1342 
1343     if (crm_rsc_delete && is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
1344         unregister = false;
1345     }
1346 
1347     delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys,
1348                     user_name, input, unregister, true);
1349 }
1350 
/* User data for asynchronous metadata execution
 *
 * Instances are created by new_metadata_cb_data(), which copies both members,
 * and released by free_metadata_cb_data(), which frees both copies.
 */
struct metadata_cb_data {
    lrmd_rsc_info_t *rsc;   // Copy of resource information
    xmlNode *input_xml;     // Copy of FSA input XML
};
1356 
1357 static struct metadata_cb_data *
1358 new_metadata_cb_data(lrmd_rsc_info_t *rsc, xmlNode *input_xml)
     /* [previous][next][first][last][top][bottom][index][help] */
1359 {
1360     struct metadata_cb_data *data = NULL;
1361 
1362     data = calloc(1, sizeof(struct metadata_cb_data));
1363     CRM_ASSERT(data != NULL);
1364     data->input_xml = copy_xml(input_xml);
1365     data->rsc = lrmd_copy_rsc_info(rsc);
1366     return data;
1367 }
1368 
1369 static void
1370 free_metadata_cb_data(struct metadata_cb_data *data)
     /* [previous][next][first][last][top][bottom][index][help] */
1371 {
1372     lrmd_free_rsc_info(data->rsc);
1373     free_xml(data->input_xml);
1374     free(data);
1375 }
1376 
1377 /*!
1378  * \internal
1379  * \brief Execute an action after metadata has been retrieved
1380  *
1381  * \param[in] pid        Ignored
1382  * \param[in] result     Result of metadata action
1383  * \param[in] user_data  Metadata callback data
1384  */
1385 static void
1386 metadata_complete(int pid, const pcmk__action_result_t *result, void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1387 {
1388     struct metadata_cb_data *data = (struct metadata_cb_data *) user_data;
1389 
1390     struct ra_metadata_s *md = NULL;
1391     lrm_state_t *lrm_state = lrm_state_find(lrm_op_target(data->input_xml));
1392 
1393     if ((lrm_state != NULL) && pcmk__result_ok(result)) {
1394         md = controld_cache_metadata(lrm_state->metadata_cache, data->rsc,
1395                                      result->action_stdout);
1396     }
1397     do_lrm_rsc_op(lrm_state, data->rsc, data->input_xml, md);
1398     free_metadata_cb_data(data);
1399 }
1400 
1401 /*       A_LRM_INVOKE   */
1402 void
1403 do_lrm_invoke(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
1404               enum crmd_fsa_cause cause,
1405               enum crmd_fsa_state cur_state,
1406               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
1407 {
1408     lrm_state_t *lrm_state = NULL;
1409     const char *crm_op = NULL;
1410     const char *from_sys = NULL;
1411     const char *from_host = NULL;
1412     const char *operation = NULL;
1413     ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
1414     const char *user_name = NULL;
1415     const char *target_node = lrm_op_target(input->xml);
1416     gboolean is_remote_node = FALSE;
1417     bool crm_rsc_delete = FALSE;
1418 
1419     // Message routed to the local node is targeting a specific, non-local node
1420     is_remote_node = !pcmk__str_eq(target_node, controld_globals.our_nodename,
1421                                    pcmk__str_casei);
1422 
1423     lrm_state = lrm_state_find(target_node);
1424     if ((lrm_state == NULL) && is_remote_node) {
1425         crm_err("Failing action because local node has never had connection to remote node %s",
1426                 target_node);
1427         synthesize_lrmd_failure(NULL, input->xml, PCMK_EXEC_NOT_CONNECTED,
1428                                 PCMK_OCF_UNKNOWN_ERROR,
1429                                 "Local node has no connection to remote");
1430         return;
1431     }
1432     CRM_ASSERT(lrm_state != NULL);
1433 
1434     user_name = pcmk__update_acl_user(input->msg, F_CRM_USER, NULL);
1435     crm_op = crm_element_value(input->msg, F_CRM_TASK);
1436     from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM);
1437     if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
1438         from_host = crm_element_value(input->msg, F_CRM_HOST_FROM);
1439     }
1440 
1441     if (pcmk__str_eq(crm_op, CRM_OP_LRM_DELETE, pcmk__str_none)) {
1442         if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
1443             crm_rsc_delete = TRUE; // from crm_resource
1444         }
1445         operation = CRMD_ACTION_DELETE;
1446 
1447     } else if (input->xml != NULL) {
1448         operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK);
1449     }
1450 
1451     CRM_CHECK(!pcmk__str_empty(crm_op) || !pcmk__str_empty(operation), return);
1452 
1453     crm_trace("'%s' execution request from %s as %s user",
1454               pcmk__s(crm_op, operation),
1455               pcmk__s(from_sys, "unknown subsystem"),
1456               pcmk__s(user_name, "current"));
1457 
1458     if (pcmk__str_eq(crm_op, CRM_OP_LRM_FAIL, pcmk__str_none)) {
1459         fail_lrm_resource(input->xml, lrm_state, user_name, from_host,
1460                           from_sys);
1461 
1462     } else if (pcmk__str_eq(crm_op, CRM_OP_LRM_REFRESH, pcmk__str_none)) {
1463         /* @COMPAT This can only be sent by crm_resource --refresh on a
1464          * Pacemaker Remote node running Pacemaker 1.1.9, which is extremely
1465          * unlikely. It previously would cause the controller to re-write its
1466          * resource history to the CIB. Just ignore it.
1467          */
1468         crm_notice("Ignoring refresh request from Pacemaker Remote 1.1.9 node");
1469 
1470     // @COMPAT DCs <1.1.14 in a rolling upgrade might schedule this op
1471     } else if (pcmk__str_eq(operation, CRM_OP_PROBED, pcmk__str_none)) {
1472         update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE,
1473                      user_name, is_remote_node);
1474 
1475     } else if (pcmk__str_eq(crm_op, CRM_OP_REPROBE, pcmk__str_none)
1476                || pcmk__str_eq(operation, CRM_OP_REPROBE, pcmk__str_none)) {
1477         const char *raw_target = NULL;
1478 
1479         if (input->xml != NULL) {
1480             // For CRM_OP_REPROBE, a NULL target means we're targeting all nodes
1481             raw_target = crm_element_value(input->xml, XML_LRM_ATTR_TARGET);
1482         }
1483         handle_reprobe_op(lrm_state, from_sys, from_host, user_name,
1484                           is_remote_node, (raw_target == NULL));
1485 
1486     } else if (operation != NULL) {
1487         lrmd_rsc_info_t *rsc = NULL;
1488         xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE);
1489         gboolean create_rsc = !pcmk__str_eq(operation, CRMD_ACTION_DELETE,
1490                                             pcmk__str_none);
1491         int rc;
1492 
1493         // We can't return anything meaningful without a resource ID
1494         CRM_CHECK(xml_rsc && ID(xml_rsc), return);
1495 
1496         rc = get_lrm_resource(lrm_state, xml_rsc, create_rsc, &rsc);
1497         if (rc == -ENOTCONN) {
1498             synthesize_lrmd_failure(lrm_state, input->xml,
1499                                     PCMK_EXEC_NOT_CONNECTED,
1500                                     PCMK_OCF_UNKNOWN_ERROR,
1501                                     "Not connected to remote executor");
1502             return;
1503 
1504         } else if ((rc < 0) && !create_rsc) {
1505             /* Delete of malformed or nonexistent resource
1506              * (deleting something that does not exist is a success)
1507              */
1508             crm_notice("Not registering resource '%s' for a %s event "
1509                        CRM_XS " get-rc=%d (%s) transition-key=%s",
1510                        ID(xml_rsc), operation,
1511                        rc, pcmk_strerror(rc), ID(input->xml));
1512             delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok,
1513                              user_name, true);
1514             return;
1515 
1516         } else if (rc == -EINVAL) {
1517             // Resource operation on malformed resource
1518             crm_err("Invalid resource definition for %s", ID(xml_rsc));
1519             crm_log_xml_warn(input->msg, "invalid resource");
1520             synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR,
1521                                     PCMK_OCF_NOT_CONFIGURED, // fatal error
1522                                     "Invalid resource definition");
1523             return;
1524 
1525         } else if (rc < 0) {
1526             // Error communicating with the executor
1527             crm_err("Could not register resource '%s' with executor: %s "
1528                     CRM_XS " rc=%d",
1529                     ID(xml_rsc), pcmk_strerror(rc), rc);
1530             crm_log_xml_warn(input->msg, "failed registration");
1531             synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR,
1532                                     PCMK_OCF_INVALID_PARAM, // hard error
1533                                     "Could not register resource with executor");
1534             return;
1535         }
1536 
1537         if (pcmk__str_eq(operation, CRMD_ACTION_CANCEL, pcmk__str_none)) {
1538             if (!do_lrm_cancel(input, lrm_state, rsc, from_host, from_sys)) {
1539                 crm_log_xml_warn(input->xml, "Bad command");
1540             }
1541 
1542         } else if (pcmk__str_eq(operation, CRMD_ACTION_DELETE, pcmk__str_none)) {
1543             do_lrm_delete(input, lrm_state, rsc, from_sys, from_host,
1544                           crm_rsc_delete, user_name);
1545 
1546         } else {
1547             struct ra_metadata_s *md = NULL;
1548 
1549             /* Getting metadata from cache is OK except for start actions --
1550              * always refresh from the agent for those, in case the resource
1551              * agent was updated.
1552              *
1553              * @TODO Only refresh metadata for starts if the agent actually
1554              * changed (using something like inotify, or a hash or modification
1555              * time of the agent executable).
1556              */
1557             if (strcmp(operation, CRMD_ACTION_START) != 0) {
1558                 md = controld_get_rsc_metadata(lrm_state, rsc,
1559                                                controld_metadata_from_cache);
1560             }
1561 
1562             if ((md == NULL) && crm_op_needs_metadata(rsc->standard,
1563                                                       operation)) {
1564                 /* Most likely, we'll need the agent metadata to record the
1565                  * pending operation and the operation result. Get it now rather
1566                  * than wait until then, so the metadata action doesn't eat into
1567                  * the real action's timeout.
1568                  *
1569                  * @TODO Metadata is retrieved via direct execution of the
1570                  * agent, which has a couple of related issues: the executor
1571                  * should execute agents, not the controller; and metadata for
1572                  * Pacemaker Remote nodes should be collected on those nodes,
1573                  * not locally.
1574                  */
1575                 struct metadata_cb_data *data = NULL;
1576 
1577                 data = new_metadata_cb_data(rsc, input->xml);
1578                 crm_info("Retrieving metadata for %s (%s%s%s:%s) asynchronously",
1579                          rsc->id, rsc->standard,
1580                          ((rsc->provider == NULL)? "" : ":"),
1581                          ((rsc->provider == NULL)? "" : rsc->provider),
1582                          rsc->type);
1583                 (void) lrmd__metadata_async(rsc, metadata_complete,
1584                                             (void *) data);
1585             } else {
1586                 do_lrm_rsc_op(lrm_state, rsc, input->xml, md);
1587             }
1588         }
1589 
1590         lrmd_free_rsc_info(rsc);
1591 
1592     } else {
1593         crm_err("Invalid execution request: unknown command '%s' (bug?)",
1594                 crm_op);
1595         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
1596     }
1597 }
1598 
1599 static lrmd_event_data_t *
1600 construct_op(const lrm_state_t *lrm_state, const xmlNode *rsc_op,
     /* [previous][next][first][last][top][bottom][index][help] */
1601              const char *rsc_id, const char *operation)
1602 {
1603     lrmd_event_data_t *op = NULL;
1604     const char *op_delay = NULL;
1605     const char *op_timeout = NULL;
1606     GHashTable *params = NULL;
1607 
1608     xmlNode *primitive = NULL;
1609     const char *class = NULL;
1610 
1611     const char *transition = NULL;
1612 
1613     CRM_ASSERT(rsc_id && operation);
1614 
1615     op = lrmd_new_event(rsc_id, operation, 0);
1616     op->type = lrmd_event_exec_complete;
1617     op->timeout = 0;
1618     op->start_delay = 0;
1619     lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL);
1620 
1621     if (rsc_op == NULL) {
1622         CRM_LOG_ASSERT(pcmk__str_eq(CRMD_ACTION_STOP, operation, pcmk__str_casei));
1623         op->user_data = NULL;
1624         /* the stop_all_resources() case
1625          * by definition there is no DC (or they'd be shutting
1626          *   us down).
1627          * So we should put our version here.
1628          */
1629         op->params = pcmk__strkey_table(free, free);
1630 
1631         g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET));
1632 
1633         crm_trace("Constructed %s op for %s", operation, rsc_id);
1634         return op;
1635     }
1636 
1637     params = xml2list(rsc_op);
1638     g_hash_table_remove(params, CRM_META "_op_target_rc");
1639 
1640     op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY);
1641     pcmk__scan_min_int(op_delay, &op->start_delay, 0);
1642 
1643     op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT);
1644     pcmk__scan_min_int(op_timeout, &op->timeout, 0);
1645 
1646     if (pcmk__guint_from_hash(params, CRM_META "_" XML_LRM_ATTR_INTERVAL_MS, 0,
1647                               &(op->interval_ms)) != pcmk_rc_ok) {
1648         op->interval_ms = 0;
1649     }
1650 
1651     /* Use pcmk_monitor_timeout instead of meta timeout for stonith
1652        recurring monitor, if set */
1653     primitive = find_xml_node(rsc_op, XML_CIB_TAG_RESOURCE, FALSE);
1654     class = crm_element_value(primitive, XML_AGENT_ATTR_CLASS);
1655 
1656     if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_fence_params)
1657             && pcmk__str_eq(operation, CRMD_ACTION_STATUS, pcmk__str_casei)
1658             && (op->interval_ms > 0)) {
1659 
1660         op_timeout = g_hash_table_lookup(params, "pcmk_monitor_timeout");
1661         if (op_timeout != NULL) {
1662             op->timeout = crm_get_msec(op_timeout);
1663         }
1664     }
1665 
1666     if (!pcmk__str_eq(operation, RSC_STOP, pcmk__str_casei)) {
1667         op->params = params;
1668 
1669     } else {
1670         rsc_history_t *entry = NULL;
1671 
1672         if (lrm_state) {
1673             entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
1674         }
1675 
1676         /* If we do not have stop parameters cached, use
1677          * whatever we are given */
1678         if (!entry || !entry->stop_params) {
1679             op->params = params;
1680         } else {
1681             /* Copy the cached parameter list so that we stop the resource
1682              * with the old attributes, not the new ones */
1683             op->params = pcmk__strkey_table(free, free);
1684 
1685             g_hash_table_foreach(params, copy_meta_keys, op->params);
1686             g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params);
1687             g_hash_table_destroy(params);
1688             params = NULL;
1689         }
1690     }
1691 
1692     /* sanity */
1693     if (op->timeout <= 0) {
1694         op->timeout = op->interval_ms;
1695     }
1696     if (op->start_delay < 0) {
1697         op->start_delay = 0;
1698     }
1699 
1700     transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY);
1701     CRM_CHECK(transition != NULL, return op);
1702 
1703     op->user_data = strdup(transition);
1704 
1705     if (op->interval_ms != 0) {
1706         if (pcmk__strcase_any_of(operation, CRMD_ACTION_START, CRMD_ACTION_STOP, NULL)) {
1707             crm_err("Start and Stop actions cannot have an interval: %u",
1708                     op->interval_ms);
1709             op->interval_ms = 0;
1710         }
1711     }
1712 
1713     crm_trace("Constructed %s op for %s: interval=%u",
1714               operation, rsc_id, op->interval_ms);
1715 
1716     return op;
1717 }
1718 
1719 /*!
1720  * \internal
1721  * \brief Send a (synthesized) event result
1722  *
1723  * Reply with a synthesized event result directly, as opposed to going through
1724  * the executor.
1725  *
1726  * \param[in]     to_host  Host to send result to
1727  * \param[in]     to_sys   IPC name to send result (NULL for transition engine)
1728  * \param[in]     rsc      Type information about resource the result is for
1729  * \param[in,out] op       Event with result to send
1730  * \param[in]     rsc_id   ID of resource the result is for
1731  */
1732 void
1733 controld_ack_event_directly(const char *to_host, const char *to_sys,
     /* [previous][next][first][last][top][bottom][index][help] */
1734                             const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op,
1735                             const char *rsc_id)
1736 {
1737     xmlNode *reply = NULL;
1738     xmlNode *update, *iter;
1739     crm_node_t *peer = NULL;
1740 
1741     CRM_CHECK(op != NULL, return);
1742     if (op->rsc_id == NULL) {
1743         CRM_ASSERT(rsc_id != NULL);
1744         op->rsc_id = strdup(rsc_id);
1745     }
1746     if (to_sys == NULL) {
1747         to_sys = CRM_SYSTEM_TENGINE;
1748     }
1749 
1750     peer = crm_get_peer(0, controld_globals.our_nodename);
1751     update = create_node_state_update(peer, node_update_none, NULL,
1752                                       __func__);
1753 
1754     iter = create_xml_node(update, XML_CIB_TAG_LRM);
1755     crm_xml_add(iter, XML_ATTR_ID, controld_globals.our_uuid);
1756     iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES);
1757     iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE);
1758 
1759     crm_xml_add(iter, XML_ATTR_ID, op->rsc_id);
1760 
1761     controld_add_resource_history_xml(iter, rsc, op,
1762                                       controld_globals.our_nodename);
1763     reply = create_request(CRM_OP_INVOKE_LRM, update, to_host, to_sys, CRM_SYSTEM_LRMD, NULL);
1764 
1765     crm_log_xml_trace(update, "[direct ACK]");
1766 
1767     crm_debug("ACK'ing resource op " PCMK__OP_FMT " from %s: %s",
1768               op->rsc_id, op->op_type, op->interval_ms, op->user_data,
1769               crm_element_value(reply, XML_ATTR_REFERENCE));
1770 
1771     if (relay_message(reply, TRUE) == FALSE) {
1772         crm_log_xml_err(reply, "Unable to route reply");
1773     }
1774 
1775     free_xml(update);
1776     free_xml(reply);
1777 }
1778 
1779 gboolean
1780 verify_stopped(enum crmd_fsa_state cur_state, int log_level)
     /* [previous][next][first][last][top][bottom][index][help] */
1781 {
1782     gboolean res = TRUE;
1783     GList *lrm_state_list = lrm_state_get_list();
1784     GList *state_entry;
1785 
1786     for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) {
1787         lrm_state_t *lrm_state = state_entry->data;
1788 
1789         if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) {
1790             /* keep iterating through all even when false is returned */
1791             res = FALSE;
1792         }
1793     }
1794 
1795     controld_set_fsa_input_flags(R_SENT_RSC_STOP);
1796     g_list_free(lrm_state_list); lrm_state_list = NULL;
1797     return res;
1798 }
1799 
1800 struct stop_recurring_action_s {
1801     lrmd_rsc_info_t *rsc;
1802     lrm_state_t *lrm_state;
1803 };
1804 
1805 static gboolean
1806 stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1807 {
1808     gboolean remove = FALSE;
1809     struct stop_recurring_action_s *event = user_data;
1810     active_op_t *op = value;
1811 
1812     if ((op->interval_ms != 0)
1813         && pcmk__str_eq(op->rsc_id, event->rsc->id, pcmk__str_none)) {
1814 
1815         crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key);
1816         remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE);
1817     }
1818 
1819     return remove;
1820 }
1821 
1822 static gboolean
1823 stop_recurring_actions(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1824 {
1825     gboolean remove = FALSE;
1826     lrm_state_t *lrm_state = user_data;
1827     active_op_t *op = value;
1828 
1829     if (op->interval_ms != 0) {
1830         crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id,
1831                  (const char *) key);
1832         remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE);
1833     }
1834 
1835     return remove;
1836 }
1837 
1838 /*!
1839  * \internal
1840  * \brief Check whether recurring actions should be cancelled before an action
1841  *
1842  * \param[in] rsc_id       Resource that action is for
1843  * \param[in] action       Action being performed
1844  * \param[in] interval_ms  Operation interval of \p action (in milliseconds)
1845  *
1846  * \return true if recurring actions should be cancelled, otherwise false
1847  */
1848 static bool
1849 should_cancel_recurring(const char *rsc_id, const char *action, guint interval_ms)
     /* [previous][next][first][last][top][bottom][index][help] */
1850 {
1851     if (is_remote_lrmd_ra(NULL, NULL, rsc_id) && (interval_ms == 0)
1852         && (strcmp(action, CRMD_ACTION_MIGRATE) == 0)) {
1853         /* Don't stop monitoring a migrating Pacemaker Remote connection
1854          * resource until the entire migration has completed. We must detect if
1855          * the connection is unexpectedly severed, even during a migration.
1856          */
1857         return false;
1858     }
1859 
1860     // Cancel recurring actions before changing resource state
1861     return (interval_ms == 0)
1862             && !pcmk__str_any_of(action, CRMD_ACTION_STATUS, CRMD_ACTION_NOTIFY,
1863                                  NULL);
1864 }
1865 
1866 /*!
1867  * \internal
1868  * \brief Check whether an action should not be performed at this time
1869  *
1870  * \param[in] operation  Action to be performed
1871  *
1872  * \return Readable description of why action should not be performed,
1873  *         or NULL if it should be performed
1874  */
1875 static const char *
1876 should_nack_action(const char *action)
     /* [previous][next][first][last][top][bottom][index][help] */
1877 {
1878     if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)
1879         && pcmk__str_eq(action, RSC_START, pcmk__str_none)) {
1880 
1881         register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
1882         return "Not attempting start due to shutdown in progress";
1883     }
1884 
1885     switch (controld_globals.fsa_state) {
1886         case S_NOT_DC:
1887         case S_POLICY_ENGINE:   // Recalculating
1888         case S_TRANSITION_ENGINE:
1889             break;
1890         default:
1891             if (!pcmk__str_eq(action, CRMD_ACTION_STOP, pcmk__str_none)) {
1892                 return "Controller cannot attempt actions at this time";
1893             }
1894             break;
1895     }
1896     return NULL;
1897 }
1898 
1899 static void
1900 do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg,
     /* [previous][next][first][last][top][bottom][index][help] */
1901               struct ra_metadata_s *md)
1902 {
1903     int rc;
1904     int call_id = 0;
1905     char *op_id = NULL;
1906     lrmd_event_data_t *op = NULL;
1907     fsa_data_t *msg_data = NULL;
1908     const char *transition = NULL;
1909     const char *operation = NULL;
1910     const char *nack_reason = NULL;
1911 
1912     CRM_CHECK((rsc != NULL) && (msg != NULL), return);
1913 
1914     operation = crm_element_value(msg, XML_LRM_ATTR_TASK);
1915     CRM_CHECK(!pcmk__str_empty(operation), return);
1916 
1917     transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY);
1918     if (pcmk__str_empty(transition)) {
1919         crm_log_xml_err(msg, "Missing transition number");
1920     }
1921 
1922     if (lrm_state == NULL) {
1923         // This shouldn't be possible, but provide a failsafe just in case
1924         crm_err("Cannot execute %s of %s: No executor connection "
1925                 CRM_XS " transition_key=%s",
1926                 operation, rsc->id, pcmk__s(transition, ""));
1927         synthesize_lrmd_failure(NULL, msg, PCMK_EXEC_INVALID,
1928                                 PCMK_OCF_UNKNOWN_ERROR,
1929                                 "No executor connection");
1930         return;
1931     }
1932 
1933     if (pcmk__str_any_of(operation, CRMD_ACTION_RELOAD,
1934                          CRMD_ACTION_RELOAD_AGENT, NULL)) {
1935         /* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs
1936          * will schedule reload-agent actions only. In either case, we need
1937          * to map that to whatever the resource agent actually supports.
1938          * Default to the OCF 1.1 name.
1939          */
1940         if ((md != NULL)
1941             && pcmk_is_set(md->ra_flags, ra_supports_legacy_reload)) {
1942             operation = CRMD_ACTION_RELOAD;
1943         } else {
1944             operation = CRMD_ACTION_RELOAD_AGENT;
1945         }
1946     }
1947 
1948     op = construct_op(lrm_state, msg, rsc->id, operation);
1949     CRM_CHECK(op != NULL, return);
1950 
1951     if (should_cancel_recurring(rsc->id, operation, op->interval_ms)) {
1952         guint removed = 0;
1953         struct stop_recurring_action_s data;
1954 
1955         data.rsc = rsc;
1956         data.lrm_state = lrm_state;
1957         removed = g_hash_table_foreach_remove(lrm_state->active_ops,
1958                                               stop_recurring_action_by_rsc,
1959                                               &data);
1960 
1961         if (removed) {
1962             crm_debug("Stopped %u recurring operation%s in preparation for "
1963                       PCMK__OP_FMT, removed, pcmk__plural_s(removed),
1964                       rsc->id, operation, op->interval_ms);
1965         }
1966     }
1967 
1968     /* now do the op */
1969     crm_notice("Requesting local execution of %s operation for %s on %s "
1970                CRM_XS " transition_key=%s op_key=" PCMK__OP_FMT,
1971                crm_action_str(op->op_type, op->interval_ms), rsc->id, lrm_state->node_name,
1972                pcmk__s(transition, ""), rsc->id, operation, op->interval_ms);
1973 
1974     nack_reason = should_nack_action(operation);
1975     if (nack_reason != NULL) {
1976         crm_notice("Discarding attempt to perform action %s on %s in state %s "
1977                    "(shutdown=%s)", operation, rsc->id,
1978                    fsa_state2string(controld_globals.fsa_state),
1979                    pcmk__btoa(pcmk_is_set(controld_globals.fsa_input_register,
1980                                           R_SHUTDOWN)));
1981 
1982         lrmd__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_INVALID,
1983                          nack_reason);
1984         controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id);
1985         lrmd_free_event(op);
1986         free(op_id);
1987         return;
1988     }
1989 
1990     controld_record_pending_op(lrm_state->node_name, rsc, op);
1991 
1992     op_id = pcmk__op_key(rsc->id, op->op_type, op->interval_ms);
1993 
1994     if (op->interval_ms > 0) {
1995         /* cancel it so we can then restart it without conflict */
1996         cancel_op_key(lrm_state, rsc, op_id, FALSE);
1997     }
1998 
1999     rc = controld_execute_resource_agent(lrm_state, rsc->id, op->op_type,
2000                                          op->user_data, op->interval_ms,
2001                                          op->timeout, op->start_delay,
2002                                          op->params, &call_id);
2003     if (rc == pcmk_rc_ok) {
2004         /* record all operations so we can wait
2005          * for them to complete during shutdown
2006          */
2007         char *call_id_s = make_stop_id(rsc->id, call_id);
2008         active_op_t *pending = NULL;
2009 
2010         pending = calloc(1, sizeof(active_op_t));
2011         crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s);
2012 
2013         pending->call_id = call_id;
2014         pending->interval_ms = op->interval_ms;
2015         pending->op_type = strdup(operation);
2016         pending->op_key = strdup(op_id);
2017         pending->rsc_id = strdup(rsc->id);
2018         pending->start_time = time(NULL);
2019         pcmk__str_update(&pending->user_data, op->user_data);
2020         if (crm_element_value_epoch(msg, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
2021                                     &(pending->lock_time)) != pcmk_ok) {
2022             pending->lock_time = 0;
2023         }
2024         g_hash_table_replace(lrm_state->active_ops, call_id_s, pending);
2025 
2026         if ((op->interval_ms > 0)
2027             && (op->start_delay > START_DELAY_THRESHOLD)) {
2028             int target_rc = PCMK_OCF_OK;
2029 
2030             crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id);
2031             decode_transition_key(op->user_data, NULL, NULL, NULL, &target_rc);
2032             lrmd__set_result(op, target_rc, PCMK_EXEC_DONE, NULL);
2033             controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id);
2034         }
2035 
2036         pending->params = op->params;
2037         op->params = NULL;
2038 
2039     } else if (lrm_state_is_local(lrm_state)) {
2040         crm_err("Could not initiate %s action for resource %s locally: %s "
2041                 CRM_XS " rc=%d", operation, rsc->id, pcmk_rc_str(rc), rc);
2042         fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED,
2043                        PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc));
2044         process_lrm_event(lrm_state, op, NULL, NULL);
2045         register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
2046 
2047     } else {
2048         crm_err("Could not initiate %s action for resource %s remotely on %s: "
2049                 "%s " CRM_XS " rc=%d",
2050                 operation, rsc->id, lrm_state->node_name, pcmk_rc_str(rc), rc);
2051         fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED,
2052                        PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc));
2053         process_lrm_event(lrm_state, op, NULL, NULL);
2054     }
2055 
2056     free(op_id);
2057     lrmd_free_event(op);
2058 }
2059 
2060 void
2061 do_lrm_event(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
2062              enum crmd_fsa_cause cause,
2063              enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data)
2064 {
2065     CRM_CHECK(FALSE, return);
2066 }
2067 
/*!
 * \internal
 * \brief Copy a string, turning escaped newlines into real ones
 *
 * Every two-character sequence of a backslash followed by 'n' in the copy is
 * overwritten with a newline followed by a space, so the result always has
 * the same length as the input.
 *
 * \param[in] string  String to copy (may be NULL)
 *
 * \return Newly allocated copy of \p string with newlines unescaped, or NULL
 *         if \p string is NULL or memory could not be allocated
 * \note The caller is responsible for freeing the result with free().
 */
static char *
unescape_newlines(const char *string)
{
    static const char *escaped_newline = "\\n";
    char *pch = NULL;
    char *ret = NULL;

    if (string == NULL) {
        return NULL;
    }

    ret = strdup(string);
    if (ret == NULL) {
        // Out of memory: don't hand a NULL buffer to strstr()
        return NULL;
    }

    pch = strstr(ret, escaped_newline);
    while (pch != NULL) {
        /* Replace newline escape pattern with actual newline (and a space so we
         * don't have to shuffle the rest of the buffer)
         */
        pch[0] = '\n';
        pch[1] = ' ';
        pch = strstr(pch, escaped_newline);
    }

    return ret;
}
2092 
2093 static bool
2094 did_lrm_rsc_op_fail(lrm_state_t *lrm_state, const char * rsc_id,
     /* [previous][next][first][last][top][bottom][index][help] */
2095                     const char * op_type, guint interval_ms)
2096 {
2097     rsc_history_t *entry = NULL;
2098 
2099     CRM_CHECK(lrm_state != NULL, return FALSE);
2100     CRM_CHECK(rsc_id != NULL, return FALSE);
2101     CRM_CHECK(op_type != NULL, return FALSE);
2102 
2103     entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
2104     if (entry == NULL || entry->failed == NULL) {
2105         return FALSE;
2106     }
2107 
2108     if (pcmk__str_eq(entry->failed->rsc_id, rsc_id, pcmk__str_none)
2109         && pcmk__str_eq(entry->failed->op_type, op_type, pcmk__str_casei)
2110         && entry->failed->interval_ms == interval_ms) {
2111         return TRUE;
2112     }
2113 
2114     return FALSE;
2115 }
2116 
2117 /*!
2118  * \internal
2119  * \brief Log the result of an executor action (actual or synthesized)
2120  *
2121  * \param[in] op         Executor action to log result for
2122  * \param[in] op_key     Operation key for action
2123  * \param[in] node_name  Name of node action was performed on, if known
2124  * \param[in] confirmed  Whether to log that graph action was confirmed
2125  */
2126 static void
2127 log_executor_event(const lrmd_event_data_t *op, const char *op_key,
     /* [previous][next][first][last][top][bottom][index][help] */
2128                    const char *node_name, gboolean confirmed)
2129 {
2130     int log_level = LOG_ERR;
2131     GString *str = g_string_sized_new(100); // reasonable starting size
2132 
2133     pcmk__g_strcat(str,
2134                    "Result of ", crm_action_str(op->op_type, op->interval_ms),
2135                    " operation for ", op->rsc_id, NULL);
2136 
2137     if (node_name != NULL) {
2138         pcmk__g_strcat(str, " on ", node_name, NULL);
2139     }
2140 
2141     switch (op->op_status) {
2142         case PCMK_EXEC_DONE:
2143             log_level = LOG_NOTICE;
2144             pcmk__g_strcat(str, ": ", services_ocf_exitcode_str(op->rc), NULL);
2145             break;
2146 
2147         case PCMK_EXEC_TIMEOUT:
2148             pcmk__g_strcat(str,
2149                            ": ", pcmk_exec_status_str(op->op_status), " after ",
2150                            pcmk__readable_interval(op->timeout), NULL);
2151             break;
2152 
2153         case PCMK_EXEC_CANCELLED:
2154             log_level = LOG_INFO;
2155             /* order of __attribute__ and Fall through comment is IMPORTANT!
2156              * do not change it without proper testing with both clang and gcc
2157              * in multiple versions.
2158              * the clang check allows to build with all versions of clang.
2159              * the has_c_attribute check is to workaround a bug in clang version
2160              * in rhel7. has_attribute would happily return "YES SIR WE GOT IT"
2161              * and fail the build the next line.
2162              */
2163 #ifdef __clang__
2164 #ifdef __has_c_attribute
2165 #if __has_attribute(fallthrough)
2166             __attribute__((fallthrough));
2167 #endif
2168 #endif
2169 #endif
2170             // Fall through
2171         default:
2172             pcmk__g_strcat(str, ": ", pcmk_exec_status_str(op->op_status),
2173                            NULL);
2174     }
2175 
2176     if ((op->exit_reason != NULL)
2177         && ((op->op_status != PCMK_EXEC_DONE) || (op->rc != PCMK_OCF_OK))) {
2178 
2179         pcmk__g_strcat(str, " (", op->exit_reason, ")", NULL);
2180     }
2181 
2182     g_string_append(str, " " CRM_XS);
2183     g_string_append_printf(str, " graph action %sconfirmed; call=%d key=%s",
2184                            (confirmed? "" : "un"), op->call_id, op_key);
2185     if (op->op_status == PCMK_EXEC_DONE) {
2186         g_string_append_printf(str, " rc=%d", op->rc);
2187     }
2188 
2189     do_crm_log(log_level, "%s", str->str);
2190     g_string_free(str, TRUE);
2191 
2192     /* The services library has already logged the output at info or debug
2193      * level, so just raise to notice if it looks like a failure.
2194      */
2195     if ((op->output != NULL) && (op->rc != PCMK_OCF_OK)) {
2196         char *prefix = crm_strdup_printf(PCMK__OP_FMT "@%s output",
2197                                          op->rsc_id, op->op_type,
2198                                          op->interval_ms, node_name);
2199 
2200         crm_log_output(LOG_NOTICE, prefix, op->output);
2201         free(prefix);
2202     }
2203 }
2204 
2205 void
2206 process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
     /* [previous][next][first][last][top][bottom][index][help] */
2207                   active_op_t *pending, const xmlNode *action_xml)
2208 {
2209     char *op_id = NULL;
2210     char *op_key = NULL;
2211 
2212     gboolean remove = FALSE;
2213     gboolean removed = FALSE;
2214     bool need_direct_ack = FALSE;
2215     lrmd_rsc_info_t *rsc = NULL;
2216     const char *node_name = NULL;
2217 
2218     CRM_CHECK(op != NULL, return);
2219     CRM_CHECK(op->rsc_id != NULL, return);
2220 
2221     // Remap new status codes for older DCs
2222     if (compare_version(controld_globals.dc_version, "3.2.0") < 0) {
2223         switch (op->op_status) {
2224             case PCMK_EXEC_NOT_CONNECTED:
2225                 lrmd__set_result(op, PCMK_OCF_CONNECTION_DIED,
2226                                  PCMK_EXEC_ERROR, op->exit_reason);
2227                 break;
2228             case PCMK_EXEC_INVALID:
2229                 lrmd__set_result(op, CRM_DIRECT_NACK_RC, PCMK_EXEC_ERROR,
2230                                  op->exit_reason);
2231                 break;
2232             default:
2233                 break;
2234         }
2235     }
2236 
2237     op_id = make_stop_id(op->rsc_id, op->call_id);
2238     op_key = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms);
2239 
2240     // Get resource info if available (from executor state or action XML)
2241     if (lrm_state) {
2242         rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0);
2243     }
2244     if ((rsc == NULL) && action_xml) {
2245         xmlNode *xml = find_xml_node(action_xml, XML_CIB_TAG_RESOURCE, TRUE);
2246 
2247         const char *standard = crm_element_value(xml, XML_AGENT_ATTR_CLASS);
2248         const char *provider = crm_element_value(xml, XML_AGENT_ATTR_PROVIDER);
2249         const char *type = crm_element_value(xml, XML_ATTR_TYPE);
2250 
2251         if (standard && type) {
2252             crm_info("%s agent information not cached, using %s%s%s:%s from action XML",
2253                      op->rsc_id, standard,
2254                      (provider? ":" : ""), (provider? provider : ""), type);
2255             rsc = lrmd_new_rsc_info(op->rsc_id, standard, provider, type);
2256         } else {
2257             crm_err("Can't process %s result because %s agent information not cached or in XML",
2258                     op_key, op->rsc_id);
2259         }
2260     }
2261 
2262     // Get node name if available (from executor state or action XML)
2263     if (lrm_state) {
2264         node_name = lrm_state->node_name;
2265     } else if (action_xml) {
2266         node_name = crm_element_value(action_xml, XML_LRM_ATTR_TARGET);
2267     }
2268 
2269     if(pending == NULL) {
2270         remove = TRUE;
2271         if (lrm_state) {
2272             pending = g_hash_table_lookup(lrm_state->active_ops, op_id);
2273         }
2274     }
2275 
2276     if (op->op_status == PCMK_EXEC_ERROR) {
2277         switch(op->rc) {
2278             case PCMK_OCF_NOT_RUNNING:
2279             case PCMK_OCF_RUNNING_PROMOTED:
2280             case PCMK_OCF_DEGRADED:
2281             case PCMK_OCF_DEGRADED_PROMOTED:
2282                 // Leave it to the TE/scheduler to decide if this is an error
2283                 op->op_status = PCMK_EXEC_DONE;
2284                 break;
2285             default:
2286                 /* Nothing to do */
2287                 break;
2288         }
2289     }
2290 
2291     if (op->op_status != PCMK_EXEC_CANCELLED) {
2292         /* We might not record the result, so directly acknowledge it to the
2293          * originator instead, so it doesn't time out waiting for the result
2294          * (especially important if part of a transition).
2295          */
2296         need_direct_ack = TRUE;
2297 
2298         if (controld_action_is_recordable(op->op_type)) {
2299             if (node_name && rsc) {
2300                 // We should record the result, and happily, we can
2301                 time_t lock_time = (pending == NULL)? 0 : pending->lock_time;
2302 
2303                 controld_update_resource_history(node_name, rsc, op, lock_time);
2304                 need_direct_ack = FALSE;
2305 
2306             } else if (op->rsc_deleted) {
2307                 /* We shouldn't record the result (likely the resource was
2308                  * refreshed, cleaned, or removed while this operation was
2309                  * in flight).
2310                  */
2311                 crm_notice("Not recording %s result in CIB because "
2312                            "resource information was removed since it was initiated",
2313                            op_key);
2314             } else {
2315                 /* This shouldn't be possible; the executor didn't consider the
2316                  * resource deleted, but we couldn't find resource or node
2317                  * information.
2318                  */
2319                 crm_err("Unable to record %s result in CIB: %s", op_key,
2320                         (node_name? "No resource information" : "No node name"));
2321             }
2322         }
2323 
2324     } else if (op->interval_ms == 0) {
2325         /* A non-recurring operation was cancelled. Most likely, the
2326          * never-initiated action was removed from the executor's pending
2327          * operations list upon resource removal.
2328          */
2329         need_direct_ack = TRUE;
2330 
2331     } else if (pending == NULL) {
2332         /* This recurring operation was cancelled, but was not pending. No
2333          * transition actions are waiting on it, nothing needs to be done.
2334          */
2335 
2336     } else if (op->user_data == NULL) {
2337         /* This recurring operation was cancelled and pending, but we don't
2338          * have a transition key. This should never happen.
2339          */
2340         crm_err("Recurring operation %s was cancelled without transition information",
2341                 op_key);
2342 
2343     } else if (pcmk_is_set(pending->flags, active_op_remove)) {
2344         /* This recurring operation was cancelled (by us) and pending, and we
2345          * have been waiting for it to finish.
2346          */
2347         if (lrm_state) {
2348             controld_delete_action_history(op);
2349         }
2350 
2351         /* Directly acknowledge failed recurring actions here. The above call to
2352          * controld_delete_action_history() will not erase any corresponding
2353          * last_failure entry, which means that the DC won't confirm the
2354          * cancellation via process_op_deletion(), and the transition would
2355          * otherwise wait for the action timer to pop.
2356          */
2357         if (did_lrm_rsc_op_fail(lrm_state, pending->rsc_id,
2358                                 pending->op_type, pending->interval_ms)) {
2359             need_direct_ack = TRUE;
2360         }
2361 
2362     } else if (op->rsc_deleted) {
2363         /* This recurring operation was cancelled (but not by us, and the
2364          * executor does not have resource information, likely due to resource
2365          * cleanup, refresh, or removal) and pending.
2366          */
2367         crm_debug("Recurring op %s was cancelled due to resource deletion",
2368                   op_key);
2369         need_direct_ack = TRUE;
2370 
2371     } else {
2372         /* This recurring operation was cancelled (but not by us, likely by the
2373          * executor before stopping the resource) and pending. We don't need to
2374          * do anything special.
2375          */
2376     }
2377 
2378     if (need_direct_ack) {
2379         controld_ack_event_directly(NULL, NULL, NULL, op, op->rsc_id);
2380     }
2381 
2382     if(remove == FALSE) {
2383         /* The caller will do this afterwards, but keep the logging consistent */
2384         removed = TRUE;
2385 
2386     } else if (lrm_state && ((op->interval_ms == 0)
2387                              || (op->op_status == PCMK_EXEC_CANCELLED))) {
2388 
2389         gboolean found = g_hash_table_remove(lrm_state->active_ops, op_id);
2390 
2391         if (op->interval_ms != 0) {
2392             removed = TRUE;
2393         } else if (found) {
2394             removed = TRUE;
2395             crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed",
2396                       op_key, op->call_id, op_id,
2397                       g_hash_table_size(lrm_state->active_ops));
2398         }
2399     }
2400 
2401     log_executor_event(op, op_key, node_name, removed);
2402 
2403     if (lrm_state) {
2404         if (!pcmk__str_eq(op->op_type, RSC_METADATA, pcmk__str_casei)) {
2405             crmd_alert_resource_op(lrm_state->node_name, op);
2406         } else if (rsc && (op->rc == PCMK_OCF_OK)) {
2407             char *metadata = unescape_newlines(op->output);
2408 
2409             controld_cache_metadata(lrm_state->metadata_cache, rsc, metadata);
2410             free(metadata);
2411         }
2412     }
2413 
2414     if (op->rsc_deleted) {
2415         crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key);
2416         if (lrm_state) {
2417             delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL,
2418                              true);
2419         }
2420     }
2421 
2422     /* If a shutdown was escalated while operations were pending,
2423      * then the FSA will be stalled right now... allow it to continue
2424      */
2425     controld_trigger_fsa();
2426     if (lrm_state && rsc) {
2427         update_history_cache(lrm_state, rsc, op);
2428     }
2429 
2430     lrmd_free_rsc_info(rsc);
2431     free(op_key);
2432     free(op_id);
2433 }

/* [previous][next][first][last][top][bottom][index][help] */