root/daemons/controld/controld_execd.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. lrm_connection_destroy
  2. make_stop_id
  3. copy_instance_keys
  4. copy_meta_keys
  5. history_remove_recurring_op
  6. history_free_recurring_ops
  7. history_free
  8. update_history_cache
  9. send_task_ok_ack
  10. op_node_name
  11. lrm_op_callback
  12. try_local_executor_connect
  13. do_lrm_control
  14. lrm_state_verify_stopped
  15. is_rsc_active
  16. build_active_RAs
  17. controld_query_executor_state
  18. controld_rc2event
  19. controld_trigger_delete_refresh
  20. notify_deleted
  21. lrm_remove_deleted_rsc
  22. lrm_remove_deleted_op
  23. delete_rsc_entry
  24. last_failed_matches_op
  25. lrm_clear_last_failure
  26. cancel_op
  27. cancel_action_by_key
  28. cancel_op_key
  29. get_lrm_resource
  30. delete_resource
  31. get_fake_call_id
  32. fake_op_status
  33. force_reprobe
  34. synthesize_lrmd_failure
  35. lrm_op_target
  36. fail_lrm_resource
  37. handle_reprobe_op
  38. do_lrm_cancel
  39. do_lrm_delete
  40. new_metadata_cb_data
  41. free_metadata_cb_data
  42. metadata_complete
  43. do_lrm_invoke
  44. construct_op
  45. controld_ack_event_directly
  46. verify_stopped
  47. stop_recurring_action_by_rsc
  48. stop_recurring_actions
  49. should_cancel_recurring
  50. should_nack_action
  51. do_lrm_rsc_op
  52. do_lrm_event
  53. unescape_newlines
  54. did_lrm_rsc_op_fail
  55. log_executor_event
  56. process_lrm_event

   1 /*
   2  * Copyright 2004-2023 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <regex.h>
  13 #include <sys/param.h>
  14 #include <sys/types.h>
  15 #include <sys/wait.h>
  16 
  17 #include <crm/crm.h>
  18 #include <crm/lrmd.h>           // lrmd_event_data_t, lrmd_rsc_info_t, etc.
  19 #include <crm/services.h>
  20 #include <crm/msg_xml.h>
  21 #include <crm/common/xml.h>
  22 #include <crm/pengine/rules.h>
  23 #include <crm/lrmd_internal.h>
  24 
  25 #include <pacemaker-internal.h>
  26 #include <pacemaker-controld.h>
  27 
  28 #define START_DELAY_THRESHOLD 5 * 60 * 1000
  29 #define MAX_LRM_REG_FAILS 30
  30 
  31 struct delete_event_s {
  32     int rc;
  33     const char *rsc;
  34     lrm_state_t *lrm_state;
  35 };
  36 
  37 static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id);
  38 static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list);
  39 static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data);
  40 
  41 static lrmd_event_data_t *construct_op(const lrm_state_t *lrm_state,
  42                                        const xmlNode *rsc_op,
  43                                        const char *rsc_id,
  44                                        const char *operation);
  45 static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
  46                           xmlNode *msg, struct ra_metadata_s *md);
  47 
  48 static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state,
  49                                          int log_level);
  50 
  51 static void
  52 lrm_connection_destroy(void)
     /* [previous][next][first][last][top][bottom][index][help] */
  53 {
  54     if (pcmk_is_set(controld_globals.fsa_input_register, R_LRM_CONNECTED)) {
  55         crm_crit("Lost connection to local executor");
  56         register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
  57         controld_clear_fsa_input_flags(R_LRM_CONNECTED);
  58     }
  59 }
  60 
  61 static char *
  62 make_stop_id(const char *rsc, int call_id)
     /* [previous][next][first][last][top][bottom][index][help] */
  63 {
  64     return crm_strdup_printf("%s:%d", rsc, call_id);
  65 }
  66 
  67 static void
  68 copy_instance_keys(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
  69 {
  70     if (strstr(key, CRM_META "_") == NULL) {
  71         g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value));
  72     }
  73 }
  74 
  75 static void
  76 copy_meta_keys(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
  77 {
  78     if (strstr(key, CRM_META "_") != NULL) {
  79         g_hash_table_replace(user_data, strdup((const char *)key), strdup((const char *)value));
  80     }
  81 }
  82 
  83 /*!
  84  * \internal
  85  * \brief Remove a recurring operation from a resource's history
  86  *
  87  * \param[in,out] history  Resource history to modify
  88  * \param[in]     op       Operation to remove
  89  *
  90  * \return TRUE if the operation was found and removed, FALSE otherwise
  91  */
  92 static gboolean
  93 history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op)
     /* [previous][next][first][last][top][bottom][index][help] */
  94 {
  95     GList *iter;
  96 
  97     for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) {
  98         lrmd_event_data_t *existing = iter->data;
  99 
 100         if ((op->interval_ms == existing->interval_ms)
 101             && pcmk__str_eq(op->rsc_id, existing->rsc_id, pcmk__str_none)
 102             && pcmk__str_eq(op->op_type, existing->op_type, pcmk__str_casei)) {
 103 
 104             history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter);
 105             lrmd_free_event(existing);
 106             return TRUE;
 107         }
 108     }
 109     return FALSE;
 110 }
 111 
 112 /*!
 113  * \internal
 114  * \brief Free all recurring operations in resource history
 115  *
 116  * \param[in,out] history  Resource history to modify
 117  */
 118 static void
 119 history_free_recurring_ops(rsc_history_t *history)
     /* [previous][next][first][last][top][bottom][index][help] */
 120 {
 121     GList *iter;
 122 
 123     for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) {
 124         lrmd_free_event(iter->data);
 125     }
 126     g_list_free(history->recurring_op_list);
 127     history->recurring_op_list = NULL;
 128 }
 129 
 130 /*!
 131  * \internal
 132  * \brief Free resource history
 133  *
 134  * \param[in,out] history  Resource history to free
 135  */
 136 void
 137 history_free(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 138 {
 139     rsc_history_t *history = (rsc_history_t*)data;
 140 
 141     if (history->stop_params) {
 142         g_hash_table_destroy(history->stop_params);
 143     }
 144 
 145     /* Don't need to free history->rsc.id because it's set to history->id */
 146     free(history->rsc.type);
 147     free(history->rsc.standard);
 148     free(history->rsc.provider);
 149 
 150     lrmd_free_event(history->failed);
 151     lrmd_free_event(history->last);
 152     free(history->id);
 153     history_free_recurring_ops(history);
 154     free(history);
 155 }
 156 
 157 static void
 158 update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op)
     /* [previous][next][first][last][top][bottom][index][help] */
 159 {
 160     int target_rc = 0;
 161     rsc_history_t *entry = NULL;
 162 
 163     if (op->rsc_deleted) {
 164         crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type);
 165         controld_delete_resource_history(op->rsc_id, lrm_state->node_name,
 166                                          NULL, crmd_cib_smart_opt());
 167         return;
 168     }
 169 
 170     if (pcmk__str_eq(op->op_type, PCMK_ACTION_NOTIFY, pcmk__str_casei)) {
 171         return;
 172     }
 173 
 174     crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type);
 175 
 176     entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id);
 177     if (entry == NULL && rsc) {
 178         entry = calloc(1, sizeof(rsc_history_t));
 179         entry->id = strdup(op->rsc_id);
 180         g_hash_table_insert(lrm_state->resource_history, entry->id, entry);
 181 
 182         entry->rsc.id = entry->id;
 183         entry->rsc.type = strdup(rsc->type);
 184         entry->rsc.standard = strdup(rsc->standard);
 185         pcmk__str_update(&entry->rsc.provider, rsc->provider);
 186 
 187     } else if (entry == NULL) {
 188         crm_info("Resource %s no longer exists, not updating cache", op->rsc_id);
 189         return;
 190     }
 191 
 192     entry->last_callid = op->call_id;
 193     target_rc = rsc_op_expected_rc(op);
 194     if (op->op_status == PCMK_EXEC_CANCELLED) {
 195         if (op->interval_ms > 0) {
 196             crm_trace("Removing cancelled recurring op: " PCMK__OP_FMT,
 197                       op->rsc_id, op->op_type, op->interval_ms);
 198             history_remove_recurring_op(entry, op);
 199             return;
 200         } else {
 201             crm_trace("Skipping " PCMK__OP_FMT " rc=%d, status=%d",
 202                       op->rsc_id, op->op_type, op->interval_ms, op->rc,
 203                       op->op_status);
 204         }
 205 
 206     } else if (did_rsc_op_fail(op, target_rc)) {
 207         /* Store failed monitors here, otherwise the block below will cause them
 208          * to be forgotten when a stop happens.
 209          */
 210         if (entry->failed) {
 211             lrmd_free_event(entry->failed);
 212         }
 213         entry->failed = lrmd_copy_event(op);
 214 
 215     } else if (op->interval_ms == 0) {
 216         if (entry->last) {
 217             lrmd_free_event(entry->last);
 218         }
 219         entry->last = lrmd_copy_event(op);
 220 
 221         if (op->params && pcmk__strcase_any_of(op->op_type, PCMK_ACTION_START,
 222                                                PCMK_ACTION_RELOAD,
 223                                                PCMK_ACTION_RELOAD_AGENT,
 224                                                PCMK_ACTION_MONITOR, NULL)) {
 225             if (entry->stop_params) {
 226                 g_hash_table_destroy(entry->stop_params);
 227             }
 228             entry->stop_params = pcmk__strkey_table(free, free);
 229 
 230             g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params);
 231         }
 232     }
 233 
 234     if (op->interval_ms > 0) {
 235         /* Ensure there are no duplicates */
 236         history_remove_recurring_op(entry, op);
 237 
 238         crm_trace("Adding recurring op: " PCMK__OP_FMT,
 239                   op->rsc_id, op->op_type, op->interval_ms);
 240         entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op));
 241 
 242     } else if ((entry->recurring_op_list != NULL)
 243                 && !pcmk__str_eq(op->op_type, PCMK_ACTION_MONITOR,
 244                                  pcmk__str_casei)) {
 245         crm_trace("Dropping %d recurring ops because of: " PCMK__OP_FMT,
 246                   g_list_length(entry->recurring_op_list), op->rsc_id,
 247                   op->op_type, op->interval_ms);
 248         history_free_recurring_ops(entry);
 249     }
 250 }
 251 
 252 /*!
 253  * \internal
 254  * \brief Send a direct OK ack for a resource task
 255  *
 256  * \param[in] lrm_state  LRM connection
 257  * \param[in] input      Input message being ack'ed
 258  * \param[in] rsc_id     ID of affected resource
 259  * \param[in] rsc        Affected resource (if available)
 260  * \param[in] task       Operation task being ack'ed
 261  * \param[in] ack_host   Name of host to send ack to
 262  * \param[in] ack_sys    IPC system name to ack
 263  */
 264 static void
 265 send_task_ok_ack(const lrm_state_t *lrm_state, const ha_msg_input_t *input,
     /* [previous][next][first][last][top][bottom][index][help] */
 266                  const char *rsc_id, const lrmd_rsc_info_t *rsc,
 267                  const char *task, const char *ack_host, const char *ack_sys)
 268 {
 269     lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc_id, task);
 270 
 271     lrmd__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
 272     controld_ack_event_directly(ack_host, ack_sys, rsc, op, rsc_id);
 273     lrmd_free_event(op);
 274 }
 275 
 276 static inline const char *
 277 op_node_name(lrmd_event_data_t *op)
     /* [previous][next][first][last][top][bottom][index][help] */
 278 {
 279     return pcmk__s(op->remote_nodename, controld_globals.our_nodename);
 280 }
 281 
 282 void
 283 lrm_op_callback(lrmd_event_data_t * op)
     /* [previous][next][first][last][top][bottom][index][help] */
 284 {
 285     CRM_CHECK(op != NULL, return);
 286     switch (op->type) {
 287         case lrmd_event_disconnect:
 288             if (op->remote_nodename == NULL) {
 289                 /* If this is the local executor IPC connection, set the right
 290                  * bits in the controller when the connection goes down.
 291                  */
 292                 lrm_connection_destroy();
 293             }
 294             break;
 295 
 296         case lrmd_event_exec_complete:
 297             {
 298                 lrm_state_t *lrm_state = lrm_state_find(op_node_name(op));
 299 
 300                 CRM_ASSERT(lrm_state != NULL);
 301                 process_lrm_event(lrm_state, op, NULL, NULL);
 302             }
 303             break;
 304 
 305         default:
 306             break;
 307     }
 308 }
 309 
 310 static void
 311 try_local_executor_connect(long long action, fsa_data_t *msg_data,
     /* [previous][next][first][last][top][bottom][index][help] */
 312                            lrm_state_t *lrm_state)
 313 {
 314     int rc = pcmk_rc_ok;
 315 
 316     crm_debug("Connecting to the local executor");
 317 
 318     // If we can connect, great
 319     rc = controld_connect_local_executor(lrm_state);
 320     if (rc == pcmk_rc_ok) {
 321         controld_set_fsa_input_flags(R_LRM_CONNECTED);
 322         crm_info("Connection to the local executor established");
 323         return;
 324     }
 325 
 326     // Otherwise, if we can try again, set a timer to do so
 327     if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) {
 328         crm_warn("Failed to connect to the local executor %d time%s "
 329                  "(%d max): %s", lrm_state->num_lrm_register_fails,
 330                  pcmk__plural_s(lrm_state->num_lrm_register_fails),
 331                  MAX_LRM_REG_FAILS, pcmk_rc_str(rc));
 332         controld_start_wait_timer();
 333         crmd_fsa_stall(FALSE);
 334         return;
 335     }
 336 
 337     // Otherwise give up
 338     crm_err("Failed to connect to the executor the max allowed "
 339             "%d time%s: %s", lrm_state->num_lrm_register_fails,
 340             pcmk__plural_s(lrm_state->num_lrm_register_fails),
 341             pcmk_rc_str(rc));
 342     register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 343 }
 344 
 345 /*       A_LRM_CONNECT  */
 346 void
 347 do_lrm_control(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 348                enum crmd_fsa_cause cause,
 349                enum crmd_fsa_state cur_state,
 350                enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 351 {
 352     /* This only pertains to local executor connections. Remote connections are
 353      * handled as resources within the scheduler. Connecting and disconnecting
 354      * from remote executor instances is handled differently.
 355      */
 356 
 357     lrm_state_t *lrm_state = NULL;
 358 
 359     if (controld_globals.our_nodename == NULL) {
 360         return; /* Nothing to do */
 361     }
 362     lrm_state = lrm_state_find_or_create(controld_globals.our_nodename);
 363     if (lrm_state == NULL) {
 364         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 365         return;
 366     }
 367 
 368     if (action & A_LRM_DISCONNECT) {
 369         if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) {
 370             if (action == A_LRM_DISCONNECT) {
 371                 crmd_fsa_stall(FALSE);
 372                 return;
 373             }
 374         }
 375 
 376         controld_clear_fsa_input_flags(R_LRM_CONNECTED);
 377         lrm_state_disconnect(lrm_state);
 378         lrm_state_reset_tables(lrm_state, FALSE);
 379     }
 380 
 381     if (action & A_LRM_CONNECT) {
 382         try_local_executor_connect(action, msg_data, lrm_state);
 383     }
 384 
 385     if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) {
 386         crm_err("Unexpected action %s in %s", fsa_action2string(action),
 387                 __func__);
 388     }
 389 }
 390 
 391 static gboolean
 392 lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level)
     /* [previous][next][first][last][top][bottom][index][help] */
 393 {
 394     int counter = 0;
 395     gboolean rc = TRUE;
 396     const char *when = "lrm disconnect";
 397 
 398     GHashTableIter gIter;
 399     const char *key = NULL;
 400     rsc_history_t *entry = NULL;
 401     active_op_t *pending = NULL;
 402 
 403     crm_debug("Checking for active resources before exit");
 404 
 405     if (cur_state == S_TERMINATE) {
 406         log_level = LOG_ERR;
 407         when = "shutdown";
 408 
 409     } else if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
 410         when = "shutdown... waiting";
 411     }
 412 
 413     if ((lrm_state->active_ops != NULL) && lrm_state_is_connected(lrm_state)) {
 414         guint removed = g_hash_table_foreach_remove(lrm_state->active_ops,
 415                                                     stop_recurring_actions,
 416                                                     lrm_state);
 417         guint nremaining = g_hash_table_size(lrm_state->active_ops);
 418 
 419         if (removed || nremaining) {
 420             crm_notice("Stopped %u recurring operation%s at %s (%u remaining)",
 421                        removed, pcmk__plural_s(removed), when, nremaining);
 422         }
 423     }
 424 
 425     if (lrm_state->active_ops != NULL) {
 426         g_hash_table_iter_init(&gIter, lrm_state->active_ops);
 427         while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) {
 428             /* Ignore recurring actions in the shutdown calculations */
 429             if (pending->interval_ms == 0) {
 430                 counter++;
 431             }
 432         }
 433     }
 434 
 435     if (counter > 0) {
 436         do_crm_log(log_level, "%d pending executor operation%s at %s",
 437                    counter, pcmk__plural_s(counter), when);
 438 
 439         if ((cur_state == S_TERMINATE)
 440             || !pcmk_is_set(controld_globals.fsa_input_register,
 441                             R_SENT_RSC_STOP)) {
 442             g_hash_table_iter_init(&gIter, lrm_state->active_ops);
 443             while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) {
 444                 do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key);
 445             }
 446 
 447         } else {
 448             rc = FALSE;
 449         }
 450         return rc;
 451     }
 452 
 453     if (lrm_state->resource_history == NULL) {
 454         return rc;
 455     }
 456 
 457     if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
 458         /* At this point we're not waiting, we're just shutting down */
 459         when = "shutdown";
 460     }
 461 
 462     counter = 0;
 463     g_hash_table_iter_init(&gIter, lrm_state->resource_history);
 464     while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) {
 465         if (is_rsc_active(lrm_state, entry->id) == FALSE) {
 466             continue;
 467         }
 468 
 469         counter++;
 470         if (log_level == LOG_ERR) {
 471             crm_info("Found %s active at %s", entry->id, when);
 472         } else {
 473             crm_trace("Found %s active at %s", entry->id, when);
 474         }
 475         if (lrm_state->active_ops != NULL) {
 476             GHashTableIter hIter;
 477 
 478             g_hash_table_iter_init(&hIter, lrm_state->active_ops);
 479             while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) {
 480                 if (pcmk__str_eq(entry->id, pending->rsc_id, pcmk__str_none)) {
 481                     crm_notice("%sction %s (%s) incomplete at %s",
 482                                pending->interval_ms == 0 ? "A" : "Recurring a",
 483                                key, pending->op_key, when);
 484                 }
 485             }
 486         }
 487     }
 488 
 489     if (counter) {
 490         crm_err("%d resource%s active at %s",
 491                 counter, (counter == 1)? " was" : "s were", when);
 492     }
 493 
 494     return rc;
 495 }
 496 
 497 static gboolean
 498 is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id)
     /* [previous][next][first][last][top][bottom][index][help] */
 499 {
 500     rsc_history_t *entry = NULL;
 501 
 502     entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
 503     if (entry == NULL || entry->last == NULL) {
 504         return FALSE;
 505     }
 506 
 507     crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type,
 508               entry->last->interval_ms, entry->last->rc);
 509     if ((entry->last->rc == PCMK_OCF_OK)
 510         && pcmk__str_eq(entry->last->op_type, PCMK_ACTION_STOP,
 511                         pcmk__str_casei)) {
 512         return FALSE;
 513 
 514     } else if (entry->last->rc == PCMK_OCF_OK
 515                && pcmk__str_eq(entry->last->op_type, PCMK_ACTION_MIGRATE_TO,
 516                                pcmk__str_casei)) {
 517         // A stricter check is too complex ... leave that to the scheduler
 518         return FALSE;
 519 
 520     } else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) {
 521         return FALSE;
 522 
 523     } else if ((entry->last->interval_ms == 0)
 524                && (entry->last->rc == PCMK_OCF_NOT_CONFIGURED)) {
 525         /* Badly configured resources can't be reliably stopped */
 526         return FALSE;
 527     }
 528 
 529     return TRUE;
 530 }
 531 
 532 static gboolean
 533 build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list)
     /* [previous][next][first][last][top][bottom][index][help] */
 534 {
 535     GHashTableIter iter;
 536     rsc_history_t *entry = NULL;
 537 
 538     g_hash_table_iter_init(&iter, lrm_state->resource_history);
 539     while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) {
 540 
 541         GList *gIter = NULL;
 542         xmlNode *xml_rsc = create_xml_node(rsc_list, XML_LRM_TAG_RESOURCE);
 543 
 544         crm_xml_add(xml_rsc, XML_ATTR_ID, entry->id);
 545         crm_xml_add(xml_rsc, XML_ATTR_TYPE, entry->rsc.type);
 546         crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, entry->rsc.standard);
 547         crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, entry->rsc.provider);
 548 
 549         if (entry->last && entry->last->params) {
 550             const char *container = g_hash_table_lookup(entry->last->params, CRM_META"_"XML_RSC_ATTR_CONTAINER);
 551             if (container) {
 552                 crm_trace("Resource %s is a part of container resource %s", entry->id, container);
 553                 crm_xml_add(xml_rsc, XML_RSC_ATTR_CONTAINER, container);
 554             }
 555         }
 556         controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->failed,
 557                                           lrm_state->node_name);
 558         controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->last,
 559                                           lrm_state->node_name);
 560         for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) {
 561             controld_add_resource_history_xml(xml_rsc, &(entry->rsc), gIter->data,
 562                                               lrm_state->node_name);
 563         }
 564     }
 565 
 566     return FALSE;
 567 }
 568 
 569 xmlNode *
 570 controld_query_executor_state(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 571 {
 572     xmlNode *xml_state = NULL;
 573     xmlNode *xml_data = NULL;
 574     xmlNode *rsc_list = NULL;
 575     crm_node_t *peer = NULL;
 576     lrm_state_t *lrm_state = lrm_state_find(controld_globals.our_nodename);
 577 
 578     if (!lrm_state) {
 579         crm_err("Could not find executor state for node %s",
 580                 controld_globals.our_nodename);
 581         return NULL;
 582     }
 583 
 584     peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY);
 585     CRM_CHECK(peer != NULL, return NULL);
 586 
 587     xml_state = create_node_state_update(peer,
 588                                          node_update_cluster|node_update_peer,
 589                                          NULL, __func__);
 590     if (xml_state == NULL) {
 591         return NULL;
 592     }
 593 
 594     xml_data = create_xml_node(xml_state, XML_CIB_TAG_LRM);
 595     crm_xml_add(xml_data, XML_ATTR_ID, peer->uuid);
 596     rsc_list = create_xml_node(xml_data, XML_LRM_TAG_RESOURCES);
 597 
 598     /* Build a list of active (not always running) resources */
 599     build_active_RAs(lrm_state, rsc_list);
 600 
 601     crm_log_xml_trace(xml_state, "Current executor state");
 602 
 603     return xml_state;
 604 }
 605 
 606 /*!
 607  * \internal
 608  * \brief Map standard Pacemaker return code to operation status and OCF code
 609  *
 610  * \param[out] event  Executor event whose status and return code should be set
 611  * \param[in]  rc     Standard Pacemaker return code
 612  */
 613 void
 614 controld_rc2event(lrmd_event_data_t *event, int rc)
     /* [previous][next][first][last][top][bottom][index][help] */
 615 {
 616     /* This is called for cleanup requests from controller peers/clients, not
 617      * for resource actions, so no exit reason is needed.
 618      */
 619     switch (rc) {
 620         case pcmk_rc_ok:
 621             lrmd__set_result(event, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
 622             break;
 623         case EACCES:
 624             lrmd__set_result(event, PCMK_OCF_INSUFFICIENT_PRIV,
 625                              PCMK_EXEC_ERROR, NULL);
 626             break;
 627         default:
 628             lrmd__set_result(event, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
 629                              NULL);
 630             break;
 631     }
 632 }
 633 
 634 /*!
 635  * \internal
 636  * \brief Trigger a new transition after CIB status was deleted
 637  *
 638  * If a CIB status delete was not expected (as part of the transition graph),
 639  * trigger a new transition by updating the (arbitrary) "last-lrm-refresh"
 640  * cluster property.
 641  *
 642  * \param[in] from_sys  IPC name that requested the delete
 643  * \param[in] rsc_id    Resource whose status was deleted (for logging only)
 644  */
 645 void
 646 controld_trigger_delete_refresh(const char *from_sys, const char *rsc_id)
     /* [previous][next][first][last][top][bottom][index][help] */
 647 {
 648     if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_casei)) {
 649         char *now_s = crm_strdup_printf("%lld", (long long) time(NULL));
 650 
 651         crm_debug("Triggering a refresh after %s cleaned %s", from_sys, rsc_id);
 652         cib__update_node_attr(controld_globals.logger_out,
 653                               controld_globals.cib_conn, cib_none,
 654                               XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL,
 655                               "last-lrm-refresh", now_s, NULL, NULL);
 656         free(now_s);
 657     }
 658 }
 659 
 660 static void
 661 notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc)
     /* [previous][next][first][last][top][bottom][index][help] */
 662 {
 663     lrmd_event_data_t *op = NULL;
 664     const char *from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM);
 665     const char *from_host = crm_element_value(input->msg, F_CRM_HOST_FROM);
 666 
 667     crm_info("Notifying %s on %s that %s was%s deleted",
 668              from_sys, (from_host? from_host : "localhost"), rsc_id,
 669              ((rc == pcmk_ok)? "" : " not"));
 670     op = construct_op(lrm_state, input->xml, rsc_id, PCMK_ACTION_DELETE);
 671     controld_rc2event(op, pcmk_legacy2rc(rc));
 672     controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id);
 673     lrmd_free_event(op);
 674     controld_trigger_delete_refresh(from_sys, rsc_id);
 675 }
 676 
 677 static gboolean
 678 lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 679 {
 680     struct delete_event_s *event = user_data;
 681     struct pending_deletion_op_s *op = value;
 682 
 683     if (pcmk__str_eq(event->rsc, op->rsc, pcmk__str_none)) {
 684         notify_deleted(event->lrm_state, op->input, event->rsc, event->rc);
 685         return TRUE;
 686     }
 687     return FALSE;
 688 }
 689 
 690 static gboolean
 691 lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 692 {
 693     const char *rsc = user_data;
 694     active_op_t *pending = value;
 695 
 696     if (pcmk__str_eq(rsc, pending->rsc_id, pcmk__str_none)) {
 697         crm_info("Removing op %s:%d for deleted resource %s",
 698                  pending->op_key, pending->call_id, rsc);
 699         return TRUE;
 700     }
 701     return FALSE;
 702 }
 703 
 704 static void
 705 delete_rsc_entry(lrm_state_t *lrm_state, ha_msg_input_t *input,
     /* [previous][next][first][last][top][bottom][index][help] */
 706                  const char *rsc_id, GHashTableIter *rsc_iter, int rc,
 707                  const char *user_name, bool from_cib)
 708 {
 709     struct delete_event_s event;
 710 
 711     CRM_CHECK(rsc_id != NULL, return);
 712 
 713     if (rc == pcmk_ok) {
 714         char *rsc_id_copy = strdup(rsc_id);
 715 
 716         if (rsc_iter) {
 717             g_hash_table_iter_remove(rsc_iter);
 718         } else {
 719             g_hash_table_remove(lrm_state->resource_history, rsc_id_copy);
 720         }
 721 
 722         if (from_cib) {
 723             controld_delete_resource_history(rsc_id_copy, lrm_state->node_name,
 724                                              user_name, crmd_cib_smart_opt());
 725         }
 726         g_hash_table_foreach_remove(lrm_state->active_ops,
 727                                     lrm_remove_deleted_op, rsc_id_copy);
 728         free(rsc_id_copy);
 729     }
 730 
 731     if (input) {
 732         notify_deleted(lrm_state, input, rsc_id, rc);
 733     }
 734 
 735     event.rc = rc;
 736     event.rsc = rsc_id;
 737     event.lrm_state = lrm_state;
 738     g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event);
 739 }
 740 
 741 static inline gboolean
 742 last_failed_matches_op(rsc_history_t *entry, const char *op, guint interval_ms)
     /* [previous][next][first][last][top][bottom][index][help] */
 743 {
 744     if (entry == NULL) {
 745         return FALSE;
 746     }
 747     if (op == NULL) {
 748         return TRUE;
 749     }
 750     return (pcmk__str_eq(op, entry->failed->op_type, pcmk__str_casei)
 751             && (interval_ms == entry->failed->interval_ms));
 752 }
 753 
 754 /*!
 755  * \internal
 756  * \brief Clear a resource's last failure
 757  *
 758  * Erase a resource's last failure on a particular node from both the
 759  * LRM resource history in the CIB, and the resource history remembered
 760  * for the LRM state.
 761  *
 762  * \param[in] rsc_id      Resource name
 763  * \param[in] node_name   Node name
 764  * \param[in] operation   If specified, only clear if matching this operation
 765  * \param[in] interval_ms If operation is specified, it has this interval
 766  */
 767 void
 768 lrm_clear_last_failure(const char *rsc_id, const char *node_name,
     /* [previous][next][first][last][top][bottom][index][help] */
 769                        const char *operation, guint interval_ms)
 770 {
 771     lrm_state_t *lrm_state = lrm_state_find(node_name);
 772 
 773     if (lrm_state == NULL) {
 774         return;
 775     }
 776     if (lrm_state->resource_history != NULL) {
 777         rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history,
 778                                                    rsc_id);
 779 
 780         if (last_failed_matches_op(entry, operation, interval_ms)) {
 781             lrmd_free_event(entry->failed);
 782             entry->failed = NULL;
 783         }
 784     }
 785 }
 786 
 787 /* Returns: gboolean - cancellation is in progress */
 788 static gboolean
 789 cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove)
     /* [previous][next][first][last][top][bottom][index][help] */
 790 {
 791     int rc = pcmk_ok;
 792     char *local_key = NULL;
 793     active_op_t *pending = NULL;
 794 
 795     CRM_CHECK(op != 0, return FALSE);
 796     CRM_CHECK(rsc_id != NULL, return FALSE);
 797     if (key == NULL) {
 798         local_key = make_stop_id(rsc_id, op);
 799         key = local_key;
 800     }
 801     pending = g_hash_table_lookup(lrm_state->active_ops, key);
 802 
 803     if (pending) {
 804         if (remove && !pcmk_is_set(pending->flags, active_op_remove)) {
 805             controld_set_active_op_flags(pending, active_op_remove);
 806             crm_debug("Scheduling %s for removal", key);
 807         }
 808 
 809         if (pcmk_is_set(pending->flags, active_op_cancelled)) {
 810             crm_debug("Operation %s already cancelled", key);
 811             free(local_key);
 812             return FALSE;
 813         }
 814         controld_set_active_op_flags(pending, active_op_cancelled);
 815 
 816     } else {
 817         crm_info("No pending op found for %s", key);
 818         free(local_key);
 819         return FALSE;
 820     }
 821 
 822     crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key);
 823     rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type,
 824                           pending->interval_ms);
 825     if (rc == pcmk_ok) {
 826         crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key);
 827         free(local_key);
 828         return TRUE;
 829     }
 830 
 831     crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key);
 832     /* The caller needs to make sure the entry is
 833      * removed from the active operations list
 834      *
 835      * Usually by returning TRUE inside the worker function
 836      * supplied to g_hash_table_foreach_remove()
 837      *
 838      * Not removing the entry from active operations will block
 839      * the node from shutting down
 840      */
 841     free(local_key);
 842     return FALSE;
 843 }
 844 
 845 struct cancel_data {        // User data handed to cancel_action_by_key() by cancel_op_key()
 846     gboolean done;          // Set to TRUE once an active op with a matching key was found
 847     gboolean remove;        // Passed through to cancel_op() as its "remove" argument
 848     const char *key;        // Operation key compared against each active op's op_key
 849     lrmd_rsc_info_t *rsc;   // Resource whose id is passed to cancel_op()
 850     lrm_state_t *lrm_state; // Executor connection state passed to cancel_op()
 851 };
 852 
 853 static gboolean
 854 cancel_action_by_key(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 855 {
 856     gboolean remove = FALSE;
 857     struct cancel_data *data = user_data;
 858     active_op_t *op = value;
 859 
 860     if (pcmk__str_eq(op->op_key, data->key, pcmk__str_none)) {
 861         data->done = TRUE;
 862         remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove);
 863     }
 864     return remove;
 865 }
 866 
 867 static gboolean
 868 cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove)
     /* [previous][next][first][last][top][bottom][index][help] */
 869 {
 870     guint removed = 0;
 871     struct cancel_data data;
 872 
 873     CRM_CHECK(rsc != NULL, return FALSE);
 874     CRM_CHECK(key != NULL, return FALSE);
 875 
 876     data.key = key;
 877     data.rsc = rsc;
 878     data.done = FALSE;
 879     data.remove = remove;
 880     data.lrm_state = lrm_state;
 881 
 882     removed = g_hash_table_foreach_remove(lrm_state->active_ops,
 883                                           cancel_action_by_key, &data);
 884     crm_trace("Removed %u op cache entries, new size: %u",
 885               removed, g_hash_table_size(lrm_state->active_ops));
 886     return data.done;
 887 }
 888 
 889 /*!
 890  * \internal
 891  * \brief Retrieve resource information from LRM
 892  *
 893  * \param[in,out]  lrm_state  Executor connection state to use
 894  * \param[in]      rsc_xml    XML containing resource configuration
 895  * \param[in]      do_create  If true, register resource if not already
 896  * \param[out]     rsc_info   Where to store information obtained from executor
 897  *
 898  * \retval pcmk_ok   Success (and rsc_info holds newly allocated result)
 899  * \retval -EINVAL   Required information is missing from arguments
 900  * \retval -ENOTCONN No active connection to LRM
 901  * \retval -ENODEV   Resource not found
 902  * \retval -errno    Error communicating with executor when registering resource
 903  *
 904  * \note Caller is responsible for freeing result on success.
 905  */
 906 static int
 907 get_lrm_resource(lrm_state_t *lrm_state, const xmlNode *rsc_xml,
     /* [previous][next][first][last][top][bottom][index][help] */
 908                  gboolean do_create, lrmd_rsc_info_t **rsc_info)
 909 {
 910     const char *id = ID(rsc_xml);
 911 
 912     CRM_CHECK(lrm_state && rsc_xml && rsc_info, return -EINVAL);
 913     CRM_CHECK(id, return -EINVAL);
 914 
 915     if (lrm_state_is_connected(lrm_state) == FALSE) {
 916         return -ENOTCONN;
 917     }
 918 
 919     crm_trace("Retrieving resource information for %s from the executor", id);
 920     *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0);
 921 
 922     // If resource isn't known by ID, try clone name, if provided
 923     if (!*rsc_info) {
 924         const char *long_id = crm_element_value(rsc_xml, XML_ATTR_ID_LONG);
 925 
 926         if (long_id) {
 927             *rsc_info = lrm_state_get_rsc_info(lrm_state, long_id, 0);
 928         }
 929     }
 930 
 931     if ((*rsc_info == NULL) && do_create) {
 932         const char *class = crm_element_value(rsc_xml, XML_AGENT_ATTR_CLASS);
 933         const char *provider = crm_element_value(rsc_xml, XML_AGENT_ATTR_PROVIDER);
 934         const char *type = crm_element_value(rsc_xml, XML_ATTR_TYPE);
 935         int rc;
 936 
 937         crm_trace("Registering resource %s with the executor", id);
 938         rc = lrm_state_register_rsc(lrm_state, id, class, provider, type,
 939                                     lrmd_opt_drop_recurring);
 940         if (rc != pcmk_ok) {
 941             fsa_data_t *msg_data = NULL;
 942 
 943             crm_err("Could not register resource %s with the executor on %s: %s "
 944                     CRM_XS " rc=%d",
 945                     id, lrm_state->node_name, pcmk_strerror(rc), rc);
 946 
 947             /* Register this as an internal error if this involves the local
 948              * executor. Otherwise, we're likely dealing with an unresponsive
 949              * remote node, which is not an FSA failure.
 950              */
 951             if (lrm_state_is_local(lrm_state) == TRUE) {
 952                 register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
 953             }
 954             return rc;
 955         }
 956 
 957         *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0);
 958     }
 959     return *rsc_info? pcmk_ok : -ENODEV;
 960 }
 961 
 962 static void
 963 delete_resource(lrm_state_t *lrm_state, const char *id, lrmd_rsc_info_t *rsc,
     /* [previous][next][first][last][top][bottom][index][help] */
 964                 GHashTableIter *iter, const char *sys, const char *user,
 965                 ha_msg_input_t *request, bool unregister, bool from_cib)
 966 {
 967     int rc = pcmk_ok;
 968 
 969     crm_info("Removing resource %s from executor for %s%s%s",
 970              id, sys, (user? " as " : ""), (user? user : ""));
 971 
 972     if (rsc && unregister) {
 973         rc = lrm_state_unregister_rsc(lrm_state, id, 0);
 974     }
 975 
 976     if (rc == pcmk_ok) {
 977         crm_trace("Resource %s deleted from executor", id);
 978     } else if (rc == -EINPROGRESS) {
 979         crm_info("Deletion of resource '%s' from executor is pending", id);
 980         if (request) {
 981             struct pending_deletion_op_s *op = NULL;
 982             char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE);
 983 
 984             op = calloc(1, sizeof(struct pending_deletion_op_s));
 985             op->rsc = strdup(rsc->id);
 986             op->input = copy_ha_msg_input(request);
 987             g_hash_table_insert(lrm_state->deletion_ops, ref, op);
 988         }
 989         return;
 990     } else {
 991         crm_warn("Could not delete '%s' from executor for %s%s%s: %s "
 992                  CRM_XS " rc=%d", id, sys, (user? " as " : ""),
 993                  (user? user : ""), pcmk_strerror(rc), rc);
 994     }
 995 
 996     delete_rsc_entry(lrm_state, request, id, iter, rc, user, from_cib);
 997 }
 998 
 999 static int
1000 get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id)
     /* [previous][next][first][last][top][bottom][index][help] */
1001 {
1002     int call_id = 999999999;
1003     rsc_history_t *entry = NULL;
1004 
1005     if(lrm_state) {
1006         entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
1007     }
1008 
1009     /* Make sure the call id is greater than the last successful operation,
1010      * otherwise the failure will not result in a possible recovery of the resource
1011      * as it could appear the failure occurred before the successful start */
1012     if (entry) {
1013         call_id = entry->last_callid + 1;
1014     }
1015 
1016     if (call_id < 0) {
1017         call_id = 1;
1018     }
1019     return call_id;
1020 }
1021 
1022 static void
1023 fake_op_status(lrm_state_t *lrm_state, lrmd_event_data_t *op, int op_status,
     /* [previous][next][first][last][top][bottom][index][help] */
1024                enum ocf_exitcode op_exitcode, const char *exit_reason)
1025 {
1026     op->call_id = get_fake_call_id(lrm_state, op->rsc_id);
1027     op->t_run = time(NULL);
1028     op->t_rcchange = op->t_run;
1029     lrmd__set_result(op, op_exitcode, op_status, exit_reason);
1030 }
1031 
1032 static void
1033 force_reprobe(lrm_state_t *lrm_state, const char *from_sys,
     /* [previous][next][first][last][top][bottom][index][help] */
1034               const char *from_host, const char *user_name,
1035               gboolean is_remote_node, bool reprobe_all_nodes)
1036 {
1037     GHashTableIter gIter;
1038     rsc_history_t *entry = NULL;
1039 
1040     crm_info("Clearing resource history on node %s", lrm_state->node_name);
1041     g_hash_table_iter_init(&gIter, lrm_state->resource_history);
1042     while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
1043         /* only unregister the resource during a reprobe if it is not a remote connection
1044          * resource. otherwise unregistering the connection will terminate remote-node
1045          * membership */
1046         bool unregister = true;
1047 
1048         if (is_remote_lrmd_ra(NULL, NULL, entry->id)) {
1049             unregister = false;
1050 
1051             if (reprobe_all_nodes) {
1052                 lrm_state_t *remote_lrm_state = lrm_state_find(entry->id);
1053 
1054                 if (remote_lrm_state != NULL) {
1055                     /* If reprobing all nodes, be sure to reprobe the remote
1056                      * node before clearing its connection resource
1057                      */
1058                     force_reprobe(remote_lrm_state, from_sys, from_host,
1059                                   user_name, TRUE, reprobe_all_nodes);
1060                 }
1061             }
1062         }
1063 
1064         /* Don't delete from the CIB, since we'll delete the whole node's LRM
1065          * state from the CIB soon
1066          */
1067         delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys,
1068                         user_name, NULL, unregister, false);
1069     }
1070 
1071     /* Now delete the copy in the CIB */
1072     controld_delete_node_state(lrm_state->node_name, controld_section_lrm,
1073                                cib_scope_local);
1074 
1075     // @COMPAT DCs < 1.1.14 need this deleted (in case it was explicitly false)
1076     update_attrd(lrm_state->node_name, CRM_OP_PROBED, NULL, user_name, is_remote_node);
1077 }
1078 
1079 /*!
1080  * \internal
1081  * \brief Fail a requested action without actually executing it
1082  *
1083  * For an action that can't be executed, process it similarly to an actual
1084  * execution result, with specified error status (except for notify actions,
1085  * which will always be treated as successful).
1086  *
1087  * \param[in,out] lrm_state    Executor connection that action is for
1088  * \param[in]     action       Action XML from request
1089  * \param[in]     rc           Desired return code to use
1090  * \param[in]     op_status    Desired operation status to use
1091  * \param[in]     exit_reason  Human-friendly detail, if error
1092  */
1093 static void
1094 synthesize_lrmd_failure(lrm_state_t *lrm_state, const xmlNode *action,
     /* [previous][next][first][last][top][bottom][index][help] */
1095                         int op_status, enum ocf_exitcode rc,
1096                         const char *exit_reason)
1097 {
1098     lrmd_event_data_t *op = NULL;
1099     const char *operation = crm_element_value(action, XML_LRM_ATTR_TASK);
1100     const char *target_node = crm_element_value(action, XML_LRM_ATTR_TARGET);
1101     xmlNode *xml_rsc = find_xml_node(action, XML_CIB_TAG_RESOURCE, TRUE);
1102 
1103     if ((xml_rsc == NULL) || (ID(xml_rsc) == NULL)) {
1104         /* @TODO Should we do something else, like direct ack? */
1105         crm_info("Can't fake %s failure (%d) on %s without resource configuration",
1106                  crm_element_value(action, XML_LRM_ATTR_TASK_KEY), rc,
1107                  target_node);
1108         return;
1109 
1110     } else if(operation == NULL) {
1111         /* This probably came from crm_resource -C, nothing to do */
1112         crm_info("Can't fake %s failure (%d) on %s without operation",
1113                  ID(xml_rsc), rc, target_node);
1114         return;
1115     }
1116 
1117     op = construct_op(lrm_state, action, ID(xml_rsc), operation);
1118 
1119     if (pcmk__str_eq(operation, PCMK_ACTION_NOTIFY, pcmk__str_casei)) {
1120         // Notifications can't fail
1121         fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_OK, NULL);
1122     } else {
1123         fake_op_status(lrm_state, op, op_status, rc, exit_reason);
1124     }
1125 
1126     crm_info("Faking " PCMK__OP_FMT " result (%d) on %s",
1127              op->rsc_id, op->op_type, op->interval_ms, op->rc, target_node);
1128 
1129     // Process the result as if it came from the LRM
1130     process_lrm_event(lrm_state, op, NULL, action);
1131     lrmd_free_event(op);
1132 }
1133 
1134 /*!
1135  * \internal
1136  * \brief Get target of an LRM operation (replacing \p NULL with local node
1137  *        name)
1138  *
1139  * \param[in] xml  LRM operation data XML
1140  *
1141  * \return LRM operation target node name (local node or Pacemaker Remote node)
1142  */
1143 static const char *
1144 lrm_op_target(const xmlNode *xml)
     /* [previous][next][first][last][top][bottom][index][help] */
1145 {
1146     const char *target = NULL;
1147 
1148     if (xml) {
1149         target = crm_element_value(xml, XML_LRM_ATTR_TARGET);
1150     }
1151     if (target == NULL) {
1152         target = controld_globals.our_nodename;
1153     }
1154     return target;
1155 }
1156 
1157 static void
1158 fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name,
     /* [previous][next][first][last][top][bottom][index][help] */
1159                   const char *from_host, const char *from_sys)
1160 {
1161     lrmd_event_data_t *op = NULL;
1162     lrmd_rsc_info_t *rsc = NULL;
1163     xmlNode *xml_rsc = find_xml_node(xml, XML_CIB_TAG_RESOURCE, TRUE);
1164 
1165     CRM_CHECK(xml_rsc != NULL, return);
1166 
1167     /* The executor simply executes operations and reports the results, without
1168      * any concept of success or failure, so to fail a resource, we must fake
1169      * what a failure looks like.
1170      *
1171      * To do this, we create a fake executor operation event for the resource,
1172      * and pass that event to the executor client callback so it will be
1173      * processed as if it came from the executor.
1174      */
1175     op = construct_op(lrm_state, xml, ID(xml_rsc), "asyncmon");
1176 
1177     free((char*) op->user_data);
1178     op->user_data = NULL;
1179     op->interval_ms = 0;
1180 
1181     if (user_name && !pcmk__is_privileged(user_name)) {
1182         crm_err("%s does not have permission to fail %s", user_name, ID(xml_rsc));
1183         fake_op_status(lrm_state, op, PCMK_EXEC_ERROR,
1184                        PCMK_OCF_INSUFFICIENT_PRIV,
1185                        "Unprivileged user cannot fail resources");
1186         controld_ack_event_directly(from_host, from_sys, NULL, op, ID(xml_rsc));
1187         lrmd_free_event(op);
1188         return;
1189     }
1190 
1191 
1192     if (get_lrm_resource(lrm_state, xml_rsc, TRUE, &rsc) == pcmk_ok) {
1193         crm_info("Failing resource %s...", rsc->id);
1194         fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_UNKNOWN_ERROR,
1195                        "Simulated failure");
1196         process_lrm_event(lrm_state, op, NULL, xml);
1197         op->rc = PCMK_OCF_OK; // The request to fail the resource succeeded
1198         lrmd_free_rsc_info(rsc);
1199 
1200     } else {
1201         crm_info("Cannot find/create resource in order to fail it...");
1202         crm_log_xml_warn(xml, "bad input");
1203         fake_op_status(lrm_state, op, PCMK_EXEC_ERROR, PCMK_OCF_UNKNOWN_ERROR,
1204                        "Cannot fail unknown resource");
1205     }
1206 
1207     controld_ack_event_directly(from_host, from_sys, NULL, op, ID(xml_rsc));
1208     lrmd_free_event(op);
1209 }
1210 
1211 static void
1212 handle_reprobe_op(lrm_state_t *lrm_state, const char *from_sys,
     /* [previous][next][first][last][top][bottom][index][help] */
1213                   const char *from_host, const char *user_name,
1214                   gboolean is_remote_node, bool reprobe_all_nodes)
1215 {
1216     crm_notice("Forcing the status of all resources to be redetected");
1217     force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node,
1218                   reprobe_all_nodes);
1219 
1220     if (!pcmk__strcase_any_of(from_sys, CRM_SYSTEM_PENGINE, CRM_SYSTEM_TENGINE, NULL)) {
1221 
1222         xmlNode *reply = create_request(CRM_OP_INVOKE_LRM, NULL, from_host,
1223                                         from_sys, CRM_SYSTEM_LRMD,
1224                                         controld_globals.our_uuid);
1225 
1226         crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host);
1227 
1228         if (relay_message(reply, TRUE) == FALSE) {
1229             crm_log_xml_err(reply, "Unable to route reply");
1230         }
1231         free_xml(reply);
1232     }
1233 }
1234 
1235 static bool do_lrm_cancel(ha_msg_input_t *input, lrm_state_t *lrm_state,
     /* [previous][next][first][last][top][bottom][index][help] */
1236               lrmd_rsc_info_t *rsc, const char *from_host, const char *from_sys)
1237 {
1238     char *op_key = NULL;
1239     char *meta_key = NULL;
1240     int call = 0;
1241     const char *call_id = NULL;
1242     const char *op_task = NULL;
1243     guint interval_ms = 0;
1244     gboolean in_progress = FALSE;
1245     xmlNode *params = find_xml_node(input->xml, XML_TAG_ATTRS, TRUE);
1246 
1247     CRM_CHECK(params != NULL, return FALSE);
1248 
1249     meta_key = crm_meta_name(XML_LRM_ATTR_TASK);
1250     op_task = crm_element_value(params, meta_key);
1251     free(meta_key);
1252     CRM_CHECK(op_task != NULL, return FALSE);
1253 
1254     meta_key = crm_meta_name(XML_LRM_ATTR_INTERVAL_MS);
1255     if (crm_element_value_ms(params, meta_key, &interval_ms) != pcmk_ok) {
1256         free(meta_key);
1257         return FALSE;
1258     }
1259     free(meta_key);
1260 
1261     op_key = pcmk__op_key(rsc->id, op_task, interval_ms);
1262 
1263     meta_key = crm_meta_name(XML_LRM_ATTR_CALLID);
1264     call_id = crm_element_value(params, meta_key);
1265     free(meta_key);
1266 
1267     crm_debug("Scheduler requested op %s (call=%s) be cancelled",
1268               op_key, (call_id? call_id : "NA"));
1269     pcmk__scan_min_int(call_id, &call, 0);
1270     if (call == 0) {
1271         // Normal case when the scheduler cancels a recurring op
1272         in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE);
1273 
1274     } else {
1275         // Normal case when the scheduler cancels an orphan op
1276         in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE);
1277     }
1278 
1279     // Acknowledge cancellation operation if for a remote connection resource
1280     if (!in_progress || is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
1281         char *op_id = make_stop_id(rsc->id, call);
1282 
1283         if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) {
1284             crm_info("Nothing known about operation %d for %s", call, op_key);
1285         }
1286         controld_delete_action_history_by_key(rsc->id, lrm_state->node_name,
1287                                               op_key, call);
1288         send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task,
1289                          from_host, from_sys);
1290 
1291         /* needed at least for cancellation of a remote operation */
1292         if (lrm_state->active_ops != NULL) {
1293             g_hash_table_remove(lrm_state->active_ops, op_id);
1294         }
1295         free(op_id);
1296 
1297     } else {
1298         /* No ack is needed since abcdaa8, but peers with older versions
1299          * in a rolling upgrade need one. We didn't bump the feature set
1300          * at that commit, so we can only compare against the previous
1301          * CRM version (3.0.8). If any peers have feature set 3.0.9 but
1302          * not abcdaa8, they will time out waiting for the ack (no
1303          * released versions of Pacemaker are affected).
1304          */
1305         const char *peer_version = crm_element_value(params, XML_ATTR_CRM_VERSION);
1306 
1307         if (compare_version(peer_version, "3.0.8") <= 0) {
1308             crm_info("Sending compatibility ack for %s cancellation to %s (CRM version %s)",
1309                      op_key, from_host, peer_version);
1310             send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task,
1311                              from_host, from_sys);
1312         }
1313     }
1314 
1315     free(op_key);
1316     return TRUE;
1317 }
1318 
1319 static void
1320 do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state,
     /* [previous][next][first][last][top][bottom][index][help] */
1321               lrmd_rsc_info_t *rsc, const char *from_sys, const char *from_host,
1322               bool crm_rsc_delete, const char *user_name)
1323 {
1324     bool unregister = true;
1325     int cib_rc = controld_delete_resource_history(rsc->id, lrm_state->node_name,
1326                                                   user_name,
1327                                                   cib_dryrun|cib_sync_call);
1328 
1329     if (cib_rc != pcmk_rc_ok) {
1330         lrmd_event_data_t *op = NULL;
1331 
1332         op = construct_op(lrm_state, input->xml, rsc->id, PCMK_ACTION_DELETE);
1333 
1334         /* These are resource clean-ups, not actions, so no exit reason is
1335          * needed.
1336          */
1337         lrmd__set_result(op, pcmk_rc2ocf(cib_rc), PCMK_EXEC_ERROR, NULL);
1338         controld_ack_event_directly(from_host, from_sys, NULL, op, rsc->id);
1339         lrmd_free_event(op);
1340         return;
1341     }
1342 
1343     if (crm_rsc_delete && is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
1344         unregister = false;
1345     }
1346 
1347     delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys,
1348                     user_name, input, unregister, true);
1349 }
1350 
1351 // User data for asynchronous metadata execution (created by new_metadata_cb_data(), released by free_metadata_cb_data())
1352 struct metadata_cb_data {
1353     lrmd_rsc_info_t *rsc;   // Copy of resource information (made with lrmd_copy_rsc_info())
1354     xmlNode *input_xml;     // Copy of FSA input XML (made with copy_xml())
1355 };
1356 
1357 static struct metadata_cb_data *
1358 new_metadata_cb_data(lrmd_rsc_info_t *rsc, xmlNode *input_xml)
     /* [previous][next][first][last][top][bottom][index][help] */
1359 {
1360     struct metadata_cb_data *data = NULL;
1361 
1362     data = calloc(1, sizeof(struct metadata_cb_data));
1363     CRM_ASSERT(data != NULL);
1364     data->input_xml = copy_xml(input_xml);
1365     data->rsc = lrmd_copy_rsc_info(rsc);
1366     return data;
1367 }
1368 
1369 static void
1370 free_metadata_cb_data(struct metadata_cb_data *data)
     /* [previous][next][first][last][top][bottom][index][help] */
1371 {
1372     lrmd_free_rsc_info(data->rsc);
1373     free_xml(data->input_xml);
1374     free(data);
1375 }
1376 
1377 /*!
1378  * \internal
1379  * \brief Execute an action after metadata has been retrieved
1380  *
1381  * \param[in] pid        Ignored
1382  * \param[in] result     Result of metadata action
1383  * \param[in] user_data  Metadata callback data
1384  */
1385 static void
1386 metadata_complete(int pid, const pcmk__action_result_t *result, void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1387 {
1388     struct metadata_cb_data *data = (struct metadata_cb_data *) user_data;
1389 
1390     struct ra_metadata_s *md = NULL;
1391     lrm_state_t *lrm_state = lrm_state_find(lrm_op_target(data->input_xml));
1392 
1393     if ((lrm_state != NULL) && pcmk__result_ok(result)) {
1394         md = controld_cache_metadata(lrm_state->metadata_cache, data->rsc,
1395                                      result->action_stdout);
1396     }
1397     if (!pcmk_is_set(controld_globals.fsa_input_register, R_HA_DISCONNECTED)) {
1398         do_lrm_rsc_op(lrm_state, data->rsc, data->input_xml, md);
1399     }
1400     free_metadata_cb_data(data);
1401 }
1402 
1403 /*       A_LRM_INVOKE   */
1404 void
1405 do_lrm_invoke(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
1406               enum crmd_fsa_cause cause,
1407               enum crmd_fsa_state cur_state,
1408               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
1409 {
1410     lrm_state_t *lrm_state = NULL;
1411     const char *crm_op = NULL;
1412     const char *from_sys = NULL;
1413     const char *from_host = NULL;
1414     const char *operation = NULL;
1415     ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
1416     const char *user_name = NULL;
1417     const char *target_node = lrm_op_target(input->xml);
1418     gboolean is_remote_node = FALSE;
1419     bool crm_rsc_delete = FALSE;
1420 
1421     // Message routed to the local node is targeting a specific, non-local node
1422     is_remote_node = !pcmk__str_eq(target_node, controld_globals.our_nodename,
1423                                    pcmk__str_casei);
1424 
1425     lrm_state = lrm_state_find(target_node);
1426     if ((lrm_state == NULL) && is_remote_node) {
1427         crm_err("Failing action because local node has never had connection to remote node %s",
1428                 target_node);
1429         synthesize_lrmd_failure(NULL, input->xml, PCMK_EXEC_NOT_CONNECTED,
1430                                 PCMK_OCF_UNKNOWN_ERROR,
1431                                 "Local node has no connection to remote");
1432         return;
1433     }
1434     CRM_ASSERT(lrm_state != NULL);
1435 
1436     user_name = pcmk__update_acl_user(input->msg, F_CRM_USER, NULL);
1437     crm_op = crm_element_value(input->msg, F_CRM_TASK);
1438     from_sys = crm_element_value(input->msg, F_CRM_SYS_FROM);
1439     if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
1440         from_host = crm_element_value(input->msg, F_CRM_HOST_FROM);
1441     }
1442 
1443     if (pcmk__str_eq(crm_op, PCMK_ACTION_LRM_DELETE, pcmk__str_none)) {
1444         if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
1445             crm_rsc_delete = TRUE; // from crm_resource
1446         }
1447         operation = PCMK_ACTION_DELETE;
1448 
1449     } else if (input->xml != NULL) {
1450         operation = crm_element_value(input->xml, XML_LRM_ATTR_TASK);
1451     }
1452 
1453     CRM_CHECK(!pcmk__str_empty(crm_op) || !pcmk__str_empty(operation), return);
1454 
1455     crm_trace("'%s' execution request from %s as %s user",
1456               pcmk__s(crm_op, operation),
1457               pcmk__s(from_sys, "unknown subsystem"),
1458               pcmk__s(user_name, "current"));
1459 
1460     if (pcmk__str_eq(crm_op, CRM_OP_LRM_FAIL, pcmk__str_none)) {
1461         fail_lrm_resource(input->xml, lrm_state, user_name, from_host,
1462                           from_sys);
1463 
1464     } else if (pcmk__str_eq(crm_op, CRM_OP_LRM_REFRESH, pcmk__str_none)) {
1465         /* @COMPAT This can only be sent by crm_resource --refresh on a
1466          * Pacemaker Remote node running Pacemaker 1.1.9, which is extremely
1467          * unlikely. It previously would cause the controller to re-write its
1468          * resource history to the CIB. Just ignore it.
1469          */
1470         crm_notice("Ignoring refresh request from Pacemaker Remote 1.1.9 node");
1471 
1472     // @COMPAT DCs <1.1.14 in a rolling upgrade might schedule this op
1473     } else if (pcmk__str_eq(operation, CRM_OP_PROBED, pcmk__str_none)) {
1474         update_attrd(lrm_state->node_name, CRM_OP_PROBED, XML_BOOLEAN_TRUE,
1475                      user_name, is_remote_node);
1476 
1477     } else if (pcmk__str_eq(crm_op, CRM_OP_REPROBE, pcmk__str_none)
1478                || pcmk__str_eq(operation, CRM_OP_REPROBE, pcmk__str_none)) {
1479         const char *raw_target = NULL;
1480 
1481         if (input->xml != NULL) {
1482             // For CRM_OP_REPROBE, a NULL target means we're targeting all nodes
1483             raw_target = crm_element_value(input->xml, XML_LRM_ATTR_TARGET);
1484         }
1485         handle_reprobe_op(lrm_state, from_sys, from_host, user_name,
1486                           is_remote_node, (raw_target == NULL));
1487 
1488     } else if (operation != NULL) {
1489         lrmd_rsc_info_t *rsc = NULL;
1490         xmlNode *xml_rsc = find_xml_node(input->xml, XML_CIB_TAG_RESOURCE, TRUE);
1491         gboolean create_rsc = !pcmk__str_eq(operation, PCMK_ACTION_DELETE,
1492                                             pcmk__str_none);
1493         int rc;
1494 
1495         // We can't return anything meaningful without a resource ID
1496         CRM_CHECK(xml_rsc && ID(xml_rsc), return);
1497 
1498         rc = get_lrm_resource(lrm_state, xml_rsc, create_rsc, &rsc);
1499         if (rc == -ENOTCONN) {
1500             synthesize_lrmd_failure(lrm_state, input->xml,
1501                                     PCMK_EXEC_NOT_CONNECTED,
1502                                     PCMK_OCF_UNKNOWN_ERROR,
1503                                     "Not connected to remote executor");
1504             return;
1505 
1506         } else if ((rc < 0) && !create_rsc) {
1507             /* Delete of malformed or nonexistent resource
1508              * (deleting something that does not exist is a success)
1509              */
1510             crm_notice("Not registering resource '%s' for a %s event "
1511                        CRM_XS " get-rc=%d (%s) transition-key=%s",
1512                        ID(xml_rsc), operation,
1513                        rc, pcmk_strerror(rc), ID(input->xml));
1514             delete_rsc_entry(lrm_state, input, ID(xml_rsc), NULL, pcmk_ok,
1515                              user_name, true);
1516             return;
1517 
1518         } else if (rc == -EINVAL) {
1519             // Resource operation on malformed resource
1520             crm_err("Invalid resource definition for %s", ID(xml_rsc));
1521             crm_log_xml_warn(input->msg, "invalid resource");
1522             synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR,
1523                                     PCMK_OCF_NOT_CONFIGURED, // fatal error
1524                                     "Invalid resource definition");
1525             return;
1526 
1527         } else if (rc < 0) {
1528             // Error communicating with the executor
1529             crm_err("Could not register resource '%s' with executor: %s "
1530                     CRM_XS " rc=%d",
1531                     ID(xml_rsc), pcmk_strerror(rc), rc);
1532             crm_log_xml_warn(input->msg, "failed registration");
1533             synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR,
1534                                     PCMK_OCF_INVALID_PARAM, // hard error
1535                                     "Could not register resource with executor");
1536             return;
1537         }
1538 
1539         if (pcmk__str_eq(operation, PCMK_ACTION_CANCEL, pcmk__str_none)) {
1540             if (!do_lrm_cancel(input, lrm_state, rsc, from_host, from_sys)) {
1541                 crm_log_xml_warn(input->xml, "Bad command");
1542             }
1543 
1544         } else if (pcmk__str_eq(operation, PCMK_ACTION_DELETE,
1545                                 pcmk__str_none)) {
1546             do_lrm_delete(input, lrm_state, rsc, from_sys, from_host,
1547                           crm_rsc_delete, user_name);
1548 
1549         } else {
1550             struct ra_metadata_s *md = NULL;
1551 
1552             /* Getting metadata from cache is OK except for start actions --
1553              * always refresh from the agent for those, in case the resource
1554              * agent was updated.
1555              *
1556              * @TODO Only refresh metadata for starts if the agent actually
1557              * changed (using something like inotify, or a hash or modification
1558              * time of the agent executable).
1559              */
1560             if (strcmp(operation, PCMK_ACTION_START) != 0) {
1561                 md = controld_get_rsc_metadata(lrm_state, rsc,
1562                                                controld_metadata_from_cache);
1563             }
1564 
1565             if ((md == NULL) && crm_op_needs_metadata(rsc->standard,
1566                                                       operation)) {
1567                 /* Most likely, we'll need the agent metadata to record the
1568                  * pending operation and the operation result. Get it now rather
1569                  * than wait until then, so the metadata action doesn't eat into
1570                  * the real action's timeout.
1571                  *
1572                  * @TODO Metadata is retrieved via direct execution of the
1573                  * agent, which has a couple of related issues: the executor
1574                  * should execute agents, not the controller; and metadata for
1575                  * Pacemaker Remote nodes should be collected on those nodes,
1576                  * not locally.
1577                  */
1578                 struct metadata_cb_data *data = NULL;
1579 
1580                 data = new_metadata_cb_data(rsc, input->xml);
1581                 crm_info("Retrieving metadata for %s (%s%s%s:%s) asynchronously",
1582                          rsc->id, rsc->standard,
1583                          ((rsc->provider == NULL)? "" : ":"),
1584                          ((rsc->provider == NULL)? "" : rsc->provider),
1585                          rsc->type);
1586                 (void) lrmd__metadata_async(rsc, metadata_complete,
1587                                             (void *) data);
1588             } else {
1589                 do_lrm_rsc_op(lrm_state, rsc, input->xml, md);
1590             }
1591         }
1592 
1593         lrmd_free_rsc_info(rsc);
1594 
1595     } else {
1596         crm_err("Invalid execution request: unknown command '%s' (bug?)",
1597                 crm_op);
1598         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
1599     }
1600 }
1601 
1602 static lrmd_event_data_t *
1603 construct_op(const lrm_state_t *lrm_state, const xmlNode *rsc_op,
     /* [previous][next][first][last][top][bottom][index][help] */
1604              const char *rsc_id, const char *operation)
1605 {
1606     lrmd_event_data_t *op = NULL;
1607     const char *op_delay = NULL;
1608     const char *op_timeout = NULL;
1609     GHashTable *params = NULL;
1610 
1611     xmlNode *primitive = NULL;
1612     const char *class = NULL;
1613 
1614     const char *transition = NULL;
1615 
1616     CRM_ASSERT(rsc_id && operation);
1617 
1618     op = lrmd_new_event(rsc_id, operation, 0);
1619     op->type = lrmd_event_exec_complete;
1620     op->timeout = 0;
1621     op->start_delay = 0;
1622     lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL);
1623 
1624     if (rsc_op == NULL) {
1625         CRM_LOG_ASSERT(pcmk__str_eq(operation, PCMK_ACTION_STOP,
1626                                     pcmk__str_casei));
1627         op->user_data = NULL;
1628         /* the stop_all_resources() case
1629          * by definition there is no DC (or they'd be shutting
1630          *   us down).
1631          * So we should put our version here.
1632          */
1633         op->params = pcmk__strkey_table(free, free);
1634 
1635         g_hash_table_insert(op->params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET));
1636 
1637         crm_trace("Constructed %s op for %s", operation, rsc_id);
1638         return op;
1639     }
1640 
1641     params = xml2list(rsc_op);
1642     g_hash_table_remove(params, CRM_META "_op_target_rc");
1643 
1644     op_delay = crm_meta_value(params, XML_OP_ATTR_START_DELAY);
1645     pcmk__scan_min_int(op_delay, &op->start_delay, 0);
1646 
1647     op_timeout = crm_meta_value(params, XML_ATTR_TIMEOUT);
1648     pcmk__scan_min_int(op_timeout, &op->timeout, 0);
1649 
1650     if (pcmk__guint_from_hash(params, CRM_META "_" XML_LRM_ATTR_INTERVAL_MS, 0,
1651                               &(op->interval_ms)) != pcmk_rc_ok) {
1652         op->interval_ms = 0;
1653     }
1654 
1655     /* Use pcmk_monitor_timeout instead of meta timeout for stonith
1656        recurring monitor, if set */
1657     primitive = find_xml_node(rsc_op, XML_CIB_TAG_RESOURCE, FALSE);
1658     class = crm_element_value(primitive, XML_AGENT_ATTR_CLASS);
1659 
1660     if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_fence_params)
1661             && pcmk__str_eq(operation, PCMK_ACTION_MONITOR, pcmk__str_casei)
1662             && (op->interval_ms > 0)) {
1663 
1664         op_timeout = g_hash_table_lookup(params, "pcmk_monitor_timeout");
1665         if (op_timeout != NULL) {
1666             op->timeout = crm_get_msec(op_timeout);
1667         }
1668     }
1669 
1670     if (!pcmk__str_eq(operation, PCMK_ACTION_STOP, pcmk__str_casei)) {
1671         op->params = params;
1672 
1673     } else {
1674         rsc_history_t *entry = NULL;
1675 
1676         if (lrm_state) {
1677             entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
1678         }
1679 
1680         /* If we do not have stop parameters cached, use
1681          * whatever we are given */
1682         if (!entry || !entry->stop_params) {
1683             op->params = params;
1684         } else {
1685             /* Copy the cached parameter list so that we stop the resource
1686              * with the old attributes, not the new ones */
1687             op->params = pcmk__strkey_table(free, free);
1688 
1689             g_hash_table_foreach(params, copy_meta_keys, op->params);
1690             g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params);
1691             g_hash_table_destroy(params);
1692             params = NULL;
1693         }
1694     }
1695 
1696     /* sanity */
1697     if (op->timeout <= 0) {
1698         op->timeout = op->interval_ms;
1699     }
1700     if (op->start_delay < 0) {
1701         op->start_delay = 0;
1702     }
1703 
1704     transition = crm_element_value(rsc_op, XML_ATTR_TRANSITION_KEY);
1705     CRM_CHECK(transition != NULL, return op);
1706 
1707     op->user_data = strdup(transition);
1708 
1709     if (op->interval_ms != 0) {
1710         if (pcmk__strcase_any_of(operation, PCMK_ACTION_START, PCMK_ACTION_STOP,
1711                                  NULL)) {
1712             crm_err("Start and Stop actions cannot have an interval: %u",
1713                     op->interval_ms);
1714             op->interval_ms = 0;
1715         }
1716     }
1717 
1718     crm_trace("Constructed %s op for %s: interval=%u",
1719               operation, rsc_id, op->interval_ms);
1720 
1721     return op;
1722 }
1723 
1724 /*!
1725  * \internal
1726  * \brief Send a (synthesized) event result
1727  *
1728  * Reply with a synthesized event result directly, as opposed to going through
1729  * the executor.
1730  *
1731  * \param[in]     to_host  Host to send result to
1732  * \param[in]     to_sys   IPC name to send result (NULL for transition engine)
1733  * \param[in]     rsc      Type information about resource the result is for
1734  * \param[in,out] op       Event with result to send
1735  * \param[in]     rsc_id   ID of resource the result is for
1736  */
1737 void
1738 controld_ack_event_directly(const char *to_host, const char *to_sys,
     /* [previous][next][first][last][top][bottom][index][help] */
1739                             const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op,
1740                             const char *rsc_id)
1741 {
1742     xmlNode *reply = NULL;
1743     xmlNode *update, *iter;
1744     crm_node_t *peer = NULL;
1745 
1746     CRM_CHECK(op != NULL, return);
1747     if (op->rsc_id == NULL) {
1748         CRM_ASSERT(rsc_id != NULL);
1749         op->rsc_id = strdup(rsc_id);
1750     }
1751     if (to_sys == NULL) {
1752         to_sys = CRM_SYSTEM_TENGINE;
1753     }
1754 
1755     peer = crm_get_peer(0, controld_globals.our_nodename);
1756     update = create_node_state_update(peer, node_update_none, NULL,
1757                                       __func__);
1758 
1759     iter = create_xml_node(update, XML_CIB_TAG_LRM);
1760     crm_xml_add(iter, XML_ATTR_ID, controld_globals.our_uuid);
1761     iter = create_xml_node(iter, XML_LRM_TAG_RESOURCES);
1762     iter = create_xml_node(iter, XML_LRM_TAG_RESOURCE);
1763 
1764     crm_xml_add(iter, XML_ATTR_ID, op->rsc_id);
1765 
1766     controld_add_resource_history_xml(iter, rsc, op,
1767                                       controld_globals.our_nodename);
1768     reply = create_request(CRM_OP_INVOKE_LRM, update, to_host, to_sys, CRM_SYSTEM_LRMD, NULL);
1769 
1770     crm_log_xml_trace(update, "[direct ACK]");
1771 
1772     crm_debug("ACK'ing resource op " PCMK__OP_FMT " from %s: %s",
1773               op->rsc_id, op->op_type, op->interval_ms, op->user_data,
1774               crm_element_value(reply, XML_ATTR_REFERENCE));
1775 
1776     if (relay_message(reply, TRUE) == FALSE) {
1777         crm_log_xml_err(reply, "Unable to route reply");
1778     }
1779 
1780     free_xml(update);
1781     free_xml(reply);
1782 }
1783 
1784 gboolean
1785 verify_stopped(enum crmd_fsa_state cur_state, int log_level)
     /* [previous][next][first][last][top][bottom][index][help] */
1786 {
1787     gboolean res = TRUE;
1788     GList *lrm_state_list = lrm_state_get_list();
1789     GList *state_entry;
1790 
1791     for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) {
1792         lrm_state_t *lrm_state = state_entry->data;
1793 
1794         if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) {
1795             /* keep iterating through all even when false is returned */
1796             res = FALSE;
1797         }
1798     }
1799 
1800     controld_set_fsa_input_flags(R_SENT_RSC_STOP);
1801     g_list_free(lrm_state_list); lrm_state_list = NULL;
1802     return res;
1803 }
1804 
1805 struct stop_recurring_action_s {
1806     lrmd_rsc_info_t *rsc;
1807     lrm_state_t *lrm_state;
1808 };
1809 
1810 static gboolean
1811 stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1812 {
1813     gboolean remove = FALSE;
1814     struct stop_recurring_action_s *event = user_data;
1815     active_op_t *op = value;
1816 
1817     if ((op->interval_ms != 0)
1818         && pcmk__str_eq(op->rsc_id, event->rsc->id, pcmk__str_none)) {
1819 
1820         crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key);
1821         remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE);
1822     }
1823 
1824     return remove;
1825 }
1826 
1827 static gboolean
1828 stop_recurring_actions(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1829 {
1830     gboolean remove = FALSE;
1831     lrm_state_t *lrm_state = user_data;
1832     active_op_t *op = value;
1833 
1834     if (op->interval_ms != 0) {
1835         crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id,
1836                  (const char *) key);
1837         remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE);
1838     }
1839 
1840     return remove;
1841 }
1842 
1843 /*!
1844  * \internal
1845  * \brief Check whether recurring actions should be cancelled before an action
1846  *
1847  * \param[in] rsc_id       Resource that action is for
1848  * \param[in] action       Action being performed
1849  * \param[in] interval_ms  Operation interval of \p action (in milliseconds)
1850  *
1851  * \return true if recurring actions should be cancelled, otherwise false
1852  */
1853 static bool
1854 should_cancel_recurring(const char *rsc_id, const char *action, guint interval_ms)
     /* [previous][next][first][last][top][bottom][index][help] */
1855 {
1856     if (is_remote_lrmd_ra(NULL, NULL, rsc_id) && (interval_ms == 0)
1857         && (strcmp(action, PCMK_ACTION_MIGRATE_TO) == 0)) {
1858         /* Don't stop monitoring a migrating Pacemaker Remote connection
1859          * resource until the entire migration has completed. We must detect if
1860          * the connection is unexpectedly severed, even during a migration.
1861          */
1862         return false;
1863     }
1864 
1865     // Cancel recurring actions before changing resource state
1866     return (interval_ms == 0)
1867             && !pcmk__str_any_of(action, PCMK_ACTION_MONITOR,
1868                                  PCMK_ACTION_NOTIFY, NULL);
1869 }
1870 
1871 /*!
1872  * \internal
1873  * \brief Check whether an action should not be performed at this time
1874  *
1875  * \param[in] operation  Action to be performed
1876  *
1877  * \return Readable description of why action should not be performed,
1878  *         or NULL if it should be performed
1879  */
1880 static const char *
1881 should_nack_action(const char *action)
     /* [previous][next][first][last][top][bottom][index][help] */
1882 {
1883     if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)
1884         && pcmk__str_eq(action, PCMK_ACTION_START, pcmk__str_none)) {
1885 
1886         register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
1887         return "Not attempting start due to shutdown in progress";
1888     }
1889 
1890     switch (controld_globals.fsa_state) {
1891         case S_NOT_DC:
1892         case S_POLICY_ENGINE:   // Recalculating
1893         case S_TRANSITION_ENGINE:
1894             break;
1895         default:
1896             if (!pcmk__str_eq(action, PCMK_ACTION_STOP, pcmk__str_none)) {
1897                 return "Controller cannot attempt actions at this time";
1898             }
1899             break;
1900     }
1901     return NULL;
1902 }
1903 
1904 static void
1905 do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg,
     /* [previous][next][first][last][top][bottom][index][help] */
1906               struct ra_metadata_s *md)
1907 {
1908     int rc;
1909     int call_id = 0;
1910     char *op_id = NULL;
1911     lrmd_event_data_t *op = NULL;
1912     fsa_data_t *msg_data = NULL;
1913     const char *transition = NULL;
1914     const char *operation = NULL;
1915     const char *nack_reason = NULL;
1916 
1917     CRM_CHECK((rsc != NULL) && (msg != NULL), return);
1918 
1919     operation = crm_element_value(msg, XML_LRM_ATTR_TASK);
1920     CRM_CHECK(!pcmk__str_empty(operation), return);
1921 
1922     transition = crm_element_value(msg, XML_ATTR_TRANSITION_KEY);
1923     if (pcmk__str_empty(transition)) {
1924         crm_log_xml_err(msg, "Missing transition number");
1925     }
1926 
1927     if (lrm_state == NULL) {
1928         // This shouldn't be possible, but provide a failsafe just in case
1929         crm_err("Cannot execute %s of %s: No executor connection "
1930                 CRM_XS " transition_key=%s",
1931                 operation, rsc->id, pcmk__s(transition, ""));
1932         synthesize_lrmd_failure(NULL, msg, PCMK_EXEC_INVALID,
1933                                 PCMK_OCF_UNKNOWN_ERROR,
1934                                 "No executor connection");
1935         return;
1936     }
1937 
1938     if (pcmk__str_any_of(operation, PCMK_ACTION_RELOAD,
1939                          PCMK_ACTION_RELOAD_AGENT, NULL)) {
1940         /* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs
1941          * will schedule reload-agent actions only. In either case, we need
1942          * to map that to whatever the resource agent actually supports.
1943          * Default to the OCF 1.1 name.
1944          */
1945         if ((md != NULL)
1946             && pcmk_is_set(md->ra_flags, ra_supports_legacy_reload)) {
1947             operation = PCMK_ACTION_RELOAD;
1948         } else {
1949             operation = PCMK_ACTION_RELOAD_AGENT;
1950         }
1951     }
1952 
1953     op = construct_op(lrm_state, msg, rsc->id, operation);
1954     CRM_CHECK(op != NULL, return);
1955 
1956     if (should_cancel_recurring(rsc->id, operation, op->interval_ms)) {
1957         guint removed = 0;
1958         struct stop_recurring_action_s data;
1959 
1960         data.rsc = rsc;
1961         data.lrm_state = lrm_state;
1962         removed = g_hash_table_foreach_remove(lrm_state->active_ops,
1963                                               stop_recurring_action_by_rsc,
1964                                               &data);
1965 
1966         if (removed) {
1967             crm_debug("Stopped %u recurring operation%s in preparation for "
1968                       PCMK__OP_FMT, removed, pcmk__plural_s(removed),
1969                       rsc->id, operation, op->interval_ms);
1970         }
1971     }
1972 
1973     /* now do the op */
1974     crm_notice("Requesting local execution of %s operation for %s on %s "
1975                CRM_XS " transition_key=%s op_key=" PCMK__OP_FMT,
1976                pcmk__readable_action(op->op_type, op->interval_ms), rsc->id,
1977                lrm_state->node_name, pcmk__s(transition, ""), rsc->id,
1978                operation, op->interval_ms);
1979 
1980     nack_reason = should_nack_action(operation);
1981     if (nack_reason != NULL) {
1982         crm_notice("Discarding attempt to perform action %s on %s in state %s "
1983                    "(shutdown=%s)", operation, rsc->id,
1984                    fsa_state2string(controld_globals.fsa_state),
1985                    pcmk__btoa(pcmk_is_set(controld_globals.fsa_input_register,
1986                                           R_SHUTDOWN)));
1987 
1988         lrmd__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_INVALID,
1989                          nack_reason);
1990         controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id);
1991         lrmd_free_event(op);
1992         free(op_id);
1993         return;
1994     }
1995 
1996     controld_record_pending_op(lrm_state->node_name, rsc, op);
1997 
1998     op_id = pcmk__op_key(rsc->id, op->op_type, op->interval_ms);
1999 
2000     if (op->interval_ms > 0) {
2001         /* cancel it so we can then restart it without conflict */
2002         cancel_op_key(lrm_state, rsc, op_id, FALSE);
2003     }
2004 
2005     rc = controld_execute_resource_agent(lrm_state, rsc->id, op->op_type,
2006                                          op->user_data, op->interval_ms,
2007                                          op->timeout, op->start_delay,
2008                                          op->params, &call_id);
2009     if (rc == pcmk_rc_ok) {
2010         /* record all operations so we can wait
2011          * for them to complete during shutdown
2012          */
2013         char *call_id_s = make_stop_id(rsc->id, call_id);
2014         active_op_t *pending = NULL;
2015 
2016         pending = calloc(1, sizeof(active_op_t));
2017         crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s);
2018 
2019         pending->call_id = call_id;
2020         pending->interval_ms = op->interval_ms;
2021         pending->op_type = strdup(operation);
2022         pending->op_key = strdup(op_id);
2023         pending->rsc_id = strdup(rsc->id);
2024         pending->start_time = time(NULL);
2025         pcmk__str_update(&pending->user_data, op->user_data);
2026         if (crm_element_value_epoch(msg, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
2027                                     &(pending->lock_time)) != pcmk_ok) {
2028             pending->lock_time = 0;
2029         }
2030         g_hash_table_replace(lrm_state->active_ops, call_id_s, pending);
2031 
2032         if ((op->interval_ms > 0)
2033             && (op->start_delay > START_DELAY_THRESHOLD)) {
2034             int target_rc = PCMK_OCF_OK;
2035 
2036             crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id);
2037             decode_transition_key(op->user_data, NULL, NULL, NULL, &target_rc);
2038             lrmd__set_result(op, target_rc, PCMK_EXEC_DONE, NULL);
2039             controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id);
2040         }
2041 
2042         pending->params = op->params;
2043         op->params = NULL;
2044 
2045     } else if (lrm_state_is_local(lrm_state)) {
2046         crm_err("Could not initiate %s action for resource %s locally: %s "
2047                 CRM_XS " rc=%d", operation, rsc->id, pcmk_rc_str(rc), rc);
2048         fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED,
2049                        PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc));
2050         process_lrm_event(lrm_state, op, NULL, NULL);
2051         register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
2052 
2053     } else {
2054         crm_err("Could not initiate %s action for resource %s remotely on %s: "
2055                 "%s " CRM_XS " rc=%d",
2056                 operation, rsc->id, lrm_state->node_name, pcmk_rc_str(rc), rc);
2057         fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED,
2058                        PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc));
2059         process_lrm_event(lrm_state, op, NULL, NULL);
2060     }
2061 
2062     free(op_id);
2063     lrmd_free_event(op);
2064 }
2065 
2066 void
2067 do_lrm_event(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
2068              enum crmd_fsa_cause cause,
2069              enum crmd_fsa_state cur_state, enum crmd_fsa_input cur_input, fsa_data_t * msg_data)
2070 {
2071     CRM_CHECK(FALSE, return);
2072 }
2073 
/*!
 * \internal
 * \brief Copy a string, turning each escaped newline into a real newline
 *
 * Every two-character sequence of a backslash followed by 'n' is replaced by
 * a newline character followed by a space, so the copy always has the same
 * length as the input.
 *
 * \param[in] string  String to copy (may be NULL)
 *
 * \return Newly allocated copy of \p string with newlines unescaped, or NULL
 *         if \p string is NULL or memory could not be allocated
 * \note The caller is responsible for freeing the result with free().
 */
static char *
unescape_newlines(const char *string)
{
    static const char escaped_newline[] = "\\n";
    char *ret = NULL;

    if (string == NULL) {
        return NULL;
    }

    ret = strdup(string);
    if (ret == NULL) {
        // Out of memory: report it the same way as a NULL input
        return NULL;
    }

    for (char *pch = strstr(ret, escaped_newline); pch != NULL;
         pch = strstr(pch, escaped_newline)) {

        /* Replace newline escape pattern with actual newline (and a space so
         * we don't have to shuffle the rest of the buffer)
         */
        pch[0] = '\n';
        pch[1] = ' ';
    }

    return ret;
}
2098 
2099 static bool
2100 did_lrm_rsc_op_fail(lrm_state_t *lrm_state, const char * rsc_id,
     /* [previous][next][first][last][top][bottom][index][help] */
2101                     const char * op_type, guint interval_ms)
2102 {
2103     rsc_history_t *entry = NULL;
2104 
2105     CRM_CHECK(lrm_state != NULL, return FALSE);
2106     CRM_CHECK(rsc_id != NULL, return FALSE);
2107     CRM_CHECK(op_type != NULL, return FALSE);
2108 
2109     entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
2110     if (entry == NULL || entry->failed == NULL) {
2111         return FALSE;
2112     }
2113 
2114     if (pcmk__str_eq(entry->failed->rsc_id, rsc_id, pcmk__str_none)
2115         && pcmk__str_eq(entry->failed->op_type, op_type, pcmk__str_casei)
2116         && entry->failed->interval_ms == interval_ms) {
2117         return TRUE;
2118     }
2119 
2120     return FALSE;
2121 }
2122 
2123 /*!
2124  * \internal
2125  * \brief Log the result of an executor action (actual or synthesized)
2126  *
2127  * \param[in] op         Executor action to log result for
2128  * \param[in] op_key     Operation key for action
2129  * \param[in] node_name  Name of node action was performed on, if known
2130  * \param[in] confirmed  Whether to log that graph action was confirmed
2131  */
2132 static void
2133 log_executor_event(const lrmd_event_data_t *op, const char *op_key,
     /* [previous][next][first][last][top][bottom][index][help] */
2134                    const char *node_name, gboolean confirmed)
2135 {
2136     int log_level = LOG_ERR;
2137     GString *str = g_string_sized_new(100); // reasonable starting size
2138 
2139     pcmk__g_strcat(str,
2140                    "Result of ",
2141                    pcmk__readable_action(op->op_type, op->interval_ms),
2142                    " operation for ", op->rsc_id, NULL);
2143 
2144     if (node_name != NULL) {
2145         pcmk__g_strcat(str, " on ", node_name, NULL);
2146     }
2147 
2148     switch (op->op_status) {
2149         case PCMK_EXEC_DONE:
2150             log_level = LOG_NOTICE;
2151             pcmk__g_strcat(str, ": ", services_ocf_exitcode_str(op->rc), NULL);
2152             break;
2153 
2154         case PCMK_EXEC_TIMEOUT:
2155             pcmk__g_strcat(str,
2156                            ": ", pcmk_exec_status_str(op->op_status), " after ",
2157                            pcmk__readable_interval(op->timeout), NULL);
2158             break;
2159 
2160         case PCMK_EXEC_CANCELLED:
2161             log_level = LOG_INFO;
2162             /* order of __attribute__ and Fall through comment is IMPORTANT!
2163              * do not change it without proper testing with both clang and gcc
2164              * in multiple versions.
2165              * the clang check allows to build with all versions of clang.
2166              * the has_c_attribute check is to workaround a bug in clang version
2167              * in rhel7. has_attribute would happily return "YES SIR WE GOT IT"
2168              * and fail the build the next line.
2169              */
2170 #ifdef __clang__
2171 #ifdef __has_c_attribute
2172 #if __has_attribute(fallthrough)
2173             __attribute__((fallthrough));
2174 #endif
2175 #endif
2176 #endif
2177             // Fall through
2178         default:
2179             pcmk__g_strcat(str, ": ", pcmk_exec_status_str(op->op_status),
2180                            NULL);
2181     }
2182 
2183     if ((op->exit_reason != NULL)
2184         && ((op->op_status != PCMK_EXEC_DONE) || (op->rc != PCMK_OCF_OK))) {
2185 
2186         pcmk__g_strcat(str, " (", op->exit_reason, ")", NULL);
2187     }
2188 
2189     g_string_append(str, " " CRM_XS);
2190     g_string_append_printf(str, " graph action %sconfirmed; call=%d key=%s",
2191                            (confirmed? "" : "un"), op->call_id, op_key);
2192     if (op->op_status == PCMK_EXEC_DONE) {
2193         g_string_append_printf(str, " rc=%d", op->rc);
2194     }
2195 
2196     do_crm_log(log_level, "%s", str->str);
2197     g_string_free(str, TRUE);
2198 
2199     /* The services library has already logged the output at info or debug
2200      * level, so just raise to notice if it looks like a failure.
2201      */
2202     if ((op->output != NULL) && (op->rc != PCMK_OCF_OK)) {
2203         char *prefix = crm_strdup_printf(PCMK__OP_FMT "@%s output",
2204                                          op->rsc_id, op->op_type,
2205                                          op->interval_ms, node_name);
2206 
2207         crm_log_output(LOG_NOTICE, prefix, op->output);
2208         free(prefix);
2209     }
2210 }
2211 
2212 void
2213 process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
     /* [previous][next][first][last][top][bottom][index][help] */
2214                   active_op_t *pending, const xmlNode *action_xml)
2215 {
         /* Process one executor event (the result of a resource action).
          *
          * In order, this function: optionally remaps status codes for older
          * DCs, gathers resource/node information, either records the result in
          * resource history or acknowledges it directly to the originator,
          * removes the operation from lrm_state->active_ops when it is
          * responsible for doing so, logs the event, sends an alert (or caches
          * meta-data output), handles resource deletion, triggers the FSA, and
          * updates the history cache.
          *
          * lrm_state   Executor state; may be NULL, in which case every step
          *             guarded by "if (lrm_state)" below is skipped
          * op          Event to process; must be non-NULL with a non-NULL
          *             rsc_id (otherwise the function returns immediately).
          *             Its status/rc may be modified here.
          * pending     Active-operation entry for this event. If NULL, it is
          *             looked up in lrm_state->active_ops and this function
          *             takes care of removing the entry.
          * action_xml  Optional action XML, used as a fallback source for
          *             agent information and the node name
          */
2216     char *op_id = NULL;
2217     char *op_key = NULL;
2218 
         // remove:  TRUE if this function (not the caller) must drop the op
         //          from active_ops
         // removed: what gets reported to log_executor_event()
2219     gboolean remove = FALSE;
2220     gboolean removed = FALSE;
2221     bool need_direct_ack = FALSE;
2222     lrmd_rsc_info_t *rsc = NULL;
2223     const char *node_name = NULL;
2224 
2225     CRM_CHECK(op != NULL, return);
2226     CRM_CHECK(op->rsc_id != NULL, return);
2227 
2228     // Remap new status codes for older DCs
2229     if (compare_version(controld_globals.dc_version, "3.2.0") < 0) {
2230         switch (op->op_status) {
2231             case PCMK_EXEC_NOT_CONNECTED:
2232                 lrmd__set_result(op, PCMK_OCF_CONNECTION_DIED,
2233                                  PCMK_EXEC_ERROR, op->exit_reason);
2234                 break;
2235             case PCMK_EXEC_INVALID:
2236                 lrmd__set_result(op, CRM_DIRECT_NACK_RC, PCMK_EXEC_ERROR,
2237                                  op->exit_reason);
2238                 break;
2239             default:
2240                 break;
2241         }
2242     }
2243 
         // op_id is the key used for active_ops lookups/removals below;
         // op_key identifies the operation in log messages
2244     op_id = make_stop_id(op->rsc_id, op->call_id);
2245     op_key = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms);
2246 
2247     // Get resource info if available (from executor state or action XML)
2248     if (lrm_state) {
2249         rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0);
2250     }
2251     if ((rsc == NULL) && action_xml) {
2252         xmlNode *xml = find_xml_node(action_xml, XML_CIB_TAG_RESOURCE, TRUE);
2253 
2254         const char *standard = crm_element_value(xml, XML_AGENT_ATTR_CLASS);
2255         const char *provider = crm_element_value(xml, XML_AGENT_ATTR_PROVIDER);
2256         const char *type = crm_element_value(xml, XML_ATTR_TYPE);
2257 
             // Provider is optional; standard and type are required to build
             // a substitute resource info object
2258         if (standard && type) {
2259             crm_info("%s agent information not cached, using %s%s%s:%s from action XML",
2260                      op->rsc_id, standard,
2261                      (provider? ":" : ""), (provider? provider : ""), type);
2262             rsc = lrmd_new_rsc_info(op->rsc_id, standard, provider, type);
2263         } else {
                 // rsc stays NULL; processing continues without it
2264             crm_err("Can't process %s result because %s agent information not cached or in XML",
2265                     op_key, op->rsc_id);
2266         }
2267     }
2268 
2269     // Get node name if available (from executor state or action XML)
2270     if (lrm_state) {
2271         node_name = lrm_state->node_name;
2272     } else if (action_xml) {
2273         node_name = crm_element_value(action_xml, XML_LRM_ATTR_TARGET);
2274     }
2275 
         // Caller did not supply the active-op entry: find it ourselves (if we
         // can) and remember that removing it is our job
2276     if(pending == NULL) {
2277         remove = TRUE;
2278         if (lrm_state) {
2279             pending = g_hash_table_lookup(lrm_state->active_ops, op_id);
2280         }
2281     }
2282 
2283     if (op->op_status == PCMK_EXEC_ERROR) {
2284         switch(op->rc) {
2285             case PCMK_OCF_NOT_RUNNING:
2286             case PCMK_OCF_RUNNING_PROMOTED:
2287             case PCMK_OCF_DEGRADED:
2288             case PCMK_OCF_DEGRADED_PROMOTED:
2289                 // Leave it to the TE/scheduler to decide if this is an error
2290                 op->op_status = PCMK_EXEC_DONE;
2291                 break;
2292             default:
2293                 /* Nothing to do */
2294                 break;
2295         }
2296     }
2297 
         // Decide whether the result is recorded in resource history or must
         // be acknowledged directly (need_direct_ack)
2298     if (op->op_status != PCMK_EXEC_CANCELLED) {
2299         /* We might not record the result, so directly acknowledge it to the
2300          * originator instead, so it doesn't time out waiting for the result
2301          * (especially important if part of a transition).
2302          */
2303         need_direct_ack = TRUE;
2304 
2305         if (controld_action_is_recordable(op->op_type)) {
2306             if (node_name && rsc) {
2307                 // We should record the result, and happily, we can
2308                 time_t lock_time = (pending == NULL)? 0 : pending->lock_time;
2309 
2310                 controld_update_resource_history(node_name, rsc, op, lock_time);
2311                 need_direct_ack = FALSE;
2312 
2313             } else if (op->rsc_deleted) {
2314                 /* We shouldn't record the result (likely the resource was
2315                  * refreshed, cleaned, or removed while this operation was
2316                  * in flight).
2317                  */
2318                 crm_notice("Not recording %s result in CIB because "
2319                            "resource information was removed since it was initiated",
2320                            op_key);
2321             } else {
2322                 /* This shouldn't be possible; the executor didn't consider the
2323                  * resource deleted, but we couldn't find resource or node
2324                  * information.
2325                  */
                     // This branch means node_name or rsc is missing, so a
                     // non-NULL node_name implies rsc is the missing piece
2326                 crm_err("Unable to record %s result in CIB: %s", op_key,
2327                         (node_name? "No resource information" : "No node name"));
2328             }
2329         }
2330 
2331     } else if (op->interval_ms == 0) {
2332         /* A non-recurring operation was cancelled. Most likely, the
2333          * never-initiated action was removed from the executor's pending
2334          * operations list upon resource removal.
2335          */
2336         need_direct_ack = TRUE;
2337 
2338     } else if (pending == NULL) {
2339         /* This recurring operation was cancelled, but was not pending. No
2340          * transition actions are waiting on it, nothing needs to be done.
2341          */
2342 
2343     } else if (op->user_data == NULL) {
2344         /* This recurring operation was cancelled and pending, but we don't
2345          * have a transition key. This should never happen.
2346          */
2347         crm_err("Recurring operation %s was cancelled without transition information",
2348                 op_key);
2349 
2350     } else if (pcmk_is_set(pending->flags, active_op_remove)) {
2351         /* This recurring operation was cancelled (by us) and pending, and we
2352          * have been waiting for it to finish.
2353          */
2354         if (lrm_state) {
2355             controld_delete_action_history(op);
2356         }
2357 
2358         /* Directly acknowledge failed recurring actions here. The above call to
2359          * controld_delete_action_history() will not erase any corresponding
2360          * last_failure entry, which means that the DC won't confirm the
2361          * cancellation via process_op_deletion(), and the transition would
2362          * otherwise wait for the action timer to pop.
2363          */
             // NOTE(review): unlike the call above, lrm_state is passed here
             // without a NULL check -- confirm that did_lrm_rsc_op_fail()
             // accepts NULL or that this branch implies a non-NULL lrm_state
2364         if (did_lrm_rsc_op_fail(lrm_state, pending->rsc_id,
2365                                 pending->op_type, pending->interval_ms)) {
2366             need_direct_ack = TRUE;
2367         }
2368 
2369     } else if (op->rsc_deleted) {
2370         /* This recurring operation was cancelled (but not by us, and the
2371          * executor does not have resource information, likely due to resource
2372          * cleanup, refresh, or removal) and pending.
2373          */
2374         crm_debug("Recurring op %s was cancelled due to resource deletion",
2375                   op_key);
2376         need_direct_ack = TRUE;
2377 
2378     } else {
2379         /* This recurring operation was cancelled (but not by us, likely by the
2380          * executor before stopping the resource) and pending. We don't need to
2381          * do anything special.
2382          */
2383     }
2384 
2385     if (need_direct_ack) {
2386         controld_ack_event_directly(NULL, NULL, NULL, op, op->rsc_id);
2387     }
2388 
2389     if(remove == FALSE) {
2390         /* The caller will do this afterwards, but keep the logging consistent */
2391         removed = TRUE;
2392 
         // Only non-recurring ops and cancelled ops are dropped from active_ops
         // here; a recurring op that was not cancelled keeps its entry
2393     } else if (lrm_state && ((op->interval_ms == 0)
2394                              || (op->op_status == PCMK_EXEC_CANCELLED))) {
2395 
2396         gboolean found = g_hash_table_remove(lrm_state->active_ops, op_id);
2397 
             // A cancelled recurring op is reported as removed whether or not
             // an entry was found; a non-recurring op only if one was found
2398         if (op->interval_ms != 0) {
2399             removed = TRUE;
2400         } else if (found) {
2401             removed = TRUE;
2402             crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed",
2403                       op_key, op->call_id, op_id,
2404                       g_hash_table_size(lrm_state->active_ops));
2405         }
2406     }
2407 
2408     log_executor_event(op, op_key, node_name, removed);
2409 
         // Everything except meta-data actions is passed to
         // crmd_alert_resource_op(); successful meta-data output is instead
         // stored in the metadata cache (when resource info is available)
2410     if (lrm_state) {
2411         if (!pcmk__str_eq(op->op_type, PCMK_ACTION_META_DATA,
2412                           pcmk__str_casei)) {
2413             crmd_alert_resource_op(lrm_state->node_name, op);
2414         } else if (rsc && (op->rc == PCMK_OCF_OK)) {
2415             char *metadata = unescape_newlines(op->output);
2416 
2417             controld_cache_metadata(lrm_state->metadata_cache, rsc, metadata);
2418             free(metadata);
2419         }
2420     }
2421 
2422     if (op->rsc_deleted) {
2423         crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key);
2424         if (lrm_state) {
2425             delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL,
2426                              true);
2427         }
2428     }
2429 
2430     /* If a shutdown was escalated while operations were pending,
2431      * then the FSA will be stalled right now... allow it to continue
2432      */
2433     controld_trigger_fsa();
2434     if (lrm_state && rsc) {
2435         update_history_cache(lrm_state, rsc, op);
2436     }
2437 
         // Release everything obtained or allocated above (rsc may be NULL
         // here if no resource information was found)
2438     lrmd_free_rsc_info(rsc);
2439     free(op_key);
2440     free(op_id);
2441 }

/* [previous][next][first][last][top][bottom][index][help] */