root/daemons/controld/controld_execd.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. lrm_connection_destroy
  2. make_stop_id
  3. copy_instance_keys
  4. copy_meta_keys
  5. history_remove_recurring_op
  6. history_free_recurring_ops
  7. history_free
  8. update_history_cache
  9. send_task_ok_ack
  10. op_node_name
  11. lrm_op_callback
  12. try_local_executor_connect
  13. do_lrm_control
  14. lrm_state_verify_stopped
  15. is_rsc_active
  16. build_active_RAs
  17. controld_query_executor_state
  18. controld_rc2event
  19. controld_trigger_delete_refresh
  20. notify_deleted
  21. lrm_remove_deleted_rsc
  22. lrm_remove_deleted_op
  23. delete_rsc_entry
  24. last_failed_matches_op
  25. lrm_clear_last_failure
  26. cancel_op
  27. cancel_action_by_key
  28. cancel_op_key
  29. get_lrm_resource
  30. delete_resource
  31. get_fake_call_id
  32. fake_op_status
  33. force_reprobe
  34. synthesize_lrmd_failure
  35. lrm_op_target
  36. fail_lrm_resource
  37. handle_reprobe_op
  38. do_lrm_cancel
  39. do_lrm_delete
  40. new_metadata_cb_data
  41. free_metadata_cb_data
  42. metadata_complete
  43. do_lrm_invoke
  44. construct_op
  45. controld_ack_event_directly
  46. verify_stopped
  47. stop_recurring_action_by_rsc
  48. stop_recurring_actions
  49. should_cancel_recurring
  50. should_nack_action
  51. do_lrm_rsc_op
  52. unescape_newlines
  53. did_lrm_rsc_op_fail
  54. log_executor_event
  55. process_lrm_event

   1 /*
   2  * Copyright 2004-2025 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <regex.h>
  13 #include <sys/param.h>
  14 #include <sys/types.h>
  15 #include <sys/wait.h>
  16 
  17 #include <crm/crm.h>
  18 #include <crm/lrmd.h>           // lrmd_event_data_t, lrmd_rsc_info_t, etc.
  19 #include <crm/services.h>
  20 #include <crm/common/xml.h>
  21 #include <crm/lrmd_internal.h>
  22 
  23 #include <pacemaker-internal.h>
  24 #include <pacemaker-controld.h>
  25 
  26 #define START_DELAY_THRESHOLD 5 * 60 * 1000
  27 #define MAX_LRM_REG_FAILS 30
  28 
  29 struct delete_event_s {
  30     int rc;
  31     const char *rsc;
  32     lrm_state_t *lrm_state;
  33 };
  34 
  35 static gboolean is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id);
  36 static gboolean build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list);
  37 static gboolean stop_recurring_actions(gpointer key, gpointer value, gpointer user_data);
  38 
  39 static lrmd_event_data_t *construct_op(const lrm_state_t *lrm_state,
  40                                        const xmlNode *rsc_op,
  41                                        const char *rsc_id,
  42                                        const char *operation);
  43 static void do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc,
  44                           xmlNode *msg, struct ra_metadata_s *md);
  45 
  46 static gboolean lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state,
  47                                          int log_level);
  48 
  49 static void
  50 lrm_connection_destroy(void)
     /* [previous][next][first][last][top][bottom][index][help] */
  51 {
  52     if (pcmk_is_set(controld_globals.fsa_input_register, R_LRM_CONNECTED)) {
  53         crm_crit("Lost connection to local executor");
  54         register_fsa_input(C_FSA_INTERNAL, I_ERROR, NULL);
  55         controld_clear_fsa_input_flags(R_LRM_CONNECTED);
  56     }
  57 }
  58 
  59 static char *
  60 make_stop_id(const char *rsc, int call_id)
     /* [previous][next][first][last][top][bottom][index][help] */
  61 {
  62     return crm_strdup_printf("%s:%d", rsc, call_id);
  63 }
  64 
  65 static void
  66 copy_instance_keys(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
  67 {
  68     if (strstr(key, CRM_META "_") == NULL) {
  69         pcmk__insert_dup(user_data, (const char *) key, (const char *) value);
  70     }
  71 }
  72 
  73 static void
  74 copy_meta_keys(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
  75 {
  76     if (strstr(key, CRM_META "_") != NULL) {
  77         pcmk__insert_dup(user_data, (const char *) key, (const char *) value);
  78     }
  79 }
  80 
  81 /*!
  82  * \internal
  83  * \brief Remove a recurring operation from a resource's history
  84  *
  85  * \param[in,out] history  Resource history to modify
  86  * \param[in]     op       Operation to remove
  87  *
  88  * \return TRUE if the operation was found and removed, FALSE otherwise
  89  */
  90 static gboolean
  91 history_remove_recurring_op(rsc_history_t *history, const lrmd_event_data_t *op)
     /* [previous][next][first][last][top][bottom][index][help] */
  92 {
  93     GList *iter;
  94 
  95     for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) {
  96         lrmd_event_data_t *existing = iter->data;
  97 
  98         if ((op->interval_ms == existing->interval_ms)
  99             && pcmk__str_eq(op->rsc_id, existing->rsc_id, pcmk__str_none)
 100             && pcmk__str_eq(op->op_type, existing->op_type, pcmk__str_casei)) {
 101 
 102             history->recurring_op_list = g_list_delete_link(history->recurring_op_list, iter);
 103             lrmd_free_event(existing);
 104             return TRUE;
 105         }
 106     }
 107     return FALSE;
 108 }
 109 
 110 /*!
 111  * \internal
 112  * \brief Free all recurring operations in resource history
 113  *
 114  * \param[in,out] history  Resource history to modify
 115  */
 116 static void
 117 history_free_recurring_ops(rsc_history_t *history)
     /* [previous][next][first][last][top][bottom][index][help] */
 118 {
 119     GList *iter;
 120 
 121     for (iter = history->recurring_op_list; iter != NULL; iter = iter->next) {
 122         lrmd_free_event(iter->data);
 123     }
 124     g_list_free(history->recurring_op_list);
 125     history->recurring_op_list = NULL;
 126 }
 127 
 128 /*!
 129  * \internal
 130  * \brief Free resource history
 131  *
 132  * \param[in,out] history  Resource history to free
 133  */
 134 void
 135 history_free(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 136 {
 137     rsc_history_t *history = (rsc_history_t*)data;
 138 
 139     if (history->stop_params) {
 140         g_hash_table_destroy(history->stop_params);
 141     }
 142 
 143     /* Don't need to free history->rsc.id because it's set to history->id */
 144     free(history->rsc.type);
 145     free(history->rsc.standard);
 146     free(history->rsc.provider);
 147 
 148     lrmd_free_event(history->failed);
 149     lrmd_free_event(history->last);
 150     free(history->id);
 151     history_free_recurring_ops(history);
 152     free(history);
 153 }
 154 
 155 static void
 156 update_history_cache(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, lrmd_event_data_t * op)
     /* [previous][next][first][last][top][bottom][index][help] */
 157 {
 158     int target_rc = 0;
 159     rsc_history_t *entry = NULL;
 160 
 161     if (op->rsc_deleted) {
 162         crm_debug("Purged history for '%s' after %s", op->rsc_id, op->op_type);
 163         controld_delete_resource_history(op->rsc_id, lrm_state->node_name,
 164                                          NULL, crmd_cib_smart_opt());
 165         return;
 166     }
 167 
 168     if (pcmk__str_eq(op->op_type, PCMK_ACTION_NOTIFY, pcmk__str_casei)) {
 169         return;
 170     }
 171 
 172     crm_debug("Updating history for '%s' with %s op", op->rsc_id, op->op_type);
 173 
 174     entry = g_hash_table_lookup(lrm_state->resource_history, op->rsc_id);
 175     if (entry == NULL && rsc) {
 176         entry = pcmk__assert_alloc(1, sizeof(rsc_history_t));
 177         entry->id = pcmk__str_copy(op->rsc_id);
 178         g_hash_table_insert(lrm_state->resource_history, entry->id, entry);
 179 
 180         entry->rsc.id = entry->id;
 181         entry->rsc.type = pcmk__str_copy(rsc->type);
 182         entry->rsc.standard = pcmk__str_copy(rsc->standard);
 183         entry->rsc.provider = pcmk__str_copy(rsc->provider);
 184 
 185     } else if (entry == NULL) {
 186         crm_info("Resource %s no longer exists, not updating cache", op->rsc_id);
 187         return;
 188     }
 189 
 190     entry->last_callid = op->call_id;
 191     target_rc = rsc_op_expected_rc(op);
 192     if (op->op_status == PCMK_EXEC_CANCELLED) {
 193         if (op->interval_ms > 0) {
 194             crm_trace("Removing cancelled recurring op: " PCMK__OP_FMT,
 195                       op->rsc_id, op->op_type, op->interval_ms);
 196             history_remove_recurring_op(entry, op);
 197             return;
 198         } else {
 199             crm_trace("Skipping " PCMK__OP_FMT " rc=%d, status=%d",
 200                       op->rsc_id, op->op_type, op->interval_ms, op->rc,
 201                       op->op_status);
 202         }
 203 
 204     } else if (did_rsc_op_fail(op, target_rc)) {
 205         /* Store failed monitors here, otherwise the block below will cause them
 206          * to be forgotten when a stop happens.
 207          */
 208         if (entry->failed) {
 209             lrmd_free_event(entry->failed);
 210         }
 211         entry->failed = lrmd_copy_event(op);
 212 
 213     } else if (op->interval_ms == 0) {
 214         if (entry->last) {
 215             lrmd_free_event(entry->last);
 216         }
 217         entry->last = lrmd_copy_event(op);
 218 
 219         if (op->params && pcmk__strcase_any_of(op->op_type, PCMK_ACTION_START,
 220                                                PCMK_ACTION_RELOAD,
 221                                                PCMK_ACTION_RELOAD_AGENT,
 222                                                PCMK_ACTION_MONITOR, NULL)) {
 223             if (entry->stop_params) {
 224                 g_hash_table_destroy(entry->stop_params);
 225             }
 226             entry->stop_params = pcmk__strkey_table(free, free);
 227 
 228             g_hash_table_foreach(op->params, copy_instance_keys, entry->stop_params);
 229         }
 230     }
 231 
 232     if (op->interval_ms > 0) {
 233         /* Ensure there are no duplicates */
 234         history_remove_recurring_op(entry, op);
 235 
 236         crm_trace("Adding recurring op: " PCMK__OP_FMT,
 237                   op->rsc_id, op->op_type, op->interval_ms);
 238         entry->recurring_op_list = g_list_prepend(entry->recurring_op_list, lrmd_copy_event(op));
 239 
 240     } else if ((entry->recurring_op_list != NULL)
 241                 && !pcmk__str_eq(op->op_type, PCMK_ACTION_MONITOR,
 242                                  pcmk__str_casei)) {
 243         crm_trace("Dropping %d recurring ops because of: " PCMK__OP_FMT,
 244                   g_list_length(entry->recurring_op_list), op->rsc_id,
 245                   op->op_type, op->interval_ms);
 246         history_free_recurring_ops(entry);
 247     }
 248 }
 249 
 250 /*!
 251  * \internal
 252  * \brief Send a direct OK ack for a resource task
 253  *
 254  * \param[in] lrm_state  LRM connection
 255  * \param[in] input      Input message being ack'ed
 256  * \param[in] rsc_id     ID of affected resource
 257  * \param[in] rsc        Affected resource (if available)
 258  * \param[in] task       Operation task being ack'ed
 259  * \param[in] ack_host   Name of host to send ack to
 260  * \param[in] ack_sys    IPC system name to ack
 261  */
 262 static void
 263 send_task_ok_ack(const lrm_state_t *lrm_state, const ha_msg_input_t *input,
     /* [previous][next][first][last][top][bottom][index][help] */
 264                  const char *rsc_id, const lrmd_rsc_info_t *rsc,
 265                  const char *task, const char *ack_host, const char *ack_sys)
 266 {
 267     lrmd_event_data_t *op = construct_op(lrm_state, input->xml, rsc_id, task);
 268 
 269     lrmd__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
 270     controld_ack_event_directly(ack_host, ack_sys, rsc, op, rsc_id);
 271     lrmd_free_event(op);
 272 }
 273 
 274 static inline const char *
 275 op_node_name(lrmd_event_data_t *op)
     /* [previous][next][first][last][top][bottom][index][help] */
 276 {
 277     return pcmk__s(op->remote_nodename,
 278                    controld_globals.cluster->priv->node_name);
 279 }
 280 
 281 void
 282 lrm_op_callback(lrmd_event_data_t * op)
     /* [previous][next][first][last][top][bottom][index][help] */
 283 {
 284     CRM_CHECK(op != NULL, return);
 285     switch (op->type) {
 286         case lrmd_event_disconnect:
 287             if (op->remote_nodename == NULL) {
 288                 /* If this is the local executor IPC connection, set the right
 289                  * bits in the controller when the connection goes down.
 290                  */
 291                 lrm_connection_destroy();
 292             }
 293             break;
 294 
 295         case lrmd_event_exec_complete:
 296             {
 297                 lrm_state_t *lrm_state =
 298                     controld_get_executor_state(op_node_name(op), false);
 299 
 300                 pcmk__assert(lrm_state != NULL);
 301                 process_lrm_event(lrm_state, op, NULL, NULL);
 302             }
 303             break;
 304 
 305         default:
 306             break;
 307     }
 308 }
 309 
 310 static void
 311 try_local_executor_connect(long long action, fsa_data_t *msg_data,
     /* [previous][next][first][last][top][bottom][index][help] */
 312                            lrm_state_t *lrm_state)
 313 {
 314     int rc = pcmk_rc_ok;
 315 
 316     crm_debug("Connecting to the local executor");
 317 
 318     // If we can connect, great
 319     rc = controld_connect_local_executor(lrm_state);
 320     if (rc == pcmk_rc_ok) {
 321         controld_set_fsa_input_flags(R_LRM_CONNECTED);
 322         crm_info("Connection to the local executor established");
 323         return;
 324     }
 325 
 326     // Otherwise, if we can try again, set a timer to do so
 327     if (lrm_state->num_lrm_register_fails < MAX_LRM_REG_FAILS) {
 328         crm_warn("Failed to connect to the local executor %d time%s "
 329                  "(%d max): %s", lrm_state->num_lrm_register_fails,
 330                  pcmk__plural_s(lrm_state->num_lrm_register_fails),
 331                  MAX_LRM_REG_FAILS, pcmk_rc_str(rc));
 332         controld_start_wait_timer();
 333         crmd_fsa_stall(FALSE);
 334         return;
 335     }
 336 
 337     // Otherwise give up
 338     crm_err("Failed to connect to the executor the max allowed "
 339             "%d time%s: %s", lrm_state->num_lrm_register_fails,
 340             pcmk__plural_s(lrm_state->num_lrm_register_fails),
 341             pcmk_rc_str(rc));
 342     register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 343 }
 344 
 345 /*       A_LRM_CONNECT  */
 346 void
 347 do_lrm_control(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 348                enum crmd_fsa_cause cause,
 349                enum crmd_fsa_state cur_state,
 350                enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 351 {
 352     /* This only pertains to local executor connections. Remote connections are
 353      * handled as resources within the scheduler. Connecting and disconnecting
 354      * from remote executor instances is handled differently.
 355      */
 356 
 357     lrm_state_t *lrm_state = NULL;
 358 
 359     if (controld_globals.cluster->priv->node_name == NULL) {
 360         return; // Shouldn't be possible
 361     }
 362     lrm_state = controld_get_executor_state(NULL, true);
 363     if (lrm_state == NULL) {
 364         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 365         return;
 366     }
 367 
 368     if (action & A_LRM_DISCONNECT) {
 369         if (lrm_state_verify_stopped(lrm_state, cur_state, LOG_INFO) == FALSE) {
 370             if (action == A_LRM_DISCONNECT) {
 371                 crmd_fsa_stall(FALSE);
 372                 return;
 373             }
 374         }
 375 
 376         controld_clear_fsa_input_flags(R_LRM_CONNECTED);
 377         lrm_state_disconnect(lrm_state);
 378         lrm_state_reset_tables(lrm_state, FALSE);
 379     }
 380 
 381     if (action & A_LRM_CONNECT) {
 382         try_local_executor_connect(action, msg_data, lrm_state);
 383     }
 384 
 385     if (action & ~(A_LRM_CONNECT | A_LRM_DISCONNECT)) {
 386         crm_err("Unexpected action %s in %s", fsa_action2string(action),
 387                 __func__);
 388     }
 389 }
 390 
 391 static gboolean
 392 lrm_state_verify_stopped(lrm_state_t * lrm_state, enum crmd_fsa_state cur_state, int log_level)
     /* [previous][next][first][last][top][bottom][index][help] */
 393 {
 394     int counter = 0;
 395     gboolean rc = TRUE;
 396     const char *when = "lrm disconnect";
 397 
 398     GHashTableIter gIter;
 399     const char *key = NULL;
 400     rsc_history_t *entry = NULL;
 401     active_op_t *pending = NULL;
 402 
 403     crm_debug("Checking for active resources before exit");
 404 
 405     if (cur_state == S_TERMINATE) {
 406         log_level = LOG_ERR;
 407         when = "shutdown";
 408 
 409     } else if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
 410         when = "shutdown... waiting";
 411     }
 412 
 413     if ((lrm_state->active_ops != NULL) && lrm_state_is_connected(lrm_state)) {
 414         guint removed = g_hash_table_foreach_remove(lrm_state->active_ops,
 415                                                     stop_recurring_actions,
 416                                                     lrm_state);
 417         guint nremaining = g_hash_table_size(lrm_state->active_ops);
 418 
 419         if (removed || nremaining) {
 420             crm_notice("Stopped %u recurring operation%s at %s (%u remaining)",
 421                        removed, pcmk__plural_s(removed), when, nremaining);
 422         }
 423     }
 424 
 425     if (lrm_state->active_ops != NULL) {
 426         g_hash_table_iter_init(&gIter, lrm_state->active_ops);
 427         while (g_hash_table_iter_next(&gIter, NULL, (void **)&pending)) {
 428             /* Ignore recurring actions in the shutdown calculations */
 429             if (pending->interval_ms == 0) {
 430                 counter++;
 431             }
 432         }
 433     }
 434 
 435     if (counter > 0) {
 436         do_crm_log(log_level, "%d pending executor operation%s at %s",
 437                    counter, pcmk__plural_s(counter), when);
 438 
 439         if ((cur_state == S_TERMINATE)
 440             || !pcmk_is_set(controld_globals.fsa_input_register,
 441                             R_SENT_RSC_STOP)) {
 442             g_hash_table_iter_init(&gIter, lrm_state->active_ops);
 443             while (g_hash_table_iter_next(&gIter, (gpointer*)&key, (gpointer*)&pending)) {
 444                 do_crm_log(log_level, "Pending action: %s (%s)", key, pending->op_key);
 445             }
 446 
 447         } else {
 448             rc = FALSE;
 449         }
 450         return rc;
 451     }
 452 
 453     if (lrm_state->resource_history == NULL) {
 454         return rc;
 455     }
 456 
 457     if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) {
 458         /* At this point we're not waiting, we're just shutting down */
 459         when = "shutdown";
 460     }
 461 
 462     counter = 0;
 463     g_hash_table_iter_init(&gIter, lrm_state->resource_history);
 464     while (g_hash_table_iter_next(&gIter, NULL, (gpointer*)&entry)) {
 465         if (is_rsc_active(lrm_state, entry->id) == FALSE) {
 466             continue;
 467         }
 468 
 469         counter++;
 470         if (log_level == LOG_ERR) {
 471             crm_info("Found %s active at %s", entry->id, when);
 472         } else {
 473             crm_trace("Found %s active at %s", entry->id, when);
 474         }
 475         if (lrm_state->active_ops != NULL) {
 476             GHashTableIter hIter;
 477 
 478             g_hash_table_iter_init(&hIter, lrm_state->active_ops);
 479             while (g_hash_table_iter_next(&hIter, (gpointer*)&key, (gpointer*)&pending)) {
 480                 if (pcmk__str_eq(entry->id, pending->rsc_id, pcmk__str_none)) {
 481                     crm_notice("%sction %s (%s) incomplete at %s",
 482                                pending->interval_ms == 0 ? "A" : "Recurring a",
 483                                key, pending->op_key, when);
 484                 }
 485             }
 486         }
 487     }
 488 
 489     if (counter) {
 490         crm_err("%d resource%s active at %s",
 491                 counter, (counter == 1)? " was" : "s were", when);
 492     }
 493 
 494     return rc;
 495 }
 496 
 497 static gboolean
 498 is_rsc_active(lrm_state_t * lrm_state, const char *rsc_id)
     /* [previous][next][first][last][top][bottom][index][help] */
 499 {
 500     rsc_history_t *entry = NULL;
 501 
 502     entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
 503     if (entry == NULL || entry->last == NULL) {
 504         return FALSE;
 505     }
 506 
 507     crm_trace("Processing %s: %s.%d=%d", rsc_id, entry->last->op_type,
 508               entry->last->interval_ms, entry->last->rc);
 509     if ((entry->last->rc == PCMK_OCF_OK)
 510         && pcmk__str_eq(entry->last->op_type, PCMK_ACTION_STOP,
 511                         pcmk__str_casei)) {
 512         return FALSE;
 513 
 514     } else if (entry->last->rc == PCMK_OCF_OK
 515                && pcmk__str_eq(entry->last->op_type, PCMK_ACTION_MIGRATE_TO,
 516                                pcmk__str_casei)) {
 517         // A stricter check is too complex ... leave that to the scheduler
 518         return FALSE;
 519 
 520     } else if (entry->last->rc == PCMK_OCF_NOT_RUNNING) {
 521         return FALSE;
 522 
 523     } else if ((entry->last->interval_ms == 0)
 524                && (entry->last->rc == PCMK_OCF_NOT_CONFIGURED)) {
 525         /* Badly configured resources can't be reliably stopped */
 526         return FALSE;
 527     }
 528 
 529     return TRUE;
 530 }
 531 
 532 static gboolean
 533 build_active_RAs(lrm_state_t * lrm_state, xmlNode * rsc_list)
     /* [previous][next][first][last][top][bottom][index][help] */
 534 {
 535     GHashTableIter iter;
 536     rsc_history_t *entry = NULL;
 537 
 538     g_hash_table_iter_init(&iter, lrm_state->resource_history);
 539     while (g_hash_table_iter_next(&iter, NULL, (void **)&entry)) {
 540 
 541         GList *gIter = NULL;
 542         xmlNode *xml_rsc = pcmk__xe_create(rsc_list, PCMK__XE_LRM_RESOURCE);
 543 
 544         crm_xml_add(xml_rsc, PCMK_XA_ID, entry->id);
 545         crm_xml_add(xml_rsc, PCMK_XA_TYPE, entry->rsc.type);
 546         crm_xml_add(xml_rsc, PCMK_XA_CLASS, entry->rsc.standard);
 547         crm_xml_add(xml_rsc, PCMK_XA_PROVIDER, entry->rsc.provider);
 548 
 549         if (entry->last && entry->last->params) {
 550             static const char *name = CRM_META "_" PCMK__META_CONTAINER;
 551             const char *container = g_hash_table_lookup(entry->last->params,
 552                                                         name);
 553 
 554             if (container) {
 555                 crm_trace("Resource %s is a part of container resource %s", entry->id, container);
 556                 crm_xml_add(xml_rsc, PCMK__META_CONTAINER, container);
 557             }
 558         }
 559         controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->failed,
 560                                           lrm_state->node_name);
 561         controld_add_resource_history_xml(xml_rsc, &(entry->rsc), entry->last,
 562                                           lrm_state->node_name);
 563         for (gIter = entry->recurring_op_list; gIter != NULL; gIter = gIter->next) {
 564             controld_add_resource_history_xml(xml_rsc, &(entry->rsc), gIter->data,
 565                                               lrm_state->node_name);
 566         }
 567     }
 568 
 569     return FALSE;
 570 }
 571 
 572 xmlNode *
 573 controld_query_executor_state(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 574 {
 575     // @TODO Ensure all callers handle NULL returns
 576     xmlNode *xml_state = NULL;
 577     xmlNode *xml_data = NULL;
 578     xmlNode *rsc_list = NULL;
 579     pcmk__node_status_t *peer = NULL;
 580     lrm_state_t *lrm_state = controld_get_executor_state(NULL, false);
 581 
 582     if (!lrm_state) {
 583         crm_err("Could not get executor state for local node");
 584         return NULL;
 585     }
 586 
 587     peer = pcmk__get_node(0, lrm_state->node_name, NULL, pcmk__node_search_any);
 588     CRM_CHECK(peer != NULL, return NULL);
 589 
 590     xml_state = create_node_state_update(peer,
 591                                          controld_node_update_cluster
 592                                          |controld_node_update_peer,
 593                                          NULL, __func__);
 594     if (xml_state == NULL) {
 595         return NULL;
 596     }
 597 
 598     xml_data = pcmk__xe_create(xml_state, PCMK__XE_LRM);
 599     crm_xml_add(xml_data, PCMK_XA_ID, peer->xml_id);
 600     rsc_list = pcmk__xe_create(xml_data, PCMK__XE_LRM_RESOURCES);
 601 
 602     // Build a list of active (not necessarily running) resources
 603     build_active_RAs(lrm_state, rsc_list);
 604 
 605     crm_log_xml_trace(xml_state, "Current executor state");
 606 
 607     return xml_state;
 608 }
 609 
 610 /*!
 611  * \internal
 612  * \brief Map standard Pacemaker return code to operation status and OCF code
 613  *
 614  * \param[out] event  Executor event whose status and return code should be set
 615  * \param[in]  rc     Standard Pacemaker return code
 616  */
 617 void
 618 controld_rc2event(lrmd_event_data_t *event, int rc)
     /* [previous][next][first][last][top][bottom][index][help] */
 619 {
 620     /* This is called for cleanup requests from controller peers/clients, not
 621      * for resource actions, so no exit reason is needed.
 622      */
 623     switch (rc) {
 624         case pcmk_rc_ok:
 625             lrmd__set_result(event, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
 626             break;
 627         case EACCES:
 628             lrmd__set_result(event, PCMK_OCF_INSUFFICIENT_PRIV,
 629                              PCMK_EXEC_ERROR, NULL);
 630             break;
 631         default:
 632             lrmd__set_result(event, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
 633                              NULL);
 634             break;
 635     }
 636 }
 637 
 638 /*!
 639  * \internal
 640  * \brief Trigger a new transition after CIB status was deleted
 641  *
 642  * If a CIB status delete was not expected (as part of the transition graph),
 643  * trigger a new transition by updating the (arbitrary) "last-lrm-refresh"
 644  * cluster property.
 645  *
 646  * \param[in] from_sys  IPC name that requested the delete
 647  * \param[in] rsc_id    Resource whose status was deleted (for logging only)
 648  */
 649 void
 650 controld_trigger_delete_refresh(const char *from_sys, const char *rsc_id)
     /* [previous][next][first][last][top][bottom][index][help] */
 651 {
 652     if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_casei)) {
 653         char *now_s = crm_strdup_printf("%lld", (long long) time(NULL));
 654 
 655         crm_debug("Triggering a refresh after %s cleaned %s", from_sys, rsc_id);
 656         cib__update_node_attr(controld_globals.logger_out,
 657                               controld_globals.cib_conn, cib_none,
 658                               PCMK_XE_CRM_CONFIG, NULL, NULL, NULL, NULL,
 659                               "last-lrm-refresh", now_s, NULL, NULL);
 660         free(now_s);
 661     }
 662 }
 663 
 664 static void
 665 notify_deleted(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rsc_id, int rc)
     /* [previous][next][first][last][top][bottom][index][help] */
 666 {
 667     lrmd_event_data_t *op = NULL;
 668     const char *from_sys = crm_element_value(input->msg, PCMK__XA_CRM_SYS_FROM);
 669     const char *from_host = crm_element_value(input->msg, PCMK__XA_SRC);
 670 
 671     crm_info("Notifying %s on %s that %s was%s deleted",
 672              from_sys, (from_host? from_host : "localhost"), rsc_id,
 673              ((rc == pcmk_ok)? "" : " not"));
 674     op = construct_op(lrm_state, input->xml, rsc_id, PCMK_ACTION_DELETE);
 675     controld_rc2event(op, pcmk_legacy2rc(rc));
 676     controld_ack_event_directly(from_host, from_sys, NULL, op, rsc_id);
 677     lrmd_free_event(op);
 678     controld_trigger_delete_refresh(from_sys, rsc_id);
 679 }
 680 
 681 static gboolean
 682 lrm_remove_deleted_rsc(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 683 {
 684     struct delete_event_s *event = user_data;
 685     struct pending_deletion_op_s *op = value;
 686 
 687     if (pcmk__str_eq(event->rsc, op->rsc, pcmk__str_none)) {
 688         notify_deleted(event->lrm_state, op->input, event->rsc, event->rc);
 689         return TRUE;
 690     }
 691     return FALSE;
 692 }
 693 
 694 static gboolean
 695 lrm_remove_deleted_op(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 696 {
 697     const char *rsc = user_data;
 698     active_op_t *pending = value;
 699 
 700     if (pcmk__str_eq(rsc, pending->rsc_id, pcmk__str_none)) {
 701         crm_info("Removing op %s:%d for deleted resource %s",
 702                  pending->op_key, pending->call_id, rsc);
 703         return TRUE;
 704     }
 705     return FALSE;
 706 }
 707 
 708 static void
 709 delete_rsc_entry(lrm_state_t *lrm_state, ha_msg_input_t *input,
     /* [previous][next][first][last][top][bottom][index][help] */
 710                  const char *rsc_id, GHashTableIter *rsc_iter, int rc,
 711                  const char *user_name, bool from_cib)
 712 {
 713     struct delete_event_s event;
 714 
 715     CRM_CHECK(rsc_id != NULL, return);
 716 
 717     if (rc == pcmk_ok) {
 718         char *rsc_id_copy = pcmk__str_copy(rsc_id);
 719 
 720         if (rsc_iter) {
 721             g_hash_table_iter_remove(rsc_iter);
 722         } else {
 723             g_hash_table_remove(lrm_state->resource_history, rsc_id_copy);
 724         }
 725 
 726         if (from_cib) {
 727             controld_delete_resource_history(rsc_id_copy, lrm_state->node_name,
 728                                              user_name, crmd_cib_smart_opt());
 729         }
 730         g_hash_table_foreach_remove(lrm_state->active_ops,
 731                                     lrm_remove_deleted_op, rsc_id_copy);
 732         free(rsc_id_copy);
 733     }
 734 
 735     if (input) {
 736         notify_deleted(lrm_state, input, rsc_id, rc);
 737     }
 738 
 739     event.rc = rc;
 740     event.rsc = rsc_id;
 741     event.lrm_state = lrm_state;
 742     g_hash_table_foreach_remove(lrm_state->deletion_ops, lrm_remove_deleted_rsc, &event);
 743 }
 744 
 745 static inline gboolean
 746 last_failed_matches_op(rsc_history_t *entry, const char *op, guint interval_ms)
     /* [previous][next][first][last][top][bottom][index][help] */
 747 {
 748     if (entry == NULL) {
 749         return FALSE;
 750     }
 751     if (op == NULL) {
 752         return TRUE;
 753     }
 754     return (pcmk__str_eq(op, entry->failed->op_type, pcmk__str_casei)
 755             && (interval_ms == entry->failed->interval_ms));
 756 }
 757 
 758 /*!
 759  * \internal
 760  * \brief Clear a resource's last failure
 761  *
 762  * Erase a resource's last failure on a particular node from both the
 763  * LRM resource history in the CIB, and the resource history remembered
 764  * for the LRM state.
 765  *
 766  * \param[in] rsc_id      Resource name
 767  * \param[in] node_name   Node name
 768  * \param[in] operation   If specified, only clear if matching this operation
 769  * \param[in] interval_ms If operation is specified, it has this interval
 770  */
 771 void
 772 lrm_clear_last_failure(const char *rsc_id, const char *node_name,
     /* [previous][next][first][last][top][bottom][index][help] */
 773                        const char *operation, guint interval_ms)
 774 {
 775     lrm_state_t *lrm_state = controld_get_executor_state(node_name, false);
 776 
 777     if (lrm_state == NULL) {
 778         return;
 779     }
 780     if (lrm_state->resource_history != NULL) {
 781         rsc_history_t *entry = g_hash_table_lookup(lrm_state->resource_history,
 782                                                    rsc_id);
 783 
 784         if (last_failed_matches_op(entry, operation, interval_ms)) {
 785             lrmd_free_event(entry->failed);
 786             entry->failed = NULL;
 787         }
 788     }
 789 }
 790 
 791 /* Returns: gboolean - cancellation is in progress */
 792 static gboolean
 793 cancel_op(lrm_state_t * lrm_state, const char *rsc_id, const char *key, int op, gboolean remove)
     /* [previous][next][first][last][top][bottom][index][help] */
 794 {
 795     int rc = pcmk_ok;
 796     char *local_key = NULL;
 797     active_op_t *pending = NULL;
 798 
 799     CRM_CHECK(op != 0, return FALSE);
 800     CRM_CHECK(rsc_id != NULL, return FALSE);
 801     if (key == NULL) {
 802         local_key = make_stop_id(rsc_id, op);
 803         key = local_key;
 804     }
 805     pending = g_hash_table_lookup(lrm_state->active_ops, key);
 806 
 807     if (pending) {
 808         if (remove && !pcmk_is_set(pending->flags, active_op_remove)) {
 809             controld_set_active_op_flags(pending, active_op_remove);
 810             crm_debug("Scheduling %s for removal", key);
 811         }
 812 
 813         if (pcmk_is_set(pending->flags, active_op_cancelled)) {
 814             crm_debug("Operation %s already cancelled", key);
 815             free(local_key);
 816             return FALSE;
 817         }
 818         controld_set_active_op_flags(pending, active_op_cancelled);
 819 
 820     } else {
 821         crm_info("No pending op found for %s", key);
 822         free(local_key);
 823         return FALSE;
 824     }
 825 
 826     crm_debug("Cancelling op %d for %s (%s)", op, rsc_id, key);
 827     rc = lrm_state_cancel(lrm_state, pending->rsc_id, pending->op_type,
 828                           pending->interval_ms);
 829     if (rc == pcmk_ok) {
 830         crm_debug("Op %d for %s (%s): cancelled", op, rsc_id, key);
 831         free(local_key);
 832         return TRUE;
 833     }
 834 
 835     crm_debug("Op %d for %s (%s): Nothing to cancel", op, rsc_id, key);
 836     /* The caller needs to make sure the entry is
 837      * removed from the active operations list
 838      *
 839      * Usually by returning TRUE inside the worker function
 840      * supplied to g_hash_table_foreach_remove()
 841      *
 842      * Not removing the entry from active operations will block
 843      * the node from shutting down
 844      */
 845     free(local_key);
 846     return FALSE;
 847 }
 848 
 849 struct cancel_data {
 850     gboolean done;
 851     gboolean remove;
 852     const char *key;
 853     lrmd_rsc_info_t *rsc;
 854     lrm_state_t *lrm_state;
 855 };
 856 
 857 static gboolean
 858 cancel_action_by_key(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 859 {
 860     gboolean remove = FALSE;
 861     struct cancel_data *data = user_data;
 862     active_op_t *op = value;
 863 
 864     if (pcmk__str_eq(op->op_key, data->key, pcmk__str_none)) {
 865         data->done = TRUE;
 866         remove = !cancel_op(data->lrm_state, data->rsc->id, key, op->call_id, data->remove);
 867     }
 868     return remove;
 869 }
 870 
 871 static gboolean
 872 cancel_op_key(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, const char *key, gboolean remove)
     /* [previous][next][first][last][top][bottom][index][help] */
 873 {
 874     guint removed = 0;
 875     struct cancel_data data;
 876 
 877     CRM_CHECK(rsc != NULL, return FALSE);
 878     CRM_CHECK(key != NULL, return FALSE);
 879 
 880     data.key = key;
 881     data.rsc = rsc;
 882     data.done = FALSE;
 883     data.remove = remove;
 884     data.lrm_state = lrm_state;
 885 
 886     removed = g_hash_table_foreach_remove(lrm_state->active_ops,
 887                                           cancel_action_by_key, &data);
 888     crm_trace("Removed %u op cache entries, new size: %u",
 889               removed, g_hash_table_size(lrm_state->active_ops));
 890     return data.done;
 891 }
 892 
 893 /*!
 894  * \internal
 895  * \brief Retrieve resource information from LRM
 896  *
 897  * \param[in,out]  lrm_state  Executor connection state to use
 898  * \param[in]      rsc_xml    XML containing resource configuration
 899  * \param[in]      do_create  If true, register resource if not already
 900  * \param[out]     rsc_info   Where to store information obtained from executor
 901  *
 902  * \retval pcmk_ok   Success (and rsc_info holds newly allocated result)
 903  * \retval -EINVAL   Required information is missing from arguments
 904  * \retval -ENOTCONN No active connection to LRM
 905  * \retval -ENODEV   Resource not found
 906  * \retval -errno    Error communicating with executor when registering resource
 907  *
 908  * \note Caller is responsible for freeing result on success.
 909  */
 910 static int
 911 get_lrm_resource(lrm_state_t *lrm_state, const xmlNode *rsc_xml,
     /* [previous][next][first][last][top][bottom][index][help] */
 912                  gboolean do_create, lrmd_rsc_info_t **rsc_info)
 913 {
 914     const char *id = pcmk__xe_id(rsc_xml);
 915 
 916     CRM_CHECK(lrm_state && rsc_xml && rsc_info, return -EINVAL);
 917     CRM_CHECK(id, return -EINVAL);
 918 
 919     if (lrm_state_is_connected(lrm_state) == FALSE) {
 920         return -ENOTCONN;
 921     }
 922 
 923     crm_trace("Retrieving resource information for %s from the executor", id);
 924     *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0);
 925 
 926     // If resource isn't known by ID, try clone name, if provided
 927     if (!*rsc_info) {
 928         const char *long_id = crm_element_value(rsc_xml, PCMK__XA_LONG_ID);
 929 
 930         if (long_id) {
 931             *rsc_info = lrm_state_get_rsc_info(lrm_state, long_id, 0);
 932         }
 933     }
 934 
 935     if ((*rsc_info == NULL) && do_create) {
 936         const char *class = crm_element_value(rsc_xml, PCMK_XA_CLASS);
 937         const char *provider = crm_element_value(rsc_xml, PCMK_XA_PROVIDER);
 938         const char *type = crm_element_value(rsc_xml, PCMK_XA_TYPE);
 939         int rc;
 940 
 941         crm_trace("Registering resource %s with the executor", id);
 942         rc = lrm_state_register_rsc(lrm_state, id, class, provider, type,
 943                                     lrmd_opt_drop_recurring);
 944         if (rc != pcmk_ok) {
 945             fsa_data_t *msg_data = NULL;
 946 
 947             crm_err("Could not register resource %s with the executor on %s: %s "
 948                     QB_XS " rc=%d",
 949                     id, lrm_state->node_name, pcmk_strerror(rc), rc);
 950 
 951             /* Register this as an internal error if this involves the local
 952              * executor. Otherwise, we're likely dealing with an unresponsive
 953              * remote node, which is not an FSA failure.
 954              */
 955             if (lrm_state_is_local(lrm_state) == TRUE) {
 956                 register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
 957             }
 958             return rc;
 959         }
 960 
 961         *rsc_info = lrm_state_get_rsc_info(lrm_state, id, 0);
 962     }
 963     return *rsc_info? pcmk_ok : -ENODEV;
 964 }
 965 
 966 static void
 967 delete_resource(lrm_state_t *lrm_state, const char *id, lrmd_rsc_info_t *rsc,
     /* [previous][next][first][last][top][bottom][index][help] */
 968                 GHashTableIter *iter, const char *sys, const char *user,
 969                 ha_msg_input_t *request, bool unregister, bool from_cib)
 970 {
 971     int rc = pcmk_ok;
 972 
 973     crm_info("Removing resource %s from executor for %s%s%s",
 974              id, sys, (user? " as " : ""), (user? user : ""));
 975 
 976     if (rsc && unregister) {
 977         rc = lrm_state_unregister_rsc(lrm_state, id, 0);
 978     }
 979 
 980     if (rc == pcmk_ok) {
 981         crm_trace("Resource %s deleted from executor", id);
 982     } else if (rc == -EINPROGRESS) {
 983         crm_info("Deletion of resource '%s' from executor is pending", id);
 984         if (request) {
 985             struct pending_deletion_op_s *op = NULL;
 986             char *ref = crm_element_value_copy(request->msg, PCMK_XA_REFERENCE);
 987 
 988             op = pcmk__assert_alloc(1, sizeof(struct pending_deletion_op_s));
 989             op->rsc = pcmk__str_copy(rsc->id);
 990             op->input = copy_ha_msg_input(request);
 991             g_hash_table_insert(lrm_state->deletion_ops, ref, op);
 992         }
 993         return;
 994     } else {
 995         crm_warn("Could not delete '%s' from executor for %s%s%s: %s "
 996                  QB_XS " rc=%d", id, sys, (user? " as " : ""),
 997                  (user? user : ""), pcmk_strerror(rc), rc);
 998     }
 999 
1000     delete_rsc_entry(lrm_state, request, id, iter, rc, user, from_cib);
1001 }
1002 
1003 static int
1004 get_fake_call_id(lrm_state_t *lrm_state, const char *rsc_id)
     /* [previous][next][first][last][top][bottom][index][help] */
1005 {
1006     int call_id = 999999999;
1007     rsc_history_t *entry = NULL;
1008 
1009     if(lrm_state) {
1010         entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
1011     }
1012 
1013     /* Make sure the call id is greater than the last successful operation,
1014      * otherwise the failure will not result in a possible recovery of the resource
1015      * as it could appear the failure occurred before the successful start */
1016     if (entry) {
1017         call_id = entry->last_callid + 1;
1018     }
1019 
1020     if (call_id < 0) {
1021         call_id = 1;
1022     }
1023     return call_id;
1024 }
1025 
1026 static void
1027 fake_op_status(lrm_state_t *lrm_state, lrmd_event_data_t *op, int op_status,
     /* [previous][next][first][last][top][bottom][index][help] */
1028                enum ocf_exitcode op_exitcode, const char *exit_reason)
1029 {
1030     op->call_id = get_fake_call_id(lrm_state, op->rsc_id);
1031     op->t_run = time(NULL);
1032     op->t_rcchange = op->t_run;
1033     lrmd__set_result(op, op_exitcode, op_status, exit_reason);
1034 }
1035 
1036 static void
1037 force_reprobe(lrm_state_t *lrm_state, const char *from_sys,
     /* [previous][next][first][last][top][bottom][index][help] */
1038               const char *from_host, const char *user_name,
1039               gboolean is_remote_node, bool reprobe_all_nodes)
1040 {
1041     GHashTableIter gIter;
1042     rsc_history_t *entry = NULL;
1043 
1044     crm_info("Clearing resource history on node %s", lrm_state->node_name);
1045     g_hash_table_iter_init(&gIter, lrm_state->resource_history);
1046     while (g_hash_table_iter_next(&gIter, NULL, (void **)&entry)) {
1047         /* only unregister the resource during a reprobe if it is not a remote connection
1048          * resource. otherwise unregistering the connection will terminate remote-node
1049          * membership */
1050         bool unregister = true;
1051 
1052         if (is_remote_lrmd_ra(NULL, NULL, entry->id)) {
1053             unregister = false;
1054 
1055             if (reprobe_all_nodes) {
1056                 lrm_state_t *remote_lrm_state =
1057                     controld_get_executor_state(entry->id, false);
1058 
1059                 if (remote_lrm_state != NULL) {
1060                     /* If reprobing all nodes, be sure to reprobe the remote
1061                      * node before clearing its connection resource
1062                      */
1063                     force_reprobe(remote_lrm_state, from_sys, from_host,
1064                                   user_name, TRUE, reprobe_all_nodes);
1065                 }
1066             }
1067         }
1068 
1069         /* Don't delete from the CIB, since we'll delete the whole node's LRM
1070          * state from the CIB soon
1071          */
1072         delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys,
1073                         user_name, NULL, unregister, false);
1074     }
1075 
1076     /* Now delete the copy in the CIB */
1077     controld_delete_node_state(lrm_state->node_name, controld_section_lrm,
1078                                cib_none);
1079 }
1080 
1081 /*!
1082  * \internal
1083  * \brief Fail a requested action without actually executing it
1084  *
1085  * For an action that can't be executed, process it similarly to an actual
1086  * execution result, with specified error status (except for notify actions,
1087  * which will always be treated as successful).
1088  *
1089  * \param[in,out] lrm_state    Executor connection that action is for
1090  * \param[in]     action       Action XML from request
1091  * \param[in]     rc           Desired return code to use
1092  * \param[in]     op_status    Desired operation status to use
1093  * \param[in]     exit_reason  Human-friendly detail, if error
1094  */
1095 static void
1096 synthesize_lrmd_failure(lrm_state_t *lrm_state, const xmlNode *action,
     /* [previous][next][first][last][top][bottom][index][help] */
1097                         int op_status, enum ocf_exitcode rc,
1098                         const char *exit_reason)
1099 {
1100     lrmd_event_data_t *op = NULL;
1101     const char *operation = crm_element_value(action, PCMK_XA_OPERATION);
1102     const char *target_node = crm_element_value(action, PCMK__META_ON_NODE);
1103     xmlNode *xml_rsc = pcmk__xe_first_child(action, PCMK_XE_PRIMITIVE, NULL,
1104                                             NULL);
1105 
1106     if ((xml_rsc == NULL) || (pcmk__xe_id(xml_rsc) == NULL)) {
1107         /* @TODO Should we do something else, like direct ack? */
1108         crm_info("Can't fake %s failure (%d) on %s without resource configuration",
1109                  crm_element_value(action, PCMK__XA_OPERATION_KEY), rc,
1110                  target_node);
1111         return;
1112 
1113     } else if(operation == NULL) {
1114         /* This probably came from crm_resource -C, nothing to do */
1115         crm_info("Can't fake %s failure (%d) on %s without operation",
1116                  pcmk__xe_id(xml_rsc), rc, target_node);
1117         return;
1118     }
1119 
1120     op = construct_op(lrm_state, action, pcmk__xe_id(xml_rsc), operation);
1121 
1122     if (pcmk__str_eq(operation, PCMK_ACTION_NOTIFY, pcmk__str_casei)) {
1123         // Notifications can't fail
1124         fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_OK, NULL);
1125     } else {
1126         fake_op_status(lrm_state, op, op_status, rc, exit_reason);
1127     }
1128 
1129     crm_info("Faking " PCMK__OP_FMT " result (%d) on %s",
1130              op->rsc_id, op->op_type, op->interval_ms, op->rc, target_node);
1131 
1132     // Process the result as if it came from the LRM
1133     process_lrm_event(lrm_state, op, NULL, action);
1134     lrmd_free_event(op);
1135 }
1136 
1137 /*!
1138  * \internal
1139  * \brief Get target of an LRM operation (replacing \p NULL with local node
1140  *        name)
1141  *
1142  * \param[in] xml  LRM operation data XML
1143  *
1144  * \return LRM operation target node name (local node or Pacemaker Remote node)
1145  */
1146 static const char *
1147 lrm_op_target(const xmlNode *xml)
     /* [previous][next][first][last][top][bottom][index][help] */
1148 {
1149     const char *target = NULL;
1150 
1151     if (xml) {
1152         target = crm_element_value(xml, PCMK__META_ON_NODE);
1153     }
1154     if (target == NULL) {
1155         target = controld_globals.cluster->priv->node_name;
1156     }
1157     return target;
1158 }
1159 
1160 static void
1161 fail_lrm_resource(xmlNode *xml, lrm_state_t *lrm_state, const char *user_name,
     /* [previous][next][first][last][top][bottom][index][help] */
1162                   const char *from_host, const char *from_sys)
1163 {
1164     lrmd_event_data_t *op = NULL;
1165     lrmd_rsc_info_t *rsc = NULL;
1166     xmlNode *xml_rsc = pcmk__xe_first_child(xml, PCMK_XE_PRIMITIVE, NULL, NULL);
1167 
1168     CRM_CHECK(xml_rsc != NULL, return);
1169 
1170     /* The executor simply executes operations and reports the results, without
1171      * any concept of success or failure, so to fail a resource, we must fake
1172      * what a failure looks like.
1173      *
1174      * To do this, we create a fake executor operation event for the resource,
1175      * and pass that event to the executor client callback so it will be
1176      * processed as if it came from the executor.
1177      */
1178     op = construct_op(lrm_state, xml, pcmk__xe_id(xml_rsc), "asyncmon");
1179 
1180     free((char*) op->user_data);
1181     op->user_data = NULL;
1182     op->interval_ms = 0;
1183 
1184     if (user_name && !pcmk__is_privileged(user_name)) {
1185         crm_err("%s does not have permission to fail %s",
1186                 user_name, pcmk__xe_id(xml_rsc));
1187         fake_op_status(lrm_state, op, PCMK_EXEC_ERROR,
1188                        PCMK_OCF_INSUFFICIENT_PRIV,
1189                        "Unprivileged user cannot fail resources");
1190         controld_ack_event_directly(from_host, from_sys, NULL, op,
1191                                     pcmk__xe_id(xml_rsc));
1192         lrmd_free_event(op);
1193         return;
1194     }
1195 
1196 
1197     if (get_lrm_resource(lrm_state, xml_rsc, TRUE, &rsc) == pcmk_ok) {
1198         crm_info("Failing resource %s...", rsc->id);
1199         fake_op_status(lrm_state, op, PCMK_EXEC_DONE, PCMK_OCF_UNKNOWN_ERROR,
1200                        "Simulated failure");
1201         process_lrm_event(lrm_state, op, NULL, xml);
1202         op->rc = PCMK_OCF_OK; // The request to fail the resource succeeded
1203         lrmd_free_rsc_info(rsc);
1204 
1205     } else {
1206         crm_info("Cannot find/create resource in order to fail it...");
1207         crm_log_xml_warn(xml, "bad input");
1208         fake_op_status(lrm_state, op, PCMK_EXEC_ERROR, PCMK_OCF_UNKNOWN_ERROR,
1209                        "Cannot fail unknown resource");
1210     }
1211 
1212     controld_ack_event_directly(from_host, from_sys, NULL, op,
1213                                 pcmk__xe_id(xml_rsc));
1214     lrmd_free_event(op);
1215 }
1216 
1217 static void
1218 handle_reprobe_op(lrm_state_t *lrm_state, xmlNode *msg, const char *from_sys,
     /* [previous][next][first][last][top][bottom][index][help] */
1219                   const char *from_host, const char *user_name,
1220                   gboolean is_remote_node, bool reprobe_all_nodes)
1221 {
1222     crm_notice("Forcing the status of all resources to be redetected");
1223     force_reprobe(lrm_state, from_sys, from_host, user_name, is_remote_node,
1224                   reprobe_all_nodes);
1225 
1226     if (!pcmk__strcase_any_of(from_sys, CRM_SYSTEM_PENGINE, CRM_SYSTEM_TENGINE, NULL)) {
1227         xmlNode *reply = pcmk__new_reply(msg, NULL);
1228 
1229         crm_debug("ACK'ing re-probe from %s (%s)", from_sys, from_host);
1230 
1231         if (relay_message(reply, TRUE) == FALSE) {
1232             crm_log_xml_err(reply, "Unable to route reply");
1233         }
1234         pcmk__xml_free(reply);
1235     }
1236 }
1237 
1238 static bool do_lrm_cancel(ha_msg_input_t *input, lrm_state_t *lrm_state,
     /* [previous][next][first][last][top][bottom][index][help] */
1239               lrmd_rsc_info_t *rsc, const char *from_host, const char *from_sys)
1240 {
1241     char *op_key = NULL;
1242     char *meta_key = NULL;
1243     int call = 0;
1244     const char *call_id = NULL;
1245     const char *op_task = NULL;
1246     guint interval_ms = 0;
1247     gboolean in_progress = FALSE;
1248     xmlNode *params = pcmk__xe_first_child(input->xml, PCMK__XE_ATTRIBUTES,
1249                                            NULL, NULL);
1250 
1251     CRM_CHECK(params != NULL, return FALSE);
1252 
1253     meta_key = crm_meta_name(PCMK_XA_OPERATION);
1254     op_task = crm_element_value(params, meta_key);
1255     free(meta_key);
1256     CRM_CHECK(op_task != NULL, return FALSE);
1257 
1258     meta_key = crm_meta_name(PCMK_META_INTERVAL);
1259     if (crm_element_value_ms(params, meta_key, &interval_ms) != pcmk_ok) {
1260         free(meta_key);
1261         return FALSE;
1262     }
1263     free(meta_key);
1264 
1265     op_key = pcmk__op_key(rsc->id, op_task, interval_ms);
1266 
1267     meta_key = crm_meta_name(PCMK__XA_CALL_ID);
1268     call_id = crm_element_value(params, meta_key);
1269     free(meta_key);
1270 
1271     crm_debug("Scheduler requested op %s (call=%s) be cancelled",
1272               op_key, (call_id? call_id : "NA"));
1273     pcmk__scan_min_int(call_id, &call, 0);
1274     if (call == 0) {
1275         // Normal case when the scheduler cancels a recurring op
1276         in_progress = cancel_op_key(lrm_state, rsc, op_key, TRUE);
1277 
1278     } else {
1279         // Normal case when the scheduler cancels an orphan op
1280         in_progress = cancel_op(lrm_state, rsc->id, NULL, call, TRUE);
1281     }
1282 
1283     // Acknowledge cancellation operation if for a remote connection resource
1284     if (!in_progress || is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
1285         char *op_id = make_stop_id(rsc->id, call);
1286 
1287         if (is_remote_lrmd_ra(NULL, NULL, rsc->id) == FALSE) {
1288             crm_info("Nothing known about operation %d for %s", call, op_key);
1289         }
1290         controld_delete_action_history_by_key(rsc->id, lrm_state->node_name,
1291                                               op_key, call);
1292         send_task_ok_ack(lrm_state, input, rsc->id, rsc, op_task,
1293                          from_host, from_sys);
1294 
1295         /* needed at least for cancellation of a remote operation */
1296         if (lrm_state->active_ops != NULL) {
1297             g_hash_table_remove(lrm_state->active_ops, op_id);
1298         }
1299         free(op_id);
1300     }
1301 
1302     free(op_key);
1303     return TRUE;
1304 }
1305 
1306 static void
1307 do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state,
     /* [previous][next][first][last][top][bottom][index][help] */
1308               lrmd_rsc_info_t *rsc, const char *from_sys, const char *from_host,
1309               bool crm_rsc_delete, const char *user_name)
1310 {
1311     bool unregister = true;
1312     int cib_rc = controld_delete_resource_history(rsc->id, lrm_state->node_name,
1313                                                   user_name,
1314                                                   cib_dryrun|cib_sync_call);
1315 
1316     if (cib_rc != pcmk_rc_ok) {
1317         lrmd_event_data_t *op = NULL;
1318 
1319         op = construct_op(lrm_state, input->xml, rsc->id, PCMK_ACTION_DELETE);
1320 
1321         /* These are resource clean-ups, not actions, so no exit reason is
1322          * needed.
1323          */
1324         lrmd__set_result(op, pcmk_rc2ocf(cib_rc), PCMK_EXEC_ERROR, NULL);
1325         controld_ack_event_directly(from_host, from_sys, NULL, op, rsc->id);
1326         lrmd_free_event(op);
1327         return;
1328     }
1329 
1330     if (crm_rsc_delete && is_remote_lrmd_ra(NULL, NULL, rsc->id)) {
1331         unregister = false;
1332     }
1333 
1334     delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys,
1335                     user_name, input, unregister, true);
1336 }
1337 
1338 // User data for asynchronous metadata execution
1339 struct metadata_cb_data {
1340     lrmd_rsc_info_t *rsc;   // Copy of resource information
1341     xmlNode *input_xml;     // Copy of FSA input XML
1342 };
1343 
1344 static struct metadata_cb_data *
1345 new_metadata_cb_data(lrmd_rsc_info_t *rsc, xmlNode *input_xml)
     /* [previous][next][first][last][top][bottom][index][help] */
1346 {
1347     struct metadata_cb_data *data = NULL;
1348 
1349     data = pcmk__assert_alloc(1, sizeof(struct metadata_cb_data));
1350     data->input_xml = pcmk__xml_copy(NULL, input_xml);
1351     data->rsc = lrmd_copy_rsc_info(rsc);
1352     return data;
1353 }
1354 
1355 static void
1356 free_metadata_cb_data(struct metadata_cb_data *data)
     /* [previous][next][first][last][top][bottom][index][help] */
1357 {
1358     lrmd_free_rsc_info(data->rsc);
1359     pcmk__xml_free(data->input_xml);
1360     free(data);
1361 }
1362 
1363 /*!
1364  * \internal
1365  * \brief Execute an action after metadata has been retrieved
1366  *
1367  * \param[in] pid        Ignored
1368  * \param[in] result     Result of metadata action
1369  * \param[in] user_data  Metadata callback data
1370  */
1371 static void
1372 metadata_complete(int pid, const pcmk__action_result_t *result, void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1373 {
1374     struct metadata_cb_data *data = (struct metadata_cb_data *) user_data;
1375 
1376     struct ra_metadata_s *md = NULL;
1377     lrm_state_t *lrm_state =
1378         controld_get_executor_state(lrm_op_target(data->input_xml), false);
1379 
1380     if ((lrm_state != NULL) && pcmk__result_ok(result)) {
1381         md = controld_cache_metadata(lrm_state->metadata_cache, data->rsc,
1382                                      result->action_stdout);
1383     }
1384     if (!pcmk_is_set(controld_globals.fsa_input_register, R_HA_DISCONNECTED)) {
1385         do_lrm_rsc_op(lrm_state, data->rsc, data->input_xml, md);
1386     }
1387     free_metadata_cb_data(data);
1388 }
1389 
1390 /*       A_LRM_INVOKE   */
1391 void
1392 do_lrm_invoke(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
1393               enum crmd_fsa_cause cause,
1394               enum crmd_fsa_state cur_state,
1395               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
1396 {
1397     lrm_state_t *lrm_state = NULL;
1398     const char *crm_op = NULL;
1399     const char *from_sys = NULL;
1400     const char *from_host = NULL;
1401     const char *operation = NULL;
1402     ha_msg_input_t *input = fsa_typed_data(fsa_dt_ha_msg);
1403     const char *user_name = NULL;
1404     const char *target_node = lrm_op_target(input->xml);
1405     gboolean is_remote_node = FALSE;
1406     bool crm_rsc_delete = FALSE;
1407 
1408     // Message routed to the local node is targeting a specific, non-local node
1409     is_remote_node = !controld_is_local_node(target_node);
1410 
1411     lrm_state = controld_get_executor_state(target_node, false);
1412     if ((lrm_state == NULL) && is_remote_node) {
1413         crm_err("Failing action because local node has never had connection to remote node %s",
1414                 target_node);
1415         synthesize_lrmd_failure(NULL, input->xml, PCMK_EXEC_NOT_CONNECTED,
1416                                 PCMK_OCF_UNKNOWN_ERROR,
1417                                 "Local node has no connection to remote");
1418         return;
1419     }
1420     pcmk__assert(lrm_state != NULL);
1421 
1422     user_name = pcmk__update_acl_user(input->msg, PCMK__XA_CRM_USER, NULL);
1423     crm_op = crm_element_value(input->msg, PCMK__XA_CRM_TASK);
1424     from_sys = crm_element_value(input->msg, PCMK__XA_CRM_SYS_FROM);
1425     if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
1426         from_host = crm_element_value(input->msg, PCMK__XA_SRC);
1427     }
1428 
1429     if (pcmk__str_eq(crm_op, PCMK_ACTION_LRM_DELETE, pcmk__str_none)) {
1430         if (!pcmk__str_eq(from_sys, CRM_SYSTEM_TENGINE, pcmk__str_none)) {
1431             crm_rsc_delete = TRUE; // from crm_resource
1432         }
1433         operation = PCMK_ACTION_DELETE;
1434 
1435     } else if (input->xml != NULL) {
1436         operation = crm_element_value(input->xml, PCMK_XA_OPERATION);
1437     }
1438 
1439     CRM_CHECK(!pcmk__str_empty(crm_op) || !pcmk__str_empty(operation), return);
1440 
1441     crm_trace("'%s' execution request from %s as %s user",
1442               pcmk__s(crm_op, operation),
1443               pcmk__s(from_sys, "unknown subsystem"),
1444               pcmk__s(user_name, "current"));
1445 
1446     if (pcmk__str_eq(crm_op, CRM_OP_LRM_FAIL, pcmk__str_none)) {
1447         fail_lrm_resource(input->xml, lrm_state, user_name, from_host,
1448                           from_sys);
1449 
1450     } else if (pcmk__str_eq(crm_op, CRM_OP_REPROBE, pcmk__str_none)
1451                || pcmk__str_eq(operation, CRM_OP_REPROBE, pcmk__str_none)) {
1452         const char *raw_target = NULL;
1453 
1454         if (input->xml != NULL) {
1455             // For CRM_OP_REPROBE, a NULL target means we're targeting all nodes
1456             raw_target = crm_element_value(input->xml, PCMK__META_ON_NODE);
1457         }
1458         handle_reprobe_op(lrm_state, input->msg, from_sys, from_host, user_name,
1459                           is_remote_node, (raw_target == NULL));
1460 
1461     } else if (operation != NULL) {
1462         lrmd_rsc_info_t *rsc = NULL;
1463         xmlNode *xml_rsc = pcmk__xe_first_child(input->xml, PCMK_XE_PRIMITIVE,
1464                                                 NULL, NULL);
1465         gboolean create_rsc = !pcmk__str_eq(operation, PCMK_ACTION_DELETE,
1466                                             pcmk__str_none);
1467         int rc;
1468 
1469         // We can't return anything meaningful without a resource ID
1470         CRM_CHECK((xml_rsc != NULL) && (pcmk__xe_id(xml_rsc) != NULL), return);
1471 
1472         rc = get_lrm_resource(lrm_state, xml_rsc, create_rsc, &rsc);
1473         if (rc == -ENOTCONN) {
1474             synthesize_lrmd_failure(lrm_state, input->xml,
1475                                     PCMK_EXEC_NOT_CONNECTED,
1476                                     PCMK_OCF_UNKNOWN_ERROR,
1477                                     "Not connected to remote executor");
1478             return;
1479 
1480         } else if ((rc < 0) && !create_rsc) {
1481             /* Delete of malformed or nonexistent resource
1482              * (deleting something that does not exist is a success)
1483              */
1484             crm_debug("Not registering resource '%s' for a %s event "
1485                       QB_XS " get-rc=%d (%s) transition-key=%s",
1486                       pcmk__xe_id(xml_rsc), operation,
1487                       rc, pcmk_strerror(rc), pcmk__xe_id(input->xml));
1488             delete_rsc_entry(lrm_state, input, pcmk__xe_id(xml_rsc), NULL,
1489                              pcmk_ok, user_name, true);
1490             return;
1491 
1492         } else if (rc == -EINVAL) {
1493             // Resource operation on malformed resource
1494             crm_err("Invalid resource definition for %s", pcmk__xe_id(xml_rsc));
1495             crm_log_xml_warn(input->msg, "invalid resource");
1496             synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR,
1497                                     PCMK_OCF_NOT_CONFIGURED, // fatal error
1498                                     "Invalid resource definition");
1499             return;
1500 
1501         } else if (rc < 0) {
1502             // Error communicating with the executor
1503             crm_err("Could not register resource '%s' with executor: %s "
1504                     QB_XS " rc=%d",
1505                     pcmk__xe_id(xml_rsc), pcmk_strerror(rc), rc);
1506             crm_log_xml_warn(input->msg, "failed registration");
1507             synthesize_lrmd_failure(lrm_state, input->xml, PCMK_EXEC_ERROR,
1508                                     PCMK_OCF_INVALID_PARAM, // hard error
1509                                     "Could not register resource with executor");
1510             return;
1511         }
1512 
1513         if (pcmk__str_eq(operation, PCMK_ACTION_CANCEL, pcmk__str_none)) {
1514             if (!do_lrm_cancel(input, lrm_state, rsc, from_host, from_sys)) {
1515                 crm_log_xml_warn(input->xml, "Bad command");
1516             }
1517 
1518         } else if (pcmk__str_eq(operation, PCMK_ACTION_DELETE,
1519                                 pcmk__str_none)) {
1520             do_lrm_delete(input, lrm_state, rsc, from_sys, from_host,
1521                           crm_rsc_delete, user_name);
1522 
1523         } else {
1524             struct ra_metadata_s *md = NULL;
1525 
1526             /* Getting metadata from cache is OK except for start actions --
1527              * always refresh from the agent for those, in case the resource
1528              * agent was updated.
1529              *
1530              * @TODO Only refresh metadata for starts if the agent actually
1531              * changed (using something like inotify, or a hash or modification
1532              * time of the agent executable).
1533              */
1534             if (strcmp(operation, PCMK_ACTION_START) != 0) {
1535                 md = controld_get_rsc_metadata(lrm_state, rsc,
1536                                                controld_metadata_from_cache);
1537             }
1538 
1539             if ((md == NULL) && crm_op_needs_metadata(rsc->standard,
1540                                                       operation)) {
1541                 /* Most likely, we'll need the agent metadata to record the
1542                  * pending operation and the operation result. Get it now rather
1543                  * than wait until then, so the metadata action doesn't eat into
1544                  * the real action's timeout.
1545                  *
1546                  * @TODO Metadata is retrieved via direct execution of the
1547                  * agent, which has a couple of related issues: the executor
1548                  * should execute agents, not the controller; and metadata for
1549                  * Pacemaker Remote nodes should be collected on those nodes,
1550                  * not locally.
1551                  */
1552                 struct metadata_cb_data *data = NULL;
1553 
1554                 data = new_metadata_cb_data(rsc, input->xml);
1555                 crm_info("Retrieving metadata for %s (%s%s%s:%s) asynchronously",
1556                          rsc->id, rsc->standard,
1557                          ((rsc->provider == NULL)? "" : ":"),
1558                          ((rsc->provider == NULL)? "" : rsc->provider),
1559                          rsc->type);
1560                 (void) lrmd__metadata_async(rsc, metadata_complete,
1561                                             (void *) data);
1562             } else {
1563                 do_lrm_rsc_op(lrm_state, rsc, input->xml, md);
1564             }
1565         }
1566 
1567         lrmd_free_rsc_info(rsc);
1568 
1569     } else {
1570         crm_err("Invalid execution request: unknown command '%s' (bug?)",
1571                 crm_op);
1572         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
1573     }
1574 }
1575 
1576 static lrmd_event_data_t *
1577 construct_op(const lrm_state_t *lrm_state, const xmlNode *rsc_op,
     /* [previous][next][first][last][top][bottom][index][help] */
1578              const char *rsc_id, const char *operation)
1579 {
1580     lrmd_event_data_t *op = NULL;
1581     const char *op_delay = NULL;
1582     const char *op_timeout = NULL;
1583     GHashTable *params = NULL;
1584 
1585     xmlNode *primitive = NULL;
1586     const char *class = NULL;
1587 
1588     const char *transition = NULL;
1589 
1590     pcmk__assert((rsc_id != NULL) && (operation != NULL));
1591 
1592     op = lrmd_new_event(rsc_id, operation, 0);
1593     op->type = lrmd_event_exec_complete;
1594     op->timeout = 0;
1595     op->start_delay = 0;
1596     lrmd__set_result(op, PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING, NULL);
1597 
1598     if (rsc_op == NULL) {
1599         CRM_LOG_ASSERT(pcmk__str_eq(operation, PCMK_ACTION_STOP,
1600                                     pcmk__str_casei));
1601         op->user_data = NULL;
1602         /* the stop_all_resources() case
1603          * by definition there is no DC (or they'd be shutting
1604          *   us down).
1605          * So we should put our version here.
1606          */
1607         op->params = pcmk__strkey_table(free, free);
1608 
1609         pcmk__insert_dup(op->params, PCMK_XA_CRM_FEATURE_SET, CRM_FEATURE_SET);
1610 
1611         crm_trace("Constructed %s op for %s", operation, rsc_id);
1612         return op;
1613     }
1614 
1615     params = xml2list(rsc_op);
1616     g_hash_table_remove(params, CRM_META "_" PCMK__META_OP_TARGET_RC);
1617 
1618     op_delay = crm_meta_value(params, PCMK_META_START_DELAY);
1619     pcmk__scan_min_int(op_delay, &op->start_delay, 0);
1620 
1621     op_timeout = crm_meta_value(params, PCMK_META_TIMEOUT);
1622     pcmk__scan_min_int(op_timeout, &op->timeout, 0);
1623 
1624     if (pcmk__guint_from_hash(params, CRM_META "_" PCMK_META_INTERVAL, 0,
1625                               &(op->interval_ms)) != pcmk_rc_ok) {
1626         op->interval_ms = 0;
1627     }
1628 
1629     /* Use pcmk_monitor_timeout instead of meta timeout for stonith
1630        recurring monitor, if set */
1631     primitive = pcmk__xe_first_child(rsc_op, PCMK_XE_PRIMITIVE, NULL, NULL);
1632     class = crm_element_value(primitive, PCMK_XA_CLASS);
1633 
1634     if (pcmk_is_set(pcmk_get_ra_caps(class), pcmk_ra_cap_fence_params)
1635             && pcmk__str_eq(operation, PCMK_ACTION_MONITOR, pcmk__str_casei)
1636             && (op->interval_ms > 0)) {
1637 
1638         op_timeout = g_hash_table_lookup(params, "pcmk_monitor_timeout");
1639         if (op_timeout != NULL) {
1640             long long timeout_ms = crm_get_msec(op_timeout);
1641 
1642             op->timeout = (int) QB_MIN(timeout_ms, INT_MAX);
1643         }
1644     }
1645 
1646     if (!pcmk__str_eq(operation, PCMK_ACTION_STOP, pcmk__str_casei)) {
1647         op->params = params;
1648 
1649     } else {
1650         rsc_history_t *entry = NULL;
1651 
1652         if (lrm_state) {
1653             entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
1654         }
1655 
1656         /* If we do not have stop parameters cached, use
1657          * whatever we are given */
1658         if (!entry || !entry->stop_params) {
1659             op->params = params;
1660         } else {
1661             /* Copy the cached parameter list so that we stop the resource
1662              * with the old attributes, not the new ones */
1663             op->params = pcmk__strkey_table(free, free);
1664 
1665             g_hash_table_foreach(params, copy_meta_keys, op->params);
1666             g_hash_table_foreach(entry->stop_params, copy_instance_keys, op->params);
1667             g_hash_table_destroy(params);
1668             params = NULL;
1669         }
1670     }
1671 
1672     /* sanity */
1673     if (op->timeout <= 0) {
1674         op->timeout = op->interval_ms;
1675     }
1676     if (op->start_delay < 0) {
1677         op->start_delay = 0;
1678     }
1679 
1680     transition = crm_element_value(rsc_op, PCMK__XA_TRANSITION_KEY);
1681     CRM_CHECK(transition != NULL, return op);
1682 
1683     op->user_data = pcmk__str_copy(transition);
1684 
1685     if (op->interval_ms != 0) {
1686         if (pcmk__strcase_any_of(operation, PCMK_ACTION_START, PCMK_ACTION_STOP,
1687                                  NULL)) {
1688             crm_err("Start and Stop actions cannot have an interval: %u",
1689                     op->interval_ms);
1690             op->interval_ms = 0;
1691         }
1692     }
1693 
1694     crm_trace("Constructed %s op for %s: interval=%u",
1695               operation, rsc_id, op->interval_ms);
1696 
1697     return op;
1698 }
1699 
1700 /*!
1701  * \internal
1702  * \brief Send a (synthesized) event result
1703  *
1704  * Reply with a synthesized event result directly, as opposed to going through
1705  * the executor.
1706  *
1707  * \param[in]     to_host  Host to send result to
1708  * \param[in]     to_sys   IPC name to send result (NULL for transition engine)
1709  * \param[in]     rsc      Type information about resource the result is for
1710  * \param[in,out] op       Event with result to send
1711  * \param[in]     rsc_id   ID of resource the result is for
1712  */
1713 void
1714 controld_ack_event_directly(const char *to_host, const char *to_sys,
     /* [previous][next][first][last][top][bottom][index][help] */
1715                             const lrmd_rsc_info_t *rsc, lrmd_event_data_t *op,
1716                             const char *rsc_id)
1717 {
1718     xmlNode *reply = NULL;
1719     xmlNode *update, *iter;
1720     pcmk__node_status_t *peer = NULL;
1721 
1722     CRM_CHECK(op != NULL, return);
1723     if (op->rsc_id == NULL) {
1724         // op->rsc_id is a (const char *) but lrmd_free_event() frees it
1725         pcmk__assert(rsc_id != NULL);
1726         op->rsc_id = pcmk__str_copy(rsc_id);
1727     }
1728     if (to_sys == NULL) {
1729         to_sys = CRM_SYSTEM_TENGINE;
1730     }
1731 
1732     peer = controld_get_local_node_status();
1733     update = create_node_state_update(peer, controld_node_update_none, NULL,
1734                                       __func__);
1735 
1736     iter = pcmk__xe_create(update, PCMK__XE_LRM);
1737     crm_xml_add(iter, PCMK_XA_ID, controld_globals.our_uuid);
1738     iter = pcmk__xe_create(iter, PCMK__XE_LRM_RESOURCES);
1739     iter = pcmk__xe_create(iter, PCMK__XE_LRM_RESOURCE);
1740 
1741     crm_xml_add(iter, PCMK_XA_ID, op->rsc_id);
1742 
1743     controld_add_resource_history_xml(iter, rsc, op,
1744                                       controld_globals.cluster->priv->node_name);
1745 
1746     /* We don't have the original message ID, so use "direct-ack" (we just need
1747      * something non-NULL for this to create a reply)
1748      *
1749      * @TODO It would be better to use the server, message ID, and task from the
1750      * original request when callers have it available
1751      */
1752     reply = pcmk__new_message(pcmk_ipc_controld, "direct-ack", CRM_SYSTEM_LRMD,
1753                               to_host, to_sys, CRM_OP_INVOKE_LRM, update);
1754 
1755     crm_log_xml_trace(update, "[direct ACK]");
1756 
1757     crm_debug("ACK'ing resource op " PCMK__OP_FMT " from %s: %s",
1758               op->rsc_id, op->op_type, op->interval_ms, op->user_data,
1759               crm_element_value(reply, PCMK_XA_REFERENCE));
1760 
1761     if (relay_message(reply, TRUE) == FALSE) {
1762         crm_log_xml_err(reply, "Unable to route reply");
1763     }
1764 
1765     pcmk__xml_free(update);
1766     pcmk__xml_free(reply);
1767 }
1768 
1769 gboolean
1770 verify_stopped(enum crmd_fsa_state cur_state, int log_level)
     /* [previous][next][first][last][top][bottom][index][help] */
1771 {
1772     gboolean res = TRUE;
1773     GList *lrm_state_list = lrm_state_get_list();
1774     GList *state_entry;
1775 
1776     for (state_entry = lrm_state_list; state_entry != NULL; state_entry = state_entry->next) {
1777         lrm_state_t *lrm_state = state_entry->data;
1778 
1779         if (!lrm_state_verify_stopped(lrm_state, cur_state, log_level)) {
1780             /* keep iterating through all even when false is returned */
1781             res = FALSE;
1782         }
1783     }
1784 
1785     controld_set_fsa_input_flags(R_SENT_RSC_STOP);
1786     g_list_free(lrm_state_list); lrm_state_list = NULL;
1787     return res;
1788 }
1789 
1790 struct stop_recurring_action_s {
1791     lrmd_rsc_info_t *rsc;
1792     lrm_state_t *lrm_state;
1793 };
1794 
1795 static gboolean
1796 stop_recurring_action_by_rsc(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1797 {
1798     gboolean remove = FALSE;
1799     struct stop_recurring_action_s *event = user_data;
1800     active_op_t *op = value;
1801 
1802     if ((op->interval_ms != 0)
1803         && pcmk__str_eq(op->rsc_id, event->rsc->id, pcmk__str_none)) {
1804 
1805         crm_debug("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id, (char*)key);
1806         remove = !cancel_op(event->lrm_state, event->rsc->id, key, op->call_id, FALSE);
1807     }
1808 
1809     return remove;
1810 }
1811 
1812 static gboolean
1813 stop_recurring_actions(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1814 {
1815     gboolean remove = FALSE;
1816     lrm_state_t *lrm_state = user_data;
1817     active_op_t *op = value;
1818 
1819     if (op->interval_ms != 0) {
1820         crm_info("Cancelling op %d for %s (%s)", op->call_id, op->rsc_id,
1821                  (const char *) key);
1822         remove = !cancel_op(lrm_state, op->rsc_id, key, op->call_id, FALSE);
1823     }
1824 
1825     return remove;
1826 }
1827 
1828 /*!
1829  * \internal
1830  * \brief Check whether recurring actions should be cancelled before an action
1831  *
1832  * \param[in] rsc_id       Resource that action is for
1833  * \param[in] action       Action being performed
1834  * \param[in] interval_ms  Operation interval of \p action (in milliseconds)
1835  *
1836  * \return true if recurring actions should be cancelled, otherwise false
1837  */
1838 static bool
1839 should_cancel_recurring(const char *rsc_id, const char *action, guint interval_ms)
     /* [previous][next][first][last][top][bottom][index][help] */
1840 {
1841     if (is_remote_lrmd_ra(NULL, NULL, rsc_id) && (interval_ms == 0)
1842         && (strcmp(action, PCMK_ACTION_MIGRATE_TO) == 0)) {
1843         /* Don't stop monitoring a migrating Pacemaker Remote connection
1844          * resource until the entire migration has completed. We must detect if
1845          * the connection is unexpectedly severed, even during a migration.
1846          */
1847         return false;
1848     }
1849 
1850     // Cancel recurring actions before changing resource state
1851     return (interval_ms == 0)
1852             && !pcmk__str_any_of(action, PCMK_ACTION_MONITOR,
1853                                  PCMK_ACTION_NOTIFY, NULL);
1854 }
1855 
1856 /*!
1857  * \internal
1858  * \brief Check whether an action should not be performed at this time
1859  *
1860  * \param[in] operation  Action to be performed
1861  *
1862  * \return Readable description of why action should not be performed,
1863  *         or NULL if it should be performed
1864  */
1865 static const char *
1866 should_nack_action(const char *action)
     /* [previous][next][first][last][top][bottom][index][help] */
1867 {
1868     if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)
1869         && pcmk__str_eq(action, PCMK_ACTION_START, pcmk__str_none)) {
1870 
1871         register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
1872         return "Not attempting start due to shutdown in progress";
1873     }
1874 
1875     switch (controld_globals.fsa_state) {
1876         case S_NOT_DC:
1877         case S_POLICY_ENGINE:   // Recalculating
1878         case S_TRANSITION_ENGINE:
1879             break;
1880         default:
1881             if (!pcmk__str_eq(action, PCMK_ACTION_STOP, pcmk__str_none)) {
1882                 return "Controller cannot attempt actions at this time";
1883             }
1884             break;
1885     }
1886     return NULL;
1887 }
1888 
1889 static void
1890 do_lrm_rsc_op(lrm_state_t *lrm_state, lrmd_rsc_info_t *rsc, xmlNode *msg,
     /* [previous][next][first][last][top][bottom][index][help] */
1891               struct ra_metadata_s *md)
1892 {
1893     int rc;
1894     int call_id = 0;
1895     char *op_id = NULL;
1896     lrmd_event_data_t *op = NULL;
1897     fsa_data_t *msg_data = NULL;
1898     const char *transition = NULL;
1899     const char *operation = NULL;
1900     const char *nack_reason = NULL;
1901 
1902     CRM_CHECK((rsc != NULL) && (msg != NULL), return);
1903 
1904     operation = crm_element_value(msg, PCMK_XA_OPERATION);
1905     CRM_CHECK(!pcmk__str_empty(operation), return);
1906 
1907     transition = crm_element_value(msg, PCMK__XA_TRANSITION_KEY);
1908     if (pcmk__str_empty(transition)) {
1909         crm_log_xml_err(msg, "Missing transition number");
1910     }
1911 
1912     if (lrm_state == NULL) {
1913         // This shouldn't be possible, but provide a failsafe just in case
1914         crm_err("Cannot execute %s of %s: No executor connection "
1915                 QB_XS " transition_key=%s",
1916                 operation, rsc->id, pcmk__s(transition, ""));
1917         synthesize_lrmd_failure(NULL, msg, PCMK_EXEC_INVALID,
1918                                 PCMK_OCF_UNKNOWN_ERROR,
1919                                 "No executor connection");
1920         return;
1921     }
1922 
1923     if (pcmk__str_any_of(operation, PCMK_ACTION_RELOAD,
1924                          PCMK_ACTION_RELOAD_AGENT, NULL)) {
1925         /* Pre-2.1.0 DCs will schedule reload actions only, and 2.1.0+ DCs
1926          * will schedule reload-agent actions only. In either case, we need
1927          * to map that to whatever the resource agent actually supports.
1928          * Default to the OCF 1.1 name.
1929          */
1930         if ((md != NULL)
1931             && pcmk_is_set(md->ra_flags, ra_supports_legacy_reload)) {
1932             operation = PCMK_ACTION_RELOAD;
1933         } else {
1934             operation = PCMK_ACTION_RELOAD_AGENT;
1935         }
1936     }
1937 
1938     op = construct_op(lrm_state, msg, rsc->id, operation);
1939     CRM_CHECK(op != NULL, return);
1940 
1941     if (should_cancel_recurring(rsc->id, operation, op->interval_ms)) {
1942         guint removed = 0;
1943         struct stop_recurring_action_s data;
1944 
1945         data.rsc = rsc;
1946         data.lrm_state = lrm_state;
1947         removed = g_hash_table_foreach_remove(lrm_state->active_ops,
1948                                               stop_recurring_action_by_rsc,
1949                                               &data);
1950 
1951         if (removed) {
1952             crm_debug("Stopped %u recurring operation%s in preparation for "
1953                       PCMK__OP_FMT, removed, pcmk__plural_s(removed),
1954                       rsc->id, operation, op->interval_ms);
1955         }
1956     }
1957 
1958     nack_reason = should_nack_action(operation);
1959     if (nack_reason != NULL) {
1960         crm_notice("Not requesting local execution of %s operation for %s on %s"
1961                    " in state %s: %s",
1962                    pcmk__readable_action(op->op_type, op->interval_ms), rsc->id,
1963                    lrm_state->node_name,
1964                    fsa_state2string(controld_globals.fsa_state), nack_reason);
1965 
1966         lrmd__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_INVALID,
1967                          nack_reason);
1968         controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id);
1969         lrmd_free_event(op);
1970         free(op_id);
1971         return;
1972     }
1973 
1974     crm_notice("Requesting local execution of %s operation for %s on %s "
1975                QB_XS " transition %s",
1976                pcmk__readable_action(op->op_type, op->interval_ms), rsc->id,
1977                lrm_state->node_name, pcmk__s(transition, ""));
1978 
1979     controld_record_pending_op(lrm_state->node_name, rsc, op);
1980 
1981     op_id = pcmk__op_key(rsc->id, op->op_type, op->interval_ms);
1982 
1983     if (op->interval_ms > 0) {
1984         /* cancel it so we can then restart it without conflict */
1985         cancel_op_key(lrm_state, rsc, op_id, FALSE);
1986     }
1987 
1988     rc = controld_execute_resource_agent(lrm_state, rsc->id, op->op_type,
1989                                          op->user_data, op->interval_ms,
1990                                          op->timeout, op->start_delay,
1991                                          op->params, &call_id);
1992     if (rc == pcmk_rc_ok) {
1993         /* record all operations so we can wait
1994          * for them to complete during shutdown
1995          */
1996         char *call_id_s = make_stop_id(rsc->id, call_id);
1997         active_op_t *pending = NULL;
1998 
1999         pending = pcmk__assert_alloc(1, sizeof(active_op_t));
2000         crm_trace("Recording pending op: %d - %s %s", call_id, op_id, call_id_s);
2001 
2002         pending->call_id = call_id;
2003         pending->interval_ms = op->interval_ms;
2004         pending->op_type = pcmk__str_copy(operation);
2005         pending->op_key = pcmk__str_copy(op_id);
2006         pending->rsc_id = pcmk__str_copy(rsc->id);
2007         pending->start_time = time(NULL);
2008         pending->user_data = pcmk__str_copy(op->user_data);
2009         if (crm_element_value_epoch(msg, PCMK_OPT_SHUTDOWN_LOCK,
2010                                     &(pending->lock_time)) != pcmk_ok) {
2011             pending->lock_time = 0;
2012         }
2013         g_hash_table_replace(lrm_state->active_ops, call_id_s, pending);
2014 
2015         if ((op->interval_ms > 0)
2016             && (op->start_delay > START_DELAY_THRESHOLD)) {
2017             int target_rc = PCMK_OCF_OK;
2018 
2019             crm_info("Faking confirmation of %s: execution postponed for over 5 minutes", op_id);
2020             decode_transition_key(op->user_data, NULL, NULL, NULL, &target_rc);
2021             lrmd__set_result(op, target_rc, PCMK_EXEC_DONE, NULL);
2022             controld_ack_event_directly(NULL, NULL, rsc, op, rsc->id);
2023         }
2024 
2025         pending->params = op->params;
2026         op->params = NULL;
2027 
2028     } else if (lrm_state_is_local(lrm_state)) {
2029         crm_err("Could not initiate %s action for resource %s locally: %s "
2030                 QB_XS " rc=%d", operation, rsc->id, pcmk_rc_str(rc), rc);
2031         fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED,
2032                        PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc));
2033         process_lrm_event(lrm_state, op, NULL, NULL);
2034         register_fsa_error(C_FSA_INTERNAL, I_FAIL, NULL);
2035 
2036     } else {
2037         crm_err("Could not initiate %s action for resource %s remotely on %s: "
2038                 "%s " QB_XS " rc=%d",
2039                 operation, rsc->id, lrm_state->node_name, pcmk_rc_str(rc), rc);
2040         fake_op_status(lrm_state, op, PCMK_EXEC_NOT_CONNECTED,
2041                        PCMK_OCF_UNKNOWN_ERROR, pcmk_rc_str(rc));
2042         process_lrm_event(lrm_state, op, NULL, NULL);
2043     }
2044 
2045     free(op_id);
2046     lrmd_free_event(op);
2047 }
2048 
/*!
 * \internal
 * \brief Copy a string, turning escaped newlines into real ones
 *
 * Each two-character sequence of a backslash followed by 'n' becomes a newline
 * followed by a space, so the copy keeps the same length as the original.
 *
 * \param[in] string  String to copy (may be NULL)
 *
 * \return Newly allocated copy of \p string with replacements applied, or NULL
 *         if \p string is NULL
 */
static char *
unescape_newlines(const char *string)
{
    static const char *escaped_newline = "\\n";
    char *unescaped = NULL;

    if (string == NULL) {
        return NULL;
    }

    unescaped = pcmk__str_copy(string);

    for (char *match = strstr(unescaped, escaped_newline); match != NULL;
         match = strstr(match, escaped_newline)) {

        /* Overwrite both characters in place, so the rest of the buffer
         * doesn't have to be shuffled
         */
        match[0] = '\n';
        match[1] = ' ';
    }

    return unescaped;
}
2073 
2074 static bool
2075 did_lrm_rsc_op_fail(lrm_state_t *lrm_state, const char * rsc_id,
     /* [previous][next][first][last][top][bottom][index][help] */
2076                     const char * op_type, guint interval_ms)
2077 {
2078     rsc_history_t *entry = NULL;
2079 
2080     CRM_CHECK(lrm_state != NULL, return FALSE);
2081     CRM_CHECK(rsc_id != NULL, return FALSE);
2082     CRM_CHECK(op_type != NULL, return FALSE);
2083 
2084     entry = g_hash_table_lookup(lrm_state->resource_history, rsc_id);
2085     if (entry == NULL || entry->failed == NULL) {
2086         return FALSE;
2087     }
2088 
2089     if (pcmk__str_eq(entry->failed->rsc_id, rsc_id, pcmk__str_none)
2090         && pcmk__str_eq(entry->failed->op_type, op_type, pcmk__str_casei)
2091         && entry->failed->interval_ms == interval_ms) {
2092         return TRUE;
2093     }
2094 
2095     return FALSE;
2096 }
2097 
2098 /*!
2099  * \internal
2100  * \brief Log the result of an executor action (actual or synthesized)
2101  *
2102  * \param[in] op         Executor action to log result for
2103  * \param[in] op_key     Operation key for action
2104  * \param[in] node_name  Name of node action was performed on, if known
2105  * \param[in] confirmed  Whether to log that graph action was confirmed
2106  */
2107 static void
2108 log_executor_event(const lrmd_event_data_t *op, const char *op_key,
     /* [previous][next][first][last][top][bottom][index][help] */
2109                    const char *node_name, gboolean confirmed)
2110 {
2111     int log_level = LOG_ERR;
2112     GString *str = g_string_sized_new(100); // reasonable starting size
2113 
2114     pcmk__g_strcat(str,
2115                    "Result of ",
2116                    pcmk__readable_action(op->op_type, op->interval_ms),
2117                    " operation for ", op->rsc_id, NULL);
2118 
2119     if (node_name != NULL) {
2120         pcmk__g_strcat(str, " on ", node_name, NULL);
2121     }
2122 
2123     switch (op->op_status) {
2124         case PCMK_EXEC_DONE:
2125             log_level = LOG_NOTICE;
2126             pcmk__g_strcat(str, ": ", crm_exit_str((crm_exit_t) op->rc), NULL);
2127             break;
2128 
2129         case PCMK_EXEC_TIMEOUT:
2130             pcmk__g_strcat(str,
2131                            ": ", pcmk_exec_status_str(op->op_status), " after ",
2132                            pcmk__readable_interval(op->timeout), NULL);
2133             break;
2134 
2135         case PCMK_EXEC_CANCELLED:
2136             log_level = LOG_INFO;
2137             pcmk__g_strcat(str, ": ", pcmk_exec_status_str(op->op_status),
2138                            NULL);
2139             break;
2140 
2141         default:
2142             pcmk__g_strcat(str, ": ", pcmk_exec_status_str(op->op_status),
2143                            NULL);
2144             break;
2145     }
2146 
2147     if ((op->exit_reason != NULL)
2148         && ((op->op_status != PCMK_EXEC_DONE) || (op->rc != PCMK_OCF_OK))) {
2149 
2150         pcmk__g_strcat(str, " (", op->exit_reason, ")", NULL);
2151     }
2152 
2153     g_string_append(str, " " QB_XS);
2154     g_string_append_printf(str, " graph action %sconfirmed; call=%d key=%s",
2155                            (confirmed? "" : "un"), op->call_id, op_key);
2156     if (op->op_status == PCMK_EXEC_DONE) {
2157         g_string_append_printf(str, " rc=%d", op->rc);
2158     }
2159 
2160     do_crm_log(log_level, "%s", str->str);
2161     g_string_free(str, TRUE);
2162 
2163     /* The services library has already logged the output at info or debug
2164      * level, so just raise to notice if it looks like a failure.
2165      */
2166     if ((op->output != NULL) && (op->rc != PCMK_OCF_OK)) {
2167         char *prefix = crm_strdup_printf(PCMK__OP_FMT "@%s output",
2168                                          op->rsc_id, op->op_type,
2169                                          op->interval_ms, node_name);
2170 
2171         crm_log_output(LOG_NOTICE, prefix, op->output);
2172         free(prefix);
2173     }
2174 }
2175 
2176 void
2177 process_lrm_event(lrm_state_t *lrm_state, lrmd_event_data_t *op,
     /* [previous][next][first][last][top][bottom][index][help] */
2178                   active_op_t *pending, const xmlNode *action_xml)
2179 {
2180     char *op_id = NULL;
2181     char *op_key = NULL;
2182 
2183     gboolean remove = FALSE;
2184     gboolean removed = FALSE;
2185     bool need_direct_ack = FALSE;
2186     lrmd_rsc_info_t *rsc = NULL;
2187     const char *node_name = NULL;
2188 
2189     CRM_CHECK(op != NULL, return);
2190     CRM_CHECK(op->rsc_id != NULL, return);
2191 
2192     // Remap new status codes for older DCs
2193     if (compare_version(controld_globals.dc_version, "3.2.0") < 0) {
2194         switch (op->op_status) {
2195             case PCMK_EXEC_NOT_CONNECTED:
2196                 lrmd__set_result(op, PCMK_OCF_CONNECTION_DIED,
2197                                  PCMK_EXEC_ERROR, op->exit_reason);
2198                 break;
2199             case PCMK_EXEC_INVALID:
2200                 lrmd__set_result(op, CRM_DIRECT_NACK_RC, PCMK_EXEC_ERROR,
2201                                  op->exit_reason);
2202                 break;
2203             default:
2204                 break;
2205         }
2206     }
2207 
2208     op_id = make_stop_id(op->rsc_id, op->call_id);
2209     op_key = pcmk__op_key(op->rsc_id, op->op_type, op->interval_ms);
2210 
2211     // Get resource info if available (from executor state or action XML)
2212     if (lrm_state) {
2213         rsc = lrm_state_get_rsc_info(lrm_state, op->rsc_id, 0);
2214     }
2215     if ((rsc == NULL) && action_xml) {
2216         xmlNode *xml = pcmk__xe_first_child(action_xml, PCMK_XE_PRIMITIVE, NULL,
2217                                             NULL);
2218 
2219         const char *standard = crm_element_value(xml, PCMK_XA_CLASS);
2220         const char *provider = crm_element_value(xml, PCMK_XA_PROVIDER);
2221         const char *type = crm_element_value(xml, PCMK_XA_TYPE);
2222 
2223         if (standard && type) {
2224             crm_info("%s agent information not cached, using %s%s%s:%s from action XML",
2225                      op->rsc_id, standard,
2226                      (provider? ":" : ""), (provider? provider : ""), type);
2227             rsc = lrmd_new_rsc_info(op->rsc_id, standard, provider, type);
2228         } else {
2229             crm_err("Can't process %s result because %s agent information not cached or in XML",
2230                     op_key, op->rsc_id);
2231         }
2232     }
2233 
2234     // Get node name if available (from executor state or action XML)
2235     if (lrm_state) {
2236         node_name = lrm_state->node_name;
2237     } else if (action_xml) {
2238         node_name = crm_element_value(action_xml, PCMK__META_ON_NODE);
2239     }
2240 
2241     if(pending == NULL) {
2242         remove = TRUE;
2243         if (lrm_state) {
2244             pending = g_hash_table_lookup(lrm_state->active_ops, op_id);
2245         }
2246     }
2247 
2248     if (op->op_status == PCMK_EXEC_ERROR) {
2249         switch(op->rc) {
2250             case PCMK_OCF_NOT_RUNNING:
2251             case PCMK_OCF_RUNNING_PROMOTED:
2252             case PCMK_OCF_DEGRADED:
2253             case PCMK_OCF_DEGRADED_PROMOTED:
2254                 // Leave it to the TE/scheduler to decide if this is an error
2255                 op->op_status = PCMK_EXEC_DONE;
2256                 break;
2257             default:
2258                 /* Nothing to do */
2259                 break;
2260         }
2261     }
2262 
2263     if (op->op_status != PCMK_EXEC_CANCELLED) {
2264         /* We might not record the result, so directly acknowledge it to the
2265          * originator instead, so it doesn't time out waiting for the result
2266          * (especially important if part of a transition).
2267          */
2268         need_direct_ack = TRUE;
2269 
2270         if (controld_action_is_recordable(op->op_type)) {
2271             if (node_name && rsc) {
2272                 // We should record the result, and happily, we can
2273                 time_t lock_time = (pending == NULL)? 0 : pending->lock_time;
2274 
2275                 controld_update_resource_history(node_name, rsc, op, lock_time);
2276                 need_direct_ack = FALSE;
2277 
2278             } else if (op->rsc_deleted) {
2279                 /* We shouldn't record the result (likely the resource was
2280                  * refreshed, cleaned, or removed while this operation was
2281                  * in flight).
2282                  */
2283                 crm_notice("Not recording %s result in CIB because "
2284                            "resource information was removed since it was initiated",
2285                            op_key);
2286             } else {
2287                 /* This shouldn't be possible; the executor didn't consider the
2288                  * resource deleted, but we couldn't find resource or node
2289                  * information.
2290                  */
2291                 crm_err("Unable to record %s result in CIB: %s", op_key,
2292                         (node_name? "No resource information" : "No node name"));
2293             }
2294         }
2295 
2296     } else if (op->interval_ms == 0) {
2297         /* A non-recurring operation was cancelled. Most likely, the
2298          * never-initiated action was removed from the executor's pending
2299          * operations list upon resource removal.
2300          */
2301         need_direct_ack = TRUE;
2302 
2303     } else if (pending == NULL) {
2304         /* This recurring operation was cancelled, but was not pending. No
2305          * transition actions are waiting on it, nothing needs to be done.
2306          */
2307 
2308     } else if (op->user_data == NULL) {
2309         /* This recurring operation was cancelled and pending, but we don't
2310          * have a transition key. This should never happen.
2311          */
2312         crm_err("Recurring operation %s was cancelled without transition information",
2313                 op_key);
2314 
2315     } else if (pcmk_is_set(pending->flags, active_op_remove)) {
2316         /* This recurring operation was cancelled (by us) and pending, and we
2317          * have been waiting for it to finish.
2318          */
2319         if (lrm_state) {
2320             controld_delete_action_history(op);
2321         }
2322 
2323         /* Directly acknowledge failed recurring actions here. The above call to
2324          * controld_delete_action_history() will not erase any corresponding
2325          * last_failure entry, which means that the DC won't confirm the
2326          * cancellation via process_op_deletion(), and the transition would
2327          * otherwise wait for the action timer to pop.
2328          */
2329         if (did_lrm_rsc_op_fail(lrm_state, pending->rsc_id,
2330                                 pending->op_type, pending->interval_ms)) {
2331             need_direct_ack = TRUE;
2332         }
2333 
2334     } else if (op->rsc_deleted) {
2335         /* This recurring operation was cancelled (but not by us, and the
2336          * executor does not have resource information, likely due to resource
2337          * cleanup, refresh, or removal) and pending.
2338          */
2339         crm_debug("Recurring op %s was cancelled due to resource deletion",
2340                   op_key);
2341         need_direct_ack = TRUE;
2342 
2343     } else {
2344         /* This recurring operation was cancelled (but not by us, likely by the
2345          * executor before stopping the resource) and pending. We don't need to
2346          * do anything special.
2347          */
2348     }
2349 
2350     if (need_direct_ack) {
2351         controld_ack_event_directly(NULL, NULL, NULL, op, op->rsc_id);
2352     }
2353 
2354     if(remove == FALSE) {
2355         /* The caller will do this afterwards, but keep the logging consistent */
2356         removed = TRUE;
2357 
2358     } else if (lrm_state && ((op->interval_ms == 0)
2359                              || (op->op_status == PCMK_EXEC_CANCELLED))) {
2360 
2361         gboolean found = g_hash_table_remove(lrm_state->active_ops, op_id);
2362 
2363         if (op->interval_ms != 0) {
2364             removed = TRUE;
2365         } else if (found) {
2366             removed = TRUE;
2367             crm_trace("Op %s (call=%d, stop-id=%s, remaining=%u): Confirmed",
2368                       op_key, op->call_id, op_id,
2369                       g_hash_table_size(lrm_state->active_ops));
2370         }
2371     }
2372 
2373     log_executor_event(op, op_key, node_name, removed);
2374 
2375     if (lrm_state) {
2376         if (!pcmk__str_eq(op->op_type, PCMK_ACTION_META_DATA,
2377                           pcmk__str_casei)) {
2378             crmd_alert_resource_op(lrm_state->node_name, op);
2379         } else if (rsc && (op->rc == PCMK_OCF_OK)) {
2380             char *metadata = unescape_newlines(op->output);
2381 
2382             controld_cache_metadata(lrm_state->metadata_cache, rsc, metadata);
2383             free(metadata);
2384         }
2385     }
2386 
2387     if (op->rsc_deleted) {
2388         crm_info("Deletion of resource '%s' complete after %s", op->rsc_id, op_key);
2389         if (lrm_state) {
2390             delete_rsc_entry(lrm_state, NULL, op->rsc_id, NULL, pcmk_ok, NULL,
2391                              true);
2392         }
2393     }
2394 
2395     /* If a shutdown was escalated while operations were pending,
2396      * then the FSA will be stalled right now... allow it to continue
2397      */
2398     controld_trigger_fsa();
2399     if (lrm_state && rsc) {
2400         update_history_cache(lrm_state, rsc, op);
2401     }
2402 
2403     lrmd_free_rsc_info(rsc);
2404     free(op_key);
2405     free(op_id);
2406 }

/* [previous][next][first][last][top][bottom][index][help] */